"""Gradio demo that answers a prompt with a regex-constrained model response."""

import gradio as gr
import spaces
from guidance import gen
from guidance.models import Transformers

from mymodule.utils import load_model_and_tok

# Instruction text appended (after a single space) to every user prompt.
PROMPT_SUFFIX = (
    "Only use results from the genomic data commons in your response "
    "and provide frequencies as a percentage. "
    "Only report the final response."
)

# Pattern the generated text must follow. Written as a raw string so that
# ``\d`` and ``\.`` reach the regex engine verbatim instead of being parsed
# as (invalid) Python string escapes.
ANSWER_REGEX = r"The final answer is: \d*\.\d*%"

# Loaded once at import time and reused by every request.
model, tok = load_model_and_tok()


@spaces.GPU(duration=10)
def greet(prompt: str) -> str:
    """Generate a constrained answer for ``prompt``.

    The user prompt is followed by ``PROMPT_SUFFIX`` and fed to a guidance
    ``Transformers`` model wrapping the module-level ``model``/``tok``. The
    completion is captured under the name ``"response"`` and returned.

    Args:
        prompt: Free-text question entered in the Gradio text box.

    Returns:
        The text captured as ``"response"``; generation is constrained by
        ``ANSWER_REGEX`` and capped at 1024 tokens.
    """
    full_prompt = f"{prompt} {PROMPT_SUFFIX}"

    lm = Transformers(model=model, tokenizer=tok)
    lm += full_prompt
    lm += gen(
        "response",
        n=1,
        temperature=0,
        max_tokens=1024,
        regex=ANSWER_REGEX,
    )
    return lm["response"]


demo = gr.Interface(fn=greet, inputs=gr.Text(), outputs=gr.Text())
demo.launch()