kjcjohnson's picture
Back to TinyLlama :(not enough memory):
dd652af
raw
history blame
379 Bytes
import gradio as gr
import loop
# Identifier of the model handed to the endpoint handler below
# (looks like a Hugging Face Hub "org/name" repo id — confirm).
MODEL_ID = "TinyLlama/TinyLlama_v1.1_math_code"
# Built once at import time and reused by every call to respond().
# NOTE(review): presumably this loads the model weights, which would make
# importing this module slow and memory-hungry — confirm in loop.py.
handler = loop.EndpointHandler(MODEL_ID)
def respond(prompt, grammar):
    """Send a prompt and a grammar to the shared handler; return its first result.

    Both values are packed into a request dict under the keys ``"inputs"``
    and ``"grammar"`` and passed to the module-level ``handler``. Element 0
    of whatever the handler returns is handed back to the caller.

    NOTE(review): the handler's return type and how it interprets
    ``grammar`` are defined in ``loop.py`` — presumably the grammar
    constrains generation; confirm there.
    """
    payload = dict(inputs=prompt, grammar=grammar)
    results = handler(payload)
    return results[0]
# Two free-text inputs are passed, in order, as respond's `prompt` and
# `grammar` arguments; its return value is rendered in one text area.
demo = gr.Interface(
    fn=respond,
    inputs=["textarea", "textarea"],
    outputs=["textarea"],
)

# Start the web UI only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()