# Hugging Face Spaces demo app (Space status: Running)
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
def haiku_generate():
    """Generate a haiku with TinyLlama-1.1B-Chat and return the decoded text.

    Loads the tokenizer and model on every call (slow; consider hoisting the
    load to module level so the Space only pays the cost once), prompts the
    model in Japanese, and samples up to 25 new tokens.

    Returns:
        str: The decoded output (prompt plus generated continuation), with
        special tokens stripped.
    """
    model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # FIX: the class is AutoModelForCausalLM (original said "CasualLM"), and
    # from_pretrained is a call — parentheses, not braces.
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",
        low_cpu_mem_usage=True,
    )
    model.eval()
    prompt = "俳句をひとつ生成して"  # runtime string kept byte-for-byte
    inputs = tokenizer(prompt, return_tensors="pt")
    # device_map="auto" places the model; when no GPU exists, pin the input
    # tensors to CPU explicitly so they match the model's device.
    if not torch.cuda.is_available():
        inputs = {k: v.to("cpu") for k, v in inputs.items()}
    # FIX: torch.no_grad (original said "no_glad"); generate() is a call —
    # parentheses, not braces.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=25,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
def greet(name):
    """Return the greeting "Hello <name>!!" for the given name string."""
    pieces = ["Hello ", name, "!!"]
    return "".join(pieces)
# FIX: gr.Interface requires an `inputs` argument; haiku_generate takes no
# parameters, so declare an empty list of input components.
demo = gr.Interface(fn=haiku_generate, inputs=[], outputs="text")

# Launch only when run as a script (how Spaces executes app.py); importing
# this module elsewhere no longer starts a server as a side effect.
if __name__ == "__main__":
    demo.launch()