# TinyLlama haiku generator — Gradio demo for Hugging Face Spaces.
# (Removed non-Python "Spaces: Running" status-banner residue that made the file unparseable.)
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
def haiku_generate() -> str:
    """Generate a haiku with TinyLlama and return the decoded text.

    Loads the tokenizer and model on every click (simple but slow — a
    production app would cache them at module level), samples up to 25
    new tokens, and returns the full decoded sequence, which includes
    the prompt text.

    Returns:
        The decoded generation as a single string.
    """
    model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",  # dispatches to GPU when available, else CPU
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        low_cpu_mem_usage=True,
    )
    model.eval()

    # Prompt: "generate only a haiku" (runtime string kept byte-identical).
    prompt = "俳句だけを生成して"
    inputs = tokenizer(prompt, return_tensors="pt")

    # FIX: inputs must live on the same device as the model. The original
    # moved them to CPU only when CUDA was absent (a no-op — tokenizer
    # tensors start on CPU) and left them on CPU when device_map="auto"
    # had placed the model on GPU, which would crash generate().
    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    # FIX: torch.no_glad() -> torch.no_grad() (the original raised
    # AttributeError at runtime; inference needs no autograd graph).
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=25,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# --- Gradio UI -------------------------------------------------------------
# One output textbox and one button; clicking the button runs the generator
# and writes its return value into the textbox.
with gr.Blocks() as demo:
    haiku_box = gr.Textbox(label="俳句")
    generate_btn = gr.Button("生成")
    generate_btn.click(fn=haiku_generate, outputs=haiku_box)

demo.launch()