import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

base_model = "mistralai/Mistral-7B-v0.1"
adapter = "kiki7sun/mixtral-academic-finetune0119"
# adapter = "kiki7sun/mixtral-academic-finetune-QLoRA-0121"

# Load the base model on CPU in bfloat16, then attach the LoRA adapter.
# PeftModel.from_pretrained reads the adapter's PeftConfig itself, so no
# explicit config load is needed here.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    low_cpu_mem_usage=True,
    torch_dtype=torch.bfloat16,
)
ft_model = PeftModel.from_pretrained(model, adapter)

tokenizer = AutoTokenizer.from_pretrained(
    base_model,
    add_bos_token=True,
    trust_remote_code=True,
)

ft_model.eval()


def greet(your_prompt):
    """Generate a completion for the given prompt with the fine-tuned model."""
    model_input = tokenizer(your_prompt, return_tensors="pt").to("cpu")
    with torch.no_grad():
        generation = ft_model.generate(**model_input, max_new_tokens=150)
    return tokenizer.decode(generation[0], skip_special_tokens=True)


demo = gr.Interface(
    fn=greet,
    inputs="textbox",
    outputs="textbox",
    title="Academic Kitchen ChatChat",
)

if __name__ == "__main__":
    demo.launch()
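# A minimal usage sketch, kept as comments so this file stays runnable as-is.
# Assumptions: the app is running locally on Gradio's default port 7860 and
# the `gradio_client` package is installed; the example prompt is made up.
# A `gr.Interface` with one function exposes a single "/predict" endpoint:
#
#     from gradio_client import Client
#
#     client = Client("http://127.0.0.1:7860/")  # assumed local URL
#     print(client.predict("Explain LoRA fine-tuning in one paragraph.",
#                          api_name="/predict"))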