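# Gradio demo: serve Mistral-7B with the kiki7sun/mixtral-academic-finetune0119
# PEFT (LoRA) adapter and generate completions on CPU.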
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig

# Base checkpoint the adapter was fine-tuned from.
base_model = "mistralai/Mistral-7B-v0.1"

# Adapter config; its base_model_name_or_path should match base_model.
config = PeftConfig.from_pretrained("kiki7sun/mixtral-academic-finetune0119")
model = AutoModelForCausalLM.from_pretrained(base_model,
                                             low_cpu_mem_usage=True,
                                             torch_dtype=torch.bfloat16)
# Attach the fine-tuned LoRA adapter weights to the base model.
ft_model = PeftModel.from_pretrained(model, "kiki7sun/mixtral-academic-finetune0119")

# Alternative QLoRA adapter checkpoint:
# ft_model = PeftModel.from_pretrained(model, 'kiki7sun/mixtral-academic-finetune-QLoRA-0121')

# Tokenizer comes from the base model; add_bos_token prepends <s> to each prompt.
tokenizer = AutoTokenizer.from_pretrained(
    base_model,
    add_bos_token=True,
    trust_remote_code=True,
)
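# Note: the Mistral tokenizer defines no pad token by default; this is fine for the
# single-prompt calls below, but batched generation would need one set explicitly
# (e.g. tokenizer.pad_token = tokenizer.eos_token).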

# Inference mode: disables dropout and other training-only behaviour.
ft_model.eval()

def greet(your_prompt):
    # Tokenize the prompt and keep the tensors on CPU.
    model_input = tokenizer(your_prompt, return_tensors="pt").to("cpu")
    with torch.no_grad():
        generation = ft_model.generate(**model_input, max_new_tokens=150)
        result = tokenizer.decode(generation[0], skip_special_tokens=True)

    return result
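# Example call (hypothetical prompt): greet("Explain what a LoRA adapter is.")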

# Minimal Gradio UI: one text box in, one text box out.
demo = gr.Interface(fn=greet,
                    inputs="textbox",
                    outputs="textbox",
                    title="Academic Kitchen ChatChat",
                    )


if __name__ == "__main__":
    demo.launch()