c committed (verified)
Commit 587fbe2 · Parent: 825fc05

Create app.py

Files changed (1): app.py (+36 -4)
app.py CHANGED
@@ -1,7 +1,39 @@
 import gradio as gr
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+from peft import PeftModel
 
-def greet(name):
-    return "Hello " + name + "!!"
+base_model = "mistralai/Mistral-7B-v0.1"
 
-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-iface.launch()
+bnb_config = BitsAndBytesConfig(  # quantize the base model to 4-bit NF4 so it fits on a single GPU
+    load_in_4bit=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.bfloat16,
+    bnb_4bit_use_double_quant=False,
+)
+model = AutoModelForCausalLM.from_pretrained(
+    base_model,
+    quantization_config=bnb_config,
+    device_map={"": 0},  # load the whole model onto GPU 0
+)
+
+ft_model = PeftModel.from_pretrained(model, 'kiki7sun/mixtral-academic-finetune-QLoRA-0121')  # attach the QLoRA adapter
+
+tokenizer = AutoTokenizer.from_pretrained(
+    base_model,
+    add_bos_token=True,
+    trust_remote_code=True,
+)
+
+ft_model.eval()
+def greet(eval_prompt, max_new_tokens):
+    model_input = tokenizer(eval_prompt, return_tensors="pt").to("cuda")
+    with torch.no_grad():
+        generation = ft_model.generate(**model_input, max_new_tokens=int(max_new_tokens))
+    result = tokenizer.decode(generation[0], skip_special_tokens=True)
+
+    return result
+
+demo = gr.Interface(fn=greet, inputs=["textbox", "number"], outputs="textbox")  # one input component per greet argument
+
+demo.queue().launch(debug=True, share=True, inline=False)
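
The new app.py serves greet through a two-input gr.Interface, so once the Space is running it can also be called remotely with the gradio_client package. A minimal sketch, assuming the Space id matches the adapter repo name (that id, the prompt, and the 128-token budget are illustrative, not from the commit):

from gradio_client import Client

# Hypothetical Space id -- substitute the actual Space that hosts this app.py.
client = Client("kiki7sun/mixtral-academic-finetune-QLoRA-0121")

# gr.Interface serves its fn under the default /predict endpoint;
# positional arguments follow the inputs list: (eval_prompt, max_new_tokens).
result = client.predict(
    "Summarize this abstract in two sentences: ...",
    128,
    api_name="/predict",
)
print(result)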