bill83221 committed
Commit ae49840 · Parent(s): 754aeba
Files changed (1): app.py (+36, -5)
app.py CHANGED
@@ -1,11 +1,42 @@
-
 import gradio as gr
-
-def greet(name):
-    return "Hello " + name + "!!"
-
-iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-iface.launch()
-
-
-
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from peft import prepare_model_for_kbit_training
+from peft import LoraConfig, get_peft_model
+from peft import PeftModel, PeftConfig
+from datasets import load_dataset
+import transformers
+
+fixed_llm_name = "TheBloke/Mistral-7B-Instruct-v0.2-GPTQ"
+model = AutoModelForCausalLM.from_pretrained(fixed_llm_name,
+                                             device_map="auto",        # automatically figures out how to best use CPU + GPU for loading the model
+                                             trust_remote_code=False,  # prevents running custom model files on your machine
+                                             revision="main")          # which version of the model to use in the repo
+
+tokenizer = AutoTokenizer.from_pretrained(fixed_llm_name, use_fast=True)
+
+config = PeftConfig.from_pretrained("chihhuiho/VirtualTA")
+model = PeftModel.from_pretrained(model, "chihhuiho/VirtualTA")
+
+instructions_string = "Assume you are a virtual teaching assistant in the statistical and machine learning course. Your job is to communicate with students, answer technical questions and help the student to solve the problem. Please respond to the following post from a student."
+
+prompt_template = lambda comment: f"[INST] {instructions_string} {comment} [/INST]"
+
+def predict(comment):
+    prompt = prompt_template(comment)
+
+    model.eval()  # put the model in evaluation mode (dropout modules are deactivated)
+
+    # tokenize input
+    inputs = tokenizer(prompt, return_tensors="pt")
+
+    # generate output, decode it, and keep only the text after the instruction block
+    outputs = model.generate(input_ids=inputs["input_ids"].to("cuda"), max_new_tokens=400, pad_token_id=tokenizer.eos_token_id)
+    outputs = tokenizer.batch_decode(outputs)[0]
+    outputs = outputs.split("[/INST]")[1]
+    return outputs
+
+
+iface = gr.Interface(fn=predict, inputs="text", outputs="text")
+iface.launch()
+
+
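Once the Space built from this commit is running, the Interface can be exercised programmatically with gradio_client. A minimal sketch, assuming the Space is published under the hypothetical ID bill83221/VirtualTA (the actual Space ID is not shown in this commit) and that gradio_client is installed:

    from gradio_client import Client

    # connect to the running Space (hypothetical Space ID -- not confirmed by this commit)
    client = Client("bill83221/VirtualTA")

    # gr.Interface exposes the wrapped fn under the default endpoint name "/predict";
    # the single text input maps to the `comment` argument of predict()
    answer = client.predict("What is the bias-variance tradeoff?", api_name="/predict")
    print(answer)

Note that loading a GPTQ checkpoint such as TheBloke/Mistral-7B-Instruct-v0.2-GPTQ through transformers generally requires the optimum and auto-gptq packages and a CUDA GPU, so the Space's hardware and requirements must provide both for the .to("cuda") call in predict() to succeed.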