Joaoffg committed
Commit 560647b · verified · 1 Parent(s): 09657fd

Update app.py

Files changed (1)
  1. app.py +52 -51
app.py CHANGED
@@ -6,14 +6,21 @@ from utils.prompter import Prompter
 
 class CustomPrompter(Prompter):
     def get_response(self, output: str) -> str:
-        return output.split(self.template["response_split"])[1].strip().split("### Instruction:")[0]
-
-prompt_template_name = "alpaca"  # The prompt template to use, will default to alpaca.
-
-prompter = CustomPrompter(prompt_template_name)
-
-model = AutoModelForCausalLM.from_pretrained("Joaoffg/ELM")
-tokenizer = AutoTokenizer.from_pretrained("Joaoffg/ELM")
+        # Safely split on '### Response:'
+        split_output = output.split(self.template["response_split"], maxsplit=1)
+        if len(split_output) < 2:
+            return output.strip()
+        response_part = split_output[1].strip()
+
+        # Optionally strip out any subsequent '### Instruction:'
+        end_index = response_part.find("### Instruction:")
+        if end_index != -1:
+            response_part = response_part[:end_index].strip()
+
+        return response_part
+
+prompt_template_name = "alpaca"
+prompter = CustomPrompter(prompt_template_name)
 
 def tokenize(prompt, add_eos_token=True):
     result = tokenizer(
@@ -62,52 +69,47 @@ def generate_and_tokenize_prompt(data_point):
     return tokenized_full_prompt
 
 def evaluate(instruction):
-    try:
-        # Generate a response:
-        input_text = None
-        prompt = prompter.generate_prompt(instruction, input_text)
-        inputs = tokenizer(prompt, return_tensors="pt")
-        input_ids = inputs["input_ids"]
-
-        temperature = 0.2
-        top_p = 0.95
-        top_k = 25
-        num_beams = 1
-        max_new_tokens = 256
-        repetition_penalty = 1.3
-        do_sample = False
-        num_return_sequences = 1
-
-        generation_config = transformers.GenerationConfig(
-            temperature=temperature,
-            top_p=top_p,
-            top_k=top_k,
-            num_beams=num_beams,
-            repetition_penalty=repetition_penalty,
-            do_sample=do_sample,
-            min_new_tokens=32,
-            num_return_sequences=num_return_sequences,
-            pad_token_id=0
+    input = None
+    prompt = prompter.generate_prompt(instruction, input)
+    inputs = tokenizer(prompt, return_tensors="pt")
+    input_ids = inputs["input_ids"]
+
+    # Example generation config
+    temperature = 0.2
+    top_p = 0.95
+    top_k = 25
+    num_beams = 1
+    max_new_tokens = 256
+    repetition_penalty = 2.0
+    do_sample = True
+
+    generation_config = transformers.GenerationConfig(
+        temperature=temperature,
+        top_p=top_p,
+        top_k=top_k,
+        num_beams=num_beams,
+        repetition_penalty=repetition_penalty,
+        do_sample=do_sample,
+        min_new_tokens=32,
+        num_return_sequences=1,
+        pad_token_id=0,
+        # Optionally define a stopping criterion to stop at '### Instruction:'
+        # stopping_criteria=StoppingCriteriaList([StopOnTokens(tokenizer.encode("### Instruction:", add_special_tokens=False))]),
+    )
+
+    with torch.no_grad():
+        generation_output = model.generate(
+            input_ids=input_ids,
+            generation_config=generation_config,
+            return_dict_in_generate=True,
+            output_scores=True,
+            max_new_tokens=max_new_tokens,
         )
-
-        with torch.no_grad():
-            generation_output = model.generate(
-                input_ids=input_ids,
-                generation_config=generation_config,
-                return_dict_in_generate=True,
-                output_scores=True,
-                max_new_tokens=max_new_tokens,
-            )
-
-        print(f'Instruction: {instruction}')
-
-        for i, s in enumerate(generation_output.sequences):
-            output = tokenizer.decode(s, skip_special_tokens=True)
-            return prompter.get_response(output)
-    except Exception as e:
-        return str(e)
-
-# Define the Gradio interface
+
+    # For demo, just take the first sequence
+    output = tokenizer.decode(generation_output.sequences[0], skip_special_tokens=True)
+    return prompter.get_response(output)
+
 interface = gr.Interface(
     fn=evaluate,
     inputs=[
@@ -132,5 +134,4 @@ interface = gr.Interface(
     ),
 )
 
-# Launch the Gradio interface
 interface.queue().launch()
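
The rewritten get_response() also degrades gracefully when the response marker is missing, which the old one-liner did not (it would raise an IndexError on the [1] index). A small illustration, assuming the alpaca template's "response_split" marker is "### Response:" (the template file is not part of this diff):

# Both strings are made-up examples; "### Response:" is assumed from templates/alpaca.json.
with_marker = "### Instruction:\nSay hi.\n\n### Response:\nHello!\n\n### Instruction:\nextra"
without_marker = "The model produced no response marker at all."

prompter = CustomPrompter("alpaca")
print(prompter.get_response(with_marker))     # -> "Hello!" (trailing "### Instruction:" block stripped)
print(prompter.get_response(without_marker))  # -> the whole string, stripped, instead of an IndexError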
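
Separately, the commented-out stopping_criteria line in the new evaluate() refers to a StopOnTokens class that is not defined anywhere in this diff. A minimal sketch of what such a criterion could look like with the transformers StoppingCriteria API follows; the class body and variable names are assumptions for illustration, not part of the commit.

from transformers import StoppingCriteria, StoppingCriteriaList

class StopOnTokens(StoppingCriteria):
    """Stop generation once the tail of the sequence matches a given token-id list."""

    def __init__(self, stop_token_ids):
        super().__init__()
        self.stop_token_ids = list(stop_token_ids)

    def __call__(self, input_ids, scores, **kwargs) -> bool:
        if not self.stop_token_ids:
            return False
        # Compare the most recently generated tokens against the stop sequence.
        tail = input_ids[0, -len(self.stop_token_ids):].tolist()
        return tail == self.stop_token_ids

# Hypothetical usage, mirroring the commented-out line in the diff:
# stop_ids = tokenizer.encode("### Instruction:", add_special_tokens=False)
# stopping_criteria = StoppingCriteriaList([StopOnTokens(stop_ids)])
# model.generate(input_ids=input_ids, generation_config=generation_config,
#                stopping_criteria=stopping_criteria, max_new_tokens=max_new_tokens)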