anugrahap committed on
Commit
1df0250
·
1 Parent(s): 0e1cbae

Final update for version 2 application

Browse files
Files changed (1) hide show
  1. app.py +43 -116
app.py CHANGED
@@ -1,22 +1,22 @@
1
- import os
2
  import gradio as gr
3
  from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
4
 
5
  # initialize the environment
6
  model_name = 'anugrahap/gpt2-indo-textgen'
7
  HF_TOKEN = 'hf_********'  # SECURITY: real API token redacted — it was committed in plaintext; revoke it and load via os.environ instead
8
- callback = gr.HuggingFaceDatasetSaver(HF_TOKEN, "output-gpt2-indo-textgen")
9
 
10
- # define the tokenization method
11
- tokenizer = AutoTokenizer.from_pretrained(model_name,
12
- model_max_length=1e30,
13
- padding_side='right',
14
- return_tensors='pt')
15
 
16
- # add the EOS token as PAD token to avoid warnings
17
- model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id)
18
 
19
- generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
20
 
21
  # create the decoder parameter to generate the text
22
  def single_generation(text,min_length,max_length,temperature,top_k,top_p,num_beams,repetition_penalty,do_sample):
@@ -66,56 +66,20 @@ def single_generation(text,min_length,max_length,temperature,top_k,top_p,num_bea
66
  else:
67
  return error_unknown
68
 
69
- # create the decoder parameter to generate the text
70
- def multiple_generation(text,min_length,max_length,temperature,top_k,top_p,num_beams,repetition_penalty,do_sample):
71
- # create local variable for error parameter
72
- error_rep=ValueError(f"ERROR: repetition penalty cannot be lower than one! Given rep penalty = {repetition_penalty}")
73
- error_temp=ValueError(f"ERROR: temperature cannot be zero or lower! Given temperature = {temperature}")
74
- error_minmax=ValueError(f"ERROR: min length must be lower than or equal to max length! Given min length = {min_length}")
75
- error_numbeams_type=TypeError(f"ERROR: number of beams must be an integer not {type(num_beams)}")
76
- error_topk_type=TypeError(f"ERROR: top k must be an integer not {type(top_k)}")
77
- error_minmax_type=TypeError(f"ERROR: min length and max length must be an integer not {type(min_length)} and {type(max_length)}")
78
- error_empty=ValueError("ERROR: Input Text cannot be empty!")
79
- error_unknown=TypeError("Unknown Error.")
80
 
81
- if text != '':
82
- if type(min_length) == int and type(max_length) == int:
83
- if type(top_k) == int:
84
- if type(num_beams) == int:
85
- if min_length <= max_length:
86
- if temperature > 0:
87
- if repetition_penalty >= 1:
88
- result = generator(text,
89
- min_length=min_length,
90
- max_length=max_length,
91
- temperature=temperature,
92
- top_k=top_k,
93
- top_p=top_p,
94
- num_beams=num_beams,
95
- repetition_penalty=repetition_penalty,
96
- do_sample=do_sample,
97
- no_repeat_ngram_size=2,
98
- num_return_sequences=3)
99
- return result[0]["generated_text"], result[1]["generated_text"], result[2]["generated_text"],
100
- elif repetition_penalty < 1:
101
- return error_rep,error_rep,error_rep
102
- elif temperature <= 0:
103
- return error_temp,error_temp,error_temp
104
- elif min_length > max_length:
105
- return error_minmax,error_minmax,error_minmax
106
- elif type(num_beams) != int:
107
- return error_numbeams_type,error_numbeams_type,error_numbeams_type
108
- elif type(top_k) != int:
109
- return error_topk_type,error_topk_type,error_topk_type
110
- elif type(min_length) != int or type(max_length) != int:
111
- return error_minmax_type,error_minmax_type,error_minmax_type
112
- elif text == '':
113
- return error_empty,error_empty,error_empty
114
- else:
115
- return error_unknown,error_unknown,error_unknown
116
 
 
117
 
118
- # create the baseline examples
119
  examples = [
120
  ["Indonesia adalah negara kepulauan", 10, 30, 1.0, 25, 0.92, 5, 2.0, True],
121
  ["Indonesia adalah negara kepulauan", 10, 30, 1.0, 25, 0.92, 5, 1.0, False],
@@ -124,9 +88,7 @@ examples = [
124
  ["Pemandangan di pantai kuta Bali sangatlah indah.", 30, 50, 0.5, 40, 0.98, 10, 1.0, True],
125
  ["Pemandangan di pantai kuta Bali sangatlah indah.", 10, 30, 1.5, 30, 0.93, 5, 2.0, True]]
126
 
127
- # using gradio block to create the interface
128
- with gr.Blocks(title="GPT-2 Indonesian Text Generation Playground", theme='Default') as app:
129
- gr.Markdown("""
130
  <style>
131
  .center {
132
  display: block;
@@ -147,66 +109,31 @@ with gr.Blocks(title="GPT-2 Indonesian Text Generation Playground", theme='Defau
147
  border="0"
148
  class="center"
149
  style="height: 100px; width: 100px;"/>
150
- <h1>GPT-2 Indonesian Text Generation Playground</h1>""")
151
-
152
- gr.Markdown("<p><i>This project is a part of thesis requirement of Anugrah Akbar Praramadhan</i></p>")
153
-
154
- with gr.Tabs():
155
- #single generation
156
- with gr.TabItem("Single Generation"):
157
- with gr.Row():
158
- with gr.Column():
159
- input1=[gr.Textbox(lines=5, label="Input Text"),
160
- gr.Slider(label="Min Length", minimum=10, maximum=50, value=10, step=5),
161
- gr.Slider(label="Max Length", minimum=10, maximum=100, value=30, step=10),
162
- gr.Number(label="Temperature Sampling", value=1.5),
163
- gr.Slider(label="Top K Sampling", minimum=0, maximum=100, value=30, step=5),
164
- gr.Slider(label="Top P Sampling", minimum=0.01, maximum=1, value=0.93),
165
- gr.Slider(label="Number of Beams", minimum=1, maximum=10, value=5, step=1),
166
- gr.Number(label="Rep Penalty", value=2.0),
167
- gr.Dropdown(label="Do Sample?", choices=[True,False], value=True, multiselect=False)]
168
-
169
- with gr.Column():
170
- output1=gr.Textbox(lines=5, max_lines=50, label="Generated Text with Greedy/Beam Search Decoding")
171
- button1=gr.Button("Run the model")
172
- button1.click(fn=single_generation, inputs=input1, outputs=output1, show_progress=True)
173
- flag_btn = gr.Button("Flag")
174
-
175
- callback.setup([input1,output1],"Flagged Data Points")
176
- flag_btn.click(lambda *args: callback.flag(args), input1, output1, preprocess=False)
177
- gr.Examples(examples, inputs=input1)
178
-
179
- #multiple generation
180
- with gr.TabItem("Multiple Generation"):
181
- with gr.Row():
182
- with gr.Column():
183
- input2=[gr.Textbox(lines=5, label="Input Text"),
184
- gr.Slider(label="Min Length", minimum=10, maximum=50, value=10, step=5),
185
- gr.Slider(label="Max Length", minimum=10, maximum=100, value=30, step=10),
186
- gr.Number(label="Temperature Sampling", value=1.5),
187
- gr.Slider(label="Top K Sampling", minimum=0, maximum=100, value=30, step=5),
188
- gr.Slider(label="Top P Sampling", minimum=0.01, maximum=1, value=0.93),
189
- gr.Slider(label="Number of Beams", minimum=1, maximum=10, value=5, step=1),
190
- gr.Number(label="Rep Penalty", value=2.0),
191
- gr.Dropdown(label="Do Sample?", choices=[True,False], value=True, multiselect=False)]
192
- with gr.Column():
193
- output2=[gr.Textbox(lines=5, max_lines=50, label="#1 Generated Text with Greedy/Beam Search Decoding"),
194
- gr.Textbox(lines=5, max_lines=50, label="#2 Generated Text with Greedy/Beam Search Decoding"),
195
- gr.Textbox(lines=5, max_lines=50, label="#3 Generated Text with Greedy/Beam Search Decoding")]
196
- button2=gr.Button("Run the model")
197
- button2.click(fn=multiple_generation, inputs=input2, outputs=output2, show_progress=True)
198
- flag_btn = gr.Button("Flag")
199
-
200
- callback.setup([input2,output2],"Flagged Data Points")
201
- flag_btn.click(lambda *args: callback.flag(args), input2, output2, preprocess=False)
202
- gr.Examples(examples, inputs=input2)
203
-
204
- gr.Markdown("""<p style='text-align: center'>Copyright Anugrah Akbar Praramadhan 2023 <br>
205
  <p style='text-align: center'> Trained on Indo4B Benchmark Dataset of Indonesian language Wikipedia with a Causal Language Modeling (CLM) objective <br>
206
  <p style='text-align: center'><a href='https://huggingface.co/anugrahap/gpt2-indo-textgen' target='_blank'>Link to the Trained Model</a><br>
207
  <p style='text-align: center'><a href='https://huggingface.co/spaces/anugrahap/gpt2-indo-text-gen/tree/main' target='_blank'>Link to the Project Repository</a><br>
 
208
  <p style='text-align: center'><a href='https://d4mucfpksywv.cloudfront.net/better-language-models/language_models_are_unsupervised_multitask_learners.pdf' target='_blank'>Original Paper</a>
209
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
210
 
211
  if __name__=='__main__':
212
  app.launch()
 
1
+ #this is version two with flagging features
2
  import gradio as gr
3
  from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
4
 
5
  # initialize the environment
6
  model_name = 'anugrahap/gpt2-indo-textgen'
7
  HF_TOKEN = 'hf_********'  # SECURITY: real API token redacted — it was committed in plaintext; revoke it and load via os.environ instead
8
+ hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "output-gpt2-indo-textgen")
9
 
10
+ # # define the tokenization method
11
+ # tokenizer = AutoTokenizer.from_pretrained(model_name,
12
+ # model_max_length=1e30,
13
+ # padding_side='right',
14
+ # return_tensors='pt')
15
 
16
+ # # add the EOS token as PAD token to avoid warnings
17
+ # model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id)
18
 
19
+ generator = pipeline('text-generation', model=model_name)
20
 
21
  # create the decoder parameter to generate the text
22
  def single_generation(text,min_length,max_length,temperature,top_k,top_p,num_beams,repetition_penalty,do_sample):
 
66
  else:
67
  return error_unknown
68
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
+ # create the variable needed for the gradio app
71
+ forinput=[gr.Textbox(lines=5, label="Input Text"),
72
+ gr.Slider(label="Min Length", minimum=10, maximum=50, value=10, step=5),
73
+ gr.Slider(label="Max Length", minimum=10, maximum=100, value=30, step=10),
74
+ gr.Number(label="Temperature Sampling", value=1.5),
75
+ gr.Slider(label="Top K Sampling", minimum=0, maximum=100, value=30, step=5),
76
+ gr.Slider(label="Top P Sampling", minimum=0.01, maximum=1, value=0.93),
77
+ gr.Slider(label="Number of Beams", minimum=1, maximum=10, value=5, step=1),
78
+ gr.Number(label="Rep Penalty", value=2.0),
79
+ gr.Dropdown(label="Do Sample?", choices=[True,False], value=True, multiselect=False)]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
+ foroutput=gr.Textbox(lines=5, max_lines=50, label="Generated Text with Greedy/Beam Search Decoding")
82
 
 
83
  examples = [
84
  ["Indonesia adalah negara kepulauan", 10, 30, 1.0, 25, 0.92, 5, 2.0, True],
85
  ["Indonesia adalah negara kepulauan", 10, 30, 1.0, 25, 0.92, 5, 1.0, False],
 
88
  ["Pemandangan di pantai kuta Bali sangatlah indah.", 30, 50, 0.5, 40, 0.98, 10, 1.0, True],
89
  ["Pemandangan di pantai kuta Bali sangatlah indah.", 10, 30, 1.5, 30, 0.93, 5, 2.0, True]]
90
 
91
+ title = """
 
 
92
  <style>
93
  .center {
94
  display: block;
 
109
  border="0"
110
  class="center"
111
  style="height: 100px; width: 100px;"/>
112
+ <h1>GPT-2 Indonesian Text Generation Playground</h1>"""
113
+
114
+ description = "<p><i>This project is a part of thesis requirement of Anugrah Akbar Praramadhan</i></p>"
115
+
116
+ article = """<p style='text-align: center'>Copyright Anugrah Akbar Praramadhan 2023 <br>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  <p style='text-align: center'> Trained on Indo4B Benchmark Dataset of Indonesian language Wikipedia with a Causal Language Modeling (CLM) objective <br>
118
  <p style='text-align: center'><a href='https://huggingface.co/anugrahap/gpt2-indo-textgen' target='_blank'>Link to the Trained Model</a><br>
119
  <p style='text-align: center'><a href='https://huggingface.co/spaces/anugrahap/gpt2-indo-text-gen/tree/main' target='_blank'>Link to the Project Repository</a><br>
120
+ <p style='text-align: center'><a href='https://huggingface.co/datasets/anugrahap/output-gpt2-indo-textgen/' target='_blank'>Link to the Autosaved Generated Output</a><br>
121
  <p style='text-align: center'><a href='https://d4mucfpksywv.cloudfront.net/better-language-models/language_models_are_unsupervised_multitask_learners.pdf' target='_blank'>Original Paper</a>
122
+ """
123
+
124
+ # using gradio interfaces
125
+ app = gr.Interface(
126
+ fn=single_generation,
127
+ inputs=forinput,
128
+ outputs=foroutput,
129
+ examples=examples,
130
+ title=title,
131
+ description=description,
132
+ article=article,
133
+ allow_flagging='manual',
134
+ flagging_options=['Well Performed', 'Inappropriate Word Selection', 'Wordy', 'Strange Word', 'Others'],
135
+ flagging_callback=hf_writer)
136
+
137
 
138
  if __name__=='__main__':
139
  app.launch()