anugrahap committed
Commit d3e2c23 · 1 Parent(s): 1d96ca2

upload stable application and requirements file

Files changed (2)
  1. app.py +202 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,202 @@
+ import os
+ import gradio as gr
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+
+ # initialize the model
+ model_name = 'anugrahap/gpt2-indo-textgen'
+
+ # define the tokenization method
+ tokenizer = AutoTokenizer.from_pretrained(model_name,
+                                           model_max_length=1e30,
+                                           padding_side='right',
+                                           return_tensors='pt')
+
+ # add the EOS token as PAD token to avoid warnings
+ model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id)
+
+ generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
+
+ # create the decoder parameter to generate the text
+ def single_generation(text,min_length,max_length,temperature,top_k,top_p,num_beams,repetition_penalty,do_sample):
+     # create local variable for error parameter
+     error_rep=ValueError(f"ERROR: repetition penalty cannot be lower than one! Given rep penalty = {repetition_penalty}")
+     error_temp=ValueError(f"ERROR: temperature cannot be zero or lower! Given temperature = {temperature}")
+     error_minmax=ValueError(f"ERROR: min length must be lower than or equal to max length! Given min length = {min_length}")
+     error_numbeams_type=TypeError(f"ERROR: number of beams must be an integer not {type(num_beams)}")
+     error_topk_type=TypeError(f"ERROR: top k must be an integer not {type(top_k)}")
+     error_minmax_type=TypeError(f"ERROR: min length and max length must be an integer not {type(min_length)} and {type(max_length)}")
+     error_empty=ValueError("ERROR: Input Text cannot be empty!")
+     error_unknown=TypeError("Unknown Error.")
+
+     if text != '':
+         if type(min_length) == int and type(max_length) == int:
+             if type(top_k) == int:
+                 if type(num_beams) == int:
+                     if min_length <= max_length:
+                         if temperature > 0:
+                             if repetition_penalty >= 1:
+                                 result = generator(text,
+                                                    min_length=min_length,
+                                                    max_length=max_length,
+                                                    temperature=temperature,
+                                                    top_k=top_k,
+                                                    top_p=top_p,
+                                                    num_beams=num_beams,
+                                                    repetition_penalty=repetition_penalty,
+                                                    do_sample=do_sample,
+                                                    no_repeat_ngram_size=2,
+                                                    num_return_sequences=1)
+                                 return result[0]["generated_text"]
+                             elif repetition_penalty < 1:
+                                 return error_rep
+                         elif temperature <= 0:
+                             return error_temp
+                     elif min_length > max_length:
+                         return error_minmax
+                 elif type(num_beams) != int:
+                     return error_numbeams_type
+             elif type(top_k) != int:
+                 return error_topk_type
+         elif type(min_length) != int or type(max_length) != int:
+             return error_minmax_type
+     elif text == '':
+         return error_empty
+     else:
+         return error_unknown
+
+ # create the decoder parameter to generate the text
+ def multiple_generation(text,min_length,max_length,temperature,top_k,top_p,num_beams,repetition_penalty,do_sample):
+     # create local variable for error parameter
+     error_rep=ValueError(f"ERROR: repetition penalty cannot be lower than one! Given rep penalty = {repetition_penalty}")
+     error_temp=ValueError(f"ERROR: temperature cannot be zero or lower! Given temperature = {temperature}")
+     error_minmax=ValueError(f"ERROR: min length must be lower than or equal to max length! Given min length = {min_length}")
+     error_numbeams_type=TypeError(f"ERROR: number of beams must be an integer not {type(num_beams)}")
+     error_topk_type=TypeError(f"ERROR: top k must be an integer not {type(top_k)}")
+     error_minmax_type=TypeError(f"ERROR: min length and max length must be an integer not {type(min_length)} and {type(max_length)}")
+     error_empty=ValueError("ERROR: Input Text cannot be empty!")
+     error_unknown=TypeError("Unknown Error.")
+
+     if text != '':
+         if type(min_length) == int and type(max_length) == int:
+             if type(top_k) == int:
+                 if type(num_beams) == int:
+                     if min_length <= max_length:
+                         if temperature > 0:
+                             if repetition_penalty >= 1:
+                                 result = generator(text,
+                                                    min_length=min_length,
+                                                    max_length=max_length,
+                                                    temperature=temperature,
+                                                    top_k=top_k,
+                                                    top_p=top_p,
+                                                    num_beams=num_beams,
+                                                    repetition_penalty=repetition_penalty,
+                                                    do_sample=do_sample,
+                                                    no_repeat_ngram_size=2,
+                                                    num_return_sequences=3)
+                                 return result[0]["generated_text"], result[1]["generated_text"], result[2]["generated_text"]
+                             elif repetition_penalty < 1:
+                                 return error_rep,error_rep,error_rep
+                         elif temperature <= 0:
+                             return error_temp,error_temp,error_temp
+                     elif min_length > max_length:
+                         return error_minmax,error_minmax,error_minmax
+                 elif type(num_beams) != int:
+                     return error_numbeams_type,error_numbeams_type,error_numbeams_type
+             elif type(top_k) != int:
+                 return error_topk_type,error_topk_type,error_topk_type
+         elif type(min_length) != int or type(max_length) != int:
+             return error_minmax_type,error_minmax_type,error_minmax_type
+     elif text == '':
+         return error_empty,error_empty,error_empty
+     else:
+         return error_unknown,error_unknown,error_unknown
+
+
+ # create the baseline examples
+ examples = [
+     ["Indonesia adalah negara kepulauan", 10, 30, 1.0, 25, 0.92, 5, 2.0, True],
+     ["Indonesia adalah negara kepulauan", 10, 30, 1.0, 25, 0.92, 5, 1.0, False],
+     ["Skripsi merupakan tugas akhir mahasiswa", 20, 40, 1.0, 50, 0.92, 1, 2.0, True],
+     ["Skripsi merupakan tugas akhir mahasiswa", 20, 40, 1.0, 50, 0.92, 1, 1.0, False],
+     ["Pemandangan di pantai kuta Bali sangatlah indah.", 30, 50, 0.5, 40, 0.98, 10, 1.0, True],
+     ["Pemandangan di pantai kuta Bali sangatlah indah.", 10, 30, 1.5, 30, 0.93, 5, 2.0, True]]
+
+ # using gradio block to create the interface
+ with gr.Blocks(title="GPT-2 Indonesian Text Generation Playground", theme='Default') as app:
+     gr.Markdown("""
+         <style>
+         .center {
+             display: block;
+             margin-top: 20px;
+             margin-down: 0px;
+             margin-left: auto;
+             margin-right: auto;
+         }
+         </style>
+         <style>
+         h1 {
+             text-align: center;
+             margin-top: 0px;
+         }
+         </style>
+         <img src="https://iili.io/HayCG44.md.png"
+             alt="Q-GEN Logo"
+             border="0"
+             class="center"
+             style="height: 100px; width: 100px;"/>
+         <h1>GPT-2 Indonesian Text Generation Playground</h1>""")
+
+     gr.Markdown("<p><i>This project is a part of thesis requirement of Anugrah Akbar Praramadhan</i></p>")
+
+     with gr.Tabs():
+         #single generation
+         with gr.TabItem("Single Generation"):
+             with gr.Row():
+                 with gr.Column():
+                     input1=[gr.Textbox(lines=5, label="Input Text"),
+                             gr.Slider(label="Min Length", minimum=10, maximum=50, value=10, step=5),
+                             gr.Slider(label="Max Length", minimum=10, maximum=100, value=30, step=10),
+                             gr.Number(label="Temperature Sampling", value=1.5),
+                             gr.Slider(label="Top K Sampling", minimum=0, maximum=100, value=30, step=5),
+                             gr.Slider(label="Top P Sampling", minimum=0.01, maximum=1, value=0.93),
+                             gr.Slider(label="Number of Beams", minimum=1, maximum=10, value=5, step=1),
+                             gr.Number(label="Rep Penalty", value=2.0),
+                             gr.Dropdown(label="Do Sample?", choices=[True,False], value=True, multiselect=False)]
+
+                 with gr.Column():
+                     output1=gr.Textbox(lines=5, max_lines=50, label="Generated Text with Greedy/Beam Search Decoding")
+                     button1=gr.Button("Run the model")
+                     button1.click(fn=single_generation, inputs=input1, outputs=output1, show_progress=True)
+                     gr.Examples(examples, inputs=input1)
+
+         #multiple generation
+         with gr.TabItem("Multiple Generation"):
+             with gr.Row():
+                 with gr.Column():
+                     input2=[gr.Textbox(lines=5, label="Input Text"),
+                             gr.Slider(label="Min Length", minimum=10, maximum=50, value=10, step=5),
+                             gr.Slider(label="Max Length", minimum=10, maximum=100, value=30, step=10),
+                             gr.Number(label="Temperature Sampling", value=1.5),
+                             gr.Slider(label="Top K Sampling", minimum=0, maximum=100, value=30, step=5),
+                             gr.Slider(label="Top P Sampling", minimum=0.01, maximum=1, value=0.93),
+                             gr.Slider(label="Number of Beams", minimum=1, maximum=10, value=5, step=1),
+                             gr.Number(label="Rep Penalty", value=2.0),
+                             gr.Dropdown(label="Do Sample?", choices=[True,False], value=True, multiselect=False)]
+                 with gr.Column():
+                     output2=[gr.Textbox(lines=5, max_lines=50, label="#1 Generated Text with Greedy/Beam Search Decoding"),
+                              gr.Textbox(lines=5, max_lines=50, label="#2 Generated Text with Greedy/Beam Search Decoding"),
+                              gr.Textbox(lines=5, max_lines=50, label="#3 Generated Text with Greedy/Beam Search Decoding")]
+                     button2=gr.Button("Run the model")
+                     button2.click(fn=multiple_generation, inputs=input2, outputs=output2, show_progress=True)
+                     gr.Examples(examples, inputs=input2)
+
+     gr.Markdown("""<p style='text-align: center'>Copyright Anugrah Akbar Praramadhan 2023 <br>
+     <p style='text-align: center'> Trained on Indo4B Benchmark Dataset of Indonesian language Wikipedia with a Causal Language Modeling (CLM) objective <br>
+     <p style='text-align: center'><a href='https://huggingface.co/anugrahap/gpt2-indo-textgen' target='_blank'>Link to the Trained Model</a><br>
+     <p style='text-align: center'><a href='https://huggingface.co/spaces/anugrahap/gpt2-indo-textgen/tree/main' target='_blank'>Link to the Project Repository</a><br>
+     <p style='text-align: center'><a href='https://d4mucfpksywv.cloudfront.net/better-language-models/language_models_are_unsupervised_multitask_learners.pdf' target='_blank'>Original Paper</a>
+     """)
+
+ if __name__=='__main__':
+     app.launch()
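Note: both Gradio callbacks above wrap the same text-generation pipeline call. For reference, a minimal sketch of invoking that pipeline directly, outside the UI, using the prompt and decoding values from the first row of `examples` (this is an illustration of the decoder parameters, not part of the committed file):

import os  # not strictly needed; kept to mirror app.py's imports
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM

model_name = 'anugrahap/gpt2-indo-textgen'
tokenizer = AutoTokenizer.from_pretrained(model_name)
# reuse the EOS token as PAD token, as in app.py, to avoid warnings
model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id)
generator = pipeline('text-generation', model=model, tokenizer=tokenizer)

# decoding values taken from the first example row in app.py
result = generator("Indonesia adalah negara kepulauan",
                   min_length=10,
                   max_length=30,
                   temperature=1.0,
                   top_k=25,
                   top_p=0.92,
                   num_beams=5,
                   repetition_penalty=2.0,
                   do_sample=True,
                   no_repeat_ngram_size=2,
                   num_return_sequences=1)
print(result[0]["generated_text"])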
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ gradio==3.16.1
+ huggingface-hub==0.11.1
+ protobuf==3.19.6
+ torch==1.12.1
+ torchaudio==0.12.1
+ torchvision==0.13.1
+ transformers==4.25.1
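If desired, the pinned huggingface-hub dependency can also pre-fetch the model weights before the app first runs, so that `from_pretrained` hits the local cache; a small optional sketch (not part of the commit):

# optional: pre-download the gpt2-indo-textgen weights into the local Hugging Face cache
from huggingface_hub import snapshot_download

snapshot_download(repo_id="anugrahap/gpt2-indo-textgen")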