Spaces:
Runtime error
Runtime error
Delete app.py
Browse files
app.py
DELETED
@@ -1,202 +0,0 @@
|
|
1 |
-
import os
|
2 |
-
import gradio as gr
|
3 |
-
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
|
4 |
-
|
5 |
-
# Hugging Face Hub id of the fine-tuned Indonesian GPT-2 checkpoint.
model_name = 'anugrahap/gpt2-indo-textgen'

# Load the tokenizer that belongs to the checkpoint.
# - model_max_length is documented as an integer, so pass int(1e30) rather
#   than the float 1e30 to effectively disable the length limit.
# - return_tensors is an argument of the tokenizer *call*, not of
#   from_pretrained; the pipeline requests tensors itself, so it is not
#   passed here.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    model_max_length=int(1e30),
    padding_side='right',
)

# Use the EOS token as the PAD token to avoid warnings during generation.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    pad_token_id=tokenizer.eos_token_id,
)

# Text-generation pipeline shared by both generation callbacks.
generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
|
18 |
-
|
19 |
-
# create the decoder parameter to generate the text
|
20 |
-
def _is_plain_int(value):
    """Return True for genuine integers; floats and bools are rejected."""
    return isinstance(value, int) and not isinstance(value, bool)


def _validate_generation_args(text, min_length, max_length, temperature, top_k,
                              num_beams, repetition_penalty, do_sample,
                              num_return_sequences):
    """Return an exception describing the first invalid argument, or None.

    Checks run in a fixed priority order: empty text, length types, top-k
    type, beam type, length ordering, temperature, repetition penalty, and
    finally whether the decoding strategy can yield the requested number of
    sequences. The exception is returned (not raised) because the callers
    hand it to the UI as the displayed output.
    """
    if not text:
        return ValueError("ERROR: Input Text cannot be empty!")
    if not (_is_plain_int(min_length) and _is_plain_int(max_length)):
        return TypeError(f"ERROR: min length and max length must be an integer not {type(min_length)} and {type(max_length)}")
    if not _is_plain_int(top_k):
        return TypeError(f"ERROR: top k must be an integer not {type(top_k)}")
    if not _is_plain_int(num_beams):
        return TypeError(f"ERROR: number of beams must be an integer not {type(num_beams)}")
    if min_length > max_length:
        return ValueError(f"ERROR: min length must be lower than or equal to max length! Given min length = {min_length}")

    # A cleared numeric field arrives as None and NaN compares False with
    # everything; both must be reported as invalid instead of crashing or
    # silently falling through.
    try:
        temperature_ok = bool(temperature > 0)
    except TypeError:
        temperature_ok = False
    if not temperature_ok:
        return ValueError(f"ERROR: temperature cannot be zero or lower! Given temperature = {temperature}")

    try:
        penalty_ok = bool(repetition_penalty >= 1)
    except TypeError:
        penalty_ok = False
    if not penalty_ok:
        return ValueError(f"ERROR: repetition penalty cannot be lower than one! Given rep penalty = {repetition_penalty}")

    # Beam-based decoding cannot return more sequences than beams, and greedy
    # decoding (one beam, no sampling) can only return a single sequence.
    if num_beams > 1 and num_return_sequences > num_beams:
        return ValueError(f"ERROR: number of beams must be at least {num_return_sequences} to return {num_return_sequences} sequences! Given number of beams = {num_beams}")
    if num_beams == 1 and not do_sample and num_return_sequences > 1:
        return ValueError(f"ERROR: greedy decoding can only return one sequence! Enable sampling or use at least {num_return_sequences} beams.")
    return None


def _run_generation(text, min_length, max_length, temperature, top_k, top_p,
                    num_beams, repetition_penalty, do_sample,
                    num_return_sequences):
    """Validate the arguments, then return the generated texts or the error.

    Returns a list of ``num_return_sequences`` strings on success, or the
    exception object produced by validation.
    """
    problem = _validate_generation_args(
        text, min_length, max_length, temperature, top_k, num_beams,
        repetition_penalty, do_sample, num_return_sequences,
    )
    if problem is not None:
        return problem

    outputs = generator(
        text,
        min_length=min_length,
        max_length=max_length,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        num_beams=num_beams,
        repetition_penalty=repetition_penalty,
        do_sample=do_sample,
        no_repeat_ngram_size=2,
        num_return_sequences=num_return_sequences,
    )
    return [candidate["generated_text"] for candidate in outputs]


def single_generation(text,min_length,max_length,temperature,top_k,top_p,num_beams,repetition_penalty,do_sample):
    """Generate one continuation of ``text`` with the given decoding settings.

    Returns the generated string, or, when an argument is invalid, the
    ``ValueError``/``TypeError`` object describing the problem (returned, not
    raised, so the UI shows it in the output box).
    """
    outcome = _run_generation(
        text, min_length, max_length, temperature, top_k, top_p,
        num_beams, repetition_penalty, do_sample, num_return_sequences=1,
    )
    if isinstance(outcome, Exception):
        return outcome
    return outcome[0]


def multiple_generation(text,min_length,max_length,temperature,top_k,top_p,num_beams,repetition_penalty,do_sample):
    """Generate three continuations of ``text`` with the given decoding settings.

    Returns a 3-tuple of generated strings, or, when an argument is invalid,
    a 3-tuple repeating the ``ValueError``/``TypeError`` object so each of the
    three output boxes shows the same message.
    """
    outcome = _run_generation(
        text, min_length, max_length, temperature, top_k, top_p,
        num_beams, repetition_penalty, do_sample, num_return_sequences=3,
    )
    if isinstance(outcome, Exception):
        return outcome, outcome, outcome
    return outcome[0], outcome[1], outcome[2]
|
114 |
-
|
115 |
-
|
116 |
-
# Baseline examples offered in both tabs. Column order matches the input
# widgets: text, min length, max length, temperature, top k, top p,
# number of beams, repetition penalty, do sample.
examples = [
    ["Indonesia adalah negara kepulauan", 10, 30, 1.0, 25, 0.92, 5, 2.0, True],
    ["Indonesia adalah negara kepulauan", 10, 30, 1.0, 25, 0.92, 5, 1.0, False],
    ["Skripsi merupakan tugas akhir mahasiswa", 20, 40, 1.0, 50, 0.92, 1, 2.0, True],
    ["Skripsi merupakan tugas akhir mahasiswa", 20, 40, 1.0, 50, 0.92, 1, 1.0, False],
    ["Pemandangan di pantai kuta Bali sangatlah indah.", 30, 50, 0.5, 40, 0.98, 10, 1.0, True],
    ["Pemandangan di pantai kuta Bali sangatlah indah.", 10, 30, 1.5, 30, 0.93, 5, 2.0, True],
]


def _build_generation_inputs():
    """Create one set of input widgets in the order the callbacks expect.

    Must be called inside the layout context (column) where the widgets
    should appear; both tabs use an identical set.
    """
    return [
        gr.Textbox(lines=5, label="Input Text"),
        gr.Slider(label="Min Length", minimum=10, maximum=50, value=10, step=5),
        gr.Slider(label="Max Length", minimum=10, maximum=100, value=30, step=10),
        gr.Number(label="Temperature Sampling", value=1.5),
        gr.Slider(label="Top K Sampling", minimum=0, maximum=100, value=30, step=5),
        gr.Slider(label="Top P Sampling", minimum=0.01, maximum=1, value=0.93),
        gr.Slider(label="Number of Beams", minimum=1, maximum=10, value=5, step=1),
        gr.Number(label="Rep Penalty", value=2.0),
        gr.Dropdown(label="Do Sample?", choices=[True,False], value=True, multiselect=False),
    ]


# Build the interface with Gradio Blocks.
# NOTE(review): the nesting of the widgets below was reconstructed; confirm
# the placement of the buttons and example tables against the intended layout.
with gr.Blocks(title="GPT-2 Indonesian Text Generation Playground", theme='Default') as app:
    # Page header: logo plus title. "margin-bottom" replaces the invalid
    # CSS property "margin-down".
    gr.Markdown("""
<style>
.center {
display: block;
margin-top: 20px;
margin-bottom: 0px;
margin-left: auto;
margin-right: auto;
}
</style>
<style>
h1 {
text-align: center;
margin-top: 0px;
}
</style>
<img src="https://iili.io/HayCG44.md.png"
alt="Q-GEN Logo"
border="0"
class="center"
style="height: 100px; width: 100px;"/>
<h1>GPT-2 Indonesian Text Generation Playground</h1>""")

    gr.Markdown("<p><i>This project is a part of thesis requirement of Anugrah Akbar Praramadhan</i></p>")

    with gr.Tabs():
        # Tab 1: a single generated continuation.
        with gr.TabItem("Single Generation"):
            with gr.Row():
                with gr.Column():
                    input1 = _build_generation_inputs()

                with gr.Column():
                    output1 = gr.Textbox(lines=5, max_lines=50, label="Generated Text with Greedy/Beam Search Decoding")
                    button1 = gr.Button("Run the model")
                    button1.click(fn=single_generation, inputs=input1, outputs=output1, show_progress=True)
                    gr.Examples(examples, inputs=input1)

        # Tab 2: three generated continuations side by side.
        with gr.TabItem("Multiple Generation"):
            with gr.Row():
                with gr.Column():
                    input2 = _build_generation_inputs()
                with gr.Column():
                    output2 = [
                        gr.Textbox(lines=5, max_lines=50, label="#1 Generated Text with Greedy/Beam Search Decoding"),
                        gr.Textbox(lines=5, max_lines=50, label="#2 Generated Text with Greedy/Beam Search Decoding"),
                        gr.Textbox(lines=5, max_lines=50, label="#3 Generated Text with Greedy/Beam Search Decoding"),
                    ]
                    button2 = gr.Button("Run the model")
                    button2.click(fn=multiple_generation, inputs=input2, outputs=output2, show_progress=True)
                    gr.Examples(examples, inputs=input2)

    # Footer: copyright, training data note and reference links.
    gr.Markdown("""<p style='text-align: center'>Copyright Anugrah Akbar Praramadhan 2023 <br>
<p style='text-align: center'> Trained on Indo4B Benchmark Dataset of Indonesian language Wikipedia with a Causal Language Modeling (CLM) objective <br>
<p style='text-align: center'><a href='https://huggingface.co/anugrahap/gpt2-indo-textgen' target='_blank'>Link to the Trained Model</a><br>
<p style='text-align: center'><a href='https://huggingface.co/spaces/anugrahap/gpt2-indo-textgen/tree/main' target='_blank'>Link to the Project Repository</a><br>
<p style='text-align: center'><a href='https://d4mucfpksywv.cloudfront.net/better-language-models/language_models_are_unsupervised_multitask_learners.pdf' target='_blank'>Original Paper</a>
""")

# Start the web server only when executed as a script.
if __name__=='__main__':
    app.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|