anugrahap committed on
Commit
cf7c779
·
1 Parent(s): 1362ad3

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -198
app.py DELETED
@@ -1,198 +0,0 @@
1
- import os
2
- import gradio as gr
3
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
4
-
5
- # initialize the environment
6
- model_name = 'anugrahap/gpt2-indo-textgen'
7
- HF_TOKEN = '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'
8
- hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "output-gpt2-indo-textgen")
9
-
10
- #using text generation pipeline
11
- generator = pipeline('text-generation', model=model_name)
12
-
13
- # create the decoder parameter to generate the text
14
# create the decoder parameter to generate the text
def single_generation(text, min_length, max_length, temperature, top_k, top_p,
                      num_beams, repetition_penalty, do_sample):
    """Generate a single text continuation with the GPT-2 Indonesian model.

    Validates the decoding parameters first. Mirroring the original design,
    validation failures RETURN (do not raise) an exception object so Gradio
    can render the message in the output textbox.

    Returns:
        str: the generated text on success.
        ValueError | TypeError: describing the first failed check otherwise.
    """
    # Guard clauses replace the original deeply nested if/elif pyramid.
    # The check order is preserved exactly, so when several parameters are
    # invalid at once the same error as before is reported. The original
    # final `else: return error_unknown` branch was unreachable (any string
    # is either equal or not equal to '') and has been dropped.
    if text == '':
        return ValueError("ERROR: Input Text cannot be empty!")
    if type(min_length) is not int or type(max_length) is not int:
        return TypeError(f"ERROR: min length and max length must be an integer not {type(min_length)} and {type(max_length)}")
    if type(top_k) is not int:
        return TypeError(f"ERROR: top k must be an integer not {type(top_k)}")
    if type(num_beams) is not int:
        return TypeError(f"ERROR: number of beams must be an integer not {type(num_beams)}")
    if min_length > max_length:
        return ValueError(f"ERROR: min length must be lower than or equal to max length! Given min length = {min_length}")
    if temperature <= 0:
        return ValueError(f"ERROR: temperature cannot be zero or lower! Given temperature = {temperature}")
    if repetition_penalty < 1:
        return ValueError(f"ERROR: repetition penalty cannot be lower than one! Given rep penalty = {repetition_penalty}")

    # All parameters validated: run the pipeline for one sequence.
    result = generator(text,
                       min_length=min_length,
                       max_length=max_length,
                       temperature=temperature,
                       top_k=top_k,
                       top_p=top_p,
                       num_beams=num_beams,
                       repetition_penalty=repetition_penalty,
                       do_sample=do_sample,
                       no_repeat_ngram_size=2,
                       num_return_sequences=1)
    return result[0]["generated_text"]
60
-
61
- # create the decoder parameter to generate the text
62
# create the decoder parameter to generate the text
def multiple_generation(text, min_length, max_length, temperature, top_k, top_p,
                        num_beams, repetition_penalty, do_sample):
    """Generate three text continuations with the GPT-2 Indonesian model.

    Same validation as `single_generation`, but the pipeline is asked for
    three sequences and a 3-tuple is returned — one item per output textbox.
    On validation failure the same error object is returned three times so
    every textbox shows the message (matching the original behavior).

    Returns:
        tuple[str, str, str]: the three generated texts on success.
        tuple of the same ValueError | TypeError repeated three times otherwise.
    """
    def _triple(err):
        # One error object shown in all three output textboxes.
        return err, err, err

    # Guard clauses preserve the original nested checks' exact order; the
    # unreachable `error_unknown` fallback has been dropped.
    if text == '':
        return _triple(ValueError("ERROR: Input Text cannot be empty!"))
    if type(min_length) is not int or type(max_length) is not int:
        return _triple(TypeError(f"ERROR: min length and max length must be an integer not {type(min_length)} and {type(max_length)}"))
    if type(top_k) is not int:
        return _triple(TypeError(f"ERROR: top k must be an integer not {type(top_k)}"))
    if type(num_beams) is not int:
        return _triple(TypeError(f"ERROR: number of beams must be an integer not {type(num_beams)}"))
    if min_length > max_length:
        return _triple(ValueError(f"ERROR: min length must be lower than or equal to max length! Given min length = {min_length}"))
    if temperature <= 0:
        return _triple(ValueError(f"ERROR: temperature cannot be zero or lower! Given temperature = {temperature}"))
    if repetition_penalty < 1:
        return _triple(ValueError(f"ERROR: repetition penalty cannot be lower than one! Given rep penalty = {repetition_penalty}"))

    # All parameters validated: run the pipeline for three sequences.
    result = generator(text,
                       min_length=min_length,
                       max_length=max_length,
                       temperature=temperature,
                       top_k=top_k,
                       top_p=top_p,
                       num_beams=num_beams,
                       repetition_penalty=repetition_penalty,
                       do_sample=do_sample,
                       no_repeat_ngram_size=2,
                       num_return_sequences=3)
    return result[0]["generated_text"], result[1]["generated_text"], result[2]["generated_text"]
108
-
109
-
110
- # create the variable needed for the gradio app
111
# Input widgets, in the exact positional order the generation functions
# expect: text, min/max length, temperature, top-k, top-p, beams,
# repetition penalty, do_sample.
forinput = [
    gr.Textbox(lines=5, label="Input Text"),
    gr.Slider(label="Min Length", minimum=10, maximum=50, value=10, step=5),
    gr.Slider(label="Max Length", minimum=10, maximum=100, value=30, step=10),
    gr.Number(label="Temperature Sampling", value=1.5),
    gr.Slider(label="Top K Sampling", minimum=0, maximum=100, value=30, step=5),
    gr.Slider(label="Top P Sampling", minimum=0.01, maximum=1, value=0.93),
    gr.Slider(label="Number of Beams", minimum=1, maximum=10, value=5, step=1),
    gr.Number(label="Rep Penalty", value=2.0),
    gr.Dropdown(label="Do Sample?", choices=[True, False], value=True, multiselect=False),
]

# One output textbox for the single-sequence tab.
output1 = gr.Textbox(lines=5, max_lines=50,
                     label="Generated Text with Greedy/Beam Search Decoding")

# Three output textboxes for the three-sequence tab.
output2 = [
    gr.Textbox(lines=5, max_lines=50, label="#1 Generated Text with Greedy/Beam Search Decoding"),
    gr.Textbox(lines=5, max_lines=50, label="#2 Generated Text with Greedy/Beam Search Decoding"),
    gr.Textbox(lines=5, max_lines=50, label="#3 Generated Text with Greedy/Beam Search Decoding"),
]
126
-
127
# Pre-filled example rows; columns match `forinput` order:
# [text, min_len, max_len, temperature, top_k, top_p, num_beams, rep_penalty, do_sample]
examples = [
    ["Indonesia adalah negara kepulauan", 10, 30, 1.0, 25, 0.92, 5, 2.0, True],
    ["Indonesia adalah negara kepulauan", 10, 30, 1.0, 25, 0.92, 5, 1.0, False],
    ["Skripsi merupakan tugas akhir mahasiswa", 20, 40, 1.0, 50, 0.92, 1, 2.0, True],
    ["Skripsi merupakan tugas akhir mahasiswa", 20, 40, 1.0, 50, 0.92, 1, 1.0, False],
    ["Pemandangan di pantai kuta Bali sangatlah indah.", 30, 50, 0.5, 40, 0.98, 10, 1.0, True],
    ["Pemandangan di pantai kuta Bali sangatlah indah.", 10, 30, 1.5, 30, 0.93, 5, 2.0, True],
]
134
-
135
# Header HTML: centered logo image plus the page title.
# FIX: the original stylesheet used `margin-down`, which is not a CSS
# property and was silently ignored by browsers; the intended rule is
# `margin-bottom`.
title = """
<style>
.center {
display: block;
margin-top: 20px;
margin-bottom: 0px;
margin-left: auto;
margin-right: auto;
}
</style>
<style>
h1 {
text-align: center;
margin-top: 0px;
}
</style>
<img src="https://iili.io/HayCG44.md.png"
alt="Q-GEN Logo"
border="0"
class="center"
style="height: 100px; width: 100px;"/>
<h1>GPT-2 Indonesian Text Generation Playground</h1>"""

# Short byline shown under the title.
description = "<p><i>This project is a part of thesis requirement of Anugrah Akbar Praramadhan</i></p>"

# Footer: copyright, training notes, and reference links.
article = """<p style='text-align: center'>Copyright Anugrah Akbar Praramadhan 2023 <br>
<p style='text-align: center'> Trained on Indo4B Benchmark Dataset of Indonesian language Wikipedia with a Causal Language Modeling (CLM) objective <br>
<p style='text-align: center'><a href='https://huggingface.co/anugrahap/gpt2-indo-textgen' target='_blank'>Link to the Trained Model</a><br>
<p style='text-align: center'><a href='https://huggingface.co/spaces/anugrahap/gpt2-indo-text-gen/tree/main' target='_blank'>Link to the Project Repository</a><br>
<p style='text-align: center'><a href='https://huggingface.co/datasets/anugrahap/output-gpt2-indo-textgen/' target='_blank'>Link to the Autosaved Generated Output</a><br>
<p style='text-align: center'><a href='https://d4mucfpksywv.cloudfront.net/better-language-models/language_models_are_unsupervised_multitask_learners.pdf' target='_blank'>Original Paper</a>
"""
167
-
168
- # using gradio interfaces
169
- app1 = gr.Interface(
170
- fn=single_generation,
171
- inputs=forinput,
172
- outputs=output1,
173
- examples=examples,
174
- title=title,
175
- description=description,
176
- article=article,
177
- allow_flagging='manual',
178
- flagging_options=['Well Performed', 'Inappropriate Word Selection', 'Wordy', 'Strange Word', 'Others'],
179
- flagging_callback=hf_writer)
180
-
181
- app2 = gr.Interface(
182
- fn=multiple_generation,
183
- inputs=forinput,
184
- outputs=output2,
185
- examples=examples,
186
- title=title,
187
- description=description,
188
- article=article,
189
- allow_flagging='manual',
190
- flagging_options=['Well Performed', 'Inappropriate Word Selection', 'Wordy', 'Strange Word', 'Others'],
191
- flagging_callback=hf_writer)
192
-
193
- # combining both interface
194
- final_app = gr.TabbedInterface(interface_list=[app1, app2], tab_names=["Single Generation", "Multiple Generation"])
195
-
196
-
197
# Launch the tabbed Gradio app only when run as a script.
if __name__ == '__main__':
    final_app.launch()