# NOTE(review): the six lines that used to sit here ("Spaces:", "Runtime
# error", file size, commit hashes, and a 1..202 line-number gutter) were
# Hugging Face Spaces web-page scraping artifacts, not source code. They
# made the file unparseable as Python, so they are commented out here.
import os
import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
# initialize the model
# Hugging Face Hub id of the fine-tuned Indonesian GPT-2 checkpoint;
# loading below downloads weights/tokenizer on first run (network I/O).
model_name = 'anugrahap/gpt2-indo-textgen'
# define the tokenization method
# model_max_length=1e30 effectively disables the tokenizer's "sequence too
# long" truncation warning; padding is applied on the right, and encoded
# batches come back as PyTorch tensors.
tokenizer = AutoTokenizer.from_pretrained(model_name,
model_max_length=1e30,
padding_side='right',
return_tensors='pt')
# add the EOS token as PAD token to avoid warnings
model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id)
# shared text-generation pipeline used by both generation functions below
generator = pipeline('text-generation', model=model, tokenizer=tokenizer)
# create the decoder parameter to generate the text
def single_generation(text,min_length,max_length,temperature,top_k,top_p,num_beams,repetition_penalty,do_sample):
    """Generate a single continuation of ``text`` with the GPT-2 pipeline.

    Parameters mirror the Gradio inputs: ``text`` (prompt), ``min_length``/
    ``max_length`` (int token bounds), ``temperature`` (> 0), ``top_k`` (int),
    ``top_p``, ``num_beams`` (int), ``repetition_penalty`` (>= 1) and
    ``do_sample`` (bool).

    Returns the generated string on success. On invalid input it RETURNS
    (does not raise) a ValueError/TypeError instance whose message Gradio
    renders in the output textbox — this mirrors the original contract.
    """
    # Guard clauses, checked in the same order as the original nested
    # if/elif pyramid: emptiness, then types, then value ranges.
    if text == '':
        return ValueError("ERROR: Input Text cannot be empty!")
    # NOTE: deliberate exact-type checks (not isinstance) so that float
    # slider values such as 10.0 are rejected, as before.
    if type(min_length) != int or type(max_length) != int:
        return TypeError(f"ERROR: min length and max length must be an integer not {type(min_length)} and {type(max_length)}")
    if type(top_k) != int:
        return TypeError(f"ERROR: top k must be an integer not {type(top_k)}")
    if type(num_beams) != int:
        return TypeError(f"ERROR: number of beams must be an integer not {type(num_beams)}")
    if min_length > max_length:
        return ValueError(f"ERROR: min length must be lower than or equal to max length! Given min length = {min_length}")
    if temperature <= 0:
        return ValueError(f"ERROR: temperature cannot be zero or lower! Given temperature = {temperature}")
    if repetition_penalty < 1:
        return ValueError(f"ERROR: repetition penalty cannot be lower than one! Given rep penalty = {repetition_penalty}")
    # All inputs validated — run the shared generation pipeline once.
    result = generator(text,
                       min_length=min_length,
                       max_length=max_length,
                       temperature=temperature,
                       top_k=top_k,
                       top_p=top_p,
                       num_beams=num_beams,
                       repetition_penalty=repetition_penalty,
                       do_sample=do_sample,
                       no_repeat_ngram_size=2,  # block verbatim bigram repeats
                       num_return_sequences=1)
    return result[0]["generated_text"]
# create the decoder parameter to generate the text
def multiple_generation(text,min_length,max_length,temperature,top_k,top_p,num_beams,repetition_penalty,do_sample):
    """Generate THREE continuations of ``text`` with the GPT-2 pipeline.

    Same parameters and validation as :func:`single_generation`. Returns a
    3-tuple of generated strings (one per output textbox). On invalid input
    it returns the same error instance three times — one per output — so
    every Gradio textbox shows the message, mirroring the original contract.
    """
    # Guard clauses in the original pyramid's order: emptiness, types,
    # then value ranges. The error is tripled to fill all three outputs.
    if text == '':
        err = ValueError("ERROR: Input Text cannot be empty!")
        return err, err, err
    # Deliberate exact-type checks (not isinstance) so floats are rejected.
    if type(min_length) != int or type(max_length) != int:
        err = TypeError(f"ERROR: min length and max length must be an integer not {type(min_length)} and {type(max_length)}")
        return err, err, err
    if type(top_k) != int:
        err = TypeError(f"ERROR: top k must be an integer not {type(top_k)}")
        return err, err, err
    if type(num_beams) != int:
        err = TypeError(f"ERROR: number of beams must be an integer not {type(num_beams)}")
        return err, err, err
    if min_length > max_length:
        err = ValueError(f"ERROR: min length must be lower than or equal to max length! Given min length = {min_length}")
        return err, err, err
    if temperature <= 0:
        err = ValueError(f"ERROR: temperature cannot be zero or lower! Given temperature = {temperature}")
        return err, err, err
    if repetition_penalty < 1:
        err = ValueError(f"ERROR: repetition penalty cannot be lower than one! Given rep penalty = {repetition_penalty}")
        return err, err, err
    # All inputs validated — ask the pipeline for three sequences at once.
    result = generator(text,
                       min_length=min_length,
                       max_length=max_length,
                       temperature=temperature,
                       top_k=top_k,
                       top_p=top_p,
                       num_beams=num_beams,
                       repetition_penalty=repetition_penalty,
                       do_sample=do_sample,
                       no_repeat_ngram_size=2,  # block verbatim bigram repeats
                       num_return_sequences=3)
    return result[0]["generated_text"], result[1]["generated_text"], result[2]["generated_text"]
# create the baseline examples
# Each row matches the Gradio input order used by both tabs:
# [text, min_length, max_length, temperature, top_k, top_p,
#  num_beams, repetition_penalty, do_sample]
examples = [
    ["Indonesia adalah negara kepulauan", 10, 30, 1.0, 25, 0.92, 5, 2.0, True],
    ["Indonesia adalah negara kepulauan", 10, 30, 1.0, 25, 0.92, 5, 1.0, False],
    ["Skripsi merupakan tugas akhir mahasiswa", 20, 40, 1.0, 50, 0.92, 1, 2.0, True],
    ["Skripsi merupakan tugas akhir mahasiswa", 20, 40, 1.0, 50, 0.92, 1, 1.0, False],
    ["Pemandangan di pantai kuta Bali sangatlah indah.", 30, 50, 0.5, 40, 0.98, 10, 1.0, True],
    ["Pemandangan di pantai kuta Bali sangatlah indah.", 10, 30, 1.5, 30, 0.93, 5, 2.0, True]]
# using gradio block to create the interface
# FIX: the original file ended with a stray " |" after app.launch() — a
# web-scrape artifact that made the script a syntax error; it is removed.
with gr.Blocks(title="GPT-2 Indonesian Text Generation Playground", theme='Default') as app:
    # Header: centered logo + title (inline CSS inside the Markdown block).
    gr.Markdown("""
        <style>
        .center {
        display: block;
        margin-top: 20px;
        margin-down: 0px;
        margin-left: auto;
        margin-right: auto;
        }
        </style>
        <style>
        h1 {
        text-align: center;
        margin-top: 0px;
        }
        </style>
        <img src="https://iili.io/HayCG44.md.png"
        alt="Q-GEN Logo"
        border="0"
        class="center"
        style="height: 100px; width: 100px;"/>
        <h1>GPT-2 Indonesian Text Generation Playground</h1>""")
    gr.Markdown("<p><i>This project is a part of thesis requirement of Anugrah Akbar Praramadhan</i></p>")
    with gr.Tabs():
        #single generation
        with gr.TabItem("Single Generation"):
            with gr.Row():
                with gr.Column():
                    # Input widgets, in the exact positional order expected
                    # by single_generation's signature.
                    input1=[gr.Textbox(lines=5, label="Input Text"),
                            gr.Slider(label="Min Length", minimum=10, maximum=50, value=10, step=5),
                            gr.Slider(label="Max Length", minimum=10, maximum=100, value=30, step=10),
                            gr.Number(label="Temperature Sampling", value=1.5),
                            gr.Slider(label="Top K Sampling", minimum=0, maximum=100, value=30, step=5),
                            gr.Slider(label="Top P Sampling", minimum=0.01, maximum=1, value=0.93),
                            gr.Slider(label="Number of Beams", minimum=1, maximum=10, value=5, step=1),
                            gr.Number(label="Rep Penalty", value=2.0),
                            gr.Dropdown(label="Do Sample?", choices=[True,False], value=True, multiselect=False)]
                with gr.Column():
                    output1=gr.Textbox(lines=5, max_lines=50, label="Generated Text with Greedy/Beam Search Decoding")
                    button1=gr.Button("Run the model")
                    button1.click(fn=single_generation, inputs=input1, outputs=output1, show_progress=True)
            gr.Examples(examples, inputs=input1)
        #multiple generation
        with gr.TabItem("Multiple Generation"):
            with gr.Row():
                with gr.Column():
                    # Same widget list as the single tab; wired to
                    # multiple_generation instead.
                    input2=[gr.Textbox(lines=5, label="Input Text"),
                            gr.Slider(label="Min Length", minimum=10, maximum=50, value=10, step=5),
                            gr.Slider(label="Max Length", minimum=10, maximum=100, value=30, step=10),
                            gr.Number(label="Temperature Sampling", value=1.5),
                            gr.Slider(label="Top K Sampling", minimum=0, maximum=100, value=30, step=5),
                            gr.Slider(label="Top P Sampling", minimum=0.01, maximum=1, value=0.93),
                            gr.Slider(label="Number of Beams", minimum=1, maximum=10, value=5, step=1),
                            gr.Number(label="Rep Penalty", value=2.0),
                            gr.Dropdown(label="Do Sample?", choices=[True,False], value=True, multiselect=False)]
                with gr.Column():
                    # Three output boxes — one per returned sequence.
                    output2=[gr.Textbox(lines=5, max_lines=50, label="#1 Generated Text with Greedy/Beam Search Decoding"),
                             gr.Textbox(lines=5, max_lines=50, label="#2 Generated Text with Greedy/Beam Search Decoding"),
                             gr.Textbox(lines=5, max_lines=50, label="#3 Generated Text with Greedy/Beam Search Decoding")]
                    button2=gr.Button("Run the model")
                    button2.click(fn=multiple_generation, inputs=input2, outputs=output2, show_progress=True)
            gr.Examples(examples, inputs=input2)
    # Footer: credits and reference links.
    gr.Markdown("""<p style='text-align: center'>Copyright Anugrah Akbar Praramadhan 2023 <br>
                   <p style='text-align: center'> Trained on Indo4B Benchmark Dataset of Indonesian language Wikipedia with a Causal Language Modeling (CLM) objective <br>
                   <p style='text-align: center'><a href='https://huggingface.co/anugrahap/gpt2-indo-textgen' target='_blank'>Link to the Trained Model</a><br>
                   <p style='text-align: center'><a href='https://huggingface.co/spaces/anugrahap/gpt2-indo-text-gen/tree/main' target='_blank'>Link to the Project Repository</a><br>
                   <p style='text-align: center'><a href='https://d4mucfpksywv.cloudfront.net/better-language-models/language_models_are_unsupervised_multitask_learners.pdf' target='_blank'>Original Paper</a>
                """)
if __name__=='__main__':
    app.launch()