#this is version two with flagging features import gradio as gr from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM # initialize the environment model_name = 'anugrahap/gpt2-indo-textgen' HF_TOKEN = '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' hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "output-gpt2-indo-textgen") # define the tokenization method tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length=1e30, padding_side='right', return_tensors='pt') # add the EOS token as PAD token to avoid warnings model = AutoModelForCausalLM.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id) generator = pipeline('text-generation', model=model, tokenizer=tokenizer) # create the decoder parameter to generate the text def single_generation(text,min_length,max_length,temperature,top_k,top_p,num_beams,repetition_penalty,do_sample): # create local variable for error parameter error_rep=ValueError(f"ERROR: repetition penalty cannot be lower than one! Given rep penalty = {repetition_penalty}") error_temp=ValueError(f"ERROR: temperature cannot be zero or lower! Given temperature = {temperature}") error_minmax=ValueError(f"ERROR: min length must be lower than or equal to max length! Given min length = {min_length}") error_numbeams_type=TypeError(f"ERROR: number of beams must be an integer not {type(num_beams)}") error_topk_type=TypeError(f"ERROR: top k must be an integer not {type(top_k)}") error_minmax_type=TypeError(f"ERROR: min length and max length must be an integer not {type(min_length)} and {type(max_length)}") error_empty=ValueError("ERROR: Input Text cannot be empty!") error_unknown=TypeError("Unknown Error.") if text != '': if type(min_length) == int and type(max_length) == int: if type(top_k) == int: if type(num_beams) == int: if min_length <= max_length: if temperature > 0: if repetition_penalty >= 1: result = generator(text, min_length=min_length, max_length=max_length, temperature=temperature, top_k=top_k, top_p=top_p, num_beams=num_beams, repetition_penalty=repetition_penalty, do_sample=do_sample, no_repeat_ngram_size=2, num_return_sequences=1) return result[0]["generated_text"] elif repetition_penalty < 1: return error_rep elif temperature <= 0: return error_temp elif min_length > max_length: return error_minmax elif type(num_beams) != int: return error_numbeams_type elif type(top_k) != int: return error_topk_type elif type(min_length) != int or type(max_length) != int: return error_minmax_type elif text == '': return error_empty else: return error_unknown # create the variable needed for the gradio app forinput=[gr.Textbox(lines=5, label="Input Text"), gr.Slider(label="Min Length", minimum=10, maximum=50, value=10, step=5), gr.Slider(label="Max Length", minimum=10, maximum=100, value=30, step=10), gr.Number(label="Temperature Sampling", value=1.5), gr.Slider(label="Top K Sampling", minimum=0, maximum=100, value=30, step=5), gr.Slider(label="Top P Sampling", minimum=0.01, maximum=1, value=0.93), gr.Slider(label="Number of Beams", minimum=1, maximum=10, value=5, step=1), gr.Number(label="Repetition Penalty", value=2.0), gr.Dropdown(label="Do Sample?", choices=[True,False], value=True, multiselect=False)] foroutput=gr.Textbox(lines=5, max_lines=50, label="Generated Text with Greedy/Beam Search Decoding") examples = [ ["Indonesia adalah negara kepulauan", 10, 30, 1.0, 25, 0.92, 5, 2.0, True], ["Indonesia adalah negara kepulauan", 10, 30, 1.0, 25, 0.92, 5, 1.0, False], ["Skripsi merupakan tugas akhir mahasiswa", 20, 40, 1.0, 50, 0.92, 1, 2.0, True], ["Skripsi merupakan tugas akhir mahasiswa", 20, 40, 1.0, 50, 0.92, 1, 1.0, False], ["Pemandangan di pantai kuta Bali sangatlah indah.", 30, 50, 0.5, 40, 0.98, 10, 1.0, True], ["Pemandangan di pantai kuta Bali sangatlah indah.", 10, 30, 1.5, 30, 0.93, 5, 2.0, True]] title = """ Q-GEN Logo

GPT-2 Indonesian Text Generation Playground

""" description = "

This project is a part of thesis requirement of Anugrah Akbar Praramadhan

" article = """

Link to the Trained Model

Link to the Project Repository

Link to the Autosaved Generated Output

Original Paper

Copyright Anugrah Akbar Praramadhan 2023

Trained on Indo4B Benchmark Dataset of Indonesian language Wikipedia with a Causal Language Modeling (CLM) objective
""" # using gradio interfaces app = gr.Interface( fn=single_generation, inputs=forinput, outputs=foroutput, examples=examples, title=title, description=description, article=article, allow_flagging='manual', flagging_options=['Well Performed', 'Inappropriate Word Selection', 'Wordy', 'Strange Word', 'Others'], flagging_callback=hf_writer) if __name__=='__main__': app.launch()