Spaces:

alakxender
/

t5-ft-demo

Running on Zero

App Files Files Community

alakxender committed on May 30

Commit

b4cd3cb

1 Parent(s): 82b0ab8

cg

Browse files

Files changed (2) hide show

app.py +43 -3
content_gen.py +57 -0

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import gradio as gr
 from typo_check import css, process_input,MODEL_OPTIONS_TYPO
 from title_gen import generate_title, MODEL_OPTIONS_TITLE
 # Create Gradio interface using the latest syntax
@@ -127,11 +128,10 @@ with gr.Blocks(theme=gr.themes.Default(), css=css) as demo:
 "ޔޫރޮޕާ ލީގުން މޮޅުވެގެން އަހަރެމެންގެ މައްސަލަތަކެއް ހައްލެއް ނުވާނެ. މޮޅުވެގެން ލިބޭނީ ޗެމްޕިއަންސް ލީގުގެ ޖާގައަކާއި ހަރަދު ކުރާނެ ފައިސާ. އެކަމަކު އަސްލު މައްސަލަތައް އަދިވެސް ހުރީ. އަހަރެމެންގެ ކޮންސިސްޓެންޓް ވާން ޖެހޭ. ރަނގަޅު ނިންމުންތައް ނިންމައި، ރަނގަޅު ރެކްރޫޓްމަންޓް ސިސްޓަމަކާއި ރަނގަޅު އެކަޑަމީއެއް ބޭނުންވޭ. މިއީ މި ކުލަބްގެ އަސްލު މަގާމަށް ދިޔުމަށްޓަކައި ހައްލު ކުރަން ޖެހޭ ކަންކަން."
-އިނގިރޭސި ޕްރިމިއާ ލީގުގެ 14 ވަނައިގައި އޮތް މެންޗެސްޓާ ޔުނައިޓެޑަށް، އަންނަ ސީޒަންގައި ޔޫރަޕްގެ މުބާރާތެއް ކުޅެވެން އޮތް ހަމައެކަނި ގޮތަކީ ޔޫރޮޕާ ލީގު ކާމިޔާބު ކުރުމެވެ.
-ޔޫރޮޕާ ލީގުގެ ސެމީ ފައިނަލްގައި އެތުލެޓިކް ބިލްބާއޯ އާއި ޔުނައިޓެޑް މިރޭ ވާދަ ކުރާއިރު، ދެ ވަނަ ސެމީގައި ބައްދަލުކުރާނީ ޓޮޓެންހަމް ހޮޓްސްޕާ އާއި ނޯވޭގެ ބޯޑޯ/ގްލިމްޓް އެވެ.
 """],
                 ],
                 inputs=article_content,
             )
@@ -156,6 +156,46 @@ This is an experimental model trained on a very small dataset of Dhivehi news ar
 - Due to limited training data, the model may not handle all topics or writing styles equally well.
 - Experimental nature means outputs should be reviewed carefully before use.
 """)
 # Launch the app

 from typo_check import css, process_input,MODEL_OPTIONS_TYPO
 from title_gen import generate_title, MODEL_OPTIONS_TITLE
+from content_gen import generate_content, MODEL_OPTIONS_CONTENT, get_default_prompt
 # Create Gradio interface using the latest syntax
 "ޔޫރޮޕާ ލީގުން މޮޅުވެގެން އަހަރެމެންގެ މައްސަލަތަކެއް ހައްލެއް ނުވާނެ. މޮޅުވެގެން ލިބޭނީ ޗެމްޕިއަންސް ލީގުގެ ޖާގައަކާއި ހަރަދު ކުރާނެ ފައިސާ. އެކަމަކު އަސްލު މައްސަލަތައް އަދިވެސް ހުރީ. އަހަރެމެންގެ ކޮންސިސްޓެންޓް ވާން ޖެހޭ. ރަނގަޅު ނިންމުންތައް ނިންމައި، ރަނގަޅު ރެކްރޫޓްމަންޓް ސިސްޓަމަކާއި ރަނގަޅު އެކަޑަމީއެއް ބޭނުންވޭ. މިއީ މި ކުލަބްގެ އަސްލު މަގާމަށް ދިޔުމަށްޓަކައި ހައްލު ކުރަން ޖެހޭ ކަންކަން."
+އިނގިރޭސި ޕްރިމިއާ ލީގުގެ 14 ވަނައިގައި އޮތް މެންޗެސްޓާ ޔުނައިޓެޑަށް، އަންނަ ސީޒަންގައި ޔޫރަޕްގެ މުބާރާތެއް ކުޅެވެން އޮތް ހަމައެކަނި ގޮތަކ ޔޫރޮޕާ ލީގު ކާމިޔާބު ކުރުމެވެ.
+ޔޫރޮޕާ ލީގުގެ ސެމީ ފައިނަލްގައި އެތުލެޓިކް ބިލްބާއޯ އާއި ޔުނައިޓެޑް މިރޭ ވާދަ ކުރާއިރު، ދެ ވަނަ ސެމީގައި ބައްދަލުކުރާނ ޓޮޓެންހަމް ހޮޓްސްޕާ އާއި ނޯވޭގެ ބޯޑޯ/ގްލިމްޓް އެވެ.
 """],
                 ],
                 inputs=article_content,
             )
 - Due to limited training data, the model may not handle all topics or writing styles equally well.
 - Experimental nature means outputs should be reviewed carefully before use.
+""")
+        # "Content Generation" tab: model picker and prompt box.
+        with gr.Tab("Content Generation"):
+            gr.Markdown("# <center>Dhivehi Content Generator</center>")
+            gr.Markdown("Generate Dhivehi news articles or content using a fine-tuned T5 model. Select your model and tweak generation parameters.")
+            with gr.Row():
+                # Choices are the keys of MODEL_OPTIONS_CONTENT; the first key is preselected.
+                model_choice = gr.Dropdown(choices=list(MODEL_OPTIONS_CONTENT.keys()), value=list(MODEL_OPTIONS_CONTENT.keys())[0], label="Model")
+            with gr.Row():
+                prompt_input = gr.Textbox(label="Input Prompt", lines=1, elem_classes="textbox1")
+            def update_prompt(model_choice):
+                """Return the default prompt registered for the selected model."""
+                return get_default_prompt(model_choice)
+            # Whenever the model selection changes, overwrite the prompt box with
+            # that model's default prompt.
+            model_choice.change(fn=update_prompt, inputs=model_choice, outputs=prompt_input)
+            gr.Examples(
+                examples=[
+                    ["ގުޅިފަޅުގެ ލޭންޑް ޔޫޒް ޕްލޭނަށް ބަދަލު ގެނެސްފައިވާތީ މިއަދު ގުރުއަތުލުން ބާއްވަނީ"],
+                    ["ދެމަފިރިންގެ ގުޅުންތަކާއި ޒިންމާތައް ބިނާކުރަން ޖެހޭ އަސާސްތައް"],
+                    ["ކެފެއިންގެ ސަބަބުން އިންސާނާގެ ހަށިގަނޑަށް ކުރާ ޖިސްމާނީ އަދި ނަފްސާނީ އަސަރުތައް ބަޔާންކޮށްދެއްވާށެވެ."],
+                ],
+                inputs=prompt_input,
+            )
+            # Generation controls; their current values are handed to
+            # generate_content by the button click wired up below.
+            with gr.Row():
+                max_tokens_slider = gr.Slider(10, 300, value=150, label="Max New Tokens")
+                num_beams_slider = gr.Slider(1, 10, value=4, step=1, label="Beam Size (num_beams)")
+            with gr.Row():
+                rep_penalty_slider = gr.Slider(1.0, 1.9, value=1.2, step=0.1, label="Repetition Penalty")
+                ngram_slider = gr.Slider(0, 10, value=3, step=1, label="No Repeat Ngram Size")
+            with gr.Row():
+                do_sample_checkbox = gr.Checkbox(label="Do Sample", value=True)
+            with gr.Row():
+                output_text = gr.Textbox(label="Generated Output", lines=10, elem_classes="textbox1")
+            generate_button = gr.Button("Generate Content")
+            # The order of `inputs` mirrors generate_content's positional
+            # parameters: prompt, max_new_tokens, num_beams, repetition_penalty,
+            # no_repeat_ngram_size, do_sample, model_choice.
+            generate_button.click(
+                fn=generate_content,
+                inputs=[prompt_input, max_tokens_slider, num_beams_slider, rep_penalty_slider, ngram_slider, do_sample_checkbox, model_choice],
+                outputs=output_text
+            )
+            gr.Markdown("""
+**Notice:**
+All outputs generated are synthetic, created using fine-tuned models for experimental and educational evaluation. Accuracy is not guaranteed, and the content should not be considered a source of truth. Please avoid applying these results to production environments, critical systems, or real-world decision-making without proper validation.
 """)
 # Launch the app

content_gen.py ADDED Viewed

	@@ -0,0 +1,57 @@

+import random
+import numpy as np
+import torch
+from transformers import T5ForConditionalGeneration, T5Tokenizer
+import spaces
+# Available models for content generation.
+# Each key is the label used to select a model; each value holds:
+#   "model_id"       - identifier passed to from_pretrained() when loading
+#   "default_prompt" - text prepended to the user's prompt before generation
+MODEL_OPTIONS_CONTENT = {
+    "MX02 (mixed)": {
+        "model_id": "alakxender/flan-t5-corpora-mixed",
+        "default_prompt": "Tell me about: "
+    },
+    "MX01 (articles)": {
+        "model_id": "alakxender/flan-t5-news-articles",
+        "default_prompt": "Create an article about: "
+    }
+}
+# Cache for loaded models/tokenizers, keyed by model_id; each value is a
+# (tokenizer, model) tuple so a model is loaded only once per process.
+MODEL_CACHE = {}
+def get_model_and_tokenizer(model_choice):
+    """Return the ``(tokenizer, model)`` pair for ``model_choice``.
+
+    ``model_choice`` must be a key of ``MODEL_OPTIONS_CONTENT`` (a ``KeyError``
+    is raised otherwise). The pair is loaded on first use, moved to CUDA when
+    available (CPU otherwise), and stored in ``MODEL_CACHE`` under its
+    model id so later calls reuse it.
+    """
+    repo_id = MODEL_OPTIONS_CONTENT[model_choice]["model_id"]
+    cached_pair = MODEL_CACHE.get(repo_id)
+    if cached_pair is not None:
+        # Already loaded earlier in this process.
+        return cached_pair
+    print(f"Loading model: {repo_id}")
+    tok = T5Tokenizer.from_pretrained(repo_id)
+    net = T5ForConditionalGeneration.from_pretrained(repo_id)
+    target = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print(f"Moving model to device: {target}")
+    net.to(target)
+    MODEL_CACHE[repo_id] = (tok, net)
+    return MODEL_CACHE[repo_id]
+def get_default_prompt(model_choice):
+    """Return the ``default_prompt`` string configured for ``model_choice``.
+
+    Raises ``KeyError`` when ``model_choice`` is not a key of
+    ``MODEL_OPTIONS_CONTENT``.
+    """
+    model_entry = MODEL_OPTIONS_CONTENT[model_choice]
+    return model_entry["default_prompt"]
+@spaces.GPU()
+def generate_content(prompt, max_new_tokens, num_beams, repetition_penalty, no_repeat_ngram_size, do_sample, model_choice):
+    """Generate text for ``prompt`` with the model selected by ``model_choice``.
+
+    Args:
+        prompt: User text; ``None`` is treated as an empty string.
+        max_new_tokens: Upper bound on generated tokens (coerced to ``int``).
+        num_beams: Beam width (coerced to ``int``).
+        repetition_penalty: Repetition penalty (coerced to ``float``).
+        no_repeat_ngram_size: N-gram size that may not repeat (coerced to ``int``).
+        do_sample: Whether to sample instead of decoding greedily/by beams.
+        model_choice: Key of ``MODEL_OPTIONS_CONTENT`` naming the model.
+
+    Returns:
+        The decoded first generated sequence, with special tokens removed.
+
+    Raises:
+        KeyError: If ``model_choice`` is not a key of ``MODEL_OPTIONS_CONTENT``.
+    """
+    tokenizer, model = get_model_and_tokenizer(model_choice)
+    prefix = get_default_prompt(model_choice)
+    prompt = prompt or ""
+    # The UI fills the prompt box with the model's default prefix when the
+    # model changes, so prepend it only when the text does not already start
+    # with it; otherwise the prefix would appear twice.
+    if not prompt.lstrip().startswith(prefix.strip()):
+        prompt = prefix + prompt
+    # UI sliders may deliver floats; these generation arguments must be ints.
+    num_beams = int(num_beams)
+    inputs = tokenizer(prompt, return_tensors="pt")
+    # Follow the cached model's device so inputs and weights always agree.
+    device = model.device
+    inputs = {k: v.to(device) for k, v in inputs.items()}
+    outputs = model.generate(
+        **inputs,
+        max_new_tokens=int(max_new_tokens),
+        num_beams=num_beams,
+        repetition_penalty=float(repetition_penalty),
+        no_repeat_ngram_size=int(no_repeat_ngram_size),
+        do_sample=bool(do_sample),
+        # early_stopping only applies to beam search; skip it for one beam.
+        early_stopping=num_beams > 1,
+    )
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)