Spaces:

Xindus
/

xindus_summarizer

Sleeping

madankn79 committed on May 1

Commit

98ac441

1 Parent(s): 6ed741d

google

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 # Model choices ordered by accuracy
@@ -27,6 +28,17 @@ model_choices = {
 model_cache = {}
 def load_model(model_name):
     if model_name not in model_cache:
         tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -34,13 +46,18 @@ def load_model(model_name):
         model_cache[model_name] = (tokenizer, model)
     return model_cache[model_name]
 def summarize_text(input_text, model_label, char_limit):
     if not input_text.strip():
         return "Please enter some text."
     model_name = model_choices[model_label]
     tokenizer, model = load_model(model_name)
     if "t5" in model_name.lower() or "flan" in model_name.lower():
         input_text = "summarize: " + input_text

 import gradio as gr
+import re
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 # Model choices ordered by accuracy
 model_cache = {}
+# Normalize raw input text: keep only ASCII letters, digits and whitespace,
+# then collapse whitespace runs and trim both ends.
+# NOTE(review): the character class below also removes punctuation (periods,
+# commas, apostrophes) and non-ASCII letters such as accented characters;
+# confirm that this is intended.
+def clean_text(input_text):
+    # Replace every character that is not an ASCII letter, a digit or whitespace with a space
+    cleaned_text = re.sub(r'[^A-Za-z0-9\s]', ' ', input_text)
+    # Collapse each run of whitespace (spaces, tabs, newlines) into a single space
+    cleaned_text = re.sub(r'\s+', ' ', cleaned_text)
+    # Remove the leading and trailing space that may remain after collapsing
+    cleaned_text = cleaned_text.strip()
+    return cleaned_text
+# Load model and tokenizer
 def load_model(model_name):
     if model_name not in model_cache:
         tokenizer = AutoTokenizer.from_pretrained(model_name)
         model_cache[model_name] = (tokenizer, model)
     return model_cache[model_name]
+# Summarize the text using a selected model
 def summarize_text(input_text, model_label, char_limit):
     if not input_text.strip():
         return "Please enter some text."
+    # Clean the input text by removing special characters and extra spaces
+    input_text = clean_text(input_text)
     model_name = model_choices[model_label]
     tokenizer, model = load_model(model_name)
+    # Adjust the input format for T5 and FLAN models
     if "t5" in model_name.lower() or "flan" in model_name.lower():
         input_text = "summarize: " + input_text