Spaces:

Xindus
/

xindus_summarizer

Sleeping

madankn79 committed on May 1

Commit

f13147a

1 Parent(s): b7e9fc8

google

Files changed (1) hide show

app.py CHANGED Viewed

@@ -28,14 +28,28 @@ model_choices = {
 model_cache = {}
-# Function to clean input text (remove special characters and extra spaces)
 def clean_text(input_text):
     # Replace special characters with a space
     cleaned_text = re.sub(r'[^A-Za-z0-9\s]', ' ', input_text)
-    # Replace multiple spaces with a single space
-    cleaned_text = re.sub(r'\s+', ' ', cleaned_text)
     # Strip leading and trailing spaces
     cleaned_text = cleaned_text.strip()
     return cleaned_text
 # Load model and tokenizer
@@ -84,12 +98,4 @@ iface = gr.Interface(
     fn=summarize_text,
     inputs=[
         gr.Textbox(lines=6, label="Enter text to summarize"),
-        gr.Dropdown(choices=list(model_choices.keys()), label="Choose summarization model", value="Pegasus (google/pegasus-xsum)"),
-        gr.Slider(minimum=30, maximum=200, value=65, step=1, label="Max Character Limit")
-    ],
-    outputs=gr.Textbox(lines=3, label="Summary (truncated to character limit)"),
-    title="Multi-Model Text Summarizer",
-    description="Summarize text using different Hugging Face models with a user-defined character limit."
-)
-iface.launch()

 model_cache = {}
+# List of common prepositions, conjunctions, and relative pronouns
+prepositions_and_conjunctions = set([
+    "in", "on", "at", "by", "for", "with", "about", "as", "into", "during", "before", "after",
+    "of", "to", "from", "and", "but", "or", "nor", "so", "yet", "for", "because", "although", "since",
+    "unless", "until", "while", "if", "than", "whether", "where", "when", "that", "which", "who", "whom"
+])
+# Function to clean input text: replaces non-alphanumeric characters with spaces, then removes prepositions and conjunctions
 def clean_text(input_text):
     # Replace special characters with a space
     cleaned_text = re.sub(r'[^A-Za-z0-9\s]', ' ', input_text)
+    # Tokenize the input text and remove prepositions/conjunctions
+    words = cleaned_text.split()
+    words = [word for word in words if word.lower() not in prepositions_and_conjunctions]
+    # Rebuild the cleaned text
+    cleaned_text = " ".join(words)
     # Strip leading and trailing spaces
     cleaned_text = cleaned_text.strip()
     return cleaned_text
 # Load model and tokenizer
     fn=summarize_text,
     inputs=[
         gr.Textbox(lines=6, label="Enter text to summarize"),
+        gr.Dropdown(choices