Spaces:

Xindus
/

xindus_summarizer

Sleeping

App Files Community

madankn79 committed on May 1

Commit

ff5002a

1 Parent(s): fd8e8ce

google

Browse files

Files changed (1) hide show

app.py +12 -11

app.py CHANGED Viewed

@@ -1,6 +1,11 @@
 import gradio as gr
 import re
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 # Model choices ordered by accuracy
 model_choices = {
@@ -28,21 +33,17 @@ model_choices = {
 model_cache = {}
-# List of common prepositions and conjunctions
-prepositions_and_conjunctions = set([
-    "in", "on", "at", "by", "for", "with", "about", "as", "into", "during", "before", "after",
-    "of", "to", "from", "and", "but", "or", "nor", "so", "yet", "for", "because", "although", "since",
-    "unless", "until", "while", "if", "than", "whether", "where", "when", "that", "which", "who", "whom"
-])
-# Function to clean input text by removing prepositions and conjunctions
 def clean_text(input_text):
     # Replace special characters with a space
     cleaned_text = re.sub(r'[^A-Za-z0-9\s]', ' ', input_text)
-    # Tokenize the input text and remove prepositions/conjunctions
     words = cleaned_text.split()
-    words = [word for word in words if word.lower() not in prepositions_and_conjunctions]
     # Rebuild the cleaned text
     cleaned_text = " ".join(words)
@@ -65,7 +66,7 @@ def summarize_text(input_text, model_label, char_limit):
     if not input_text.strip():
         return "Please enter some text."
-    # Clean the input text by removing special characters and extra spaces
     input_text = clean_text(input_text)
     model_name = model_choices[model_label]
@@ -79,7 +80,7 @@ def summarize_text(input_text, model_label, char_limit):
     summary_ids = model.generate(
         inputs["input_ids"],
-        max_length=15,  # Still approximate; can be tuned per model
         min_length=5,
         do_sample=False
     )

 import gradio as gr
 import re
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+from nltk.corpus import stopwords
+# Download the NLTK stopwords (only the first time you run)
+import nltk
+nltk.download('stopwords')
 # Model choices ordered by accuracy
 model_choices = {
 model_cache = {}
+# Get NLTK stopwords (common stop words)
+stop_words = set(stopwords.words('english'))
+# Function to clean input text by removing unnecessary words like stop words
 def clean_text(input_text):
     # Replace special characters with a space
     cleaned_text = re.sub(r'[^A-Za-z0-9\s]', ' ', input_text)
+    # Tokenize the input text and remove stop words
     words = cleaned_text.split()
+    words = [word for word in words if word.lower() not in stop_words]
     # Rebuild the cleaned text
     cleaned_text = " ".join(words)
     if not input_text.strip():
         return "Please enter some text."
+    # Clean the input text by removing special characters and stop words
     input_text = clean_text(input_text)
     model_name = model_choices[model_label]
     summary_ids = model.generate(
         inputs["input_ids"],
+        max_length=20,  # Still approximate; can be tuned per model
         min_length=5,
         do_sample=False
     )