madankn79 committed on
Commit
ff5002a
·
1 Parent(s): fd8e8ce
Files changed (1) hide show
  1. app.py +12 -11
app.py CHANGED
@@ -1,6 +1,11 @@
1
  import gradio as gr
2
  import re
3
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 
 
 
 
 
4
 
5
  # Model choices ordered by accuracy
6
  model_choices = {
@@ -28,21 +33,17 @@ model_choices = {
28
 
29
  model_cache = {}
30
 
31
- # List of common prepositions and conjunctions
32
- prepositions_and_conjunctions = set([
33
- "in", "on", "at", "by", "for", "with", "about", "as", "into", "during", "before", "after",
34
- "of", "to", "from", "and", "but", "or", "nor", "so", "yet", "for", "because", "although", "since",
35
- "unless", "until", "while", "if", "than", "whether", "where", "when", "that", "which", "who", "whom"
36
- ])
37
 
38
- # Function to clean input text by removing prepositions and conjunctions
39
  def clean_text(input_text):
40
  # Replace special characters with a space
41
  cleaned_text = re.sub(r'[^A-Za-z0-9\s]', ' ', input_text)
42
 
43
- # Tokenize the input text and remove prepositions/conjunctions
44
  words = cleaned_text.split()
45
- words = [word for word in words if word.lower() not in prepositions_and_conjunctions]
46
 
47
  # Rebuild the cleaned text
48
  cleaned_text = " ".join(words)
@@ -65,7 +66,7 @@ def summarize_text(input_text, model_label, char_limit):
65
  if not input_text.strip():
66
  return "Please enter some text."
67
 
68
- # Clean the input text by removing special characters and extra spaces
69
  input_text = clean_text(input_text)
70
 
71
  model_name = model_choices[model_label]
@@ -79,7 +80,7 @@ def summarize_text(input_text, model_label, char_limit):
79
 
80
  summary_ids = model.generate(
81
  inputs["input_ids"],
82
- max_length=15, # Still approximate; can be tuned per model
83
  min_length=5,
84
  do_sample=False
85
  )
 
1
  import gradio as gr
2
  import re
3
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
+ from nltk.corpus import stopwords
5
+
6
+ # Ensure the NLTK stopwords corpus is available (the download is skipped if it is already present)
7
+ import nltk
8
+ nltk.download('stopwords')
9
 
10
  # Model choices ordered by accuracy
11
  model_choices = {
 
33
 
34
  model_cache = {}
35
 
36
+ # English stop words from NLTK, kept in a set for fast membership checks
37
+ stop_words = set(stopwords.words('english'))
 
 
 
 
38
 
39
+ # Clean input text: replace special characters with spaces, then drop stop words
40
  def clean_text(input_text):
41
  # Replace special characters with a space
42
  cleaned_text = re.sub(r'[^A-Za-z0-9\s]', ' ', input_text)
43
 
44
+ # Tokenize the input text and remove stop words
45
  words = cleaned_text.split()
46
+ words = [word for word in words if word.lower() not in stop_words]
47
 
48
  # Rebuild the cleaned text
49
  cleaned_text = " ".join(words)
 
66
  if not input_text.strip():
67
  return "Please enter some text."
68
 
69
+ # Clean the input text by removing special characters and stop words
70
  input_text = clean_text(input_text)
71
 
72
  model_name = model_choices[model_label]
 
80
 
81
  summary_ids = model.generate(
82
  inputs["input_ids"],
83
+ max_length=20, # Still approximate; can be tuned per model
84
  min_length=5,
85
  do_sample=False
86
  )