madankn79 committed on
Commit
704c23d
·
1 Parent(s): 2c7735b
Files changed (1) hide show
  1. app.py +9 -0
app.py CHANGED
@@ -10,6 +10,15 @@ import nltk
10
  nltk.download("stopwords")
11
  stop_words = set(stopwords.words("english"))
12
 
 
 
 
 
 
 
 
 
 
13
  # Model list
14
  model_choices = {
15
  "DistilBART CNN (sshleifer/distilbart-cnn-12-6)": "sshleifer/distilbart-cnn-12-6",
 
10
  nltk.download("stopwords")
11
  stop_words = set(stopwords.words("english"))
12
 
13
+ # Define additional words (prepositions, conjunctions, articles) to remove
14
+ extra_stopwords = set([
15
+ 'a', 'an', 'the', 'and', 'but', 'or', 'for', 'nor', 'so', 'yet', 'at', 'in', 'on', 'with', 'about', 'as', 'by', 'to', 'from', 'of', 'over', 'under', 'during', 'before', 'after', 'between', 'into', 'through', 'among', 'above', 'below'
16
+ ])
17
+
18
+ # Combine NLTK stopwords with extra stopwords
19
+ stop_words = set(stopwords.words("english")).union(extra_stopwords)
20
+
21
+
22
  # Model list
23
  model_choices = {
24
  "DistilBART CNN (sshleifer/distilbart-cnn-12-6)": "sshleifer/distilbart-cnn-12-6",