madankn79 committed on
Commit
f13147a
·
1 Parent(s): b7e9fc8
Files changed (1) hide show
  1. app.py +18 -12
app.py CHANGED
@@ -28,14 +28,28 @@ model_choices = {
28
 
29
  model_cache = {}
30
 
31
- # Function to clean input text (remove special characters and extra spaces)
 
 
 
 
 
 
 
32
  def clean_text(input_text):
33
  # Replace special characters with a space
34
  cleaned_text = re.sub(r'[^A-Za-z0-9\s]', ' ', input_text)
35
- # Replace multiple spaces with a single space
36
- cleaned_text = re.sub(r'\s+', ' ', cleaned_text)
 
 
 
 
 
 
37
  # Strip leading and trailing spaces
38
  cleaned_text = cleaned_text.strip()
 
39
  return cleaned_text
40
 
41
  # Load model and tokenizer
@@ -84,12 +98,4 @@ iface = gr.Interface(
84
  fn=summarize_text,
85
  inputs=[
86
  gr.Textbox(lines=6, label="Enter text to summarize"),
87
- gr.Dropdown(choices=list(model_choices.keys()), label="Choose summarization model", value="Pegasus (google/pegasus-xsum)"),
88
- gr.Slider(minimum=30, maximum=200, value=65, step=1, label="Max Character Limit")
89
- ],
90
- outputs=gr.Textbox(lines=3, label="Summary (truncated to character limit)"),
91
- title="Multi-Model Text Summarizer",
92
- description="Summarize text using different Hugging Face models with a user-defined character limit."
93
- )
94
-
95
- iface.launch()
 
28
 
29
  model_cache = {}
30
 
31
# Common prepositions and conjunctions that clean_text() drops from its input.
# Entries are lowercase; clean_text() lowercases each word before the lookup,
# so matching is case-insensitive.
prepositions_and_conjunctions = {
    "in", "on", "at", "by", "for", "with", "about", "as", "into", "during", "before", "after",
    "of", "to", "from", "and", "but", "or", "nor", "so", "yet", "because", "although", "since",
    "unless", "until", "while", "if", "than", "whether", "where", "when", "that", "which", "who", "whom",
}


def clean_text(input_text):
    """Return *input_text* stripped of special characters and filler words.

    Every character that is not an ASCII letter, an ASCII digit, or
    whitespace is replaced by a space. The result is split on whitespace,
    words found in ``prepositions_and_conjunctions`` (compared
    case-insensitively) are dropped, and the remaining words are joined
    with single spaces.

    Because punctuation becomes a space, a word such as ``"don't"`` is
    split into ``"don"`` and ``"t"``.

    Args:
        input_text: The raw text to clean. ``None`` and the empty string
            are both treated as "no text".

    Returns:
        The cleaned text, with no leading, trailing, or repeated spaces.
        An empty string when there is nothing left (or nothing given).
    """
    # Guard against an empty/None value so re.sub() never sees a non-string.
    if not input_text:
        return ""

    # Replace special characters with a space
    cleaned_text = re.sub(r'[^A-Za-z0-9\s]', ' ', input_text)

    # Tokenize on whitespace and remove prepositions/conjunctions
    words = [
        word
        for word in cleaned_text.split()
        if word.lower() not in prepositions_and_conjunctions
    ]

    # split() yields no empty tokens, so the joined text needs no extra strip()
    return " ".join(words)
54
 
55
  # Load model and tokenizer
 
98
  fn=summarize_text,
99
  inputs=[
100
  gr.Textbox(lines=6, label="Enter text to summarize"),
101
+ gr.Dropdown(choices