madankn79 committed on
Commit
98ac441
·
1 Parent(s): 6ed741d
Files changed (1) hide show
  1. app.py +17 -0
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import gradio as gr
 
2
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
 
4
  # Model choices ordered by accuracy
@@ -27,6 +28,17 @@ model_choices = {
27
 
28
  model_cache = {}
29
 
 
 
 
 
 
 
 
 
 
 
 
30
  def load_model(model_name):
31
  if model_name not in model_cache:
32
  tokenizer = AutoTokenizer.from_pretrained(model_name)
@@ -34,13 +46,18 @@ def load_model(model_name):
34
  model_cache[model_name] = (tokenizer, model)
35
  return model_cache[model_name]
36
 
 
37
  def summarize_text(input_text, model_label, char_limit):
38
  if not input_text.strip():
39
  return "Please enter some text."
40
 
 
 
 
41
  model_name = model_choices[model_label]
42
  tokenizer, model = load_model(model_name)
43
 
 
44
  if "t5" in model_name.lower() or "flan" in model_name.lower():
45
  input_text = "summarize: " + input_text
46
 
 
1
  import gradio as gr
2
+ import re
3
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
 
5
  # Model choices ordered by accuracy
 
28
 
29
  model_cache = {}
30
 
31
+ # Function to clean input text (remove special characters and extra spaces)
32
def clean_text(input_text):
    """Return *input_text* with special characters removed and whitespace collapsed.

    Every character that is not a letter, a digit, or whitespace is replaced
    by a space; runs of whitespace are then collapsed to a single space and
    leading/trailing whitespace is dropped.

    Letters and digits are matched with the Unicode-aware ``\\w`` class, so
    accented and non-Latin text (e.g. "café") is kept intact instead of
    being truncated by an ASCII-only character range.

    Args:
        input_text: The raw text to clean.

    Returns:
        The cleaned text, with words separated by single spaces.
    """
    # ``\w`` also matches "_", which counts as a special character here,
    # so it is stripped explicitly.
    without_specials = re.sub(r'[^\w\s]|_', ' ', input_text)
    # split() with no argument splits on any whitespace run and discards
    # leading/trailing whitespace, so joining yields single-space separation.
    return ' '.join(without_specials.split())
40
+
41
+ # Load model and tokenizer
42
  def load_model(model_name):
43
  if model_name not in model_cache:
44
  tokenizer = AutoTokenizer.from_pretrained(model_name)
 
46
  model_cache[model_name] = (tokenizer, model)
47
  return model_cache[model_name]
48
 
49
+ # Summarize the text using a selected model
50
  def summarize_text(input_text, model_label, char_limit):
51
  if not input_text.strip():
52
  return "Please enter some text."
53
 
54
+ # Clean the input text by removing special characters and extra spaces
55
+ input_text = clean_text(input_text)
56
+
57
  model_name = model_choices[model_label]
58
  tokenizer, model = load_model(model_name)
59
 
60
+ # Adjust the input format for T5 and FLAN models
61
  if "t5" in model_name.lower() or "flan" in model_name.lower():
62
  input_text = "summarize: " + input_text
63