madankn79 committed on
Commit
94567bf
·
1 Parent(s): f91ed41
Files changed (1) hide show
  1. app.py +13 -0
app.py CHANGED
@@ -34,6 +34,13 @@ model_choices = {
34
 
35
  model_cache = {}
36
 
 
 
 
 
 
 
 
37
  # Clean text: remove special characters and stop words
38
  def clean_text(input_text):
39
  cleaned = re.sub(r"[^A-Za-z0-9\s]", " ", input_text)
@@ -41,6 +48,12 @@ def clean_text(input_text):
41
 
42
  words = cleaned.split()
43
  words = [word for word in words if word.lower() not in stop_words]
 
 
 
 
 
 
44
  return " ".join(words).strip()
45
 
46
  # Load model and tokenizer
 
34
 
35
  model_cache = {}
36
 
37
def emphasize_keywords(text, keywords, repeat=3):
    """Repeat every whole-word occurrence of each keyword in *text*.

    Matching is case-insensitive and anchored on word boundaries, so
    "tie" does not match inside "necktie". Each match is replaced by the
    keyword (in the casing given in *keywords*) repeated *repeat* times,
    separated by single spaces.

    Args:
        text: A string, or an iterable of string tokens. The caller in
            ``clean_text`` passes the already-split word list, so token
            sequences are handled element by element.
        keywords: Iterable of keyword strings. Empty keywords are ignored.
        repeat: Number of copies that replace each match.

    Returns:
        The emphasized string when *text* is a string; otherwise a list
        with one emphasized string per input token.
    """
    # Token sequences are processed per token so the caller can still
    # " ".join() the result; re.sub itself only accepts strings.
    if not isinstance(text, str):
        return [emphasize_keywords(token, keywords, repeat) for token in text]

    for kw in keywords:
        if not kw:
            # An empty keyword would compile to r"\b\b" and match at every
            # word boundary, scattering separators through the text.
            continue
        pattern = r"\b" + re.escape(kw) + r"\b"
        # Joined without a trailing space so no double spaces appear and
        # punctuation stays attached to the last copy.
        replacement = " ".join([kw] * repeat)
        # A callable replacement is inserted literally; a plain string
        # would be parsed as a template and choke on backslashes in kw.
        text = re.sub(
            pattern,
            lambda _match, _rep=replacement: _rep,
            text,
            flags=re.IGNORECASE,
        )
    return text
42
+
43
+
44
  # Clean text: remove special characters and stop words
45
  def clean_text(input_text):
46
  cleaned = re.sub(r"[^A-Za-z0-9\s]", " ", input_text)
 
48
 
49
  words = cleaned.split()
50
  words = [word for word in words if word.lower() not in stop_words]
51
+
52
+ # Example keyword list
53
+ keywords = ["blazer", "shirt", "trouser", "saree", "tie", "suit"]
54
+
55
+ words = emphasize_keywords(words, keywords)
56
+
57
  return " ".join(words).strip()
58
 
59
  # Load model and tokenizer