Smriti77 committed on
Commit
d12c944
·
verified ·
1 Parent(s): e827563

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -11
app.py CHANGED
@@ -24,29 +24,32 @@ client = OpenAI(
24
  DATASET_PATH = "data"
25
  JSON_PATH = f"{DATASET_PATH}/sign_language_data.json"
26
 
27
-
28
  if os.path.exists(JSON_PATH):
29
  with open(JSON_PATH, "r") as f:
30
  dataset = json.load(f)
31
 
32
  for item in dataset:
 
33
  category = item["category"].lower().replace(" ", "_")
34
 
35
- # Convert absolute paths to relative ones
36
- if item["video_clip_path"].startswith("/content/sign_language_dataset/"):
37
- item["video_clip_path"] = item["video_clip_path"].replace("/content/sign_language_dataset/", f"{DATASET_PATH}/")
 
 
 
 
 
38
 
39
- if item["frame_path"].startswith("/content/sign_language_dataset/"):
40
- item["frame_path"] = item["frame_path"].replace("/content/sign_language_dataset/", f"{DATASET_PATH}/")
41
  else:
42
 
43
  dataset = []
44
  print(f"Warning: {JSON_PATH} does not exist. Using empty dataset.")
45
 
46
- # Configure logging
47
  logging.getLogger("sentence_transformers").setLevel(logging.ERROR)
48
 
49
- # Load embedding model
50
  print("Loading sentence transformer model...")
51
  embed_model = SentenceTransformer("all-MiniLM-L6-v2")
52
 
@@ -56,7 +59,7 @@ index = faiss.IndexFlatL2(dimension)
56
  text_to_video = {}
57
  idx_to_text = []
58
 
59
- # Add data to index
60
  for item in dataset:
61
  phrases = [item["text"]] + item.get("semantic_meaning", [])
62
 
@@ -76,7 +79,7 @@ def list_available_phrases():
76
 
77
 
78
  def preprocess_text(text):
79
- # Remove emojis and special characters
80
  emoji_pattern = re.compile("["
81
  u"\U0001F600-\U0001F64F"
82
  u"\U0001F300-\U0001F5FF"
@@ -149,7 +152,7 @@ def refine_sentence_with_deepseek(text):
149
 
150
 
151
  def retrieve_video(text, debug=False, similarity_threshold=0.7):
152
- # Check for empty input
153
  if not text or text.isspace():
154
  return None
155
 
 
24
  DATASET_PATH = "data"
25
  JSON_PATH = f"{DATASET_PATH}/sign_language_data.json"
26
 
 
27
  if os.path.exists(JSON_PATH):
28
  with open(JSON_PATH, "r") as f:
29
  dataset = json.load(f)
30
 
31
  for item in dataset:
32
+
33
  category = item["category"].lower().replace(" ", "_")
34
 
35
+
36
+ video_filename = os.path.basename(item["video_clip_path"])
37
+ item["video_clip_path"] = f"{DATASET_PATH}/clips/{category}/{video_filename}"
38
+
39
+
40
+ frame_filename = os.path.basename(item["frame_path"])
41
+ item["frame_path"] = f"{DATASET_PATH}/all_signs/{frame_filename}"
42
+
43
 
 
 
44
  else:
45
 
46
  dataset = []
47
  print(f"Warning: {JSON_PATH} does not exist. Using empty dataset.")
48
 
49
+
50
  logging.getLogger("sentence_transformers").setLevel(logging.ERROR)
51
 
52
+
53
  print("Loading sentence transformer model...")
54
  embed_model = SentenceTransformer("all-MiniLM-L6-v2")
55
 
 
59
  text_to_video = {}
60
  idx_to_text = []
61
 
62
+
63
  for item in dataset:
64
  phrases = [item["text"]] + item.get("semantic_meaning", [])
65
 
 
79
 
80
 
81
  def preprocess_text(text):
82
+
83
  emoji_pattern = re.compile("["
84
  u"\U0001F600-\U0001F64F"
85
  u"\U0001F300-\U0001F5FF"
 
152
 
153
 
154
  def retrieve_video(text, debug=False, similarity_threshold=0.7):
155
+
156
  if not text or text.isspace():
157
  return None
158