Update app.py
Browse files
app.py
CHANGED
@@ -24,29 +24,32 @@ client = OpenAI(
|
|
24 |
DATASET_PATH = "data"
|
25 |
JSON_PATH = f"{DATASET_PATH}/sign_language_data.json"
|
26 |
|
27 |
-
|
28 |
if os.path.exists(JSON_PATH):
|
29 |
with open(JSON_PATH, "r") as f:
|
30 |
dataset = json.load(f)
|
31 |
|
32 |
for item in dataset:
|
|
|
33 |
category = item["category"].lower().replace(" ", "_")
|
34 |
|
35 |
-
|
36 |
-
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
-
if item["frame_path"].startswith("/content/sign_language_dataset/"):
|
40 |
-
item["frame_path"] = item["frame_path"].replace("/content/sign_language_dataset/", f"{DATASET_PATH}/")
|
41 |
else:
|
42 |
|
43 |
dataset = []
|
44 |
print(f"Warning: {JSON_PATH} does not exist. Using empty dataset.")
|
45 |
|
46 |
-
|
47 |
logging.getLogger("sentence_transformers").setLevel(logging.ERROR)
|
48 |
|
49 |
-
|
50 |
print("Loading sentence transformer model...")
|
51 |
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
|
52 |
|
@@ -56,7 +59,7 @@ index = faiss.IndexFlatL2(dimension)
|
|
56 |
text_to_video = {}
|
57 |
idx_to_text = []
|
58 |
|
59 |
-
|
60 |
for item in dataset:
|
61 |
phrases = [item["text"]] + item.get("semantic_meaning", [])
|
62 |
|
@@ -76,7 +79,7 @@ def list_available_phrases():
|
|
76 |
|
77 |
|
78 |
def preprocess_text(text):
|
79 |
-
|
80 |
emoji_pattern = re.compile("["
|
81 |
u"\U0001F600-\U0001F64F"
|
82 |
u"\U0001F300-\U0001F5FF"
|
@@ -149,7 +152,7 @@ def refine_sentence_with_deepseek(text):
|
|
149 |
|
150 |
|
151 |
def retrieve_video(text, debug=False, similarity_threshold=0.7):
|
152 |
-
|
153 |
if not text or text.isspace():
|
154 |
return None
|
155 |
|
|
|
24 |
DATASET_PATH = "data"
|
25 |
JSON_PATH = f"{DATASET_PATH}/sign_language_data.json"
|
26 |
|
|
|
27 |
if os.path.exists(JSON_PATH):
|
28 |
with open(JSON_PATH, "r") as f:
|
29 |
dataset = json.load(f)
|
30 |
|
31 |
for item in dataset:
|
32 |
+
|
33 |
category = item["category"].lower().replace(" ", "_")
|
34 |
|
35 |
+
|
36 |
+
video_filename = os.path.basename(item["video_clip_path"])
|
37 |
+
item["video_clip_path"] = f"{DATASET_PATH}/clips/{category}/{video_filename}"
|
38 |
+
|
39 |
+
|
40 |
+
frame_filename = os.path.basename(item["frame_path"])
|
41 |
+
item["frame_path"] = f"{DATASET_PATH}/all_signs/{frame_filename}"
|
42 |
+
|
43 |
|
|
|
|
|
44 |
else:
|
45 |
|
46 |
dataset = []
|
47 |
print(f"Warning: {JSON_PATH} does not exist. Using empty dataset.")
|
48 |
|
49 |
+
|
50 |
logging.getLogger("sentence_transformers").setLevel(logging.ERROR)
|
51 |
|
52 |
+
|
53 |
print("Loading sentence transformer model...")
|
54 |
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
|
55 |
|
|
|
59 |
text_to_video = {}
|
60 |
idx_to_text = []
|
61 |
|
62 |
+
|
63 |
for item in dataset:
|
64 |
phrases = [item["text"]] + item.get("semantic_meaning", [])
|
65 |
|
|
|
79 |
|
80 |
|
81 |
def preprocess_text(text):
|
82 |
+
|
83 |
emoji_pattern = re.compile("["
|
84 |
u"\U0001F600-\U0001F64F"
|
85 |
u"\U0001F300-\U0001F5FF"
|
|
|
152 |
|
153 |
|
154 |
def retrieve_video(text, debug=False, similarity_threshold=0.7):
|
155 |
+
|
156 |
if not text or text.isspace():
|
157 |
return None
|
158 |
|