Spaces:
Runtime error
Runtime error
added video interpreter
Browse files- app.py +3 -3
- requirements.txt +5 -1
- utils.py +39 -5
app.py
CHANGED
@@ -16,9 +16,9 @@ def answer_video_question(query : str, url : str, file : bytes) -> dict:
|
|
16 |
temp_video_path = temp_vid.name
|
17 |
|
18 |
# Output frame folder
|
19 |
-
|
20 |
-
|
21 |
-
return {"status_vid_frame_from_file":
|
22 |
|
23 |
elif url:
|
24 |
files_path = download_video(url)
|
|
|
16 |
temp_video_path = temp_vid.name
|
17 |
|
18 |
# Output frame folder
|
19 |
+
all_frames_data = extract_nfps_frames(temp_video_path)
|
20 |
+
langchain_documents = provide_video_RAG(all_frames_data)
|
21 |
+
return {"status_vid_frame_from_file":all_frames_data}
|
22 |
|
23 |
elif url:
|
24 |
files_path = download_video(url)
|
requirements.txt
CHANGED
@@ -2,4 +2,8 @@ mcp
|
|
2 |
textblob
|
3 |
yt_dlp
|
4 |
opencv-python
|
5 |
-
scikit-image
|
|
|
|
|
|
|
|
|
|
2 |
textblob
|
3 |
yt_dlp
|
4 |
opencv-python
|
5 |
+
scikit-image
|
6 |
+
langchain
|
7 |
+
transformers
|
8 |
+
torch
|
9 |
+
Pillow
|
utils.py
CHANGED
@@ -157,9 +157,18 @@ def extract_nfps_frames(video_path, nfps=5,diff_threshold=0.4):
|
|
157 |
prev_frame = frame
|
158 |
saved_id += 1
|
159 |
|
160 |
-
|
161 |
-
|
162 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
frame_id += 1
|
164 |
|
165 |
cap.release()
|
@@ -171,9 +180,34 @@ def get_frame_infos(filename:str) -> dict:
|
|
171 |
analyser = VideoAnalyser()
|
172 |
|
173 |
description = analyser.describe_frame(filename)
|
174 |
-
|
175 |
|
176 |
print("description",type(description),description)
|
177 |
print("detection",type(detection),detection)
|
178 |
|
179 |
-
return (descrition,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
157 |
prev_frame = frame
|
158 |
saved_id += 1
|
159 |
|
160 |
+
# append to a list that will constitute the RAG Document
|
161 |
+
timestamp_ms = cap.get(cv2.CAP_PROP_POS_MSEC)
|
162 |
+
timestamp_sec = timestamp_ms / 1000.0
|
163 |
+
description, objects = get_frame_infos(filename)
|
164 |
+
frame_data = {
|
165 |
+
"frame_id": saved_id,
|
166 |
+
"timestamp_sec": timestamp_sec,
|
167 |
+
"description": description,
|
168 |
+
"detected_objects": objects,
|
169 |
+
"frame_path": filename # Optional: path to the saved frame
|
170 |
+
}
|
171 |
+
all_frames_data.append(frame_data)
|
172 |
frame_id += 1
|
173 |
|
174 |
cap.release()
|
|
|
180 |
analyser = VideoAnalyser()
|
181 |
|
182 |
description = analyser.describe_frame(filename)
|
183 |
+
objects = analyser.detect_objects(filename)
|
184 |
|
185 |
print("description",type(description),description)
|
186 |
print("detection",type(objects),objects)
|
187 |
|
188 |
+
return (description, objects)
|
189 |
+
|
190 |
+
|
191 |
+
|
192 |
+
from langchain.docstore.document import Document
|
193 |
+
|
194 |
+
def provide_video_RAG(all_frames_data):
    """Turn per-frame analysis records into LangChain Documents for RAG indexing.

    Each record in *all_frames_data* is expected to carry the keys
    'description', 'detected_objects' (list of strings), 'timestamp_sec'
    and 'frame_id', as produced by the frame-extraction step.
    Returns a list of Document objects ready to be indexed in a vector store.
    """
    documents = []
    for frame in all_frames_data:
        # Fold the visual analysis into a single searchable text body.
        objects_text = ', '.join(frame['detected_objects'])
        body = f"Description: {frame['description']}\nObjects Detected: {objects_text}"
        documents.append(
            Document(
                page_content=body,
                metadata={
                    "timestamp": frame['timestamp_sec'],
                    "frame_id": frame['frame_id'],
                },
            )
        )
    return documents
|