RCaz commited on
Commit
9ac40ae
·
1 Parent(s): bd1111c

added video interpreter

Browse files
Files changed (3) hide show
  1. app.py +3 -3
  2. requirements.txt +5 -1
  3. utils.py +39 -5
app.py CHANGED
@@ -16,9 +16,9 @@ def answer_video_question(query : str, url : str, file : bytes) -> dict:
16
  temp_video_path = temp_vid.name
17
 
18
  # Output frame folder
19
- check = extract_keyframes(temp_video_path)
20
-
21
- return {"status_vid_frame_from_file":check}
22
 
23
  elif url:
24
  files_path = download_video(url)
 
16
  temp_video_path = temp_vid.name
17
 
18
  # Output frame folder
19
+ all_frames_data = extract_nfps_frames(temp_video_path)
20
+ langchain_documents = provide_video_RAG(all_frames_data)
21
+ return {"status_vid_frame_from_file":all_frames_data}
22
 
23
  elif url:
24
  files_path = download_video(url)
requirements.txt CHANGED
@@ -2,4 +2,8 @@ mcp
2
  textblob
3
  yt_dlp
4
  opencv-python
5
- scikit-image
 
 
 
 
 
2
  textblob
3
  yt_dlp
4
  opencv-python
5
+ scikit-image
6
+ langchain
7
+ transformers
8
+ torch
9
+ Pillow
utils.py CHANGED
@@ -157,9 +157,18 @@ def extract_nfps_frames(video_path, nfps=5,diff_threshold=0.4):
157
  prev_frame = frame
158
  saved_id += 1
159
 
160
- # append to a list that will constitute RAG Docuement
161
- frame_data=get_frame_infos(filename)
162
- all_frames_data.append(frame_data)
 
 
 
 
 
 
 
 
 
163
  frame_id += 1
164
 
165
  cap.release()
@@ -171,9 +180,34 @@ def get_frame_infos(filename:str) -> dict:
171
  analyser = VideoAnalyser()
172
 
173
  description = analyser.describe_frame(filename)
174
- detection = analyser.detect_objects(filename)
175
 
176
  print("description",type(description),description)
177
  print("detection",type(detection),detection)
178
 
179
- return (descrition, detection)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  prev_frame = frame
158
  saved_id += 1
159
 
160
+ # append to a list that will constitute the RAG Document
161
+ timestamp_ms = cap.get(cv2.CAP_PROP_POS_MSEC)
162
+ timestamp_sec = timestamp_ms / 1000.0
163
+ description, objects = get_frame_infos(filename)
164
+ frame_data = {
165
+ "frame_id": saved_id,
166
+ "timestamp_sec": timestamp_sec,
167
+ "description": description,
168
+ "detected_objects": objects,
169
+ "frame_path": filename # Optional: path to the saved frame
170
+ }
171
+ all_frames_data.append(frame_data)
172
  frame_id += 1
173
 
174
  cap.release()
 
180
  analyser = VideoAnalyser()
181
 
182
  description = analyser.describe_frame(filename)
183
+ objects = analyser.detect_objects(filename)
184
 
185
  print("description",type(description),description)
186
  print("detection",type(objects),objects)
187
 
188
+ return (description, objects)
189
+
190
+
191
+
192
+ from langchain.docstore.document import Document
193
+
194
+ def provide_video_RAG(all_frames_data):
195
+ # Assuming 'all_frames_data' is the list from the previous step
196
+ langchain_documents = []
197
+
198
+ for data in all_frames_data:
199
+ # Combine the analysis into a single string for the document content
200
+ content = f"Description: {data['description']}\nObjects Detected: {', '.join(data['detected_objects'])}"
201
+
202
+ # Create the LangChain Document
203
+ doc = Document(
204
+ page_content=content,
205
+ metadata={
206
+ "timestamp": data['timestamp_sec'],
207
+ "frame_id": data['frame_id']
208
+ }
209
+ )
210
+
211
+ langchain_documents.append(doc)
212
+ return langchain_documents
213
+ # Now 'langchain_documents' is ready to be indexed in a vector store for your RAG system