Spaces:
Runtime error
Runtime error
added video interpreter
Browse files- app.py +3 -3
- requirements.txt +5 -1
- utils.py +39 -5
app.py
CHANGED
@@ -16,9 +16,9 @@ def answer_video_question(query : str, url : str, file : bytes) -> dict:
|
|
16 |
temp_video_path = temp_vid.name
|
17 |
|
18 |
# Output frame folder
|
19 |
-
|
20 |
-
|
21 |
-
return {"status_vid_frame_from_file":
|
22 |
|
23 |
elif url:
|
24 |
files_path = download_video(url)
|
|
|
16 |
temp_video_path = temp_vid.name
|
17 |
|
18 |
# Output frame folder
|
19 |
+
all_frames_data = extract_nfps_frames(temp_video_path)
|
20 |
+
langchain_documents = provide_video_RAG(all_frames_data)
|
21 |
+
return {"status_vid_frame_from_file":all_frames_data}
|
22 |
|
23 |
elif url:
|
24 |
files_path = download_video(url)
|
requirements.txt
CHANGED
@@ -2,4 +2,8 @@ mcp
|
|
2 |
textblob
|
3 |
yt_dlp
|
4 |
opencv-python
|
5 |
-
scikit-image
|
|
|
|
|
|
|
|
|
|
2 |
textblob
|
3 |
yt_dlp
|
4 |
opencv-python
|
5 |
+
scikit-image
|
6 |
+
langchain
|
7 |
+
transformers
|
8 |
+
torch
|
9 |
+
Pillow
|
utils.py
CHANGED
@@ -157,9 +157,18 @@ def extract_nfps_frames(video_path, nfps=5,diff_threshold=0.4):
|
|
157 |
prev_frame = frame
|
158 |
saved_id += 1
|
159 |
|
160 |
-
|
161 |
-
|
162 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
frame_id += 1
|
164 |
|
165 |
cap.release()
|
@@ -171,9 +180,34 @@ def get_frame_infos(filename:str) -> dict:
|
|
171 |
analyser = VideoAnalyser()
|
172 |
|
173 |
description = analyser.describe_frame(filename)
|
174 |
-
|
175 |
|
176 |
print("description",type(description),description)
|
177 |
print("detection",type(detection),detection)
|
178 |
|
179 |
-
return (descrition,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
157 |
prev_frame = frame
|
158 |
saved_id += 1
|
159 |
|
160 |
+
# append to a list that will constitute the RAG Document
|
161 |
+
timestamp_ms = cap.get(cv2.CAP_PROP_POS_MSEC)
|
162 |
+
timestamp_sec = timestamp_ms / 1000.0
|
163 |
+
description, objects = get_frame_infos(filename)
|
164 |
+
frame_data = {
|
165 |
+
"frame_id": saved_id,
|
166 |
+
"timestamp_sec": timestamp_sec,
|
167 |
+
"description": description,
|
168 |
+
"detected_objects": objects,
|
169 |
+
"frame_path": filename # Optional: path to the saved frame
|
170 |
+
}
|
171 |
+
all_frames_data.append(frame_data)
|
172 |
frame_id += 1
|
173 |
|
174 |
cap.release()
|
|
|
180 |
analyser = VideoAnalyser()
|
181 |
|
182 |
description = analyser.describe_frame(filename)
|
183 |
+
objects = analyser.detect_objects(filename)
|
184 |
|
185 |
print("description",type(description),description)
|
186 |
print("detection",type(objects),objects)
|
187 |
|
188 |
+
return (description, objects)
|
189 |
+
|
190 |
+
|
191 |
+
|
192 |
+
from langchain.docstore.document import Document
|
193 |
+
|
194 |
+
def provide_video_RAG(all_frames_data):
    """Turn per-frame analysis records into LangChain Documents for RAG indexing.

    Each record in *all_frames_data* is expected to carry the keys
    'description', 'detected_objects' (list of strings), 'timestamp_sec'
    and 'frame_id', as produced by the frame-extraction step.
    Returns a list of Document objects ready to be indexed in a vector store.
    """
    documents = []
    for frame in all_frames_data:
        # Fold the visual analysis into a single searchable text body.
        objects_text = ', '.join(frame['detected_objects'])
        body = f"Description: {frame['description']}\nObjects Detected: {objects_text}"
        documents.append(
            Document(
                page_content=body,
                metadata={
                    "timestamp": frame['timestamp_sec'],
                    "frame_id": frame['frame_id'],
                },
            )
        )
    return documents
|