# multimodel-rag-chat-with-videos / s7_store_in_rag.py
from mm_rag.embeddings.bridgetower_embeddings import (
BridgeTowerEmbeddings
)
from mm_rag.vectorstores.multimodal_lancedb import MultimodalLanceDB
import lancedb
from utility import load_json_file, display_retrieved_results
# declare host file (path to the local LanceDB database)
LANCEDB_HOST_FILE = "./shared_data/.lancedb"
# declare table name
TBL_NAME = "test_tbl"
# initialize vectorstore
db = lancedb.connect(LANCEDB_HOST_FILE)
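# (sketch) list any tables already present in this database:
# print(db.table_names())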
# initialize a BridgeTower embedder
embedder = BridgeTowerEmbeddings()
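# BridgeTowerEmbeddings is assumed to follow LangChain's Embeddings
# interface, so a quick sanity check might look like this
# (a sketch; uncomment to run, it loads the model):
# vec = embedder.embed_query("an astronaut on a spacewalk")
# print(len(vec))  # dimensionality of the joint embedding space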
def return_top_k_most_similar_docs(max_docs=3):
    # retrieve the top `max_docs` most similar documents (3 by default)
    # by creating a LanceDB vector store over the existing table
    vectorstore = MultimodalLanceDB(
        uri=LANCEDB_HOST_FILE,
        embedding=embedder,
        table_name=TBL_NAME)
    # create a retriever for the vector store;
    # search_type="similarity" declares that the type of search
    # the retriever should perform is similarity search, and
    # search_kwargs={"k": max_docs} returns the top `max_docs`
    # most similar documents
    retriever = vectorstore.as_retriever(
        search_type='similarity',
        search_kwargs={"k": max_docs})
    query2 = (
        "an astronaut's spacewalk "
        "with an amazing view of the earth from space behind"
    )
    results2 = retriever.invoke(query2)
    display_retrieved_results(results2)

    query3 = "a group of astronauts"
    results3 = retriever.invoke(query3)
    display_retrieved_results(results3)
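    # each result is a LangChain Document; a sketch for inspecting the
    # top hit (assumes the frame path is kept in the metadata, as it is
    # when the table is built in store_in_rag() below):
    # top = results3[0]
    # print(top.page_content)                      # augmented transcript
    # print(top.metadata['extracted_frame_path'])  # matching video frame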
def open_table(table_name):
    # open a connection to table `table_name`
    tbl = db.open_table(table_name)
    print(f"There are {tbl.to_pandas().shape[0]} rows in the table")
    # display the first 3 rows of the table
    print(tbl.to_pandas()[['text', 'image_path']].head(3))
def store_in_rag():
    # load metadata files
    vid1_metadata_path = './shared_data/videos/video1/metadatas.json'
    vid2_metadata_path = './shared_data/videos/video2/metadatas.json'
    vid1_metadata = load_json_file(vid1_metadata_path)
    vid2_metadata = load_json_file(vid2_metadata_path)
    # collect transcripts and image paths
    vid1_trans = [vid['transcript'] for vid in vid1_metadata]
    vid1_img_path = [vid['extracted_frame_path'] for vid in vid1_metadata]
    vid2_trans = [vid['transcript'] for vid in vid2_metadata]
    vid2_img_path = [vid['extracted_frame_path'] for vid in vid2_metadata]
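    # each metadata entry is assumed to look roughly like this
    # (inferred from the keys used above; the values are hypothetical):
    # {"transcript": "this is the view from the cupola ...",
    #  "extracted_frame_path": "./shared_data/videos/video1/frame_0.jpg"}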
    # for video1, augment each transcript with its neighbouring
    # segments, using a window of n = 7
    n = 7
    updated_vid1_trans = [
        ' '.join(vid1_trans[i-int(n/2) : i+int(n/2)]) if i-int(n/2) >= 0
        else ' '.join(vid1_trans[0 : i+int(n/2)])
        for i in range(len(vid1_trans))
    ]
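    # e.g. with n = 7 the half-window is int(n/2) = 3, so:
    #   i = 0 -> ' '.join(vid1_trans[0:3])  (no left context at the start)
    #   i = 5 -> ' '.join(vid1_trans[2:8])  (3 left, itself, 2 right)
    # slices exclude their right edge, so a full window covers 6
    # consecutive segments rather than 7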
    # write the augmented transcripts back into the metadata
    for i in range(len(updated_vid1_trans)):
        vid1_metadata[i]['transcript'] = updated_vid1_trans[i]
    # you can pass in mode="append"
    # to add more entries to the vector store;
    # in case you want to start with a fresh vector store,
    # you can pass in mode="overwrite" instead
    _ = MultimodalLanceDB.from_text_image_pairs(
        texts=updated_vid1_trans + vid2_trans,
        image_paths=vid1_img_path + vid2_img_path,
        embedding=embedder,
        metadatas=vid1_metadata + vid2_metadata,
        connection=db,
        table_name=TBL_NAME,
        mode="overwrite",
    )
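    # a sketch of adding a third video later without rebuilding the
    # table (the vid3_* names are hypothetical, prepared the same way
    # as the lists above):
    # _ = MultimodalLanceDB.from_text_image_pairs(
    #     texts=vid3_trans,
    #     image_paths=vid3_img_path,
    #     embedding=embedder,
    #     metadatas=vid3_metadata,
    #     connection=db,
    #     table_name=TBL_NAME,
    #     mode="append",
    # )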
if __name__ == "__main__":
    # build (or rebuild) the table, show its row count and first 3 rows,
    # then run the example similarity searches
    store_in_rag()
    open_table(TBL_NAME)
    return_top_k_most_similar_docs()