from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
import os
import tempfile
import streamlit as st


def save_vector_store_to_supabase(vector_store, supabase, bucket_name, file_prefix="vector_store"):
    """Save a FAISS vector store to Supabase storage as separate files."""
    try:
        with tempfile.TemporaryDirectory() as temp_dir:
            # Save the vector store locally first; FAISS writes index.faiss
            # and index.pkl into this directory.
            local_path = os.path.join(temp_dir, "vector_store")
            vector_store.save_local(local_path)

            # Upload index.faiss
            faiss_file = os.path.join(local_path, "index.faiss")
            if os.path.exists(faiss_file):
                with open(faiss_file, "rb") as f:
                    supabase.storage.from_(bucket_name).upload(
                        f"{file_prefix}_index.faiss",
                        f,
                        {"upsert": "true"},
                    )
                print(f"Uploaded: {file_prefix}_index.faiss")

            # Upload index.pkl
            pkl_file = os.path.join(local_path, "index.pkl")
            if os.path.exists(pkl_file):
                with open(pkl_file, "rb") as f:
                    supabase.storage.from_(bucket_name).upload(
                        f"{file_prefix}_index.pkl",
                        f,
                        {"upsert": "true"},
                    )
                print(f"Uploaded: {file_prefix}_index.pkl")

            print(f"Vector store uploaded to Supabase bucket: {bucket_name}")
            return True
    except Exception as e:
        print(f"Error uploading vector store to Supabase: {e}")
        st.error(f"Error uploading to Supabase: {e}")
        return False


def load_vector_store_from_supabase(supabase, bucket_name, file_prefix="vector_store"):
    """Load a FAISS vector store from the separate files in Supabase storage."""
    try:
        with tempfile.TemporaryDirectory() as temp_dir:
            local_path = os.path.join(temp_dir, "vector_store")
            os.makedirs(local_path, exist_ok=True)

            # Download index.faiss
            try:
                faiss_response = supabase.storage.from_(bucket_name).download(
                    f"{file_prefix}_index.faiss"
                )
                faiss_file = os.path.join(local_path, "index.faiss")
                with open(faiss_file, "wb") as f:
                    f.write(faiss_response)
                print(f"Downloaded: {file_prefix}_index.faiss")
            except Exception as e:
                print(f"Error downloading index.faiss: {e}")
                return None

            # Download index.pkl
            try:
                pkl_response = supabase.storage.from_(bucket_name).download(
                    f"{file_prefix}_index.pkl"
                )
                pkl_file = os.path.join(local_path, "index.pkl")
                with open(pkl_file, "wb") as f:
                    f.write(pkl_response)
                print(f"Downloaded: {file_prefix}_index.pkl")
            except Exception as e:
                print(f"Error downloading index.pkl: {e}")
                return None

            # Rebuild the same embedding model used at indexing time, then
            # load the store. allow_dangerous_deserialization is required
            # because index.pkl is unpickled; only load files you wrote yourself.
            embeddings = HuggingFaceEmbeddings(
                model_name="LazarusNLP/all-indo-e5-small-v4",
                model_kwargs={"device": "cpu"},
                encode_kwargs={"normalize_embeddings": True},
            )
            vector_store = FAISS.load_local(
                local_path,
                embeddings,
                allow_dangerous_deserialization=True,
            )
            print(f"Vector store loaded from Supabase bucket: {bucket_name}")
            return vector_store
    except Exception as e:
        print(f"Error loading vector store from Supabase: {e}")
        st.error(f"Error loading from Supabase: {e}")
        return None


def process_documents(docs):
    """Split documents into overlapping chunks and index them in FAISS."""
    embeddings = HuggingFaceEmbeddings(
        model_name="LazarusNLP/all-indo-e5-small-v4",
        model_kwargs={"device": "cpu"},
        encode_kwargs={"normalize_embeddings": True},
    )
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1500,
        chunk_overlap=300,
    )
    text_chunks = text_splitter.split_documents(docs)
    vector_store = FAISS.from_documents(text_chunks, embeddings)
    return vector_store
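

# --- Usage sketch (illustrative only, not part of the original module) ---
# A minimal example of wiring the three helpers together. The environment
# variable names ("SUPABASE_URL", "SUPABASE_KEY"), the bucket name
# ("vector-stores"), and the sample file path are assumptions made for
# illustration; substitute your own values.
if __name__ == "__main__":
    from langchain_community.document_loaders import TextLoader
    from supabase import create_client

    supabase = create_client(os.environ["SUPABASE_URL"], os.environ["SUPABASE_KEY"])
    bucket = "vector-stores"  # hypothetical bucket; must already exist in Supabase

    # Build an index from a local document and push it to Supabase storage.
    docs = TextLoader("example.txt").load()
    store = process_documents(docs)
    save_vector_store_to_supabase(store, supabase, bucket)

    # Round-trip: pull the index back down and run a similarity search.
    restored = load_vector_store_from_supabase(supabase, bucket)
    if restored is not None:
        for doc in restored.similarity_search("contoh kueri", k=3):
            print(doc.page_content[:200])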