import os
import tempfile

import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
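
# Dependency note (an editorial addition, inferred from the imports above):
# this module needs langchain, langchain-community, langchain-huggingface,
# faiss-cpu, supabase, streamlit, and sentence-transformers (required by
# HuggingFaceEmbeddings) to be installed.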


def save_vector_store_to_supabase(vector_store, supabase, bucket_name, file_prefix="vector_store"):
    """Save vector store to Supabase storage as separate files."""
    try:
        with tempfile.TemporaryDirectory() as temp_dir:
            # Save vector store locally first; FAISS writes index.faiss
            # (the index) and index.pkl (the pickled docstore).
            local_path = os.path.join(temp_dir, "vector_store")
            vector_store.save_local(local_path)

            # Upload index.faiss
            faiss_file = os.path.join(local_path, "index.faiss")
            if os.path.exists(faiss_file):
                with open(faiss_file, "rb") as f:
                    supabase.storage.from_(bucket_name).upload(
                        f"{file_prefix}_index.faiss",
                        f,
                        {"upsert": "true"},
                    )
                print(f"Uploaded: {file_prefix}_index.faiss")

            # Upload index.pkl
            pkl_file = os.path.join(local_path, "index.pkl")
            if os.path.exists(pkl_file):
                with open(pkl_file, "rb") as f:
                    supabase.storage.from_(bucket_name).upload(
                        f"{file_prefix}_index.pkl",
                        f,
                        {"upsert": "true"},
                    )
                print(f"Uploaded: {file_prefix}_index.pkl")

            print(f"Vector store uploaded to Supabase bucket: {bucket_name}")
            return True
    except Exception as e:
        print(f"Error uploading vector store to Supabase: {e}")
        st.error(f"Error uploading to Supabase: {e}")
        return False
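
# Usage sketch (an illustrative addition, not part of the original module):
# persist a freshly built store. The environment-variable names and the
# "vector-stores" bucket are assumptions; the bucket must already exist.
#
#     from supabase import create_client
#
#     supabase = create_client(os.environ["SUPABASE_URL"], os.environ["SUPABASE_KEY"])
#     vector_store = process_documents(docs)  # docs: list of LangChain Documents
#     save_vector_store_to_supabase(vector_store, supabase, "vector-stores")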


def load_vector_store_from_supabase(supabase, bucket_name, file_prefix="vector_store"):
    """Load a FAISS vector store from the two files stored in Supabase storage."""
    try:
        with tempfile.TemporaryDirectory() as temp_dir:
            local_path = os.path.join(temp_dir, "vector_store")
            os.makedirs(local_path, exist_ok=True)

            # Download index.faiss
            try:
                faiss_response = supabase.storage.from_(bucket_name).download(f"{file_prefix}_index.faiss")
                faiss_file = os.path.join(local_path, "index.faiss")
                with open(faiss_file, "wb") as f:
                    f.write(faiss_response)
                print(f"Downloaded: {file_prefix}_index.faiss")
            except Exception as e:
                print(f"Error downloading index.faiss: {e}")
                return None

            # Download index.pkl
            try:
                pkl_response = supabase.storage.from_(bucket_name).download(f"{file_prefix}_index.pkl")
                pkl_file = os.path.join(local_path, "index.pkl")
                with open(pkl_file, "wb") as f:
                    f.write(pkl_response)
                print(f"Downloaded: {file_prefix}_index.pkl")
            except Exception as e:
                print(f"Error downloading index.pkl: {e}")
                return None

            # Rebuild the embeddings object and load the vector store.
            # allow_dangerous_deserialization is required because index.pkl
            # is unpickled, so only load files you uploaded yourself.
            embeddings = HuggingFaceEmbeddings(
                model_name="LazarusNLP/all-indo-e5-small-v4",
                model_kwargs={"device": "cpu"},
                encode_kwargs={"normalize_embeddings": True},
            )
            vector_store = FAISS.load_local(
                local_path,
                embeddings,
                allow_dangerous_deserialization=True,
            )
            print(f"Vector store loaded from Supabase bucket: {bucket_name}")
            return vector_store
    except Exception as e:
        print(f"Error loading vector store from Supabase: {e}")
        st.error(f"Error loading from Supabase: {e}")
        return None
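
# Usage sketch (an illustrative addition): in a Streamlit app, wrap the loader
# in st.cache_resource so the download and deserialization happen once per
# process rather than on every script rerun; the bucket name is assumed.
#
#     @st.cache_resource
#     def get_vector_store():
#         return load_vector_store_from_supabase(supabase, "vector-stores")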


def process_documents(docs):
    """Split documents into overlapping chunks and index them in FAISS."""
    embeddings = HuggingFaceEmbeddings(
        model_name="LazarusNLP/all-indo-e5-small-v4",
        model_kwargs={"device": "cpu"},
        encode_kwargs={"normalize_embeddings": True},
    )
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1500,
        chunk_overlap=300,
    )
    text_chunks = text_splitter.split_documents(docs)
    vector_store = FAISS.from_documents(text_chunks, embeddings)
    return vector_store
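

# End-to-end sketch (an illustrative addition, not in the original file).
# Assumes SUPABASE_URL / SUPABASE_KEY environment variables, an existing
# "vector-stores" bucket, and a local PDF at docs/manual.pdf (loading it
# needs the pypdf package); all of these names are hypothetical.
if __name__ == "__main__":
    from langchain_community.document_loaders import PyPDFLoader
    from supabase import create_client

    supabase = create_client(os.environ["SUPABASE_URL"], os.environ["SUPABASE_KEY"])

    # Build the index from a document, persist it, then load it back
    # and run a similarity search as a smoke test.
    docs = PyPDFLoader("docs/manual.pdf").load()
    store = process_documents(docs)
    save_vector_store_to_supabase(store, supabase, "vector-stores")

    restored = load_vector_store_from_supabase(supabase, "vector-stores")
    if restored is not None:
        for doc in restored.similarity_search("contoh pertanyaan", k=3):
            print(doc.page_content[:120])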