from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
import os

# On-disk location for the persisted FAISS index.
VECTOR_STORE_PATH = "vector_store_data"

# Embedding model configuration, shared by loading and building paths.
# Keeping it in one place guarantees the index is always queried with the
# same embedding space it was built with.
EMBEDDING_MODEL_NAME = "LazarusNLP/all-indo-e5-small-v4"
EMBEDDING_MODEL_KWARGS = {"device": "cpu"}
EMBEDDING_ENCODE_KWARGS = {"normalize_embeddings": True}


def _get_embeddings():
    """Build the shared HuggingFace embedding model used for the FAISS index.

    Extracted helper so load_vector_store() and process_documents() cannot
    drift apart in embedding configuration.

    Returns:
        HuggingFaceEmbeddings: CPU-based, normalized Indonesian E5 embeddings.
    """
    return HuggingFaceEmbeddings(
        model_name=EMBEDDING_MODEL_NAME,
        model_kwargs=EMBEDDING_MODEL_KWARGS,
        encode_kwargs=EMBEDDING_ENCODE_KWARGS,
    )


def save_vector_store(vector_store):
    """Persist the vector store to disk at VECTOR_STORE_PATH.

    Args:
        vector_store: A FAISS vector store instance to serialize.
    """
    vector_store.save_local(VECTOR_STORE_PATH)
    print(f"Vector store saved to {VECTOR_STORE_PATH}")


def load_vector_store():
    """Load the vector store from disk, or return None if it does not exist.

    Returns:
        FAISS | None: The deserialized vector store, or None when no saved
        index is found at VECTOR_STORE_PATH.
    """
    if not os.path.exists(VECTOR_STORE_PATH):
        print("Vector store file not found.")
        return None
    # NOTE: allow_dangerous_deserialization is required by FAISS.load_local
    # for pickle-backed indexes — only safe because we load our own files.
    vector_store = FAISS.load_local(
        VECTOR_STORE_PATH,
        _get_embeddings(),
        allow_dangerous_deserialization=True,
    )
    print(f"Vector store loaded from {VECTOR_STORE_PATH}")
    return vector_store


def process_documents(docs, chunk_size=1500, chunk_overlap=300):
    """Split documents into chunks and index them in a new FAISS store.

    Args:
        docs: Iterable of LangChain Document objects to index.
        chunk_size: Maximum characters per chunk (default 1500, unchanged
            from the original hard-coded value).
        chunk_overlap: Characters of overlap between consecutive chunks
            (default 300).

    Returns:
        FAISS: An in-memory vector store built from the chunked documents.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    text_chunks = text_splitter.split_documents(docs)
    return FAISS.from_documents(text_chunks, _get_embeddings())