from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
import os

VECTOR_STORE_PATH = "vector_store_data"


def save_vector_store(vector_store):
    """Save the vector store to disk."""
    vector_store.save_local(VECTOR_STORE_PATH)
    print(f"Vector store saved to {VECTOR_STORE_PATH}")


def load_vector_store():
    """Load the vector store from disk, or return None if it does not exist."""
    if os.path.exists(VECTOR_STORE_PATH):
        embeddings = HuggingFaceEmbeddings(
            model_name="LazarusNLP/all-indo-e5-small-v4",
            model_kwargs={"device": "cpu"},
            encode_kwargs={"normalize_embeddings": True},
        )
        vector_store = FAISS.load_local(
            VECTOR_STORE_PATH, embeddings, allow_dangerous_deserialization=True
        )
        print(f"Vector store loaded from {VECTOR_STORE_PATH}")
        return vector_store
    else:
        print("Vector store file not found.")
        return None


def process_documents(docs):
    """Split documents into chunks and build a FAISS vector store from them."""
    embeddings = HuggingFaceEmbeddings(
        model_name="LazarusNLP/all-indo-e5-small-v4",
        model_kwargs={"device": "cpu"},
        encode_kwargs={"normalize_embeddings": True},
    )
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1500,
        chunk_overlap=300,
    )
    text_chunks = text_splitter.split_documents(docs)
    vector_store = FAISS.from_documents(text_chunks, embeddings)
    return vector_store
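

# Usage sketch (not part of the original module): shows how these helpers fit
# together, assuming "sample.txt" and TextLoader as placeholder inputs; the real
# app presumably imports these functions and loads its own documents elsewhere.
if __name__ == "__main__":
    from langchain_community.document_loaders import TextLoader

    store = load_vector_store()
    if store is None:
        # Build and persist the index on first run.
        docs = TextLoader("sample.txt").load()  # hypothetical input file
        store = process_documents(docs)
        save_vector_store(store)

    # Retrieve the chunks most similar to a query from the FAISS index.
    for doc in store.similarity_search("example query", k=3):
        print(doc.page_content[:200])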