Spaces:
Running
Running
FauziIsyrinApridal
committed on
Commit
·
8d70ef7
1
Parent(s):
719f97b
update req
Browse files
- app.py +11 -6
- app/document_processor.py +10 -1
app.py
CHANGED
@@ -68,13 +68,18 @@ def main():
|
|
68 |
if len(st.session_state['history']) == 0:
|
69 |
if vector_store_is_outdated():
|
70 |
docs = load_docs()
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
|
77 |
-
st.session_state['vector_store'] = vector_store
|
78 |
|
79 |
if st.session_state['vector_store'] is not None:
|
80 |
chain = create_conversational_chain(st.session_state['vector_store'])
|
|
|
68 |
if len(st.session_state['history']) == 0:
|
69 |
if vector_store_is_outdated():
|
70 |
docs = load_docs()
|
71 |
+
if len(docs) > 0:
|
72 |
+
reordered_docs = reorder_embedding(docs)
|
73 |
+
vector_store = process_documents(reordered_docs)
|
74 |
+
save_vector_store(vector_store)
|
75 |
+
else:
|
76 |
+
st.warning("Tidak ada dokumen ditemukan di folder 'data/'. Chatbot tetap bisa digunakan, tapi tanpa konteks dokumen.")
|
77 |
+
vector_store = None
|
78 |
+
else:
|
79 |
+
vector_store = load_vector_store()
|
80 |
+
|
81 |
+
st.session_state['vector_store'] = vector_store
|
82 |
|
|
|
83 |
|
84 |
if st.session_state['vector_store'] is not None:
|
85 |
chain = create_conversational_chain(st.session_state['vector_store'])
|
app/document_processor.py
CHANGED
@@ -27,6 +27,9 @@ def load_vector_store():
|
|
27 |
|
28 |
|
29 |
def process_documents(docs):
|
|
|
|
|
|
|
30 |
embeddings = HuggingFaceEmbeddings(
|
31 |
model_name="LazarusNLP/all-indo-e5-small-v4",
|
32 |
model_kwargs={"device": "cpu"},
|
@@ -38,7 +41,13 @@ def process_documents(docs):
|
|
38 |
chunk_overlap=300
|
39 |
)
|
40 |
text_chunks = text_splitter.split_documents(docs)
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
vector_store = FAISS.from_documents(text_chunks, embeddings)
|
42 |
-
|
43 |
return vector_store
|
44 |
|
|
|
|
27 |
|
28 |
|
29 |
def process_documents(docs):
|
30 |
+
if not docs:
|
31 |
+
raise ValueError("β Input 'docs' kosong. Tidak ada dokumen untuk diproses.")
|
32 |
+
|
33 |
embeddings = HuggingFaceEmbeddings(
|
34 |
model_name="LazarusNLP/all-indo-e5-small-v4",
|
35 |
model_kwargs={"device": "cpu"},
|
|
|
41 |
chunk_overlap=300
|
42 |
)
|
43 |
text_chunks = text_splitter.split_documents(docs)
|
44 |
+
|
45 |
+
if not text_chunks:
|
46 |
+
raise ValueError("β Split dokumen gagal. 'text_chunks' kosong setelah diproses.")
|
47 |
+
|
48 |
+
print(f"β
{len(text_chunks)} text chunks berhasil diproses.")
|
49 |
+
|
50 |
vector_store = FAISS.from_documents(text_chunks, embeddings)
|
|
|
51 |
return vector_store
|
52 |
|
53 |
+
|