FauziIsyrinApridal committed
Commit 8d70ef7 · 1 Parent(s): 719f97b

update req

Files changed (2)
  1. app.py +11 -6
  2. app/document_processor.py +10 -1
app.py CHANGED
@@ -68,13 +68,18 @@ def main():
     if len(st.session_state['history']) == 0:
         if vector_store_is_outdated():
             docs = load_docs()
-            reordered_docs = reorder_embedding(docs)
-            vector_store = process_documents(reordered_docs)
-            save_vector_store(vector_store)
-        else:
-            vector_store = load_vector_store()
+            if len(docs) > 0:
+                reordered_docs = reorder_embedding(docs)
+                vector_store = process_documents(reordered_docs)
+                save_vector_store(vector_store)
+            else:
+                st.warning("Tidak ada dokumen ditemukan di folder 'data/'. Chatbot tetap bisa digunakan, tapi tanpa konteks dokumen.")
+                vector_store = None
+        else:
+            vector_store = load_vector_store()
+
+        st.session_state['vector_store'] = vector_store
 
-        st.session_state['vector_store'] = vector_store
 
     if st.session_state['vector_store'] is not None:
         chain = create_conversational_chain(st.session_state['vector_store'])
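
A note on the fallback path added above: when load_docs() finds nothing, vector_store is set to None and the existing "is not None" check in main() keeps the chatbot running without document context. The sketch below shows the same guard pattern in isolation; load_docs, process_documents, and create_conversational_chain are stand-ins named after the functions in the diff, not the real implementations from this repo.

# Sketch of the empty-document fallback introduced in app.py (stand-in functions).
from typing import Any, Optional

def load_docs() -> list:
    return []                # pretend the data/ folder is empty

def process_documents(docs: list) -> Any:
    return object()          # stand-in for the FAISS vector store

def create_conversational_chain(vector_store: Any) -> Any:
    return object()          # stand-in for the conversational chain

docs = load_docs()
if len(docs) > 0:
    vector_store: Optional[Any] = process_documents(docs)
else:
    print("No documents found in 'data/'; continuing without document context.")
    vector_store = None

# Mirrors the existing guard in main(): the chain is only built
# when a vector store is available.
chain = create_conversational_chain(vector_store) if vector_store is not None else None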
app/document_processor.py CHANGED
@@ -27,6 +27,9 @@ def load_vector_store():
 
 
 def process_documents(docs):
+    if not docs:
+        raise ValueError("❌ Input 'docs' kosong. Tidak ada dokumen untuk diproses.")
+
     embeddings = HuggingFaceEmbeddings(
         model_name="LazarusNLP/all-indo-e5-small-v4",
         model_kwargs={"device": "cpu"},
@@ -38,7 +41,13 @@ def process_documents(docs):
         chunk_overlap=300
     )
     text_chunks = text_splitter.split_documents(docs)
+
+    if not text_chunks:
+        raise ValueError("❌ Split dokumen gagal. 'text_chunks' kosong setelah diproses.")
+
+    print(f"✅ {len(text_chunks)} text chunks berhasil diproses.")
+
     vector_store = FAISS.from_documents(text_chunks, embeddings)
-
     return vector_store
 
+
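
The new guards in process_documents() make an empty input or an empty split fail fast instead of silently producing an empty FAISS index. A minimal caller-side sketch, assuming the app/document_processor.py module from this repo and its dependencies (langchain, faiss, sentence-transformers) are installed; the try/except wrapper is illustrative and not part of the commit.

# Illustrative only: exercises the new empty-input guard added in this commit.
from app.document_processor import process_documents

try:
    vector_store = process_documents([])   # empty list now raises immediately
except ValueError as err:
    print(f"process_documents rejected empty input: {err}")
    vector_store = None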