import os

import streamlit as st
from dotenv import load_dotenv
from langsmith import traceable
from langchain_community.llms import Replicate
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain_community.document_transformers import LongContextReorder

from app.chat import initialize_session_state, display_chat_history
from app.data_loader import get_data, load_docs
from app.document_processor import process_documents, save_vector_store, load_vector_store
from app.prompts import sahabat_prompt

load_dotenv()

VECTOR_STORE_PATH = "vector_store_data"
DATA_DIR = "data"


@traceable(name="Create RAG Conversational Chain")
def create_conversational_chain(vector_store):
    """Build a ConversationalRetrievalChain backed by the Replicate-hosted LLM."""
    llm = Replicate(
        model="fauziisyrinapridal/sahabat-ai-v1:afb9fa89fe786362f619fd4fef34bd1f7a4a4da23073d8a6fbf54dcbe458f216",
        model_kwargs={"temperature": 0.1, "top_p": 0.9, "max_new_tokens": 6000},
    )
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
        output_key="answer",
    )
    chain = ConversationalRetrievalChain.from_llm(
        llm,
        retriever=vector_store.as_retriever(search_kwargs={"k": 6}),
        combine_docs_chain_kwargs={"prompt": sahabat_prompt},
        return_source_documents=True,
        memory=memory,
    )
    return chain


def reorder_embedding(docs):
    """Reorder documents so the most relevant ones sit at the edges of the context window."""
    reordering = LongContextReorder()
    return reordering.transform_documents(docs)


def get_latest_data_timestamp(folder):
    """Return the most recent modification time of any file under `folder`."""
    latest_time = 0
    for root, _, files in os.walk(folder):
        for file in files:
            path = os.path.join(root, file)
            file_time = os.path.getmtime(path)
            latest_time = max(latest_time, file_time)
    return latest_time


def vector_store_is_outdated():
    """True if the vector store is missing or older than the newest file in DATA_DIR."""
    if not os.path.exists(VECTOR_STORE_PATH):
        return True
    vector_store_time = os.path.getmtime(VECTOR_STORE_PATH)
    data_time = get_latest_data_timestamp(DATA_DIR)
    return data_time > vector_store_time


@traceable(name="Main Chatbot RAG App")
def main():
    initialize_session_state()
    get_data()
    vector_store = None  # Initialize first

    if len(st.session_state['history']) == 0:
        if vector_store_is_outdated():
            docs = load_docs()
            if len(docs) > 0:
                reordered_docs = reorder_embedding(docs)
                vector_store = process_documents(reordered_docs)
                save_vector_store(vector_store)
            else:
                st.warning(
                    "Tidak ada dokumen ditemukan di folder 'data/'. "
                    "Chatbot tetap bisa digunakan, tapi tanpa konteks dokumen."
                )
                vector_store = None
        else:
            # Vector store is up to date and history is empty:
            # still load the vector store from disk.
            vector_store = load_vector_store()
    else:
        vector_store = load_vector_store()

    st.session_state['vector_store'] = vector_store

    if st.session_state['vector_store'] is not None:
        chain = create_conversational_chain(st.session_state['vector_store'])
        display_chat_history(chain)


if __name__ == "__main__":
    main()