import os
from datetime import datetime

import streamlit as st
from dotenv import load_dotenv
from langsmith import traceable
from langchain_community.llms import Replicate
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain_community.document_transformers import LongContextReorder

from app.chat import initialize_session_state, display_chat_history
from app.data_loader import get_data, load_docs
from app.document_processor import (
    process_documents,
    save_vector_store_to_supabase,
    load_vector_store_from_supabase,
)
from app.prompts import sahabat_prompt
from app.db import supabase

load_dotenv()

# Supabase configuration
BUCKET_NAME = "pnp-bot-storage-archive"
VECTOR_STORE_PREFIX = "vector_store"  # Changed from file name to prefix
DATA_DIR = "data"


@traceable(name="Create RAG Conversational Chain")
def create_conversational_chain(vector_store):
    """Build a ConversationalRetrievalChain over the given vector store."""
    llm = Replicate(
        model="fauziisyrinapridal/sahabat-ai-v1:afb9fa89fe786362f619fd4fef34bd1f7a4a4da23073d8a6fbf54dcbe458f216",
        model_kwargs={"temperature": 0.1, "top_p": 0.9, "max_new_tokens": 6000},
    )
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
        output_key='answer',
    )
    chain = ConversationalRetrievalChain.from_llm(
        llm,
        retriever=vector_store.as_retriever(search_kwargs={"k": 6}),
        combine_docs_chain_kwargs={"prompt": sahabat_prompt},
        return_source_documents=True,
        memory=memory,
    )
    return chain


def reorder_embedding(docs):
    """Reorder documents so the most relevant ones sit at the edges of the context window."""
    reordering = LongContextReorder()
    return reordering.transform_documents(docs)


def get_latest_data_timestamp(folder):
    """Return the most recent modification time (epoch seconds) of any file under `folder`."""
    latest_time = 0
    for root, _, files in os.walk(folder):
        for file in files:
            path = os.path.join(root, file)
            file_time = os.path.getmtime(path)
            latest_time = max(latest_time, file_time)
    return latest_time


def get_supabase_vector_store_timestamp():
    """Get the timestamp of vector store files in Supabase storage."""
    try:
        response = supabase.storage.from_(BUCKET_NAME).list()
        timestamps = []
        for file in response:
            if file['name'].startswith(VECTOR_STORE_PREFIX) and (
                file['name'].endswith('.faiss') or file['name'].endswith('.pkl')
            ):
                timestamps.append(file['updated_at'])
        # Return the latest timestamp only if both files (.faiss and .pkl) exist
        if len(timestamps) >= 2:
            return max(timestamps)
        return None
    except Exception as e:
        print(f"Error getting Supabase timestamp: {e}")
        return None


def vector_store_is_outdated():
    """Check if the vector store needs to be rebuilt based on changes in the data folder."""
    supabase_timestamp = get_supabase_vector_store_timestamp()
    if supabase_timestamp is None:
        return True
    # Convert the Supabase ISO timestamp to epoch time for comparison
    supabase_time = datetime.fromisoformat(supabase_timestamp.replace('Z', '+00:00')).timestamp()
    data_time = get_latest_data_timestamp(DATA_DIR)
    return data_time > supabase_time


@traceable(name="Main Chatbot RAG App")
def main():
    initialize_session_state()
    vector_store = None  # Initialize first

    if len(st.session_state['history']) == 0:
        if vector_store_is_outdated():
            with st.spinner("Memuat dan memproses dokumen..."):
                get_data()  # 🔄 Only fetch source data when it is actually needed
                docs = load_docs()
                if len(docs) > 0:
                    reordered_docs = reorder_embedding(docs)
                    vector_store = process_documents(reordered_docs)
                    with st.spinner("Mengunggah vector store ke Supabase..."):
                        success = save_vector_store_to_supabase(
                            vector_store, supabase, BUCKET_NAME, VECTOR_STORE_PREFIX
                        )
                        if success:
                            st.success("Vector store berhasil diunggah ke Supabase!")
                        else:
                            st.error("Gagal mengunggah vector store ke Supabase")
                else:
                    st.warning("Folder 'data/' kosong. Chatbot tetap bisa digunakan, tapi tanpa konteks dokumen.")
                    vector_store = None
        else:
            with st.spinner("Memuat vector store dari Supabase..."):
                vector_store = load_vector_store_from_supabase(supabase, BUCKET_NAME, VECTOR_STORE_PREFIX)
                if vector_store:
                    st.success("Vector store berhasil dimuat dari Supabase!")
                else:
                    st.error("Gagal memuat vector store dari Supabase")
    else:
        # Reuse the vector store cached in the session; reload it from Supabase if missing
        vector_store = st.session_state.get('vector_store')
        if vector_store is None:
            vector_store = load_vector_store_from_supabase(supabase, BUCKET_NAME, VECTOR_STORE_PREFIX)

    st.session_state['vector_store'] = vector_store

    if st.session_state['vector_store'] is not None:
        chain = create_conversational_chain(st.session_state['vector_store'])
        display_chat_history(chain)


if __name__ == "__main__":
    main()