# pnp-chatbot-v1 / app.py
# FauziIsyrinApridal
# update chat.py audio dijalankan dua kali dan klik aktif/nonaktif dua kali
# 882e3b8
import streamlit as st
import os
import tempfile
import zipfile
from dotenv import load_dotenv
from langsmith import traceable
from app.chat import initialize_session_state, display_chat_history
from app.data_loader import get_data, load_docs
from app.document_processor import process_documents, save_vector_store_to_supabase, load_vector_store_from_supabase
from app.prompts import sahabat_prompt
from app.db import supabase
from langchain_community.llms import Replicate
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain_community.document_transformers import LongContextReorder
# Load variables from a .env file into the process environment (python-dotenv).
load_dotenv()
# Supabase configuration
# Storage bucket that is listed and read/written for the vector store files.
BUCKET_NAME = "pnp-bot-storage-archive"
# Name prefix shared by the vector store files (.faiss / .pkl) in the bucket.
VECTOR_STORE_PREFIX = "vector_store" # Changed from file name to prefix
# Local folder whose file modification times decide whether a rebuild is needed.
DATA_DIR = "data"
@traceable(name="Create RAG Conversational Chain")
def create_conversational_chain(vector_store):
    """Assemble the retrieval-augmented conversational chain.

    Args:
        vector_store: Vector store whose ``as_retriever`` provides the
            documents for each question (``k=6`` results per search).

    Returns:
        A ``ConversationalRetrievalChain`` built from the Replicate-hosted
        model, a ``ConversationBufferMemory`` keyed on ``chat_history``, and
        the ``sahabat_prompt`` template, configured to return the source
        documents alongside the answer.
    """
    generation_settings = {"temperature": 0.1, "top_p": 0.9, "max_new_tokens": 6000}
    language_model = Replicate(
        model="fauziisyrinapridal/sahabat-ai-v1:afb9fa89fe786362f619fd4fef34bd1f7a4a4da23073d8a6fbf54dcbe458f216",
        model_kwargs=generation_settings,
    )

    # The chain returns source documents as well as the answer, so the memory
    # is told explicitly which output key to record.
    conversation_memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
        output_key='answer',
    )

    document_retriever = vector_store.as_retriever(search_kwargs={"k": 6})
    return ConversationalRetrievalChain.from_llm(
        language_model,
        retriever=document_retriever,
        combine_docs_chain_kwargs={"prompt": sahabat_prompt},
        return_source_documents=True,
        memory=conversation_memory,
    )
def reorder_embedding(docs):
    """Pass *docs* through LangChain's ``LongContextReorder`` transformer.

    Args:
        docs: Documents to reorder.

    Returns:
        Whatever ``LongContextReorder().transform_documents(docs)`` returns.
    """
    return LongContextReorder().transform_documents(docs)
def get_latest_data_timestamp(folder):
    """Return the newest modification time of any file under *folder*.

    The folder is walked recursively with ``os.walk``. Entries whose
    modification time cannot be read (for example a dangling symlink, or a
    file removed while the walk is in progress) are skipped instead of
    aborting the whole scan.

    Args:
        folder: Path of the directory to scan.

    Returns:
        The largest ``os.path.getmtime`` value (seconds since the epoch)
        among the readable files, or ``0`` when the folder does not exist,
        is empty, or contains no readable files.
    """
    latest_time = 0
    for root, _, files in os.walk(folder):
        for file in files:
            try:
                file_time = os.path.getmtime(os.path.join(root, file))
            except OSError:
                # No usable mtime for this entry; it cannot make the data
                # "newer", so ignore it rather than crash the freshness check.
                continue
            latest_time = max(latest_time, file_time)
    return latest_time
def get_supabase_vector_store_timestamp():
    """Get the timestamp of the vector store files in Supabase storage.

    Lists ``BUCKET_NAME`` and considers only entries whose name starts with
    ``VECTOR_STORE_PREFIX`` and ends in ``.faiss`` or ``.pkl``.

    Returns:
        The greatest ``updated_at`` value among the matching entries when at
        least one ``.faiss`` and at least one ``.pkl`` file are present;
        ``None`` when either kind is missing or the listing fails.
    """
    required_suffixes = ('.faiss', '.pkl')
    try:
        response = supabase.storage.from_(BUCKET_NAME).list()
        timestamps = []
        found_suffixes = set()
        for file in response:
            name = file['name']
            if not name.startswith(VECTOR_STORE_PREFIX):
                continue
            for suffix in required_suffixes:
                if name.endswith(suffix):
                    found_suffixes.add(suffix)
                    timestamps.append(file['updated_at'])
                    break
        # Require one file of each kind: counting matches alone would accept
        # e.g. two .faiss files with no .pkl, which is not a complete store.
        if found_suffixes == set(required_suffixes):
            # Values are compared as returned by the API (presumably ISO-8601
            # strings in a uniform format — TODO confirm).
            return max(timestamps)
        return None
    except Exception as e:
        # Best effort: any listing problem is reported and treated the same
        # as "no vector store available".
        print(f"Error getting Supabase timestamp: {e}")
        return None
def vector_store_is_outdated():
    """Tell whether the stored vector store is older than the local data.

    Returns:
        ``True`` when no vector store timestamp is available from Supabase,
        or when the newest file under ``DATA_DIR`` was modified after that
        timestamp; ``False`` otherwise.
    """
    remote_stamp = get_supabase_vector_store_timestamp()
    if remote_stamp is not None:
        from datetime import datetime

        # A trailing 'Z' is rewritten as an explicit UTC offset before
        # parsing, then the result is converted to epoch seconds so it can be
        # compared with file modification times.
        iso_stamp = remote_stamp.replace('Z', '+00:00')
        remote_epoch = datetime.fromisoformat(iso_stamp).timestamp()
        local_epoch = get_latest_data_timestamp(DATA_DIR)
        return local_epoch > remote_epoch

    # Nothing usable in storage: the store has to be (re)built.
    return True
@traceable(name="Main Chatbot RAG App")
def main():
    """Run the Streamlit chatbot page.

    On the first run of a session (empty chat history) the vector store is
    rebuilt from the documents and uploaded to Supabase when it is outdated,
    otherwise it is loaded from Supabase. On later reruns the vector store
    kept in ``st.session_state`` is reused, falling back to loading it from
    Supabase. The chat UI is rendered only when a vector store is available.
    """
    initialize_session_state()

    vector_store = None  # Stays None when no store can be built or loaded.

    if not st.session_state['history']:
        if vector_store_is_outdated():
            with st.spinner("Memuat dan memproses dokumen..."):
                get_data()  # Call only when a rebuild is actually needed.
                docs = load_docs()
                if docs:
                    reordered_docs = reorder_embedding(docs)
                    vector_store = process_documents(reordered_docs)
                    with st.spinner("Mengunggah vector store ke Supabase..."):
                        success = save_vector_store_to_supabase(
                            vector_store, supabase, BUCKET_NAME, VECTOR_STORE_PREFIX
                        )
                    if success:
                        st.success("Vector store berhasil diunggah ke Supabase!")
                    else:
                        st.error("Gagal mengunggah vector store ke Supabase")
                else:
                    st.warning("Folder 'data/' kosong. Chatbot tetap bisa digunakan, tapi tanpa konteks dokumen.")
                    vector_store = None
        else:
            with st.spinner("Memuat vector store dari Supabase..."):
                vector_store = load_vector_store_from_supabase(
                    supabase, BUCKET_NAME, VECTOR_STORE_PREFIX
                )
            if vector_store:
                st.success("Vector store berhasil dimuat dari Supabase!")
            else:
                st.error("Gagal memuat vector store dari Supabase")
    else:
        # Later reruns: reuse the store kept for this session when possible.
        vector_store = st.session_state.get('vector_store')
        if vector_store is None:
            vector_store = load_vector_store_from_supabase(
                supabase, BUCKET_NAME, VECTOR_STORE_PREFIX
            )

    st.session_state['vector_store'] = vector_store

    # NOTE(review): the empty-folder warning above says the chatbot remains
    # usable without documents, but the chat UI is only rendered when a
    # vector store exists — confirm which behavior is intended.
    if st.session_state['vector_store'] is not None:
        chain = create_conversational_chain(st.session_state['vector_store'])
        display_chat_history(chain)
# Script entry point: start the app only when this file is executed directly.
if __name__ == "__main__":
    main()