Spaces:
Sleeping
Sleeping
FauziIsyrinApridal
update chat.py audio dijalankan dua kali dan klik aktif/nonaktif dua kali
882e3b8
import streamlit as st | |
import os | |
import tempfile | |
import zipfile | |
from dotenv import load_dotenv | |
from langsmith import traceable | |
from app.chat import initialize_session_state, display_chat_history | |
from app.data_loader import get_data, load_docs | |
from app.document_processor import process_documents, save_vector_store_to_supabase, load_vector_store_from_supabase | |
from app.prompts import sahabat_prompt | |
from app.db import supabase | |
from langchain_community.llms import Replicate | |
from langchain.memory import ConversationBufferMemory | |
from langchain.chains import ConversationalRetrievalChain | |
from langchain_community.document_transformers import LongContextReorder | |
# Read variables from a local .env file into the process environment before
# anything below looks them up.
load_dotenv()

# Supabase Storage settings.
# Bucket that holds the persisted vector store files.
BUCKET_NAME: str = "pnp-bot-storage-archive"
# Name prefix shared by the stored vector store files (a prefix, not a full
# file name).
VECTOR_STORE_PREFIX: str = "vector_store"
# Local folder whose files are the source documents for the vector store.
DATA_DIR: str = "data"
def create_conversational_chain(vector_store):
    """Assemble the retrieval-augmented chat chain for ``vector_store``.

    The chain combines:
      * a Replicate-hosted LLM (pinned model version, low temperature),
      * a retriever over ``vector_store`` that fetches 6 documents per query,
      * ``sahabat_prompt`` as the prompt for the document-combining step,
      * a ``ConversationBufferMemory`` stored under the ``chat_history`` key.

    Args:
        vector_store: Object exposing ``as_retriever(search_kwargs=...)``.

    Returns:
        The ``ConversationalRetrievalChain`` built by ``from_llm``; it is
        configured to return source documents alongside the answer.
    """
    generation_settings = {"temperature": 0.1, "top_p": 0.9, "max_new_tokens": 6000}
    language_model = Replicate(
        model="fauziisyrinapridal/sahabat-ai-v1:afb9fa89fe786362f619fd4fef34bd1f7a4a4da23073d8a6fbf54dcbe458f216",
        model_kwargs=generation_settings,
    )

    # output_key names which chain output the memory records; presumably
    # needed because the chain also returns source documents -- confirm.
    conversation_memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
        output_key='answer',
    )

    doc_retriever = vector_store.as_retriever(search_kwargs={"k": 6})

    return ConversationalRetrievalChain.from_llm(
        language_model,
        retriever=doc_retriever,
        combine_docs_chain_kwargs={"prompt": sahabat_prompt},
        return_source_documents=True,
        memory=conversation_memory,
    )
def reorder_embedding(docs):
    """Run ``docs`` through a fresh ``LongContextReorder`` and return the result.

    Args:
        docs: Documents to hand to ``LongContextReorder.transform_documents``.

    Returns:
        Whatever ``transform_documents`` returns for ``docs``.
    """
    return LongContextReorder().transform_documents(docs)
def get_latest_data_timestamp(folder):
    """Return the newest modification time of any file below ``folder``.

    The folder is walked recursively with ``os.walk``.

    Args:
        folder: Path of the directory to scan.

    Returns:
        The largest ``os.path.getmtime`` value found (seconds since the
        epoch), or ``0`` when no readable file exists. A missing folder
        also gives ``0``, because ``os.walk`` yields nothing for it.
    """
    latest_time = 0
    for root, _, files in os.walk(folder):
        for file in files:
            path = os.path.join(root, file)
            try:
                file_time = os.path.getmtime(path)
            except OSError:
                # Dangling symlink, or a file removed while walking: it cannot
                # contribute a timestamp and must not abort the whole scan.
                continue
            latest_time = max(latest_time, file_time)
    return latest_time
def get_supabase_vector_store_timestamp():
    """Get the timestamp of the vector store files in Supabase storage.

    Lists the bucket ``BUCKET_NAME`` and looks at entries whose name starts
    with ``VECTOR_STORE_PREFIX`` and ends with ``.faiss`` or ``.pkl``.

    Returns:
        The largest ``updated_at`` value among the matching entries when at
        least one ``.faiss`` file and one ``.pkl`` file are present.
        ``None`` when either kind is missing, or when the listing fails
        (the error is printed, not raised).
    """
    required_suffixes = ('.faiss', '.pkl')
    try:
        response = supabase.storage.from_(BUCKET_NAME).list()
        timestamps = []
        seen_suffixes = set()
        for file in response:
            name = file['name']
            if not name.startswith(VECTOR_STORE_PREFIX):
                continue
            suffix = next((s for s in required_suffixes if name.endswith(s)), None)
            if suffix is None:
                continue
            seen_suffixes.add(suffix)
            timestamps.append(file['updated_at'])
        # Both halves must exist. Counting matches is not enough, since two
        # files of the same kind would pass a ">= 2" check.
        if seen_suffixes != set(required_suffixes):
            return None
        # NOTE(review): max() compares the raw values; this is chronological
        # only if all timestamps share one ISO-8601 format -- confirm.
        return max(timestamps)
    except Exception as e:
        # Best effort: any failure is reported as "no timestamp available".
        print(f"Error getting Supabase timestamp: {e}")
        return None
def vector_store_is_outdated():
    """Tell whether the stored vector store is older than the local data.

    Returns:
        ``True`` when no Supabase timestamp is available, or when the newest
        file under ``DATA_DIR`` was modified after that timestamp;
        ``False`` otherwise.
    """
    remote_stamp = get_supabase_vector_store_timestamp()
    if remote_stamp is None:
        return True

    from datetime import datetime

    # Rewrite a trailing "Z" as an explicit UTC offset, which
    # datetime.fromisoformat accepts on every supported Python version.
    iso_text = remote_stamp.replace('Z', '+00:00')
    remote_epoch = datetime.fromisoformat(iso_text).timestamp()

    local_epoch = get_latest_data_timestamp(DATA_DIR)
    return local_epoch > remote_epoch
def main():
    """Streamlit entry point: obtain a vector store, then show the chat UI.

    How the vector store is obtained:
      * Chat history not empty: reuse ``st.session_state['vector_store']``,
        loading from Supabase if nothing is cached.
      * Chat history empty and stored vector store up to date: load it from
        Supabase.
      * Chat history empty and stored vector store outdated: fetch and load
        the documents, rebuild the vector store and upload it to Supabase.

    The result (possibly ``None``) is written to
    ``st.session_state['vector_store']``; the chat is displayed only when it
    is not ``None``.
    """
    initialize_session_state()

    store = None
    if len(st.session_state['history']) > 0:
        # A conversation is already running: prefer the cached store.
        store = st.session_state.get('vector_store')
        if store is None:
            store = load_vector_store_from_supabase(supabase, BUCKET_NAME, VECTOR_STORE_PREFIX)
    elif not vector_store_is_outdated():
        with st.spinner("Memuat vector store dari Supabase..."):
            store = load_vector_store_from_supabase(supabase, BUCKET_NAME, VECTOR_STORE_PREFIX)
            if store:
                st.success("Vector store berhasil dimuat dari Supabase!")
            else:
                st.error("Gagal memuat vector store dari Supabase")
    else:
        with st.spinner("Memuat dan memproses dokumen..."):
            # Fetch data only on this path, i.e. only when a rebuild is needed.
            get_data()
            docs = load_docs()
            if len(docs) == 0:
                # Nothing to index; the store stays None.
                st.warning("Folder 'data/' kosong. Chatbot tetap bisa digunakan, tapi tanpa konteks dokumen.")
            else:
                store = process_documents(reorder_embedding(docs))
                with st.spinner("Mengunggah vector store ke Supabase..."):
                    uploaded = save_vector_store_to_supabase(store, supabase, BUCKET_NAME, VECTOR_STORE_PREFIX)
                    if uploaded:
                        st.success("Vector store berhasil diunggah ke Supabase!")
                    else:
                        st.error("Gagal mengunggah vector store ke Supabase")

    st.session_state['vector_store'] = store

    if store is not None:
        display_chat_history(create_conversational_chain(store))
# Run the app only when this file is executed as the entry script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()