pnp-chatbot-v1 / app /vector_store.py
FauziIsyrinApridal
revisi 1
daa81fb
from datetime import datetime
import streamlit as st
from typing import Optional
from app.data_loader import list_all_files
from app.db import supabase
from app.document_processor import load_vector_store_from_supabase
from app.config import Config
# Bucket and object-name prefix used for the vector store files in this module.
# Each value is read from Config when the attribute exists; otherwise the
# literal given here is used as the fallback.
BUCKET_NAME = getattr(Config, "BUCKET_NAME", "pnp-bot-storage-archive")
VECTOR_STORE_PREFIX = getattr(Config, "VECTOR_STORE_PREFIX", "vector_store")
def get_latest_data_timestamp_from_files(bucket_name: str) -> float:
    """Return the newest modification time among the files of a bucket.

    Every entry returned by ``list_all_files(bucket_name)`` is inspected for
    an ``updated_at`` value, falling back to ``created_at``. The value is
    parsed as an ISO 8601 string (``Z`` is rewritten to ``+00:00`` first) and
    converted to a POSIX timestamp.

    Args:
        bucket_name: Name of the storage bucket whose files are listed.

    Returns:
        The largest timestamp found, in seconds since the epoch, or ``0.0``
        when no entry yields a parsable time greater than zero.
    """
    newest = 0.0
    for file in list_all_files(bucket_name):
        stamp = file.get("updated_at") or file.get("created_at")
        if not stamp:
            # Entry carries no usable time information.
            continue
        try:
            parsed = datetime.fromisoformat(stamp.replace('Z', '+00:00'))
            epoch = parsed.timestamp()
        except Exception as e:
            # Best effort: report the unreadable entry and keep scanning.
            print(f"Gagal parsing waktu dari {file.get('name')}: {e}")
        else:
            if epoch > newest:
                newest = epoch
    return newest
def get_supabase_vector_store_timestamp() -> Optional[str]:
    """Return the most recent ``updated_at`` value of the vector store files.

    Lists ``BUCKET_NAME`` and keeps the entries whose name starts with
    ``VECTOR_STORE_PREFIX`` and ends with ``.faiss`` or ``.pkl``. The
    ``updated_at`` values are compared as returned by the storage API.

    Returns:
        The maximum ``updated_at`` value when at least two matching files
        exist; ``None`` when fewer than two match or when any error occurs
        while listing or reading the entries (the error is printed).
    """
    index_suffixes = (".faiss", ".pkl")
    try:
        listing = supabase.storage.from_(BUCKET_NAME).list()
        stamps = [
            file["updated_at"]
            for file in listing
            if file["name"].startswith(VECTOR_STORE_PREFIX)
            and file["name"].endswith(index_suffixes)
        ]
        # Fewer than two matching files means no timestamp is reported.
        return max(stamps) if len(stamps) >= 2 else None
    except Exception as e:
        print(f"Error getting Supabase timestamp: {e}")
        return None
def vector_store_is_outdated(data_bucket_name: str = "pnp-bot-storage") -> bool:
    """Report whether the stored vector store is older than the source data.

    Compares the timestamp from ``get_supabase_vector_store_timestamp()``
    with the newest file time in the data bucket, as reported by
    ``get_latest_data_timestamp_from_files``.

    Args:
        data_bucket_name: Bucket holding the source files the vector store is
            built from. Defaults to the bucket name that was previously
            hard-coded, so existing zero-argument calls behave as before.

    Returns:
        ``True`` when no vector store timestamp is available, when that
        timestamp cannot be parsed, or when the newest data file is more
        recent than the vector store; ``False`` otherwise.
    """
    supabase_timestamp = get_supabase_vector_store_timestamp()
    if supabase_timestamp is None:
        return True

    try:
        supabase_time = datetime.fromisoformat(
            supabase_timestamp.replace("Z", "+00:00")
        ).timestamp()
    except (AttributeError, TypeError, ValueError) as e:
        # A timestamp we cannot read cannot prove the store is fresh, so fall
        # back to the same answer as the "no timestamp" case instead of
        # letting the check crash.
        print(f"Error parsing vector store timestamp {supabase_timestamp!r}: {e}")
        return True

    data_time = get_latest_data_timestamp_from_files(data_bucket_name)
    return data_time > supabase_time
@st.cache_resource(show_spinner=False)
def get_cached_vector_store():
    """Load the vector store from Supabase storage through Streamlit's cache.

    The load is delegated to ``load_vector_store_from_supabase`` using the
    module's ``BUCKET_NAME`` and ``VECTOR_STORE_PREFIX``. The function is
    wrapped in ``st.cache_resource`` with the spinner disabled, so the
    download/deserialization is not repeated on every rerun.

    Returns:
        Whatever ``load_vector_store_from_supabase`` returns.
    """
    vector_store = load_vector_store_from_supabase(
        supabase, BUCKET_NAME, VECTOR_STORE_PREFIX
    )
    return vector_store