from datetime import datetime
from typing import Optional

import streamlit as st

from app.data_loader import list_all_files
from app.db import supabase
from app.document_processor import load_vector_store_from_supabase
from app.config import Config

# Defaults for bucket/prefix if not present in Config
BUCKET_NAME = getattr(Config, "BUCKET_NAME", "pnp-bot-storage-archive")
VECTOR_STORE_PREFIX = getattr(Config, "VECTOR_STORE_PREFIX", "vector_store")

# Bucket whose files are compared against the vector store's timestamp.
DATA_BUCKET_NAME = "pnp-bot-storage"


def _iso_to_epoch(iso_time: str) -> float:
    """Convert an ISO-8601 timestamp string to POSIX epoch seconds.

    A trailing ``Z`` is rewritten to ``+00:00`` before parsing because
    ``datetime.fromisoformat`` only accepts the ``Z`` suffix on newer Python
    versions.

    Raises:
        ValueError: If the string is not a valid ISO-8601 timestamp.
    """
    return datetime.fromisoformat(iso_time.replace("Z", "+00:00")).timestamp()


def get_latest_data_timestamp_from_files(bucket_name: str) -> float:
    """Return the newest file timestamp found in a storage bucket.

    Each entry returned by ``list_all_files`` is inspected for ``updated_at``
    (falling back to ``created_at``). Entries without a timestamp are skipped;
    entries whose timestamp cannot be parsed are reported and skipped.

    Args:
        bucket_name: Name of the bucket passed to ``list_all_files``.

    Returns:
        The largest timestamp as POSIX epoch seconds, or ``0.0`` when no entry
        carries a usable timestamp.
    """
    latest_time = 0.0
    for file in list_all_files(bucket_name):
        iso_time = file.get("updated_at") or file.get("created_at")
        if not iso_time:
            continue
        try:
            latest_time = max(latest_time, _iso_to_epoch(iso_time))
        except (ValueError, TypeError, AttributeError, OverflowError, OSError) as e:
            # Best effort: one malformed timestamp must not abort the scan.
            print(f"Gagal parsing waktu dari {file.get('name')}: {e}")
    return latest_time


def get_supabase_vector_store_timestamp() -> Optional[str]:
    """Return the newest ``updated_at`` among the stored vector store files.

    Lists ``BUCKET_NAME`` and keeps entries whose name starts with
    ``VECTOR_STORE_PREFIX`` and ends with ``.faiss`` or ``.pkl``.

    Returns:
        The ``updated_at`` string of the most recently updated matching file,
        or ``None`` when fewer than two matching files exist or when any error
        occurs while listing or comparing them.
    """
    try:
        listing = supabase.storage.from_(BUCKET_NAME).list()
        timestamps = [
            entry["updated_at"]
            for entry in listing
            if entry["name"].startswith(VECTOR_STORE_PREFIX)
            and entry["name"].endswith((".faiss", ".pkl"))
        ]
        # Fewer than two matches is treated as "no vector store";
        # presumably the index and its pickle must both be present.
        if len(timestamps) >= 2:
            # Compare by parsed time, not raw text: lexicographic order is
            # wrong when fractional-second precision differs between strings
            # (e.g. "...:00Z" sorts after "...:00.500Z").
            return max(timestamps, key=_iso_to_epoch)
        return None
    except Exception as e:
        # Boundary around a remote call: report and signal "unknown".
        print(f"Error getting Supabase timestamp: {e}")
        return None


def vector_store_is_outdated(data_bucket: str = DATA_BUCKET_NAME) -> bool:
    """Check whether the vector store is older than the newest data file.

    Args:
        data_bucket: Bucket whose file timestamps are compared against the
            vector store. Defaults to ``DATA_BUCKET_NAME``.

    Returns:
        ``True`` when no vector store timestamp is available or when any file
        in ``data_bucket`` is newer than the vector store; ``False`` otherwise.
    """
    supabase_timestamp = get_supabase_vector_store_timestamp()
    if supabase_timestamp is None:
        return True

    supabase_time = _iso_to_epoch(supabase_timestamp)
    data_time = get_latest_data_timestamp_from_files(data_bucket)
    return data_time > supabase_time


@st.cache_resource(show_spinner=False)
def get_cached_vector_store():
    """Cache vector store loading to avoid repeated downloads/deserialization on reruns."""
    return load_vector_store_from_supabase(supabase, BUCKET_NAME, VECTOR_STORE_PREFIX)