Spaces:
Sleeping
Sleeping
from datetime import datetime | |
import streamlit as st | |
from typing import Optional | |
from app.data_loader import list_all_files | |
from app.db import supabase | |
from app.document_processor import load_vector_store_from_supabase | |
from app.config import Config | |
# Where the persisted vector store lives in Supabase storage. Each constant is
# read from Config when the attribute exists there; otherwise the literal
# given as the third getattr argument is used.
BUCKET_NAME = getattr(
    Config,
    "BUCKET_NAME",
    "pnp-bot-storage-archive",
)
VECTOR_STORE_PREFIX = getattr(
    Config,
    "VECTOR_STORE_PREFIX",
    "vector_store",
)
def get_latest_data_timestamp_from_files(bucket_name: str) -> float:
    """Return the newest file time in a storage bucket as a POSIX timestamp.

    Each entry returned by ``list_all_files(bucket_name)`` is inspected for an
    ``updated_at`` value, falling back to ``created_at``. Entries with neither
    are ignored. A trailing ``Z`` is rewritten to ``+00:00`` before the value
    is handed to ``datetime.fromisoformat``.

    Args:
        bucket_name: Name of the bucket whose files are listed.

    Returns:
        The largest timestamp found, or ``0.0`` when no entry yields a
        parseable time.
    """
    newest = 0.0
    for entry in list_all_files(bucket_name):
        raw_time = entry.get("updated_at") or entry.get("created_at")
        if not raw_time:
            continue
        try:
            parsed = datetime.fromisoformat(raw_time.replace("Z", "+00:00"))
            epoch = parsed.timestamp()
        except Exception as e:
            # Best effort: report the bad entry and keep scanning the rest.
            print(f"Gagal parsing waktu dari {entry.get('name')}: {e}")
            continue
        if epoch > newest:
            newest = epoch
    return newest
def get_supabase_vector_store_timestamp() -> Optional[str]:
    """Return the most recent ``updated_at`` of the stored vector-store files.

    Lists the top level of ``BUCKET_NAME`` and keeps the entries whose name
    starts with ``VECTOR_STORE_PREFIX`` and ends in ``.faiss`` or ``.pkl``.

    Returns:
        The maximum ``updated_at`` value among the matching entries (values
        are compared exactly as the listing returns them), or ``None`` when
        fewer than two entries match or when any exception occurs; the
        exception is printed rather than raised.
    """
    try:
        listing = supabase.storage.from_(BUCKET_NAME).list()
        stamps = [
            item["updated_at"]
            for item in listing
            if item["name"].startswith(VECTOR_STORE_PREFIX)
            and item["name"].endswith((".faiss", ".pkl"))
        ]
        # Fewer than two matching files is treated as "no usable store".
        return max(stamps) if len(stamps) >= 2 else None
    except Exception as e:
        print(f"Error getting Supabase timestamp: {e}")
        return None
def vector_store_is_outdated(data_bucket_name: str = "pnp-bot-storage") -> bool:
    """Report whether the stored vector store is older than the source data.

    The vector store's timestamp comes from
    ``get_supabase_vector_store_timestamp()`` and is compared with the newest
    file time reported by ``get_latest_data_timestamp_from_files``.

    Args:
        data_bucket_name: Bucket holding the source documents. Defaults to the
            bucket this check has always used.

    Returns:
        ``True`` when the vector-store timestamp is unavailable, cannot be
        parsed, or is older than the newest data file; ``False`` otherwise.
    """
    supabase_timestamp = get_supabase_vector_store_timestamp()
    if supabase_timestamp is None:
        return True

    try:
        # fromisoformat on older Python versions rejects a trailing "Z".
        supabase_time = datetime.fromisoformat(
            supabase_timestamp.replace("Z", "+00:00")
        ).timestamp()
    except (AttributeError, TypeError, ValueError) as e:
        # Freshness cannot be established, so treat it like the
        # "timestamp unavailable" case above instead of crashing the caller.
        print(f"Error parsing Supabase timestamp {supabase_timestamp!r}: {e}")
        return True

    data_time = get_latest_data_timestamp_from_files(data_bucket_name)
    return data_time > supabase_time
@st.cache_resource
def get_cached_vector_store():
    """Load the vector store from Supabase storage, cached across reruns.

    Delegates to ``load_vector_store_from_supabase`` with the module's
    ``supabase`` client, ``BUCKET_NAME`` and ``VECTOR_STORE_PREFIX``. The
    ``st.cache_resource`` decorator keeps the returned object so Streamlit
    reruns do not repeat the download/deserialization.

    Returns:
        Whatever ``load_vector_store_from_supabase`` returns.

    NOTE(review): the cached value is reused until
    ``get_cached_vector_store.clear()`` is called -- confirm that code which
    rebuilds the vector store clears this cache afterwards, and that the
    loader's failure result (if it returns one instead of raising) should be
    cached as well.
    """
    return load_vector_store_from_supabase(supabase, BUCKET_NAME, VECTOR_STORE_PREFIX)