File size: 2,461 Bytes
daa81fb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
from datetime import datetime
import streamlit as st
from typing import Optional
from app.data_loader import list_all_files
from app.db import supabase
from app.document_processor import load_vector_store_from_supabase
from app.config import Config

# Bucket and object-name prefix used to locate the vector store files.
# Each value is read from Config when the attribute exists; otherwise the
# literal given here is used as the fallback.
BUCKET_NAME = getattr(Config, "BUCKET_NAME", "pnp-bot-storage-archive")
VECTOR_STORE_PREFIX = getattr(Config, "VECTOR_STORE_PREFIX", "vector_store")


def get_latest_data_timestamp_from_files(bucket_name: str) -> float:
    """Return the newest file timestamp in a bucket as epoch seconds.

    Each entry returned by ``list_all_files(bucket_name)`` contributes its
    ``updated_at`` value, falling back to ``created_at``. Entries with
    neither value are ignored; entries whose value cannot be parsed are
    reported on stdout and ignored.

    Args:
        bucket_name: Name of the bucket passed to ``list_all_files``.

    Returns:
        The largest parsed timestamp, or ``0.0`` when no entry yields one.
    """
    newest = 0.0
    for entry in list_all_files(bucket_name):
        stamp = entry.get("updated_at") or entry.get("created_at")
        if not stamp:
            continue
        try:
            # A trailing "Z" is rewritten to an explicit UTC offset before parsing.
            parsed = datetime.fromisoformat(stamp.replace('Z', '+00:00'))
            epoch = parsed.timestamp()
        except Exception as e:
            print(f"Gagal parsing waktu dari {entry.get('name')}: {e}")
        else:
            if epoch > newest:
                newest = epoch
    return newest


def get_supabase_vector_store_timestamp() -> Optional[str]:
    """Return the newest ``updated_at`` value of the stored vector store files.

    Lists ``BUCKET_NAME`` and considers entries whose name starts with
    ``VECTOR_STORE_PREFIX`` and ends with ``.faiss`` or ``.pkl``. The store
    is only treated as present when BOTH kinds of file are found; counting
    matches alone would also accept two ``.faiss`` files and no ``.pkl``.

    Returns:
        The greatest ``updated_at`` string among the matching files, or
        ``None`` when either file kind is missing (or has no timestamp) or
        when listing the bucket fails.
    """
    required_suffixes = (".faiss", ".pkl")
    try:
        entries = supabase.storage.from_(BUCKET_NAME).list()
        newest_by_suffix = {}
        for entry in entries:
            name = entry["name"]
            if not name.startswith(VECTOR_STORE_PREFIX):
                continue
            suffix = next((s for s in required_suffixes if name.endswith(s)), None)
            stamp = entry.get("updated_at")
            if suffix is None or not stamp:
                continue
            # NOTE(review): timestamps are compared as strings, which orders
            # correctly only if they all share one ISO-8601 format - confirm.
            if suffix not in newest_by_suffix or stamp > newest_by_suffix[suffix]:
                newest_by_suffix[suffix] = stamp
        if len(newest_by_suffix) < len(required_suffixes):
            return None
        return max(newest_by_suffix.values())
    except Exception as e:
        # Best-effort lookup: any failure is reported and treated as "no store".
        print(f"Error getting Supabase timestamp: {e}")
        return None


def vector_store_is_outdated(data_bucket: str = "pnp-bot-storage") -> bool:
    """Report whether the source data is newer than the stored vector store.

    Args:
        data_bucket: Bucket whose file timestamps are compared against the
            vector store. Defaults to the previously hard-coded bucket name.

    Returns:
        ``True`` when no vector store timestamp is available, when that
        timestamp cannot be parsed, or when the newest file in
        ``data_bucket`` is more recent than the vector store; ``False``
        otherwise.
    """
    supabase_timestamp = get_supabase_vector_store_timestamp()
    if supabase_timestamp is None:
        return True
    try:
        # A trailing "Z" is rewritten to an explicit UTC offset before parsing.
        store_time = datetime.fromisoformat(
            supabase_timestamp.replace("Z", "+00:00")
        ).timestamp()
    except (AttributeError, TypeError, ValueError) as e:
        # An unreadable timestamp is handled like a missing one: report the
        # store as outdated instead of raising out of this check.
        print(f"Error parsing vector store timestamp {supabase_timestamp!r}: {e}")
        return True
    data_time = get_latest_data_timestamp_from_files(data_bucket)

    return data_time > store_time


@st.cache_resource(show_spinner=False)
def get_cached_vector_store():
    """Load the vector store from Supabase storage.

    The function is wrapped in ``st.cache_resource`` (spinner disabled), so
    the loader's result is reused instead of being fetched on every rerun.
    """
    vector_store = load_vector_store_from_supabase(
        supabase, BUCKET_NAME, VECTOR_STORE_PREFIX
    )
    return vector_store