import json
from pathlib import Path
from typing import List

import faiss
import numpy as np

from src.utils.config import VECTOR_DB_PATH, EMBEDDING_DIM


class VectorStore:
    """Wrapper for FAISS vector storage, with ID-to-text mapping.

    The store keeps two parallel structures: a flat L2 FAISS index holding
    the vectors, and ``self.metadata``, a list whose i-th entry describes the
    i-th vector added to the index. Both are persisted side by side, in a
    ``.index`` file and a ``.json`` file derived from the same base path.
    """

    def __init__(self, index_path: Path = VECTOR_DB_PATH):
        """Create the store, loading persisted data when both files exist.

        Args:
            index_path: Base path of the store. Its suffix (if any) is
                replaced with ``.index`` for the FAISS index and ``.json``
                for the metadata.
        """
        self.index_path = index_path.with_suffix(".index")
        self.meta_path = index_path.with_suffix(".json")
        self._reset()

        # Try loading if exists
        if self.index_path.exists() and self.meta_path.exists():
            try:
                self.load()
            except Exception as e:
                # Deliberate best-effort: an unreadable store must not
                # prevent construction, so report it and start empty.
                print(f"[WARN] Failed to load vector store: {e}")
                # Reinitialize clean if corrupted
                self._reset()

    def _reset(self) -> None:
        """Replace the in-memory state with an empty index and metadata."""
        self.index = faiss.IndexFlatL2(EMBEDDING_DIM)
        self.metadata = []  # list of dicts: {"id": str, "text": str}

    def add(self, embeddings: list[list[float]], metadata: List[dict]):
        """Add new embeddings and their metadata, then persist the store.

        Args:
            embeddings: One vector per item.
            metadata: One dict per vector, in the same order
                (e.g., ``{"id": "doc1_chunk0", "text": "..."}``).

        Raises:
            ValueError: If ``embeddings`` and ``metadata`` differ in length.
        """
        # Positions in the index are the only link to the metadata list, so
        # a length mismatch would silently misalign every later lookup.
        if len(embeddings) != len(metadata):
            raise ValueError(
                f"embeddings ({len(embeddings)}) and metadata "
                f"({len(metadata)}) must have the same length"
            )
        if len(embeddings) == 0:
            # Nothing to add; an empty batch would otherwise be turned into a
            # 1-D array, which is not a valid (n, dim) input for the index.
            return

        vectors = np.asarray(embeddings, dtype="float32")
        self.index.add(vectors)
        self.metadata.extend(metadata)
        self.save()

    def search(self, query_embedding: list[float], top_k: int = 5) -> List[dict]:
        """Perform vector search and return metadata of the nearest results.

        Args:
            query_embedding: The query vector.
            top_k: Maximum number of results to return.

        Returns:
            Metadata dicts of the nearest vectors, in the order returned by
            the index. Fewer than ``top_k`` entries (possibly none) are
            returned when the store holds fewer vectors or ``top_k`` is not
            positive.
        """
        # Never ask for more neighbours than the index holds.
        k = min(top_k, self.index.ntotal)
        if k <= 0:
            return []

        query = np.asarray([query_embedding], dtype="float32")
        _, indices = self.index.search(query, k)

        # FAISS reports a missing neighbour as -1; used as a list index that
        # would silently select the last metadata entry. The upper bound
        # guards against an index that is longer than the metadata list.
        known = len(self.metadata)
        return [self.metadata[i] for i in indices[0] if 0 <= i < known]

    def save(self) -> None:
        """Save the index and the metadata to their files.

        The parent directory is created if it does not exist.
        """
        self.index_path.parent.mkdir(parents=True, exist_ok=True)
        faiss.write_index(self.index, str(self.index_path))
        with open(self.meta_path, "w", encoding="utf-8") as f:
            json.dump(self.metadata, f, ensure_ascii=False, indent=2)

    def load(self) -> None:
        """Load the index and the metadata from their files.

        Both files are read before any attribute is replaced, so a failure
        while reading either one leaves the in-memory state untouched.
        """
        index = faiss.read_index(str(self.index_path))
        with open(self.meta_path, "r", encoding="utf-8") as f:
            metadata = json.load(f)
        self.index = index
        self.metadata = metadata