Spaces:

TalatMasud
/

chatbot-backend

Sleeping

chatbot-backend / src /vctorstores /chroma_vectorstore.py

initial commit

640b1c8 7 months ago

2.07 kB

	# src/vectorstores/chroma_vectorstore.py
	import chromadb
	from typing import List, Callable, Any

	from .base_vectorstore import BaseVectorStore

	class ChromaVectorStore(BaseVectorStore):
	    """Vector store backed by a persistent Chroma collection.

	    Embeddings are always passed to Chroma explicitly: either pre-computed
	    by the caller or produced by ``embedding_function``. Queries are made
	    with a query embedding rather than raw query text.
	    """

	    def __init__(
	        self,
	        embedding_function: Callable[[List[str]], List[List[float]]],
	        persist_directory: str = './chroma_db',
	        collection_name: str = 'documents',
	    ):
	        """
	        Initialize Chroma Vector Store

	        Args:
	            embedding_function (Callable): Function to generate embeddings
	                for a list of texts (one embedding per text).
	            persist_directory (str): Directory to persist the vector store
	            collection_name (str): Name of the Chroma collection to open,
	                created if it does not exist yet. Defaults to "documents".
	        """
	        self.client = chromadb.PersistentClient(path=persist_directory)
	        self.collection = self.client.get_or_create_collection(
	            name=collection_name
	        )
	        self.embedding_function = embedding_function

	    def add_documents(
	        self,
	        documents: List[str],
	        embeddings: List[List[float]] = None
	    ) -> None:
	        """
	        Add documents to the vector store

	        Each document is stored under a freshly generated unique ID, so
	        repeated calls append to the collection instead of colliding with
	        IDs used by earlier calls.

	        Args:
	            documents (List[str]): List of document texts. An empty list
	                is a no-op.
	            embeddings (List[List[float]], optional): Pre-computed
	                embeddings, one per document. When omitted (``None``) or
	                empty, they are computed with ``self.embedding_function``.
	        """
	        # Local import so the class does not depend on a module-level change.
	        import uuid

	        if not documents:
	            # Nothing to store: skip the embedding call and the Chroma call.
	            return

	        # Explicit None/length test instead of plain truthiness, which is
	        # ambiguous (and raises) for array-like embeddings.
	        if embeddings is None or len(embeddings) == 0:
	            embeddings = self.embedding_function(documents)

	        # Positional IDs ("doc_0", "doc_1", ...) would repeat on every call
	        # against the same persistent collection; random UUIDs do not.
	        ids = [f"doc_{uuid.uuid4().hex}" for _ in documents]

	        self.collection.add(
	            documents=documents,
	            embeddings=embeddings,
	            ids=ids
	        )

	    def similarity_search(
	        self,
	        query_embedding: List[float],
	        top_k: int = 3
	    ) -> List[str]:
	        """
	        Perform similarity search

	        Args:
	            query_embedding (List[float]): Embedding of the query
	            top_k (int): Number of top similar documents to retrieve

	        Returns:
	            List[str]: List of most similar documents; empty when the
	            query result carries no documents.
	        """
	        results = self.collection.query(
	            query_embeddings=[query_embedding],
	            n_results=top_k
	        )

	        # The result holds one list of documents per query embedding. A
	        # missing, None or empty 'documents' entry is treated as "no match"
	        # rather than raising on the [0] lookup.
	        per_query_documents = results.get('documents') or [[]]
	        return per_query_documents[0]