from typing import Any, List

from llama_cpp import Llama
from llama_index.core.bridge.pydantic import PrivateAttr
from llama_index.core.embeddings import BaseEmbedding


class LlamaCppIndexEmbedding(BaseEmbedding):
    """LlamaIndex embedding backend served by a local llama-cpp-python model.

    The model is loaded once in ``__init__`` with ``embedding=True`` and kept
    in a private attribute; every embedding request goes through
    ``Llama.create_embedding``.
    """

    # Underlying llama-cpp-python model handle, created in __init__.
    _model: Llama = PrivateAttr()

    def __init__(
        self,
        model_path: str = "models/bge-m3-Q4_K_M.gguf",
        **kwargs: Any,
    ) -> None:
        """Initialise the base embedding and load the model.

        Args:
            model_path: Path handed to ``Llama`` as ``model_path``.
            **kwargs: Forwarded unchanged to ``BaseEmbedding.__init__``.
        """
        # The base class must be initialised before the private attribute
        # is assigned.
        super().__init__(**kwargs)
        self._model = Llama(model_path=model_path, embedding=True)

    @classmethod
    def class_name(cls) -> str:
        """Return the identifier string for this embedding class."""
        return "llama-cpp-bge-m3-embeddings"

    def _embed_one(self, text: str) -> List[float]:
        """Embed a single string and return its vector.

        Reads the first entry of the ``data`` list in the
        ``create_embedding`` response and returns its ``embedding`` field.
        """
        response = self._model.create_embedding(input=text)
        return response["data"][0]["embedding"]

    async def _aget_query_embedding(self, query: str) -> List[float]:
        """Async variant of ``_get_query_embedding``.

        Calls the synchronous implementation inline; nothing is offloaded
        to another thread or task.
        """
        return self._get_query_embedding(query)

    async def _aget_text_embedding(self, text: str) -> List[float]:
        """Async variant of ``_get_text_embedding``.

        Calls the synchronous implementation inline; nothing is offloaded
        to another thread or task.
        """
        return self._get_text_embedding(text)

    def _get_query_embedding(self, query: str) -> List[float]:
        """Return the embedding vector for a query string."""
        return self._embed_one(query)

    def _get_text_embedding(self, text: str) -> List[float]:
        """Return the embedding vector for a single text."""
        return self._embed_one(text)

    def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Return one embedding per input text, in input order.

        Each text is embedded individually via ``_get_text_embedding``; an
        empty ``texts`` list yields an empty list.
        """
        return [self._get_text_embedding(text) for text in texts]