from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
import os
import requests

import gen_splits

HF_token = os.environ.get('HF_token')


# Embedding function: calls the hosted Hugging Face Inference API as an
# alternative to running the model locally. `texts` may be a single string
# or a list of strings; the API returns one vector per input.
def embed(texts):
    model_id = "sentence-transformers/all-MiniLM-L6-v2"
    api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_id}"
    headers = {"Authorization": f"Bearer {HF_token}"}
    response = requests.post(
        api_url,
        headers=headers,
        json={"inputs": texts, "options": {"wait_for_model": True}},
    )
    response.raise_for_status()  # surface HTTP errors instead of parsing a bad response
    return response.json()


# Create embeddings for searching the splits
persist_directory = './chroma/'

# Open-source embedding function, run locally
embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


def initialize():
    """Embed the document splits and persist them to a Chroma store."""
    splits = gen_splits.gen_splits()
    vectordb = Chroma.from_documents(
        documents=splits,
        persist_directory=persist_directory,
        embedding=embedding_function,
    )
    # Chroma >= 0.4 persists automatically; this call is a no-op there and is
    # kept for compatibility with older versions.
    vectordb.persist()
    return vectordb


# if __name__ == "__main__":
#     vectordb = initialize()
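
# Usage sketch (an assumption, not part of the original module): reopen the
# already-persisted store without re-embedding, then run a similarity search.
# The query string and `k` are illustrative placeholders.
#
# vectordb = Chroma(
#     persist_directory=persist_directory,
#     embedding_function=embedding_function,
# )
# results = vectordb.similarity_search("example query", k=4)
# for doc in results:
#     print(doc.page_content[:200])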