File size: 1,348 Bytes
5c32827
2fe525b
5c32827
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2fe525b
 
 
5c32827
 
2fe525b
5c32827
 
 
 
692c6b0
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
import os
import gen_splits
from langchain_ollama import OllamaEmbeddings
import ollama
import chromadb
import requests  # Added import for requests

# Hugging Face Inference API token, read from the environment.
# NOTE(review): os.environ.get returns None if 'HF_token' is unset, which would
# send "Bearer None" in embed() — confirm the variable is set in deployment.
HF_token = os.environ.get('HF_token')

# Embedding Function
def embed(texts, model_id="sentence-transformers/all-MiniLM-L6-v2", timeout=30):
    """Embed text via the Hugging Face Inference API feature-extraction pipeline.

    Args:
        texts: A string or list of strings to embed (passed as the "inputs"
            field of the request payload).
        model_id: Hugging Face model to use; defaults to the same MiniLM model
            used elsewhere in this module.
        timeout: Seconds to wait for the API before raising
            ``requests.Timeout``. The original call had no timeout and could
            hang indefinitely.

    Returns:
        The parsed JSON response — the embedding vector(s) returned by the API.

    Raises:
        requests.HTTPError: If the API responds with a non-2xx status.
        requests.Timeout: If the request exceeds ``timeout`` seconds.
    """
    api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_id}"
    headers = {"Authorization": f"Bearer {HF_token}"}

    # wait_for_model: block until the model is loaded instead of erroring.
    response = requests.post(
        api_url,
        headers=headers,
        json={"inputs": texts, "options": {"wait_for_model": True}},
        timeout=timeout,
    )
    response.raise_for_status()  # surface HTTP errors instead of returning an error body
    return response.json()

# Create Embeddings for Searching the Splits
# Directory where the Chroma vector store is persisted on disk.
persist_directory = './chroma/'

# create the open-source embedding function
# Local sentence-transformers model (runs in-process, no API token required);
# instantiated once at import time and shared by initialize().
embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

def initialize():
    """Build a Chroma vector store from the generated document splits.

    Uses the module-level ``embedding_function`` and writes the store to
    ``persist_directory``. Returns the populated Chroma instance.
    """
    documents = gen_splits.gen_splits()
    store = Chroma.from_documents(
        documents=documents,
        persist_directory=persist_directory,
        embedding=embedding_function,
    )
    store.persist()
    return store


# if __name__ == "__main__":
#     vectordb = initialize()