# Vector-store setup: embeds document splits and persists them to a local Chroma DB.
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
import os
import gen_splits
from langchain_ollama import OllamaEmbeddings
import ollama
import chromadb
import requests # Added import for requests
# Hugging Face Inference API token, read from the environment.
# os.environ.get returns None when the variable is unset; embed() would then
# send "Bearer None" and fail with an HTTP error — set HF_token before use.
HF_token = os.environ.get('HF_token')
# Embedding Function
def embed(texts):
    """Embed text(s) via the hosted Hugging Face Inference API.

    Args:
        texts: A string or list of strings sent as the request's
            ``"inputs"`` payload to the feature-extraction pipeline.

    Returns:
        The JSON-decoded response body — the embedding vector(s)
        produced by ``sentence-transformers/all-MiniLM-L6-v2``.

    Raises:
        requests.HTTPError: If the API returns a non-2xx status.
        requests.Timeout: If the request exceeds the timeout.
    """
    model_id = "sentence-transformers/all-MiniLM-L6-v2"
    api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_id}"
    headers = {"Authorization": f"Bearer {HF_token}"}
    # wait_for_model avoids a 503 while the hosted model is cold-starting.
    response = requests.post(
        api_url,
        headers=headers,
        json={"inputs": texts, "options": {"wait_for_model": True}},
        # Fix: requests has NO default timeout — without one, a stalled
        # connection blocks this call forever.
        timeout=60,
    )
    response.raise_for_status()
    return response.json()
# Create Embeddings for Searching the Splits
# Directory where the Chroma database files are persisted on disk.
persist_directory = './chroma/'
# create the open-source embedding function
# NOTE(review): this runs the MiniLM model locally (sentence-transformers),
# unlike embed() above, which calls the hosted HF Inference API for the
# same model — two independent paths to the same embeddings.
embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
def initialize():
    """Build a Chroma vector store from the document splits and persist it.

    Loads the splits via ``gen_splits.gen_splits()``, embeds them with the
    module-level ``embedding_function``, writes the store under
    ``persist_directory``, and returns the resulting Chroma instance.
    """
    documents = gen_splits.gen_splits()
    store = Chroma.from_documents(
        documents=documents,
        persist_directory=persist_directory,
        embedding=embedding_function,
    )
    # Flush the collection to disk so later runs can reload it.
    store.persist()
    return store
# if __name__ == "__main__":
#     vectordb = initialize()