File size: 927 Bytes
9091381
2f796ab
9091381
ff2a712
78c6667
 
e391b31
ecef4cf
9091381
 
ecef4cf
 
e83689e
9091381
 
 
78c6667
 
5a0035a
78c6667
 
4f64706
53d161a
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
import os
import gen_splits

# API credentials are read from the environment; an unset variable yields None.
GEMINI_API_KEY = os.getenv('GEMINI_API_KEY')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

# Active embedding backend: OpenAI "text-embedding-3-small".
embedding_model = OpenAIEmbeddings(
    model="text-embedding-3-small",
    api_key=OPENAI_API_KEY,
)

# Alternative backend (currently disabled): Google GenAI text embeddings.
# embedding_model = GoogleGenerativeAIEmbeddings(
#     model="models/embedding-001",
#     task_type="retrieval_query",
#     google_api_key=GEMINI_API_KEY,
# )

# Directory handed to Chroma as the on-disk location of the vector store.
persist_directory = './chroma/'

def initialize():
    """Build the Chroma vector store from the generated splits and persist it.

    The documents come from ``gen_splits.gen_splits()`` and are embedded with
    the module-level ``embedding_model``; the store is written under
    ``persist_directory``.

    Returns:
        The store object produced by ``Chroma.from_documents``.
    """
    store = Chroma.from_documents(
        documents=gen_splits.gen_splits(),
        embedding=embedding_model,
        persist_directory=persist_directory,
    )
    # Explicitly flush the store to disk.
    # NOTE(review): newer Chroma releases reportedly persist automatically and
    # deprecate persist() -- confirm against the installed version.
    store.persist()
    return store

# Script entry point: when this file is executed directly (not imported),
# build the vector store via initialize() and keep it in a module-level name.
if __name__ == "__main__":
    vectordb = initialize()