abhivsh committed on
Commit
4cc4dc7
·
verified ·
1 Parent(s): 61c450e

Update initialize.py

Browse files
Files changed (1) hide show
  1. initialize.py +13 -2
initialize.py CHANGED
@@ -4,6 +4,7 @@ from langchain_community.vectorstores import Chroma
4
  import os
5
  import gen_splits
6
  from langchain_ollama import OllamaEmbeddings
 
7
 
8
 
9
 
@@ -21,10 +22,20 @@ embedding_model = OpenAIEmbeddings(api_key=OPENAI_API_KEY, model="text-embedding
21
  persist_directory = './chroma/'
22
 
23
  def initialize():
 
 
 
 
24
  splits = gen_splits.gen_splits()
25
- vectordb = Chroma.from_documents(documents=splits, persist_directory=persist_directory, embedding=embedding_model)
26
- vectordb.persist()
 
 
 
 
 
27
  return vectordb
 
28
 
29
  if __name__ == "__main__":
30
  vectordb = initialize()
 
4
  import os
5
  import gen_splits
6
  from langchain_ollama import OllamaEmbeddings
7
+ import ollama
8
 
9
 
10
 
 
22
  persist_directory = './chroma/'
23
 
24
  def initialize():
25
+ # splits = gen_splits.gen_splits()
26
+ # vectordb = Chroma.from_documents(documents=splits, persist_directory=persist_directory, embedding=embedding_model)
27
+ # vectordb.persist()
28
+
29
  splits = gen_splits.gen_splits()
30
+ collection = client.create_collection(name="docs")
31
+ # store each document in a vector embedding database
32
+ for i, d in enumerate(splits):
33
+ response = ollama.embeddings(model="mxbai-embed-large", prompt=d)
34
+ embedding = response["embedding"]
35
+ collection.add(ids=[str(i)],embeddings=[embedding], documents=[d])
36
+
37
  return vectordb
38
+
39
 
40
  if __name__ == "__main__":
41
  vectordb = initialize()