abhivsh committed on
Commit
c8b9e42
·
verified ·
1 Parent(s): 0e67e33

Update initialize.py

Browse files
Files changed (1) hide show
  1. initialize.py +3 -42
initialize.py CHANGED
@@ -7,9 +7,6 @@ from langchain_ollama import OllamaEmbeddings
7
  import ollama
8
  import chromadb
9
 
10
- import time
11
- import httpx
12
-
13
 
14
  GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY')
15
  OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
@@ -24,49 +21,13 @@ embedding_model = OpenAIEmbeddings(api_key=OPENAI_API_KEY, model="text-embedding
24
  # Create Embeddings for Searching the Splits
25
  persist_directory = './chroma/'
26
 
27
- # def initialize():
28
- # # splits = gen_splits.gen_splits()
29
- # # vectordb = Chroma.from_documents(documents=splits, persist_directory=persist_directory, embedding=embedding_model)
30
- # # vectordb.persist()
31
-
32
- # splits = gen_splits.gen_splits()
33
- # client = chromadb.Client()
34
- # collection = client.create_collection(name="docs")
35
- # print(splits)
36
- # # store each document in a vector embedding database
37
- # for i, d in enumerate(splits):
38
- # response = ollama.embeddings(model="mxbai-embed-large", prompt=d.page_content)
39
- # embedding = response["embedding"]
40
- # collection.add(ids=[str(i)],embeddings=[embedding], documents=[d])
41
-
42
- # return collection
43
-
44
-
45
  def initialize():
46
  splits = gen_splits.gen_splits()
47
- client = chromadb.Client()
48
- collection = client.create_collection(name="docs")
49
-
50
-
51
- # store each document in a vector embedding database
52
- for i, d in enumerate(splits):
53
- success = False
54
- attempts = 0
55
- max_attempts = 5
56
-
57
- while not success and attempts < max_attempts:
58
- try:
59
- response = ollama.embeddings(model="mxbai-embed-large", prompt=d.page_content)
60
- embedding = response["embedding"]
61
- collection.add(ids=[str(i)], embeddings=[embedding], documents=[d])
62
- success = True
63
- except httpx.ConnectError as e:
64
- attempts += 1
65
- print(f"Connection failed (attempt {attempts}): {e}")
66
- time.sleep(2) # retry after waiting for 2 seconds
67
-
68
  return collection
69
 
70
 
 
71
  if __name__ == "__main__":
72
  vectordb = initialize()
 
7
  import ollama
8
  import chromadb
9
 
 
 
 
10
 
11
  GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY')
12
  OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
 
21
  # Create Embeddings for Searching the Splits
22
  persist_directory = './chroma/'
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  def initialize():
25
  splits = gen_splits.gen_splits()
26
+ vectordb = Chroma.from_documents(documents=splits, persist_directory=persist_directory, embedding=embedding_model)
27
+ vectordb.persist()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  return collection
29
 
30
 
31
+
32
  if __name__ == "__main__":
33
  vectordb = initialize()