Spaces:
Runtime error
Runtime error
Update initialize.py
Browse files- initialize.py +3 -42
initialize.py
CHANGED
@@ -7,9 +7,6 @@ from langchain_ollama import OllamaEmbeddings
|
|
7 |
import ollama
|
8 |
import chromadb
|
9 |
|
10 |
-
import time
|
11 |
-
import httpx
|
12 |
-
|
13 |
|
14 |
GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY')
|
15 |
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
|
@@ -24,49 +21,13 @@ embedding_model = OpenAIEmbeddings(api_key=OPENAI_API_KEY, model="text-embedding
|
|
24 |
# Create Embeddings for Searching the Splits
|
25 |
persist_directory = './chroma/'
|
26 |
|
27 |
-
# def initialize():
|
28 |
-
# # splits = gen_splits.gen_splits()
|
29 |
-
# # vectordb = Chroma.from_documents(documents=splits, persist_directory=persist_directory, embedding=embedding_model)
|
30 |
-
# # vectordb.persist()
|
31 |
-
|
32 |
-
# splits = gen_splits.gen_splits()
|
33 |
-
# client = chromadb.Client()
|
34 |
-
# collection = client.create_collection(name="docs")
|
35 |
-
# print(splits)
|
36 |
-
# # store each document in a vector embedding database
|
37 |
-
# for i, d in enumerate(splits):
|
38 |
-
# response = ollama.embeddings(model="mxbai-embed-large", prompt=d.page_content)
|
39 |
-
# embedding = response["embedding"]
|
40 |
-
# collection.add(ids=[str(i)],embeddings=[embedding], documents=[d])
|
41 |
-
|
42 |
-
# return collection
|
43 |
-
|
44 |
-
|
45 |
def initialize():
|
46 |
splits = gen_splits.gen_splits()
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
# store each document in a vector embedding database
|
52 |
-
for i, d in enumerate(splits):
|
53 |
-
success = False
|
54 |
-
attempts = 0
|
55 |
-
max_attempts = 5
|
56 |
-
|
57 |
-
while not success and attempts < max_attempts:
|
58 |
-
try:
|
59 |
-
response = ollama.embeddings(model="mxbai-embed-large", prompt=d.page_content)
|
60 |
-
embedding = response["embedding"]
|
61 |
-
collection.add(ids=[str(i)], embeddings=[embedding], documents=[d])
|
62 |
-
success = True
|
63 |
-
except httpx.ConnectError as e:
|
64 |
-
attempts += 1
|
65 |
-
print(f"Connection failed (attempt {attempts}): {e}")
|
66 |
-
time.sleep(2) # retry after waiting for 2 seconds
|
67 |
-
|
68 |
return collection
|
69 |
|
70 |
|
|
|
71 |
if __name__ == "__main__":
|
72 |
vectordb = initialize()
|
|
|
7 |
import ollama
|
8 |
import chromadb
|
9 |
|
|
|
|
|
|
|
10 |
|
11 |
GEMINI_API_KEY = os.environ.get('GEMINI_API_KEY')
|
12 |
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
|
|
|
21 |
# Create Embeddings for Searching the Splits
|
22 |
persist_directory = './chroma/'
|
23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
def initialize():
    """Build the Chroma vector store from the generated document splits.

    The splits come from ``gen_splits.gen_splits()``; they are embedded with
    the module-level ``embedding_model`` and written to ``persist_directory``.

    Returns:
        The vector store created by ``Chroma.from_documents``.
    """
    splits = gen_splits.gen_splits()
    vectordb = Chroma.from_documents(
        documents=splits,
        persist_directory=persist_directory,
        embedding=embedding_model,
    )
    # NOTE(review): newer Chroma/LangChain releases persist automatically and
    # may no longer expose persist() -- confirm against the pinned version.
    vectordb.persist()
    # Return the store that was just built. The previous `return collection`
    # referred to a name this function never assigns, so the populated store
    # was discarded.
    return vectordb


if __name__ == "__main__":
    # Running the module directly builds (or rebuilds) the persisted store.
    vectordb = initialize()
|