Spaces:
Runtime error
Runtime error
Update initialize.py
Browse files- initialize.py +13 -2
initialize.py
CHANGED
@@ -4,6 +4,7 @@ from langchain_community.vectorstores import Chroma
|
|
4 |
import os
|
5 |
import gen_splits
|
6 |
from langchain_ollama import OllamaEmbeddings
|
|
|
7 |
|
8 |
|
9 |
|
@@ -21,10 +22,20 @@ embedding_model = OpenAIEmbeddings(api_key=OPENAI_API_KEY, model="text-embedding
|
|
21 |
persist_directory = './chroma/'
|
22 |
|
23 |
def initialize():
|
|
|
|
|
|
|
|
|
24 |
splits = gen_splits.gen_splits()
|
25 |
-
|
26 |
-
|
|
|
|
|
|
|
|
|
|
|
27 |
return vectordb
|
|
|
28 |
|
29 |
if __name__ == "__main__":
|
30 |
vectordb = initialize()
|
|
|
4 |
import os
|
5 |
import gen_splits
|
6 |
from langchain_ollama import OllamaEmbeddings
|
7 |
+
import ollama
|
8 |
|
9 |
|
10 |
|
|
|
22 |
persist_directory = './chroma/'
|
23 |
|
24 |
def initialize():
    """Embed every split with Ollama and store it in the "docs" collection.

    Each item produced by ``gen_splits.gen_splits()`` is embedded with the
    ``mxbai-embed-large`` model and added to a newly created collection
    under its positional index (as a string id).

    Returns:
        The populated collection. The function previously ended with
        ``return vectordb``, but no ``vectordb`` is assigned in this
        function any more, so that line raised ``NameError`` once all the
        embeddings had been computed.
    """
    splits = gen_splits.gen_splits()

    # NOTE(review): `client` is not defined in the visible part of this file;
    # confirm a module-level vector-store client exists before this runs.
    collection = client.create_collection(name="docs")

    # Store each document in the vector embedding database.
    for index, document in enumerate(splits):
        # NOTE(review): assumes each split is a plain string; if gen_splits
        # returns document objects, their text must be passed here instead.
        response = ollama.embeddings(model="mxbai-embed-large", prompt=document)
        collection.add(
            ids=[str(index)],
            embeddings=[response["embedding"]],
            documents=[document],
        )

    return collection
|
38 |
+
|
39 |
|
40 |
# Script entry point: build the embedding store only when this module is run
# directly, not when it is imported.
if __name__ == "__main__":
    vectordb = initialize()
|