Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -66,8 +66,28 @@ def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[
|
|
66 |
else:
|
67 |
raise ValueError("Invalid parser specified. Use 'pypdf' or 'llamaparse'.")
|
68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
def get_embeddings():
|
70 |
-
return HuggingFaceEmbeddings(
|
71 |
|
72 |
# Add this at the beginning of your script, after imports
|
73 |
DOCUMENTS_FILE = "uploaded_documents.json"
|
@@ -125,11 +145,13 @@ def update_vectors(files, parser):
|
|
125 |
try:
|
126 |
if os.path.exists("faiss_database"):
|
127 |
logging.info("Updating existing FAISS database")
|
128 |
-
database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
|
129 |
-
|
|
|
130 |
else:
|
131 |
logging.info("Creating new FAISS database")
|
132 |
-
|
|
|
133 |
|
134 |
database.save_local("faiss_database")
|
135 |
logging.info("FAISS database saved")
|
|
|
66 |
else:
|
67 |
raise ValueError("Invalid parser specified. Use 'pypdf' or 'llamaparse'.")
|
68 |
|
69 |
+
class HuggingFaceEmbeddings:
    """Minimal embedding client for the Hugging Face hosted Inference API.

    Posts text to the ``dunzhang/stella_en_1.5B_v5`` model endpoint and returns
    the vectors as NumPy arrays. Exposes ``embed_documents`` / ``embed_query``
    and is also directly callable on a single text.

    Args:
        api_token: Hugging Face API token, sent as a Bearer credential.
        timeout: Seconds to wait for the HTTP request before ``requests``
            raises a timeout error. Defaults to 120 seconds.
    """

    API_URL = "https://api-inference.huggingface.co/models/dunzhang/stella_en_1.5B_v5"

    def __init__(self, api_token, timeout=120.0):
        self.api_url = self.API_URL
        self.headers = {"Authorization": f"Bearer {api_token}"}
        self.timeout = timeout

    def __call__(self, text):
        """Embed a single text; equivalent to ``embed_query(text)``.

        NOTE(review): this instance is passed to FAISS as the embedding
        function without subclassing LangChain's ``Embeddings``; LangChain
        appears to invoke such objects as plain callables, so without this
        method query-time embedding would fail -- confirm against the
        installed LangChain version.
        """
        return self.embed_query(text)

    def query(self, payload):
        """POST ``payload`` as JSON to the model endpoint and return the decoded JSON body.

        The HTTP status is not inspected here; whatever JSON the service
        returns (including error objects) is handed back to the caller.
        """
        # Without a timeout, requests waits indefinitely on a stalled connection.
        response = requests.post(
            self.api_url,
            headers=self.headers,
            json=payload,
            timeout=self.timeout,
        )
        return response.json()

    def embed_documents(self, texts):
        """Embed a batch of texts.

        Args:
            texts: List of strings to embed.

        Returns:
            A list with one ``numpy.ndarray`` per element of the service
            response; an empty list when ``texts`` is empty.

        Raises:
            ValueError: If the service response is not a JSON list (for
                example, an error object).
        """
        # Nothing to embed: skip the network round-trip entirely.
        if not texts:
            return []
        response = self.query({"inputs": texts})
        if not isinstance(response, list):
            raise ValueError(f"Unexpected response format: {response}")
        return [np.array(embedding) for embedding in response]

    def embed_query(self, text):
        """Embed a single text and return its vector as a ``numpy.ndarray``."""
        return self.embed_documents([text])[0]
|
88 |
+
|
89 |
def get_embeddings():
    """Build the embedding client used by the app.

    Returns a ``HuggingFaceEmbeddings`` instance constructed with the
    module-level ``huggingface_token``.
    """
    embedder = HuggingFaceEmbeddings(api_token=huggingface_token)
    return embedder
|
91 |
|
92 |
# Add this at the beginning of your script, after imports
|
93 |
DOCUMENTS_FILE = "uploaded_documents.json"
|
|
|
145 |
try:
|
146 |
if os.path.exists("faiss_database"):
|
147 |
logging.info("Updating existing FAISS database")
|
148 |
+
database = FAISS.load_local("faiss_database", embed.embed_query, allow_dangerous_deserialization=True)
|
149 |
+
embeddings = embed.embed_documents([doc.page_content for doc in all_data])
|
150 |
+
database.add_embeddings(embeddings, all_data)
|
151 |
else:
|
152 |
logging.info("Creating new FAISS database")
|
153 |
+
embeddings = embed.embed_documents([doc.page_content for doc in all_data])
|
154 |
+
database = FAISS.from_embeddings(embeddings, all_data, embed)
|
155 |
|
156 |
database.save_local("faiss_database")
|
157 |
logging.info("FAISS database saved")
|