SearchGPTTest

Sleeping

App Files Files Community

Shreyas094 committed on Aug 7, 2024

Commit

1f08962

verified ·

1 Parent(s): f23ec49

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -4

app.py CHANGED Viewed

@@ -66,8 +66,28 @@ def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[
     else:
         raise ValueError("Invalid parser specified. Use 'pypdf' or 'llamaparse'.")
 def get_embeddings():
-    return HuggingFaceEmbeddings(model_name="McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-supervised")
 # Add this at the beginning of your script, after imports
 DOCUMENTS_FILE = "uploaded_documents.json"
@@ -125,11 +145,13 @@ def update_vectors(files, parser):
     try:
         if os.path.exists("faiss_database"):
             logging.info("Updating existing FAISS database")
-            database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
-            database.add_documents(all_data)
         else:
             logging.info("Creating new FAISS database")
-            database = FAISS.from_documents(all_data, embed)
         database.save_local("faiss_database")
         logging.info("FAISS database saved")

     else:
         raise ValueError("Invalid parser specified. Use 'pypdf' or 'llamaparse'.")
+class HuggingFaceEmbeddings:
+    def __init__(self, api_token):
+        self.api_url = "https://api-inference.huggingface.co/models/dunzhang/stella_en_1.5B_v5"
+        self.headers = {"Authorization": f"Bearer {api_token}"}
+    def query(self, payload):
+        response = requests.post(self.api_url, headers=self.headers, json=payload)
+        return response.json()
+    def embed_documents(self, texts):
+        payload = {"inputs": texts}
+        response = self.query(payload)
+        if isinstance(response, list):
+            return [np.array(embedding) for embedding in response]
+        else:
+            raise ValueError(f"Unexpected response format: {response}")
+    def embed_query(self, text):
+        return self.embed_documents([text])[0]
 def get_embeddings():
+    # Build the API-backed embeddings client defined above.
+    # NOTE(review): `huggingface_token` is not defined in this hunk; presumably a
+    # module-level value set elsewhere in app.py. Confirm it is set before this runs.
+    return HuggingFaceEmbeddings(api_token=huggingface_token)
 # Add this at the beginning of your script, after imports
 DOCUMENTS_FILE = "uploaded_documents.json"
     try:
         if os.path.exists("faiss_database"):
             logging.info("Updating existing FAISS database")
+            database = FAISS.load_local("faiss_database", embed.embed_query, allow_dangerous_deserialization=True)
+            embeddings = embed.embed_documents([doc.page_content for doc in all_data])
+            database.add_embeddings(embeddings, all_data)
         else:
             logging.info("Creating new FAISS database")
+            embeddings = embed.embed_documents([doc.page_content for doc in all_data])
+            database = FAISS.from_embeddings(embeddings, all_data, embed)
         database.save_local("faiss_database")
         logging.info("FAISS database saved")