Shreyas094 committed on
Commit
1f08962
·
verified ·
1 Parent(s): f23ec49

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -4
app.py CHANGED
@@ -66,8 +66,28 @@ def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[
66
  else:
67
  raise ValueError("Invalid parser specified. Use 'pypdf' or 'llamaparse'.")
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  def get_embeddings():
70
- return HuggingFaceEmbeddings(model_name="McGill-NLP/LLM2Vec-Sheared-LLaMA-mntp-supervised")
71
 
72
  # Add this at the beginning of your script, after imports
73
  DOCUMENTS_FILE = "uploaded_documents.json"
@@ -125,11 +145,13 @@ def update_vectors(files, parser):
125
  try:
126
  if os.path.exists("faiss_database"):
127
  logging.info("Updating existing FAISS database")
128
- database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
129
- database.add_documents(all_data)
 
130
  else:
131
  logging.info("Creating new FAISS database")
132
- database = FAISS.from_documents(all_data, embed)
 
133
 
134
  database.save_local("faiss_database")
135
  logging.info("FAISS database saved")
 
66
  else:
67
  raise ValueError("Invalid parser specified. Use 'pypdf' or 'llamaparse'.")
68
 
69
class HuggingFaceEmbeddings:
    """Small embeddings client that calls a hosted Hugging Face model over HTTP.

    Texts are POSTed to the ``dunzhang/stella_en_1.5B_v5`` inference endpoint
    and the returned vectors are handed back as NumPy arrays.

    Args:
        api_token: Token sent as a ``Bearer`` credential in the
            ``Authorization`` header of every request.
        timeout: Seconds to wait for the endpoint before ``requests`` gives
            up. Without a timeout a stalled connection would block forever.
    """

    def __init__(self, api_token, timeout=60):
        self.api_url = "https://api-inference.huggingface.co/models/dunzhang/stella_en_1.5B_v5"
        self.headers = {"Authorization": f"Bearer {api_token}"}
        self.timeout = timeout

    def __call__(self, text):
        """Embed a single text, so the instance also works as a plain callable.

        NOTE(review): the instance is passed directly to the vector store
        elsewhere in this file; presumably some code paths invoke it as
        ``embed(text)`` -- confirm against the FAISS wrapper in use.
        """
        return self.embed_query(text)

    def query(self, payload):
        """POST ``payload`` as JSON to the endpoint and return the decoded body.

        The decoded body is returned whatever the HTTP status is, so an error
        object sent by the service reaches the caller unchanged.
        """
        response = requests.post(
            self.api_url,
            headers=self.headers,
            json=payload,
            timeout=self.timeout,
        )
        return response.json()

    def embed_documents(self, texts):
        """Return one NumPy array per entry of ``texts``.

        Args:
            texts: Iterable of strings to embed.

        Returns:
            A list of ``numpy.ndarray``; empty when ``texts`` is empty.

        Raises:
            ValueError: If the service answers with anything but a list
                (for example an error object).
        """
        texts = list(texts)
        if not texts:
            # Nothing to embed: skip the network round-trip entirely.
            return []
        response = self.query({"inputs": texts})
        if not isinstance(response, list):
            raise ValueError(f"Unexpected response format: {response}")
        return [np.array(embedding) for embedding in response]

    def embed_query(self, text):
        """Embed a single string and return its vector."""
        return self.embed_documents([text])[0]
88
+
89
  def get_embeddings():
90
+ return HuggingFaceEmbeddings(api_token=huggingface_token)
91
 
92
  # Add this at the beginning of your script, after imports
93
  DOCUMENTS_FILE = "uploaded_documents.json"
 
145
  try:
146
  if os.path.exists("faiss_database"):
147
  logging.info("Updating existing FAISS database")
148
+ database = FAISS.load_local("faiss_database", embed.embed_query, allow_dangerous_deserialization=True)
149
+ embeddings = embed.embed_documents([doc.page_content for doc in all_data])
150
+ database.add_embeddings(embeddings, all_data)
151
  else:
152
  logging.info("Creating new FAISS database")
153
+ embeddings = embed.embed_documents([doc.page_content for doc in all_data])
154
+ database = FAISS.from_embeddings(embeddings, all_data, embed)
155
 
156
  database.save_local("faiss_database")
157
  logging.info("FAISS database saved")