SearchGPTTest

Sleeping

App Files Files Community

Shreyas094 committed on Aug 8, 2024

Commit

43923d8

verified ·

1 Parent(s): 0d333d4

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -19

app.py CHANGED Viewed

@@ -66,24 +66,9 @@ def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[
     else:
         raise ValueError("Invalid parser specified. Use 'pypdf' or 'llamaparse'.")
-# Function to get the embeddings model
 def get_embeddings():
     return HuggingFaceEmbeddings(model_name="avsolatorio/GIST-Embedding-v0")
-# Function to encode text with specific financial focus
-def get_embedding(text):
-    instruction = """
-    Encode this text with a focus on financial information.
-    Pay special attention to:
-    1. Numerical data related to earnings, revenue, and other financial metrics
-    2. Statements about company performance and future outlook
-    3. Comparisons to previous periods or industry benchmarks
-    4. Key financial terms and their context
-    Ignore general boilerplate text and focus on the substantive financial content.
-    """
-    embedding_model = get_embeddings()
-    return embedding_model.encode(instruction + "\n\n" + text)
 # Add this at the beginning of your script, after imports
 DOCUMENTS_FILE = "uploaded_documents.json"
@@ -109,7 +94,7 @@ def update_vectors(files, parser):
         logging.warning("No files provided for update_vectors")
         return "Please upload at least one PDF file.", display_documents()
-    embed = get_embedding()
     total_chunks = 0
     all_data = []
@@ -163,7 +148,7 @@ def delete_documents(selected_docs):
     if not selected_docs:
         return "No documents selected for deletion.", display_documents()
-    embed = get_embedding()
     database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
     deleted_docs = []
@@ -337,7 +322,7 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
 #                logging.info(f"Generated Response (first line): {first_line}")
                 yield response
         else:
-            embed = get_embedding()
             if os.path.exists("faiss_database"):
                 database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
                 retriever = database.as_retriever(search_kwargs={"k": 20})
@@ -468,7 +453,7 @@ After writing the document, please provide a list of sources used in your respon
 def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=0.2):
     logging.info(f"Entering get_response_from_pdf with query: {query}, model: {model}, selected_docs: {selected_docs}")
-    embed = get_embedding()
     if os.path.exists("faiss_database"):
         logging.info("Loading FAISS database")
         database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)

     else:
         raise ValueError("Invalid parser specified. Use 'pypdf' or 'llamaparse'.")
 def get_embeddings():
     return HuggingFaceEmbeddings(model_name="avsolatorio/GIST-Embedding-v0")
 # Add this at the beginning of your script, after imports
 DOCUMENTS_FILE = "uploaded_documents.json"
         logging.warning("No files provided for update_vectors")
         return "Please upload at least one PDF file.", display_documents()
+    embed = get_embeddings()
     total_chunks = 0
     all_data = []
     if not selected_docs:
         return "No documents selected for deletion.", display_documents()
+    embed = get_embeddings()
     database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
     deleted_docs = []
 #                logging.info(f"Generated Response (first line): {first_line}")
                 yield response
         else:
+            embed = get_embeddings()
             if os.path.exists("faiss_database"):
                 database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
                 retriever = database.as_retriever(search_kwargs={"k": 20})
 def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=0.2):
     logging.info(f"Entering get_response_from_pdf with query: {query}, model: {model}, selected_docs: {selected_docs}")
+    embed = get_embeddings()
     if os.path.exists("faiss_database"):
         logging.info("Loading FAISS database")
         database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)