Shreyas094 committed on
Commit
43923d8
·
verified ·
1 Parent(s): 0d333d4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -19
app.py CHANGED
@@ -66,24 +66,9 @@ def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[
66
  else:
67
  raise ValueError("Invalid parser specified. Use 'pypdf' or 'llamaparse'.")
68
 
69
- # Function to get the embeddings model
70
  def get_embeddings():
71
  return HuggingFaceEmbeddings(model_name="avsolatorio/GIST-Embedding-v0")
72
 
73
- # Function to encode text with specific financial focus
74
- def get_embedding(text):
75
- instruction = """
76
- Encode this text with a focus on financial information.
77
- Pay special attention to:
78
- 1. Numerical data related to earnings, revenue, and other financial metrics
79
- 2. Statements about company performance and future outlook
80
- 3. Comparisons to previous periods or industry benchmarks
81
- 4. Key financial terms and their context
82
- Ignore general boilerplate text and focus on the substantive financial content.
83
- """
84
- embedding_model = get_embeddings()
85
- return embedding_model.encode(instruction + "\n\n" + text)
86
-
87
  # Add this at the beginning of your script, after imports
88
  DOCUMENTS_FILE = "uploaded_documents.json"
89
 
@@ -109,7 +94,7 @@ def update_vectors(files, parser):
109
  logging.warning("No files provided for update_vectors")
110
  return "Please upload at least one PDF file.", display_documents()
111
 
112
- embed = get_embedding()
113
  total_chunks = 0
114
 
115
  all_data = []
@@ -163,7 +148,7 @@ def delete_documents(selected_docs):
163
  if not selected_docs:
164
  return "No documents selected for deletion.", display_documents()
165
 
166
- embed = get_embedding()
167
  database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
168
 
169
  deleted_docs = []
@@ -337,7 +322,7 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
337
  # logging.info(f"Generated Response (first line): {first_line}")
338
  yield response
339
  else:
340
- embed = get_embedding()
341
  if os.path.exists("faiss_database"):
342
  database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
343
  retriever = database.as_retriever(search_kwargs={"k": 20})
@@ -468,7 +453,7 @@ After writing the document, please provide a list of sources used in your respon
468
  def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=0.2):
469
  logging.info(f"Entering get_response_from_pdf with query: {query}, model: {model}, selected_docs: {selected_docs}")
470
 
471
- embed = get_embedding()
472
  if os.path.exists("faiss_database"):
473
  logging.info("Loading FAISS database")
474
  database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
 
66
  else:
67
  raise ValueError("Invalid parser specified. Use 'pypdf' or 'llamaparse'.")
68
 
 
69
  def get_embeddings():
70
  return HuggingFaceEmbeddings(model_name="avsolatorio/GIST-Embedding-v0")
71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  # Add this at the beginning of your script, after imports
73
  DOCUMENTS_FILE = "uploaded_documents.json"
74
 
 
94
  logging.warning("No files provided for update_vectors")
95
  return "Please upload at least one PDF file.", display_documents()
96
 
97
+ embed = get_embeddings()
98
  total_chunks = 0
99
 
100
  all_data = []
 
148
  if not selected_docs:
149
  return "No documents selected for deletion.", display_documents()
150
 
151
+ embed = get_embeddings()
152
  database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
153
 
154
  deleted_docs = []
 
322
  # logging.info(f"Generated Response (first line): {first_line}")
323
  yield response
324
  else:
325
+ embed = get_embeddings()
326
  if os.path.exists("faiss_database"):
327
  database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
328
  retriever = database.as_retriever(search_kwargs={"k": 20})
 
453
  def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=0.2):
454
  logging.info(f"Entering get_response_from_pdf with query: {query}, model: {model}, selected_docs: {selected_docs}")
455
 
456
+ embed = get_embeddings()
457
  if os.path.exists("faiss_database"):
458
  logging.info("Loading FAISS database")
459
  database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)