Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -66,24 +66,9 @@ def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[
|
|
66 |
else:
|
67 |
raise ValueError("Invalid parser specified. Use 'pypdf' or 'llamaparse'.")
|
68 |
|
69 |
-
# Function to get the embeddings model
|
70 |
def get_embeddings():
|
71 |
return HuggingFaceEmbeddings(model_name="avsolatorio/GIST-Embedding-v0")
|
72 |
|
73 |
-
# Function to encode text with specific financial focus
|
74 |
-
def get_embedding(text):
|
75 |
-
instruction = """
|
76 |
-
Encode this text with a focus on financial information.
|
77 |
-
Pay special attention to:
|
78 |
-
1. Numerical data related to earnings, revenue, and other financial metrics
|
79 |
-
2. Statements about company performance and future outlook
|
80 |
-
3. Comparisons to previous periods or industry benchmarks
|
81 |
-
4. Key financial terms and their context
|
82 |
-
Ignore general boilerplate text and focus on the substantive financial content.
|
83 |
-
"""
|
84 |
-
embedding_model = get_embeddings()
|
85 |
-
return embedding_model.encode(instruction + "\n\n" + text)
|
86 |
-
|
87 |
# Add this at the beginning of your script, after imports
|
88 |
DOCUMENTS_FILE = "uploaded_documents.json"
|
89 |
|
@@ -109,7 +94,7 @@ def update_vectors(files, parser):
|
|
109 |
logging.warning("No files provided for update_vectors")
|
110 |
return "Please upload at least one PDF file.", display_documents()
|
111 |
|
112 |
-
embed =
|
113 |
total_chunks = 0
|
114 |
|
115 |
all_data = []
|
@@ -163,7 +148,7 @@ def delete_documents(selected_docs):
|
|
163 |
if not selected_docs:
|
164 |
return "No documents selected for deletion.", display_documents()
|
165 |
|
166 |
-
embed =
|
167 |
database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
|
168 |
|
169 |
deleted_docs = []
|
@@ -337,7 +322,7 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
|
|
337 |
# logging.info(f"Generated Response (first line): {first_line}")
|
338 |
yield response
|
339 |
else:
|
340 |
-
embed =
|
341 |
if os.path.exists("faiss_database"):
|
342 |
database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
|
343 |
retriever = database.as_retriever(search_kwargs={"k": 20})
|
@@ -468,7 +453,7 @@ After writing the document, please provide a list of sources used in your respon
|
|
468 |
def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=0.2):
|
469 |
logging.info(f"Entering get_response_from_pdf with query: {query}, model: {model}, selected_docs: {selected_docs}")
|
470 |
|
471 |
-
embed =
|
472 |
if os.path.exists("faiss_database"):
|
473 |
logging.info("Loading FAISS database")
|
474 |
database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
|
|
|
66 |
else:
|
67 |
raise ValueError("Invalid parser specified. Use 'pypdf' or 'llamaparse'.")
|
68 |
|
|
|
69 |
def get_embeddings():
|
70 |
return HuggingFaceEmbeddings(model_name="avsolatorio/GIST-Embedding-v0")
|
71 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
# Add this at the beginning of your script, after imports
|
73 |
DOCUMENTS_FILE = "uploaded_documents.json"
|
74 |
|
|
|
94 |
logging.warning("No files provided for update_vectors")
|
95 |
return "Please upload at least one PDF file.", display_documents()
|
96 |
|
97 |
+
embed = get_embeddings()
|
98 |
total_chunks = 0
|
99 |
|
100 |
all_data = []
|
|
|
148 |
if not selected_docs:
|
149 |
return "No documents selected for deletion.", display_documents()
|
150 |
|
151 |
+
embed = get_embeddings()
|
152 |
database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
|
153 |
|
154 |
deleted_docs = []
|
|
|
322 |
# logging.info(f"Generated Response (first line): {first_line}")
|
323 |
yield response
|
324 |
else:
|
325 |
+
embed = get_embeddings()
|
326 |
if os.path.exists("faiss_database"):
|
327 |
database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
|
328 |
retriever = database.as_retriever(search_kwargs={"k": 20})
|
|
|
453 |
def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=0.2):
|
454 |
logging.info(f"Entering get_response_from_pdf with query: {query}, model: {model}, selected_docs: {selected_docs}")
|
455 |
|
456 |
+
embed = get_embeddings()
|
457 |
if os.path.exists("faiss_database"):
|
458 |
logging.info("Loading FAISS database")
|
459 |
database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
|