Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -20,6 +20,7 @@ from huggingface_hub import InferenceClient
|
|
20 |
import inspect
|
21 |
import logging
|
22 |
import shutil
|
|
|
23 |
|
24 |
|
25 |
# Set up basic configuration for logging
|
@@ -274,22 +275,19 @@ def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temp
|
|
274 |
return final_response
|
275 |
|
276 |
def get_web_search_database():
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
else:
|
282 |
-
get_web_search_database.database = WebSearchFAISS.from_documents([], embed)
|
283 |
-
return get_web_search_database.database
|
284 |
|
285 |
-
def
|
286 |
-
|
287 |
|
288 |
def duckduckgo_search(query):
|
289 |
with DDGS() as ddgs:
|
290 |
results = list(ddgs.text(query, max_results=5))
|
291 |
|
292 |
-
database = get_web_search_database()
|
293 |
documents = []
|
294 |
for result in results:
|
295 |
content = f"{result['title']}\n{result['body']}"
|
@@ -297,12 +295,9 @@ def duckduckgo_search(query):
|
|
297 |
documents.append(doc)
|
298 |
|
299 |
database.add_documents(documents)
|
300 |
-
|
301 |
-
|
302 |
-
return results
|
303 |
|
304 |
-
def retrieve_web_search_results(query):
|
305 |
-
database = get_web_search_database()
|
306 |
retriever = database.as_retriever(search_kwargs={"k": 5})
|
307 |
relevant_docs = retriever.get_relevant_documents(query)
|
308 |
|
@@ -453,34 +448,38 @@ After writing the document, please provide a list of sources used in your respon
|
|
453 |
yield "I apologize, but I couldn't generate a response at this time. Please try again later."
|
454 |
|
455 |
def get_response_with_search(query, model, num_calls=3, temperature=0.2):
|
456 |
-
search_results = duckduckgo_search(query)
|
457 |
-
context = retrieve_web_search_results(query)
|
458 |
|
459 |
prompt = f"""Using the following context from web search results:
|
460 |
{context}
|
461 |
Write a detailed and complete research document that fulfills the following user request: '{query}'
|
462 |
After writing the document, please provide a list of sources used in your response."""
|
463 |
|
464 |
-
|
465 |
-
|
466 |
-
|
467 |
-
|
468 |
-
|
469 |
-
|
470 |
-
|
471 |
-
|
472 |
-
|
473 |
-
|
474 |
-
for
|
475 |
-
|
476 |
-
|
477 |
-
|
478 |
-
|
479 |
-
|
480 |
-
|
481 |
-
|
482 |
-
|
483 |
-
|
|
|
|
|
|
|
|
|
484 |
|
485 |
def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=0.2):
|
486 |
logging.info(f"Entering get_response_from_pdf with query: {query}, model: {model}, selected_docs: {selected_docs}")
|
|
|
20 |
import inspect
|
21 |
import logging
|
22 |
import shutil
|
23 |
+
import tempfile
|
24 |
|
25 |
|
26 |
# Set up basic configuration for logging
|
|
|
275 |
return final_response
|
276 |
|
277 |
def get_web_search_database():
    """Create a fresh, empty web-search vector store plus a scratch directory.

    A new store is built on every call, so nothing is shared between
    requests.

    Returns:
        tuple: ``(database, temp_dir)`` where ``database`` is the result of
        ``WebSearchFAISS.from_documents([], ...)`` and ``temp_dir`` is the
        path returned by ``tempfile.mkdtemp()``. The directory is not removed
        here; the caller is responsible for deleting it (see
        ``cleanup_web_search_database``).
    """
    embeddings = get_embeddings()
    # NOTE(review): the scratch directory is created but not passed to the
    # store in this function -- confirm whether anything actually writes to it.
    scratch_dir = tempfile.mkdtemp()
    store = WebSearchFAISS.from_documents([], embeddings)
    return store, scratch_dir
|
|
|
|
|
|
|
282 |
|
283 |
+
def cleanup_web_search_database(temp_dir):
    """Remove the temporary directory associated with a web-search database.

    Removal is best-effort: a directory that is already gone (or only
    partially removable) is ignored. This function is called from a
    ``finally`` block, and an error raised here would otherwise replace the
    exception that is already propagating.

    Args:
        temp_dir: Path of the directory to delete. ``None`` or an empty
            string is treated as "nothing to clean up".

    Returns:
        None
    """
    if not temp_dir:
        # Nothing was created, so there is nothing to delete.
        return
    shutil.rmtree(temp_dir, ignore_errors=True)
|
285 |
|
286 |
def duckduckgo_search(query):
|
287 |
with DDGS() as ddgs:
|
288 |
results = list(ddgs.text(query, max_results=5))
|
289 |
|
290 |
+
database, temp_dir = get_web_search_database()
|
291 |
documents = []
|
292 |
for result in results:
|
293 |
content = f"{result['title']}\n{result['body']}"
|
|
|
295 |
documents.append(doc)
|
296 |
|
297 |
database.add_documents(documents)
|
298 |
+
return database, temp_dir, results
|
|
|
|
|
299 |
|
300 |
+
def retrieve_web_search_results(database, query):
|
|
|
301 |
retriever = database.as_retriever(search_kwargs={"k": 5})
|
302 |
relevant_docs = retriever.get_relevant_documents(query)
|
303 |
|
|
|
448 |
yield "I apologize, but I couldn't generate a response at this time. Please try again later."
|
449 |
|
450 |
def get_response_with_search(query, model, num_calls=3, temperature=0.2):
    """Stream a model answer to *query* using fresh web-search context.

    The function is a generator. It runs a DuckDuckGo search (which also
    builds a per-request database and temp directory), retrieves context for
    the query from that database, and then streams the model output.

    Args:
        query: The user's request; used both for the search and in the prompt.
        model: Model identifier. ``"@cf/meta/llama-3.1-8b-instruct"`` is
            routed to the Cloudflare helper; anything else goes through the
            Hugging Face ``InferenceClient``.
        num_calls: Number of generation passes (forwarded to the Cloudflare
            helper, or used as the loop count on the Hugging Face path).
        temperature: Sampling temperature passed to the model.

    Yields:
        tuple: ``(text, "")``. The second element (sources) is always an
        empty string. On the Hugging Face path ``text`` is the content
        accumulated so far; on the Cloudflare path it is whatever
        ``get_response_from_cloudflare`` yields.
    """
    # The raw search hits are not needed here; only the populated database is.
    database, temp_dir, _search_results = duckduckgo_search(query)

    try:
        # Retrieval happens inside the try so that a failure here still
        # triggers the temp-directory cleanup below.
        context = retrieve_web_search_results(database, query)

        prompt = f"""Using the following context from web search results:
{context}
Write a detailed and complete research document that fulfills the following user request: '{query}'
After writing the document, please provide a list of sources used in your response."""

        if model == "@cf/meta/llama-3.1-8b-instruct":
            # Use Cloudflare API. The helper receives context and query
            # separately, so the prompt argument is passed as empty.
            for response in get_response_from_cloudflare(prompt="", context=context, query=query, num_calls=num_calls, temperature=temperature, search_type="web"):
                yield response, ""  # Yield streaming response without sources
        else:
            # Use Hugging Face API
            client = InferenceClient(model, token=huggingface_token)

            main_content = ""
            for _ in range(num_calls):
                for message in client.chat_completion(
                    messages=[{"role": "user", "content": prompt}],
                    max_tokens=10000,
                    temperature=temperature,
                    stream=True,
                ):
                    if message.choices and message.choices[0].delta and message.choices[0].delta.content:
                        chunk = message.choices[0].delta.content
                        main_content += chunk
                        yield main_content, ""  # Yield partial main content without sources
    finally:
        # Clean up the temporary database
        cleanup_web_search_database(temp_dir)
|
483 |
|
484 |
def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=0.2):
|
485 |
logging.info(f"Entering get_response_from_pdf with query: {query}, model: {model}, selected_docs: {selected_docs}")
|