Shreyas094 committed on
Commit
c0570dd
·
verified ·
1 Parent(s): 2385219

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -37
app.py CHANGED
@@ -20,6 +20,7 @@ from huggingface_hub import InferenceClient
20
  import inspect
21
  import logging
22
  import shutil
 
23
 
24
 
25
  # Set up basic configuration for logging
@@ -274,22 +275,19 @@ def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temp
274
  return final_response
275
 
276
  def get_web_search_database():
277
- if not hasattr(get_web_search_database, "database"):
278
- embed = get_embeddings()
279
- if os.path.exists("web_search_database"):
280
- get_web_search_database.database = WebSearchFAISS.load_local("web_search_database", embed, allow_dangerous_deserialization=True)
281
- else:
282
- get_web_search_database.database = WebSearchFAISS.from_documents([], embed)
283
- return get_web_search_database.database
284
 
285
- def save_web_search_database(database):
286
- database.save_local("web_search_database")
287
 
288
  def duckduckgo_search(query):
289
  with DDGS() as ddgs:
290
  results = list(ddgs.text(query, max_results=5))
291
 
292
- database = get_web_search_database()
293
  documents = []
294
  for result in results:
295
  content = f"{result['title']}\n{result['body']}"
@@ -297,12 +295,9 @@ def duckduckgo_search(query):
297
  documents.append(doc)
298
 
299
  database.add_documents(documents)
300
- save_web_search_database(database)
301
-
302
- return results
303
 
304
- def retrieve_web_search_results(query):
305
- database = get_web_search_database()
306
  retriever = database.as_retriever(search_kwargs={"k": 5})
307
  relevant_docs = retriever.get_relevant_documents(query)
308
 
@@ -453,34 +448,38 @@ After writing the document, please provide a list of sources used in your respon
453
  yield "I apologize, but I couldn't generate a response at this time. Please try again later."
454
 
455
  def get_response_with_search(query, model, num_calls=3, temperature=0.2):
456
- search_results = duckduckgo_search(query)
457
- context = retrieve_web_search_results(query)
458
 
459
  prompt = f"""Using the following context from web search results:
460
  {context}
461
  Write a detailed and complete research document that fulfills the following user request: '{query}'
462
  After writing the document, please provide a list of sources used in your response."""
463
 
464
- if model == "@cf/meta/llama-3.1-8b-instruct":
465
- # Use Cloudflare API
466
- for response in get_response_from_cloudflare(prompt="", context=context, query=query, num_calls=num_calls, temperature=temperature, search_type="web"):
467
- yield response, "" # Yield streaming response without sources
468
- else:
469
- # Use Hugging Face API
470
- client = InferenceClient(model, token=huggingface_token)
471
-
472
- main_content = ""
473
- for i in range(num_calls):
474
- for message in client.chat_completion(
475
- messages=[{"role": "user", "content": prompt}],
476
- max_tokens=10000,
477
- temperature=temperature,
478
- stream=True,
479
- ):
480
- if message.choices and message.choices[0].delta and message.choices[0].delta.content:
481
- chunk = message.choices[0].delta.content
482
- main_content += chunk
483
- yield main_content, "" # Yield partial main content without sources
 
 
 
 
484
 
485
  def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=0.2):
486
  logging.info(f"Entering get_response_from_pdf with query: {query}, model: {model}, selected_docs: {selected_docs}")
 
20
  import inspect
21
  import logging
22
  import shutil
23
+ import tempfile
24
 
25
 
26
  # Set up basic configuration for logging
 
275
  return final_response
276
 
277
  def get_web_search_database():
278
+ embed = get_embeddings()
279
+ temp_dir = tempfile.mkdtemp()
280
+ database = WebSearchFAISS.from_documents([], embed)
281
+ return database, temp_dir
 
 
 
282
 
283
+ def cleanup_web_search_database(temp_dir):
284
+ shutil.rmtree(temp_dir)
285
 
286
  def duckduckgo_search(query):
287
  with DDGS() as ddgs:
288
  results = list(ddgs.text(query, max_results=5))
289
 
290
+ database, temp_dir = get_web_search_database()
291
  documents = []
292
  for result in results:
293
  content = f"{result['title']}\n{result['body']}"
 
295
  documents.append(doc)
296
 
297
  database.add_documents(documents)
298
+ return database, temp_dir, results
 
 
299
 
300
+ def retrieve_web_search_results(database, query):
 
301
  retriever = database.as_retriever(search_kwargs={"k": 5})
302
  relevant_docs = retriever.get_relevant_documents(query)
303
 
 
448
  yield "I apologize, but I couldn't generate a response at this time. Please try again later."
449
 
450
  def get_response_with_search(query, model, num_calls=3, temperature=0.2):
451
+ database, temp_dir, search_results = duckduckgo_search(query)
452
+ context = retrieve_web_search_results(database, query)
453
 
454
  prompt = f"""Using the following context from web search results:
455
  {context}
456
  Write a detailed and complete research document that fulfills the following user request: '{query}'
457
  After writing the document, please provide a list of sources used in your response."""
458
 
459
+ try:
460
+ if model == "@cf/meta/llama-3.1-8b-instruct":
461
+ # Use Cloudflare API
462
+ for response in get_response_from_cloudflare(prompt="", context=context, query=query, num_calls=num_calls, temperature=temperature, search_type="web"):
463
+ yield response, "" # Yield streaming response without sources
464
+ else:
465
+ # Use Hugging Face API
466
+ client = InferenceClient(model, token=huggingface_token)
467
+
468
+ main_content = ""
469
+ for i in range(num_calls):
470
+ for message in client.chat_completion(
471
+ messages=[{"role": "user", "content": prompt}],
472
+ max_tokens=10000,
473
+ temperature=temperature,
474
+ stream=True,
475
+ ):
476
+ if message.choices and message.choices[0].delta and message.choices[0].delta.content:
477
+ chunk = message.choices[0].delta.content
478
+ main_content += chunk
479
+ yield main_content, "" # Yield partial main content without sources
480
+ finally:
481
+ # Clean up the temporary database
482
+ cleanup_web_search_database(temp_dir)
483
 
484
  def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=0.2):
485
  logging.info(f"Entering get_response_from_pdf with query: {query}, model: {model}, selected_docs: {selected_docs}")