Shreyas094 committed on
Commit
8840ae9
·
verified ·
1 Parent(s): c037696

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -7
app.py CHANGED
@@ -271,16 +271,45 @@ def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temp
271
  print(f"Final clean response: {final_response[:100]}...")
272
  return final_response
273
 
 
 
 
 
 
 
 
 
 
 
 
 
274
  def duckduckgo_search(query):
275
  with DDGS() as ddgs:
276
- results = ddgs.text(query, max_results=5)
 
 
 
 
 
 
 
 
 
 
 
277
  return results
278
 
 
 
 
 
 
279
  class CitingSources(BaseModel):
280
  sources: List[str] = Field(
281
  ...,
282
  description="List of sources to cite. Should be an URL of the source."
283
  )
 
284
  def chatbot_interface(message, history, use_web_search, model, temperature, num_calls):
285
  if not message.strip():
286
  return "", history
@@ -318,8 +347,6 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
318
  if use_web_search:
319
  for main_content, sources in get_response_with_search(message, model, num_calls=num_calls, temperature=temperature):
320
  response = f"{main_content}\n\n{sources}"
321
- first_line = response.split('\n')[0] if response else ''
322
- # logging.info(f"Generated Response (first line): {first_line}")
323
  yield response
324
  else:
325
  embed = get_embeddings()
@@ -421,8 +448,10 @@ After writing the document, please provide a list of sources used in your respon
421
 
422
  def get_response_with_search(query, model, num_calls=3, temperature=0.2):
423
  search_results = duckduckgo_search(query)
424
- context = "\n".join(f"{result['title']}\n{result['body']}\nSource: {result['href']}\n"
425
- for result in search_results if 'body' in result)
 
 
426
 
427
  prompt = f"""Using the following context:
428
  {context}
@@ -432,7 +461,7 @@ After writing the document, please provide a list of sources used in your respon
432
  if model == "@cf/meta/llama-3.1-8b-instruct":
433
  # Use Cloudflare API
434
  for response in get_response_from_cloudflare(prompt="", context=context, query=query, num_calls=num_calls, temperature=temperature, search_type="web"):
435
- yield response, "" # Yield streaming response without sources
436
  else:
437
  # Use Hugging Face API
438
  client = InferenceClient(model, token=huggingface_token)
@@ -448,7 +477,7 @@ After writing the document, please provide a list of sources used in your respon
448
  if message.choices and message.choices[0].delta and message.choices[0].delta.content:
449
  chunk = message.choices[0].delta.content
450
  main_content += chunk
451
- yield main_content, "" # Yield partial main content without sources
452
 
453
  def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=0.2):
454
  logging.info(f"Entering get_response_from_pdf with query: {query}, model: {model}, selected_docs: {selected_docs}")
 
271
  print(f"Final clean response: {final_response[:100]}...")
272
  return final_response
273
 
274
def get_web_search_database():
    """Load the persisted FAISS web-search index, creating a new one if absent.

    Returns:
        A FAISS vector store backed by the local "web_search_database"
        directory. `allow_dangerous_deserialization=True` is required by
        LangChain to load a locally pickled index — safe only because this
        file is written by this same application.
    """
    embed = get_embeddings()
    if os.path.exists("web_search_database"):
        return FAISS.load_local("web_search_database", embed, allow_dangerous_deserialization=True)
    # BUG FIX: FAISS.from_documents([], embed) raises because the index
    # dimension cannot be inferred from zero embeddings. Seed the fresh
    # index with a single empty text instead so first-run creation works.
    return FAISS.from_texts([""], embed)
280
+
281
def save_web_search_database(database):
    """Persist the FAISS web-search index to its on-disk location.

    Args:
        database: a FAISS vector store exposing `save_local(path)`.
    """
    target_dir = "web_search_database"
    database.save_local(target_dir)
283
+
284
+
285
+
286
def duckduckgo_search(query):
    """Run a DuckDuckGo text search and persist the hits in the FAISS index.

    Args:
        query: free-text search string.

    Returns:
        The raw DuckDuckGo result dicts (with 'title'/'body'/'href' keys),
        so callers keep the original search payload.

    Side effects:
        Each well-formed hit is embedded and appended to the on-disk
        "web_search_database" FAISS index.
    """
    with DDGS() as ddgs:
        results = list(ddgs.text(query, max_results=5))

    documents = [
        Document(
            page_content=f"{result['title']}\n{result['body']}",
            metadata={"source": result['href']},
        )
        for result in results
        # BUG FIX: DuckDuckGo occasionally returns entries missing 'body'
        # (the pre-existing code guarded on this); skip malformed hits
        # instead of raising KeyError.
        if 'title' in result and 'body' in result and 'href' in result
    ]
    # Only touch the index when there is something to add — an empty batch
    # is wasted I/O and add_documents([]) can raise on some backends.
    if documents:
        database = get_web_search_database()
        database.add_documents(documents)
        save_web_search_database(database)

    return results
301
 
302
def retrieve_web_search_results(query):
    """Return the stored web-search documents most relevant to *query*.

    Loads the persisted FAISS index and retrieves the top 5 matches.
    """
    db = get_web_search_database()
    return db.as_retriever(search_kwargs={"k": 5}).get_relevant_documents(query)
306
+
307
class CitingSources(BaseModel):
    """Structured-output schema for the list of URLs a generated answer cites."""

    # Required field (Field(...)): the model must supply at least the key.
    sources: List[str] = Field(
        ...,
        description="List of sources to cite. Should be an URL of the source."
    )
312
+
313
  def chatbot_interface(message, history, use_web_search, model, temperature, num_calls):
314
  if not message.strip():
315
  return "", history
 
347
  if use_web_search:
348
  for main_content, sources in get_response_with_search(message, model, num_calls=num_calls, temperature=temperature):
349
  response = f"{main_content}\n\n{sources}"
 
 
350
  yield response
351
  else:
352
  embed = get_embeddings()
 
448
 
449
  def get_response_with_search(query, model, num_calls=3, temperature=0.2):
450
  search_results = duckduckgo_search(query)
451
+ relevant_docs = retrieve_web_search_results(query)
452
+
453
+ context = "\n".join([doc.page_content for doc in relevant_docs])
454
+ sources = "\n".join([f"Source: {doc.metadata['source']}" for doc in relevant_docs])
455
 
456
  prompt = f"""Using the following context:
457
  {context}
 
461
  if model == "@cf/meta/llama-3.1-8b-instruct":
462
  # Use Cloudflare API
463
  for response in get_response_from_cloudflare(prompt="", context=context, query=query, num_calls=num_calls, temperature=temperature, search_type="web"):
464
+ yield response, sources
465
  else:
466
  # Use Hugging Face API
467
  client = InferenceClient(model, token=huggingface_token)
 
477
  if message.choices and message.choices[0].delta and message.choices[0].delta.content:
478
  chunk = message.choices[0].delta.content
479
  main_content += chunk
480
+ yield main_content, sources
481
 
482
  def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=0.2):
483
  logging.info(f"Entering get_response_from_pdf with query: {query}, model: {model}, selected_docs: {selected_docs}")