Shreyas094 committed on
Commit
9ab1e0a
·
verified ·
1 Parent(s): 58fb1c1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -24
app.py CHANGED
@@ -67,7 +67,7 @@ def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[
67
  raise ValueError("Invalid parser specified. Use 'pypdf' or 'llamaparse'.")
68
 
69
  def get_embeddings():
70
- return HuggingFaceEmbeddings(model_name="avsolatorio/GIST-Embedding-v0")
71
 
72
  # Add this at the beginning of your script, after imports
73
  DOCUMENTS_FILE = "uploaded_documents.json"
@@ -273,7 +273,7 @@ def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temp
273
 
274
  def duckduckgo_search(query):
275
  with DDGS() as ddgs:
276
- results = ddgs.text(query, max_results=20)
277
  return results
278
 
279
  class CitingSources(BaseModel):
@@ -441,36 +441,57 @@ def get_response_with_search(query, model, num_calls=3, temperature=0.2):
441
  retriever = web_search_database.as_retriever(search_kwargs={"k": 5})
442
  relevant_docs = retriever.get_relevant_documents(query)
443
 
444
- context = "\n".join([doc.page_content for doc in relevant_docs])
445
-
446
- prompt = f"""Using the following context from web search results:
447
- {context}
448
- You are an expert assistant tasked with creating an in-depth and comprehensive research document in response to the following user query: '{query}'.
449
- Your response should thoroughly explore all relevant aspects of the topic, covering various angles, details, and implications as supported by the retrieved information.
450
- Ensure that your document is well-structured, logically organized, and coherent, providing clear explanations, detailed insights, and thoughtful analysis where applicable.
451
- Base your entire response strictly on the information retrieved from trusted sources. After completing the document, provide a list of all sources used.
452
- Importantly, only include information that is directly supported by the retrieved content. If any part of the information cannot be verified from the given sources, clearly state that it could not be confirmed. Strive to present a comprehensive, informative, and well-rounded analysis that fully addresses the user’s query."""
453
-
454
- if model == "@cf/meta/llama-3.1-8b-instruct":
455
- # Use Cloudflare API
456
- for response in get_response_from_cloudflare(prompt="", context=context, query=query, num_calls=num_calls, temperature=temperature, search_type="web"):
457
- yield response, "" # Yield streaming response without sources
458
- else:
459
- # Use Hugging Face API
460
- client = InferenceClient(model, token=huggingface_token)
461
 
462
- main_content = ""
463
- for i in range(num_calls):
 
 
 
 
 
 
 
 
 
 
 
 
 
464
  for message in client.chat_completion(
465
  messages=[{"role": "user", "content": prompt}],
466
- max_tokens=10000,
467
  temperature=temperature,
468
  stream=True,
469
  ):
470
  if message.choices and message.choices[0].delta and message.choices[0].delta.content:
471
  chunk = message.choices[0].delta.content
472
- main_content += chunk
473
- yield main_content, "" # Yield partial main content without sources
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
474
 
475
  def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=0.2):
476
  logging.info(f"Entering get_response_from_pdf with query: {query}, model: {model}, selected_docs: {selected_docs}")
 
67
  raise ValueError("Invalid parser specified. Use 'pypdf' or 'llamaparse'.")
68
 
69
  def get_embeddings():
70
+ return HuggingFaceEmbeddings(model_name="sentence-transformers/stsb-roberta-large")
71
 
72
  # Add this at the beginning of your script, after imports
73
  DOCUMENTS_FILE = "uploaded_documents.json"
 
273
 
274
  def duckduckgo_search(query):
275
  with DDGS() as ddgs:
276
+ results = ddgs.text(query, max_results=5)
277
  return results
278
 
279
  class CitingSources(BaseModel):
 
441
  retriever = web_search_database.as_retriever(search_kwargs={"k": 5})
442
  relevant_docs = retriever.get_relevant_documents(query)
443
 
444
+ for i, doc in enumerate(relevant_docs, 1):
445
+ context = doc.page_content
446
+ source = doc.metadata.get('source', 'Unknown source')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
447
 
448
+ prompt = f"""Using the following context from a web search result:
449
+ {context}
450
+ You are an expert AI assistant. Write a detailed summary of the information provided in this source that is relevant to the following user request: '{query}'
451
+ Base your summary strictly on the information from this source. Only include information that is directly supported by the given content.
452
+ If any part of the information cannot be verified from this source, clearly state that it could not be confirmed."""
453
+
454
+ if model == "@cf/meta/llama-3.1-8b-instruct":
455
+ # Use Cloudflare API
456
+ for response in get_response_from_cloudflare(prompt="", context=context, query=query, num_calls=1, temperature=temperature, search_type="web"):
457
+ yield f"Source {i} ({source}):\n\n{response}\n\n", ""
458
+ else:
459
+ # Use Hugging Face API
460
+ client = InferenceClient(model, token=huggingface_token)
461
+
462
+ summary = ""
463
  for message in client.chat_completion(
464
  messages=[{"role": "user", "content": prompt}],
465
+ max_tokens=2000,
466
  temperature=temperature,
467
  stream=True,
468
  ):
469
  if message.choices and message.choices[0].delta and message.choices[0].delta.content:
470
  chunk = message.choices[0].delta.content
471
+ summary += chunk
472
+ yield f"Source {i} ({source}):\n\n{summary}\n\n", ""
473
+
474
+ # Generate an overall summary after processing all sources
475
+ overall_prompt = f"""Based on the summaries you've generated for each source, provide a concise overall summary that addresses the user's query: '{query}'
476
+ Highlight any conflicting information or gaps in the available data."""
477
+
478
+ if model == "@cf/meta/llama-3.1-8b-instruct":
479
+ # Use Cloudflare API for overall summary
480
+ for response in get_response_from_cloudflare(prompt="", context="", query=overall_prompt, num_calls=1, temperature=temperature, search_type="web"):
481
+ yield f"Overall Summary:\n\n{response}\n\n", ""
482
+ else:
483
+ # Use Hugging Face API for overall summary
484
+ overall_summary = ""
485
+ for message in client.chat_completion(
486
+ messages=[{"role": "user", "content": overall_prompt}],
487
+ max_tokens=2000,
488
+ temperature=temperature,
489
+ stream=True,
490
+ ):
491
+ if message.choices and message.choices[0].delta and message.choices[0].delta.content:
492
+ chunk = message.choices[0].delta.content
493
+ overall_summary += chunk
494
+ yield f"Overall Summary:\n\n{overall_summary}\n\n", ""
495
 
496
  def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=0.2):
497
  logging.info(f"Entering get_response_from_pdf with query: {query}, model: {model}, selected_docs: {selected_docs}")