Update app.py

app.py CHANGED
@@ -67,7 +67,7 @@ def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[
         raise ValueError("Invalid parser specified. Use 'pypdf' or 'llamaparse'.")
 
 def get_embeddings():
-    return HuggingFaceEmbeddings(model_name="
+    return HuggingFaceEmbeddings(model_name="sentence-transformers/stsb-roberta-large")
 
 # Add this at the beginning of your script, after imports
 DOCUMENTS_FILE = "uploaded_documents.json"
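Note on this hunk: `stsb-roberta-large` produces 1024-dimensional sentence vectors, so if the previous model (truncated in the diff) had a different dimension, any existing FAISS index would need to be rebuilt. A minimal sketch of how `get_embeddings()` is typically consumed; the `langchain_community` import paths and the sample text are assumptions, not part of this commit:

```python
# Sketch only: imports and sample data are assumed, not taken from app.py.
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

def get_embeddings():
    # sentence-transformers/stsb-roberta-large -> 1024-dim sentence vectors
    return HuggingFaceEmbeddings(model_name="sentence-transformers/stsb-roberta-large")

embeddings = get_embeddings()
db = FAISS.from_texts(["example passage"], embeddings)
print(db.similarity_search("example", k=1))
```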
@@ -273,7 +273,7 @@ def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temp
 
 def duckduckgo_search(query):
     with DDGS() as ddgs:
-        results = ddgs.text(query, max_results=
+        results = ddgs.text(query, max_results=5)
     return results
 
 class CitingSources(BaseModel):
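For context, `ddgs.text()` returns a list of plain dicts, so capping at `max_results=5` bounds how much text flows into the downstream vector store. A quick sketch of the call and its result shape, per the `duckduckgo_search` library's documented schema:

```python
from duckduckgo_search import DDGS

def duckduckgo_search(query):
    with DDGS() as ddgs:
        # Cap at 5 hits to keep the downstream context small
        results = ddgs.text(query, max_results=5)
    return results

# Each result is a dict with 'title', 'href', and 'body' keys
for r in duckduckgo_search("retrieval augmented generation"):
    print(r["title"], r["href"])
```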
@@ -441,36 +441,57 @@ def get_response_with_search(query, model, num_calls=3, temperature=0.2):
     retriever = web_search_database.as_retriever(search_kwargs={"k": 5})
     relevant_docs = retriever.get_relevant_documents(query)
 
-
-
-
-    {context}
-    You are an expert assistant tasked with creating an in-depth and comprehensive research document in response to the following user query: '{query}'.
-    Your response should thoroughly explore all relevant aspects of the topic, covering various angles, details, and implications as supported by the retrieved information.
-    Ensure that your document is well-structured, logically organized, and coherent, providing clear explanations, detailed insights, and thoughtful analysis where applicable.
-    Base your entire response strictly on the information retrieved from trusted sources. After completing the document, provide a list of all sources used.
-    Importantly, only include information that is directly supported by the retrieved content. If any part of the information cannot be verified from the given sources, clearly state that it could not be confirmed. Strive to present a comprehensive, informative, and well-rounded analysis that fully addresses the user’s query."""
-
-    if model == "@cf/meta/llama-3.1-8b-instruct":
-        # Use Cloudflare API
-        for response in get_response_from_cloudflare(prompt="", context=context, query=query, num_calls=num_calls, temperature=temperature, search_type="web"):
-            yield response, ""  # Yield streaming response without sources
-    else:
-        # Use Hugging Face API
-        client = InferenceClient(model, token=huggingface_token)
 
-
-
+    for i, doc in enumerate(relevant_docs, 1):
+        context = doc.page_content
+        source = doc.metadata.get('source', 'Unknown source')
+
+        prompt = f"""Using the following context from a web search result:
+        {context}
+        You are an expert AI assistant. Write a detailed summary of the information provided in this source that is relevant to the following user request: '{query}'
+        Base your summary strictly on the information from this source. Only include information that is directly supported by the given content.
+        If any part of the information cannot be verified from this source, clearly state that it could not be confirmed."""
+
+        if model == "@cf/meta/llama-3.1-8b-instruct":
+            # Use Cloudflare API
+            for response in get_response_from_cloudflare(prompt="", context=context, query=query, num_calls=1, temperature=temperature, search_type="web"):
+                yield f"Source {i} ({source}):\n\n{response}\n\n", ""
+        else:
+            # Use Hugging Face API
+            client = InferenceClient(model, token=huggingface_token)
+
+            summary = ""
             for message in client.chat_completion(
                 messages=[{"role": "user", "content": prompt}],
-                max_tokens=
+                max_tokens=2000,
                 temperature=temperature,
                 stream=True,
             ):
                 if message.choices and message.choices[0].delta and message.choices[0].delta.content:
                     chunk = message.choices[0].delta.content
-
-                    yield
+                    summary += chunk
+                    yield f"Source {i} ({source}):\n\n{summary}\n\n", ""
+
+    # Generate an overall summary after processing all sources
+    overall_prompt = f"""Based on the summaries you've generated for each source, provide a concise overall summary that addresses the user's query: '{query}'
+    Highlight any conflicting information or gaps in the available data."""
+
+    if model == "@cf/meta/llama-3.1-8b-instruct":
+        # Use Cloudflare API for overall summary
+        for response in get_response_from_cloudflare(prompt="", context="", query=overall_prompt, num_calls=1, temperature=temperature, search_type="web"):
+            yield f"Overall Summary:\n\n{response}\n\n", ""
+    else:
+        # Use Hugging Face API for overall summary
+        overall_summary = ""
+        for message in client.chat_completion(
+            messages=[{"role": "user", "content": overall_prompt}],
+            max_tokens=2000,
+            temperature=temperature,
+            stream=True,
+        ):
+            if message.choices and message.choices[0].delta and message.choices[0].delta.content:
+                chunk = message.choices[0].delta.content
+                overall_summary += chunk
+                yield f"Overall Summary:\n\n{overall_summary}\n\n", ""
 
 def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=0.2):
     logging.info(f"Entering get_response_from_pdf with query: {query}, model: {model}, selected_docs: {selected_docs}")
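One design note on the final pass: chat completions are stateless, so the overall-summary call only sees what its prompt contains. A hedged sketch of a synthesis step that passes the collected per-source summaries into the final prompt explicitly; the function and parameter names here are illustrative, not from app.py:

```python
# Illustrative two-stage synthesis: collect per-source summaries, then feed
# them to the final call, since the model keeps no memory between calls.
from typing import Callable, List

def synthesize(query: str, summaries: List[str], complete: Callable[[str], str]) -> str:
    joined = "\n\n".join(f"Source {i}: {s}" for i, s in enumerate(summaries, 1))
    overall_prompt = (
        f"Based on these source summaries:\n{joined}\n\n"
        f"Provide a concise overall summary that addresses the user's query: '{query}'. "
        "Highlight any conflicting information or gaps in the available data."
    )
    return complete(overall_prompt)  # 'complete' is any prompt -> text callable
```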