GitsSaikat committed: Update deep_research.py

research/deep_research.py  CHANGED  (+13 -6)
@@ -89,14 +89,17 @@ async def generate_search_queries_async(session, user_query):
             return []
     return []
 
-async def perform_search_async(session, query):
+# Modify perform_search_async function
+async def perform_search_async(session, query, result_limit=5):
     """
     Make an asynchronous SERPAPI call to perform a Google search for the provided query.
+    result_limit: Maximum number of search results to return
     """
     params = {
         "q": query,
         "api_key": SERPAPI_API_KEY,
-        "engine": "google"
+        "engine": "google",
+        "num": result_limit  # Add this parameter for limiting results
     }
     try:
         async with session.get(SERPAPI_URL, params=params) as resp:
@@ -104,7 +107,7 @@ async def perform_search_async(session, query):
             results = await resp.json()
             if "organic_results" in results:
                 links = [item.get("link") for item in results["organic_results"] if "link" in item]
-                return links
+                return links[:result_limit]  # Ensure we don't exceed the limit
             else:
                 print("No organic results found in SERPAPI response.")
                 return []
@@ -274,9 +277,11 @@ async def process_link(session, link, user_query, search_query):
         return SourcedContext(context, link)
     return None
 
-async def research_flow(user_query, iteration_limit):
+# Modify research_flow function to accept search_limit parameter
+async def research_flow(user_query, iteration_limit, search_limit=5):
     """
     Primary research procedure intended for integration with Streamlit.
+    search_limit: Maximum number of search results per query
     """
     sourced_contexts = []
     all_search_queries = []
@@ -292,9 +297,11 @@ async def research_flow(user_query, iteration_limit):
         print(f"\n--- Iteration {iteration + 1} ---")
         iteration_contexts = []
 
-        search_tasks = [perform_search_async(session, query) for query in new_search_queries]
+        # Update to include search_limit
+        search_tasks = [perform_search_async(session, query, search_limit) for query in new_search_queries]
         search_results = await asyncio.gather(*search_tasks)
 
+
         unique_links = {}
         for idx, links in enumerate(search_results):
             query = new_search_queries[idx]
@@ -352,4 +359,4 @@ def main():
     print(final_report)
 
 if __name__ == "__main__":
-    main()
+    main()
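
Taken together, the first two hunks cap the result count at the SERPAPI call itself ("num" is a supported SerpApi parameter for results per page) and then defensively slice the returned links. Below is a minimal self-contained sketch of the updated function, assuming SerpApi's JSON endpoint for SERPAPI_URL and a plain print-and-return in the except branch; the hunk ends before the original error handling, so that part is an assumption.

import asyncio
import aiohttp

# Placeholders: deep_research.py defines these at module level.
SERPAPI_URL = "https://serpapi.com/search.json"  # assumed endpoint
SERPAPI_API_KEY = "your-serpapi-key"             # supply a real key

async def perform_search_async(session, query, result_limit=5):
    """Fetch up to result_limit Google result links for query via SERPAPI."""
    params = {
        "q": query,
        "api_key": SERPAPI_API_KEY,
        "engine": "google",
        "num": result_limit,  # ask SERPAPI for at most result_limit results
    }
    try:
        async with session.get(SERPAPI_URL, params=params) as resp:
            results = await resp.json()
            if "organic_results" in results:
                links = [item.get("link") for item in results["organic_results"] if "link" in item]
                return links[:result_limit]  # defensive cap if the API returns more
            print("No organic results found in SERPAPI response.")
            return []
    except Exception as exc:  # assumed handling; the original except clause is not in the hunk
        print(f"SERPAPI request failed: {exc}")
        return []

async def demo():
    async with aiohttp.ClientSession() as session:
        print(await perform_search_async(session, "async web research", result_limit=3))

if __name__ == "__main__":
    asyncio.run(demo())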
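The remaining hunks thread the same cap through research_flow, so callers control how many links each generated query contributes. A hedged usage sketch follows; the import path mirrors the file's location in this repo, and the return value is assumed to be whatever report or contexts the flow accumulates.

import asyncio
from research.deep_research import research_flow  # path per this commit; import style assumed

# Hypothetical arguments: two refinement iterations, at most 3 links per search query.
result = asyncio.run(research_flow("my research question", iteration_limit=2, search_limit=3))
print(result)

Note that the new default of search_limit=5 is stricter than the old behavior, which returned every organic result SERPAPI sent back (typically about ten per page); callers that relied on deeper result lists will want to pass a larger limit explicitly.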