GitsSaikat commited on
Commit
3290198
·
0 Parent(s):

first commit

Browse files
DeepResearch_App/README.md ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🔍 Open DeepResearch
2
+
3
+ ![Open DeepResearch Logo](logo.png)
4
+
5
+ ## Overview
6
+
7
+ Open DeepResearch is a powerful research assistant that leverages AI to conduct comprehensive research on any topic. It automates the process of gathering, analyzing, and synthesizing information from multiple sources to generate detailed reports with proper citations.
8
+
9
+ ## Features
10
+
11
+ - 🤖 AI-powered search query generation
12
+ - 🌐 Automated web searching and content extraction
13
+ - 📊 Smart relevance filtering
14
+ - 📝 Comprehensive report generation with citations
15
+ - 🔄 Iterative research refinement
16
+ - 📱 User-friendly Streamlit interface
17
+
18
+ ## Requirements
19
+
20
+ You'll need API keys from:
21
+ - [OpenRouter](https://openrouter.ai/keys)
22
+ - [SerpAPI](https://serpapi.com/manage-api-key)
23
+ - [Jina](https://jina.ai/api-key)
24
+
25
+ ## Installation
26
+
27
+ ```bash
28
+ git clone https://github.com/yourusername/DeepResearch_App.git
29
+ cd DeepResearch_App
30
+ pip install -r requirements.txt
31
+ ```
32
+
33
+ ## Usage
34
+
35
+ 1. Launch the app:
36
+ ```bash
37
+ streamlit run app.py
38
+ ```
39
+
40
+ 2. Configure your API keys in the sidebar
41
+ 3. Enter your research query
42
+ 4. Set the number of research iterations
43
+ 5. Click "Start Research" and wait for your detailed report
44
+
45
+ ## How It Works
46
+
47
+ 1. **Query Generation**: AI creates targeted search queries based on your topic
48
+ 2. **Web Search**: Automated search across multiple sources
49
+ 3. **Content Analysis**: Relevant information extraction and filtering
50
+ 4. **Report Generation**: Synthesized findings with proper citations
51
+ 5. **Iterative Refinement**: Additional searches based on gaps in information
52
+
53
+ ## Contributing
54
+
55
+ Contributions are welcome! Please feel free to submit a Pull Request.
56
+
57
+ ## License
58
+
59
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
60
+
61
+
DeepResearch_App/app.py ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Streamlit front-end for Open DeepResearch: collects API keys in the sidebar,
# takes a research query, and renders the report produced by the research module.
import streamlit as st
import asyncio
from research import deep_research
from PIL import Image

# Page configuration — must run before any other st.* call.
st.set_page_config(
    page_title="Open DeepResearch",
    page_icon="🔍",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Load and display logo in sidebar.
# NOTE(review): assumes logo.png sits in the working directory — confirm launch dir.
logo = Image.open('logo.png')
st.sidebar.image(logo, width=200, use_container_width=True)

# Initialize session state for API keys (persists across Streamlit reruns).
if 'api_keys_configured' not in st.session_state:
    st.session_state.api_keys_configured = False

# Custom CSS (previous CSS remains the same)
st.markdown("""
<style>
/* ... previous CSS ... */
.api-container {
    background-color: #f8f9fa;
    padding: 1.5rem;
    border-radius: 10px;
    margin-bottom: 2rem;
    border: 1px solid #e0e0e0;
}
.api-header {
    color: #1E88E5;
    font-size: 1.2rem;
    margin-bottom: 1rem;
}
</style>
""", unsafe_allow_html=True)
# Sidebar for API Configuration: a form that writes the three required keys
# into session state and flips api_keys_configured on success.
with st.sidebar:
    st.markdown("### ⚙️ API Configuration")
    st.info("Please configure your API keys before starting research.")

    # Keep the expander open until a full set of keys has been saved.
    with st.expander("Configure API Keys", expanded=not st.session_state.api_keys_configured):
        api_form = st.form("api_keys_form")
        with api_form:
            # Pre-fill from session state so reruns don't wipe saved values.
            openrouter_key = api_form.text_input(
                "OpenRouter API Key",
                type="password",
                value=st.session_state.get('openrouter_key', ''),
                help="Required for language model access"
            )

            serpapi_key = api_form.text_input(
                "SerpAPI Key",
                type="password",
                value=st.session_state.get('serpapi_key', ''),
                help="Required for web search functionality"
            )

            jina_key = api_form.text_input(
                "Jina API Key",
                type="password",
                value=st.session_state.get('jina_key', ''),
                help="Required for content extraction"
            )

            if api_form.form_submit_button("Save API Keys"):
                # All three keys are mandatory; reject partial configuration.
                if not all([openrouter_key, serpapi_key, jina_key]):
                    st.error("❌ All API keys are required!")
                else:
                    # Store API keys in session state
                    st.session_state.openrouter_key = openrouter_key
                    st.session_state.serpapi_key = serpapi_key
                    st.session_state.jina_key = jina_key
                    st.session_state.api_keys_configured = True
                    st.success("✅ API keys saved successfully!")
                    # Rerun so the main panel switches from the warning to the form.
                    st.rerun()

    if st.session_state.api_keys_configured:
        st.success("✅ API Keys configured")

    # Add links to get API keys
    st.markdown("### 🔑 Get API Keys")
    st.markdown("""
    - [OpenRouter API Key](https://openrouter.ai/keys)
    - [SerpAPI Key](https://serpapi.com/manage-api-key)
    - [Jina API Key](https://jina.ai/api-key)
    """)
def run_research(user_query, iteration_limit):
    """Inject the session-scoped API keys into the research module, then run
    the full research flow to completion and return the report text.

    The research module reads its credentials from module-level attributes,
    so they are assigned here on every call before the event loop starts.
    """
    credentials = (
        ("OPENROUTER_API_KEY", st.session_state.openrouter_key),
        ("SERPAPI_API_KEY", st.session_state.serpapi_key),
        ("JINA_API_KEY", st.session_state.jina_key),
    )
    for attr_name, key_value in credentials:
        setattr(deep_research, attr_name, key_value)
    research_coro = deep_research.research_flow(user_query, iteration_limit)
    return asyncio.run(research_coro)
# Main content: gated on API configuration; renders the query form, runs the
# research pipeline synchronously, and displays/downloads the final report.
if not st.session_state.api_keys_configured:
    st.warning("⚠️ Please configure your API keys in the sidebar before proceeding.")
else:
    # Title and description
    st.title("🔍 Open DeepResearch")
    st.markdown("""
    <div style='background-color: #e3f2fd; padding: 1rem; border-radius: 10px; margin-bottom: 2rem;'>
        <h4 style='color: #1565C0; margin-bottom: 0.5rem;'>Welcome to the Open DeepResearch!</h4>
        <p style='color: #424242;'>
            This application helps you conduct comprehensive research on any topic by:
            <br>
            • Generating relevant search queries<br>
            • Analyzing multiple sources<br>
            • Synthesizing information into a detailed report
        </p>
    </div>
    """, unsafe_allow_html=True)

    # Main form in a container
    with st.container():
        col1, col2 = st.columns([2, 1])

        with col1:
            with st.form("research_form", clear_on_submit=False):
                st.markdown("### Research Parameters")

                user_query = st.text_area(
                    "Research Query",
                    placeholder="Enter your research topic or question here...",
                    help="Be as specific as possible for better results",
                    height=100
                )

                # col_b is intentionally left empty; it just narrows the number input.
                col_a, col_b = st.columns(2)
                with col_a:
                    iter_limit_input = st.number_input(
                        "Maximum Iterations",
                        min_value=1,
                        max_value=20,
                        value=10,
                        help="Higher values mean more thorough research but longer processing time"
                    )

                # `submitted` escapes the `with` blocks (module scope) and is
                # consumed below after the layout section.
                submitted = st.form_submit_button("🚀 Start Research")

        with col2:
            st.markdown("### Tips for Better Results")
            st.info("""
            • Be specific in your query
            • Use clear, focused questions
            • Consider including relevant keywords
            • Specify time periods if applicable
            """)

    # Process and display results
    if submitted:
        if not user_query.strip():
            st.error("⚠️ Please enter a research query before proceeding.")
        else:
            try:
                # Blocking call: the whole async pipeline runs inside the spinner.
                with st.spinner("🔄 Conducting research... This may take a few minutes..."):
                    final_report = run_research(user_query, int(iter_limit_input))

                st.markdown("""
                <div class='report-container'>
                    <h3 style='color: #1E88E5; margin-bottom: 1rem;'>📊 Research Report</h3>
                </div>
                """, unsafe_allow_html=True)

                # Display the report in tabs
                tab1, tab2 = st.tabs(["📝 Formatted Report", "📄 Raw Text"])

                with tab1:
                    st.markdown(final_report)

                with tab2:
                    st.text_area(
                        label="",
                        value=final_report,
                        height=500,
                        help="You can copy the raw text from here"
                    )

                # Download button for the report
                st.download_button(
                    label="📥 Download Report",
                    data=final_report,
                    file_name="research_report.txt",
                    mime="text/plain"
                )

            except Exception as e:
                # Surface any pipeline failure to the user rather than crashing the app.
                st.error(f"❌ An error occurred during research: {str(e)}")
                st.markdown("""
                <div style='background-color: #ffebee; padding: 1rem; border-radius: 10px;'>
                    <p style='color: #c62828;'>Please try again with a different query or contact support if the issue persists.</p>
                </div>
                """, unsafe_allow_html=True)

# Footer
st.markdown("""
<div style='text-align: center; color: #666; padding: 2rem;'>
    <p>Built by GitsSaikat ❤️</p>
</div>
""", unsafe_allow_html=True)
DeepResearch_App/logo.png ADDED
DeepResearch_App/requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
streamlit
aiohttp
nest_asyncio
json5
pillow
DeepResearch_App/research/__pycache__/async_research.cpython-312.pyc ADDED
Binary file (19.2 kB). View file
 
DeepResearch_App/research/__pycache__/deep_research.cpython-312.pyc ADDED
Binary file (18.4 kB). View file
 
DeepResearch_App/research/deep_research.py ADDED
@@ -0,0 +1,355 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Core asynchronous research pipeline: query generation, web search, content
extraction, and report composition via OpenRouter / SerpAPI / Jina."""
import asyncio
import json
import os

import aiohttp
import nest_asyncio

# Allow asyncio.run() inside environments that already own an event loop
# (e.g. Streamlit / Jupyter).
nest_asyncio.apply()

# API Endpoints
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
SERPAPI_URL = "https://serpapi.com/search"
JINA_BASE_URL = "https://r.jina.ai/"

# API credentials. The Streamlit app overwrites these module attributes at
# runtime; defining env-backed defaults here fixes the NameError that the CLI
# path (main()) would otherwise hit, since the names were never bound.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
SERPAPI_API_KEY = os.environ.get("SERPAPI_API_KEY", "")
JINA_API_KEY = os.environ.get("JINA_API_KEY", "")

# Modify the default model selection
DEFAULT_MODEL = "google/gemini-2.0-flash-lite-preview-02-05:free"  # Gemini Flash 2.0 model identifier
# Helper class to hold extracted content along with its source URL
class SourcedContext:
    """A piece of extracted webpage text paired with the URL it came from.

    Used by the report generator to assign citation numbers per source URL.
    """

    def __init__(self, text, source_url):
        self.text = text              # relevant content extracted from the page
        self.source_url = source_url  # originating URL, used for citations

    def __repr__(self):
        # Truncate the text so debug output stays readable.
        return f"SourcedContext(source_url={self.source_url!r}, text={self.text[:40]!r})"
async def call_openrouter_async(session, messages, model=DEFAULT_MODEL):
    """
    Make an asynchronous request to the OpenRouter chat completion API with the
    given messages and return the assistant's reply text, or None on any
    failure (HTTP error, malformed response, or network exception).
    """
    request_headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "HTTP-Referer": "https://github.com/Pygen",
        "X-Title": "Research Assistant",
        "Content-Type": "application/json"
    }
    request_body = {
        "model": model,
        "messages": messages,
        "temperature": 0.7,
        "max_tokens": 4096
    }

    try:
        async with session.post(OPENROUTER_URL, headers=request_headers, json=request_body) as resp:
            if resp.status != 200:
                # Non-2xx: log the body for diagnosis and signal failure.
                text = await resp.text()
                print(f"OpenRouter API error: {resp.status} - {text}")
                return None
            result = await resp.json()
            try:
                return result['choices'][0]['message']['content']
            except (KeyError, IndexError):
                print("Unexpected response structure from OpenRouter:", result)
                return None
    except Exception as e:
        # Best-effort: callers treat None as "no answer".
        print("Error during OpenRouter call:", e)
        return None
async def generate_search_queries_async(session, user_query):
    """
    Use the LLM to produce up to four clear search queries based on the user's
    topic.

    Returns a list of query strings, or an empty list when the LLM gives no
    response or the response cannot be parsed as a list.
    """
    import ast  # local import: used only to safely parse the LLM's list literal

    prompt = (
        "You are a seasoned research assistant. Based on the user's topic, produce as many as four distinct and precise "
        "search queries that will help collect thorough information on the subject. "
        "Return a Python list of strings only, without any code formatting or backticks. "
        "For example: ['query1', 'query2', 'query3']"
    )
    messages = [
        {"role": "system", "content": "You are a precise and supportive research assistant."},
        {"role": "user", "content": f"User Topic: {user_query}\n\n{prompt}"}
    ]
    response = await call_openrouter_async(session, messages)
    if not response:
        return []
    try:
        cleaned_response = response.strip()
        # Strip Markdown code fences the model sometimes adds despite instructions.
        if cleaned_response.startswith("```"):
            cleaned_response = cleaned_response.split("```")[1]
            if cleaned_response.startswith("python"):
                cleaned_response = cleaned_response[6:]
        cleaned_response = cleaned_response.strip()

        # SECURITY: ast.literal_eval instead of eval — the model's output is
        # untrusted text and must never be executed as code.
        search_queries = ast.literal_eval(cleaned_response)
        if isinstance(search_queries, list):
            return search_queries
        print("The LLM response is not a list. Response:", response)
        return []
    except Exception as e:
        print("Error interpreting search queries:", e, "\nResponse:", response)
        return []
async def perform_search_async(session, query):
    """
    Make an asynchronous SERPAPI call to perform a Google search for the
    provided query; returns the organic-result links (empty list on failure).
    """
    query_params = {
        "q": query,
        "api_key": SERPAPI_API_KEY,
        "engine": "google"
    }
    try:
        async with session.get(SERPAPI_URL, params=query_params) as resp:
            if resp.status != 200:
                text = await resp.text()
                print(f"SERPAPI error: {resp.status} - {text}")
                return []
            results = await resp.json()
            if "organic_results" not in results:
                print("No organic results found in SERPAPI response.")
                return []
            # Keep only entries that actually carry a link.
            return [item.get("link") for item in results["organic_results"] if "link" in item]
    except Exception as e:
        print("Error during SERPAPI search:", e)
        return []
async def fetch_webpage_text_async(session, url):
    """
    Fetch the textual content of a webpage asynchronously using the Jina
    reader service; returns an empty string on any failure.
    """
    # The Jina reader proxies the target URL appended to its base endpoint.
    reader_url = f"{JINA_BASE_URL}{url}"
    auth_headers = {
        "Authorization": f"Bearer {JINA_API_KEY}"
    }
    try:
        async with session.get(reader_url, headers=auth_headers) as resp:
            if resp.status == 200:
                return await resp.text()
            error_body = await resp.text()
            print(f"Jina fetch error for {url}: {resp.status} - {error_body}")
            return ""
    except Exception as e:
        print("Error retrieving webpage text with Jina:", e)
        return ""
async def is_page_useful_async(session, user_query, page_text):
    """
    Ask the LLM whether the provided webpage content is pertinent to the
    user's topic. Returns "Yes" or "No"; defaults to "No" when the response
    is missing or cannot be interpreted.
    """
    prompt = (
        "You are a discerning evaluator of research. Given the user's topic and a snippet of webpage content, "
        "decide if the page contains valuable information to address the query. "
        "Reply strictly with one word: 'Yes' if the content is useful, or 'No' if it is not. Provide no extra text."
    )
    messages = [
        {"role": "system", "content": "You are a concise and strict research relevance evaluator."},
        {"role": "user", "content": f"User Topic: {user_query}\n\nWebpage Snippet (up to 20000 characters):\n{page_text[:20000]}\n\n{prompt}"}
    ]
    response = await call_openrouter_async(session, messages)
    if not response:
        return "No"
    answer = response.strip()
    if answer in ("Yes", "No"):
        return answer
    # Fall back to substring matching when the model adds extra words.
    if "Yes" in answer:
        return "Yes"
    if "No" in answer:
        return "No"
    return "No"
async def extract_relevant_context_async(session, user_query, search_query, page_text):
    """
    Derive and return the important details from the webpage text needed to
    address the user's topic; empty string when the LLM gives no answer.
    """
    prompt = (
        "You are an expert extractor of information. Given the user's topic, the search query that produced this page, "
        "and the webpage text, extract all pertinent details needed to answer the inquiry. "
        "Return only the relevant text without any additional commentary."
    )
    messages = [
        {"role": "system", "content": "You excel at summarizing and extracting relevant details."},
        {"role": "user", "content": f"User Topic: {user_query}\nSearch Query: {search_query}\n\nWebpage Snippet (up to 20000 characters):\n{page_text[:20000]}\n\n{prompt}"}
    ]
    response = await call_openrouter_async(session, messages)
    return response.strip() if response else ""
async def get_new_search_queries_async(session, user_query, previous_search_queries, all_contexts):
    """
    Evaluate if additional search queries are necessary based on the current
    research progress.

    Returns a list of new queries to run, the empty string "" when the LLM
    signals that research is complete, or [] when the response is missing or
    cannot be parsed.
    """
    import ast  # local import: used only to safely parse the LLM's list literal

    context_combined = "\n".join(all_contexts)
    prompt = (
        "You are a systematic research planner. Taking into account the original topic, prior search queries, "
        "and the extracted information from webpages, determine if more research is required. "
        "If so, produce up to four new search queries as a Python list "
        "(for example: ['new query1', 'new query2']). If no further research is needed, reply with an empty string."
        "\nReturn only a Python list or an empty string without extra commentary."
    )
    messages = [
        {"role": "system", "content": "You are methodical in planning further research steps."},
        {"role": "user", "content": f"User Topic: {user_query}\nPrevious Queries: {previous_search_queries}\n\nCollected Context:\n{context_combined}\n\n{prompt}"}
    ]
    response = await call_openrouter_async(session, messages)
    if not response:
        return []
    cleaned = response.strip()
    if cleaned == "":
        # Sentinel: the planner considers the research complete.
        return ""
    try:
        # Strip Markdown code fences the model sometimes adds despite instructions.
        if cleaned.startswith("```"):
            cleaned = cleaned.split("```")[1]
            if cleaned.startswith("python"):
                cleaned = cleaned[6:]
        cleaned = cleaned.strip()

        # SECURITY: ast.literal_eval instead of eval — the model's output is
        # untrusted text and must never be executed as code.
        new_queries = ast.literal_eval(cleaned)
        if isinstance(new_queries, list):
            return new_queries
        print("LLM response is not a list for extra search queries. Response:", response)
        return []
    except Exception as e:
        print("Failed to parse additional search queries:", e, "\nResponse:", response)
        return []
async def generate_final_report_async(session, user_query, sourced_contexts):
    """
    Construct the final detailed report including numbered in-text citations
    and a trailing reference list mapping each number to its source URL.
    """
    # Assign citation numbers per source URL, in first-seen order.
    references = {}
    formatted_contexts = []
    for ctx in sourced_contexts:
        if ctx.source_url not in references:
            references[ctx.source_url] = len(references) + 1
        formatted_contexts.append(f"{ctx.text} [{references[ctx.source_url]}]")

    context_combined = "\n".join(formatted_contexts)

    # Build the reference section, ordered by citation number.
    ordered_refs = sorted(references.items(), key=lambda item: item[1])
    reference_section = "\n\nReferences:\n" + "\n".join(
        f"[{num}] {url}" for url, num in ordered_refs
    )

    prompt = (
        "You are a proficient academic report writer. Using the compiled contexts below and the original topic, "
        "compose a comprehensive, well-organized, and in-depth report that fully addresses the inquiry. "
        "Ensure that each piece of evidence is tagged with citation numbers in square brackets (e.g., [1], [2]). "
        "Maintain these tags in your final report to show the references. "
        "The style should be academic with proper in-text citations. Do not alter or add citation numbers."
    )

    messages = [
        {"role": "system", "content": "You are an expert academic report composer."},
        {"role": "user", "content": f"User Topic: {user_query}\n\nCollected Context:\n{context_combined}\n\n{prompt}"}
    ]

    report = await call_openrouter_async(session, messages)
    if not report:
        return "Error occurred while generating the report."
    return report + reference_section
async def process_link(session, link, user_query, search_query):
    """
    Handle a single URL: fetch its content, assess its relevance, and if it
    qualifies, extract the associated context.

    Returns a SourcedContext on success, or None at any rejection point
    (empty page, irrelevant page, or empty extraction).
    """
    print(f"Retrieving content from: {link}")
    page_text = await fetch_webpage_text_async(session, link)
    if not page_text:
        return None

    usefulness = await is_page_useful_async(session, user_query, page_text)
    print(f"Relevance of {link}: {usefulness}")
    if usefulness != "Yes":
        return None

    context = await extract_relevant_context_async(session, user_query, search_query, page_text)
    if not context:
        return None
    print(f"Context extracted from {link} (first 200 characters): {context[:200]}")
    return SourcedContext(context, link)
async def research_flow(user_query, iteration_limit):
    """
    Primary research procedure intended for integration with Streamlit.

    Iteratively: run web searches for the current queries, extract relevant
    context from each result link, then ask the LLM planner whether further
    queries are needed — stopping after ``iteration_limit`` rounds or when the
    planner signals completion. Returns the final report text (or an error
    message string when no initial queries could be generated).
    """
    sourced_contexts = []    # SourcedContext objects accumulated over all iterations
    all_search_queries = []  # every query issued so far (fed back to the planner)
    iteration = 0

    async with aiohttp.ClientSession() as session:
        new_search_queries = await generate_search_queries_async(session, user_query)
        if not new_search_queries:
            return "No search queries were generated by the LLM. Terminating process."
        all_search_queries.extend(new_search_queries)

        while iteration < iteration_limit:
            print(f"\n--- Iteration {iteration + 1} ---")
            iteration_contexts = []

            # Run all searches for this round concurrently.
            search_tasks = [perform_search_async(session, query) for query in new_search_queries]
            search_results = await asyncio.gather(*search_tasks)

            # De-duplicate links, remembering which query first produced each link.
            unique_links = {}
            for idx, links in enumerate(search_results):
                query = new_search_queries[idx]
                for link in links:
                    if link not in unique_links:
                        unique_links[link] = query

            print(f"Collected {len(unique_links)} distinct links in this iteration.")

            # Fetch / evaluate / extract every link concurrently.
            link_tasks = [
                process_link(session, link, user_query, unique_links[link])
                for link in unique_links
            ]
            link_results = await asyncio.gather(*link_tasks)

            # process_link returns None for rejected links; keep the rest.
            for res in link_results:
                if res:
                    iteration_contexts.append(res)

            if iteration_contexts:
                sourced_contexts.extend(iteration_contexts)
            else:
                print("No relevant information was found in this iteration.")

            # Ask the planner for the next round ("" means research is complete).
            context_texts = [ctx.text for ctx in sourced_contexts]
            new_search_queries = await get_new_search_queries_async(
                session, user_query, all_search_queries, context_texts
            )

            if new_search_queries == "":
                print("LLM has determined that additional research is unnecessary.")
                break
            elif new_search_queries:
                print("LLM provided extra search queries:", new_search_queries)
                all_search_queries.extend(new_search_queries)
            else:
                print("LLM returned no further search queries. Concluding the loop.")
                break

            iteration += 1

        # Compose the report from everything gathered, inside the session scope.
        final_report = await generate_final_report_async(session, user_query, sourced_contexts)
        return final_report
def main():
    """
    CLI entry point for testing this research module.

    Prompts for a topic and an iteration cap, runs the full research flow,
    and prints the resulting report to stdout.
    """
    user_query = input("Enter your research topic/question: ").strip()
    raw_limit = input("Enter the maximum number of iterations (default is 10): ").strip()
    # Non-numeric or empty input falls back to the default of 10 iterations.
    iteration_limit = int(raw_limit) if raw_limit.isdigit() else 10

    report = asyncio.run(research_flow(user_query, iteration_limit))
    print("\n==== FINAL REPORT ====\n")
    print(report)

if __name__ == "__main__":
    main()