Alejandro Ardila committed on
Commit d0c87f7 · 1 Parent(s): b1fe90c

First attempt

Files changed (4)
  1. .gitignore +179 -0
  2. app.py +98 -0
  3. modal_app.py +319 -0
  4. requirements.txt +7 -0
.gitignore ADDED
@@ -0,0 +1,179 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ pip-wheel-metadata/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # IDE
+ .vscode/
+ .idea/
+ *.swp
+ *.swo
+ *~
+
+ # OS generated files
+ .DS_Store
+ .DS_Store?
+ ._*
+ .Spotlight-V100
+ .Trashes
+ ehthumbs.db
+ Thumbs.db
+
+ # Temporary files
+ *.tmp
+ *.temp
+
+ # API keys and secrets
+ .env.local
+ .env.development.local
+ .env.test.local
+ .env.production.local
+ secrets.json
+ config.json
+ *.key
+ *.pem
+
+ # Logs
+ logs/
+ *.log
+
+ # Database
+ *.db
+ *.sqlite
+ *.sqlite3
+
+ # Mac specific
+ .AppleDouble
+ .LSOverride
+ Icon
app.py ADDED
@@ -0,0 +1,98 @@
+ import gradio as gr
+ import modal
+ import json
+
+ # --- Configuration ---
+ MODAL_APP_NAME = "sitegeist-ai-app"
+
+ def analyze_web_content(urls_json: str, deep_analysis: bool = False, analysis_prompt: str = "Summarize the content and identify key themes."):
+     """
+     MCP Tool: Analyzes web content from one or more URLs.
+     Performs deep analysis with marketing metrics if a single URL is provided and deep_analysis is True.
+     Otherwise, performs a swarm analysis for multiple URLs or a single URL without deep_analysis.
+
+     Args:
+         urls_json (str): A JSON string representing a list of URLs, e.g. '["http://example.com", "http://another.com"]'
+         deep_analysis (bool): If True and only one URL is provided, performs an in-depth analysis.
+         analysis_prompt (str): The specific analysis to perform on the content.
+     """
+     print(f"Received request: deep_analysis={deep_analysis}, prompt='{analysis_prompt}', urls_json='{urls_json}'")
+     try:
+         urls = json.loads(urls_json)
+         if not isinstance(urls, list) or not all(isinstance(url, str) for url in urls):
+             raise ValueError("Input must be a JSON string of a list of URLs.")
+         if not urls:
+             return json.dumps({"status": "error", "message": "URL list cannot be empty."})
+     except json.JSONDecodeError:
+         return json.dumps({"status": "error", "message": "Invalid JSON format for URLs."})
+     except ValueError as ve:
+         return json.dumps({"status": "error", "message": str(ve)})
+
+     result = None
+     try:
+         if len(urls) == 1 and deep_analysis:
+             print(f"Calling Modal: deep_analyze_url for {urls[0]}")
+             # Look up the Modal function
+             modal_deep_analyze = modal.Function.lookup(MODAL_APP_NAME, "deep_analyze_url")
+             if modal_deep_analyze is None:
+                 return json.dumps({"status": "error", "message": f"Could not find Modal function 'deep_analyze_url' in app '{MODAL_APP_NAME}'."})
+             # Call the Modal function remotely
+             result = modal_deep_analyze.remote(url=urls[0])
+         else:
+             print(f"Calling Modal: swarm_analyze_urls for {len(urls)} URLs")
+             # Look up the Modal function
+             modal_swarm_analyze = modal.Function.lookup(MODAL_APP_NAME, "swarm_analyze_urls")
+             if modal_swarm_analyze is None:
+                 return json.dumps({"status": "error", "message": f"Could not find Modal function 'swarm_analyze_urls' in app '{MODAL_APP_NAME}'."})
+             # Call the Modal function remotely
+             result = modal_swarm_analyze.remote(urls=urls, analysis_prompt=analysis_prompt)
+
+         return json.dumps(result, indent=2)  # Return the result from Modal as a JSON string
+
+     except modal.exception.NotFoundError as e:
+         print(f"Modal function not found: {e}")
+         return json.dumps({"status": "error", "message": f"Modal function lookup failed. Ensure '{MODAL_APP_NAME}' is deployed and functions are correctly named. Details: {e}"})
+     except Exception as e:
+         print(f"An unexpected error occurred: {e}")
+         return json.dumps({"status": "error", "message": f"An unexpected error occurred: {str(e)}"})
+
+ # --- Gradio Interface for Testing (if not using MCP directly) ---
+ # This allows you to test the analyze_web_content function via a web UI.
+ # You would typically expose `analyze_web_content` directly as an MCP tool.
+ with gr.Blocks() as demo:
+     gr.Markdown("# Sitegeist AI: Marketing & Content Intelligence Engine")
+     gr.Markdown(
+         "Enter URLs as a JSON list (e.g., `[\"http://url1.com\", \"http://url2.com\"]`). "
+         "Requests are forwarded to the deployed Modal backend for analysis."
+     )
+     with gr.Row():
+         urls_input = gr.Textbox(label="URLs (JSON list)", placeholder='["https://example.com"]')
+         deep_analysis_checkbox = gr.Checkbox(label="Perform Deep Analysis (for single URL)", value=False)
+         analysis_prompt_input = gr.Textbox(label="Analysis Prompt", value="Summarize the content.")
+     submit_button = gr.Button("Analyze Content")
+     output_json = gr.JSON(label="Analysis Result")
+
+     submit_button.click(
+         analyze_web_content,
+         inputs=[urls_input, deep_analysis_checkbox, analysis_prompt_input],
+         outputs=output_json
+     )
+
+ if __name__ == "__main__":
+     # To run this Gradio app: python app.py
+     # Ensure your Modal token is configured (`modal token set`)
+     # and the Modal app (`modal_app.py`) is deployed (`modal deploy modal_app.py`),
+     # or is runnable locally if you are testing `modal run modal_app.py` in another terminal.
+
+     # For the Gradio app to successfully call `modal.Function.lookup()`,
+     # it generally expects the Modal app to be deployed, or you need to be
+     # running within a `modal.stub.run()` context if calling local stubs
+     # from another Modal process, which is more advanced.
+     # The simplest way for this sketch is to deploy `modal_app.py` first.
+     print("Attempting to launch Gradio demo...")
+     print("REMINDER: For Gradio to connect to Modal functions,")
+     print("1. Deploy 'modal_app.py' using 'modal deploy modal_app.py'.")
+     print("2. Ensure your Modal token is set up.")
+     print(f"3. The MODAL_APP_NAME ('{MODAL_APP_NAME}') in app.py must match the app name in modal_app.py.")
+
+     demo.launch(mcp_server=True)
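
Note (not part of the commit): a minimal local smoke test for the MCP tool above, assuming `modal_app.py` has already been deployed with `modal deploy modal_app.py` and a Modal token is configured. Importing `app` builds the Gradio Blocks object but does not launch the server, so the function can be called directly.

# Hypothetical smoke test, run from the repository root after deployment.
from app import analyze_web_content

result = analyze_web_content(
    urls_json='["https://example.com"]',
    deep_analysis=True,
    analysis_prompt="Summarize the content.",
)
print(result)  # JSON string: either an error payload or the analysis returned by Modal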
modal_app.py ADDED
@@ -0,0 +1,319 @@
+ import modal
+ import requests
+ import textstat  # For readability scores
+ from bs4 import BeautifulSoup  # For HTML parsing
+ import json  # For handling JSON data
+ import os  # For environment variables
+ import openai  # For OpenAI API calls
+
+ # --- Configuration ---
+ # Define a Modal image with necessary Python packages
+ modal_image = modal.Image.debian_slim().pip_install(
+     "requests",
+     "beautifulsoup4",
+     "textstat",
+     "lxml",  # A robust parser for BeautifulSoup
+     "openai"  # OpenAI API library
+ )
+
+ # Define a Modal App. The name is important for lookup from Gradio.
+ app = modal.App(name="sitegeist-ai-app")
+
+ # --- OpenAI LLM Function ---
+ def query_llm(prompt_text: str, expected_json_structure: dict):
+     """
+     Calls the OpenAI API to get structured JSON responses.
+
+     Args:
+         prompt_text (str): The prompt to send to OpenAI
+         expected_json_structure (dict): Dictionary structure to guide the output format
+
+     Returns:
+         dict: Parsed JSON response from OpenAI
+
+     Raises:
+         Exception: If API call fails or response is not valid JSON
+     """
+     try:
+         # Initialize OpenAI client with API key from environment variable
+         client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
+
+         # Create system prompt that instructs the model to return JSON with expected structure
+         expected_keys = list(expected_json_structure.keys())
+         system_prompt = (
+             "You are a helpful assistant designed to output JSON. "
+             "Based on the user's content, provide a JSON object with the following keys: "
+             f"{', '.join(expected_keys)}. "
+             "Ensure your response is valid JSON format only."
+         )
+
+         print(f"--- OpenAI API CALLED ---\nPrompt: {prompt_text[:200]}...\nExpected JSON keys: {expected_keys}")
+
+         # Make the API call with JSON mode enabled
+         response = client.chat.completions.create(
+             model="gpt-4-turbo-preview",
+             response_format={"type": "json_object"},  # Enable JSON mode
+             messages=[
+                 {"role": "system", "content": system_prompt},
+                 {"role": "user", "content": prompt_text}
+             ],
+             temperature=0.3  # Lower temperature for more consistent structured output
+         )
+
+         # Extract and parse the JSON response
+         result_json_str = response.choices[0].message.content
+         print(f"OpenAI Response: {result_json_str[:200]}...")
+
+         # Parse the JSON string into a Python dictionary
+         result_dict = json.loads(result_json_str)
+         return result_dict
+
+     except openai.APIError as e:
+         error_msg = f"OpenAI API error: {str(e)}"
+         print(f"ERROR: {error_msg}")
+
+         # Fallback: Try without JSON mode if the model doesn't support it
+         if "response_format" in str(e) and "not supported" in str(e):
+             print("Falling back to non-JSON mode...")
+             try:
+                 fallback_response = client.chat.completions.create(
+                     model="gpt-4-turbo-preview",
+                     messages=[
+                         {"role": "system", "content": system_prompt + " Make sure to respond with valid JSON only."},
+                         {"role": "user", "content": prompt_text}
+                     ],
+                     temperature=0.3
+                 )
+
+                 fallback_result_str = fallback_response.choices[0].message.content
+                 print(f"Fallback Response: {fallback_result_str[:200]}...")
+
+                 # Try to extract JSON from the response (in case there's extra text)
+                 import re
+                 json_match = re.search(r'\{.*\}', fallback_result_str, re.DOTALL)
+                 if json_match:
+                     json_str = json_match.group()
+                     return json.loads(json_str)
+                 else:
+                     return json.loads(fallback_result_str)
+
+             except Exception as fallback_error:
+                 return {"error": f"Fallback also failed: {str(fallback_error)}", "status": "fallback_failed"}
+         else:
+             return {"error": error_msg, "status": "api_error"}
+
+     except json.JSONDecodeError as e:
+         error_msg = f"Failed to parse JSON response: {str(e)}"
+         print(f"ERROR: {error_msg}")
+         return {"error": error_msg, "status": "json_parse_error", "raw_response": result_json_str}
+
+     except Exception as e:
+         error_msg = f"Unexpected error in OpenAI call: {str(e)}"
+         print(f"ERROR: {error_msg}")
+         return {"error": error_msg, "status": "unexpected_error"}
+
+ # --- Deep Analysis Function (Runs on Modal) ---
+ @app.function(image=modal_image, secrets=[modal.Secret.from_name("openai-secret")])
+ def deep_analyze_url(url: str):
+     """
+     Performs a deep marketing and content analysis on a single URL.
+     """
+     print(f"Deep analyzing URL: {url}")
+     scraped_data = {}
+     text_content = ""
+
+     # 1. Scraping
+     try:
+         response = requests.get(url, timeout=10, headers={'User-Agent': 'Mozilla/5.0'})
+         response.raise_for_status()  # Raise an exception for HTTP errors
+         soup = BeautifulSoup(response.content, 'lxml')
+
+         # Try to find main content, fall back to body
+         main_content_area = soup.find('article') or soup.find('main') or soup.body
+         if main_content_area:
+             text_content = main_content_area.get_text(separator=' ', strip=True)
+         else:
+             text_content = soup.get_text(separator=' ', strip=True)  # Fallback if no specific main area
+
+         scraped_data["meta_title"] = soup.find('title').get_text(strip=True) if soup.find('title') else "Not found"
+         meta_desc_tag = soup.find('meta', attrs={'name': 'description'})
+         scraped_data["meta_description"] = meta_desc_tag['content'] if meta_desc_tag and 'content' in meta_desc_tag.attrs else "Not found"
+
+         # Link counts
+         all_links = [a['href'] for a in soup.find_all('a', href=True)]
+         scraped_data["internal_links"] = len([link for link in all_links if url in link or link.startswith('/')])
+         scraped_data["external_links"] = len([link for link in all_links if url not in link and link.startswith('http')])
+
+     except requests.exceptions.RequestException as e:
+         return {"url": url, "status": "failed", "error": f"Scraping failed: {str(e)}"}
+     except Exception as e:
+         return {"url": url, "status": "failed", "error": f"Error during scraping/parsing: {str(e)}"}
+
+     if not text_content:
+         return {"url": url, "status": "failed", "error": "Could not extract text content."}
+
+     # 2. Statistical & SEO Analysis (using textstat)
+     try:
+         word_count = textstat.lexicon_count(text_content)
+         sentence_count = textstat.sentence_count(text_content)
+         readability_metrics = {
+             "flesch_reading_ease": textstat.flesch_reading_ease(text_content),
+             "flesch_kincaid_grade": textstat.flesch_kincaid_grade(text_content),
+             "estimated_reading_time_minutes": round(word_count / 200, 2) if word_count > 0 else 0,
+             "word_count": word_count,
+             "sentence_count": sentence_count,
+         }
+     except Exception as e:
+         readability_metrics = {"error": f"Readability analysis failed: {str(e)}"}
+
+
+     # 3. LLM-Powered Qualitative Analysis (OpenAI)
+     llm_prompt_for_deep_analysis = f"""
+     Analyze the following web content from {url}. Extract the requested information and provide it in a JSON format.
+
+     Content: "{text_content}"
+
+     Please analyze this content and provide:
+     - primary_keywords: List of 3-5 main keywords/topics
+     - lsi_keywords: List of related semantic keywords (5-8 keywords)
+     - sentiment: Object with "score" (Positive/Negative/Neutral) and "confidence" (0-1)
+     - emotional_tone: List of emotional descriptors (2-4 items)
+     - cta_analysis: Object with "has_cta" (boolean) and "cta_text" (string or null)
+     - brand_mentions: List of brand names mentioned in the content
+     """
+
+     # This defines the structure we expect from the LLM for deep analysis
+     expected_llm_structure_deep = {
+         "primary_keywords": [], "lsi_keywords": [], "sentiment": {},
+         "emotional_tone": [], "cta_analysis": {}, "brand_mentions": []
+     }
+     llm_driven_analysis_result = query_llm(llm_prompt_for_deep_analysis, expected_llm_structure_deep)
+
+     # Check if there was an error in the LLM call
+     if "error" in llm_driven_analysis_result:
+         return {
+             "url": url,
+             "status": "partial_success",
+             "analysis": {
+                 "readability_metrics": readability_metrics,
+                 "seo_metrics": {
+                     "meta_title": scraped_data.get("meta_title"),
+                     "meta_description": scraped_data.get("meta_description"),
+                     "internal_links": scraped_data.get("internal_links"),
+                     "external_links": scraped_data.get("external_links"),
+                 },
+                 "llm_driven_analysis": llm_driven_analysis_result  # Will contain error info
+             }
+         }
+
+     # 4. Combine and Return
+     return {
+         "url": url,
+         "status": "success",
+         "analysis": {
+             "readability_metrics": readability_metrics,
+             "seo_metrics": {  # Merging scraped SEO data here
+                 "meta_title": scraped_data.get("meta_title"),
+                 "meta_description": scraped_data.get("meta_description"),
+                 "internal_links": scraped_data.get("internal_links"),
+                 "external_links": scraped_data.get("external_links"),
+             },
+             "llm_driven_analysis": llm_driven_analysis_result
+         }
+     }
+
+ # --- Swarm Analysis Functions (Runs on Modal) ---
+ @app.function(image=modal_image, secrets=[modal.Secret.from_name("openai-secret")])
+ def scrape_and_analyze(url: str, analysis_prompt_for_swarm: str):
+     """
+     Helper function for swarm analysis: scrapes and performs a *simple* analysis on one URL.
+     """
+     print(f"Swarm - analyzing single URL: {url}")
+     try:
+         response = requests.get(url, timeout=10, headers={'User-Agent': 'Mozilla/5.0'})
+         response.raise_for_status()
+         soup = BeautifulSoup(response.content, 'lxml')
+         main_content_area = soup.find('article') or soup.find('main') or soup.body
+         text_content = main_content_area.get_text(separator=' ', strip=True) if main_content_area else soup.get_text(separator=' ', strip=True)
+
+         if not text_content:
+             return {"url": url, "status": "failed", "error": "No text content found"}
+
+         # OpenAI call for a simple summary
+         llm_prompt = f"""
+         Content from {url}: {text_content[:1000]}
+
+         {analysis_prompt_for_swarm}
+
+         Please provide a concise summary of the main topic and key points from this content.
+         """
+         summary_result = query_llm(llm_prompt, {"summary": ""})  # Expecting a simple summary
+         return {"url": url, "status": "success", "analysis": summary_result}
+
+     except requests.exceptions.RequestException as e:
+         return {"url": url, "status": "failed", "error": f"Scraping failed: {str(e)}"}
+     except Exception as e:
+         return {"url": url, "status": "failed", "error": f"Processing error: {str(e)}"}
+
+ @app.function(image=modal_image, secrets=[modal.Secret.from_name("openai-secret")], timeout=60000)  # Longer timeout for potentially many URLs
+ def swarm_analyze_urls(urls: list[str], analysis_prompt: str):
+     """
+     Scrapes and analyzes a list of URLs in parallel for swarm mode.
+     """
+     print(f"Swarm analyzing {len(urls)} URLs. Prompt: {analysis_prompt}")
+     individual_results = []
+     # Use .map to run scrape_and_analyze in parallel for each URL
+     # The 'kwargs' argument passes the analysis_prompt to each mapped function call
+     for result in scrape_and_analyze.map(urls, kwargs={"analysis_prompt_for_swarm": analysis_prompt}):
+         individual_results.append(result)
+
+     # Aggregate results (OpenAI call)
+     successful_summaries = [
+         res["analysis"]["summary"]
+         for res in individual_results
+         if res["status"] == "success" and "analysis" in res and "summary" in res["analysis"]
+     ]
+
+     if not successful_summaries:
+         return {
+             "overall_summary": "No successful analyses to aggregate.",
+             "top_themes": [],
+             "individual_results": individual_results
+         }
+
+     aggregation_prompt = f"""
+     Synthesize these summaries into a comprehensive overview and identify the top themes:
+
+     Summaries: {'. '.join(successful_summaries)}
+
+     Please provide:
+     - aggregated_summary: A comprehensive overview synthesizing all summaries
+     - top_themes: List of 3-5 main themes that emerge across all content
+     """
+     aggregated_llm_result = query_llm(aggregation_prompt, {"aggregated_summary": "", "top_themes": []})
+
+     return {
+         "overall_summary": aggregated_llm_result.get("aggregated_summary"),
+         "top_themes": aggregated_llm_result.get("top_themes"),
+         "individual_results": individual_results
+     }
+
+ # --- Local Stub for Testing (Optional) ---
+ # This allows you to test your Modal functions locally without deploying.
+ # To run: modal run modal_app.py
+ @app.local_entrypoint()
+ def main():
+     print("--- Testing deep_analyze_url ---")
+     # Test with a known working URL for scraping
+     test_url_deep = "https://modal.com/docs/guide"  # Example URL
+     deep_result = deep_analyze_url.remote(test_url_deep)
+     print(json.dumps(deep_result, indent=2))
+
+     print("\n--- Testing swarm_analyze_urls ---")
+     test_urls_swarm = [
+         "https://modal.com/blog",
+         "https://gantry.io/blog",
+         "http://example.com/nonexistentpage"
+     ]
+     swarm_result = swarm_analyze_urls.remote(test_urls_swarm, "Provide a brief summary of the main topic.")
+     print(json.dumps(swarm_result, indent=2))
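
Note (not part of the commit): a minimal sketch of calling the deployed functions from a separate Python process, assuming `modal deploy modal_app.py` has run and a Modal secret named "openai-secret" providing OPENAI_API_KEY exists in the workspace. It mirrors the lookup pattern used in app.py.

import json
import modal

# Look up the deployed functions by app name and function name (same pattern as app.py).
deep = modal.Function.lookup("sitegeist-ai-app", "deep_analyze_url")
swarm = modal.Function.lookup("sitegeist-ai-app", "swarm_analyze_urls")

print(json.dumps(deep.remote(url="https://example.com"), indent=2))
print(json.dumps(
    swarm.remote(
        urls=["https://example.com", "https://modal.com/blog"],
        analysis_prompt="Provide a brief summary of the main topic.",
    ),
    indent=2,
))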
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ modal
+ requests
+ gradio[mcp]
+ textstat
+ beautifulsoup4
+ lxml
+ openai