Update run.py
Browse files
run.py
CHANGED
@@ -68,10 +68,10 @@ def main():
|
|
68 |
|
69 |
sep = "\n" + "="*80 + "\n"
|
70 |
overview = sep.join([f"Article: {i+1}\n{article}" for i, article in enumerate(df["article_summary"])])
|
71 |
-
report = create_report(overview, client, MODEL)
|
72 |
|
73 |
# Extract report content
|
74 |
-
final_report = report
|
75 |
|
76 |
file_path = f"reports/{'-'.join(topic.lower().split())}/{date.today().strftime('%Y-%m-%d')}.md"
|
77 |
print(f"Uploading to {args.repo_id} under {file_path}...")
|
@@ -129,7 +129,8 @@ def summarize(article, client, model):
|
|
129 |
"""Summarize an article using the HuggingFace inference API"""
|
130 |
user_msg = f"""\
|
131 |
Summarize the following news article in a few bullet points. \
|
132 |
-
Note that the reader is an expert in the field and wants only the most relevant and novel information.
|
|
|
133 |
|
134 |
Article:
|
135 |
{article}
|
@@ -162,10 +163,15 @@ URL: {row['url']}
|
|
162 |
Summary:\n{row['summary_clean']}"""
|
163 |
return summary
|
164 |
|
165 |
-
def create_report(articles_overview, client, model):
|
166 |
"""Create a comprehensive report from all article summaries"""
|
167 |
user_msg = f"""\
|
168 |
-
|
|
|
|
|
|
|
|
|
|
|
169 |
|
170 |
Separete the report into these categories:
|
171 |
- Breaking news: anything that can also appear below but is the most important news of the day
|
@@ -174,16 +180,31 @@ Separete the report into these categories:
|
|
174 |
- Big Tech news (e.g. news from Google/Meta/OpenAI etc.)
|
175 |
- Policy (e.g. US administration or EU policy)
|
176 |
- Products (e.g. news of products that are powered by AI in some way)
|
177 |
-
- Miscellaneous (whatever doesn't fit into the others)
|
178 |
|
179 |
Style: The reader is an expert in the field and wants only the most relevant and novel information. \
|
180 |
Omit articles that are irrelevant to the field of AI and feel free to aggregate several articles about the same topic into one point. \
|
181 |
-
Start the report with a summary of how many articles you processed and which time window.
|
182 |
|
183 |
Format: Use markdown formatting and add links at the end of each section linking to the original articles.
|
184 |
|
185 |
-
|
186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
187 |
"""
|
188 |
|
189 |
messages=[
|
@@ -199,8 +220,47 @@ Articles:\
|
|
199 |
temperature=0.8,
|
200 |
max_tokens=32000,
|
201 |
)
|
202 |
-
|
203 |
return response.choices[0].message.content
|
204 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
205 |
if __name__ == "__main__":
|
206 |
main()
|
|
|
68 |
|
69 |
sep = "\n" + "="*80 + "\n"
|
70 |
overview = sep.join([f"Article: {i+1}\n{article}" for i, article in enumerate(df["article_summary"])])
|
71 |
+
report = create_report(overview, client, MODEL, topic)
|
72 |
|
73 |
# Extract report content
|
74 |
+
final_report = postprocess_report(report, topic, num, MODEL)
|
75 |
|
76 |
file_path = f"reports/{'-'.join(topic.lower().split())}/{date.today().strftime('%Y-%m-%d')}.md"
|
77 |
print(f"Uploading to {args.repo_id} under {file_path}...")
|
|
|
129 |
"""Summarize an article using the HuggingFace inference API"""
|
130 |
user_msg = f"""\
|
131 |
Summarize the following news article in a few bullet points. \
|
132 |
+
Note that the reader is an expert in the field and wants only the most relevant and novel information and likes to know the specific details. \
|
133 |
+
So keep the summary brief but don't omit technical terms or specific information.
|
134 |
|
135 |
Article:
|
136 |
{article}
|
|
|
163 |
Summary:\n{row['summary_clean']}"""
|
164 |
return summary
|
165 |
|
166 |
+
def create_report(articles_overview, client, model, topic):
|
167 |
"""Create a comprehensive report from all article summaries"""
|
168 |
user_msg = f"""\
|
169 |
+
News articles:\
|
170 |
+
{articles_overview}
|
171 |
+
|
172 |
+
===========================
|
173 |
+
|
174 |
+
Create a summary report of the newspaper articles above. Ignore everything that's not releated to the topic '{topic}'
|
175 |
|
176 |
Separete the report into these categories:
|
177 |
- Breaking news: anything that can also appear below but is the most important news of the day
|
|
|
180 |
- Big Tech news (e.g. news from Google/Meta/OpenAI etc.)
|
181 |
- Policy (e.g. US administration or EU policy)
|
182 |
- Products (e.g. news of products that are powered by AI in some way)
|
183 |
+
- Miscellaneous (whatever doesn't fit into the others but still relevant to the topic)
|
184 |
|
185 |
Style: The reader is an expert in the field and wants only the most relevant and novel information. \
|
186 |
Omit articles that are irrelevant to the field of AI and feel free to aggregate several articles about the same topic into one point. \
|
|
|
187 |
|
188 |
Format: Use markdown formatting and add links at the end of each section linking to the original articles.
|
189 |
|
190 |
+
Example snippet:
|
191 |
+
|
192 |
+
```
|
193 |
+
# NEWS_SUMMARY
|
194 |
+
|
195 |
+
---
|
196 |
+
|
197 |
+
## **Breaking News**
|
198 |
+
- **Google and Apple in talks to integrate Gemini AI into Apple Intelligence by mid-2025** _[Apple Insider](https://appleinsider.com/articles/25/04/30/google-wants-gemini-ai-deal-with-apple-by-mid-2025), [The Verge](https://www.theverge.com/news/658770/google-gemini-apple-iphone-deal-ai)_
|
199 |
+
- Google’s Gemini AI could enhance Siri with advanced reasoning and contextual capabilities, though Apple’s strict privacy controls may limit deep system access.
|
200 |
+
- A potential deal could accelerate Apple’s AI development and expand Google’s AI reach.
|
201 |
+
- **Apple Vision Pro launch delayed** _[Six Colors](https://sixcolors.com/post/2025/04/apple-in-the-enterprise-the-complete-2025-commentary/)_
|
202 |
+
- Apple’s mixed-reality headset, featuring advanced AI integration, is expected to arrive in 2025, though specifics remain unclear.
|
203 |
+
|
204 |
+
---
|
205 |
+
|
206 |
+
... followed by the other sections.
|
207 |
+
```
|
208 |
"""
|
209 |
|
210 |
messages=[
|
|
|
220 |
temperature=0.8,
|
221 |
max_tokens=32000,
|
222 |
)
|
223 |
+
|
224 |
return response.choices[0].message.content
|
225 |
|
226 |
+
def postprocess_report(report, summaries, topic, num_articles, model):
    """Assemble the final markdown report from the raw model output.

    The raw ``report`` may start with a reasoning trace terminated by a
    ``</think>`` tag. The trace is separated from the report body, the
    ``NEWS_SUMMARY`` placeholder in the body is replaced by a generated
    header, and the sources and the reasoning trace are appended inside
    collapsible ``<details>`` sections.

    Args:
        report: Raw text returned by the model.
        summaries: Text inserted verbatim into the "All sources" section.
        topic: Topic name shown in the report header.
        num_articles: Value shown as the number of processed articles.
        model: Model identifier shown in the report header.

    Returns:
        The complete markdown report as a single string.
    """
    today = date.today()
    yesterday = today - timedelta(days=1)
    report_summary = f"""\
# News Summary: {topic}

**Period:** {yesterday:%Y-%m-%d}-{today:%Y-%m-%d}
**Processed articles:** {num_articles}
**Model**: {model}
"""

    # partition() splits on the first closing tag only, so any later
    # "</think>" occurrence stays part of the report body instead of
    # truncating it.
    thoughts, closing_tag, content = report.partition("</think>")
    if closing_tag:
        report_content = content.strip()
        # str.replace returns a new string; the result must be kept for the
        # opening tag to actually be removed from the trace.
        report_thoughts = thoughts.replace("<think>", "").strip() or "No thoughts."
    else:
        report_content = report.strip()
        report_thoughts = "No thoughts."

    # The generated header already carries its own "# " heading marker, so
    # drop a heading marker preceding the placeholder to avoid "# # News ...".
    report_body = report_content.replace("# NEWS_SUMMARY", "NEWS_SUMMARY")
    report_body = report_body.replace("NEWS_SUMMARY", report_summary)

    final_report = f"""\
{report_body}

## Sources

<details>
<summary>All sources</summary>

{summaries}

</details>

## Model reasoning

<details>
<summary>Model thought traces</summary>

{report_thoughts}

</details>

"""

    return final_report
|
263 |
+
|
264 |
+
|
265 |
if __name__ == "__main__":
|
266 |
main()
|