lvwerra HF Staff committed on
Commit
ccac437
·
verified ·
1 Parent(s): a4321ae

Update run.py

Browse files
Files changed (1) hide show
  1. run.py +70 -10
run.py CHANGED
@@ -68,10 +68,10 @@ def main():
68
 
69
  sep = "\n" + "="*80 + "\n"
70
  overview = sep.join([f"Article: {i+1}\n{article}" for i, article in enumerate(df["article_summary"])])
71
- report = create_report(overview, client, MODEL)
72
 
73
  # Extract report content
74
- final_report = report.split("</think>")[1].strip() if "</think>" in report else report.strip()
75
 
76
  file_path = f"reports/{'-'.join(topic.lower().split())}/{date.today().strftime('%Y-%m-%d')}.md"
77
  print(f"Uploading to {args.repo_id} under {file_path}...")
@@ -129,7 +129,8 @@ def summarize(article, client, model):
129
  """Summarize an article using the HuggingFace inference API"""
130
  user_msg = f"""\
131
  Summarize the following news article in a few bullet points. \
132
- Note that the reader is an expert in the field and wants only the most relevant and novel information.
 
133
 
134
  Article:
135
  {article}
@@ -162,10 +163,15 @@ URL: {row['url']}
162
  Summary:\n{row['summary_clean']}"""
163
  return summary
164
 
165
- def create_report(articles_overview, client, model):
166
  """Create a comprehensive report from all article summaries"""
167
  user_msg = f"""\
168
- Create a summary report of the following newspaper articles.
 
 
 
 
 
169
 
170
  Separete the report into these categories:
171
  - Breaking news: anything that can also appear below but is the most important news of the day
@@ -174,16 +180,31 @@ Separete the report into these categories:
174
  - Big Tech news (e.g. news from Google/Meta/OpenAI etc.)
175
  - Policy (e.g. US administration or EU policy)
176
  - Products (e.g. news of products that are powered by AI in some way)
177
- - Miscellaneous (whatever doesn't fit into the others)
178
 
179
  Style: The reader is an expert in the field and wants only the most relevant and novel information. \
180
  Omit articles that are irrelevant to the field of AI and feel free to aggregate several articles about the same topic into one point. \
181
- Start the report with a summary of how many articles you processed and which time window.
182
 
183
  Format: Use markdown formatting and add links at the end of each section linking to the original articles.
184
 
185
- Articles:\
186
- {articles_overview}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  """
188
 
189
  messages=[
@@ -199,8 +220,47 @@ Articles:\
199
  temperature=0.8,
200
  max_tokens=32000,
201
  )
202
-
203
  return response.choices[0].message.content
204
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
  if __name__ == "__main__":
206
  main()
 
68
 
69
  sep = "\n" + "="*80 + "\n"
70
  overview = sep.join([f"Article: {i+1}\n{article}" for i, article in enumerate(df["article_summary"])])
71
+ report = create_report(overview, client, MODEL, topic)
72
 
73
  # Extract report content
74
+ final_report = postprocess_report(report, topic, num, MODEL)
75
 
76
  file_path = f"reports/{'-'.join(topic.lower().split())}/{date.today().strftime('%Y-%m-%d')}.md"
77
  print(f"Uploading to {args.repo_id} under {file_path}...")
 
129
  """Summarize an article using the HuggingFace inference API"""
130
  user_msg = f"""\
131
  Summarize the following news article in a few bullet points. \
132
+ Note that the reader is an expert in the field and wants only the most relevant and novel information and likes to know the specific details. \
133
+ So keep the summary brief but don't omit technical terms or specific information.
134
 
135
  Article:
136
  {article}
 
163
  Summary:\n{row['summary_clean']}"""
164
  return summary
165
 
166
+ def create_report(articles_overview, client, model, topic):
167
  """Create a comprehensive report from all article summaries"""
168
  user_msg = f"""\
169
+ News articles:\
170
+ {articles_overview}
171
+
172
+ ===========================
173
+
174
+ Create a summary report of the newspaper articles above. Ignore everything that's not releated to the topic '{topic}'
175
 
176
  Separete the report into these categories:
177
  - Breaking news: anything that can also appear below but is the most important news of the day
 
180
  - Big Tech news (e.g. news from Google/Meta/OpenAI etc.)
181
  - Policy (e.g. US administration or EU policy)
182
  - Products (e.g. news of products that are powered by AI in some way)
183
+ - Miscellaneous (whatever doesn't fit into the others but still relevant to the topic)
184
 
185
  Style: The reader is an expert in the field and wants only the most relevant and novel information. \
186
  Omit articles that are irrelevant to the field of AI and feel free to aggregate several articles about the same topic into one point. \
 
187
 
188
  Format: Use markdown formatting and add links at the end of each section linking to the original articles.
189
 
190
+ Example snippet:
191
+
192
+ ```
193
+ # NEWS_SUMMARY
194
+
195
+ ---
196
+
197
+ ## **Breaking News**
198
+ - **Google and Apple in talks to integrate Gemini AI into Apple Intelligence by mid-2025** _[Apple Insider](https://appleinsider.com/articles/25/04/30/google-wants-gemini-ai-deal-with-apple-by-mid-2025), [The Verge](https://www.theverge.com/news/658770/google-gemini-apple-iphone-deal-ai)_
199
+ - Google’s Gemini AI could enhance Siri with advanced reasoning and contextual capabilities, though Apple’s strict privacy controls may limit deep system access.
200
+ - A potential deal could accelerate Apple’s AI development and expand Google’s AI reach.
201
+ - **Apple Vision Pro launch delayed** _[Six Colors](https://sixcolors.com/post/2025/04/apple-in-the-enterprise-the-complete-2025-commentary/)_
202
+ - Apple’s mixed-reality headset, featuring advanced AI integration, is expected to arrive in 2025, though specifics remain unclear.
203
+
204
+ ---
205
+
206
+ ... followed by the other sections.
207
+ ```
208
  """
209
 
210
  messages=[
 
220
  temperature=0.8,
221
  max_tokens=32000,
222
  )
223
+
224
  return response.choices[0].message.content
225
 
226
def postprocess_report(report, summaries, topic, num_articles, model):
    """Assemble the final markdown report from the raw model output.

    The raw output may start with a ``<think>...</think>`` reasoning section.
    That section is separated from the report body and moved into a
    collapsible "Model reasoning" block at the end of the document. The
    ``NEWS_SUMMARY`` placeholder heading in the body is replaced by a header
    carrying the topic, the covered period (yesterday to today), the number
    of processed articles and the model name.

    Args:
        report: Raw text returned by the model.
        summaries: Text placed verbatim inside the "All sources" block.
        topic: Topic name shown in the report title.
        num_articles: Number of processed articles shown in the header.
        model: Model identifier shown in the header.

    Returns:
        The complete markdown document as a string.
    """
    # NOTE(review): the call site visible in main() passes four positional
    # arguments (report, topic, num, MODEL); this signature needs five.
    # Confirm the caller also supplies `summaries`.

    # Local import so the function does not depend on the module-level
    # imports already providing `timedelta`.
    from datetime import date, timedelta

    # Read the clock once so both ends of the period are consistent even if
    # the call straddles midnight.
    today = date.today()
    period_start = (today - timedelta(days=1)).strftime('%Y-%m-%d')
    period_end = today.strftime('%Y-%m-%d')

    report_summary = f"""\
# News Summary: {topic}

**Period:** {period_start}-{period_end}
**Processed articles:** {num_articles}
**Model**: {model}
"""

    # partition() splits on the first "</think>" only, so nothing after a
    # later occurrence of the tag is silently dropped from the report body.
    thoughts_part, closing_tag, content_part = report.partition("</think>")
    if closing_tag:
        report_content = content_part.strip()
        # str.replace returns a new string; the result must be kept.
        report_thoughts = thoughts_part.replace("<think>", "").strip()
        if not report_thoughts:
            report_thoughts = "No thoughts."
    else:
        report_content = report.strip()
        report_thoughts = "No thoughts."

    # The header already starts with "# ", so swallow the placeholder's own
    # heading marker first to avoid emitting "# # News Summary: ...". A bare
    # placeholder without the marker is still handled by the second replace.
    report_content = report_content.replace('# NEWS_SUMMARY', report_summary)
    report_content = report_content.replace('NEWS_SUMMARY', report_summary)

    final_report = f"""\
{report_content}

## Sources

<details>
<summary>All sources</summary>

{summaries}

</details>

## Model reasoning

<details>
<summary>Model thought traces</summary>

{report_thoughts}

</details>

"""

    return final_report
263
+
264
+
265
  if __name__ == "__main__":
266
  main()