Spaces:

lvwerra
/

ai-news

Runtime error

App Files Files Community

lvwerra HF Staff committed on May 2

Commit

ccac437

verified ·

1 Parent(s): a4321ae

Update run.py

Browse files

Files changed (1) hide show

run.py +70 -10

run.py CHANGED Viewed

@@ -68,10 +68,10 @@ def main():
     sep = "\n" + "="*80 + "\n"
     overview = sep.join([f"Article: {i+1}\n{article}" for i, article in enumerate(df["article_summary"])])
-    report = create_report(overview, client, MODEL)
     # Extract report content
-    final_report = report.split("</think>")[1].strip() if "</think>" in report else report.strip()
     file_path = f"reports/{'-'.join(topic.lower().split())}/{date.today().strftime('%Y-%m-%d')}.md"
     print(f"Uploading to {args.repo_id} under {file_path}...")
@@ -129,7 +129,8 @@ def summarize(article, client, model):
     """Summarize an article using the HuggingFace inference API"""
     user_msg = f"""\
 Summarize the following news article in a few bullet points. \
-Note that the reader is an expert in the field and wants only the most relevant and novel information.
 Article:
 {article}
@@ -162,10 +163,15 @@ URL: {row['url']}
 Summary:\n{row['summary_clean']}"""
     return summary
-def create_report(articles_overview, client, model):
     """Create a comprehensive report from all article summaries"""
     user_msg = f"""\
-Create a summary report of the following newspaper articles.
 Separete the report into these categories:
 - Breaking news: anything that can also appear below but is the most important news of the day
@@ -174,16 +180,31 @@ Separete the report into these categories:
 - Big Tech news (e.g. news from Google/Meta/OpenAI etc.)
 - Policy (e.g. US administration or EU policy)
 - Products (e.g. news of products that are powered by AI in some way)
-- Miscellaneous (whatever doesn't fit into the others)
 Style: The reader is an expert in the field and wants only the most relevant and novel information. \
 Omit articles that are irrelevant to the field of AI and feel free to aggregate several articles about the same topic into one point. \
-Start the report with a summary of how many articles you processed and which time window.
 Format: Use markdown formatting and add links at the end of each section linking to the original articles.
-Articles:\
-{articles_overview}
 """
     messages=[
@@ -199,8 +220,47 @@ Articles:\
         temperature=0.8,
         max_tokens=32000,
     )
     return response.choices[0].message.content
 if __name__ == "__main__":
     main()

     sep = "\n" + "="*80 + "\n"
     overview = sep.join([f"Article: {i+1}\n{article}" for i, article in enumerate(df["article_summary"])])
+    report = create_report(overview, client, MODEL, topic)
     # Extract report content
+    final_report = postprocess_report(report, topic, num, MODEL)
     file_path = f"reports/{'-'.join(topic.lower().split())}/{date.today().strftime('%Y-%m-%d')}.md"
     print(f"Uploading to {args.repo_id} under {file_path}...")
     """Summarize an article using the HuggingFace inference API"""
     user_msg = f"""\
 Summarize the following news article in a few bullet points. \
+Note that the reader is an expert in the field and wants only the most relevant and novel information and likes to know the specific details. \
+So keep the summary brief but don't omit technical terms or specific information.
 Article:
 {article}
 Summary:\n{row['summary_clean']}"""
     return summary
+def create_report(articles_overview, client, model, topic):
     """Create a comprehensive report from all article summaries"""
     user_msg = f"""\
+News articles:\
+{articles_overview}
+===========================
+Create a summary report of the newspaper articles above. Ignore everything that's not releated to the topic '{topic}'
 Separete the report into these categories:
 - Breaking news: anything that can also appear below but is the most important news of the day
 - Big Tech news (e.g. news from Google/Meta/OpenAI etc.)
 - Policy (e.g. US administration or EU policy)
 - Products (e.g. news of products that are powered by AI in some way)
+- Miscellaneous (whatever doesn't fit into the others but still relevant to the topic)
 Style: The reader is an expert in the field and wants only the most relevant and novel information. \
 Omit articles that are irrelevant to the field of AI and feel free to aggregate several articles about the same topic into one point. \
 Format: Use markdown formatting and add links at the end of each section linking to the original articles.
+Example snippet:
+```
+# NEWS_SUMMARY
+---
+## **Breaking News**
+- **Google and Apple in talks to integrate Gemini AI into Apple Intelligence by mid-2025** _[Apple Insider](https://appleinsider.com/articles/25/04/30/google-wants-gemini-ai-deal-with-apple-by-mid-2025), [The Verge](https://www.theverge.com/news/658770/google-gemini-apple-iphone-deal-ai)_
+  - Google’s Gemini AI could enhance Siri with advanced reasoning and contextual capabilities, though Apple’s strict privacy controls may limit deep system access.
+  - A potential deal could accelerate Apple’s AI development and expand Google’s AI reach.
+- **Apple Vision Pro launch delayed** _[Six Colors](https://sixcolors.com/post/2025/04/apple-in-the-enterprise-the-complete-2025-commentary/)_
+  - Apple’s mixed-reality headset, featuring advanced AI integration, is expected to arrive in 2025, though specifics remain unclear.
+---
+... followed by the other sections.
+```
 """
     messages=[
         temperature=0.8,
         max_tokens=32000,
     )
     return response.choices[0].message.content
def postprocess_report(report, summaries, topic, num_articles, model):
    """Assemble the final markdown report from the raw model output.

    The raw ``report`` may start with a reasoning section terminated by
    ``</think>``. That section is separated from the report body and moved
    into a collapsible "Model reasoning" block; the ``NEWS_SUMMARY``
    placeholder in the body is replaced by a generated header.

    Args:
        report: Raw text returned by the model.
        summaries: Text placed verbatim inside the "All sources" block.
        topic: Topic name shown in the header.
        num_articles: Value shown as the number of processed articles.
        model: Model identifier shown in the header.

    Returns:
        The complete markdown document as a string.
    """
    # NOTE(review): the call visible in main() passes four positional
    # arguments (report, topic, num, MODEL) while this signature requires
    # five -- `summaries` appears to be missing at the call site; verify.
    # Read the clock once so both ends of the period come from the same day.
    today = date.today()
    report_summary = f"""\
# News Summary: {topic}
**Period:** {(today - timedelta(days=1)).strftime('%Y-%m-%d')}-{today.strftime('%Y-%m-%d')}
**Processed articles:** {num_articles}
**Model**: {model}
"""
    # partition() splits on the first closing tag only, so everything after
    # it is kept as report content even if the tag occurs again later.
    thoughts, closing_tag, content = report.partition("</think>")
    if closing_tag:
        report_content = content.strip()
        # str.replace returns a new string; the result must be kept,
        # otherwise the opening tag stays in the rendered thought traces.
        report_thoughts = thoughts.replace("<think>", "").strip()
    else:
        report_content = report.strip()
        report_thoughts = "No thoughts."
    # NOTE(review): the prompt example elsewhere in this file shows the
    # placeholder as "# NEWS_SUMMARY", and report_summary itself starts with
    # "# ", so the substitution may yield "# # News Summary: ..." -- confirm
    # whether that is intended.
    final_report = f"""\
{report_content.replace('NEWS_SUMMARY', report_summary)}
## Sources
<details>
<summary>All sources</summary>
{summaries}
</details>
## Model reasoning
<details>
<summary>Model thought traces</summary>
{report_thoughts}
</details>
"""
    return final_report
 if __name__ == "__main__":
     main()