Spaces:
Running
Running
import gradio as gr | |
import pandas as pd | |
import cluster_news | |
import extract_news | |
import summarizer | |
import analyze_sentiment | |
import gather_news | |
# ------------------ Utilities ------------------ | |
def fetch_content(topic):
    """Return up to ten of the newest articles for *topic*, or None.

    ``gather_news.fetch_articles_newsapi`` is tried first.  When it returns a
    string (presumably an error message -- confirm against ``gather_news``),
    ``gather_news.fetch_articles_google`` is tried instead; if that also
    returns a string the function gives up.

    Args:
        topic: Search phrase forwarded unchanged to the fetchers.

    Returns:
        A list of at most 10 article mappings ordered by their
        ``"publishedAt"`` value, newest first, or ``None`` when both fetchers
        returned a string or the result could not be sorted.
    """
    articles = gather_news.fetch_articles_newsapi(topic)
    if isinstance(articles, str):
        articles = gather_news.fetch_articles_google(topic)
        if isinstance(articles, str):
            return None

    try:
        newest_first = sorted(
            articles,
            # `or ""` also covers a key that is present but None; comparing
            # None with a str would raise and discard every article.
            key=lambda article: article.get("publishedAt") or "",
            reverse=True,
        )
    except (AttributeError, TypeError):
        # Entries without .get(), a non-iterable result, or mutually
        # incomparable dates: treat the batch as unusable.
        return None
    return newest_first[:10]
def fetch_and_process_latest_news(sentiment_filters):
    """Run the digest pipeline on the current top headlines.

    Fetches articles for the query ``"top headlines"``, keeps the ten newest,
    extracts/summarizes/scores them, clusters the result and exports a CSV.

    Args:
        sentiment_filters: Selected sentiment labels; returned unchanged as
            the first tuple element and forwarded to
            ``display_clusters_as_columns``.

    Returns:
        A 7-tuple: ``(sentiment_filters, md_0, md_1, md_2, md_3, md_4,
        csv_path_or_None)``, matching the handler's seven output components.
    """
    topic = "Top Headlines"
    articles = gather_news.fetch_articles_newsapi("top headlines")
    if isinstance(articles, str) or not articles:
        return sentiment_filters, "### No latest news available", "", "", "", "", None

    articles = sorted(
        articles,
        # `or ""` keeps the sort from raising TypeError when an article has
        # "publishedAt" present but set to None.
        key=lambda article: article.get("publishedAt") or "",
        reverse=True,
    )[:10]

    extracted_articles = extract_summarize_and_analyze_articles(articles)
    if not extracted_articles:
        return sentiment_filters, "### No content to display", "", "", "", "", None

    df = pd.DataFrame(extracted_articles)
    result = cluster_news.cluster_and_label_articles(
        df, content_column="content", summary_column="summary"
    )
    cluster_md_blocks = display_clusters_as_columns(result, sentiment_filters)
    csv_file, _ = save_clustered_articles(result["dataframe"], topic)
    return sentiment_filters, *cluster_md_blocks, csv_file
def extract_summarize_and_analyze_articles(articles):
    """Build one enriched record per article whose URL yields content.

    For every article mapping that has a truthy ``"url"``, the page content is
    extracted, summarized, and the summary is sentiment-scored.  Articles
    without a URL, or whose extraction returns falsy content, are skipped.

    Args:
        articles: Iterable of mappings supporting ``.get``.

    Returns:
        List of dicts with keys ``title``, ``url``, ``source``, ``author``,
        ``publishedAt``, ``content``, ``summary``, ``sentiment``, ``score``.
    """
    records = []
    for item in articles:
        link = item.get("url")
        if not link:
            continue

        # The second element of the extractor's result is not needed here.
        body, _ = extract_news.extract_full_content(link)
        if not body:
            continue

        digest = summarizer.generate_summary(body)
        label, confidence = analyze_sentiment.analyze_summary(digest)

        record = {
            "title": item.get("title", "No title"),
            "url": link,
            "source": item.get("source", "Unknown"),
            "author": item.get("author", "Unknown"),
            "publishedAt": item.get("publishedAt", "Unknown"),
            "content": body,
            "summary": digest,
            "sentiment": label,
            "score": confidence,
        }
        records.append(record)
    return records
def extract_summarize_and_analyze_content_from_file(files):
    """Summarize and sentiment-score each uploaded text file.

    Args:
        files: Iterable of uploaded files.  Each entry may be an object with
            a ``name`` attribute holding its path (as the original code
            assumed) or a plain path string.

    Returns:
        List of dicts with keys ``title``, ``url``, ``source``, ``author``,
        ``publishedAt``, ``content``, ``summary``, ``sentiment``, ``score``.
        Files whose content is empty or whitespace-only are skipped.
    """
    extracted_articles = []
    for file in files:
        # Accept both upload wrappers exposing `.name` and bare path strings.
        path = getattr(file, "name", file)
        # errors="replace" keeps one badly encoded upload from aborting the
        # whole batch with UnicodeDecodeError.
        with open(path, "r", encoding="utf-8", errors="replace") as f:
            content = f.read()
        if not content.strip():
            continue

        summary = summarizer.generate_summary(content)
        sentiment, score = analyze_sentiment.analyze_summary(summary)
        extracted_articles.append({
            "title": "Custom File",
            "url": "N/A",
            "source": "Uploaded File",
            "author": "Unknown",
            "publishedAt": "Unknown",
            "content": content,
            "summary": summary,
            "sentiment": sentiment,
            "score": score,
        })
    return extracted_articles
def extract_summarize_and_analyze_content_from_urls(urls):
    """Build one enriched record per URL whose content can be extracted.

    Args:
        urls: Iterable of URL strings.

    Returns:
        List of dicts with keys ``title``, ``url``, ``source``, ``author``,
        ``publishedAt``, ``content``, ``summary``, ``sentiment``, ``score``.
        URLs for which extraction returns falsy content are skipped.
    """
    records = []
    for link in urls:
        body, page_title = extract_news.extract_full_content(link)
        if not body:
            # Nothing extracted for this URL; move on to the next one.
            continue

        digest = summarizer.generate_summary(body)
        label, confidence = analyze_sentiment.analyze_summary(digest)

        if page_title:
            heading = page_title
        else:
            heading = "Untitled Article"

        records.append({
            "title": heading,
            "url": link,
            "source": "External Link",
            "author": "Unknown",
            "publishedAt": "Unknown",
            "content": body,
            "summary": digest,
            "sentiment": label,
            "score": confidence,
        })
    return records
def display_clusters_as_columns(result, sentiment_filters=None, max_columns=5):
    """Render clustered articles as a fixed number of Markdown blocks.

    Args:
        result: Mapping with a ``"dataframe"`` entry (a DataFrame providing
            the columns ``title``, ``source``, ``sentiment``, ``summary``,
            ``url`` and ``cluster_label``) and an optional
            ``"detected_topics"`` mapping keyed by cluster label.
        sentiment_filters: Optional collection of capitalized sentiment
            labels to keep.  A falsy value disables filtering.
        max_columns: Number of Markdown blocks to return (default 5, the
            number of columns in the UI).

    Returns:
        A list of exactly ``max_columns`` Markdown strings, padded with empty
        strings.  When there are more clusters than columns, the surplus
        clusters are appended to the last block instead of being dropped.

    Raises:
        ValueError: If ``max_columns`` is smaller than 1.
    """
    if max_columns < 1:
        raise ValueError("max_columns must be at least 1")

    df = result["dataframe"]
    detected_topics = result.get("detected_topics") or {}

    # This assignment writes the capitalized labels back into the caller's
    # dataframe (same object as result["dataframe"]).
    df["sentiment"] = df["sentiment"].str.capitalize()
    if sentiment_filters:
        df = df[df["sentiment"].isin(sentiment_filters)]
    if df.empty:
        return ["### ⚠️ No matching articles."] + [""] * (max_columns - 1)

    markdown_blocks = []
    for cluster_label, articles in df.groupby("cluster_label"):
        parts = [f"### 🧩 Cluster {cluster_label}\n"]

        # Topic metadata is optional; tolerate missing entries or keys
        # rather than failing the whole render with KeyError.
        topics = detected_topics.get(cluster_label)
        if topics:
            if "primary_focus" in topics:
                parts.append(f"**Primary Topic:** {topics['primary_focus']}\n\n")
            related = topics.get("related_topics")
            if related:
                parts.append(f"**Related Topics:** {', '.join(related)}\n\n")

        parts.append(f"**Articles:** {len(articles)}\n\n")
        for _, article in articles.iterrows():
            parts.append(
                f"#### 📰 {article['title']}\n"
                f"- **Source:** {article['source']}\n"
                f"- **Sentiment:** {article['sentiment']}\n"
                f"<details><summary><strong>Summary</strong></summary>\n"
                f"{article['summary']}\n"
                f"</details>\n"
                f"- [Read Full Article]({article['url']})\n\n"
            )
        markdown_blocks.append("".join(parts))

    if len(markdown_blocks) > max_columns:
        # Fold surplus clusters into the final column so no article that
        # passed the filter disappears from the display.
        last = max_columns - 1
        markdown_blocks = markdown_blocks[:last] + ["".join(markdown_blocks[last:])]

    markdown_blocks.extend([""] * (max_columns - len(markdown_blocks)))
    return markdown_blocks
def save_clustered_articles(df, topic):
    """Write *df* to ``<topic>_clustered_articles.csv`` in the working directory.

    Args:
        df: DataFrame to export.
        topic: Label used to build the file name.  Spaces become underscores;
            path separators and other characters that are unsafe in file
            names are also replaced with underscores, because the topic can
            come straight from user input.

    Returns:
        ``(csv_file, None)`` where ``csv_file`` is the name of the written
        file, or ``(None, None)`` when *df* is empty and nothing is written.
    """
    import re  # local import: only this function needs it

    if df.empty:
        return None, None

    # Neutralize separators/reserved characters so the name can never point
    # outside the current directory or be rejected by the filesystem.
    safe_topic = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", topic.replace(" ", "_"))
    csv_file = f"{safe_topic}_clustered_articles.csv"
    df.to_csv(csv_file, index=False)
    return csv_file, None
# ------------------ Pipeline Trigger ------------------ | |
def update_ui_with_columns(topic, files, urls, sentiment_filters):
    """Build the digest from any combination of topic, uploads and URLs.

    Args:
        topic: Free-text topic; surrounding whitespace is ignored and an
            empty or ``None`` value skips the news fetch.
        files: Uploaded files, or a falsy value when none were provided.
        urls: Newline-separated URLs, or a falsy value when none were given.
        sentiment_filters: Selected sentiment labels; returned unchanged as
            the first tuple element and used to filter the display.

    Returns:
        A 7-tuple: ``(sentiment_filters, md_0, md_1, md_2, md_3, md_4,
        csv_path_or_None)``, matching the handler's seven output components.
    """
    # Normalize once so the fetch, the emptiness test and the CSV file name
    # all see the same value (and a None topic does not raise).
    topic = (topic or "").strip()

    extracted_articles = []
    if topic:
        articles = fetch_content(topic)
        if articles:
            extracted_articles.extend(extract_summarize_and_analyze_articles(articles))
    if files:
        extracted_articles.extend(extract_summarize_and_analyze_content_from_file(files))
    if urls:
        url_list = [url.strip() for url in urls.split("\n") if url.strip()]
        extracted_articles.extend(extract_summarize_and_analyze_content_from_urls(url_list))

    if not extracted_articles:
        return sentiment_filters, "### No content to display", "", "", "", "", None

    df = pd.DataFrame(extracted_articles)
    result = cluster_news.cluster_and_label_articles(
        df, content_column="content", summary_column="summary"
    )
    cluster_md_blocks = display_clusters_as_columns(result, sentiment_filters)
    csv_file, _ = save_clustered_articles(result["dataframe"], topic or "batch_upload")
    return sentiment_filters, *cluster_md_blocks, csv_file
def clear_interface():
    """Return reset values for every component wired to the Clear button.

    The tuple has ten entries, in the order of the handler's outputs: topic
    text, sentiment selection, file upload, URL text, the five cluster
    columns, and the CSV download.
    """
    all_sentiments = ["Positive", "Neutral", "Negative"]
    blank_columns = ("",) * 5  # cluster columns 0-4

    reset_upload = gr.update(value=None)    # uploaded_files
    reset_download = gr.update(value=None)  # csv_output

    return ("", all_sentiments, reset_upload, "") + blank_columns + (reset_download,)
# ------------------ Gradio UI ------------------ | |
# NOTE(review): the layout nesting below was reconstructed from a copy of the
# file that had lost its indentation -- in particular whether `csv_output`
# sits inside the topic/filter row should be confirmed against the running app.
with gr.Blocks(theme=gr.themes.Base(), css=".gr-markdown { margin: 10px; }") as demo:
    # --- Header ---
    gr.Markdown("# 📰 Quick Pulse")
    gr.Markdown("### AI-Powered News Summarization with Real-Time Sentiment and Topic Insights")
    gr.Markdown(
        "From headlines to insight, Quick Pulse summarizes news stories, captures emotional context, and clusters related topics to provide structured intelligence—faster than ever")

    # --- Inputs ---
    gr.Markdown("---")  # horizontal rule separating header from inputs

    with gr.Accordion("🗞️ Latest Top Headlines", open=False):
        latest_news_button = gr.Button("Fetch & Summarize Top 10 Headlines")

    with gr.Row():
        topic_input = gr.Textbox(label="Enter Topic", placeholder="e.g. climate change")
        sentiment_filter = gr.CheckboxGroup(
            choices=["Positive", "Neutral", "Negative"],
            value=["Positive", "Neutral", "Negative"],
            label="Sentiment Filter",
        )
        csv_output = gr.File(label="📁 Download Clustered Digest CSV")

    with gr.Accordion("📂 Upload Articles (.txt files)", open=False):
        uploaded_files = gr.File(
            label="Upload .txt Files",
            file_types=[".txt"],
            file_count="multiple",
        )

    with gr.Accordion("🔗 Enter Multiple URLs", open=False):
        urls_input = gr.Textbox(label="Enter URLs (newline separated)", lines=4)

    with gr.Row():
        submit_button = gr.Button(" Generate Digest")
        clear_button = gr.Button(" Clear")

    # --- Output: one Markdown column per cluster ---
    with gr.Row():
        column_0 = gr.Markdown()
        column_1 = gr.Markdown()
        column_2 = gr.Markdown()
        column_3 = gr.Markdown()
        column_4 = gr.Markdown()

    # --- Event wiring ---
    cluster_columns = [column_0, column_1, column_2, column_3, column_4]
    # Both digest handlers return (filters, five markdown blocks, csv path).
    digest_outputs = [sentiment_filter, *cluster_columns, csv_output]

    submit_button.click(
        fn=update_ui_with_columns,
        inputs=[topic_input, uploaded_files, urls_input, sentiment_filter],
        outputs=digest_outputs,
    )

    latest_news_button.click(
        fn=fetch_and_process_latest_news,
        inputs=[sentiment_filter],
        outputs=digest_outputs,
    )

    # Order must match the ten values returned by clear_interface().
    clear_button.click(
        fn=clear_interface,
        inputs=[],
        outputs=[
            topic_input,
            sentiment_filter,
            uploaded_files,
            urls_input,
            *cluster_columns,
            csv_output,
        ],
    )

if __name__ == "__main__":
    demo.launch()