|
import gradio as gr |
|
import requests |
|
from newspaper import Article |
|
from transformers import pipeline |
|
import config |
|
import nltk |
|
|
|
|
|
|
|
# Summarization pipeline backed by a FLAN-T5 checkpoint fine-tuned for
# summarization. Loaded once at import time so both UI actions share the
# same model instance (loading is expensive).
summarizer = pipeline("summarization", model="harao-ml/flant5-finetuned-summarize")
|
|
|
|
|
def split_text(text, max_tokens=512):
    """Yield successive whitespace-delimited chunks of *text*.

    Each chunk holds at most ``max_tokens`` words; the word count is a
    rough proxy for the model's token budget, not a true tokenization.

    Args:
        text: The text to chunk.
        max_tokens: Maximum number of words per chunk.

    Yields:
        Space-joined chunks of up to ``max_tokens`` words each.
    """
    tokens = text.split()
    start = 0
    while start < len(tokens):
        yield ' '.join(tokens[start:start + max_tokens])
        start += max_tokens
|
|
|
|
|
def clean_text(text):
    """Normalize whitespace and drop pathological "words".

    Collapses every run of whitespace into a single space and removes
    any word of 100+ characters (typically URLs or encoding junk that
    would waste the summarizer's token budget).

    Args:
        text: Raw article text.

    Returns:
        The cleaned, single-spaced text.
    """
    # One pass suffices: split() already collapses all whitespace, so the
    # original's extra join/re-split round trip was redundant work.
    return ' '.join(word for word in text.split() if len(word) < 100)
|
|
|
|
|
|
|
def fetch_article_details(url):
    """Download and parse the article at *url* with newspaper3k.

    Args:
        url: The article URL to fetch.

    Returns:
        A ``(title, author, pub_date, text)`` tuple on success, or
        ``(None, None, None, error_message)`` when anything fails, where
        ``error_message`` starts with "Error fetching article:".
    """
    try:
        piece = Article(url)
        piece.download()
        piece.parse()
        if piece.title:
            title = piece.title
        else:
            title = "Untitled"
        if piece.authors:
            author = ", ".join(piece.authors)
        else:
            author = "Unknown"
        if piece.publish_date:
            pub_date = piece.publish_date.strftime('%B %d, %Y')
        else:
            pub_date = "Unknown"
        return title, author, pub_date, piece.text
    except Exception as e:
        return None, None, None, f"Error fetching article: {str(e)}"
|
|
|
|
|
def generate_summary(content):
    """Summarize *content* with the fine-tuned FLAN-T5 pipeline.

    The text is whitespace-cleaned, split into ~512-word chunks, each
    chunk is summarized independently, and the chunk summaries are
    joined into the final summary.

    Args:
        content: Raw article text; may be empty or whitespace-only.

    Returns:
        The combined summary string, or "No input provided." for blank
        input.
    """
    if not content.strip():
        return "No input provided."
    cleaned_text = clean_text(content)
    chunks = list(split_text(cleaned_text))
    parts = [
        summarizer(chunk, do_sample=False)[0]['summary_text']
        for chunk in chunks
        if chunk.strip()
    ]
    # Join with a space so adjacent chunk summaries don't run together
    # (the original ''.join fused the last word of one summary into the
    # first word of the next).
    #
    # NOTE: the original also ran the summarizer over the full, uncleaned
    # text and discarded the result — pure wasted compute, and a likely
    # failure on inputs beyond the model's max length; removed.
    return ' '.join(parts)
|
|
|
|
|
def summarize_input(mixed_input):
    """Summarize either pasted article text or an article URL.

    A URL (http/https prefix) is fetched and parsed first; plain text is
    summarized directly. Either way the result is a Markdown string for
    the shared output area.

    Args:
        mixed_input: Free-form article text, or an article URL.

    Returns:
        A Markdown-formatted summary (with article metadata for URLs),
        or a Markdown error section when the fetch fails.
    """
    if not mixed_input.startswith(("http://", "https://")):
        # Plain text: summarize it as-is and echo the original below.
        summary = generate_summary(mixed_input)
        return (
            f"## π Summary \n\n{summary}\n\n"
            f"**Original Text:**\n\n{mixed_input}\n\n---"
        )
    title, author, pub_date, content = fetch_article_details(mixed_input)
    # fetch_article_details signals failure via an "Error..." message in
    # the content slot.
    if content.startswith("Error"):
        return f"### Error\n\n{content}"
    summary = generate_summary(content)
    return (
        f"**Title:** {title}\n\n**Author(s):** {author}\n\n"
        f"**Published:** {pub_date}\n\n**π Summary** \n\n{summary}\n\n"
        f"[π Read more]({mixed_input})\n\n---"
    )
|
|
|
|
|
def fetch_news():
    """Fetch the top Associated Press headlines and summarize each.

    Pulls up to 10 English articles from NewsAPI, extracts the full text
    where possible (falling back to the API's content/description
    snippets), summarizes each article, and renders everything as one
    Markdown string for the shared output area.

    Returns:
        A Markdown-formatted digest, or an error message string when the
        request fails.
    """
    url = 'https://newsapi.org/v2/top-headlines'
    params = {
        'apiKey': config.api_key,
        'language': 'en',
        'sources': 'associated-press',
        'pageSize': 10
    }
    try:
        # Timeout so the UI can't hang forever if NewsAPI stalls; the
        # original call had none, so requests would wait indefinitely.
        # A timeout raises and lands in the except below.
        response = requests.get(url, params=params, timeout=15)
        if response.status_code != 200:
            return f"Error: Failed to fetch news. Status code: {response.status_code}"

        articles = response.json().get("articles", [])
        summaries = []
        for article in articles:
            # Use `or` fallbacks rather than .get defaults: NewsAPI often
            # returns explicit nulls (e.g. "author": null), and
            # .get("author", "Unknown") would pass None through.
            title = article.get("title") or "No title"
            article_url = article.get("url") or "#"
            author = article.get("author") or "Unknown"
            pub_date = article.get("publishedAt") or "Unknown"
            # Prefer the full scraped body; fall back to the truncated
            # API-provided snippets.
            content = extract_full_content(article_url) or article.get("content") or article.get("description") or ""
            summary = generate_summary(content)
            summaries.append(f"**{title}** \n\n**Author(s):** {author}\n\n**Published:** {pub_date}\n\n**Summary:** {summary}\n\n [π Read more]({article_url})\n\n---")

        if not summaries:
            return "### No articles could be summarized."
        return "\n\n".join(summaries)
    except Exception as e:
        return f"### Error fetching news\n\n{str(e)}"
|
|
|
|
|
def extract_full_content(url):
    """Best-effort retrieval of the full article body at *url*.

    Args:
        url: The article URL to scrape.

    Returns:
        The parsed article text, or ``None`` when download/parsing fails
        (callers then fall back to the API-provided snippet).
    """
    try:
        page = Article(url)
        page.download()
        page.parse()
    except Exception:
        return None
    return page.text
|
|
|
|
|
# --- Gradio UI -------------------------------------------------------------
# Two entry points share a single Markdown output area:
#   * "News Now" fetches and summarizes the latest AP headlines.
#   * "Summarize" summarizes pasted article text or a pasted URL.
with gr.Blocks(theme=gr.themes.Base()) as demo:
    gr.Markdown("# π° Sum Up! Stay Informed, Instantly")
    gr.Markdown(" ## A LLM based News Summarizer App")

    gr.Markdown("Sum Up! condenses the latest headlines from trusted news sources into clear, concise and easy-to-read summaries, so you can stay informed in seconds.")
    with gr.Row():
        # Left column: one-click digest of the top AP stories.
        with gr.Column(scale=1):
            gr.Markdown("### Top Stories - A Snapshot ")
            gr.Markdown("**Source: Associated Press**")
            gr.Markdown("Click the button below to fetch the latest news articles.")
            news_btn = gr.Button("ποΈ News Now", variant="primary")
        # Right column: ad-hoc text/URL summarization.
        with gr.Column(scale=1):
            input_box = gr.Textbox(label="Enter article text or URL", placeholder="Paste article text or link...")
            summarize_btn = gr.Button("π Summarize", variant="secondary")

    # Shared output area; both buttons render their Markdown here.
    output_area = gr.Markdown()

    # Wire buttons to the handlers defined above.
    summarize_btn.click(fn=summarize_input, inputs=input_box, outputs=output_area)
    news_btn.click(fn=fetch_news, inputs=[], outputs=output_area)
|
|
|
|
|
# Launch the Gradio app only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()