File size: 5,180 Bytes
5549e15 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
import gradio as gr
import requests
from newspaper import Article
from transformers import pipeline
import config
import nltk
# Load summarization pipeline once at import time (model download on first run).
# Fine-tuned FLAN-T5 checkpoint; shared by all summarization helpers below.
summarizer = pipeline("summarization", model="harao-ml/flant5-finetuned-summarize")
# Function to split text into smaller chunks
def split_text(text, max_tokens=512):
    """Yield successive chunks of *text*, each holding at most *max_tokens*
    whitespace-separated words (a cheap proxy for model tokens)."""
    tokens = text.split()
    for start in range(0, len(tokens), max_tokens):
        yield ' '.join(tokens[start:start + max_tokens])
# Function to clean text
def clean_text(text):
    """Collapse all runs of whitespace to single spaces and drop any 'word'
    of 100+ characters (usually URLs or scraping junk)."""
    kept_words = [word for word in text.split() if len(word) < 100]
    return ' '.join(kept_words)
# Helper function to fetch and parse an article from a URL
def fetch_article_details(url):
    """Download and parse the article at *url*.

    Returns a (title, author, pub_date, text) tuple; on any failure the
    first three are None and the last is an "Error fetching article: ..."
    message string (callers detect this with startswith("Error")).
    """
    try:
        piece = Article(url)
        piece.download()
        piece.parse()
        headline = piece.title or "Untitled"
        byline = ", ".join(piece.authors) if piece.authors else "Unknown"
        if piece.publish_date:
            published = piece.publish_date.strftime('%B %d, %Y')
        else:
            published = "Unknown"
        return headline, byline, published, piece.text
    except Exception as e:
        return None, None, None, f"Error fetching article: {str(e)}"
# Helper function to generate a summary
def generate_summary(content):
    """Summarize *content* with the module-level `summarizer` pipeline.

    The text is cleaned, split into model-window-sized chunks, each chunk
    summarized independently, and the pieces joined with spaces.

    Returns "No input provided." for blank input, '' when cleaning leaves
    no usable chunks, otherwise the combined summary string.
    """
    if not content.strip():
        return "No input provided."
    cleaned_text = clean_text(content)
    chunks = [chunk for chunk in split_text(cleaned_text) if chunk.strip()]
    if not chunks:
        return ''
    # Summarize chunk-by-chunk only. (A previous version also ran the
    # summarizer over the full raw text and discarded the result — pure
    # wasted inference, and unsafe for inputs longer than the model window.)
    parts = [summarizer(chunk, do_sample=False)[0]['summary_text'] for chunk in chunks]
    # Join with a space so consecutive chunk summaries don't run together.
    return ' '.join(parts)
# Summarize from text or URL
def summarize_input(mixed_input):
    """Summarize pasted article text or a URL; returns a Markdown string."""
    is_url = mixed_input.startswith(("http://", "https://"))
    if not is_url:
        # Plain text: summarize it directly and echo the original below.
        summary = generate_summary(mixed_input)
        return f"## π Summary \n\n{summary}\n\n**Original Text:**\n\n{mixed_input}\n\n---"
    title, author, pub_date, content = fetch_article_details(mixed_input)
    # fetch_article_details signals failure via an "Error..." message string.
    if content.startswith("Error"):
        return f"### Error\n\n{content}"
    summary = generate_summary(content)
    return (
        f"**Title:** {title}\n\n**Author(s):** {author}\n\n"
        f"**Published:** {pub_date}\n\n**π Summary** \n\n{summary}\n\n"
        f"[π Read more]({mixed_input})\n\n---"
    )
# Function to fetch top headlines from NewsAPI and summarize them
def fetch_news():
    """Fetch the top Associated Press headlines from NewsAPI and summarize each.

    Returns one Markdown string: a summarized entry per article separated by
    horizontal rules, or a human-readable error message on any failure.
    Requires `config.api_key` to hold a valid NewsAPI key.
    """
    url = 'https://newsapi.org/v2/top-headlines'
    params = {
        'apiKey': config.api_key,
        'language': 'en',
        'sources': 'associated-press',
        'pageSize': 10
    }
    try:
        # Explicit timeout: requests has none by default, so a stalled
        # connection would otherwise hang the Gradio handler forever.
        response = requests.get(url, params=params, timeout=30)
        if response.status_code != 200:
            return f"Error: Failed to fetch news. Status code: {response.status_code}"
        articles = response.json().get("articles", [])
        summaries = []
        for article in articles:
            # Use `or` fallbacks, not .get defaults: NewsAPI sends explicit
            # nulls (e.g. "author": null), which .get's default won't catch.
            title = article.get("title") or "No title"
            article_url = article.get("url") or "#"
            author = article.get("author") or "Unknown"
            pub_date = article.get("publishedAt") or "Unknown"
            # Prefer the full scraped body; fall back to the API's truncated fields.
            content = extract_full_content(article_url) or article.get("content") or article.get("description") or ""
            summary = generate_summary(content)
            summaries.append(f"**{title}** \n\n**Author(s):** {author}\n\n**Published:** {pub_date}\n\n**Summary:** {summary}\n\n [π Read more]({article_url})\n\n---")
        if not summaries:
            return "### No articles could be summarized."
        return "\n\n".join(summaries)
    except Exception as e:
        return f"### Error fetching news\n\n{str(e)}"
# Helper function to extract full content using newspaper3k
def extract_full_content(url):
    """Best-effort scrape of the full article body at *url*.

    Returns the article text, or None on any download/parse failure so
    callers can fall back to other content sources.
    """
    try:
        piece = Article(url)
        piece.download()
        piece.parse()
        return piece.text
    except Exception:
        return None
# Gradio interface: two-column layout — left fetches/summarizes top headlines,
# right summarizes user-supplied text or a URL. Both render into one Markdown area.
with gr.Blocks(theme=gr.themes.Base()) as demo:
    gr.Markdown("# π° Sum Up! Stay Informed, Instantly")
    gr.Markdown(" ## A LLM based News Summarizer App")
    # Add a brief description
    gr.Markdown("Sum Up! condenses the latest headlines from trusted news sources into clear, concise and easy-to-read summaries, so you can stay informed in seconds.")
    with gr.Row():
        with gr.Column(scale=1):
            # Left column: one-click fetch of AP top stories via fetch_news().
            gr.Markdown("### Top Stories - A Snapshot ")
            gr.Markdown("**Source: Associated Press**")
            gr.Markdown("Click the button below to fetch the latest news articles.")
            news_btn = gr.Button("ποΈ News Now", variant="primary")
        with gr.Column(scale=1):
            # Right column: free-form input handled by summarize_input().
            input_box = gr.Textbox(label="Enter article text or URL", placeholder="Paste article text or link...")
            summarize_btn = gr.Button("π Summarize", variant="secondary")
    # Output area for displaying results
    output_area = gr.Markdown()  # Use a valid output component
    # Link buttons to their respective functions
    summarize_btn.click(fn=summarize_input, inputs=input_box, outputs=output_area)
    news_btn.click(fn=fetch_news, inputs=[], outputs=output_area)

# Launch the app only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()