import gradio as gr
import requests
from newspaper import Article
from transformers import pipeline
import config  # local module expected to expose the NewsAPI key as config.api_key


# Load the summarization pipeline once at import time
# (the model is downloaded on first run, which can take a while)
summarizer = pipeline("summarization", model="harao-ml/flant5-finetuned-summarize")
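
# The pipeline returns a list of dicts; e.g. (output text depends on the model):
#   summarizer("Some long text ...", do_sample=False)  ->  [{"summary_text": "..."}]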

# Split text into chunks of at most max_tokens whitespace-separated words.
# Note: this counts words, not model tokens, so a chunk can still exceed the
# model's true token limit for subword-heavy text.
def split_text(text, max_tokens=512):
    words = text.split()
    for i in range(0, len(words), max_tokens):
        yield ' '.join(words[i:i + max_tokens])
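
# Illustrative use: a ~1,500-word article yields three chunks of <= 512 words.
#   chunks = list(split_text(article_text))
#   len(chunks)  ->  3 (actual count depends on the input length)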

# Normalize whitespace and drop pathological "words" (e.g. embedded blobs)
# of 100+ characters that would bloat the chunk size.
def clean_text(text):
    words = [word for word in text.split() if len(word) < 100]
    return ' '.join(words)


# Helper function to fetch and parse an article from a URL
def fetch_article_details(url):
    try:
        article = Article(url)
        article.download()
        article.parse()
        title = article.title or "Untitled"
        author = ", ".join(article.authors) if article.authors else "Unknown"
        pub_date = article.publish_date.strftime('%B %d, %Y') if article.publish_date else "Unknown"
        return title, author, pub_date, article.text
    except Exception as e:
        return None, None, None, f"Error fetching article: {str(e)}"

# Helper function to generate a summary: clean the text, split it into
# word-based chunks, summarize each chunk, and join the partial summaries.
def generate_summary(content):
    if not content or not content.strip():
        return "No input provided."
    cleaned_text = clean_text(content)
    chunks = list(split_text(cleaned_text))
    summaries = [summarizer(chunk, do_sample=False)[0]['summary_text'] for chunk in chunks if chunk.strip()]
    return ' '.join(summaries) if summaries else "No summarizable content found."
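
# Illustrative use (hypothetical text; actual output depends on the model):
#   generate_summary("Long article text ...")  ->  "Concise summary ..."
#   generate_summary("")                       ->  "No input provided."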

# Summarize from raw text or from a URL
def summarize_input(mixed_input):
    mixed_input = mixed_input.strip()
    if mixed_input.startswith(("http://", "https://")):
        title, author, pub_date, content = fetch_article_details(mixed_input)
        if content.startswith("Error"):
            return f"### Error\n\n{content}"
        summary = generate_summary(content)
        return f"**Title:** {title}\n\n**Author(s):** {author}\n\n**Published:** {pub_date}\n\n**πŸ“ Summary** \n\n{summary}\n\n[πŸ”— Read more]({mixed_input})\n\n---"
    else:
        summary = generate_summary(mixed_input)
        return f"## πŸ“ Summary \n\n{summary}\n\n**Original Text:**\n\n{mixed_input}\n\n---"

# Function to fetch top headlines from NewsAPI and summarize them
def fetch_news():
    url = 'https://newsapi.org/v2/top-headlines'
    params = {
        'apiKey': config.api_key,
        'language': 'en',
        'sources': 'associated-press',
        'pageSize': 10
    }
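
    # The endpoint returns JSON shaped roughly like (see the NewsAPI docs):
    #   {"status": "ok", "articles": [{"title": ..., "author": ..., "url": ...,
    #     "publishedAt": ..., "content": ..., "description": ...}]}
    # The loop below relies only on the fields shown here.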
    try:
        response = requests.get(url, params=params, timeout=10)
        if response.status_code != 200:
            return f"Error: Failed to fetch news. Status code: {response.status_code}"

        articles = response.json().get("articles", [])
        summaries = []
        for article in articles:
            title = article.get("title", "No title")
            article_url = article.get("url", "#")
            author = article.get("author", "Unknown")
            pub_date = article.get("publishedAt", "Unknown")
            content = extract_full_content(article_url) or article.get("content") or article.get("description") or ""
            summary = generate_summary(content)
            summaries.append(f"**{title}** \n\n**Author(s):** {author}\n\n**Published:** {pub_date}\n\n**Summary:** {summary}\n\n [πŸ”— Read more]({article_url})\n\n---")

        if not summaries:
            return "### No articles could be summarized."
        return "\n\n".join(summaries)
    except Exception as e:
        return f"### Error fetching news\n\n{str(e)}"

# Helper function to extract full content using newspaper3k
def extract_full_content(url):
    try:
        article = Article(url)
        article.download()
        article.parse()
        return article.text
    except Exception:
        return None

# Gradio interface
with gr.Blocks(theme=gr.themes.Base()) as demo:
    gr.Markdown("# πŸ“° Sum Up! Stay Informed, Instantly")
    gr.Markdown(" ## A LLM based News Summarizer App")

    # Add a brief description
    gr.Markdown("Sum Up! condenses the latest headlines from trusted news sources into clear, concise and easy-to-read summaries, so you can stay informed in seconds.")
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Top Stories - A Snapshot ")
            gr.Markdown("**Source: Associated Press**")
            gr.Markdown("Click the button below to fetch the latest news articles.")
            news_btn = gr.Button("πŸ—žοΈ News Now", variant="primary")
        with gr.Column(scale=1):
            input_box = gr.Textbox(label="Enter article text or URL", placeholder="Paste article text or link...")
            summarize_btn = gr.Button("πŸ” Summarize", variant="secondary")

    # Output area for displaying results
    output_area = gr.Markdown()  # Markdown area where the formatted summaries are rendered

    # Link buttons to their respective functions
    summarize_btn.click(fn=summarize_input, inputs=input_box, outputs=output_area)
    news_btn.click(fn=fetch_news, inputs=[], outputs=output_area)


if __name__ == "__main__":
    demo.launch()
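
# Launch options vary by deployment; for example (both are standard Gradio
# keyword arguments):
#   demo.launch(server_name="0.0.0.0", server_port=7860)  # bind for LAN access
#   demo.launch(share=True)                               # temporary public link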