Spaces:

harao-ml
/

SumUp

Running

App Files Files Community

harao-ml committed on May 7

Commit

5549e15

verified ·

1 Parent(s): 4b1f254

Initial commit

Browse files

Files changed (1) hide show

app.py +128 -0

app.py ADDED Viewed

	@@ -0,0 +1,128 @@

+import gradio as gr
+import requests
+from newspaper import Article
+from transformers import pipeline
+import config
+import nltk
# Build the summarization pipeline once, at module import, so that every
# summarization call made by the functions below shares this single object.
summarizer = pipeline(
    task="summarization",
    model="harao-ml/flant5-finetuned-summarize",
)
def split_text(text, max_tokens=512):
    """Yield consecutive chunks of ``text`` of at most ``max_tokens`` words.

    The text is split on whitespace and each chunk is re-joined with single
    spaces, so the original spacing and line breaks are not preserved.

    NOTE(review): despite the parameter name, the limit counts
    whitespace-separated words, not model tokens -- confirm the default is
    small enough for the summarization model's input window.
    """
    pieces = text.split()
    chunk_starts = range(0, len(pieces), max_tokens)
    yield from (
        ' '.join(pieces[begin:begin + max_tokens]) for begin in chunk_starts
    )
def clean_text(text):
    """Return ``text`` with whitespace normalized and over-long tokens removed.

    Every run of whitespace collapses to a single space, and any
    whitespace-delimited token of 100 or more characters is dropped
    (presumably URLs or markup residue rather than real words).
    """
    kept_tokens = [token for token in text.split() if len(token) < 100]
    return ' '.join(kept_tokens)
def fetch_article_details(url):
    """Download and parse the article at ``url`` using ``Article``.

    Returns a 4-tuple ``(title, author, pub_date, text)``:

    * ``title`` falls back to "Untitled" when the parsed title is empty.
    * ``author`` is the comma-separated author list, or "Unknown".
    * ``pub_date`` is formatted like "May 07, 2025", or "Unknown".
    * ``text`` is the parsed article body.

    If anything raises, the first three items are ``None`` and the fourth is
    the message "Error fetching article: <reason>".
    """
    try:
        page = Article(url)
        page.download()
        page.parse()

        headline = page.title or "Untitled"

        authors = page.authors
        byline = ", ".join(authors) if authors else "Unknown"

        published = page.publish_date
        if published:
            date_label = published.strftime('%B %d, %Y')
        else:
            date_label = "Unknown"

        return headline, byline, date_label, page.text
    except Exception as exc:
        reason = str(exc)
        return None, None, None, f"Error fetching article: {reason}"
def generate_summary(content):
    """Summarize ``content`` chunk by chunk and return the combined summary.

    The text is cleaned with ``clean_text``, cut into chunks with
    ``split_text``, and each non-blank chunk is passed to the module-level
    ``summarizer`` pipeline with sampling disabled. The per-chunk summaries
    are joined with a single space so that adjacent sentences do not run
    together.

    Args:
        content: Raw text to summarize. ``None``, empty, or whitespace-only
            input is treated as "nothing to summarize".

    Returns:
        The combined summary string; "No input provided." when there is no
        usable input; or an empty string when cleaning leaves nothing to
        summarize.
    """
    if not content or not content.strip():
        return "No input provided."

    cleaned_text = clean_text(content)
    # Only the chunked text is summarized. Feeding the whole, unchunked text
    # to the model as well would double the inference cost for a result that
    # is never used.
    chunk_summaries = [
        summarizer(chunk, do_sample=False)[0]['summary_text']
        for chunk in split_text(cleaned_text)
        if chunk.strip()
    ]
    return ' '.join(chunk_summaries)
def summarize_input(mixed_input):
    """Summarize either pasted article text or the article behind a URL.

    Surrounding whitespace is stripped first, so a URL pasted with a leading
    space or trailing newline is still recognized as a URL. The scheme check
    is case-insensitive.

    Args:
        mixed_input: Text typed into the input box: an ``http://`` or
            ``https://`` URL, or the article text itself. ``None`` is treated
            as empty input.

    Returns:
        A Markdown string. For a URL: the article metadata, summary, and a
        link back to the article, or an "### Error" section if the fetch
        failed. For plain text: the summary followed by the original text.
    """
    user_input = (mixed_input or "").strip()

    if user_input.lower().startswith(("http://", "https://")):
        title, author, pub_date, content = fetch_article_details(user_input)
        # fetch_article_details reports failure with a None title and puts the
        # error message in the content slot. Testing the title avoids
        # misreading a real article whose text happens to begin with "Error".
        if title is None:
            return f"### Error\n\n{content}"
        summary = generate_summary(content)
        return f"**Title:** {title}\n\n**Author(s):** {author}\n\n**Published:** {pub_date}\n\n**📝 Summary** \n\n{summary}\n\n[🔗 Read more]({user_input})\n\n---"

    summary = generate_summary(user_input)
    return f"## 📝 Summary \n\n{summary}\n\n**Original Text:**\n\n{user_input}\n\n---"
def fetch_news():
    """Fetch top headlines from NewsAPI and return them summarized as Markdown.

    Requests up to 10 English-language Associated Press headlines using the
    API key in ``config.api_key``. For each article, the text to summarize is
    the full page text from ``extract_full_content`` when available, otherwise
    the API's ``content`` field, then its ``description``.

    Returns:
        A Markdown string with one section per article, separated by blank
        lines. If the API responds with a non-200 status, no articles come
        back, or any exception is raised, a Markdown/plain error message is
        returned instead of raising.
    """
    url = 'https://newsapi.org/v2/top-headlines'
    params = {
        'apiKey': config.api_key,
        'language': 'en',
        'sources': 'associated-press',
        'pageSize': 10
    }
    try:
        # Without a timeout, requests can block forever on a stalled
        # connection and leave the UI hanging.
        response = requests.get(url, params=params, timeout=10)
        if response.status_code != 200:
            return f"Error: Failed to fetch news. Status code: {response.status_code}"

        articles = response.json().get("articles") or []
        summaries = []
        for article in articles:
            # `or` fallbacks are used instead of dict.get defaults so that
            # fields present but null or empty also get the placeholder,
            # rather than rendering as "None".
            title = article.get("title") or "No title"
            article_url = article.get("url") or "#"
            author = article.get("author") or "Unknown"
            pub_date = article.get("publishedAt") or "Unknown"
            content = (
                extract_full_content(article_url)
                or article.get("content")
                or article.get("description")
                or ""
            )
            summary = generate_summary(content)
            summaries.append(f"**{title}** \n\n**Author(s):** {author}\n\n**Published:** {pub_date}\n\n**Summary:** {summary}\n\n [🔗 Read more]({article_url})\n\n---")

        if not summaries:
            return "### No articles could be summarized."
        return "\n\n".join(summaries)
    except Exception as e:
        # Top-level UI boundary: show the failure to the user rather than
        # letting the callback crash.
        return f"### Error fetching news\n\n{str(e)}"
def extract_full_content(url):
    """Return the parsed body text of the article at ``url``, or ``None``.

    Best-effort helper: any exception raised while constructing, downloading,
    or parsing the ``Article`` is swallowed and reported as ``None`` so the
    caller can fall back to another source of text.
    """
    try:
        page = Article(url)
        page.download()
        page.parse()
        body = page.text
    except Exception:
        body = None
    return body
# Gradio UI: a header, a two-column control row, and one shared output pane.
with gr.Blocks(theme=gr.themes.Base()) as demo:
    # Page header and short product description.
    gr.Markdown(value="# 📰 Sum Up! Stay Informed, Instantly")
    gr.Markdown(value=" ## A LLM based News Summarizer App")
    gr.Markdown(
        value=(
            "Sum Up! condenses the latest headlines from trusted news sources "
            "into clear, concise and easy-to-read summaries, so you can stay "
            "informed in seconds."
        )
    )

    with gr.Row():
        # Left column: one-click summaries of the current top headlines.
        with gr.Column(scale=1):
            gr.Markdown(value="### Top Stories - A Snapshot ")
            gr.Markdown(value="**Source: Associated Press**")
            gr.Markdown(value="Click the button below to fetch the latest news articles.")
            news_btn = gr.Button(value="🗞️ News Now", variant="primary")

        # Right column: summarize user-supplied text or a URL.
        with gr.Column(scale=1):
            input_box = gr.Textbox(
                placeholder="Paste article text or link...",
                label="Enter article text or URL",
            )
            summarize_btn = gr.Button(value="🔍 Summarize", variant="secondary")

    # Both buttons render their Markdown result into this single component.
    output_area = gr.Markdown()

    # Wire each button to its handler.
    summarize_btn.click(summarize_input, input_box, output_area)
    news_btn.click(fetch_news, [], output_area)

if __name__ == "__main__":
    demo.launch()