# gather_news.py


# News Source Integration
# This script fetches the latest articles on a given topic from NewsAPI and the
# Google News RSS feed, extracting the title, URL, source, author, and publish
# date of each article.

import os
from urllib.parse import quote_plus

import requests
import feedparser

def fetch_articles_newsapi(topic):
    """
    Fetch articles from NewsAPI based on the provided topic.
    """
    url = 'https://newsapi.org/v2/everything'
    api_key = os.environ.get("api_key")  # The key name must match the secret name configured in your HF Space settings
    if not api_key:
        raise ValueError('The "api_key" environment variable is not set.')
    params = {
        'apiKey': api_key,
        'language': 'en',
        'q': topic,
        'pageSize': 20
    }
    try:
        response = requests.get(url, params=params, timeout=10)  # Avoid hanging indefinitely on a slow response
        if response.status_code != 200:
            return f"Error: Failed to fetch news. Status code: {response.status_code}"

        articles = response.json().get("articles", [])
        if not articles:
            return "No articles found."

        # Extract relevant information from each article
        extracted_articles = []
        for article in articles:
            extracted_articles.append({
                "title": article.get("title", "No title"),
                "url": article.get("url", "#"),
                "source": article.get("source", {}).get("name", "Unknown"),
                "author": article.get("author", "Unknown"),
                "publishedAt": article.get("publishedAt", "Unknown")
            })

        return extracted_articles
    except Exception as e:
        return f"Error fetching news: {str(e)}"
    
def fetch_articles_google(topic):
    """
    Fetch articles from Google News RSS feed based on the provided topic.
    """
    # URL-encode the topic so spaces and special characters form a valid query string
    rss_url = f'https://news.google.com/rss/search?q={quote_plus(topic)}&hl=en-US&gl=US&ceid=US:en'
    try:
        feed = feedparser.parse(rss_url)
        if not feed.entries:
            return "No articles found."

        # Extract relevant information from each article
        extracted_articles = []
        for entry in feed.entries[:20]:  # Limit to top 20 articles
            extracted_articles.append({
                "title": entry.title,
                "url": entry.link,
                "source": entry.source.title if hasattr(entry, 'source') else "Unknown",
                "author": entry.author if hasattr(entry, 'author') else "Unknown",
                "publishedAt": entry.published if hasattr(entry, 'published') else "Unknown"
            })

        return extracted_articles
    except Exception as e:
        return f"Error fetching news: {str(e)}"