'''Tool functions for the MCP server.'''

import logging
from urllib.parse import urlparse
import validators
import functions.helper_functions as helper_funcs
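# NOTE: helper_functions is assumed (from its use below) to provide
# get_url(), get_feed() and parse_feed(); their exact signatures are
# not shown in this module.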

# Cache of feed URIs already resolved for a given website string
FEED_URIS = {}

# Substrings that suggest the input is already a feed URI
RSS_EXTENSIONS = ['xml', 'rss', 'atom']


def get_content(website: str) -> str:
    '''Gets RSS feed content from a given website.

    Args:
        website: URL or name of the website to extract RSS feed content from

    Returns:
        Newline-delimited string of titles for the 10 most recent entries
        in the RSS feed from the requested website.
    '''

    logger = logging.getLogger(__name__ + '.get_content')
    logger.info('Getting feed content for: %s', website)

    # Find the feed URI
    feed_uri = None

    # If the website contains xml, rss or atom, assume it's an RSS URI
    if any(extension in website.lower() for extension in RSS_EXTENSIONS):
        feed_uri = website
        logger.info('%s looks like a feed URI already - using it directly', website)

    # Next, check the cache to see if we already have this feed's URI
    elif website in FEED_URIS:
        feed_uri = FEED_URIS[website]
        logger.info('%s feed URI in cache: %s', website, feed_uri)

    # If neither of those worked, treat the input as a website URL when it
    # validates, otherwise resolve it with a Google search, then discover
    # the feed URI from the resulting page
    else:
        if validators.url(website):
            website_url = website
            logger.info('%s looks like a website URL', website)

        else:
            website_url = helper_funcs.get_url(website)
            logger.info('Google result for %s: %s', website, website_url)

        feed_uri = helper_funcs.get_feed(website_url)
        logger.info('get_feed() returned %s', feed_uri)

        FEED_URIS[website] = feed_uri

    content = helper_funcs.parse_feed(feed_uri)
    logger.info('parse_feed() returned %s', content)

    return '\n'.join(content)
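

if __name__ == '__main__':

    # Minimal manual check, a sketch rather than part of the MCP tool
    # surface: assumes this module is run from a project root where
    # functions.helper_functions is importable, and the website name
    # passed in is purely illustrative.
    logging.basicConfig(level=logging.INFO)
    print(get_content('hackernews'))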