'''Tool functions for MCP server'''

import logging
from urllib.parse import urlparse

import validators

import functions.helper_functions as helper_funcs

# Session-level cache mapping website names/URLs to resolved feed URIs
FEED_URIS = {}

# Substrings that suggest the input is already an RSS/Atom feed URI
RSS_EXTENSIONS = ['xml', 'rss', 'atom']
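
# Assumed contract for functions.helper_functions, based on how the helpers
# are used below: get_url(name) resolves a website name to its URL via a
# Google search, get_feed(url) finds the site's RSS feed URI, and
# parse_feed(uri) returns a list of recent entry titles.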


def get_content(website: str) -> str:
    '''Gets RSS feed content from a given website.

    Args:
        website: URL or name of website to extract RSS feed content from

    Returns:
        Titles of the 10 most recent entries in the RSS feed from the
        requested website, joined into a newline-separated string.
    '''

    logger = logging.getLogger(__name__ + '.get_content')
    logger.info('Getting feed content for: %s', website)
    # Find the feed URI
    feed_uri = None

    # If the website contains xml, rss or atom, assume it's already an RSS URI
    if any(extension in website.lower() for extension in RSS_EXTENSIONS):
        feed_uri = website
        logger.info('%s looks like a feed URI already - using it directly', website)

    # Next, check the cache to see if we already have this feed's URI
    elif website in FEED_URIS:
        feed_uri = FEED_URIS[website]
        logger.info('%s feed URI in cache: %s', website, feed_uri)

    # If neither of those works, find the feed with get_feed(): use the input
    # directly if it looks like a URL, otherwise google it first
    else:
        if validators.url(website):
            website_url = website
            logger.info('%s looks like a website URL', website)

        else:
            website_url = helper_funcs.get_url(website)
            logger.info('Google result for %s: %s', website, website_url)

        feed_uri = helper_funcs.get_feed(website_url)
        logger.info('get_feed() returned %s', feed_uri)
        # Cache the resolved feed URI for subsequent calls in this session
        FEED_URIS[website] = feed_uri
    content = helper_funcs.parse_feed(feed_uri)
    logger.info('parse_feed() returned %s', content)

    return '\n'.join(content)
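

# Illustrative usage sketch, not part of the MCP tool surface. The inputs are
# hypothetical examples: the first looks like a feed URI and is used directly,
# the second is a plain site name that gets resolved via Google, and the
# repeated call is served from the FEED_URIS cache.
if __name__ == '__main__':
    print(get_content('https://hnrss.org/frontpage'))
    print(get_content('hacker news'))
    print(get_content('hacker news'))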