Spaces:

Agents-MCP-Hackathon
/

rss-mcp-server

Running

App Files Files Community

rss-mcp-server / functions /tools.py

gperdrizet

Completed vector context search tool.

cade3d1 verified 18 days ago

raw

history blame

3.02 kB

	'''Tool functions for MCP server'''

	import os
	import threading
	import time
	import json
	import logging
	import queue
	from upstash_vector import Index, Vector

	import functions.feed_extraction as extraction_funcs
	import functions.summarization as summarization_funcs
	import functions.rag as rag_funcs

	# Queue of feed items waiting to be picked up by the RAG ingest worker
	RAG_INGEST_QUEUE = queue.Queue()

	# Worker thread: runs rag_funcs.ingest() with the queue as its only argument.
	# Started at import time, so the worker is already running before any tool
	# function in this module is called.
	rag_ingest_thread = threading.Thread(target=rag_funcs.ingest, args=(RAG_INGEST_QUEUE,))
	rag_ingest_thread.start()


	def get_feed(website: str) -> str:
	    '''Gets RSS feed content from a given website. Can take a website or RSS
	    feed URL directly, or the name of a website. Will attempt to find RSS
	    feed and return title, summary and link to full article for most recent
	    items in feed.

	    Args:
	        website: URL or name of website to extract RSS feed content from

	    Returns:
	        JSON string containing the feed content or 'No feed found' if a RSS
	        feed for the requested website could not be found
	    '''

	    start_time = time.time()

	    logger = logging.getLogger(__name__ + '.get_feed()')
	    logger.info('Getting feed content for: %s', website)

	    # Find the feed's URI from the website name/URL
	    feed_uri = extraction_funcs.find_feed_uri(website)
	    logger.info('find_feed_uri() returned %s', feed_uri)

	    if 'No feed found' in feed_uri:
	        logger.info('Completed in %s seconds', round(time.time()-start_time, 2))
	        return 'No feed found'

	    # Parse and extract content from the feed
	    content = extraction_funcs.parse_feed(feed_uri)
	    logger.info('parse_feed() returned %s entries', len(content))

	    # Summarize each post in the feed and submit full text for RAG ingest
	    for item in content.values():

	        if item['content'] is not None:

	            # Hand the ingest thread its own shallow copy: the original dict
	            # is mutated below (summary added, content removed) and the
	            # worker may not have read it yet when that happens.
	            RAG_INGEST_QUEUE.put(dict(item))
	            logger.info('"%s" sent to RAG ingest', item['title'])

	            item['summary'] = summarization_funcs.summarize_content(
	                item['title'],
	                item['content']
	            )
	            logger.info('Summary of "%s" generated', item['title'])

	        # Full text is never returned to the caller, only the summary
	        item.pop('content', None)

	    logger.info('Completed in %s seconds', round(time.time()-start_time, 2))

	    return json.dumps(content)


	def context_search(query: str, article_title: str = None) -> str:
	    '''Searches for context relevant to query in article vector store.

	    Args:
	        query: user query to find context for
	        article_title: optional, use this argument to search only for context
	            from a specific article

	    Returns:
	        The top three matches for the query, including their metadata and
	        data, exactly as returned by the vector index query.
	        NOTE(review): the annotation says str but the raw query result is
	        returned unmodified - confirm what callers expect.

	    Raises:
	        KeyError: if the UPSTASH_VECTOR_KEY environment variable is not set
	    '''

	    index = Index(
	        url='https://living-whale-89944-us1-vector.upstash.io',
	        token=os.environ['UPSTASH_VECTOR_KEY']
	    )

	    query_args = {
	        'data': query,
	        'top_k': 3,
	        'include_metadata': True,
	        'include_data': True,
	    }

	    # Only pass a namespace when a title was actually supplied. Passing
	    # namespace=None explicitly would override the client's own default.
	    # NOTE(review): the Upstash client appears to treat None as a literal
	    # namespace name rather than "default namespace" - confirm.
	    if article_title is not None:
	        query_args['namespace'] = article_title

	    return index.query(**query_args)