|
'''Tool functions for MCP server''' |
|
|
|
import os |
|
import threading |
|
import time |
|
import json |
|
import logging |
|
import queue |
|
from upstash_vector import Index, Vector |
|
|
|
import functions.feed_extraction as extraction_funcs |
|
import functions.summarization as summarization_funcs |
|
import functions.rag as rag_funcs |
|
|
|
# Queue used to hand feed items over to the RAG ingest worker
RAG_INGEST_QUEUE = queue.Queue()

# Worker thread running rag_funcs.ingest with the queue as its only
# argument; it is started as soon as this module is imported
rag_ingest_thread = threading.Thread(
    args=(RAG_INGEST_QUEUE,),
    target=rag_funcs.ingest,
)
rag_ingest_thread.start()
|
|
|
|
|
def get_feed(website: str) -> str:
    '''Gets RSS feed content from a given website. Can take a website or RSS
    feed URL directly, or the name of a website. Will attempt to find RSS
    feed and return title, summary and link to full article for most recent
    items in feed.

    Each item that has content is also sent to the RAG ingest queue and
    gets a generated summary; the raw content is removed from every item
    before the result is serialized.

    Args:
        website: URL or name of website to extract RSS feed content from

    Returns:
        JSON string containing the feed content or 'No feed found' if a RSS
        feed for the requested website could not be found
    '''

    start_time = time.time()

    logger = logging.getLogger(__name__ + '.get_feed()')
    logger.info('Getting feed content for: %s', website)

    # find_feed_uri() reports failure via a string containing 'No feed found'
    feed_uri = extraction_funcs.find_feed_uri(website)
    logger.info('find_feed_uri() returned %s', feed_uri)

    if 'No feed found' in feed_uri:
        logger.info('Completed in %s seconds', round(time.time()-start_time, 2))
        return 'No feed found'

    content = extraction_funcs.parse_feed(feed_uri)
    logger.info('parse_feed() returned %s entries', len(content))

    for item in content.values():

        if item.get('content') is not None:

            # Enqueue a snapshot rather than the live dict: the item is
            # mutated below (summary added, content removed), and the ingest
            # thread may not dequeue it until after that has happened.
            RAG_INGEST_QUEUE.put(dict(item))
            logger.info('"%s" sent to RAG ingest', item['title'])

            item['summary'] = summarization_funcs.summarize_content(
                item['title'],
                item['content']
            )

            logger.info('Summary of "%s" generated', item['title'])

        # The full article text is not part of the returned payload
        item.pop('content', None)

    logger.info('Completed in %s seconds', round(time.time()-start_time, 2))

    return json.dumps(content)
|
|
|
|
|
def context_search(query: str, article_title: str = None) -> str:
    '''Searches for context relevant to query in article vector store.

    Args:
        query: user query to find context for
        article_title: optional, use this argument to search only for context
            from a specific article

    Returns:
        Context which best matches query: the top three results of the
        vector index query, with metadata and data included.
    '''

    # Requires the UPSTASH_VECTOR_KEY environment variable; a missing
    # variable raises KeyError
    index = Index(
        url='https://living-whale-89944-us1-vector.upstash.io',
        token=os.environ['UPSTASH_VECTOR_KEY']
    )

    # The article title is used as the namespace to query.
    # NOTE(review): when article_title is None, None is passed as the
    # namespace - confirm the SDK treats that as the default namespace.
    # NOTE(review): the query result is returned as-is, not converted to
    # a string - confirm this matches the '-> str' annotation callers see.
    return index.query(
        data=query,
        top_k=3,
        include_metadata=True,
        include_data=True,
        namespace=article_title
    )
|
|