Spaces:
Runtime error
Runtime error
File size: 6,366 Bytes
e66e891 0000c5e 6c67813 e97f932 f8a041b 0000c5e 2e66273 fb7ecc7 af9e498 0000c5e 687d26a 4c58071 0000c5e e66e891 00764df fb7ecc7 687d26a e108e70 fb7ecc7 8863982 e66e891 687d26a fb7ecc7 f8a041b e66e891 687d26a 8863982 6c67813 00764df dc68696 6c67813 dc68696 6c67813 dc68696 fb7ecc7 6c67813 fb7ecc7 6c67813 fb7ecc7 dc68696 fb7ecc7 0000c5e fb7ecc7 cade3d1 0000c5e fb7ecc7 dc68696 0000c5e fb7ecc7 cade3d1 f8a041b fb7ecc7 4c58071 6c67813 4c58071 fb7ecc7 0000c5e 2e66273 e108e70 0000c5e fb7ecc7 e108e70 0000c5e e108e70 0000c5e fb7ecc7 0000c5e cade3d1 0000c5e cade3d1 0000c5e fb7ecc7 e108e70 2e66273 e108e70 2e66273 e108e70 2e66273 e108e70 2e66273 e108e70 af9e498 e108e70 af9e498 e108e70 af9e498 e108e70 af9e498 e108e70 facf739 e108e70 facf739 e108e70 facf739 e108e70 facf739 e108e70 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 |
'''Tool functions for MCP server'''
import os
import threading
import time
import json
import logging
import queue
from typing import Tuple
from upstash_vector import Index
from upstash_redis import Redis
import functions.feed_extraction as extraction_funcs
import functions.summarization as summarization_funcs
import functions.rag as rag_funcs
# Hand-off queue: get_feed() puts article dicts here; the background
# worker consumes them and ingests each article into the vector store.
RAG_INGEST_QUEUE = queue.Queue()

# Start the ingest worker as a daemon thread: the original non-daemon
# thread blocks forever on queue.get() and prevents clean interpreter
# shutdown. A name makes it identifiable in thread dumps/logs.
rag_ingest_thread = threading.Thread(
    target=rag_funcs.ingest,
    args=(RAG_INGEST_QUEUE,),
    name='rag-ingest',
    daemon=True
)
rag_ingest_thread.start()
def get_feed(website: str, n: int = 3) -> str:
    '''Gets RSS feed content from a given website. Can take a website or RSS
    feed URL directly, or the name of a website. Will attempt to find RSS
    feed and return title, summary and link to full article for most recent
    n items in feed. This function is slow and resource heavy, only call it when
    the user wants to check a feed for new content, or asks for content from a
    feed that you have not retrieved yet.

    Args:
        website: URL or name of website to extract RSS feed content from
        n: (optional) number of articles to parse from feed, defaults to 3

    Returns:
        JSON string containing the feed content or 'No feed found' if a RSS
        feed for the requested website could not be found
    '''

    start_time = time.time()
    logger = logging.getLogger(__name__ + '.get_feed()')
    logger.info('Getting feed content for: %s', website)

    # Find the feed's URI from the website name/URL
    feed_uri = extraction_funcs.find_feed_uri(website)
    logger.info('find_feed_uri() returned %s', feed_uri)

    # find_feed_uri() signals failure in-band with a message string
    if 'No feed found' in feed_uri:
        logger.info('Completed in %s seconds', round(time.time()-start_time, 2))
        return 'No feed found'

    # Parse and extract content from the feed: dict keyed by item index
    articles = extraction_funcs.parse_feed(feed_uri, n)
    logger.info('parse_feed() returned %s entries', len(articles))

    # Loop on the posts, sending them to RAG ingest (non-blocking, handled
    # by the background worker thread) and summarization (blocking)
    for i, item in articles.items():

        # Only items whose full text was successfully extracted get
        # ingested and summarized
        if item['content'] is not None:
            logger.info('Summarizing/RAG ingesting: %s', item)

            # Send a copy to RAG ingest so the mutations below cannot
            # race with the background worker
            RAG_INGEST_QUEUE.put(item.copy())
            logger.info('"%s" sent to RAG ingest', item['title'])

            # Generate summary and add to content
            summary = summarization_funcs.summarize_content(
                item['title'],
                item['content']
            )

            articles[i]['summary'] = summary
            logger.info('Summary of "%s" generated', item['title'])

        # Remove full-text content before returning to keep the payload small
        articles[i].pop('content', None)

    logger.info('Completed in %s seconds', round(time.time()-start_time, 2))

    # Return content dictionary as string
    return json.dumps(articles)
def context_search(query: str, article_title: str = None) -> str:
    '''Searches for context relevant to query. Use this Function to search
    for additional general information if needed before answering the user's question
    about an article. If article_title is provided the search will only return
    results from that article. If article_title is omitted, the search will
    include all articles currently in the cache.

    Args:
        query: user query to find context for
        article_title: optional, use this argument to search only for
            context from a specific article, defaults to None

    Returns:
        Text relevant to the query, or 'No context found' if the index
        returned no results
    '''

    # Fixed: logger name was '__name__ + 'context_search'' (missing dot),
    # inconsistent with the other tools in this module
    logger = logging.getLogger(__name__ + '.context_search()')

    index = Index(
        url='https://living-whale-89944-us1-vector.upstash.io',
        token=os.environ['UPSTASH_VECTOR_KEY']
    )

    # namespace restricts the search to one article's chunks; None searches
    # across articles — NOTE(review): assumes chunks are namespaced by
    # article title at ingest time, verify against rag ingest code
    results = index.query(
        data=query,
        top_k=3,
        include_data=True,
        namespace=article_title
    )

    logger.info('Retrieved %s chunks for "%s"', len(results), query)

    # Guard: indexing an empty result list would raise IndexError
    if not results:
        return 'No context found'

    return results[0].data
def find_article(query: str) -> str:
    '''Uses vector search to find the most likely title of the article
    referred to by query. Use this function if the user is asking about
    an article, but it is not clear what the exact title of the article is.

    Args:
        query: query to find source article title for

    Returns:
        Article title, or 'No matching article found' if the index
        returned no results
    '''

    # Fixed: logger name was copy-pasted from context_search
    logger = logging.getLogger(__name__ + '.find_article()')

    index = Index(
        url='https://living-whale-89944-us1-vector.upstash.io',
        token=os.environ['UPSTASH_VECTOR_KEY']
    )

    # Search across all namespaces; the best hit's namespace holds the
    # article title — NOTE(review): assumes chunks are namespaced by
    # article title at ingest time, verify against rag ingest code
    results = index.query(
        data=query,
        top_k=3,
        include_metadata=True,
        include_data=True
    )

    logger.info('Retrieved %s chunks for "%s"', len(results), query)

    # Guard: indexing an empty result list would raise IndexError
    if not results:
        return 'No matching article found'

    return results[0].metadata['namespace']
def get_summary(title: str) -> str:
    '''Retrieves the cached summary of an article by its exact title.

    Args:
        title: exact title of article

    Returns:
        Short summary of article content, or an explanatory message if
        no summary is cached under that title.
    '''

    logger = logging.getLogger(__name__ + '.get_summary()')

    # Summaries are cached in Redis under '<title> summary'
    cache = Redis(
        url='https://sensible-midge-19304.upstash.io',
        token=os.environ['UPSTASH_REDIS_KEY']
    )

    cached_summary = cache.get(f'{title} summary')

    # Guard clause: nothing cached for this title
    if not cached_summary:
        logger.info('Could not find summary for: "%s"', title)
        return f'No article called "{title}". Make sure you have the correct title.'

    logger.info('Got summary for "%s": %s', title, cached_summary[:100])
    return cached_summary
def get_link(title: str) -> str:
    '''Retrieves the direct URL of an article's webpage by its exact title.

    Args:
        title: exact title of article

    Returns:
        Article webpage URL, or an explanatory message if no link is
        cached under that title.
    '''

    logger = logging.getLogger(__name__ + '.get_link()')

    # Links are cached in Redis under '<title> link'
    cache = Redis(
        url='https://sensible-midge-19304.upstash.io',
        token=os.environ['UPSTASH_REDIS_KEY']
    )

    cached_link = cache.get(f'{title} link')

    # Guard clause: nothing cached for this title
    if not cached_link:
        logger.info('Could not find link for: "%s"', title)
        return f'No article called "{title}". Make sure you have the correct title.'

    logger.info('Got link for "%s": %s', title, cached_link)
    return cached_link
|