Added Redis caching for feed content and article summaries.
- functions/feed_extraction.py (+27 -16)
- functions/summarization.py (+21 -13)
- functions/tools.py (+20 -31)
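All three files share one Upstash Redis instance, accessed through its REST client. As a reference for reading the diffs below, here is a minimal sketch of that read/write round-trip; the instance URL and the UPSTASH_REDIS_KEY environment variable are taken from the diff, while the key and payload are invented for illustration:

    import os
    import json

    from upstash_redis import Redis

    # Connect to the Upstash Redis instance over REST
    redis = Redis(
        url='https://sensible-midge-19304.upstash.io',
        token=os.environ['UPSTASH_REDIS_KEY']
    )

    # Redis stores strings, so dict values are serialized to JSON on write...
    entry = {'title': 'Example Post', 'link': 'https://example.com/post'}
    redis.set('example_post', json.dumps(entry))

    # ...and parsed back on read; get() returns None on a cache miss
    cached = redis.get('example_post')
    if cached is not None:
        entry = json.loads(cached)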
functions/feed_extraction.py
CHANGED

@@ -2,6 +2,7 @@
 
 import os
 import re
+import json
 import logging
 import urllib.request
 from urllib.error import HTTPError, URLError

@@ -110,28 +111,38 @@ def parse_feed(feed_uri: str) -> list:
 
         if 'title' in entry and 'link' in entry:
 
-            entry_content['title'] = entry.title
-            entry_content['link'] = entry.link
+            title = entry.title
 
+            # Check the Redis cache for this entry
+            cache_key = title.lower().replace(' ', '_')
+            cache_hit = False
+            cached_entry = REDIS.get(cache_key)
 
+            if cached_entry:
+                cache_hit = True
+                entry_content = json.loads(cached_entry)
+                logger.info('Entry in Redis cache: "%s"', title)
 
+            # If it's not in the Redis cache, parse it from the feed data
+            else:
+                entry_content['title'] = entry.title
+                entry_content['link'] = entry.link
+                entry_content['content'] = None
+
+                if 'content' in entry:
+                    entry_content['content'] = entry.content
+
+                if entry_content['content'] is None:
 
+                    html = _get_html(entry_content['link'])
+                    content = _get_text(html)
+                    entry_content['content'] = content
 
+                logger.info('Parsed entry: "%s"', title)
 
+            # Add it to the Redis cache if it wasn't there
+            if cache_hit is False:
+                REDIS.set(cache_key, entry_content)
 
         entries[i] = entry_content
 
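The entry cache above and the summary cache below build their keys the same way: lower-case the title and replace spaces with underscores, with the summary key adding a '-summary' suffix. A worked example of that normalization, using a helper name that is mine rather than the commit's:

    def make_cache_key(title: str, suffix: str = '') -> str:
        '''Normalizes an entry title into a Redis key.'''
        key = title.lower().replace(' ', '_')
        return f'{key}-{suffix}' if suffix else key

    # 'My First Post' caches the entry under 'my_first_post'
    # and its summary under 'my_first_post-summary'
    assert make_cache_key('My First Post') == 'my_first_post'
    assert make_cache_key('My First Post', 'summary') == 'my_first_post-summary'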
functions/summarization.py
CHANGED

@@ -4,9 +4,14 @@ import os
 import logging
 
 from openai import OpenAI
+from upstash_redis import Redis
 
+REDIS = Redis(
+    url='https://sensible-midge-19304.upstash.io',
+    token=os.environ['UPSTASH_REDIS_KEY']
+)
 
-def summarize_content(content: str) -> str:
+def summarize_content(title: str, content: str) -> str:
     '''Generates summary of article content using Modal inference endpoint.
 
     Args:

@@ -19,6 +24,15 @@ def summarize_content(content: str) -> str:
     logger = logging.getLogger(__name__ + '.summarize_content')
     logger.info('Summarizing extracted content')
 
+    # Check Redis cache for summary
+    cache_key = f"{title.lower().replace(' ', '_')}-summary"
+    cached_summary = REDIS.get(cache_key)
+
+    if cached_summary:
+        logger.info('Got summary from Redis cache: "%s"', title)
+        return cached_summary
+
+    # If the summary is not in the cache, generate it
     client = OpenAI(api_key=os.environ['MODAL_API_KEY'])
 
     client.base_url = (

@@ -29,16 +43,6 @@ def summarize_content(content: str) -> str:
     model = client.models.list().data[0]
     model_id = model.id
 
-    # messages = [
-    #     {
-    #         'role': 'system',
-    #         'content': ('You are a research assistant, skilled in summarizing documents in just '+
-    #             'a few sentences. Your document summaries should be a maximum of 2 to 4 sentences long.'),
-    #         'role': 'user',
-    #         'content': content
-    #     }
-    # ]
-
     messages = [
         {
             'role': 'system',

@@ -68,7 +72,11 @@ def summarize_content(content: str) -> str:
         logger.error('Error during Modal API call: %s', e)
 
     if response is not None:
-        return response.choices[0].message.content
+        summary = response.choices[0].message.content
 
     else:
-        return None
+        summary = None
+
+    REDIS.set(cache_key, summary)
+    logger.info('Summarized: "%s"', title)
+    return summary
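With the cache in place, a repeat summary of the same article is served from Redis instead of the Modal inference endpoint. A minimal usage sketch, assuming MODAL_API_KEY and UPSTASH_REDIS_KEY are set; the title and body are invented:

    from functions.summarization import summarize_content

    body = 'Full text of the article goes here...'

    # First call hits the Modal endpoint, then caches the result
    # under 'my_first_post-summary'
    summary = summarize_content('My First Post', body)

    # Second call returns the cached string without any inference call
    assert summarize_content('My First Post', body) == summary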
functions/tools.py
CHANGED

@@ -6,20 +6,15 @@ import logging
 import functions.feed_extraction as extraction_funcs
 import functions.summarization as summarization_funcs
 
-LOCAL_CACHE = {
-    'get_feed': {}
-}
 
-def get_feed(website: str, use_cache: bool = True) -> list:
+def get_feed(website: str) -> list:
     '''Gets RSS feed content from a given website. Can take a website or RSS
     feed URL directly, or the name of a website. Will attempt to find RSS
     feed and return title, summary and link to full article for most recent
-    items in feed
+    items in feed.
 
     Args:
         website: URL or name of website to extract RSS feed content from
-        use_cache: check local cache for content from RSS feed first before
-            downloading data from the website's RSS feed
 
     Returns:
         JSON string containing the feed content or 'No feed found' if a RSS

@@ -31,35 +26,29 @@ def get_feed(website: str, use_cache: bool = True) -> list:
     logger = logging.getLogger(__name__ + '.get_feed()')
     logger.info('Getting feed content for: %s', website)
 
-        logger.info('Got feed content from local cache')
-            return 'No feed found'
-        for i, item in content.items():
-            if item['content'] is not None:
-                summary = summarization_funcs.summarize_content(item['content'])
-                content[i]['summary'] = summary
-            content[i].pop('content', None)
-        LOCAL_CACHE['get_feed'][website] = content
+    # Find the feed's URI from the website name/URL
+    feed_uri = extraction_funcs.find_feed_uri(website)
+    logger.info('find_feed_uri() returned %s', feed_uri)
 
+    if 'No feed found' in feed_uri:
+        logger.info('Completed in %s seconds', round(time.time()-start_time, 2))
+        return 'No feed found'
 
+    # Parse and extract content from the feed
+    content = extraction_funcs.parse_feed(feed_uri)
+    logger.info('parse_feed() returned %s entries', len(list(content.keys())))
 
+    # Summarize each post in the feed
+    for i, item in content.items():
 
+        if item['content'] is not None:
+            summary = summarization_funcs.summarize_content(
+                item['title'],
+                item['content']
+            )
+            content[i]['summary'] = summary
 
+        content[i].pop('content', None)
 
     logger.info('Completed in %s seconds', round(time.time()-start_time, 2))
 
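Taken together, get_feed() no longer keeps an in-process LOCAL_CACHE; caching now happens inside parse_feed() and summarize_content() via Redis, so the tool itself stays stateless. A usage sketch with a placeholder site (per the docstring, a site name or a URL both work):

    from functions.tools import get_feed

    # Feed discovery, entry parsing, and summarization all happen here;
    # repeat calls for already-seen entries are served from the Redis cache
    result = get_feed('https://example.com/blog')

    if result == 'No feed found':
        print('No RSS feed found for this site')
    else:
        print(result)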