gperdrizet committed on
Commit
f8a041b
·
unverified ·
1 Parent(s): f665007

Finished basic functions to find RSS feed uri and pull the feed.

Browse files
assets/html.py CHANGED
@@ -3,9 +3,9 @@
3
  TITLE = (
4
  '''
5
  <center>
6
- <h1>RSS feed finder</h1>
7
  </center>
8
  '''
9
  )
10
 
11
- DESCRIPTION = 'Enter a website to crawl for RSS feed URI.'
 
3
  TITLE = (
4
  '''
5
  <center>
6
+ <h1>RSS feed reader</h1>
7
  </center>
8
  '''
9
  )
10
 
11
+ DESCRIPTION = 'Enter a website to extract RSS feed entry titles.'
functions/helper_functions.py CHANGED
@@ -1 +1,56 @@
1
  '''Helper functions for MCP tools.'''
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  '''Helper functions for MCP tools.'''
2
+
3
+ import logging
4
+ import feedparser
5
+ from findfeed import search
6
+
7
+
8
def get_feed(website_url: str) -> str:
    '''Finds the RSS feed URI for a website given the website's url.

    Args:
        website_url: The url for the website to find the RSS feed for

    Returns:
        The URL of the first feed returned by search() as a string, or
        the message 'No feed found for <website_url>' when search()
        returns no feeds.
    '''

    # Name the logger after this function (was mislabelled '.get_content').
    logger = logging.getLogger(__name__ + '.get_feed')

    feeds = search(website_url)
    logger.debug('search() result for %s: %s', website_url, feeds)

    # Nothing found: hand back a human-readable message instead of a URI.
    if not feeds:
        return f'No feed found for {website_url}'

    # Only the first candidate is used.
    return str(feeds[0].url)
27
+
28
+
29
def parse_feed(feed_uri: str) -> list:
    '''Gets entry titles from a remote RSS feed URI.

    Args:
        feed_uri: The RSS feed to get content from

    Returns:
        List of titles for at most the first 10 titled entries, in the
        order the parsed feed lists them. Entries without a title are
        skipped. The list is empty when the feed has no entries.
    '''

    logger = logging.getLogger(__name__ + '.parse_feed')

    feed = feedparser.parse(feed_uri)
    logger.info('%s yielded %s entries', feed_uri, len(feed.entries))

    titles = []

    for entry in feed.entries:

        logger.debug('Entry attributes: %s', list(entry.keys()))

        # Not every entry is guaranteed to carry a title.
        if 'title' in entry:
            titles.append(entry.title)

        # Stop as soon as 10 titles have been collected.
        if len(titles) >= 10:
            break

    return titles
functions/tools.py CHANGED
@@ -1,22 +1,26 @@
1
  '''Tool functions for MCP server'''
2
 
3
- from findfeed import search
 
4
 
5
 
6
- def get_feed(url: str) -> str:
7
- '''Finds the RSS feed URI for a website given the website's url.
8
 
9
  Args:
10
- url: The url for the website to find the RSS feed for
11
-
12
  Returns:
13
- The website's RSS feed URI as a string
14
  '''
15
 
16
- feeds = search(url)
 
 
 
 
17
 
18
- if len(feeds) > 0:
19
- return str(feeds[0].url)
20
 
21
- else:
22
- return f'No feed found for {url}'
 
1
  '''Tool functions for MCP server'''
2
 
3
+ import logging
4
+ import functions.helper_functions as helper_funcs
5
 
6
 
7
def get_content(website_url: str) -> str:
    '''Gets RSS feed content from a given website.

    Args:
        website_url: URL of website to extract RSS feed content from

    Returns:
        The entry titles returned by parse_feed(), joined into a single
        newline-separated string. If get_feed() reports that no feed was
        found, its 'No feed found for ...' message is returned instead.
    '''

    logger = logging.getLogger(__name__ + '.get_content')
    logger.info('Getting feed content for: %s', website_url)

    feed_uri = helper_funcs.get_feed(website_url)
    logger.info('get_feed() returned %s', feed_uri)

    # get_feed() signals failure with a message string rather than a URI;
    # surface that message instead of trying to parse it as a feed.
    if feed_uri.startswith('No feed found for'):
        return feed_uri

    content = helper_funcs.parse_feed(feed_uri)
    logger.info('parse_feed() returned %s', content)

    return '\n'.join(content)
 
rss_server.py CHANGED
@@ -1,9 +1,32 @@
1
  '''Main script to run gradio interface and MCP server.'''
2
 
 
 
 
 
3
  import gradio as gr
4
  import assets.html as html
5
  from functions import tools as tool_funcs
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  with gr.Blocks() as demo:
9
 
@@ -12,14 +35,14 @@ with gr.Blocks() as demo:
12
 
13
  gr.Markdown(html.DESCRIPTION)
14
  website_url = gr.Textbox('hackernews.com', label='Website URL')
15
- output = gr.Textbox(label='RSS feed URI')
16
  submit_button = gr.Button('Submit')
17
 
18
  submit_button.click( # pylint: disable=no-member
19
- fn=tool_funcs.get_feed,
20
  inputs=website_url,
21
  outputs=output,
22
- api_name='get_feed'
23
  )
24
 
25
 
 
1
  '''Main script to run gradio interface and MCP server.'''
2
 
3
+ import logging
4
+ from pathlib import Path
5
+ from logging.handlers import RotatingFileHandler
6
+
7
  import gradio as gr
8
  import assets.html as html
9
  from functions import tools as tool_funcs
10
 
11
# Make sure log directory exists
Path('logs').mkdir(parents=True, exist_ok=True)

# Configure the root logger to write to a size-rotated log file.
# NOTE: no file mode is passed; RotatingFileHandler always appends when
# maxBytes > 0, so the previous mode='w' argument had no effect.
logging.basicConfig(
    handlers=[RotatingFileHandler(
        'logs/rss_server.log',
        maxBytes=100000,
        backupCount=10
    )],
    level=logging.DEBUG,
    format='%(levelname)s - %(name)s - %(message)s'
)

# Module-level logger for this script
logger = logging.getLogger(__name__)
29
+
30
 
31
  with gr.Blocks() as demo:
32
 
 
35
 
36
  gr.Markdown(html.DESCRIPTION)
37
  website_url = gr.Textbox('hackernews.com', label='Website URL')
38
+ output = gr.Textbox(label='RSS entry titles', lines=10)
39
  submit_button = gr.Button('Submit')
40
 
41
  submit_button.click( # pylint: disable=no-member
42
+ fn=tool_funcs.get_content,
43
  inputs=website_url,
44
  outputs=output,
45
+ api_name='Get RSS feed content'
46
  )
47
 
48