Commit 1ad027d (unverified), committed by gperdrizet
Parents: 07a36c8 e108e70

Merge pull request #18 from gperdrizet/dev
assets/html.py DELETED
@@ -1,33 +0,0 @@
- '''HTML elements for Gradio interface.'''
-
- TITLE = (
- '''
- <center>
- <h1>RSS feed reader</h1>
- </center>
- '''
- )
-
- DESCRIPTION = (
- '''
- <p>RSS feed reader MCP server. See
- <a href="https://huggingface.co/spaces/Agents-MCP-Hackathon/rss-mcp-client">
- Agentic RSS reader</a>for a demonstration. Check out the
- <a href="https://github.com/gperdrizet/MCP-hackathon/tree/main">
- main project repo on GitHub</a>. Both Spaces by
- <a href="https://www.linkedin.com/in/gperdrizet">George Perdrizet</a>.</p>
-
- <p>This Space is not meant to be used directly, but you can try out the bare tool below.
- Enter a website name, website URL, or feed URI. The tool will do it's best
- to find the feed and return titles, links and summaries for the three most recent posts.
- Suggestions: http://openai.com/news/rss.xml, hackernews.com, slashdot, etc.</p>
-
- <h2>Tools</h2>
-
- <ol>
- <li><b>DONE</b> Given a website name or URL, find its RSS feed and return recent
- article titles, links and a generated summary of content if avalible</li>
- <li><b>TODO</b> Simple RAG on requested RSS feed content</li>
- </ol>
- '''
- )
assets/text.py ADDED
@@ -0,0 +1,32 @@
+ '''HTML elements for Gradio interface.'''
+
+ TITLE = ('''
+ <center>
+ <h1>RSS feed reader</h1>
+ </center>
+ ''')
+
+ DESCRIPTION = ('''
+ RSS feed reader MCP server. See
+ [Agentic RSS reader](https://huggingface.co/spaces/Agents-MCP-Hackathon/rss-mcp-client)
+ for a demonstration. Check out the
+ [main project repo on GitHub](https://github.com/gperdrizet/MCP-hackathon/tree/main)
+ . Both Spaces by
+ [George Perdrizet](https://www.linkedin.com/in/gperdrizet)
+
+ This space is not meant to be used directly. It exposes a set of tools to
+ interact with RSS feeds for use by agents. For testing and demonstration,
+ you can try the tools directly below.
+
+ ## Tools
+
+ 1. `get_feed()`: Given a website name or URL, find its RSS feed and
+ return recent article titles, links and a generated summary of content if
+ avalible. Caches results for fast retrieval by other tools. Embeds content
+ to vector database for subsequent RAG.
+ 2. `context_search()`: Vector search on article content for RAG context.
+ 3. `find_article()`: Uses vector search on article content to find title of article
+ that user is referring to.
+ 4. `get_summary()`: Gets article summary from Redis cache using article title.
+ 5. `get_link()`: Gets article link from Redis cache using article title.
+ ''')
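
The new `assets/text.py` module is consumed by the Gradio app in `rss_server.py` (see that diff below): `TITLE` is still injected as raw HTML, while `DESCRIPTION` is now rendered as Markdown. A minimal sketch of that wiring, assuming the module layout from this commit:

```python
# Sketch of how the new text module is used (mirrors the rss_server.py diff below)
import gradio as gr
import assets.text as text

with gr.Blocks() as demo:
    gr.HTML(text.TITLE)            # HTML title block
    gr.Markdown(text.DESCRIPTION)  # Markdown description and tool list
```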
functions/feed_extraction.py CHANGED
@@ -2,7 +2,6 @@
 
  import os
  import re
- import json
  import logging
  import urllib.request
  from urllib.error import HTTPError, URLError
@@ -53,8 +52,8 @@ def find_feed_uri(website: str) -> str:
  feed_uri = FEED_URIS[website]
  logger.info('%s feed URI in local cache: %s', website, feed_uri)
 
- # Then, check to see if the URI is in the Redis cache
- cache_key = f"{website.lower().replace(' ', '_')}-feed-uri"
+ # If we still haven't found it, check to see if the URI is in the Redis cache
+ cache_key = f'{website} feed uri'
  cache_hit = False
 
  if feed_uri is None:
@@ -65,7 +64,7 @@ def find_feed_uri(website: str) -> str:
  feed_uri = cached_uri
  logger.info('%s feed URI in Redis cache: %s', website, feed_uri)
 
- # If none of those get it - try feedparse if it looks like a url
+ # If still none of those methods get it - try feedparse if it looks like a url
  # or else just google it
  if feed_uri is None:
  if website.split('.')[-1] in COMMON_EXTENSIONS:
@@ -79,6 +78,7 @@ def find_feed_uri(website: str) -> str:
  feed_uri = _get_feed(website_url)
  logger.info('get_feed() returned %s', feed_uri)
 
+ # Add to local cache
  FEED_URIS[website] = feed_uri
 
  # Add the feed URI to the redis cache if it wasn't already there
@@ -88,14 +88,16 @@ def find_feed_uri(website: str) -> str:
  return feed_uri
 
 
- def parse_feed(feed_uri: str) -> list:
+ def parse_feed(feed_uri: str, n: int) -> list:
  '''Gets content from a remote RSS feed URI.
 
  Args:
  feed_uri: The RSS feed to get content from
+ n: the number of feed entries to parse
 
  Returns:
- List of titles for the 10 most recent entries in the RSS feed.
+ List of dictionaries for the n most recent entries in the RSS feed.
+ Each dictionary contains 'title', 'link' and 'content' keys.
  '''
 
  logger = logging.getLogger(__name__ + '.parse_feed')
@@ -112,16 +114,15 @@ def parse_feed(feed_uri: str) -> list:
  if 'title' in entry and 'link' in entry:
 
  title = entry.title
+ entry_content['title'] = title
 
- # Check the Redis cache for this entry
- cache_key = title.lower().replace(' ', '_')
- cache_hit = False
- cached_entry = REDIS.get(cache_key)
+ # Check the Redis cache
+ cached_link = REDIS.get(f'{title} link')
 
- if cached_entry:
- cache_hit = True
- entry_content = json.loads(cached_entry)
+ if cached_link:
  logger.info('Entry in Redis cache: "%s"', title)
+ entry_content['link'] = cached_link
+ entry_content['content'] = REDIS.get(f'{title} content')
 
  # If its not in the Redis cache, parse it from the feed data
  else:
@@ -129,24 +130,26 @@
  entry_content['link'] = entry.link
  entry_content['content'] = None
 
+ # Grab the article content from the feed, if provided
  if 'content' in entry:
  entry_content['content'] = entry.content
 
- if entry_content['content'] is None:
+ # If not, try to get the article content from the link
+ elif entry_content['content'] is None:
 
  html = _get_html(entry_content['link'])
  content = _get_text(html)
  entry_content['content'] = content
 
- logger.info('Parsed entry: "%s"', title)
+ # Add everything to the cache
+ REDIS.set(f'{title} link', entry_content['link'])
+ REDIS.set(f'{title} content', entry_content['content'])
 
- # Add it to the Redis cache if it wasn't there
- if cache_hit is False:
- REDIS.set(cache_key, entry_content)
+ logger.info('Parsed entry: "%s"', title)
 
  entries[i] = entry_content
 
- if i == 2:
+ if i == n-1:
  break
 
  logger.info('Entries contains %s elements', len(list(entries.keys())))
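
The practical effect of the `parse_feed()` change is that callers now choose how many entries to parse instead of relying on a hard-coded limit of three, and each entry is cached in Redis under human-readable keys. A minimal sketch of calling the updated functions, assuming the website name resolves to a feed:

```python
# Hypothetical usage of the updated feed extraction functions
import functions.feed_extraction as extraction_funcs

feed_uri = extraction_funcs.find_feed_uri('slashdot')  # website name, URL or feed URI
articles = extraction_funcs.parse_feed(feed_uri, 3)    # dict of entries keyed by index

for i, entry in articles.items():
    print(i, entry['title'], entry['link'])            # 'content' holds the full text
```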
functions/gradio_functions.py CHANGED
@@ -2,6 +2,55 @@
 
  import os
  import re
+ import logging
+
+ from openai import OpenAI
+
+ def call_modal() -> None:
+ '''Sends request to Modal to spin up container'''
+
+ logger = logging.getLogger(__name__ + '.call_modal()')
+
+ # Call the modal container so it spins up
+ client = OpenAI(api_key=os.environ['MODAL_API_KEY'])
+
+ client.base_url = (
+ 'https://gperdrizet--vllm-openai-compatible-summarization-serve.modal.run/v1'
+ )
+
+ # Default to first avalible model
+ model = client.models.list().data[0]
+ model_id = model.id
+
+ messages = [
+ {
+ 'role': 'system',
+ 'content': ('Interpret the following proverb in 50 words or less: ' +
+ 'A poor craftsman blames the eye of the beholder')
+ }
+ ]
+
+ logger.info('Prompt: %s', messages[0]['content'])
+
+ completion_args = {
+ 'model': model_id,
+ 'messages': messages,
+ }
+
+ try:
+ response = client.chat.completions.create(**completion_args)
+
+ except Exception as e: # pylint: disable=broad-exception-caught
+ response = None
+ logger.error('Error during Modal API call: %s', e)
+
+ if response is not None:
+ reply = response.choices[0].message.content
+
+ else:
+ reply = None
+
+ logger.info('Reply: %s', reply)
 
 
  def update_log(n: int = 10):
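
`call_modal()` exists only to warm the Modal-hosted, OpenAI-compatible summarization endpoint: it sends one throwaway chat completion so the container is already running when the first real `get_feed()` call needs summaries. A sketch of the intended startup call, as wired in `rss_server.py` below (assumes `MODAL_API_KEY` is set):

```python
# Warm the Modal summarization container before the Gradio app starts
import functions.gradio_functions as gradio_funcs

gradio_funcs.call_modal()  # fire-and-forget prompt; the reply is only logged
```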
functions/rag.py ADDED
@@ -0,0 +1,52 @@
+ '''Collection of function for RAG on article texts.'''
+
+ import os
+ import logging
+ import queue
+ from semantic_text_splitter import TextSplitter
+ from tokenizers import Tokenizer
+ from upstash_vector import Index
+
+
+ def ingest(rag_ingest_queue: queue.Queue) -> None:
+ '''Semantically chunks article and upsert to Upstash vector db
+ using article title as namespace.'''
+
+ logger = logging.getLogger(__name__ + '.ingest()')
+
+ index = Index(
+ url='https://living-whale-89944-us1-vector.upstash.io',
+ token=os.environ['UPSTASH_VECTOR_KEY']
+ )
+
+ while True:
+
+ namespaces = index.list_namespaces()
+
+ item = rag_ingest_queue.get()
+ logger.info('Upserting "%s": %s', item['title'], item)
+ title = item['title']
+
+ if title not in namespaces:
+ text = item['content']
+ logger.info('Got "%s" from RAG ingest queue', title)
+
+ tokenizer=Tokenizer.from_pretrained('bert-base-uncased')
+ splitter=TextSplitter.from_huggingface_tokenizer(tokenizer, 256)
+ chunks=splitter.chunks(text)
+
+ for i, chunk in enumerate(chunks):
+
+ index.upsert(
+ [
+ (
+ hash(f'{title}-{i}'),
+ chunk,
+ {'namespace': title}
+ )
+ ],
+ )
+ logger.info('Ingested %s chunks into vector DB', i + 1)
+
+ else:
+ logger.info('%s already in RAG namespace', title)
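
`ingest()` is meant to run on a background thread and consume articles from a queue, so RAG indexing never blocks the `get_feed()` tool. A minimal sketch of that wiring, matching the setup in `functions/tools.py` below (assumes `UPSTASH_VECTOR_KEY` is set):

```python
# Background RAG ingest wiring, as done at module level in functions/tools.py
import queue
import threading

import functions.rag as rag_funcs

RAG_INGEST_QUEUE = queue.Queue()

rag_ingest_thread = threading.Thread(
    target=rag_funcs.ingest,
    args=(RAG_INGEST_QUEUE,)
)
rag_ingest_thread.start()

# Producers hand off parsed articles; ingest() chunks and upserts them by title
RAG_INGEST_QUEUE.put({'title': 'Example article', 'content': 'Full article text...'})
```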
functions/summarization.py CHANGED
@@ -25,7 +25,7 @@ def summarize_content(title: str, content: str) -> str:
  logger.info('Summarizing extracted content')
 
  # Check Redis cache for summary
- cache_key = f"{title.lower().replace(' ', '_')}-summary"
+ cache_key = f'{title} summary'
  cached_summary = REDIS.get(cache_key)
 
  if cached_summary:
@@ -77,6 +77,8 @@ def summarize_content(title: str, content: str) -> str:
  else:
  summary = None
 
+ # Add the new summary to the cache
  REDIS.set(cache_key, summary)
  logger.info('Summarized: "%s"', title)
+
  return summary
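
This commit also standardizes the Redis key scheme across modules: instead of slugified keys, every cache entry now uses the plain title or website name plus a suffix, which is what lets the new `get_summary()` and `get_link()` tools look entries up directly from an article title. A sketch of the convention, assuming an `upstash_redis` client configured as in `functions/tools.py`:

```python
# Sketch of the unified cache-key convention introduced in this commit
import os
from upstash_redis import Redis

redis = Redis(
    url='https://sensible-midge-19304.upstash.io',
    token=os.environ['UPSTASH_REDIS_KEY']
)

title = 'FAA To Eliminate Floppy Disks Used In Air Traffic Control Systems'

summary = redis.get(f'{title} summary')    # written by summarize_content()
link = redis.get(f'{title} link')          # written by parse_feed()
content = redis.get(f'{title} content')    # written by parse_feed()
feed_uri = redis.get('slashdot feed uri')  # written by find_feed_uri()
```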
functions/tools.py CHANGED
@@ -1,20 +1,40 @@
  '''Tool functions for MCP server'''
 
+ import os
+ import threading
  import time
  import json
  import logging
+ import queue
+ from typing import Tuple
+ from upstash_vector import Index
+ from upstash_redis import Redis
+
  import functions.feed_extraction as extraction_funcs
  import functions.summarization as summarization_funcs
+ import functions.rag as rag_funcs
+
+ RAG_INGEST_QUEUE = queue.Queue()
+
+ rag_ingest_thread = threading.Thread(
+ target=rag_funcs.ingest,
+ args=(RAG_INGEST_QUEUE,)
+ )
+
+ rag_ingest_thread.start()
 
 
- def get_feed(website: str) -> list:
+ def get_feed(website: str, n: int = 3) -> list:
  '''Gets RSS feed content from a given website. Can take a website or RSS
  feed URL directly, or the name of a website. Will attempt to find RSS
  feed and return title, summary and link to full article for most recent
- items in feed.
+ n items in feed. This function is slow and resource heavy, only call it when
+ the user wants to check a feed for new content, or asks for content from a
+ feed that you have not retrieved yet.
 
  Args:
  website: URL or name of website to extract RSS feed content from
+ n: (optional) number of articles to parse from feed, defaults to 3
 
  Returns:
  JSON string containing the feed content or 'No feed found' if a RSS
@@ -35,21 +55,161 @@ def get_feed(website: str) -> list:
  return 'No feed found'
 
  # Parse and extract content from the feed
- content = extraction_funcs.parse_feed(feed_uri)
- logger.info('parse_feed() returned %s entries', len(list(content.keys())))
+ articles = extraction_funcs.parse_feed(feed_uri, n)
+ logger.info('parse_feed() returned %s entries', len(list(articles.keys())))
 
- # Summarize each post in the feed
- for i, item in content.items():
+ # Loop on the posts, sending them to RAG (nonblocking) and summarization (blocking)
+ for i, item in articles.items():
 
+ # Check if content is present
  if item['content'] is not None:
+ logger.info('Summarizing/RAG ingesting: %s', item)
+
+ # Send to RAG ingest
+ RAG_INGEST_QUEUE.put(item.copy())
+ logger.info('"%s" sent to RAG ingest', item['title'])
+
+ # Generate summary and add to content
  summary = summarization_funcs.summarize_content(
  item['title'],
  item['content']
  )
- content[i]['summary'] = summary
 
- content[i].pop('content', None)
+ articles[i]['summary'] = summary
+ logger.info('Summary of "%s" generated', item['title'])
+
+ # Remove full-text content before returning
+ articles[i].pop('content', None)
 
  logger.info('Completed in %s seconds', round(time.time()-start_time, 2))
 
- return json.dumps(content)
+ # Return content dictionary as string
+ return json.dumps(articles)
+
+
+ def context_search(query: str, article_title: str = None) -> list[Tuple[float, str]]:
+ '''Searches for context relevant to query. Use this Function to search
+ for additional general information if needed before answering the user's question
+ about an article. If article_title is provided the search will only return
+ results from that article. If article_title is omitted, the search will
+ include all articles currently in the cache.
+
+ Ags:
+ query: user query to find context for
+ article_title: optional, use this argument to search only for
+ context from a specific article, defaults to None
+
+ Returns:
+ Text relevant to the query
+ '''
+
+ logger = logging.getLogger(__name__ + 'context_search')
+
+ index = Index(
+ url='https://living-whale-89944-us1-vector.upstash.io',
+ token=os.environ['UPSTASH_VECTOR_KEY']
+ )
+
+ results = None
+
+ results = index.query(
+ data=query,
+ top_k=3,
+ include_data=True,
+ namespace=article_title
+ )
+
+ logger.info('Retrieved %s chunks for "%s"', len(results), query)
+
+ return results[0].data
+
+
+ def find_article(query: str) -> list[Tuple[float, str]]:
+ '''Uses vector search to find the most likely title of the article
+ referred to by query. Use this function if the user is asking about
+ an article, but it is not clear what the exact title of the article is.
+
+ Args:
+ query: query to to find source article tile for
+
+ Returns:
+ Article title
+ '''
+
+ logger = logging.getLogger(__name__ + 'context_search')
+
+ index = Index(
+ url='https://living-whale-89944-us1-vector.upstash.io',
+ token=os.environ['UPSTASH_VECTOR_KEY']
+ )
+
+ results = None
+
+ results = index.query(
+ data=query,
+ top_k=3,
+ include_metadata=True,
+ include_data=True
+ )
+
+ logger.info('Retrieved %s chunks for "%s"', len(results), query)
+
+ return results[0].metadata['namespace']
+
+
+ def get_summary(title: str) -> str:
+ '''Uses article title to retrieve summary of article content.
+
+ Args:
+ title: exact title of article
+
+ Returns:
+ Short summary of article content.
+ '''
+
+ logger = logging.getLogger(__name__ + '.get_summary()')
+
+ redis = Redis(
+ url='https://sensible-midge-19304.upstash.io',
+ token=os.environ['UPSTASH_REDIS_KEY']
+ )
+
+ cache_key = f'{title} summary'
+ summary = redis.get(cache_key)
+
+ if summary:
+
+ logger.info('Got summary for "%s": %s', title, summary[:100])
+ return summary
+
+ logger.info('Could not find summary for: "%s"', title)
+ return f'No article called "{title}". Make sure you have the correct title.'
+
+
+ def get_link(title: str) -> str:
+ '''Uses article title to look up direct link to article content webpage.
+
+ Args:
+ title: exact title of article
+
+ Returns:
+ Article webpage URL.
+ '''
+
+ logger = logging.getLogger(__name__ + '.get_link()')
+
+ redis = Redis(
+ url='https://sensible-midge-19304.upstash.io',
+ token=os.environ['UPSTASH_REDIS_KEY']
+ )
+
+ cache_key = f'{title} link'
+ link = redis.get(cache_key)
+
+ if link:
+
+ logger.info('Got link for "%s": %s', title, link)
+ return link
+
+ logger.info('Could not find link for: "%s"', title)
+ return f'No article called "{title}". Make sure you have the correct title.'
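
Taken together, the new tool functions form a small pipeline: `get_feed()` populates the Redis cache and the vector index, and the other tools read from them. A rough local smoke test, under the assumption that the Upstash and Modal environment variables are set; note that importing `functions.tools` also starts the RAG ingest thread as a module-level side effect:

```python
# Hypothetical local exercise of the new MCP tools
import json
import functions.tools as tool_funcs

feed_json = tool_funcs.get_feed('slashdot', n=3)  # parse, summarize and RAG-ingest 3 entries
articles = json.loads(feed_json)
print(list(articles.values())[0]['title'])

title = tool_funcs.find_article('floppy disks in air traffic control')  # resolve a fuzzy reference
print(tool_funcs.get_summary(title))   # summary from the Redis cache
print(tool_funcs.get_link(title))      # link from the Redis cache
print(tool_funcs.context_search(
    'How is the air traffic control system being updated?',
    article_title=title                # restrict vector search to one article's namespace
))
```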
requirements.txt CHANGED
@@ -5,4 +5,7 @@ googlesearch-python
  gradio
  mcp
  openai
- upstash-redis
+ semantic-text-splitter
+ tokenizers
+ upstash-redis
+ upstash-vector
rss_server.py CHANGED
@@ -1,18 +1,19 @@
  '''Main script to run gradio interface and MCP server.'''
 
  import logging
- from functools import partial
  from pathlib import Path
  from logging.handlers import RotatingFileHandler
 
  import gradio as gr
- import assets.html as html
+ import assets.text as text
  import functions.tools as tool_funcs
  import functions.gradio_functions as gradio_funcs
 
+ # Call the modal container so it spins up before the rest of
+ # the app starts
+ gradio_funcs.call_modal()
 
- # Set-up logging
- # Make sure log directory exists
+ # Set-up logging - make sure log directory exists
  Path('logs').mkdir(parents=True, exist_ok=True)
 
  # Clear old logs if present
@@ -36,11 +37,14 @@ logger = logging.getLogger(__name__)
  with gr.Blocks() as demo:
 
  # Page text
- gr.HTML(html.TITLE)
- gr.HTML(html.DESCRIPTION)
+ gr.HTML(text.TITLE)
+ gr.Markdown(text.DESCRIPTION)
+
 
  # Log output
- dialog_output = gr.Textbox(label='Server logs', lines=10, max_lines=100)
+ with gr.Row():
+ dialog_output = gr.Textbox(label='Server logs', lines=7, max_lines=5)
+
  timer = gr.Timer(0.5, active=True)
 
  timer.tick( # pylint: disable=no-member
@@ -49,19 +53,132 @@ with gr.Blocks() as demo:
  show_api=False
  )
 
+
  # Get feed tool
- website_url = gr.Textbox('hackernews.com', label='Website')
- output = gr.Textbox(label='RSS entries', lines=10)
- submit_button = gr.Button('Submit')
+ gr.Markdown('### 1. `get_feed()`')
+ website_url = gr.Textbox('slashdot', label='Website')
+ feed_output = gr.Textbox(label='RSS entries', lines=7, max_lines=7)
+
+ with gr.Row():
+ website_submit_button = gr.Button('Submit website')
+ website_clear_button = gr.ClearButton(components=[website_url, feed_output])
 
- submit_button.click( # pylint: disable=no-member
+ website_submit_button.click( # pylint: disable=no-member
  fn=tool_funcs.get_feed,
  inputs=website_url,
- outputs=output,
+ outputs=feed_output,
  api_name='Get RSS feed content'
  )
 
 
+ # Vector search tool
+ gr.Markdown('### 2. `context_search()`')
+
+ context_search_query = gr.Textbox(
+ 'How is the air traffic control system being updated?',
+ label='Context search query'
+ )
+ context_search_output = gr.Textbox(
+ label='Context search results',
+ lines=7,
+ max_lines=7
+ )
+
+ with gr.Row():
+ context_search_submit_button = gr.Button('Submit query')
+ context_search_clear_button = gr.ClearButton(
+ components=[context_search_query, context_search_output]
+ )
+
+ context_search_submit_button.click( # pylint: disable=no-member
+ fn=tool_funcs.context_search,
+ inputs=context_search_query,
+ outputs=context_search_output,
+ api_name='Context vector search'
+ )
+
+
+ # Find article tool
+ gr.Markdown('### 3. `find_article()`')
+
+ article_search_query = gr.Textbox(
+ 'How is the air traffic control system being updated?',
+ label='Article search query'
+ )
+ article_search_output = gr.Textbox(
+ label='Article search results',
+ lines=3,
+ max_lines=3
+ )
+
+ with gr.Row():
+ article_search_submit_button = gr.Button('Submit query')
+ article_search_clear_button = gr.ClearButton(
+ components=[article_search_query, article_search_output]
+ )
+
+ article_search_submit_button.click( # pylint: disable=no-member
+ fn=tool_funcs.find_article,
+ inputs=article_search_query,
+ outputs=article_search_output,
+ api_name='Article vector search'
+ )
+
+
+ # Get summary tool
+ gr.Markdown('### 4. `get_summary()`')
+
+ article_title = gr.Textbox(
+ 'FAA To Eliminate Floppy Disks Used In Air Traffic Control Systems',
+ label='Article title'
+ )
+ article_summary = gr.Textbox(
+ label='Article summary',
+ lines=3,
+ max_lines=3
+ )
+
+ with gr.Row():
+ article_title_submit_button = gr.Button('Submit title')
+ article_title_clear_button = gr.ClearButton(
+ components=[article_title, article_summary]
+ )
+
+ article_title_submit_button.click( # pylint: disable=no-member
+ fn=tool_funcs.get_summary,
+ inputs=article_title,
+ outputs=article_summary,
+ api_name='Article summary search'
+ )
+
+
+ # Get link tool
+ gr.Markdown('### 5. `get_link()`')
+
+ article_title_link = gr.Textbox(
+ 'FAA To Eliminate Floppy Disks Used In Air Traffic Control Systems',
+ label='Article title'
+ )
+ article_link = gr.Textbox(
+ label='Article link',
+ lines=3,
+ max_lines=3
+ )
+
+ with gr.Row():
+ article_link_submit_button = gr.Button('Submit title')
+ article_link_clear_button = gr.ClearButton(
+ components=[article_title_link, article_link]
+ )
+
+ article_link_submit_button.click( # pylint: disable=no-member
+ fn=tool_funcs.get_link,
+ inputs=article_title_link,
+ outputs=article_link,
+ api_name='Article link search'
+ )
+
+
  if __name__ == '__main__':
 
  demo.launch(mcp_server=True)