gperdrizet committed on
Commit
3c77f9c
·
unverified ·
2 Parent(s): 5614e32 397bedd

Merge pull request #8 from gperdrizet/dev

Browse files
Files changed (2) hide show
  1. assets/html.py +17 -2
  2. functions/helper_functions.py +11 -0
assets/html.py CHANGED
@@ -3,9 +3,24 @@
3
  TITLE = (
4
  '''
5
  <center>
6
- <h1>RSS feed reader</h1>
7
  </center>
8
  '''
9
  )
10
 
11
- DESCRIPTION = 'Enter a website to extract RSS feed entry titles.'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  TITLE = (
4
  '''
5
  <center>
6
+ <h1>RSS feed finder/extractor</h1>
7
  </center>
8
  '''
9
  )
10
 
11
+ DESCRIPTION = (
12
+ '''
13
+ Functions to find and extract RSS feeds are complete-ish. No AI
14
+ yet, plan for tomorrow is to build two tools:
15
+
16
+ <ol>
17
+ <li>Human readable summaries of requested RSS feed</li>
18
+ <li>Simple RAG on requested RSS feed content</li>
19
+ </ol>
20
+
21
+ For now we just dump the extracted RSS content below. Try asking
22
+ for a feed by website name, website URL, or entering your favorite
23
+ feed URI directly. Suggestions: http://openai.com/news/rss.xml,
24
+ hackernews.com, Hugging Face, etc
25
+ '''
26
+ )
functions/helper_functions.py CHANGED
@@ -209,6 +209,9 @@ def get_html(url: str) -> str:
209
 
210
  content = content.decode(encoding)
211
 
 
 
 
212
  except HTTPError:
213
  content = None
214
 
@@ -227,6 +230,9 @@ def get_text(html: str) -> str:
227
 
228
  Returns:
229
  Cleaned text string'''
 
 
 
230
 
231
  extractor = extractors.ArticleExtractor()
232
 
@@ -236,6 +242,11 @@ def get_text(html: str) -> str:
236
  except HTMLExtractionError:
237
  pass
238
 
 
 
 
 
 
239
 
240
  return clean_html(html)
241
 
 
209
 
210
  content = content.decode(encoding)
211
 
212
+ else:
213
+ content = None
214
+
215
  except HTTPError:
216
  content = None
217
 
 
230
 
231
  Returns:
232
  Cleaned text string'''
233
+
234
+ if html is None:
235
+ return None
236
 
237
  extractor = extractors.ArticleExtractor()
238
 
 
242
  except HTMLExtractionError:
243
  pass
244
 
245
+ except AttributeError:
246
+ pass
247
+
248
+ except TypeError:
249
+ pass
250
 
251
  return clean_html(html)
252