gperdrizet committed on
Commit
e6f6cfa
·
unverified ·
1 Parent(s): e97f932

Added guard to prevent parsing an empty (None) HTML string.

Browse files
Files changed (1) hide show
  1. functions/helper_functions.py +11 -0
functions/helper_functions.py CHANGED
@@ -209,6 +209,9 @@ def get_html(url: str) -> str:
209
 
210
  content = content.decode(encoding)
211
 
 
 
 
212
  except HTTPError:
213
  content = None
214
 
@@ -227,6 +230,9 @@ def get_text(html: str) -> str:
227
 
228
  Returns:
229
  Cleaned text string'''
 
 
 
230
 
231
  extractor = extractors.ArticleExtractor()
232
 
@@ -236,6 +242,11 @@ def get_text(html: str) -> str:
236
  except HTMLExtractionError:
237
  pass
238
 
 
 
 
 
 
239
 
240
  return clean_html(html)
241
 
 
209
 
210
  content = content.decode(encoding)
211
 
212
+ else:
213
+ content = None
214
+
215
  except HTTPError:
216
  content = None
217
 
 
230
 
231
  Returns:
232
  Cleaned text string'''
233
+
234
+ if html is None:
235
+ return None
236
 
237
  extractor = extractors.ArticleExtractor()
238
 
 
242
  except HTMLExtractionError:
243
  pass
244
 
245
+ except AttributeError:
246
+ pass
247
+
248
+ except TypeError:
249
+ pass
250
 
251
  return clean_html(html)
252