Added a guard to prevent parsing an empty (None) HTML string.
Browse files
functions/helper_functions.py
CHANGED
@@ -209,6 +209,9 @@ def get_html(url: str) -> str:
|
|
209 |
|
210 |
content = content.decode(encoding)
|
211 |
|
|
|
|
|
|
|
212 |
except HTTPError:
|
213 |
content = None
|
214 |
|
@@ -227,6 +230,9 @@ def get_text(html: str) -> str:
|
|
227 |
|
228 |
Returns:
|
229 |
Cleaned text string'''
|
|
|
|
|
|
|
230 |
|
231 |
extractor = extractors.ArticleExtractor()
|
232 |
|
@@ -236,6 +242,11 @@ def get_text(html: str) -> str:
|
|
236 |
except HTMLExtractionError:
|
237 |
pass
|
238 |
|
|
|
|
|
|
|
|
|
|
|
239 |
|
240 |
return clean_html(html)
|
241 |
|
|
|
209 |
|
210 |
content = content.decode(encoding)
|
211 |
|
212 |
+
else:
|
213 |
+
content = None
|
214 |
+
|
215 |
except HTTPError:
|
216 |
content = None
|
217 |
|
|
|
230 |
|
231 |
Returns:
|
232 |
Cleaned text string'''
|
233 |
+
|
234 |
+
if html is None:
|
235 |
+
return None
|
236 |
|
237 |
extractor = extractors.ArticleExtractor()
|
238 |
|
|
|
242 |
except HTMLExtractionError:
|
243 |
pass
|
244 |
|
245 |
+
except AttributeError:
|
246 |
+
pass
|
247 |
+
|
248 |
+
except TypeError:
|
249 |
+
pass
|
250 |
|
251 |
return clean_html(html)
|
252 |
|