Spaces:
Runtime error
Runtime error
Switched to simple check for domain extension.
Browse files
- functions/helper_functions.py +1 -4
- functions/tools.py +2 -2
functions/helper_functions.py
CHANGED
@@ -20,6 +20,7 @@ def get_url(company_name: str) -> str:
|
|
20 |
'''
|
21 |
|
22 |
logger = logging.getLogger(__name__ + '.get_url')
|
|
|
23 |
|
24 |
query = f'{company_name} official website'
|
25 |
|
@@ -45,10 +46,6 @@ def get_feed(website_url: str) -> str:
|
|
45 |
|
46 |
feeds = feed_search(website_url)
|
47 |
|
48 |
-
logger.info('Feeds search result is: %s', type(feeds))
|
49 |
-
logger.info('Feeds search results: %s', len(feeds))
|
50 |
-
logger.info('Feeds results: %s', list(feeds))
|
51 |
-
|
52 |
if len(feeds) > 0:
|
53 |
return str(feeds[0].url)
|
54 |
|
|
|
20 |
'''
|
21 |
|
22 |
logger = logging.getLogger(__name__ + '.get_url')
|
23 |
+
logger.info('Getting website URL for %s', company_name)
|
24 |
|
25 |
query = f'{company_name} official website'
|
26 |
|
|
|
46 |
|
47 |
feeds = feed_search(website_url)
|
48 |
|
|
|
|
|
|
|
|
|
49 |
if len(feeds) > 0:
|
50 |
return str(feeds[0].url)
|
51 |
|
functions/tools.py
CHANGED
@@ -2,11 +2,11 @@
|
|
2 |
|
3 |
import logging
|
4 |
from urllib.parse import urlparse
|
5 |
-
import validators
|
6 |
import functions.helper_functions as helper_funcs
|
7 |
|
8 |
FEED_URIS = {}
|
9 |
RSS_EXTENSIONS = ['xml', 'rss', 'atom']
|
|
|
10 |
|
11 |
|
12 |
def get_content(website: str) -> list:
|
@@ -39,7 +39,7 @@ def get_content(website: str) -> list:
|
|
39 |
# If neither of those get it - try feedparse if it looks like a url
|
40 |
# or else just google it
|
41 |
else:
|
42 |
-
if
|
43 |
website_url = website
|
44 |
logger.info('%s looks like a website URL', website)
|
45 |
|
|
|
2 |
|
3 |
import logging
|
4 |
from urllib.parse import urlparse
|
|
|
5 |
import functions.helper_functions as helper_funcs
|
6 |
|
7 |
FEED_URIS = {}
|
8 |
RSS_EXTENSIONS = ['xml', 'rss', 'atom']
|
9 |
+
COMMON_EXTENSIONS = ['com', 'net', 'org', 'edu', 'gov', 'co', 'us']
|
10 |
|
11 |
|
12 |
def get_content(website: str) -> list:
|
|
|
39 |
# If neither of those get it - try feedparse if it looks like a url
|
40 |
# or else just google it
|
41 |
else:
|
42 |
+
if website.split('.')[-1] in COMMON_EXTENSIONS:
|
43 |
website_url = website
|
44 |
logger.info('%s looks like a website URL', website)
|
45 |
|