Spaces:
Running
Running
Fix short-URL resolution to follow redirects through to the final destination.
Browse files- url_tools.py +8 -3
url_tools.py
CHANGED
@@ -17,8 +17,12 @@ def extract_domain_from_url(url: str) -> str:
|
|
17 |
Returns:
|
18 |
str: The domain (e.g., 'example.com').
|
19 |
"""
|
20 |
-
|
21 |
-
|
|
|
|
|
|
|
|
|
22 |
|
23 |
def normalize_url(url: str) -> str:
|
24 |
"""Ensure the URL has a scheme and is normalized."""
|
@@ -34,7 +38,8 @@ def resolve_short_url(url: str) -> str:
|
|
34 |
with httpx.Client(follow_redirects=False, timeout=5) as client:
|
35 |
response = client.head(url, headers={"User-Agent": "Mozilla/5.0"})
|
36 |
if response.status_code in {301, 302, 303, 307, 308}:
|
37 |
-
|
|
|
38 |
return url # No redirect
|
39 |
except httpx.RequestError as e:
|
40 |
print(f"Error: {e}")
|
|
|
17 |
Returns:
|
18 |
str: The domain (e.g., 'example.com').
|
19 |
"""
|
20 |
+
redirect_url = resolve_short_url(url)
|
21 |
+
print(f"redirect: {url} -> {redirect_url}")
|
22 |
+
parsed = urlparse(redirect_url)
|
23 |
+
domain = parsed.netloc
|
24 |
+
print(f"domain: {redirect_url} -> {domain}")
|
25 |
+
return domain
|
26 |
|
27 |
def normalize_url(url: str) -> str:
|
28 |
"""Ensure the URL has a scheme and is normalized."""
|
|
|
38 |
with httpx.Client(follow_redirects=False, timeout=5) as client:
|
39 |
response = client.head(url, headers={"User-Agent": "Mozilla/5.0"})
|
40 |
if response.status_code in {301, 302, 303, 307, 308}:
|
41 |
+
location = response.headers.get("location")
|
42 |
+
return resolve_short_url(location)
|
43 |
return url # No redirect
|
44 |
except httpx.RequestError as e:
|
45 |
print(f"Error: {e}")
|