kokluch committed on
Commit
c5e53a5
·
1 Parent(s): dc6faa5

Fix resolving shortened URLs so that redirects are followed to the final destination.

Browse files
Files changed (1) hide show
  1. url_tools.py +8 -3
url_tools.py CHANGED
@@ -17,8 +17,12 @@ def extract_domain_from_url(url: str) -> str:
17
  Returns:
18
  str: The domain (e.g., 'example.com').
19
  """
20
- parsed = urlparse(url)
21
- return parsed.netloc
 
 
 
 
22
 
23
  def normalize_url(url: str) -> str:
24
  """Ensure the URL has a scheme and is normalized."""
@@ -34,7 +38,8 @@ def resolve_short_url(url: str) -> str:
34
  with httpx.Client(follow_redirects=False, timeout=5) as client:
35
  response = client.head(url, headers={"User-Agent": "Mozilla/5.0"})
36
  if response.status_code in {301, 302, 303, 307, 308}:
37
- return response.headers.get("location")
 
38
  return url # No redirect
39
  except httpx.RequestError as e:
40
  print(f"Error: {e}")
 
17
  Returns:
18
  str: The domain (e.g., 'example.com').
19
  """
20
+ redirect_url = resolve_short_url(url)
21
+ print(f"redirect: {url} -> {redirect_url}")
22
+ parsed = urlparse(redirect_url)
23
+ domain = parsed.netloc
24
+ print(f"domain: {redirect_url} -> {domain}")
25
+ return domain
26
 
27
  def normalize_url(url: str) -> str:
28
  """Ensure the URL has a scheme and is normalized."""
 
38
  with httpx.Client(follow_redirects=False, timeout=5) as client:
39
  response = client.head(url, headers={"User-Agent": "Mozilla/5.0"})
40
  if response.status_code in {301, 302, 303, 307, 308}:
41
+ location = response.headers.get("location")
42
+ return resolve_short_url(location)
43
  return url # No redirect
44
  except httpx.RequestError as e:
45
  print(f"Error: {e}")