Felguk committed on
Commit
4e43700
·
verified ·
1 Parent(s): 4f25906

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -11
app.py CHANGED
@@ -26,22 +26,28 @@ async def extract_additional_resources(url):
26
  try:
27
  response = await asyncio.to_thread(requests.get, url, timeout=5)
28
  response.raise_for_status()
29
- soup = BeautifulSoup(response.text, "html.parser")
30
 
31
- # Extract CSS links (limit to 5)
32
- css_links = [urljoin(url, link["href"]) for link in soup.find_all("link", rel="stylesheet") if "href" in link.attrs][:5]
 
33
 
34
- # Extract JS links (limit to 5)
35
- js_links = [urljoin(url, script["src"]) for script in soup.find_all("script") if "src" in script.attrs][:5]
36
 
37
- # Extract image links (limit to 5)
38
- img_links = [urljoin(url, img["src"]) for img in soup.find_all("img") if "src" in img.attrs][:5]
39
 
40
- # Fetch CSS and JS content asynchronously
41
- css_content = await asyncio.gather(*[fetch_file_content(link) for link in css_links])
42
- js_content = await asyncio.gather(*[fetch_file_content(link) for link in js_links])
43
 
44
- return css_links, js_links, img_links, css_content, js_content
 
 
 
 
 
 
 
45
  except Exception as e:
46
  return [], [], [], [], []
47
 
 
26
  try:
27
  response = await asyncio.to_thread(requests.get, url, timeout=5)
28
  response.raise_for_status()
 
29
 
30
+ # Check if the content is HTML
31
+ if 'text/html' in response.headers.get('Content-Type', ''):
32
+ soup = BeautifulSoup(response.text, "html.parser")
33
 
34
+ # Extract CSS links (limit to 5)
35
+ css_links = [urljoin(url, link["href"]) for link in soup.find_all("link", rel="stylesheet") if "href" in link.attrs][:5]
36
 
37
+ # Extract JS links (limit to 5)
38
+ js_links = [urljoin(url, script["src"]) for script in soup.find_all("script") if "src" in script.attrs][:5]
39
 
40
+ # Extract image links (limit to 5)
41
+ img_links = [urljoin(url, img["src"]) for img in soup.find_all("img") if "src" in img.attrs][:5]
 
42
 
43
+ # Fetch CSS and JS content asynchronously
44
+ css_content = await asyncio.gather(*[fetch_file_content(link) for link in css_links])
45
+ js_content = await asyncio.gather(*[fetch_file_content(link) for link in js_links])
46
+
47
+ return css_links, js_links, img_links, css_content, js_content
48
+ else:
49
+ # If it's not HTML, treat it as a file
50
+ return [], [], [], [response.text], []
51
  except Exception as e:
52
  return [], [], [], [], []
53