Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -26,22 +26,28 @@ async def extract_additional_resources(url):
|
|
26 |
try:
|
27 |
response = await asyncio.to_thread(requests.get, url, timeout=5)
|
28 |
response.raise_for_status()
|
29 |
-
soup = BeautifulSoup(response.text, "html.parser")
|
30 |
|
31 |
-
#
|
32 |
-
|
|
|
33 |
|
34 |
-
|
35 |
-
|
36 |
|
37 |
-
|
38 |
-
|
39 |
|
40 |
-
|
41 |
-
|
42 |
-
js_content = await asyncio.gather(*[fetch_file_content(link) for link in js_links])
|
43 |
|
44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
except Exception as e:
|
46 |
return [], [], [], [], []
|
47 |
|
|
|
26 |
try:
|
27 |
response = await asyncio.to_thread(requests.get, url, timeout=5)
|
28 |
response.raise_for_status()
|
|
|
29 |
|
30 |
+
# Check if the content is HTML
|
31 |
+
if 'text/html' in response.headers.get('Content-Type', ''):
|
32 |
+
soup = BeautifulSoup(response.text, "html.parser")
|
33 |
|
34 |
+
# Extract CSS links (limit to 5)
|
35 |
+
css_links = [urljoin(url, link["href"]) for link in soup.find_all("link", rel="stylesheet") if "href" in link.attrs][:5]
|
36 |
|
37 |
+
# Extract JS links (limit to 5)
|
38 |
+
js_links = [urljoin(url, script["src"]) for script in soup.find_all("script") if "src" in script.attrs][:5]
|
39 |
|
40 |
+
# Extract image links (limit to 5)
|
41 |
+
img_links = [urljoin(url, img["src"]) for img in soup.find_all("img") if "src" in img.attrs][:5]
|
|
|
42 |
|
43 |
+
# Fetch CSS and JS content asynchronously
|
44 |
+
css_content = await asyncio.gather(*[fetch_file_content(link) for link in css_links])
|
45 |
+
js_content = await asyncio.gather(*[fetch_file_content(link) for link in js_links])
|
46 |
+
|
47 |
+
return css_links, js_links, img_links, css_content, js_content
|
48 |
+
else:
|
49 |
+
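# If it's not HTML, treat the response body as a single file: its text is returned alone in the fourth slot (the one that holds css_content in the HTML branch)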
|
50 |
+
return [], [], [], [response.text], []
|
51 |
except Exception as e:
|
52 |
return [], [], [], [], []
|
53 |
|