Spaces:
Running
Running
update
Browse files
app.py
CHANGED
@@ -473,14 +473,40 @@ def extract_website_content(url: str) -> str:
|
|
473 |
if not parsed_url.netloc:
|
474 |
return "Error: Invalid URL provided"
|
475 |
|
476 |
-
# Set headers to mimic a browser request
|
477 |
headers = {
|
478 |
-
'User-Agent': 'Mozilla/5.0 (
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
479 |
}
|
480 |
|
481 |
-
#
|
482 |
-
|
483 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
484 |
|
485 |
# Parse HTML content
|
486 |
soup = BeautifulSoup(response.content, 'html.parser')
|
@@ -572,6 +598,19 @@ PAGE STRUCTURE:
|
|
572 |
|
573 |
return website_content.strip()
|
574 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
575 |
except requests.exceptions.RequestException as e:
|
576 |
return f"Error accessing website: {str(e)}"
|
577 |
except Exception as e:
|
@@ -603,7 +642,17 @@ def generation_code(query: Optional[str], image: Optional[gr.Image], file: Optio
|
|
603 |
website_text = website_text[:8000] # Limit to 8000 chars for prompt size
|
604 |
query = f"{query}\n\n[Website content to redesign below]\n{website_text}"
|
605 |
elif website_text.startswith("Error"):
|
606 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
607 |
|
608 |
# Enhance query with search if enabled
|
609 |
enhanced_query = enhance_query_with_search(query, enable_search)
|
|
|
473 |
if not parsed_url.netloc:
|
474 |
return "Error: Invalid URL provided"
|
475 |
|
476 |
+
# Set comprehensive headers to mimic a real browser request
|
477 |
headers = {
|
478 |
+
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
479 |
+
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
|
480 |
+
'Accept-Language': 'en-US,en;q=0.9',
|
481 |
+
'Accept-Encoding': 'gzip, deflate, br',
|
482 |
+
'DNT': '1',
|
483 |
+
'Connection': 'keep-alive',
|
484 |
+
'Upgrade-Insecure-Requests': '1',
|
485 |
+
'Sec-Fetch-Dest': 'document',
|
486 |
+
'Sec-Fetch-Mode': 'navigate',
|
487 |
+
'Sec-Fetch-Site': 'none',
|
488 |
+
'Sec-Fetch-User': '?1',
|
489 |
+
'Cache-Control': 'max-age=0'
|
490 |
}
|
491 |
|
492 |
+
# Create a session to maintain cookies and handle redirects
|
493 |
+
session = requests.Session()
|
494 |
+
session.headers.update(headers)
|
495 |
+
|
496 |
+
# Make the request with retry logic
|
497 |
+
max_retries = 3
|
498 |
+
for attempt in range(max_retries):
|
499 |
+
try:
|
500 |
+
response = session.get(url, timeout=15, allow_redirects=True)
|
501 |
+
response.raise_for_status()
|
502 |
+
break
|
503 |
+
except requests.exceptions.HTTPError as e:
|
504 |
+
if e.response.status_code == 403 and attempt < max_retries - 1:
|
505 |
+
# Try with different User-Agent on 403
|
506 |
+
session.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
|
507 |
+
continue
|
508 |
+
else:
|
509 |
+
raise
|
510 |
|
511 |
# Parse HTML content
|
512 |
soup = BeautifulSoup(response.content, 'html.parser')
|
|
|
598 |
|
599 |
return website_content.strip()
|
600 |
|
601 |
+
except requests.exceptions.HTTPError as e:
|
602 |
+
if e.response.status_code == 403:
|
603 |
+
return f"Error: Website blocked access (403 Forbidden). This website may have anti-bot protection. Try a different website or provide a description of what you want to build instead."
|
604 |
+
elif e.response.status_code == 404:
|
605 |
+
return f"Error: Website not found (404). Please check the URL and try again."
|
606 |
+
elif e.response.status_code >= 500:
|
607 |
+
return f"Error: Website server error ({e.response.status_code}). Please try again later."
|
608 |
+
else:
|
609 |
+
return f"Error accessing website: HTTP {e.response.status_code} - {str(e)}"
|
610 |
+
except requests.exceptions.Timeout:
|
611 |
+
return "Error: Request timed out. The website may be slow or unavailable."
|
612 |
+
except requests.exceptions.ConnectionError:
|
613 |
+
return "Error: Could not connect to the website. Please check your internet connection and the URL."
|
614 |
except requests.exceptions.RequestException as e:
|
615 |
return f"Error accessing website: {str(e)}"
|
616 |
except Exception as e:
|
|
|
642 |
website_text = website_text[:8000] # Limit to 8000 chars for prompt size
|
643 |
query = f"{query}\n\n[Website content to redesign below]\n{website_text}"
|
644 |
elif website_text.startswith("Error"):
|
645 |
+
# Provide helpful guidance when website extraction fails
|
646 |
+
fallback_guidance = """
|
647 |
+
Since I couldn't extract the website content, please provide additional details about what you'd like to build:
|
648 |
+
|
649 |
+
1. What type of website is this? (e.g., e-commerce, blog, portfolio, dashboard)
|
650 |
+
2. What are the main features you want?
|
651 |
+
3. What's the target audience?
|
652 |
+
4. Any specific design preferences? (colors, style, layout)
|
653 |
+
|
654 |
+
This will help me create a better design for you."""
|
655 |
+
query = f"{query}\n\n[Error extracting website: {website_text}]{fallback_guidance}"
|
656 |
|
657 |
# Enhance query with search if enabled
|
658 |
enhanced_query = enhance_query_with_search(query, enable_search)
|