"""Fetch a web page, strip non-content tags, and print it as Markdown."""

from bs4 import BeautifulSoup
from curl_cffi import requests as req
import html2text

url = 'https://www.firecrawl.dev/'

# Fetch HTML content. The explicit timeout (in seconds) bounds the request so
# a stalled server cannot hang the script.
response = req.get(url, timeout=30)

# Fail loudly on an HTTP error status instead of converting an error page
# to Markdown and printing it as if it were the requested content.
response.raise_for_status()

soup = BeautifulSoup(response.text, 'html.parser')

# Optional: remove tags that carry no readable text before conversion.
# Calling the soup with a list of names finds every matching tag, and
# decompose() removes each one from the tree.
for tag in soup(['script', 'style', 'noscript', 'svg']):
    tag.decompose()

# Serialize the cleaned tree back to an HTML string.
clean_html = str(soup)

# Convert the cleaned HTML to Markdown.
markdown = html2text.html2text(clean_html)

# Output
print(markdown)