|
|
|
""" |
|
Playwright initialization script.

Ensures the browser is correctly installed and configured.

Optimized for the Hugging Face Space environment.
|
""" |
|
|
|
import subprocess |
|
import sys |
|
import os |
|
from pathlib import Path |
|
|
|
def setup_environment():
    """Create the cache directories and export Playwright-related env vars.

    The cache root is ``<cwd>/.cache`` when the working directory is ``/app``
    or lies beneath it, and ``<home>/.cache`` otherwise. If the home
    directory cannot be resolved, ``<cwd>/.cache`` is used as a fallback.

    The cache root, a ``ms-playwright`` sub-directory and a ``tmp``
    sub-directory are created if missing, and the variables in ``env_vars``
    below are written into ``os.environ`` (each assignment is printed).

    Returns:
        str: Path of the cache root directory.
    """
    current_dir = Path.cwd()
    app_cache_dir = current_dir / ".cache"
    app_root = Path("/app")

    # Compare whole path components so that a sibling directory such as
    # "/application" is not mistaken for "/app".
    if current_dir == app_root or app_root in current_dir.parents:
        cache_dir = app_cache_dir
    else:
        try:
            cache_dir = Path.home() / ".cache"
        except RuntimeError:
            # Path.home() raises RuntimeError when no home directory can be
            # resolved; fall back to a cache next to the working directory.
            cache_dir = app_cache_dir

    playwright_cache = cache_dir / "ms-playwright"
    tmp_dir = cache_dir / "tmp"
    for directory in (cache_dir, playwright_cache, tmp_dir):
        directory.mkdir(parents=True, exist_ok=True, mode=0o755)

    env_vars = {
        "PLAYWRIGHT_BROWSERS_PATH": str(playwright_cache),
        "PLAYWRIGHT_SKIP_BROWSER_GC": "1",
        "XDG_CACHE_HOME": str(cache_dir),
        # HOME is pointed at the directory that contains the cache root.
        "HOME": str(cache_dir.parent),
        "TMPDIR": str(tmp_dir),
        "TMP": str(tmp_dir),
        "TEMP": str(tmp_dir),
    }

    for key, value in env_vars.items():
        os.environ[key] = value
        print(f"🔧 设置环境变量: {key}={value}")

    print(f"📁 缓存目录: {cache_dir}")
    print(f"🎭 Playwright缓存: {playwright_cache}")
    print(f"📂 当前工作目录: {current_dir}")

    return str(cache_dir)
|
|
|
def check_playwright_installation():
    """Check that Playwright imports and can launch headless Chromium.

    Returns:
        bool: True when the module imports and a headless Chromium instance
        is launched and closed without raising; False if any exception
        occurs (the exception message and type name are printed).
    """
    passed = False
    try:
        print("🔍 开始检查 Playwright 安装...")

        # Imported inside the try so a missing package counts as a failed
        # check rather than an import-time crash.
        from playwright.sync_api import sync_playwright
        print("✅ Playwright 模块导入成功")

        print("🌐 尝试启动 Chromium 浏览器...")
        with sync_playwright() as playwright:
            chromium = playwright.chromium.launch(headless=True)
            print("✅ 浏览器启动成功")
            chromium.close()
            print("✅ 浏览器关闭成功")

        print("✅ Playwright 浏览器检查通过!")
        passed = True
    except Exception as error:
        print(f"❌ Playwright 浏览器检查失败: {error}")
        print(f"🔍 错误类型: {type(error).__name__}")
    return passed
|
|
|
def install_browsers():
    """Install the Chromium browser through the Playwright command line.

    Runs ``<sys.executable> -m playwright install chromium`` with a copy of
    the current environment, printing the relevant environment variables,
    the command, its exit code and any captured stdout/stderr.

    Returns:
        bool: True if the command exits with status 0; False if it exits
        with a non-zero status or an exception occurs while running it.
    """
    try:
        print("🔄 正在安装 Playwright 浏览器...")

        print("🔍 当前环境变量:")
        for key in ("PLAYWRIGHT_BROWSERS_PATH", "XDG_CACHE_HOME", "HOME", "TMPDIR"):
            print(f" {key}: {os.environ.get(key, 'NOT SET')}")

        cmd = [sys.executable, "-m", "playwright", "install", "chromium"]
        print(f"🚀 执行命令: {' '.join(cmd)}")

        # check=True is intentionally not used: the exit status is inspected
        # below, so subprocess.run never raises CalledProcessError here.
        # errors="replace" keeps undecodable bytes in the installer output
        # from raising and masking an otherwise successful install.
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            errors="replace",
            env=os.environ.copy(),
        )

        print(f"📤 命令返回码: {result.returncode}")
        if result.stdout:
            print(f"📜 标准输出:\n{result.stdout}")
        if result.stderr:
            print(f"⚠️ 错误输出:\n{result.stderr}")

        if result.returncode != 0:
            print("❌ 安装命令执行失败")
            return False

        print("✅ Playwright 浏览器安装完成!")
        return True

    except Exception as e:
        # Any failure to run the installer is reported and mapped to False
        # so the caller only has to deal with a boolean result.
        print(f"❌ 安装过程中出现异常: {e}")
        return False
|
|
|
def main():
    """Set up the environment and make sure Playwright's Chromium is usable.

    Runs the environment setup, checks the installation and, if that check
    fails, attempts one browser installation followed by a second check.

    Returns:
        bool: True if a check passes (initially or after installing),
        False otherwise.
    """
    print("🚀 初始化 Playwright (Hugging Face Space 增强版)...")

    # Called for its side effects (directories and environment variables);
    # the returned cache path is not needed here.
    setup_environment()

    print("🔍 第一次检查...")
    if check_playwright_installation():
        return True

    print("🔧 检测到问题,正在重新安装浏览器...")
    if install_browsers():
        print("🔍 安装后重新检查...")
        return check_playwright_installation()

    print("😞 所有尝试都失败了")
    return False
|
|
|
if __name__ == "__main__":
    # Script entry point: report the outcome and exit with status 1 on failure.
    if main():
        print("🎉 Playwright 初始化成功!")
    else:
        print("❌ Playwright 初始化失败!")
        print("💡 提示:如果仍有问题,可能需要在系统级别安装浏览器依赖")
        print("🔍 调试信息:")
        # os.getuid does not exist on every platform, hence the hasattr guard.
        uid = os.getuid() if hasattr(os, 'getuid') else 'N/A'
        print(f" - 当前用户: {uid}")
        print(f" - 工作目录: {Path.cwd()}")
        print(f" - 环境变量 HOME: {os.environ.get('HOME', 'NOT SET')}")
        sys.exit(1)