#!/usr/bin/env python3
"""
Playwright initialization script.

Makes sure the Chromium browser used by Playwright is installed and can be
launched. Tuned for the Hugging Face Space environment, where cache, home and
temp locations are redirected to directories this script creates.
"""

import subprocess
import sys
import os
from pathlib import Path


def setup_environment() -> str:
    """Create the cache directories and point the process environment at them.

    The cache directory is ``<cwd>/.cache`` when the current working
    directory is ``/app`` or lies below it, and ``~/.cache`` otherwise.
    Inside it, ``ms-playwright`` (browser binaries) and ``tmp`` (temporary
    files) are created. ``PLAYWRIGHT_BROWSERS_PATH``,
    ``PLAYWRIGHT_SKIP_BROWSER_GC``, ``XDG_CACHE_HOME``, ``HOME``, ``TMPDIR``,
    ``TMP`` and ``TEMP`` are then overwritten in ``os.environ``.

    Returns:
        The chosen cache directory as a string.
    """
    current_dir = Path.cwd()

    # Compare path components rather than a raw string prefix, so that
    # sibling directories such as "/application" are not mistaken for "/app".
    app_root = Path("/app")
    if current_dir == app_root or app_root in current_dir.parents:
        cache_dir = current_dir / ".cache"
    else:
        # Outside /app, fall back to the user's home directory.
        cache_dir = Path.home() / ".cache"

    playwright_cache = cache_dir / "ms-playwright"
    tmp_dir = cache_dir / "tmp"

    # Make sure every directory we are about to advertise actually exists.
    for directory in (cache_dir, playwright_cache, tmp_dir):
        directory.mkdir(parents=True, exist_ok=True, mode=0o755)

    # Force all related variables, overriding whatever was inherited.
    env_vars = {
        "PLAYWRIGHT_BROWSERS_PATH": str(playwright_cache),
        "PLAYWRIGHT_SKIP_BROWSER_GC": "1",
        "XDG_CACHE_HOME": str(cache_dir),
        "HOME": str(cache_dir.parent),  # HOME becomes the parent of the cache dir
        "TMPDIR": str(tmp_dir),
        "TMP": str(tmp_dir),
        "TEMP": str(tmp_dir),
    }

    for key, value in env_vars.items():
        os.environ[key] = value
        print(f"🔧 设置环境变量: {key}={value}")

    print(f"📁 缓存目录: {cache_dir}")
    print(f"🎭 Playwright缓存: {playwright_cache}")
    print(f"📂 当前工作目录: {current_dir}")

    return str(cache_dir)


def check_playwright_installation() -> bool:
    """Check that Playwright can be imported and can launch Chromium.

    Returns:
        True if a headless Chromium instance was launched and closed without
        an exception; False if anything raised (the error message and its
        type name are printed).
    """
    try:
        print("🔍 开始检查 Playwright 安装...")

        # Imported here so that a missing package is reported as a failed
        # check instead of breaking the import of this script.
        from playwright.sync_api import sync_playwright
        print("✅ Playwright 模块导入成功")

        print("🌐 尝试启动 Chromium 浏览器...")
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            print("✅ 浏览器启动成功")
            browser.close()
            print("✅ 浏览器关闭成功")

        print("✅ Playwright 浏览器检查通过!")
        return True
    except Exception as e:
        # Deliberately broad: any failure here simply means "not usable yet".
        print(f"❌ Playwright 浏览器检查失败: {e}")
        print(f"🔍 错误类型: {type(e).__name__}")
        return False


def install_browsers() -> bool:
    """Install the Chromium browser via ``python -m playwright install``.

    The command runs with a copy of the current environment; its return
    code, stdout and stderr are printed.

    Returns:
        True if the command exited with status 0; False if it exited with a
        non-zero status or could not be run at all.
    """
    try:
        print("🔄 正在安装 Playwright 浏览器...")

        # Show the variables that decide where the browsers end up.
        print("🔍 当前环境变量:")
        for key in ["PLAYWRIGHT_BROWSERS_PATH", "XDG_CACHE_HOME", "HOME", "TMPDIR"]:
            print(f" {key}: {os.environ.get(key, 'NOT SET')}")

        cmd = [sys.executable, "-m", "playwright", "install", "chromium"]
        print(f"🚀 执行命令: {' '.join(cmd)}")

        # No check=True: the return code is inspected below, so
        # CalledProcessError is never raised by this call.
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            env=os.environ.copy(),
        )

        print(f"📤 命令返回码: {result.returncode}")
        if result.stdout:
            print(f"📜 标准输出:\n{result.stdout}")
        if result.stderr:
            print(f"⚠️ 错误输出:\n{result.stderr}")

        if result.returncode != 0:
            print("❌ 安装命令执行失败")
            return False

        print("✅ Playwright 浏览器安装完成!")
        return True
    except Exception as e:
        # Best-effort boundary: report failures to start the command or
        # decode its output instead of crashing the script.
        print(f"❌ 安装过程中出现异常: {e}")
        return False


def main() -> bool:
    """Prepare the environment and make sure Chromium is usable.

    Sets up the environment, checks the existing installation, and only if
    that check fails installs the browser and checks once more.

    Returns:
        True if a check succeeded, False otherwise.
    """
    print("🚀 初始化 Playwright (Hugging Face Space 增强版)...")

    # Environment must be configured before any check or install.
    setup_environment()

    print("🔍 第一次检查...")
    if check_playwright_installation():
        return True

    # The check failed: try a (re)install, then verify again.
    print("🔧 检测到问题,正在重新安装浏览器...")
    if install_browsers():
        print("🔍 安装后重新检查...")
        return check_playwright_installation()

    print("😞 所有尝试都失败了")
    return False


if __name__ == "__main__":
    success = main()
    if not success:
        print("❌ Playwright 初始化失败!")
        print("💡 提示:如果仍有问题,可能需要在系统级别安装浏览器依赖")
        print("🔍 调试信息:")
        # os.getuid is not available on every platform, hence the guard.
        print(f" - 当前用户: {os.getuid() if hasattr(os, 'getuid') else 'N/A'}")
        print(f" - 工作目录: {Path.cwd()}")
        print(f" - 环境变量 HOME: {os.environ.get('HOME', 'NOT SET')}")
        sys.exit(1)
    print("🎉 Playwright 初始化成功!")