P2P / init_playwright.py
ASC8384's picture
try
9a1ee6a
#!/usr/bin/env python3
"""
Playwright初始化脚本
确保浏览器正确安装和配置
针对Hugging Face Space环境优化
"""
import subprocess
import sys
import os
from pathlib import Path
def setup_environment():
    """Prepare cache directories and environment variables for Playwright.

    Chooses a cache root -- ``<cwd>/.cache`` when the working directory is
    ``/app`` or lies beneath it, otherwise ``~/.cache`` -- creates it together
    with its ``ms-playwright`` and ``tmp`` subdirectories, and then overwrites
    the related entries in ``os.environ`` so that Playwright downloads and
    temporary files land in those directories.

    Returns:
        str: Path of the cache root directory.
    """
    current_dir = Path.cwd()
    app_root = Path("/app")

    # Compare whole path components instead of a raw string prefix so that
    # sibling directories such as /application or /app-data are not mistaken
    # for the /app tree.
    if current_dir == app_root or app_root in current_dir.parents:
        cache_dir = current_dir / ".cache"
    else:
        # Outside /app, fall back to the user's home directory.
        cache_dir = Path.home() / ".cache"

    playwright_cache = cache_dir / "ms-playwright"
    tmp_dir = cache_dir / "tmp"

    # Create every directory up front so the variables below never point at
    # a missing path.
    for directory in (cache_dir, playwright_cache, tmp_dir):
        directory.mkdir(parents=True, exist_ok=True, mode=0o755)

    env_vars = {
        "PLAYWRIGHT_BROWSERS_PATH": str(playwright_cache),
        "PLAYWRIGHT_SKIP_BROWSER_GC": "1",
        "XDG_CACHE_HOME": str(cache_dir),
        # HOME becomes the parent of the cache root, so "~/.cache" resolves
        # to cache_dir.
        "HOME": str(cache_dir.parent),
        "TMPDIR": str(tmp_dir),
        "TMP": str(tmp_dir),
        "TEMP": str(tmp_dir),
    }

    # Unconditionally override whatever values were inherited.
    for key, value in env_vars.items():
        os.environ[key] = value
        print(f"🔧 设置环境变量: {key}={value}")

    print(f"📁 缓存目录: {cache_dir}")
    print(f"🎭 Playwright缓存: {playwright_cache}")
    print(f"📂 当前工作目录: {current_dir}")

    return str(cache_dir)
def check_playwright_installation():
    """Verify that Playwright can be imported and can launch Chromium.

    Imports ``playwright.sync_api``, launches a headless Chromium instance
    and closes it again, printing progress along the way.

    Returns:
        bool: True when every step succeeds; False when any step raises
        (the exception message and type name are printed).
    """
    try:
        print("🔍 开始检查 Playwright 安装...")

        # Imported here so that a missing package is reported as a failed
        # check rather than an import error at module load.
        from playwright.sync_api import sync_playwright
        print("✅ Playwright 模块导入成功")

        print("🌐 尝试启动 Chromium 浏览器...")
        with sync_playwright() as pw:
            chromium = pw.chromium.launch(headless=True)
            print("✅ 浏览器启动成功")
            chromium.close()
            print("✅ 浏览器关闭成功")

        print("✅ Playwright 浏览器检查通过!")
    except Exception as err:
        # Any failure (import, launch, close) counts as "not installed".
        print(f"❌ Playwright 浏览器检查失败: {err}")
        print(f"🔍 错误类型: {type(err).__name__}")
        return False
    return True
def install_browsers():
    """Install the Chromium browser via ``python -m playwright install chromium``.

    Prints the relevant environment variables, the command line, the
    command's return code and any captured stdout/stderr.

    Returns:
        bool: True when the command exits with status 0; False when it exits
        with a non-zero status or when launching it raises.
    """
    try:
        print("🔄 正在安装 Playwright 浏览器...")

        # Show the variables that determine where the browser is downloaded.
        print("🔍 当前环境变量:")
        for key in ["PLAYWRIGHT_BROWSERS_PATH", "XDG_CACHE_HOME", "HOME", "TMPDIR"]:
            print(f" {key}: {os.environ.get(key, 'NOT SET')}")

        # Run through the current interpreter so the same Python installation
        # that is running this script is used.
        cmd = [sys.executable, "-m", "playwright", "install", "chromium"]
        print(f"🚀 执行命令: {' '.join(cmd)}")

        # check=True is deliberately not used: a non-zero exit is handled via
        # the return code below so the captured output can still be printed.
        # Consequently subprocess.CalledProcessError is never raised here.
        result = subprocess.run(cmd, capture_output=True, text=True, env=os.environ.copy())

        print(f"📤 命令返回码: {result.returncode}")
        if result.stdout:
            print(f"📜 标准输出:\n{result.stdout}")
        if result.stderr:
            print(f"⚠️ 错误输出:\n{result.stderr}")

        if result.returncode != 0:
            print("❌ 安装命令执行失败")
            return False

        print("✅ Playwright 浏览器安装完成!")
        return True

    except Exception as e:
        # Boundary handler: e.g. the interpreter path cannot be executed.
        print(f"❌ 安装过程中出现异常: {e}")
        return False
def main():
    """Run the Playwright bootstrap sequence.

    Sets up the environment, checks whether the browser already works and,
    if it does not, installs the browser and checks once more.

    Returns:
        bool: True when a browser check passes; False when the installation
        fails or the check after installation fails.
    """
    print("🚀 初始化 Playwright (Hugging Face Space 增强版)...")

    # Environment variables must be in place before any Playwright call.
    setup_environment()

    print("🔍 第一次检查...")
    already_working = check_playwright_installation()
    if already_working:
        return True

    # The first check failed: try a (re)installation.
    print("🔧 检测到问题,正在重新安装浏览器...")
    if not install_browsers():
        print("😞 所有尝试都失败了")
        return False

    print("🔍 安装后重新检查...")
    return check_playwright_installation()
# Script entry point: run the bootstrap and exit with status 1 on failure.
if __name__ == "__main__":
    success = main()
    if success:
        print("🎉 Playwright 初始化成功!")
    else:
        print("❌ Playwright 初始化失败!")
        print("💡 提示:如果仍有问题,可能需要在系统级别安装浏览器依赖")
        # Print a few facts that help diagnose permission/path problems.
        print("🔍 调试信息:")
        # os.getuid does not exist on every platform, hence the hasattr guard.
        print(f" - 当前用户: {os.getuid() if hasattr(os, 'getuid') else 'N/A'}")
        print(f" - 工作目录: {Path.cwd()}")
        print(f" - 环境变量 HOME: {os.environ.get('HOME', 'NOT SET')}")
        sys.exit(1)