File size: 5,105 Bytes
51687f7
 
 
 
bee90e6
51687f7
 
 
 
 
bee90e6
 
 
 
9a1ee6a
 
 
bee90e6
9a1ee6a
 
 
 
 
 
 
bee90e6
9a1ee6a
 
 
 
bee90e6
9a1ee6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bee90e6
 
51687f7
 
 
 
9a1ee6a
 
51687f7
 
9a1ee6a
51687f7
 
9a1ee6a
51687f7
 
9a1ee6a
51687f7
9a1ee6a
51687f7
 
 
 
 
 
9a1ee6a
51687f7
 
 
 
 
 
bee90e6
9a1ee6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bee90e6
 
9a1ee6a
bee90e6
 
51687f7
 
9a1ee6a
51687f7
 
 
bee90e6
 
 
51687f7
 
 
9a1ee6a
bee90e6
 
 
51687f7
bee90e6
9a1ee6a
51687f7
 
 
 
 
 
 
9a1ee6a
51687f7
 
9a1ee6a
51687f7
 
 
 
 
 
bee90e6
9a1ee6a
 
 
 
51687f7
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#!/usr/bin/env python3
"""
Playwright初始化脚本
确保浏览器正确安装和配置
针对Hugging Face Space环境优化
"""

import subprocess
import sys
import os
from pathlib import Path

def setup_environment():
    """Point Playwright's cache, HOME and temp-dir variables at a usable location.

    The cache root is ``<cwd>/.cache`` when the current working directory is
    ``/app`` or lies beneath it, and ``<home>/.cache`` otherwise. The cache
    root and its ``ms-playwright`` and ``tmp`` subdirectories are created if
    missing, then the related environment variables are overwritten in
    ``os.environ`` (each assignment is echoed to stdout).

    Returns:
        str: Path of the cache root directory.
    """
    current_dir = Path.cwd()

    # Compare whole path components instead of a string prefix, so sibling
    # directories such as /application or /app-data are not mistaken for
    # locations inside /app.
    app_root = Path("/app")
    if current_dir == app_root or app_root in current_dir.parents:
        cache_dir = current_dir / ".cache"
    else:
        cache_dir = Path.home() / ".cache"

    playwright_cache = cache_dir / "ms-playwright"
    tmp_dir = cache_dir / "tmp"

    # Create every directory before any environment variable refers to it.
    for directory in (cache_dir, playwright_cache, tmp_dir):
        directory.mkdir(parents=True, exist_ok=True, mode=0o755)

    env_vars = {
        "PLAYWRIGHT_BROWSERS_PATH": str(playwright_cache),
        "PLAYWRIGHT_SKIP_BROWSER_GC": "1",
        "XDG_CACHE_HOME": str(cache_dir),
        # HOME becomes the directory that contains the cache root.
        "HOME": str(cache_dir.parent),
        "TMPDIR": str(tmp_dir),
        "TMP": str(tmp_dir),
        "TEMP": str(tmp_dir),
    }

    # Unconditionally override whatever values were inherited.
    for key, value in env_vars.items():
        os.environ[key] = value
        print(f"🔧 设置环境变量: {key}={value}")

    print(f"📁 缓存目录: {cache_dir}")
    print(f"🎭 Playwright缓存: {playwright_cache}")
    print(f"📂 当前工作目录: {current_dir}")

    return str(cache_dir)

def check_playwright_installation():
    """Verify that Playwright can be imported and can launch headless Chromium.

    Returns:
        bool: True if the import, browser launch and browser close all
        succeed; False if any step raises (the error message and its type
        name are printed).
    """
    passed = False
    try:
        print("🔍 开始检查 Playwright 安装...")

        # Imported here so a missing package is reported as a failed check
        # rather than breaking the import of this module.
        from playwright.sync_api import sync_playwright
        print("✅ Playwright 模块导入成功")

        # Smoke test: start a headless Chromium and shut it down again.
        print("🌐 尝试启动 Chromium 浏览器...")
        with sync_playwright() as playwright:
            chromium = playwright.chromium.launch(headless=True)
            print("✅ 浏览器启动成功")
            chromium.close()
            print("✅ 浏览器关闭成功")

        print("✅ Playwright 浏览器检查通过!")
        passed = True
    except Exception as exc:
        # Any failure counts as "not installed correctly".
        print(f"❌ Playwright 浏览器检查失败: {exc}")
        print(f"🔍 错误类型: {type(exc).__name__}")
    return passed

def install_browsers():
    """Install the Chromium browser by running ``python -m playwright install chromium``.

    The command runs with the current interpreter (``sys.executable``) and a
    copy of the current environment. Its return code and any captured
    stdout/stderr are echoed to stdout.

    Returns:
        bool: True when the command exits with status 0; False when it exits
        with a non-zero status or when an exception is raised while running it.
    """
    try:
        print("🔄 正在安装 Playwright 浏览器...")

        # Echo the cache/home/temp variables to make failures easier to debug.
        print("🔍 当前环境变量:")
        for key in ("PLAYWRIGHT_BROWSERS_PATH", "XDG_CACHE_HOME", "HOME", "TMPDIR"):
            print(f"  {key}: {os.environ.get(key, 'NOT SET')}")

        cmd = [sys.executable, "-m", "playwright", "install", "chromium"]
        print(f"🚀 执行命令: {' '.join(cmd)}")

        # run() is called without check=True, so a non-zero exit is reported
        # through returncode (handled below) and never raises
        # CalledProcessError.
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            env=os.environ.copy(),
        )

        print(f"📤 命令返回码: {result.returncode}")
        if result.stdout:
            print(f"📜 标准输出:\n{result.stdout}")
        if result.stderr:
            print(f"⚠️ 错误输出:\n{result.stderr}")

        if result.returncode != 0:
            print("❌ 安装命令执行失败")
            return False

        print("✅ Playwright 浏览器安装完成!")
        return True

    except Exception as e:
        # Reached when the command cannot be run at all (e.g. the interpreter
        # path does not exist); reported as a failed install.
        print(f"❌ 安装过程中出现异常: {e}")
        return False

def main():
    """Prepare the environment and make sure a Playwright browser can launch.

    Sets up the environment variables, runs the browser check, and if that
    fails installs the browser and checks once more.

    Returns:
        bool: True if a browser check passes (initially or after the
        reinstall), False otherwise.
    """
    print("🚀 初始化 Playwright (Hugging Face Space 增强版)...")

    # Environment variables are set before anything touches Playwright.
    setup_environment()

    print("🔍 第一次检查...")
    if check_playwright_installation():
        return True

    # The first check failed: attempt a (re)install of the browser.
    print("🔧 检测到问题,正在重新安装浏览器...")
    if not install_browsers():
        print("😞 所有尝试都失败了")
        return False

    # The install reported success; confirm the browser now launches.
    print("🔍 安装后重新检查...")
    return check_playwright_installation()

if __name__ == "__main__":
    # Script entry point: exit with status 1 and print debugging hints when
    # initialization fails.
    if main():
        print("🎉 Playwright 初始化成功!")
    else:
        # os.getuid is looked up defensively; report N/A when it is missing.
        user_id = os.getuid() if hasattr(os, "getuid") else "N/A"
        home_value = os.environ.get("HOME", "NOT SET")
        print("❌ Playwright 初始化失败!")
        print("💡 提示:如果仍有问题,可能需要在系统级别安装浏览器依赖")
        print("🔍 调试信息:")
        print(f"  - 当前用户: {user_id}")
        print(f"  - 工作目录: {Path.cwd()}")
        print(f"  - 环境变量 HOME: {home_value}")
        sys.exit(1)