try
Browse files- Dockerfile +50 -10
- init_playwright.py +70 -19
Dockerfile
CHANGED
@@ -1,12 +1,13 @@
|
|
1 |
FROM python:3.10-slim
|
2 |
|
3 |
-
# 安装系统依赖
|
4 |
RUN apt-get update && apt-get install -y \
|
5 |
wget \
|
6 |
gnupg \
|
7 |
ca-certificates \
|
8 |
fonts-liberation \
|
9 |
libasound2 \
|
|
|
10 |
libatk-bridge2.0-0 \
|
11 |
libatk1.0-0 \
|
12 |
libc6 \
|
@@ -20,8 +21,10 @@ RUN apt-get update && apt-get install -y \
|
|
20 |
libgdk-pixbuf2.0-0 \
|
21 |
libglib2.0-0 \
|
22 |
libgtk-3-0 \
|
|
|
23 |
libnspr4 \
|
24 |
libnss3 \
|
|
|
25 |
libpango-1.0-0 \
|
26 |
libpangocairo-1.0-0 \
|
27 |
libstdc++6 \
|
@@ -39,14 +42,34 @@ RUN apt-get update && apt-get install -y \
|
|
39 |
libxss1 \
|
40 |
libxtst6 \
|
41 |
lsb-release \
|
42 |
-
wget \
|
43 |
xdg-utils \
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
&& rm -rf /var/lib/apt/lists/*
|
51 |
|
52 |
WORKDIR /app
|
@@ -57,12 +80,29 @@ COPY requirements.txt .
|
|
57 |
# 安装Python依赖
|
58 |
RUN pip install --no-cache-dir -r requirements.txt
|
59 |
|
60 |
-
#
|
61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
|
63 |
# 复制应用代码
|
64 |
COPY . .
|
65 |
|
|
|
|
|
|
|
66 |
# 暴露端口
|
67 |
EXPOSE 7860
|
68 |
|
|
|
1 |
FROM python:3.10-slim
|
2 |
|
3 |
+
# 安装系统依赖 (包括Playwright需要的所有依赖)
|
4 |
RUN apt-get update && apt-get install -y \
|
5 |
wget \
|
6 |
gnupg \
|
7 |
ca-certificates \
|
8 |
fonts-liberation \
|
9 |
libasound2 \
|
10 |
+
libasound2-dev \
|
11 |
libatk-bridge2.0-0 \
|
12 |
libatk1.0-0 \
|
13 |
libc6 \
|
|
|
21 |
libgdk-pixbuf2.0-0 \
|
22 |
libglib2.0-0 \
|
23 |
libgtk-3-0 \
|
24 |
+
libgtk-4-1 \
|
25 |
libnspr4 \
|
26 |
libnss3 \
|
27 |
+
libnss3-dev \
|
28 |
libpango-1.0-0 \
|
29 |
libpangocairo-1.0-0 \
|
30 |
libstdc++6 \
|
|
|
42 |
libxss1 \
|
43 |
libxtst6 \
|
44 |
lsb-release \
|
|
|
45 |
xdg-utils \
|
46 |
+
# Playwright额外依赖
|
47 |
+
gstreamer1.0-base \
|
48 |
+
gstreamer1.0-plugins-base \
|
49 |
+
gstreamer1.0-plugins-good \
|
50 |
+
gstreamer1.0-plugins-bad \
|
51 |
+
libgstreamer1.0-0 \
|
52 |
+
libgstreamer-plugins-base1.0-0 \
|
53 |
+
libgstreamer-plugins-good1.0-0 \
|
54 |
+
libgstreamer-plugins-bad1.0-0 \
|
55 |
+
libgstapp1.0-0 \
|
56 |
+
libgstbase1.0-0 \
|
57 |
+
libgstpbutils1.0-0 \
|
58 |
+
libgstaudio1.0-0 \
|
59 |
+
libgsttag1.0-0 \
|
60 |
+
libgstvideo1.0-0 \
|
61 |
+
libgstgl1.0-0 \
|
62 |
+
libgstcodecparsers1.0-0 \
|
63 |
+
libgstfft1.0-0 \
|
64 |
+
libgstallocators1.0-0 \
|
65 |
+
# 图形和字体库
|
66 |
+
libgraphene-1.0-0 \
|
67 |
+
libatomic1 \
|
68 |
+
libxslt1.1 \
|
69 |
+
libwoff1 \
|
70 |
+
libvpx7 \
|
71 |
+
libevent-2.1-7 \
|
72 |
+
libopus0 \
|
73 |
&& rm -rf /var/lib/apt/lists/*
|
74 |
|
75 |
WORKDIR /app
|
|
|
80 |
# 安装Python依赖
|
81 |
RUN pip install --no-cache-dir -r requirements.txt
|
82 |
|
83 |
+
# 设置环境变量(与我们的init_playwright.py脚本配合)
|
84 |
+
ENV PLAYWRIGHT_BROWSERS_PATH=/app/.cache/ms-playwright
|
85 |
+
ENV PLAYWRIGHT_SKIP_BROWSER_GC=1
|
86 |
+
ENV XDG_CACHE_HOME=/app/.cache
|
87 |
+
ENV HOME=/app
|
88 |
+
ENV TMPDIR=/app/.cache/tmp
|
89 |
+
ENV TMP=/app/.cache/tmp
|
90 |
+
ENV TEMP=/app/.cache/tmp
|
91 |
+
|
92 |
+
# 创建缓存目录并设置权限
|
93 |
+
RUN mkdir -p /app/.cache/ms-playwright /app/.cache/tmp && \
|
94 |
+
chmod -R 755 /app/.cache
|
95 |
+
|
96 |
+
# 安装playwright浏览器到指定目录
|
97 |
+
RUN playwright install chromium --with-deps || \
|
98 |
+
(echo "第一次安装失败,尝试不使用--with-deps" && playwright install chromium)
|
99 |
|
100 |
# 复制应用代码
|
101 |
COPY . .
|
102 |
|
103 |
+
# 确保所有文件有正确权限
|
104 |
+
RUN chmod +x init_playwright.py
|
105 |
+
|
106 |
# 暴露端口
|
107 |
EXPOSE 7860
|
108 |
|
init_playwright.py
CHANGED
@@ -12,39 +12,72 @@ from pathlib import Path
|
|
12 |
|
13 |
def setup_environment():
|
14 |
"""设置环境变量,解决权限问题"""
|
15 |
-
#
|
16 |
-
|
17 |
-
|
18 |
|
19 |
-
#
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
-
#
|
23 |
-
|
24 |
-
|
|
|
25 |
|
26 |
-
#
|
27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
-
print(f"📁 设置缓存目录: {cache_dir}")
|
30 |
return str(cache_dir)
|
31 |
|
32 |
def check_playwright_installation():
|
33 |
"""检查playwright是否正确安装"""
|
34 |
try:
|
|
|
|
|
35 |
# 尝试导入playwright
|
36 |
from playwright.sync_api import sync_playwright
|
|
|
37 |
|
38 |
# 尝试启动浏览器
|
|
|
39 |
with sync_playwright() as p:
|
40 |
browser = p.chromium.launch(headless=True)
|
|
|
41 |
browser.close()
|
|
|
42 |
|
43 |
print("✅ Playwright 浏览器检查通过!")
|
44 |
return True
|
45 |
|
46 |
except Exception as e:
|
47 |
print(f"❌ Playwright 浏览器检查失败: {e}")
|
|
|
48 |
return False
|
49 |
|
50 |
def install_browsers():
|
@@ -52,19 +85,30 @@ def install_browsers():
|
|
52 |
try:
|
53 |
print("🔄 正在安装 Playwright 浏览器...")
|
54 |
|
55 |
-
#
|
56 |
-
|
57 |
-
|
58 |
-
"
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
|
61 |
if result.returncode != 0:
|
62 |
-
print(
|
63 |
-
print(f"错误输出: {result.stderr}")
|
64 |
return False
|
65 |
|
66 |
print("✅ Playwright 浏览器安装完成!")
|
67 |
return True
|
|
|
68 |
except subprocess.CalledProcessError as e:
|
69 |
print(f"❌ 浏览器安装失败: {e}")
|
70 |
return False
|
@@ -74,12 +118,13 @@ def install_browsers():
|
|
74 |
|
75 |
def main():
|
76 |
"""主函数"""
|
77 |
-
print("🚀 初始化 Playwright (Hugging Face Space
|
78 |
|
79 |
# 首先设置环境变量
|
80 |
cache_dir = setup_environment()
|
81 |
|
82 |
# 检查是否已正确安装
|
|
|
83 |
if check_playwright_installation():
|
84 |
return True
|
85 |
|
@@ -87,8 +132,10 @@ def main():
|
|
87 |
print("🔧 检测到问题,正在重新安装浏览器...")
|
88 |
if install_browsers():
|
89 |
# 重新检查
|
|
|
90 |
return check_playwright_installation()
|
91 |
|
|
|
92 |
return False
|
93 |
|
94 |
if __name__ == "__main__":
|
@@ -96,5 +143,9 @@ if __name__ == "__main__":
|
|
96 |
if not success:
|
97 |
print("❌ Playwright 初始化失败!")
|
98 |
print("💡 提示:如果仍有问题,可能需要在系统级别安装浏览器依赖")
|
|
|
|
|
|
|
|
|
99 |
sys.exit(1)
|
100 |
print("🎉 Playwright 初始化成功!")
|
|
|
12 |
|
13 |
def setup_environment():
    """Point Playwright's cache, HOME and temp directories at a usable location.

    The cache root is ``<cwd>/.cache`` when the current working directory is
    ``/app`` or lies beneath it; otherwise it is ``~/.cache``. The cache root,
    its ``ms-playwright`` subdirectory and its ``tmp`` subdirectory are created
    if missing, and the related environment variables of the current process
    are overwritten to reference them. Each assignment is printed.

    Returns:
        str: The cache root directory.
    """
    current_dir = Path.cwd()

    # Compare path components instead of the raw string prefix, so sibling
    # directories such as /application or /app2 are not mistaken for /app.
    app_root = Path("/app")
    if current_dir == app_root or app_root in current_dir.parents:
        cache_dir = current_dir / ".cache"
    else:
        # Outside /app, fall back to the user's home directory.
        cache_dir = Path.home() / ".cache"

    playwright_cache = cache_dir / "ms-playwright"
    tmp_dir = cache_dir / "tmp"

    # Make sure every directory referenced below actually exists.
    for directory in (cache_dir, playwright_cache, tmp_dir):
        directory.mkdir(parents=True, exist_ok=True, mode=0o755)

    # Unconditionally override all related environment variables.
    env_vars = {
        "PLAYWRIGHT_BROWSERS_PATH": str(playwright_cache),
        "PLAYWRIGHT_SKIP_BROWSER_GC": "1",
        "XDG_CACHE_HOME": str(cache_dir),
        "HOME": str(cache_dir.parent),  # HOME becomes the parent of the cache root
        "TMPDIR": str(tmp_dir),
        "TMP": str(tmp_dir),
        "TEMP": str(tmp_dir),
    }

    for key, value in env_vars.items():
        os.environ[key] = value
        print(f"🔧 设置环境变量: {key}={value}")

    print(f"📁 缓存目录: {cache_dir}")
    print(f"🎭 Playwright缓存: {playwright_cache}")
    print(f"📂 当前工作目录: {current_dir}")

    return str(cache_dir)
|
57 |
|
58 |
def check_playwright_installation() -> bool:
    """Check that Playwright imports and that headless Chromium can start.

    Imports ``playwright.sync_api``, launches Chromium in headless mode and
    closes it again, printing progress along the way.

    Returns:
        bool: ``True`` if the import, launch and close all succeed; ``False``
        if any step raises, in which case the error and its type name are
        printed.
    """
    try:
        print("🔍 开始检查 Playwright 安装...")

        # Imported here so a missing package is reported as a failed check
        # rather than an import error at module load time.
        from playwright.sync_api import sync_playwright
        print("✅ Playwright 模块导入成功")

        # Try to start the browser.
        print("🌐 尝试启动 Chromium 浏览器...")
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            print("✅ 浏览器启动成功")
            browser.close()
            print("✅ 浏览器关闭成功")

        print("✅ Playwright 浏览器检查通过!")
        return True

    except Exception as e:
        # Deliberately broad: any failure means the installation is unusable.
        print(f"❌ Playwright 浏览器检查失败: {e}")
        print(f"🔍 错误类型: {type(e).__name__}")
        return False
|
82 |
|
83 |
def install_browsers():
|
|
|
85 |
try:
|
86 |
print("🔄 正在安装 Playwright 浏览器...")
|
87 |
|
88 |
+
# 打印当前环境变量
|
89 |
+
print("🔍 当前环境变量:")
|
90 |
+
for key in ["PLAYWRIGHT_BROWSERS_PATH", "XDG_CACHE_HOME", "HOME", "TMPDIR"]:
|
91 |
+
print(f" {key}: {os.environ.get(key, 'NOT SET')}")
|
92 |
+
|
93 |
+
# 使用更详细的安装命令
|
94 |
+
cmd = [sys.executable, "-m", "playwright", "install", "chromium"]
|
95 |
+
print(f"🚀 执行命令: {' '.join(cmd)}")
|
96 |
+
|
97 |
+
result = subprocess.run(cmd, capture_output=True, text=True, env=os.environ.copy())
|
98 |
+
|
99 |
+
print(f"📤 命令返回码: {result.returncode}")
|
100 |
+
if result.stdout:
|
101 |
+
print(f"📜 标准输出:\n{result.stdout}")
|
102 |
+
if result.stderr:
|
103 |
+
print(f"⚠️ 错误输出:\n{result.stderr}")
|
104 |
|
105 |
if result.returncode != 0:
|
106 |
+
print("❌ 安装命令执行失败")
|
|
|
107 |
return False
|
108 |
|
109 |
print("✅ Playwright 浏览器安装完成!")
|
110 |
return True
|
111 |
+
|
112 |
except subprocess.CalledProcessError as e:
|
113 |
print(f"❌ 浏览器安装失败: {e}")
|
114 |
return False
|
|
|
118 |
|
119 |
def main():
|
120 |
"""主函数"""
|
121 |
+
print("🚀 初始化 Playwright (Hugging Face Space 增强版)...")
|
122 |
|
123 |
# 首先设置环境变量
|
124 |
cache_dir = setup_environment()
|
125 |
|
126 |
# 检查是否已正确安装
|
127 |
+
print("🔍 第一次检查...")
|
128 |
if check_playwright_installation():
|
129 |
return True
|
130 |
|
|
|
132 |
print("🔧 检测到问题,正在重新安装浏览器...")
|
133 |
if install_browsers():
|
134 |
# 重新检查
|
135 |
+
print("🔍 安装后重新检查...")
|
136 |
return check_playwright_installation()
|
137 |
|
138 |
+
print("😞 所有尝试都失败了")
|
139 |
return False
|
140 |
|
141 |
if __name__ == "__main__":
|
|
|
143 |
if not success:
|
144 |
print("❌ Playwright 初始化失败!")
|
145 |
print("💡 提示:如果仍有问题,可能需要在系统级别安装浏览器依赖")
|
146 |
+
print("🔍 调试信息:")
|
147 |
+
print(f" - 当前用户: {os.getuid() if hasattr(os, 'getuid') else 'N/A'}")
|
148 |
+
print(f" - 工作目录: {Path.cwd()}")
|
149 |
+
print(f" - 环境变量 HOME: {os.environ.get('HOME', 'NOT SET')}")
|
150 |
sys.exit(1)
|
151 |
print("🎉 Playwright 初始化成功!")
|