ASC8384 committed on
Commit
9a1ee6a
·
1 Parent(s): bee90e6
Files changed (2) hide show
  1. Dockerfile +50 -10
  2. init_playwright.py +70 -19
Dockerfile CHANGED
@@ -1,12 +1,13 @@
1
  FROM python:3.10-slim
2
 
3
- # 安装系统依赖
4
  RUN apt-get update && apt-get install -y \
5
  wget \
6
  gnupg \
7
  ca-certificates \
8
  fonts-liberation \
9
  libasound2 \
 
10
  libatk-bridge2.0-0 \
11
  libatk1.0-0 \
12
  libc6 \
@@ -20,8 +21,10 @@ RUN apt-get update && apt-get install -y \
20
  libgdk-pixbuf2.0-0 \
21
  libglib2.0-0 \
22
  libgtk-3-0 \
 
23
  libnspr4 \
24
  libnss3 \
 
25
  libpango-1.0-0 \
26
  libpangocairo-1.0-0 \
27
  libstdc++6 \
@@ -39,14 +42,34 @@ RUN apt-get update && apt-get install -y \
39
  libxss1 \
40
  libxtst6 \
41
  lsb-release \
42
- wget \
43
  xdg-utils \
44
- libnss3-dev \
45
- libxss1 \
46
- libasound2-dev \
47
- libxrandr2 \
48
- libxcomposite1 \
49
- libxdamage1 \
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  && rm -rf /var/lib/apt/lists/*
51
 
52
  WORKDIR /app
@@ -57,12 +80,29 @@ COPY requirements.txt .
57
  # 安装Python依赖
58
  RUN pip install --no-cache-dir -r requirements.txt
59
 
60
- # 安装playwright浏览器
61
- RUN playwright install
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
  # 复制应用代码
64
  COPY . .
65
 
 
 
 
66
  # 暴露端口
67
  EXPOSE 7860
68
 
 
1
  FROM python:3.10-slim
2
 
3
+ # 安装系统依赖 (包括Playwright需要的所有依赖)
4
  RUN apt-get update && apt-get install -y \
5
  wget \
6
  gnupg \
7
  ca-certificates \
8
  fonts-liberation \
9
  libasound2 \
10
+ libasound2-dev \
11
  libatk-bridge2.0-0 \
12
  libatk1.0-0 \
13
  libc6 \
 
21
  libgdk-pixbuf2.0-0 \
22
  libglib2.0-0 \
23
  libgtk-3-0 \
24
+ libgtk-4-1 \
25
  libnspr4 \
26
  libnss3 \
27
+ libnss3-dev \
28
  libpango-1.0-0 \
29
  libpangocairo-1.0-0 \
30
  libstdc++6 \
 
42
  libxss1 \
43
  libxtst6 \
44
  lsb-release \
 
45
  xdg-utils \
46
+ # Playwright额外依赖
47
+ gstreamer1.0-base \
48
+ gstreamer1.0-plugins-base \
49
+ gstreamer1.0-plugins-good \
50
+ gstreamer1.0-plugins-bad \
51
+ libgstreamer1.0-0 \
52
+ libgstreamer-plugins-base1.0-0 \
53
+ libgstreamer-plugins-good1.0-0 \
54
+ libgstreamer-plugins-bad1.0-0 \
55
+ libgstapp1.0-0 \
56
+ libgstbase1.0-0 \
57
+ libgstpbutils1.0-0 \
58
+ libgstaudio1.0-0 \
59
+ libgsttag1.0-0 \
60
+ libgstvideo1.0-0 \
61
+ libgstgl1.0-0 \
62
+ libgstcodecparsers1.0-0 \
63
+ libgstfft1.0-0 \
64
+ libgstallocators1.0-0 \
65
+ # 图形和字体库
66
+ libgraphene-1.0-0 \
67
+ libatomic1 \
68
+ libxslt1.1 \
69
+ libwoff1 \
70
+ libvpx7 \
71
+ libevent-2.1-7 \
72
+ libopus0 \
73
  && rm -rf /var/lib/apt/lists/*
74
 
75
  WORKDIR /app
 
80
  # 安装Python依赖
81
  RUN pip install --no-cache-dir -r requirements.txt
82
 
83
+ # 设置环境变量(与我们的init_playwright.py脚本配合)
84
+ ENV PLAYWRIGHT_BROWSERS_PATH=/app/.cache/ms-playwright
85
+ ENV PLAYWRIGHT_SKIP_BROWSER_GC=1
86
+ ENV XDG_CACHE_HOME=/app/.cache
87
+ ENV HOME=/app
88
+ ENV TMPDIR=/app/.cache/tmp
89
+ ENV TMP=/app/.cache/tmp
90
+ ENV TEMP=/app/.cache/tmp
91
+
92
+ # 创建缓存目录并设置权限
93
+ RUN mkdir -p /app/.cache/ms-playwright /app/.cache/tmp && \
94
+ chmod -R 755 /app/.cache
95
+
96
+ # 安装playwright浏览器到指定目录
97
+ RUN playwright install chromium --with-deps || \
98
+ (echo "第一次安装失败,尝试不使用--with-deps" && playwright install chromium)
99
 
100
  # 复制应用代码
101
  COPY . .
102
 
103
+ # 确保所有文件有正确权限
104
+ RUN chmod +x init_playwright.py
105
+
106
  # 暴露端口
107
  EXPOSE 7860
108
 
init_playwright.py CHANGED
@@ -12,39 +12,72 @@ from pathlib import Path
12
 
13
  def setup_environment():
14
  """设置环境变量,解决权限问题"""
15
- # 设置缓存目录到用户主目录
16
- home_dir = Path.home()
17
- cache_dir = home_dir / ".cache"
18
 
19
- # 确保缓存目录存在
20
- cache_dir.mkdir(parents=True, exist_ok=True)
 
 
 
 
 
21
 
22
- # 设置 Playwright 相关环境变量
23
- os.environ["PLAYWRIGHT_BROWSERS_PATH"] = str(cache_dir / "ms-playwright")
24
- os.environ["PLAYWRIGHT_SKIP_BROWSER_GC"] = "1"
 
25
 
26
- # 设置系统缓存目录
27
- os.environ["XDG_CACHE_HOME"] = str(cache_dir)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
- print(f"📁 设置缓存目录: {cache_dir}")
30
  return str(cache_dir)
31
 
32
  def check_playwright_installation():
33
  """检查playwright是否正确安装"""
34
  try:
 
 
35
  # 尝试导入playwright
36
  from playwright.sync_api import sync_playwright
 
37
 
38
  # 尝试启动浏览器
 
39
  with sync_playwright() as p:
40
  browser = p.chromium.launch(headless=True)
 
41
  browser.close()
 
42
 
43
  print("✅ Playwright 浏览器检查通过!")
44
  return True
45
 
46
  except Exception as e:
47
  print(f"❌ Playwright 浏览器检查失败: {e}")
 
48
  return False
49
 
50
  def install_browsers():
@@ -52,19 +85,30 @@ def install_browsers():
52
  try:
53
  print("🔄 正在安装 Playwright 浏览器...")
54
 
55
- # 使用 --with-deps 选项一次性安装浏览器和依赖
56
- result = subprocess.run([
57
- sys.executable, "-m", "playwright", "install",
58
- "chromium", "--with-deps"
59
- ], capture_output=True, text=True)
 
 
 
 
 
 
 
 
 
 
 
60
 
61
  if result.returncode != 0:
62
- print(f"安装输出: {result.stdout}")
63
- print(f"错误输出: {result.stderr}")
64
  return False
65
 
66
  print("✅ Playwright 浏览器安装完成!")
67
  return True
 
68
  except subprocess.CalledProcessError as e:
69
  print(f"❌ 浏览器安装失败: {e}")
70
  return False
@@ -74,12 +118,13 @@ def install_browsers():
74
 
75
  def main():
76
  """主函数"""
77
- print("🚀 初始化 Playwright (Hugging Face Space 优化版)...")
78
 
79
  # 首先设置环境变量
80
  cache_dir = setup_environment()
81
 
82
  # 检查是否已正确安装
 
83
  if check_playwright_installation():
84
  return True
85
 
@@ -87,8 +132,10 @@ def main():
87
  print("🔧 检测到问题,正在重新安装浏览器...")
88
  if install_browsers():
89
  # 重新检查
 
90
  return check_playwright_installation()
91
 
 
92
  return False
93
 
94
  if __name__ == "__main__":
@@ -96,5 +143,9 @@ if __name__ == "__main__":
96
  if not success:
97
  print("❌ Playwright 初始化失败!")
98
  print("💡 提示:如果仍有问题,可能需要在系统级别安装浏览器依赖")
 
 
 
 
99
  sys.exit(1)
100
  print("🎉 Playwright 初始化成功!")
 
12
 
13
  def setup_environment():
14
  """设置环境变量,解决权限问题"""
15
+ # 获取当前工作目录
16
+ current_dir = Path.cwd()
17
+ app_cache_dir = current_dir / ".cache"
18
 
19
+ # 如果在 /app 目录下,使用 /app/.cache
20
+ if str(current_dir).startswith('/app'):
21
+ cache_dir = app_cache_dir
22
+ else:
23
+ # 否则使用用户主目录
24
+ home_dir = Path.home()
25
+ cache_dir = home_dir / ".cache"
26
 
27
+ # 确保缓存目录存在并有正确权限
28
+ cache_dir.mkdir(parents=True, exist_ok=True, mode=0o755)
29
+ playwright_cache = cache_dir / "ms-playwright"
30
+ playwright_cache.mkdir(parents=True, exist_ok=True, mode=0o755)
31
 
32
+ # 强制设置所有相关环境变量
33
+ env_vars = {
34
+ "PLAYWRIGHT_BROWSERS_PATH": str(playwright_cache),
35
+ "PLAYWRIGHT_SKIP_BROWSER_GC": "1",
36
+ "XDG_CACHE_HOME": str(cache_dir),
37
+ "HOME": str(cache_dir.parent), # 确保 HOME 指向正确位置
38
+ "TMPDIR": str(cache_dir / "tmp"),
39
+ "TMP": str(cache_dir / "tmp"),
40
+ "TEMP": str(cache_dir / "tmp"),
41
+ }
42
+
43
+ # 创建临时目录
44
+ tmp_dir = cache_dir / "tmp"
45
+ tmp_dir.mkdir(parents=True, exist_ok=True, mode=0o755)
46
+
47
+ # 设置环境变量
48
+ for key, value in env_vars.items():
49
+ os.environ[key] = value
50
+ print(f"🔧 设置环境变量: {key}={value}")
51
+
52
+ print(f"📁 缓存目录: {cache_dir}")
53
+ print(f"🎭 Playwright缓存: {playwright_cache}")
54
+ print(f"📂 当前工作目录: {current_dir}")
55
 
 
56
  return str(cache_dir)
57
 
58
  def check_playwright_installation():
59
  """检查playwright是否正确安装"""
60
  try:
61
+ print("🔍 开始检查 Playwright 安装...")
62
+
63
  # 尝试导入playwright
64
  from playwright.sync_api import sync_playwright
65
+ print("✅ Playwright 模块导入成功")
66
 
67
  # 尝试启动浏览器
68
+ print("🌐 尝试启动 Chromium 浏览器...")
69
  with sync_playwright() as p:
70
  browser = p.chromium.launch(headless=True)
71
+ print("✅ 浏览器启动成功")
72
  browser.close()
73
+ print("✅ 浏览器关闭成功")
74
 
75
  print("✅ Playwright 浏览器检查通过!")
76
  return True
77
 
78
  except Exception as e:
79
  print(f"❌ Playwright 浏览器检查失败: {e}")
80
+ print(f"🔍 错误类型: {type(e).__name__}")
81
  return False
82
 
83
  def install_browsers():
 
85
  try:
86
  print("🔄 正在安装 Playwright 浏览器...")
87
 
88
+ # 打印当前环境变量
89
+ print("🔍 当前环境变量:")
90
+ for key in ["PLAYWRIGHT_BROWSERS_PATH", "XDG_CACHE_HOME", "HOME", "TMPDIR"]:
91
+ print(f" {key}: {os.environ.get(key, 'NOT SET')}")
92
+
93
+ # 使用更详细的安装命令
94
+ cmd = [sys.executable, "-m", "playwright", "install", "chromium"]
95
+ print(f"🚀 执行命令: {' '.join(cmd)}")
96
+
97
+ result = subprocess.run(cmd, capture_output=True, text=True, env=os.environ.copy())
98
+
99
+ print(f"📤 命令返回码: {result.returncode}")
100
+ if result.stdout:
101
+ print(f"📜 标准输出:\n{result.stdout}")
102
+ if result.stderr:
103
+ print(f"⚠️ 错误输出:\n{result.stderr}")
104
 
105
  if result.returncode != 0:
106
+ print(" 安装命令执行失败")
 
107
  return False
108
 
109
  print("✅ Playwright 浏览器安装完成!")
110
  return True
111
+
112
  except subprocess.CalledProcessError as e:
113
  print(f"❌ 浏览器安装失败: {e}")
114
  return False
 
118
 
119
  def main():
120
  """主函数"""
121
+ print("🚀 初始化 Playwright (Hugging Face Space 增强版)...")
122
 
123
  # 首先设置环境变量
124
  cache_dir = setup_environment()
125
 
126
  # 检查是否已正确安装
127
+ print("🔍 第一次检查...")
128
  if check_playwright_installation():
129
  return True
130
 
 
132
  print("🔧 检测到问题,正在重新安装浏览器...")
133
  if install_browsers():
134
  # 重新检查
135
+ print("🔍 安装后重新检查...")
136
  return check_playwright_installation()
137
 
138
+ print("😞 所有尝试都失败了")
139
  return False
140
 
141
  if __name__ == "__main__":
 
143
  if not success:
144
  print("❌ Playwright 初始化失败!")
145
  print("💡 提示:如果仍有问题,可能需要在系统级别安装浏览器依赖")
146
+ print("🔍 调试信息:")
147
+ print(f" - 当前用户: {os.getuid() if hasattr(os, 'getuid') else 'N/A'}")
148
+ print(f" - 工作目录: {Path.cwd()}")
149
+ print(f" - 环境变量 HOME: {os.environ.get('HOME', 'NOT SET')}")
150
  sys.exit(1)
151
  print("🎉 Playwright 初始化成功!")