nyasukun committed
Commit aacf53d · 1 Parent(s): 0e6b4d6
Files changed (2)
  1. app.py +97 -119
  2. troubleshooting.md +132 -0
app.py CHANGED
Before:

@@ -1,10 +1,10 @@
 import gradio as gr
-from huggingface_hub import AsyncInferenceClient
 from typing import List, Dict, Optional, Union
 import logging
 from enum import Enum, auto
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification, pipeline
 import spaces

 # Logger configuration
@@ -16,7 +16,7 @@ logger = logging.getLogger(__name__)

 # Model type definitions
 LOCAL = "local"
-INFERENCE_API = "inference_api"

 # Model definitions
 TEXT_GENERATION_MODELS = [
@@ -50,7 +50,6 @@ CLASSIFICATION_MODELS = [
 ]

 # Models and tokenizers are managed in global variables
-models = {}
 tokenizers = {}
 pipelines = {}
 api_clients = {}
@@ -60,156 +59,135 @@ def initialize_api_clients():
     """Initialize Inference API clients"""
     for model in TEXT_GENERATION_MODELS + CLASSIFICATION_MODELS:
         if model["type"] == INFERENCE_API and "model_id" in model:
-            api_clients[model["model_id"]] = AsyncInferenceClient(
                 model["model_id"],
                 token=True  # Use the HF token
             )
     logger.info("API clients initialized")

-# Model loading function
-def load_model(model_path, task="text-generation"):
-    """Load a model synchronously"""
-    if model_path not in models:
-        logger.info(f"Loading model: {model_path}")
-        try:
-            tokenizers[model_path] = AutoTokenizer.from_pretrained(model_path)
-
-            if task == "text-generation":
-                model = AutoModelForCausalLM.from_pretrained(
-                    model_path,
-                    torch_dtype=torch.float16,
-                    device_map="auto"
-                )
                 pipelines[model_path] = pipeline(
                     "text-generation",
-                    model=model,
-                    tokenizer=tokenizers[model_path]
-                )
-            else:  # classification
-                model = AutoModelForSequenceClassification.from_pretrained(
-                    model_path,
                     device_map="auto"
                 )
-                pipelines[model_path] = pipeline(
-                    "text-classification",
-                    model=model,
-                    tokenizer=tokenizers[model_path]
-                )
-
-            models[model_path] = model
-            logger.info(f"Model loaded successfully: {model_path}")
-        except Exception as e:
-            logger.error(f"Error loading model {model_path}: {str(e)}")
-            raise
-
-# Preload all models
-def preload_models():
-    """Preload all local models at startup"""
-    logger.info("Preloading all local models...")
-    for model in TEXT_GENERATION_MODELS:
-        if model["type"] == LOCAL and "model_path" in model:
-            try:
-                load_model(model["model_path"], "text-generation")
             except Exception as e:
-                logger.error(f"Failed to preload {model['name']}: {e}")
-
     for model in CLASSIFICATION_MODELS:
         if model["type"] == LOCAL and "model_path" in model:
             try:
-                load_model(model["model_path"], "text-classification")
             except Exception as e:
-                logger.error(f"Failed to preload {model['name']}: {e}")
-
-    logger.info("Model preloading complete")

-# Text generation
-@spaces.GPU()
-def generate_text(model_path, text):
-    """Run text generation"""
-    if model_path not in models:
-        load_model(model_path, "text-generation")
-
     try:
         outputs = pipelines[model_path](
             text,
-            max_new_tokens=50,  # Fewer tokens to reduce GPU memory usage
-            do_sample=True,
-            temperature=0.7,
-            top_p=0.9,
             num_return_sequences=1
         )
         return outputs[0]["generated_text"]
     except Exception as e:
-        logger.error(f"Error in text generation with {model_path}: {str(e)}")
-        raise

-# Text classification
-@spaces.GPU()
-def classify_text(model_path, text):
-    """Run text classification"""
-    if model_path not in models:
-        load_model(model_path, "text-classification")
-
     try:
         result = pipelines[model_path](text)
         return str(result)
     except Exception as e:
-        logger.error(f"Error in classification with {model_path}: {str(e)}")
-        raise

-# Run text generation across multiple models
-async def run_text_generation(text, selected_types):
-    """Run the text generation models"""
     results = []
     for model in TEXT_GENERATION_MODELS:
         if model["type"] in selected_types:
-            try:
-                if model["type"] == INFERENCE_API:
-                    logger.info(f"Running API text generation: {model['name']}")
-                    response = await api_clients[model["model_id"]].text_generation(
-                        text, max_new_tokens=50, temperature=0.7
-                    )
-                    results.append(f"{model['name']}: {response}")
-                else:
-                    logger.info(f"Running local text generation: {model['name']}")
-                    response = generate_text(model["model_path"], text)
-                    results.append(f"{model['name']}: {response}")
-            except Exception as e:
-                logger.error(f"Error in {model['name']}: {str(e)}")
-                results.append(f"{model['name']}: Error - {str(e)}")
-    return results
-
-# Run text classification across multiple models
-async def run_classification(text, selected_types):
-    """Run the classification models"""
-    results = []
     for model in CLASSIFICATION_MODELS:
         if model["type"] in selected_types:
-            try:
-                if model["type"] == INFERENCE_API:
-                    logger.info(f"Running API classification: {model['name']}")
-                    response = await api_clients[model["model_id"]].text_classification(text)
-                    results.append(f"{model['name']}: {response}")
-                else:
-                    logger.info(f"Running local classification: {model['name']}")
-                    response = classify_text(model["model_path"], text)
-                    results.append(f"{model['name']}: {response}")
-            except Exception as e:
-                logger.error(f"Error in {model['name']}: {str(e)}")
-                results.append(f"{model['name']}: Error - {str(e)}")
-    return results
-
-# Handler for the Invoke button
-async def handle_invoke(text, selected_types):
-    """Handler for the Invoke button"""
-    gen_results = await run_text_generation(text, selected_types)
-    class_results = await run_classification(text, selected_types)

     # Pad the result lists to a fixed length
-    gen_results.extend([""] * (len(TEXT_GENERATION_MODELS) - len(gen_results)))
-    class_results.extend([""] * (len(CLASSIFICATION_MODELS) - len(class_results)))

-    return gen_results + class_results

 # Update model visibility
 def update_model_visibility(selected_types):
@@ -231,7 +209,7 @@ def load_models_and_update_ui():
     # Initialize API clients
     initialize_api_clients()
     # Load the models
-    preload_models()
     logger.info("Models loaded successfully")
     # Return the load-complete state and hide the loading indicator in the UI
     return gr.update(visible=False), gr.update(visible=True)
 
After:

@@ -1,10 +1,10 @@
 import gradio as gr
+from huggingface_hub import InferenceClient
 from typing import List, Dict, Optional, Union
 import logging
 from enum import Enum, auto
 import torch
+from transformers import AutoTokenizer, pipeline
 import spaces

 # Logger configuration
@@ -16,7 +16,7 @@ logger = logging.getLogger(__name__)

 # Model type definitions
 LOCAL = "local"
+INFERENCE_API = "api"

 # Model definitions
 TEXT_GENERATION_MODELS = [
@@ -50,7 +50,6 @@ CLASSIFICATION_MODELS = [
 ]

 # Models and tokenizers are managed in global variables
 tokenizers = {}
 pipelines = {}
 api_clients = {}
@@ -60,156 +59,135 @@ def initialize_api_clients():
     """Initialize Inference API clients"""
     for model in TEXT_GENERATION_MODELS + CLASSIFICATION_MODELS:
         if model["type"] == INFERENCE_API and "model_id" in model:
+            logger.info(f"Initializing API client for {model['name']}")
+            api_clients[model["model_id"]] = InferenceClient(
                 model["model_id"],
                 token=True  # Use the HF token
             )
     logger.info("API clients initialized")

+# Preload local models
+def preload_local_models():
+    """Preload local models"""
+    logger.info("Preloading local models at application startup...")
+
+    # Text generation models
+    for model in TEXT_GENERATION_MODELS:
+        if model["type"] == LOCAL and "model_path" in model:
+            model_path = model["model_path"]
+            try:
+                logger.info(f"Preloading text generation model: {model_path}")
+                tokenizers[model_path] = AutoTokenizer.from_pretrained(model_path)
                 pipelines[model_path] = pipeline(
                     "text-generation",
+                    model=model_path,
+                    tokenizer=tokenizers[model_path],
+                    torch_dtype=torch.bfloat16,
+                    trust_remote_code=True,
                     device_map="auto"
                 )
+                logger.info(f"Model preloaded successfully: {model_path}")
             except Exception as e:
+                logger.error(f"Error preloading model {model_path}: {str(e)}")
+
+    # Classification models
     for model in CLASSIFICATION_MODELS:
         if model["type"] == LOCAL and "model_path" in model:
+            model_path = model["model_path"]
             try:
+                logger.info(f"Preloading classification model: {model_path}")
+                tokenizers[model_path] = AutoTokenizer.from_pretrained(model_path)
+                pipelines[model_path] = pipeline(
+                    "text-classification",
+                    model=model_path,
+                    tokenizer=tokenizers[model_path],
+                    torch_dtype=torch.bfloat16,
+                    trust_remote_code=True,
+                    device_map="auto"
+                )
+                logger.info(f"Model preloaded successfully: {model_path}")
             except Exception as e:
+                logger.error(f"Error preloading model {model_path}: {str(e)}")

+@spaces.GPU
+def generate_text_local(model_path, text):
+    """Text generation with a local model"""
     try:
+        logger.info(f"Running local text generation with {model_path}")
         outputs = pipelines[model_path](
             text,
+            max_new_tokens=40,
+            do_sample=False,
             num_return_sequences=1
         )
         return outputs[0]["generated_text"]
     except Exception as e:
+        logger.error(f"Error in local text generation with {model_path}: {str(e)}")
+        return f"Error: {str(e)}"

+def generate_text_api(model_id, text):
+    """Text generation via the Inference API"""
+    try:
+        logger.info(f"Running API text generation with {model_id}")
+        response = api_clients[model_id].text_generation(
+            text,
+            max_new_tokens=40,
+            temperature=0.7
+        )
+        return response
+    except Exception as e:
+        logger.error(f"Error in API text generation with {model_id}: {str(e)}")
+        return f"Error: {str(e)}"
+
+@spaces.GPU
+def classify_text_local(model_path, text):
+    """Text classification with a local model"""
     try:
+        logger.info(f"Running local classification with {model_path}")
         result = pipelines[model_path](text)
         return str(result)
     except Exception as e:
+        logger.error(f"Error in local classification with {model_path}: {str(e)}")
+        return f"Error: {str(e)}"
+
+def classify_text_api(model_id, text):
+    """Text classification via the Inference API"""
+    try:
+        logger.info(f"Running API classification with {model_id}")
+        response = api_clients[model_id].text_classification(text)
+        return str(response)
+    except Exception as e:
+        logger.error(f"Error in API classification with {model_id}: {str(e)}")
+        return f"Error: {str(e)}"

+# Handler for the Invoke button
+def handle_invoke(text, selected_types):
+    """Handler for the Invoke button"""
     results = []
+
+    # Run the text generation models
     for model in TEXT_GENERATION_MODELS:
         if model["type"] in selected_types:
+            if model["type"] == LOCAL:
+                result = generate_text_local(model["model_path"], text)
+            else:  # api
+                result = generate_text_api(model["model_id"], text)
+            results.append(f"{model['name']}: {result}")
+
+    # Run the classification models
     for model in CLASSIFICATION_MODELS:
         if model["type"] in selected_types:
+            if model["type"] == LOCAL:
+                result = classify_text_local(model["model_path"], text)
+            else:  # api
+                result = classify_text_api(model["model_id"], text)
+            results.append(f"{model['name']}: {result}")

     # Pad the result lists to a fixed length
+    while len(results) < len(TEXT_GENERATION_MODELS) + len(CLASSIFICATION_MODELS):
+        results.append("")

+    return results

 # Update model visibility
 def update_model_visibility(selected_types):
@@ -231,7 +209,7 @@ def load_models_and_update_ui():
     # Initialize API clients
     initialize_api_clients()
     # Load the models
+    preload_local_models()
     logger.info("Models loaded successfully")
     # Return the load-complete state and hide the loading indicator in the UI
     return gr.update(visible=False), gr.update(visible=True)
troubleshooting.md ADDED
@@ -0,0 +1,132 @@
# Troubleshooting Guide

This document provides solutions for common issues encountered when running the Toxic Eye application.

## Gradio Version Compatibility

Ensure that you're using Gradio version 5.23.2, as specified in the project's `README.md` file:

```bash
pip install gradio==5.23.2
```

You can check your current Gradio version with:

```bash
pip show gradio
```

If you're running on HuggingFace Spaces, check that the `sdk_version` in the README.md frontmatter is set to 5.23.2:

```yaml
sdk: gradio
sdk_version: 5.23.2
```

Using older or newer versions might cause unexpected behavior with the Spaces GPU integration.
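
If you also want to confirm the version from inside the running app (for example, in the Space logs), a minimal check like the following works; `EXPECTED_GRADIO_VERSION` is only an illustrative constant, not part of the app's code:

```python
import gradio as gr

EXPECTED_GRADIO_VERSION = "5.23.2"  # the version pinned in this project's README

# Warn early if the runtime Gradio version differs from the pinned one.
if gr.__version__ != EXPECTED_GRADIO_VERSION:
    print(f"Warning: running Gradio {gr.__version__}, expected {EXPECTED_GRADIO_VERSION}")
```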

## GPU Acceleration Issues

### spaces.GPU() Decorator Issues

We've observed that the `spaces.GPU()` decorator may not work correctly when used with methods inside a class. This can lead to errors like:

```
HTTP Request: POST http://device-api.zero/release?allowToken=... "HTTP/1.1 404 Not Found"
Error in text generation: 'GPU task aborted'
```

### Solution

1. Use the `@spaces.GPU` decorator (without parentheses) instead of `@spaces.GPU()` with standalone functions:

   **Problematic:**
   ```python
   @spaces.GPU()  # With parentheses
   def generate_text(model_path, text):
       # ...
   ```

   **Recommended:**
   ```python
   @spaces.GPU  # Without parentheses
   def generate_text_local(model_path, text):
       # ...
   ```

2. Create the pipeline directly instead of loading the model and tokenizer separately:

   **Problematic:**
   ```python
   model = AutoModelForCausalLM.from_pretrained(model_path, ...)
   tokenizer = AutoTokenizer.from_pretrained(model_path)
   pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
   ```

   **Recommended:**
   ```python
   tokenizer = AutoTokenizer.from_pretrained(model_path)
   pipe = pipeline(
       "text-generation",
       model=model_path,  # Pass the model ID/path directly
       tokenizer=tokenizer,
       torch_dtype=torch.bfloat16,
       device_map="auto"
   )
   ```

3. Use the synchronous `InferenceClient` instead of `AsyncInferenceClient` for API calls:

   **Problematic:**
   ```python
   from huggingface_hub import AsyncInferenceClient
   client = AsyncInferenceClient(model_id)
   response = await client.text_generation(text)
   ```

   **Recommended:**
   ```python
   from huggingface_hub import InferenceClient
   client = InferenceClient(model_id)
   response = client.text_generation(text)  # Synchronous call
   ```

4. Implement appropriate error handling to recover gracefully from GPU task aborts:

   ```python
   try:
       result = pipeline(text)
       return result
   except Exception as e:
       logger.error(f"Error: {str(e)}")
       return f"Error: {str(e)}"  # Return an error message instead of raising
   ```

## Other Common Issues

### Multiple Models Loading Timeout

When preloading multiple large models, the application might time out or crash due to memory constraints.

**Solution** (the settings below are combined in the sketch that follows):
- Use `torch.bfloat16` or `torch.float16` precision to reduce memory usage
- Add `trust_remote_code=True` when loading models
- Use `do_sample=False` for deterministic (greedy) text generation
- Keep token generation limits reasonable (`max_new_tokens=40` or less)
117
+ ### API vs Local Model Performance
118
+
119
+ When mixing API and local models, you might encounter inconsistent behavior.
120
+
121
+ **Solution:**
122
+ - Keep separate functions for API and local model execution
123
+ - Handle errors distinctly for each type
124
+ - Use non-async code for simpler execution flow
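
As an outline of that separation, a caller can dispatch on the model type and rely on each function to return an error string rather than raise; `generate_text_local` and `generate_text_api` are the functions defined in `app.py` above, while `run_generation_model` is only illustrative:

```python
def run_generation_model(model, text):
    """Dispatch one text generation model to the local or API path."""
    if model["type"] == "local":
        # Synchronous local pipeline call (GPU-decorated in app.py).
        return generate_text_local(model["model_path"], text)
    # Synchronous Inference API call; no async/await needed.
    return generate_text_api(model["model_id"], text)
```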

## Reporting Issues

If you encounter issues not covered in this guide, please report them by creating an issue in the repository with:
- A detailed description of the problem
- Relevant error messages
- Steps to reproduce the issue
- Your environment information (OS, Python version, GPU, etc.)