nyasukun committed
Commit aacf53d · 1 Parent(s): 0e6b4d6
Files changed (2)
  1. app.py +97 -119
  2. troubleshooting.md +132 -0
app.py CHANGED
Before:

@@ -1,10 +1,10 @@
 import gradio as gr
-from huggingface_hub import AsyncInferenceClient
 from typing import List, Dict, Optional, Union
 import logging
 from enum import Enum, auto
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification, pipeline
 import spaces

 # Logger configuration
@@ -16,7 +16,7 @@ logger = logging.getLogger(__name__)

 # Model type definitions
 LOCAL = "local"
-INFERENCE_API = "inference_api"

 # Model definitions
 TEXT_GENERATION_MODELS = [
@@ -50,7 +50,6 @@ CLASSIFICATION_MODELS = [
 ]

 # Models and tokenizers are managed in global variables
-models = {}
 tokenizers = {}
 pipelines = {}
 api_clients = {}
@@ -60,156 +59,135 @@ def initialize_api_clients():
     """Initialize Inference API clients"""
     for model in TEXT_GENERATION_MODELS + CLASSIFICATION_MODELS:
         if model["type"] == INFERENCE_API and "model_id" in model:
-            api_clients[model["model_id"]] = AsyncInferenceClient(
                 model["model_id"],
                 token=True  # Use the HF token
             )
     logger.info("API clients initialized")

-# Model loading function
-def load_model(model_path, task="text-generation"):
-    """Load a model synchronously"""
-    if model_path not in models:
-        logger.info(f"Loading model: {model_path}")
-        try:
-            tokenizers[model_path] = AutoTokenizer.from_pretrained(model_path)
-
-            if task == "text-generation":
-                model = AutoModelForCausalLM.from_pretrained(
-                    model_path,
-                    torch_dtype=torch.float16,
-                    device_map="auto"
-                )
                 pipelines[model_path] = pipeline(
                     "text-generation",
-                    model=model,
-                    tokenizer=tokenizers[model_path]
-                )
-            else:  # classification
-                model = AutoModelForSequenceClassification.from_pretrained(
-                    model_path,
                     device_map="auto"
                 )
-                pipelines[model_path] = pipeline(
-                    "text-classification",
-                    model=model,
-                    tokenizer=tokenizers[model_path]
-                )
-
-            models[model_path] = model
-            logger.info(f"Model loaded successfully: {model_path}")
-        except Exception as e:
-            logger.error(f"Error loading model {model_path}: {str(e)}")
-            raise
-
-# Preload all models
-def preload_models():
-    """Preload all local models at startup"""
-    logger.info("Preloading all local models...")
-    for model in TEXT_GENERATION_MODELS:
-        if model["type"] == LOCAL and "model_path" in model:
-            try:
-                load_model(model["model_path"], "text-generation")
             except Exception as e:
-                logger.error(f"Failed to preload {model['name']}: {e}")
-
     for model in CLASSIFICATION_MODELS:
         if model["type"] == LOCAL and "model_path" in model:
             try:
-                load_model(model["model_path"], "text-classification")
             except Exception as e:
-                logger.error(f"Failed to preload {model['name']}: {e}")
-
-    logger.info("Model preloading complete")

-# Text generation
-@spaces.GPU()
-def generate_text(model_path, text):
-    """Run text generation"""
-    if model_path not in models:
-        load_model(model_path, "text-generation")
-
     try:
         outputs = pipelines[model_path](
             text,
-            max_new_tokens=50,  # Fewer tokens to reduce GPU memory usage
-            do_sample=True,
-            temperature=0.7,
-            top_p=0.9,
             num_return_sequences=1
         )
         return outputs[0]["generated_text"]
     except Exception as e:
-        logger.error(f"Error in text generation with {model_path}: {str(e)}")
-        raise

-# Text classification
-@spaces.GPU()
-def classify_text(model_path, text):
-    """Run text classification"""
-    if model_path not in models:
-        load_model(model_path, "text-classification")
-
     try:
         result = pipelines[model_path](text)
         return str(result)
     except Exception as e:
-        logger.error(f"Error in classification with {model_path}: {str(e)}")
-        raise

-# Run text generation across multiple models
-async def run_text_generation(text, selected_types):
-    """Run the text generation models"""
     results = []
     for model in TEXT_GENERATION_MODELS:
         if model["type"] in selected_types:
-            try:
-                if model["type"] == INFERENCE_API:
-                    logger.info(f"Running API text generation: {model['name']}")
-                    response = await api_clients[model["model_id"]].text_generation(
-                        text, max_new_tokens=50, temperature=0.7
-                    )
-                    results.append(f"{model['name']}: {response}")
-                else:
-                    logger.info(f"Running local text generation: {model['name']}")
-                    response = generate_text(model["model_path"], text)
-                    results.append(f"{model['name']}: {response}")
-            except Exception as e:
-                logger.error(f"Error in {model['name']}: {str(e)}")
-                results.append(f"{model['name']}: Error - {str(e)}")
-    return results
-
-# Run text classification across multiple models
-async def run_classification(text, selected_types):
-    """Run the classification models"""
-    results = []
     for model in CLASSIFICATION_MODELS:
         if model["type"] in selected_types:
-            try:
-                if model["type"] == INFERENCE_API:
-                    logger.info(f"Running API classification: {model['name']}")
-                    response = await api_clients[model["model_id"]].text_classification(text)
-                    results.append(f"{model['name']}: {response}")
-                else:
-                    logger.info(f"Running local classification: {model['name']}")
-                    response = classify_text(model["model_path"], text)
-                    results.append(f"{model['name']}: {response}")
-            except Exception as e:
-                logger.error(f"Error in {model['name']}: {str(e)}")
-                results.append(f"{model['name']}: Error - {str(e)}")
-    return results
-
-# Handler for the Invoke button
-async def handle_invoke(text, selected_types):
-    """Handler for the Invoke button"""
-    gen_results = await run_text_generation(text, selected_types)
-    class_results = await run_classification(text, selected_types)

     # Pad the result lists to a fixed length
-    gen_results.extend([""] * (len(TEXT_GENERATION_MODELS) - len(gen_results)))
-    class_results.extend([""] * (len(CLASSIFICATION_MODELS) - len(class_results)))

-    return gen_results + class_results

 # Update model visibility
 def update_model_visibility(selected_types):
@@ -231,7 +209,7 @@ def load_models_and_update_ui():
     # Initialize API clients
     initialize_api_clients()
     # Load the models
-    preload_models()
     logger.info("Models loaded successfully")
     # Return the load-complete state and hide the loading indicator in the UI
     return gr.update(visible=False), gr.update(visible=True)
 
After:

@@ -1,10 +1,10 @@
 import gradio as gr
+from huggingface_hub import InferenceClient
 from typing import List, Dict, Optional, Union
 import logging
 from enum import Enum, auto
 import torch
+from transformers import AutoTokenizer, pipeline
 import spaces

 # Logger configuration
@@ -16,7 +16,7 @@ logger = logging.getLogger(__name__)

 # Model type definitions
 LOCAL = "local"
+INFERENCE_API = "api"

 # Model definitions
 TEXT_GENERATION_MODELS = [
@@ -50,7 +50,6 @@ CLASSIFICATION_MODELS = [
 ]

 # Models and tokenizers are managed in global variables
 tokenizers = {}
 pipelines = {}
 api_clients = {}
@@ -60,156 +59,135 @@ def initialize_api_clients():
     """Initialize Inference API clients"""
     for model in TEXT_GENERATION_MODELS + CLASSIFICATION_MODELS:
         if model["type"] == INFERENCE_API and "model_id" in model:
+            logger.info(f"Initializing API client for {model['name']}")
+            api_clients[model["model_id"]] = InferenceClient(
                 model["model_id"],
                 token=True  # Use the HF token
             )
     logger.info("API clients initialized")

+# Preload local models
+def preload_local_models():
+    """Preload local models"""
+    logger.info("Preloading local models at application startup...")
+
+    # Text generation models
+    for model in TEXT_GENERATION_MODELS:
+        if model["type"] == LOCAL and "model_path" in model:
+            model_path = model["model_path"]
+            try:
+                logger.info(f"Preloading text generation model: {model_path}")
+                tokenizers[model_path] = AutoTokenizer.from_pretrained(model_path)
                 pipelines[model_path] = pipeline(
                     "text-generation",
+                    model=model_path,
+                    tokenizer=tokenizers[model_path],
+                    torch_dtype=torch.bfloat16,
+                    trust_remote_code=True,
                     device_map="auto"
                 )
+                logger.info(f"Model preloaded successfully: {model_path}")
             except Exception as e:
+                logger.error(f"Error preloading model {model_path}: {str(e)}")
+
+    # Classification models
     for model in CLASSIFICATION_MODELS:
         if model["type"] == LOCAL and "model_path" in model:
+            model_path = model["model_path"]
             try:
+                logger.info(f"Preloading classification model: {model_path}")
+                tokenizers[model_path] = AutoTokenizer.from_pretrained(model_path)
+                pipelines[model_path] = pipeline(
+                    "text-classification",
+                    model=model_path,
+                    tokenizer=tokenizers[model_path],
+                    torch_dtype=torch.bfloat16,
+                    trust_remote_code=True,
+                    device_map="auto"
+                )
+                logger.info(f"Model preloaded successfully: {model_path}")
             except Exception as e:
+                logger.error(f"Error preloading model {model_path}: {str(e)}")

+@spaces.GPU
+def generate_text_local(model_path, text):
+    """Text generation with a local model"""
     try:
+        logger.info(f"Running local text generation with {model_path}")
         outputs = pipelines[model_path](
             text,
+            max_new_tokens=40,
+            do_sample=False,
             num_return_sequences=1
         )
         return outputs[0]["generated_text"]
     except Exception as e:
+        logger.error(f"Error in local text generation with {model_path}: {str(e)}")
+        return f"Error: {str(e)}"

+def generate_text_api(model_id, text):
+    """Text generation via the Inference API"""
+    try:
+        logger.info(f"Running API text generation with {model_id}")
+        response = api_clients[model_id].text_generation(
+            text,
+            max_new_tokens=40,
+            temperature=0.7
+        )
+        return response
+    except Exception as e:
+        logger.error(f"Error in API text generation with {model_id}: {str(e)}")
+        return f"Error: {str(e)}"
+
+@spaces.GPU
+def classify_text_local(model_path, text):
+    """Text classification with a local model"""
     try:
+        logger.info(f"Running local classification with {model_path}")
         result = pipelines[model_path](text)
         return str(result)
     except Exception as e:
+        logger.error(f"Error in local classification with {model_path}: {str(e)}")
+        return f"Error: {str(e)}"
+
+def classify_text_api(model_id, text):
+    """Text classification via the Inference API"""
+    try:
+        logger.info(f"Running API classification with {model_id}")
+        response = api_clients[model_id].text_classification(text)
+        return str(response)
+    except Exception as e:
+        logger.error(f"Error in API classification with {model_id}: {str(e)}")
+        return f"Error: {str(e)}"

+# Handler for the Invoke button
+def handle_invoke(text, selected_types):
+    """Handler for the Invoke button"""
     results = []
+
+    # Run the text generation models
     for model in TEXT_GENERATION_MODELS:
         if model["type"] in selected_types:
+            if model["type"] == LOCAL:
+                result = generate_text_local(model["model_path"], text)
+            else:  # api
+                result = generate_text_api(model["model_id"], text)
+            results.append(f"{model['name']}: {result}")
+
+    # Run the classification models
     for model in CLASSIFICATION_MODELS:
         if model["type"] in selected_types:
+            if model["type"] == LOCAL:
+                result = classify_text_local(model["model_path"], text)
+            else:  # api
+                result = classify_text_api(model["model_id"], text)
+            results.append(f"{model['name']}: {result}")

     # Pad the result lists to a fixed length
+    while len(results) < len(TEXT_GENERATION_MODELS) + len(CLASSIFICATION_MODELS):
+        results.append("")

+    return results

 # Update model visibility
 def update_model_visibility(selected_types):
@@ -231,7 +209,7 @@ def load_models_and_update_ui():
     # Initialize API clients
     initialize_api_clients()
     # Load the models
+    preload_local_models()
     logger.info("Models loaded successfully")
     # Return the load-complete state and hide the loading indicator in the UI
     return gr.update(visible=False), gr.update(visible=True)
troubleshooting.md ADDED
@@ -0,0 +1,132 @@
# Troubleshooting Guide

This document provides solutions for common issues encountered when running the Toxic Eye application.

## Gradio Version Compatibility

Ensure that you're using Gradio version 5.23.2, as specified in the project's `README.md` file:

```bash
pip install gradio==5.23.2
```

You can check your current Gradio version with:

```bash
pip show gradio
```

If you're running on HuggingFace Spaces, check that the `sdk_version` in the README.md frontmatter is set to 5.23.2:

```yaml
sdk: gradio
sdk_version: 5.23.2
```

Using older or newer versions might cause unexpected behavior with the Spaces GPU integration.
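
If you also want to confirm the version from inside the running app (for example, in the Space logs), a minimal check like the following works; `EXPECTED_GRADIO_VERSION` is only an illustrative constant, not part of the app's code:

```python
import gradio as gr

EXPECTED_GRADIO_VERSION = "5.23.2"  # the version pinned in this project's README

# Warn early if the runtime Gradio version differs from the pinned one.
if gr.__version__ != EXPECTED_GRADIO_VERSION:
    print(f"Warning: running Gradio {gr.__version__}, expected {EXPECTED_GRADIO_VERSION}")
```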

## GPU Acceleration Issues

### spaces.GPU() Decorator Issues

We've observed that the `spaces.GPU()` decorator may not work correctly when used with methods inside a class. This can lead to errors like:

```
HTTP Request: POST http://device-api.zero/release?allowToken=... "HTTP/1.1 404 Not Found"
Error in text generation: 'GPU task aborted'
```

### Solution

1. Use the `@spaces.GPU` decorator (without parentheses) instead of `@spaces.GPU()` with standalone functions:

   **Problematic:**
   ```python
   @spaces.GPU()  # With parentheses
   def generate_text(model_path, text):
       # ...
   ```

   **Recommended:**
   ```python
   @spaces.GPU  # Without parentheses
   def generate_text_local(model_path, text):
       # ...
   ```

2. Create the pipeline directly instead of loading the model and tokenizer separately:

   **Problematic:**
   ```python
   model = AutoModelForCausalLM.from_pretrained(model_path, ...)
   tokenizer = AutoTokenizer.from_pretrained(model_path)
   pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
   ```

   **Recommended:**
   ```python
   tokenizer = AutoTokenizer.from_pretrained(model_path)
   pipe = pipeline(
       "text-generation",
       model=model_path,  # Pass the model ID/path directly
       tokenizer=tokenizer,
       torch_dtype=torch.bfloat16,
       device_map="auto"
   )
   ```

3. Use the synchronous `InferenceClient` instead of `AsyncInferenceClient` for API calls:

   **Problematic:**
   ```python
   from huggingface_hub import AsyncInferenceClient
   client = AsyncInferenceClient(model_id)
   response = await client.text_generation(text)
   ```

   **Recommended:**
   ```python
   from huggingface_hub import InferenceClient
   client = InferenceClient(model_id)
   response = client.text_generation(text)  # Synchronous call
   ```

4. Implement appropriate error handling to recover gracefully from GPU task aborts:

   ```python
   try:
       result = pipeline(text)
       return result
   except Exception as e:
       logger.error(f"Error: {str(e)}")
       return f"Error: {str(e)}"  # Return an error message instead of raising
   ```

## Other Common Issues

### Multiple Models Loading Timeout

When preloading multiple large models, the application might time out or crash due to memory constraints.

**Solution** (the settings below are combined in the sketch that follows):
- Use `torch.bfloat16` or `torch.float16` precision to reduce memory usage
- Add `trust_remote_code=True` when loading models
- Use `do_sample=False` for deterministic (greedy) text generation
- Keep token generation limits reasonable (`max_new_tokens=40` or less)
117
+ ### API vs Local Model Performance
118
+
119
+ When mixing API and local models, you might encounter inconsistent behavior.
120
+
121
+ **Solution:**
122
+ - Keep separate functions for API and local model execution
123
+ - Handle errors distinctly for each type
124
+ - Use non-async code for simpler execution flow
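
As an outline of that separation, a caller can dispatch on the model type and rely on each function to return an error string rather than raise; `generate_text_local` and `generate_text_api` are the functions defined in `app.py` above, while `run_generation_model` is only illustrative:

```python
def run_generation_model(model, text):
    """Dispatch one text generation model to the local or API path."""
    if model["type"] == "local":
        # Synchronous local pipeline call (GPU-decorated in app.py).
        return generate_text_local(model["model_path"], text)
    # Synchronous Inference API call; no async/await needed.
    return generate_text_api(model["model_id"], text)
```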

## Reporting Issues

If you encounter issues not covered in this guide, please report them by creating an issue in the repository with:
- A detailed description of the problem
- Relevant error messages
- Steps to reproduce the issue
- Your environment information (OS, Python version, GPU, etc.)