Spaces:

nyasukun
/

toxic-eye

Sleeping

App Files Files Community

nyasukun committed on Mar 30

Commit

5df8f2d

1 Parent(s): 8936e3e

.

Browse files

Files changed (4) hide show

.gitignore +60 -0
README.md +66 -1
app.py +361 -59
requirements.txt +7 -1

.gitignore ADDED Viewed

	@@ -0,0 +1,60 @@

+# Environment variables
+.env
+# Pipenv files
+Pipfile
+Pipfile.lock
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+# Virtual Environment
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+.DS_Store
+# Jupyter Notebook
+.ipynb_checkpoints
+# Model files and cache
+*.pt
+*.pth
+*.bin
+.cache/
+*.ckpt
+transformers_cache/
+torch_cache/
+# Logs
+*.log
+logs/

README.md CHANGED Viewed

@@ -9,4 +9,69 @@ app_file: app.py
 pinned: false
 ---
-An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).

 pinned: false
 ---
+# Toxic Total: Multi-Model Toxicity Evaluation Platform
+## Overview
+Toxic Total is a comprehensive platform that evaluates text toxicity using multiple language models and classifiers. This platform provides a unique approach by combining both generative and classification models to analyze potentially toxic content.
+## Features
+### 1. Text Generation Models
+Our platform utilizes four state-of-the-art language models:
+- **Zephyr-7B**: Specialized in understanding context and nuance
+- **Llama-2**: Known for its robust performance in content analysis
+- **Mistral-7B**: Offers precise and detailed text evaluation
+- **Claude-2**: Provides comprehensive toxicity assessment
+### 2. Classification Models
+We employ four specialized classification models:
+- **Toxic-BERT**: Fine-tuned for toxic content detection
+- **RoBERTa-Toxic**: Advanced toxic pattern recognition
+- **DistilBERT-Toxic**: Efficient toxicity classification
+- **XLM-RoBERTa-Toxic**: Multilingual toxicity detection
+### 3. Community Integration
+Access to community insights and discussions about similar content patterns and toxicity analysis.
+## Technical Details
+### Model Architecture
+Each model in our platform is carefully selected to provide complementary analysis:
+```python
+def analyze_toxicity(text):
+    # Multiple model evaluation
+    llm_results = text_generation_models(text)
+    classification_results = toxicity_classifiers(text)
+    community_insights = fetch_community_data(text)
+    return combined_analysis(llm_results, classification_results, community_insights)
+```
+### Performance Considerations
+- Real-time analysis capabilities
+- Efficient multi-model parallel processing
+- Optimized response generation
+## Usage Guidelines
+1. Enter the text you want to analyze in the input box
+2. Review results from multiple models
+3. Compare different model perspectives
+4. Check community insights for context
+## References
+- [Hugging Face Models](https://huggingface.co/models)
+- [Toxicity Classification Research](https://arxiv.org/abs/2103.00153)
+- [Language Model Evaluation Methods](https://arxiv.org/abs/2009.07118)
+## Citation
+If you use this platform in your research, please cite:
+```bibtex
+@software{toxic_total,
+  title = {Toxic Total: Multi-Model Toxicity Evaluation Platform},
+  year = {2024},
+  publisher = {Hugging Face},
+  url = {https://huggingface.co/spaces/nyasukun/toxic-eye}
+}
+```

app.py CHANGED Viewed

@@ -1,64 +1,366 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-    messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-        response += token
-        yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
 )
-if __name__ == "__main__":
     demo.launch()

 import gradio as gr
+from huggingface_hub import AsyncInferenceClient
+from typing import List, Dict, Optional, Union
+import logging
+from dataclasses import dataclass
+from enum import Enum, auto
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification, pipeline
+import spaces
+# Logger configuration: INFO level, timestamped records tagged with logger name
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
 )
+logger = logging.getLogger(__name__)
+# Model type definitions
+class ModelType(Enum):
+    """How a model is executed; the values double as the UI filter choices."""
+    LOCAL = "local"  # run in-process through LocalModelManager
+    INFERENCE_API = "inference_api"  # run remotely through AsyncInferenceClient
+@dataclass
+class ModelConfig:
+    """Static description of one model shown in the UI.
+
+    In this file INFERENCE_API entries set ``model_id`` and LOCAL entries set
+    ``model_path``; the other field is left as ``None``.
+    """
+    name: str  # display name; also the prefix of each result string
+    description: str  # shown as the info text of the model's output box
+    type: ModelType
+    model_id: Optional[str] = None  # key used to look up the API client
+    model_path: Optional[str] = None  # passed to from_pretrained for local runs
+# Expanded model definitions
+# Order matters: the UI creates one output box per entry in list order, and
+# the values returned by handle_invoke are matched to those boxes by position.
+TEXT_GENERATION_MODELS = [
+    ModelConfig(
+        name="Zephyr-7B",
+        description="Specialized in understanding context and nuance",
+        type=ModelType.INFERENCE_API,
+        model_id="HuggingFaceH4/zephyr-7b-beta"
+    ),
+    ModelConfig(
+        name="Llama-2",
+        description="Known for its robust performance in content analysis",
+        type=ModelType.LOCAL,
+        model_path="meta-llama/Llama-2-7b-hf"
+    ),
+    ModelConfig(
+        name="Mistral-7B",
+        description="Offers precise and detailed text evaluation",
+        type=ModelType.LOCAL,
+        model_path="mistralai/Mistral-7B-v0.1"
+    ),
+    ModelConfig(
+        name="Claude-2",
+        description="Provides comprehensive toxicity assessment",
+        type=ModelType.INFERENCE_API,
+        # NOTE(review): confirm this repo id is actually served by the Inference API
+        model_id="anthropic/claude-2"
+    )
+]
+# Toxicity classifiers, listed in the order their output boxes appear in the UI.
+CLASSIFICATION_MODELS = [
+    ModelConfig(
+        name="Toxic-BERT",
+        description="Fine-tuned for toxic content detection",
+        type=ModelType.LOCAL,
+        model_path="unitary/toxic-bert"
+    ),
+    ModelConfig(
+        name="RoBERTa-Toxic",
+        description="Advanced toxic pattern recognition",
+        type=ModelType.INFERENCE_API,
+        # NOTE(review): same model_id as "XLM-RoBERTa-Toxic" below, so both
+        # entries query the same remote model — confirm this is intentional.
+        model_id="unitary/multilingual-toxic-xlm-roberta"
+    ),
+    ModelConfig(
+        name="DistilBERT-Toxic",
+        description="Efficient toxicity classification",
+        type=ModelType.LOCAL,
+        # NOTE(review): verify this repo id exists on the Hub
+        model_path="unitary/multilingual-toxic-distilbert"
+    ),
+    ModelConfig(
+        name="XLM-RoBERTa-Toxic",
+        description="Multilingual toxicity detection",
+        type=ModelType.INFERENCE_API,
+        model_id="unitary/multilingual-toxic-xlm-roberta"
+    )
+]
+class LocalModelManager:
+    def __init__(self):
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        logger.info(f"Using device: {self.device}")
+        self.models = {}
+        self.tokenizers = {}
+        self.pipelines = {}
+    async def load_model(self, model_path: str, task: str = "text-generation"):
+        """モデルの遅延ロード"""
+        if model_path not in self.models:
+            logger.info(f"Loading model: {model_path}")
+            try:
+                self.tokenizers[model_path] = AutoTokenizer.from_pretrained(model_path)
+                if task == "text-generation":
+                    model = AutoModelForCausalLM.from_pretrained(
+                        model_path,
+                        torch_dtype=torch.float16,
+                        device_map="auto"
+                    )
+                    self.pipelines[model_path] = pipeline(
+                        "text-generation",
+                        model=model,
+                        tokenizer=self.tokenizers[model_path]
+                    )
+                else:  # classification
+                    model = AutoModelForSequenceClassification.from_pretrained(
+                        model_path,
+                        device_map="auto"
+                    )
+                    self.pipelines[model_path] = pipeline(
+                        "text-classification",
+                        model=model,
+                        tokenizer=self.tokenizers[model_path]
+                    )
+                self.models[model_path] = model
+                logger.info(f"Model loaded successfully: {model_path}")
+            except Exception as e:
+                logger.error(f"Error loading model {model_path}: {str(e)}")
+                raise
+    @spaces.GPU(duration=120)  # GPUを120秒間確保
+    async def generate_text(self, model_path: str, text: str) -> str:
+        """テキスト生成の実行"""
+        if model_path not in self.models:
+            await self.load_model(model_path, "text-generation")
+        try:
+            outputs = self.pipelines[model_path](
+                text,
+                max_new_tokens=100,
+                do_sample=True,
+                temperature=0.7,
+                top_p=0.9,
+                num_return_sequences=1
+            )
+            return outputs[0]["generated_text"]
+        except Exception as e:
+            logger.error(f"Error in text generation with {model_path}: {str(e)}")
+            raise
+    @spaces.GPU(duration=60)  # GPUを60秒間確保
+    async def classify_text(self, model_path: str, text: str) -> str:
+        """テキスト分類の実行"""
+        if model_path not in self.models:
+            await self.load_model(model_path, "text-classification")
+        try:
+            result = self.pipelines[model_path](text)
+            return str(result)
+        except Exception as e:
+            logger.error(f"Error in classification with {model_path}: {str(e)}")
+            raise
+class ModelManager:
+    def __init__(self):
+        self.api_clients = {}
+        self.local_manager = LocalModelManager()
+        self._initialize_clients()
+    def _initialize_clients(self):
+        """Inference APIクライアントの初期化"""
+        for model in TEXT_GENERATION_MODELS + CLASSIFICATION_MODELS:
+            if model.type == ModelType.INFERENCE_API and model.model_id:
+                self.api_clients[model.model_id] = AsyncInferenceClient(model.model_id)
+    async def run_text_generation(self, text: str, selected_types: List[str]) -> List[str]:
+        """テキスト生成モデルの実行"""
+        results = []
+        for model in TEXT_GENERATION_MODELS:
+            if model.type.value in selected_types:
+                try:
+                    if model.type == ModelType.INFERENCE_API:
+                        logger.info(f"Running API text generation: {model.name}")
+                        response = await self.api_clients[model.model_id].text_generation(
+                            text, max_new_tokens=100, temperature=0.7
+                        )
+                        results.append(f"{model.name}: {response}")
+                    else:
+                        logger.info(f"Running local text generation: {model.name}")
+                        response = await self.local_manager.generate_text(model.model_path, text)
+                        results.append(f"{model.name}: {response}")
+                except Exception as e:
+                    logger.error(f"Error in {model.name}: {str(e)}")
+                    results.append(f"{model.name}: Error - {str(e)}")
+        return results
+    async def run_classification(self, text: str, selected_types: List[str]) -> List[str]:
+        """分類モデルの実行"""
+        results = []
+        for model in CLASSIFICATION_MODELS:
+            if model.type.value in selected_types:
+                try:
+                    if model.type == ModelType.INFERENCE_API:
+                        logger.info(f"Running API classification: {model.name}")
+                        response = await self.api_clients[model.model_id].text_classification(text)
+                        results.append(f"{model.name}: {response}")
+                    else:
+                        logger.info(f"Running local classification: {model.name}")
+                        response = await self.local_manager.classify_text(model.model_path, text)
+                        results.append(f"{model.name}: {response}")
+                except Exception as e:
+                    logger.error(f"Error in {model.name}: {str(e)}")
+                    results.append(f"{model.name}: Error - {str(e)}")
+        return results
+class UIComponents:
+    def __init__(self):
+        self.input_text = None
+        self.filter_checkboxes = None
+        self.invoke_button = None
+        self.gen_model_outputs = []
+        self.class_model_outputs = []
+        self.community_output = None
+    def create_header(self):
+        """ヘッダーセクションの作成"""
+        return gr.Markdown("""
+        # Toxic Total
+        This system evaluates the toxicity level of input text using multiple approaches.
+        """)
+    def create_input_section(self):
+        """入力セクションの作成"""
+        with gr.Row():
+            self.input_text = gr.Textbox(
+                label="Input Text",
+                placeholder="Enter text to analyze...",
+                lines=3
+            )
+    def create_filter_section(self):
+        """フィルターセクションの作成"""
+        with gr.Row():
+            self.filter_checkboxes = gr.CheckboxGroup(
+                choices=[t.value for t in ModelType],
+                value=[t.value for t in ModelType],
+                label="Filter Models",
+                info="Choose which types of models to display",
+                interactive=True
+            )
+    def create_invoke_button(self):
+        """Invokeボタンの作成"""
+        with gr.Row():
+            self.invoke_button = gr.Button(
+                "Invoke Selected Models",
+                variant="primary",
+                size="lg"
+            )
+    def create_model_grid(self, models: List[ModelConfig]) -> List[Dict]:
+        """モデルグリッドの作成"""
+        outputs = []
+        with gr.Column() as container:
+            for i in range(0, len(models), 2):
+                with gr.Row() as row:
+                    for j in range(min(2, len(models) - i)):
+                        model = models[i + j]
+                        with gr.Column():
+                            with gr.Group() as group:
+                                gr.Markdown(f"### {model.name}")
+                                gr.Markdown(f"Type: {model.type.value}")
+                                output = gr.Textbox(
+                                    label="Model Output",
+                                    lines=5,
+                                    interactive=False,
+                                    info=model.description
+                                )
+                                outputs.append({
+                                    "type": model.type.value,
+                                    "name": model.name,
+                                    "output": output,
+                                    "group": group
+                                })
+        return outputs
+    def create_model_tabs(self):
+        """モデルタブの作成"""
+        with gr.Tabs():
+            with gr.Tab("Text Generation LLM"):
+                self.gen_model_outputs = self.create_model_grid(TEXT_GENERATION_MODELS)
+            with gr.Tab("Classification LLM"):
+                self.class_model_outputs = self.create_model_grid(CLASSIFICATION_MODELS)
+            with gr.Tab("Community (Not implemented)"):
+                with gr.Column():
+                    self.community_output = gr.Textbox(
+                        label="Related Community Topics",
+                        lines=5,
+                        interactive=False
+                    )
+class ToxicityApp:
+    def __init__(self):
+        self.ui = UIComponents()
+        self.model_manager = ModelManager()
+    def update_model_visibility(self, selected_types: List[str]) -> List[gr.update]:
+        """モデルの表示状態を更新"""
+        logger.info(f"Updating visibility for types: {selected_types}")
+        updates = []
+        for outputs in [self.ui.gen_model_outputs, self.ui.class_model_outputs]:
+            for output in outputs:
+                visible = output["type"] in selected_types
+                logger.info(f"Model {output['name']} (type: {output['type']}): visible = {visible}")
+                updates.append(gr.update(visible=visible))
+        return updates
+    async def handle_invoke(self, text: str, selected_types: List[str]) -> List[str]:
+        """Invokeボタンのハンドラ"""
+        gen_results = await self.model_manager.run_text_generation(text, selected_types)
+        class_results = await self.model_manager.run_classification(text, selected_types)
+        # 結果リストの長さを調整
+        gen_results.extend([""] * (len(TEXT_GENERATION_MODELS) - len(gen_results)))
+        class_results.extend([""] * (len(CLASSIFICATION_MODELS) - len(class_results)))
+        return gen_results + class_results
+    def create_ui(self):
+        """UIの作成"""
+        with gr.Blocks() as demo:
+            self.ui.create_header()
+            self.ui.create_input_section()
+            self.ui.create_filter_section()
+            self.ui.create_invoke_button()
+            self.ui.create_model_tabs()
+            # イベントハンドラの設定
+            self.ui.filter_checkboxes.change(
+                fn=self.update_model_visibility,
+                inputs=[self.ui.filter_checkboxes],
+                outputs=[
+                    output["group"]
+                    for outputs in [self.ui.gen_model_outputs, self.ui.class_model_outputs]
+                    for output in outputs
+                ]
+            )
+            self.ui.invoke_button.click(
+                fn=self.handle_invoke,
+                inputs=[self.ui.input_text, self.ui.filter_checkboxes],
+                outputs=[
+                    output["output"]
+                    for outputs in [self.ui.gen_model_outputs, self.ui.class_model_outputs]
+                    for output in outputs
+                ]
+            )
+        return demo
+def main():
+    """Build the application UI and launch it."""
+    app = ToxicityApp()
+    demo = app.create_ui()
     demo.launch()
+if __name__ == "__main__":
+    main()

requirements.txt CHANGED Viewed

	@@ -1 +1,7 @@
-huggingface_hub==0.25.2

+gradio>=4.0.0
+huggingface_hub
+transformers
+torch
+accelerate
+aiohttp
+spaces