Spaces:

doropiza
/

extractive_summary

Sleeping

App Files Files Community

doropiza committed on Jun 4

Commit

ddf4ee3

1 Parent(s): a1d9ef3

commit

Browse files

Files changed (2) hide show

app.py +160 -47
requirements.txt +14 -8

app.py CHANGED Viewed

@@ -13,26 +13,54 @@ class TextSummarizer:
         # GPUが利用可能かチェック
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         print(f"使用デバイス: {self.device}")
-        # 日本語対応の要約モデルを初期化
-        # 軽量で高性能なモデルを使用
-        model_name = "rinna/japanese-gpt2-medium"  # 英語用
-        # 日本語の場合は "rinna/japanese-gpt2-medium" や "cyberagent/open-calm-7b" などを検討
         try:
-            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-            self.model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(self.device)
             self.summarizer = pipeline(
                 "summarization",
-                model=self.model,
-                tokenizer=self.tokenizer,
-                device=0 if self.device == "cuda" else -1
             )
-            print("モデルの読み込みが完了しました")
         except Exception as e:
-            print(f"モデル読み込みエラー: {e}")
-            # フォールバック用の軽量モデル
-            self.summarizer = pipeline("summarization", device=0 if self.device == "cuda" else -1)
     def clean_text(self, text):
         """テキストの前処理"""
@@ -174,47 +202,98 @@ summarizer = TextSummarizer()
 def process_text_input(text, max_length, min_length):
     """テキスト入力の処理"""
-    if not text.strip():
-        return "テキストを入力してください。"
-    summary = summarizer.summarize_text(text, max_length, min_length)
-    return summarizer.structure_summary(summary)
 def process_pdf_input(pdf_file, max_length, min_length):
     """PDF入力の処理"""
-    if pdf_file is None:
-        return "PDFファイルを選択してください。"
-    text = summarizer.extract_text_from_pdf(pdf_file)
-    if text.startswith("PDFの読み込みで"):
-        return text
-    summary = summarizer.summarize_text(text, max_length, min_length)
-    return summarizer.structure_summary(summary)
 def process_url_input(url, max_length, min_length):
     """URL入力の処理"""
-    if not url.strip():
-        return "URLを入力してください。"
-    if not url.startswith(('http://', 'https://')):
-        url = 'https://' + url
-    text = summarizer.extract_text_from_url(url)
-    if text.startswith("Webサイトの読み込みで"):
-        return text
-    summary = summarizer.summarize_text(text, max_length, min_length)
-    return summarizer.structure_summary(summary)
 # Gradioインターフェース作成
 def create_interface():
     with gr.Blocks(title="🤖 ローカルLLM テキスト要約ツール", theme=gr.themes.Soft()) as app:
         gr.Markdown("""
-        # 🤖 ローカルLLM テキスト要約ツール
         このツールは、ローカルで動作するLLMを使用してテキストを要約し、構造化された形式で出力します。
         ## 📝 対応入力形式
         - **テキスト直接入力**
         - **PDFファイル**
@@ -249,9 +328,10 @@ def create_interface():
                         text_output = gr.Markdown(label="要約結果")
                 text_btn.click(
-                    process_text_input,
                     inputs=[text_input, max_length, min_length],
-                    outputs=text_output
                 )
             # PDF入力タブ
@@ -268,9 +348,10 @@ def create_interface():
                         pdf_output = gr.Markdown(label="要約結果")
                 pdf_btn.click(
-                    process_pdf_input,
                     inputs=[pdf_input, max_length, min_length],
-                    outputs=pdf_output
                 )
             # URL入力タブ
@@ -287,9 +368,10 @@ def create_interface():
                         url_output = gr.Markdown(label="要約結果")
                 url_btn.click(
-                    process_url_input,
                     inputs=[url_input, max_length, min_length],
-                    outputs=url_output
                 )
         # 使用方法
@@ -301,15 +383,46 @@ def create_interface():
         3. **実行**: 対応する実行ボタンをクリック
         4. **結果確認**: 構造化された要約結果を確認
-        ## ⚙️ 技術仕様
         - **モデル**: Facebook BART (ローカル実行)
         - **GPU加速**: CUDA対応
         - **出力形式**: 構造化Markdown
         """)
     return app
 if __name__ == "__main__":
     # アプリケーション起動
     app = create_interface()
     app.launch(

         # GPUが利用可能かチェック
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         print(f"使用デバイス: {self.device}")
+        print(f"PyTorch バージョン: {torch.__version__}")
+        # PyTorch v2.6未満の場合は警告
+        torch_version = torch.__version__.split('+')[0]  # +cu121などを除去
+        major, minor = map(int, torch_version.split('.')[:2])
+        if major < 2 or (major == 2 and minor < 6):
+            print("⚠️  警告: PyTorch v2.6未満です。セキュリティ脆弱性(CVE-2025-32434)のため、アップグレードを推奨します。")
+            print("   アップグレード: pip install torch>=2.6.0")
+        # safetensorsを優先したモデル読み込み
         try:
+            print("モデルを読み込み中...")
             self.summarizer = pipeline(
                 "summarization",
+                model="facebook/bart-large-cnn",
+                device=0 if self.device == "cuda" else -1,
+                framework="pt",
+                use_safetensors=True,  # safetensorsを優先
+                trust_remote_code=False  # セキュリティ強化
             )
+            print("✅ モデルの読み込みが完了しました (safetensors使用)")
         except Exception as e:
+            print(f"❌ メインモデル読み込みエラー: {e}")
+            # より軽量なフォールバック（safetensors対応）
+            try:
+                print("フォールバックモデルを試行中...")
+                self.summarizer = pipeline(
+                    "summarization",
+                    model="sshleifer/distilbart-cnn-12-6",
+                    device=0 if self.device == "cuda" else -1,
+                    use_safetensors=True,
+                    trust_remote_code=False
+                )
+                print("✅ フォールバックモデルの読み込みが完了しました")
+            except Exception as e2:
+                print(f"❌ フォールバックモデルもエラー: {e2}")
+                # 最終フォールバック（safetensorsなし）
+                try:
+                    print("最終フォールバック（レガシーモード）...")
+                    self.summarizer = pipeline(
+                        "summarization",
+                        model="sshleifer/distilbart-cnn-12-6",
+                        device=0 if self.device == "cuda" else -1
+                    )
+                    print("⚠️  レガシーモードで読み込み完了（safetensorsなし）")
+                except Exception as e3:
+                    print(f"❌ 全てのモデル読み込みに失敗: {e3}")
+                    raise Exception("モデルの読み込みに失敗しました。requirements.txtを確認し、依存関係を更新してください。")
     def clean_text(self, text):
         """テキストの前処理"""
 def process_text_input(text, max_length, min_length):
+    """Summarize directly entered text and stream Markdown status updates.
+
+    This is a generator: every message meant for the caller is yielded.
+    The empty-input error was previously sent with ``return <value>``,
+    which in a generator only sets ``StopIteration.value`` and is never
+    seen by a consumer that iterates the generator, so it is yielded now.
+
+    Args:
+        text: Text to summarize; may be None or blank.
+        max_length: Upper summary length; converted with ``int()``.
+        min_length: Lower summary length; converted with ``int()``.
+
+    Yields:
+        Markdown strings: an input-error message, or a progress notice
+        followed by the structured summary, or an error report if any
+        step raises.
+    """
+    try:
+        print(f"テキスト処理開始: {len(text) if text else 0}文字")
+        if not text or not text.strip():
+            # Yield, then stop: a value passed to `return` would be discarded.
+            yield "## ⚠️ エラー\nテキストを入力してください。"
+            return
+        # Progress notice emitted before the summarization call.
+        yield "## 🔄 処理中...\nテキストを要約しています..."
+        summary = summarizer.summarize_text(text, int(max_length), int(min_length))
+        result = summarizer.structure_summary(summary)
+        print("テキスト処理完了")
+        yield result
+    except Exception as e:
+        # Boundary handler: report the failure as Markdown instead of raising.
+        error_msg = f"## ❌ エラーが発生しました\n```\n{str(e)}\n```"
+        print(f"テキスト処理エラー: {e}")
+        yield error_msg
 def process_pdf_input(pdf_file, max_length, min_length):
+    """Summarize an uploaded PDF and stream Markdown status updates.
+
+    This is a generator: every message meant for the caller is yielded.
+    The missing-file error was previously sent with ``return <value>``,
+    which in a generator only sets ``StopIteration.value`` and is never
+    seen by a consumer that iterates the generator, so it is yielded now.
+
+    Args:
+        pdf_file: The uploaded PDF, or None when nothing was selected.
+        max_length: Upper summary length; converted with ``int()``.
+        min_length: Lower summary length; converted with ``int()``.
+
+    Yields:
+        Markdown strings: an input-error message, or progress notices
+        followed by either an extraction error or the structured summary,
+        or an error report if any step raises.
+    """
+    try:
+        print(f"PDF処理開始: {pdf_file}")
+        if pdf_file is None:
+            # Yield, then stop: a value passed to `return` would be discarded.
+            yield "## ⚠️ エラー\nPDFファイルを選択してください。"
+            return
+        yield "## 🔄 処理中...\nPDFファイルを読み込んでいます..."
+        text = summarizer.extract_text_from_pdf(pdf_file)
+        # The extractor's result is checked for this message prefix, which
+        # is treated as an extraction failure and shown as-is.
+        if text.startswith("PDFの読み込みで"):
+            yield f"## ❌ エラー\n{text}"
+            return
+        yield "## 🔄 処理中...\nテキストを要約しています..."
+        summary = summarizer.summarize_text(text, int(max_length), int(min_length))
+        result = summarizer.structure_summary(summary)
+        print("PDF処理完了")
+        yield result
+    except Exception as e:
+        # Boundary handler: report the failure as Markdown instead of raising.
+        error_msg = f"## ❌ エラーが発生しました\n```\n{str(e)}\n```"
+        print(f"PDF処理エラー: {e}")
+        yield error_msg
 def process_url_input(url, max_length, min_length):
+    """Summarize the page at a URL and stream Markdown status updates.
+
+    This is a generator: every message meant for the caller is yielded.
+    The empty-URL error was previously sent with ``return <value>``,
+    which in a generator only sets ``StopIteration.value`` and is never
+    seen by a consumer that iterates the generator, so it is yielded now.
+
+    Args:
+        url: Address to fetch; ``https://`` is prepended when no
+            ``http://`` or ``https://`` scheme is present.
+        max_length: Upper summary length; converted with ``int()``.
+        min_length: Lower summary length; converted with ``int()``.
+
+    Yields:
+        Markdown strings: an input-error message, or progress notices
+        followed by either a fetch error or the structured summary, or an
+        error report if any step raises.
+    """
+    try:
+        print(f"URL処理開始: {url}")
+        if not url or not url.strip():
+            # Yield, then stop: a value passed to `return` would be discarded.
+            yield "## ⚠️ エラー\nURLを入力してください。"
+            return
+        # Strip pasted whitespace first so the scheme check and the
+        # "https://" prefix apply to the address itself.
+        url = url.strip()
+        if not url.startswith(('http://', 'https://')):
+            url = 'https://' + url
+        yield "## 🔄 処理中...\nWebサイトを読み込んでいます..."
+        text = summarizer.extract_text_from_url(url)
+        # The extractor's result is checked for this message prefix, which
+        # is treated as a fetch failure and shown as-is.
+        if text.startswith("Webサイトの読み込みで"):
+            yield f"## ❌ エラー\n{text}"
+            return
+        yield "## 🔄 処理中...\nテキストを要約しています..."
+        summary = summarizer.summarize_text(text, int(max_length), int(min_length))
+        result = summarizer.structure_summary(summary)
+        print("URL処理完了")
+        yield result
+    except Exception as e:
+        # Boundary handler: report the failure as Markdown instead of raising.
+        error_msg = f"## ❌ エラーが発生しました\n```\n{str(e)}\n```"
+        print(f"URL処理エラー: {e}")
+        yield error_msg
 # Gradioインターフェース作成
 def create_interface():
     with gr.Blocks(title="🤖 ローカルLLM テキスト要約ツール", theme=gr.themes.Soft()) as app:
         gr.Markdown("""
+        # 🤖 ローカルLLM テキスト要約ツール (v5 Security Update)
         このツールは、ローカルで動作するLLMを使用してテキストを要約し、構造化された形式で出力します。
+        ## 🔒 セキュリティアップデート
+        - **PyTorch v2.6+** 対応（CVE-2025-32434 対策）
+        - **safetensors** 形式を優先使用
+        - **trust_remote_code=False** でセキュリティ強化
         ## 📝 対応入力形式
         - **テキスト直接入力**
         - **PDFファイル**
                         text_output = gr.Markdown(label="要約結果")
                 text_btn.click(
+                    fn=process_text_input,
                     inputs=[text_input, max_length, min_length],
+                    outputs=text_output,
+                    show_progress=True
                 )
             # PDF入力タブ
                         pdf_output = gr.Markdown(label="要約結果")
                 pdf_btn.click(
+                    fn=process_pdf_input,
                     inputs=[pdf_input, max_length, min_length],
+                    outputs=pdf_output,
+                    show_progress=True
                 )
             # URL入力タブ
                         url_output = gr.Markdown(label="要約結果")
                 url_btn.click(
+                    fn=process_url_input,
                     inputs=[url_input, max_length, min_length],
+                    outputs=url_output,
+                    show_progress=True
                 )
         # 使用方法
         3. **実行**: 対応する実行ボタンをクリック
         4. **結果確認**: 構造化された要約結果を確認
+        ## ⚙️ 技術仕様 (v5 Security Update)
         - **モデル**: Facebook BART (ローカル実行)
+        - **セキュリティ**: PyTorch v2.6+ (CVE-2025-32434 対策)
+        - **フォーマット**: safetensors優先
         - **GPU加速**: CUDA対応
         - **出力形式**: 構造化Markdown
+        ## 🔒 セキュリティ機能
+        - safetensors形式での安全なモデル読み込み
+        - trust_remote_code=False設定
+        - 脆弱性対策済みPyTorchバージョン要求
         """)
     return app
 if __name__ == "__main__":
+    # セキュリティアップデート情報
+    print("""
+    🔒 セキュリティアップデート v5 🔒
+    PyTorch脆弱性(CVE-2025-32434)対応のため、以下の更新が必要です:
+    pip install torch>=2.6.0 transformers>=4.40.0 safetensors>=0.4.0
+    または、requirements.txtを更新:
+    pip install -r requirements.txt
+    GPU使用の場合:
+    pip install torch>=2.6.0+cu121 --extra-index-url https://download.pytorch.org/whl/cu121
+    """)
+    # PyTorchバージョンチェック
+    torch_version = torch.__version__.split('+')[0]
+    major, minor = map(int, torch_version.split('.')[:2])
+    if major < 2 or (major == 2 and minor < 6):
+        print(f"⚠️  現在のPyTorchバージョン: {torch.__version__}")
+        print("🚨 セキュリティリスクあり - アップグレードを強く推奨します")
+    else:
+        print(f"✅ PyTorchバージョン: {torch.__version__} (セキュア)")
     # アプリケーション起動
     app = create_interface()
     app.launch(

requirements.txt CHANGED Viewed

@@ -1,10 +1,11 @@
-# Core ML Libraries
-torch>=2.0.0
-transformers>=4.30.0
-tokenizers>=0.13.0
 # Web Interface
-gradio>=3.35.0
 # PDF Processing
 PyPDF2>=3.0.0
@@ -21,14 +22,19 @@ pandas>=2.0.0
 nltk>=3.8.0
 regex>=2023.6.3
 # Optional: Japanese Text Processing
 # fugashi>=1.3.0
 # unidic-lite>=1.0.8
 # mecab-python3>=1.0.6
-# Optional: GPU Support (uncomment if using CUDA)
-# torch-audio>=2.0.0+cu118 --extra-index-url https://download.pytorch.org/whl/cu118
-# torchvision>=0.15.0+cu118 --extra-index-url https://download.pytorch.org/whl/cu118
 # Development Tools (optional)
 # jupyter>=1.0.0

+# Core ML Libraries - Updated for security (CVE-2025-32434)
+torch>=2.6.0
+transformers>=4.40.0
+tokenizers>=0.15.0
+safetensors>=0.4.0
 # Web Interface
+gradio>=4.0.0
 # PDF Processing
 PyPDF2>=3.0.0
 nltk>=3.8.0
 regex>=2023.6.3
+# Security and Performance
+accelerate>=0.25.0
+huggingface-hub>=0.20.0
 # Optional: Japanese Text Processing
 # fugashi>=1.3.0
 # unidic-lite>=1.0.8
 # mecab-python3>=1.0.6
+# Optional: GPU Support (CUDA 12.1+)
+# torch>=2.6.0+cu121 --extra-index-url https://download.pytorch.org/whl/cu121
+# torchvision>=0.18.0+cu121 --extra-index-url https://download.pytorch.org/whl/cu121
+# torchaudio>=2.6.0+cu121 --extra-index-url https://download.pytorch.org/whl/cu121
 # Development Tools (optional)
 # jupyter>=1.0.0