app.py
CHANGED
@@ -289,7 +289,7 @@ class AbliterationProcessor:
             layer.mlp.down_proj.weight.data = modified_weight

     def chat(self, message, history, max_new_tokens=2048, temperature=0.7):
-        """Chat functionality"""
+        """Chat functionality with streaming output"""
         print(f"DEBUG: Starting chat with max_new_tokens={max_new_tokens}, temperature={temperature}")

         if self.model is None or self.tokenizer is None:
@@ -325,25 +325,14 @@ class AbliterationProcessor:
             )
             print(f"DEBUG: Input tokens shape: {toks.shape}")

-            # Generate response with streaming
-
-
-            # Create a custom streamer that captures all output
-            captured_output = []
-
-            class CustomStreamer(TextStreamer):
-                def __init__(self, tokenizer, skip_prompt=True, skip_special_tokens=True):
-                    super().__init__(tokenizer, skip_prompt=skip_prompt, skip_special_tokens=skip_special_tokens)
-                    self.captured = []
-
-                def on_finalized_text(self, text: str, stream_end: bool = False):
-                    print(f"DEBUG: Streamer received text: '{text}' (stream_end={stream_end})")
-                    self.captured.append(text)
-                    super().on_finalized_text(text, stream_end)
+            # Generate response with streaming
+            print(f"DEBUG: Starting generation with max_new_tokens={max_new_tokens}, temperature={temperature}")

-
+            # Use TextStreamer to show output in real-time
+            from transformers import TextStreamer
+            streamer = TextStreamer(self.tokenizer, skip_prompt=True, skip_special_tokens=True)

-
+            # Generate with streamer to show output in console
             gen = self.model.generate(
                 toks.to(self.model.device),
                 max_new_tokens=max_new_tokens,
@@ -353,21 +342,20 @@ class AbliterationProcessor:
                 streamer=streamer
             )

-
-
-
-
-            response = "".join(streamer.captured).strip()
-            print(f"DEBUG: Final response length: {len(response)}")
-            print(f"DEBUG: Response preview: {response[:200]}...")
+            # Decode the generated tokens
+            generated_text = self.tokenizer.decode(gen[0][toks.shape[1]:], skip_special_tokens=True)
+            print(f"DEBUG: Generated text length: {len(generated_text)}")
+            print(f"DEBUG: Generated text preview: {generated_text[:200]}...")

-            return
+            return generated_text, history + [[message, generated_text]]

         except Exception as e:
             print(f"DEBUG: Exception occurred: {str(e)}")
             import traceback
             traceback.print_exc()
             return f"❌ Chat error: {str(e)}", history
+
+

 def get_new_model_card(original_card: ModelCard, original_model_id: str, new_repo_url: str) -> ModelCard:
     """Create new model card"""
@@ -577,12 +565,13 @@ def create_interface():
         outputs=[process_output, process_image]
     )

-    # Chat functionality
+    # Chat functionality with streaming
     def user(user_message, history):
         return "", history + [{"role": "user", "content": user_message}]

     def bot(history, max_new_tokens, temperature):
         if history and history[-1]["role"] == "user":
+            # Get complete response first
             response, _ = processor.chat(history[-1]["content"], history[:-1], max_new_tokens, temperature)
             history.append({"role": "assistant", "content": response})
         return history
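For reference, a minimal standalone sketch of the pattern the new chat() code relies on: TextStreamer prints tokens to the console while generate() runs, and the reply is recovered afterwards by slicing off the prompt tokens and decoding only the newly generated ones. The model id and prompt below are illustrative assumptions, not taken from app.py.

# Sketch of the streaming-then-decode pattern used in chat() above.
# The model id is a hypothetical small instruct model; any causal LM works the same way.
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

model_id = "Qwen/Qwen2.5-0.5B-Instruct"  # assumption, for illustration only
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

# TextStreamer prints tokens to stdout as they are produced;
# skip_prompt=True keeps the echoed prompt out of the console output.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

toks = tokenizer("Hello, how are you?", return_tensors="pt").input_ids
gen = model.generate(
    toks.to(model.device),
    max_new_tokens=64,
    do_sample=True,
    temperature=0.7,
    streamer=streamer,
)

# The streamer only prints; the returned tensor still holds prompt + completion,
# so slice off the prompt tokens before decoding the reply.
response = tokenizer.decode(gen[0][toks.shape[1]:], skip_special_tokens=True)
print(response)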
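The user()/bot() handlers changed in the last hunk follow Gradio's usual two-step chat wiring: user() appends the message and clears the textbox, then bot() calls the model and appends the assistant reply. A minimal sketch of that wiring is below, assuming Gradio 4+ with a messages-format Chatbot; the component names (msg, chatbot, tokens_slider, temp_slider) and the fake_chat stand-in are assumptions, not taken from app.py.

# Sketch of the two-step Gradio chat wiring used by the handlers above.
import gradio as gr

def fake_chat(message, history, max_new_tokens, temperature):
    # Stand-in for processor.chat(); returns (response, history).
    return f"echo: {message}", history

def user(user_message, history):
    # Step 1: clear the textbox and add the user turn to the history.
    return "", history + [{"role": "user", "content": user_message}]

def bot(history, max_new_tokens, temperature):
    # Step 2: generate the full assistant reply and append it.
    if history and history[-1]["role"] == "user":
        response, _ = fake_chat(history[-1]["content"], history[:-1], max_new_tokens, temperature)
        history.append({"role": "assistant", "content": response})
    return history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox()
    tokens_slider = gr.Slider(1, 4096, value=2048, label="max_new_tokens")
    temp_slider = gr.Slider(0.0, 2.0, value=0.7, label="temperature")
    # Submit first updates the history with the user turn, then runs the model.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, tokens_slider, temp_slider], chatbot
    )

demo.launch()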