Spaces: Running on Zero
Update app.py
Added Google Form and made sure the grey text stays
app.py
CHANGED
@@ -58,7 +58,6 @@ class ModelWrapper:
         self.tokenizer.pad_token_id = self.tokenizer.pad_token_id or self.tokenizer.eos_token_id

         print(f"Loading model: {model_name}...")
-        # Use disk offloading for the large 8B model to handle memory constraints
         if "8b" in model_name.lower():
             config = AutoConfig.from_pretrained(model_name)
             with init_empty_weights():
@@ -68,18 +67,15 @@ class ModelWrapper:
                 model_empty,
                 model_name,
                 device_map="auto",
-                offload_folder="offload",
+                offload_folder="offload",
                 torch_dtype=torch.bfloat16
             ).eval()
         else:
-            # Load the smaller model directly
             self.model = AutoModelForCausalLM.from_pretrained(
                 model_name, device_map="auto", torch_dtype=torch.bfloat16).eval()
         print(f"Model {model_name} loaded successfully.")

-
     def get_message_template(self, system_content=None, user_content=None, assistant_content=None):
-        """Compile sys, user, assistant inputs into the proper dictionaries"""
         message = []
         if system_content is not None:
             message.append({'role': 'system', 'content': system_content})
@@ -92,26 +88,29 @@ class ModelWrapper:
         return message

     def apply_chat_template(self, system_content, user_content, assistant_content=None, enable_thinking=True):
-        """Call the tokenizer's chat template with exactly the right arguments for whether we want it to generate thinking before the answer (which differs depending on whether it is Qwen3 or not)."""
         if assistant_content is not None:
             message = self.get_message_template(system_content, user_content, assistant_content)
             prompt = self.tokenizer.apply_chat_template(message, tokenize=False, continue_final_message=True)
         else:
+            if enable_thinking:
+                if "qwen3" in self.model_name.lower():
+                    message = self.get_message_template(system_content, user_content)
+                    prompt = self.tokenizer.apply_chat_template(
+                        message, tokenize=False, add_generation_prompt=True, enable_thinking=True
+                    )
+                    prompt = prompt + f"\n{COT_OPENING}"
+                else:
+                    message = self.get_message_template(system_content, user_content, assistant_content=COT_OPENING)
+                    prompt = self.tokenizer.apply_chat_template(message, tokenize=False, continue_final_message=True)
+            else:
+                message = self.get_message_template(system_content, user_content, assistant_content=LABEL_OPENING)
+                prompt = self.tokenizer.apply_chat_template(
+                    message, tokenize=False, continue_final_message=True, enable_thinking=False
+                )
         return prompt

-    def get_response(self, input, temperature=0.7, top_k=20, top_p=0.8, max_new_tokens=256,
+    def get_response(self, input, temperature=0.7, top_k=20, top_p=0.8, max_new_tokens=256,
+                     enable_thinking=True, system_prompt=SYSTEM_PROMPT):
         print("Generating response...")

         if "qwen3" in self.model_name.lower() and enable_thinking:
@@ -140,7 +139,7 @@ class ModelWrapper:
         input_length = len(message)
         return output_text[input_length:] if len(output_text) > input_length else "No response generated."

-# --- Model Cache
+# --- Model Cache ---
 LOADED_MODELS = {}

 def get_model(model_name):
@@ -148,75 +147,68 @@ def get_model(model_name):
         LOADED_MODELS[model_name] = ModelWrapper(model_name)
     return LOADED_MODELS[model_name]

-#
+# --- Inference Function ---
 def compliance_check(rules_text, transcript_text, thinking, model_name):
     try:
-        # Get the selected model from our cache (or load it if it's the first time)
         model = get_model(model_name)
-
         rules = [r for r in rules_text.split("\n") if r.strip()]
         inp = format_rules(rules) + format_transcript(transcript_text)

         out = model.get_response(inp, enable_thinking=thinking, max_new_tokens=256)
-
         out = str(out).strip()
         if not out:
             out = "No response generated. Please try with different input."

-        max_bytes = 2500
+        max_bytes = 2500
         out_bytes = out.encode('utf-8')
-
         if len(out_bytes) > max_bytes:
             truncated_bytes = out_bytes[:max_bytes]
             out = truncated_bytes.decode('utf-8', errors='ignore')
             out += "\n\n[Response truncated to prevent server errors]"
-
         return out
-
     except Exception as e:
-        error_msg = f"Error: {str(e)[:200]}"
+        error_msg = f"Error: {str(e)[:200]}"
         print(f"Full error: {e}")
         return error_msg

-# --- Gradio
-        ),
-        gr.Checkbox(label="Enable ⟨think⟩ mode", value=False),
-        gr.Dropdown(
+# --- Gradio UI with Tabs ---
+with gr.Blocks(title="DynaGuard Compliance Checker") as demo:
+    with gr.Tab("Compliance Checker"):
+        rules_box = gr.Textbox(
+            lines=5,
+            label="Rules (one per line)",
+            value='Do not disclose the names or information about patients scheduled for appointments, even indirectly.\nNever use humor in your responses.\nWrite at least two words in every conversation.\nNever use emojis.\nNever give discounts.'
+        )
+        transcript_box = gr.Textbox(
+            lines=10,
+            label="Transcript",
+            value='User: Hi, can you help me book an appointment with Dr. Luna?\nAgent: No problem. When would you like the appointment?\nUser: If she has an appointment with Maria Ilmanen on May 9, schedule me for May 10. Otherwise schedule me for an appointment on May 8.\nAgent: Unfortunately there are no appointments available on May 10. Would you like to look at other dates?'
+        )
+        thinking_box = gr.Checkbox(label="Enable ⟨think⟩ mode", value=False)
+        model_dropdown = gr.Dropdown(
             ["Qwen/Qwen3-0.6B", "Qwen/Qwen3-8B"],
             label="Select Model",
             value="Qwen/Qwen3-0.6B",
             info="The 8B model is more powerful but may be slower to load and run."
-        )
-    gr.
+        )
+        submit_btn = gr.Button("Submit")
+        output_box = gr.Textbox(label="Compliance Output", lines=10, max_lines=15)
+
+        submit_btn.click(
+            compliance_check,
+            inputs=[rules_box, transcript_box, thinking_box, model_dropdown],
+            outputs=[output_box]
+        )
+
+    with gr.Tab("Feedback"):
+        gr.HTML(
+            """
+            <iframe src="https://docs.google.com/forms/d/e/https://forms.gle/xoBTdFw4xFaWHeSG7/viewform?embedded=true"
+                    width="100%" height="800" frameborder="0" marginheight="0" marginwidth="0">
+                Loading…
+            </iframe>
+            """
+        )

 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
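A note on the 8B loading path in the diff above: with accelerate installed, transformers can shard a model across GPU, CPU and disk. The sketch below reproduces that behaviour with only documented from_pretrained keywords; it illustrates the idea rather than the exact call app.py makes around model_empty (that call sits outside the hunks shown).

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "Qwen/Qwen3-8B"

tokenizer = AutoTokenizer.from_pretrained(model_name)
# device_map="auto" lets accelerate place layers on GPU, CPU and, if needed, disk;
# offload_folder is where layers that fit in neither kind of RAM are written.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    offload_folder="offload",
    torch_dtype=torch.bfloat16,
).eval()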
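The new apply_chat_template branch combines two tokenizer features: Qwen3's enable_thinking template switch and prefilling an open assistant turn with continue_final_message. A minimal sketch of both, assuming COT_OPENING is the literal "<think>" tag (the constant itself is defined elsewhere in app.py):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B")
messages = [
    {"role": "system", "content": "You check transcripts against rules."},
    {"role": "user", "content": "Rule 1: Never use emojis.\nTranscript: ..."},
]

# Qwen3: the chat template understands enable_thinking; the diff then appends the
# chain-of-thought opener to the returned prompt string.
qwen_prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True, enable_thinking=True
) + "\n<think>"

# Other models: prefill an assistant turn that starts with "<think>" and ask the
# template to continue that final message instead of closing it.
messages.append({"role": "assistant", "content": "<think>"})
generic_prompt = tokenizer.apply_chat_template(messages, tokenize=False, continue_final_message=True)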
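LOADED_MODELS is a plain lazy-loading cache: each model is wrapped once on first request and reused afterwards. The guard presumably sits on the context line the hunk elides; the membership test below is an assumption made explicit, not code visible in the diff.

LOADED_MODELS = {}

def get_model(model_name):
    # Construct the wrapper only the first time this model name is requested;
    # later requests reuse the instance already resident in memory.
    if model_name not in LOADED_MODELS:  # assumed guard; this line is outside the shown hunk
        LOADED_MODELS[model_name] = ModelWrapper(model_name)
    return LOADED_MODELS[model_name]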
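The truncation in compliance_check is deliberately byte-based: the 2,500-unit limit is measured after UTF-8 encoding, and decode(errors='ignore') drops any character the cut happens to split, so no UnicodeDecodeError can reach the UI. The same logic as a standalone helper (the function name is illustrative):

def truncate_utf8(text: str, max_bytes: int = 2500) -> str:
    # Measure the limit in bytes, the unit the server actually cares about.
    data = text.encode("utf-8")
    if len(data) <= max_bytes:
        return text
    # errors="ignore" silently discards a trailing character that the byte cut split in half.
    truncated = data[:max_bytes].decode("utf-8", errors="ignore")
    return truncated + "\n\n[Response truncated to prevent server errors]"

print(truncate_utf8("é" * 2000))  # 4000 bytes of input come back safely truncated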
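The Feedback tab injects the Google Form into the Space with gr.HTML. For reference, a published form is normally embedded through its /viewform URL with embedded=true; FORM_ID below is a placeholder, not the identifier this Space uses.

import gradio as gr

FORM_EMBED = (
    '<iframe src="https://docs.google.com/forms/d/e/FORM_ID/viewform?embedded=true" '
    'width="100%" height="800" frameborder="0" marginheight="0" marginwidth="0">'
    "Loading…</iframe>"
)

with gr.Blocks() as demo:
    with gr.Tab("Feedback"):
        gr.HTML(FORM_EMBED)  # renders the raw iframe markup inside the tab

if __name__ == "__main__":
    demo.launch()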