Abaryan committed
Update app.py
app.py
CHANGED
@@ -8,6 +8,18 @@ import re
 # Load model and tokenizer
 # model_name = "rgb2gbr/GRPO_BioMedmcqa_Qwen2.5-0.5B"
 model_name = "rgb2gbr/BioXP-0.5B-MedMCQA"
+
+SYSTEM_PROMPT = """
+You're a medical expert. Answer the question with careful analysis and explain why the selected option is correct in 150 words without repeating.
+Respond in the following format:
+<answer>
+[correct answer]
+</answer>
+<reasoning>
+[explain why the selected option is correct]
+</reasoning>
+"""
+
 model = AutoModelForCausalLM.from_pretrained(model_name)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 
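The system prompt added here pins the model to a tag-delimited output format, and the tag-based answer extraction added to predict later in this commit depends on it. A minimal sketch of parsing such a response (the sample string is made up for illustration; the regex is the one introduced below):

import re

# Hypothetical model output that follows the SYSTEM_PROMPT format above.
sample_response = "<answer>\nB\n</answer>\n<reasoning>\nOption B matches the described mechanism.\n</reasoning>"

answer_match = re.search(r"<answer>\s*([A-D])\s*</answer>", sample_response, re.IGNORECASE)
model_answer = answer_match.group(1).upper() if answer_match else "Not found"
print(model_answer)  # -> B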
@@ -21,8 +33,8 @@ model.eval()
 
 def get_random_question():
     """Get a random question from the dataset"""
-    index = random.randint(0, len(dataset['
-    question_data = dataset['
+    index = random.randint(0, len(dataset['validation']) - 1)
+    question_data = dataset['validation'][index]
     return (
         question_data['question'],
         question_data['opa'],
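For context on the "- 1" in the new index bound: random.randint is inclusive on both ends, so sampling a row index needs len(split) - 1 as the upper limit. A quick illustration with a hypothetical split size:

import random

n_rows = 100  # hypothetical number of rows in dataset['validation']
index = random.randint(0, n_rows - 1)  # randint includes both endpoints
assert 0 <= index <= n_rows - 1       # always a valid row index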
@@ -33,49 +45,46 @@ def get_random_question():
         question_data.get('exp', None) # Explanation
     )
 
-def extract_answer(prediction: str) -> tuple:
-    """Extract answer and reasoning from model output"""
-    # Try to find the answer part
-    answer_match = re.search(r"Answer:\s*([A-D])", prediction, re.IGNORECASE)
-    answer = answer_match.group(1).upper() if answer_match else "Not found"
-
-    # Try to find reasoning part
-    reasoning = ""
-    if "Reasoning:" in prediction:
-        reasoning = prediction.split("Reasoning:")[-1].strip()
-    elif "Explanation:" in prediction:
-        reasoning = prediction.split("Explanation:")[-1].strip()
-
-    return answer, reasoning
-
 def predict(question: str, option_a: str, option_b: str, option_c: str, option_d: str,
             correct_option: int = None, explanation: str = None,
-            temperature: float = 0.6, top_p: float = 0.9, max_tokens: int =
-    # Format the
-
+            temperature: float = 0.6, top_p: float = 0.9, max_tokens: int = 256):
+    # Format the question with options
+    formatted_question = f"Question: {question}\n\nOptions:\nA. {option_a}\nB. {option_b}\nC. {option_c}\nD. {option_d}"
+
+    # Create chat-style prompt
+    prompt = [
+        {'role': 'system', 'content': SYSTEM_PROMPT},
+        {'role': 'user', 'content': formatted_question}
+    ]
+
+    # Use apply_chat_template for better formatting
+    text = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)
 
     # Tokenize and generate
-
-    inputs = {k: v.to(device) for k, v in inputs.items()}
+    model_inputs = tokenizer([text], return_tensors="pt").to(device)
 
-    with torch.
-
-        **
+    with torch.inference_mode():
+        generated_ids = model.generate(
+            **model_inputs,
             max_new_tokens=max_tokens,
             temperature=temperature,
             top_p=top_p,
-
-            # pad_token_id=tokenizer.eos_token_id
+            # repetition_penalty=1.1,
         )
 
-    # Get
-
-
+    # Get only the generated response (excluding the prompt)
+    generated_ids = generated_ids[0, model_inputs.input_ids.shape[1]:]
+    model_response = tokenizer.decode(generated_ids, skip_special_tokens=True)
 
     # Format output with evaluation if available
-    output =
+    output = model_response
+
     if correct_option is not None:
         correct_letter = chr(65 + correct_option) # Convert 0-3 to A-D
+        # Extract answer from model response for evaluation
+        answer_match = re.search(r"<answer>\s*([A-D])\s*</answer>", model_response, re.IGNORECASE)
+        model_answer = answer_match.group(1).upper() if answer_match else "Not found"
+
         is_correct = model_answer == correct_letter
         output += f"\n\n---\nEvaluation:\n"
        output += f"Correct Answer: {correct_letter}\n"
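Pulled out of the diff, the new generation path in predict can be exercised on its own. The sketch below uses the model name loaded above; the device choice, the abbreviated prompt strings, and the explicit do_sample=True (the diff itself relies on generate's defaults) are assumptions made so the example is self-contained:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "rgb2gbr/BioXP-0.5B-MedMCQA"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
device = "cpu"  # app.py uses its own device variable; CPU keeps the sketch portable
model.to(device)
model.eval()

system_prompt = "You're a medical expert. ..."  # abbreviated stand-in for SYSTEM_PROMPT above
formatted_question = "Question: ...\n\nOptions:\nA. ...\nB. ...\nC. ...\nD. ..."  # placeholder

# Render the chat-style prompt through the tokenizer's chat template
prompt = [
    {'role': 'system', 'content': system_prompt},
    {'role': 'user', 'content': formatted_question},
]
text = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)

# Generate, then keep only the tokens produced after the prompt
model_inputs = tokenizer([text], return_tensors="pt").to(device)
with torch.inference_mode():
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=256,
        temperature=0.6,
        top_p=0.9,
        do_sample=True,  # makes temperature/top_p take effect; not passed in the diff
    )
response = tokenizer.decode(generated_ids[0, model_inputs.input_ids.shape[1]:], skip_special_tokens=True)
print(response)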
@@ -95,10 +104,13 @@ with gr.Blocks(title="Medical-QA (MedMCQA) Predictor") as demo:
         with gr.Column():
             # Input fields
             question = gr.Textbox(label="Question", lines=3, interactive=True)
-
-
-
-
+
+            # Options in an expandable accordion
+            with gr.Accordion("Options", open=False):
+                option_a = gr.Textbox(label="Option A", interactive=True)
+                option_b = gr.Textbox(label="Option B", interactive=True)
+                option_c = gr.Textbox(label="Option C", interactive=True)
+                option_d = gr.Textbox(label="Option D", interactive=True)
 
             # Generation parameters
             with gr.Accordion("Generation Parameters", open=False):
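The new option fields only matter once they are hooked up to predict, and that wiring sits outside this hunk. As a generic Gradio pattern only (the button, output box, and hidden-field names below are hypothetical, not taken from app.py):

# Generic wiring sketch; app.py's actual component names are not visible in this diff.
output_box = gr.Textbox(label="Model Output", lines=10)
predict_btn = gr.Button("Predict")
predict_btn.click(
    fn=predict,
    inputs=[question, option_a, option_b, option_c, option_d,
            correct_option_state, explanation_state,
            temperature, top_p, max_tokens],
    outputs=output_box,
)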
@@ -119,12 +131,12 @@ with gr.Blocks(title="Medical-QA (MedMCQA) Predictor") as demo:
                     info="Higher values allow more diverse tokens, lower values more focused"
                 )
                 max_tokens = gr.Slider(
-                    minimum=
+                    minimum=50,
                     maximum=512,
-                    value=
+                    value=256,
                     step=32,
                     label="Max Tokens",
-                    info="Maximum length of the generated response"
+                    info="Maximum length of the generated response (recommended: 256)"
                 )
 
             # Hidden fields for correct answer and explanation
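Finally, a hypothetical smoke test against the updated predict signature (the question and options are illustrative, not drawn from MedMCQA):

# Illustrative call; correct_option uses the 0-3 index converted via chr(65 + correct_option).
output = predict(
    question="Which of the following is a water-soluble vitamin?",
    option_a="Vitamin A",
    option_b="Vitamin C",
    option_c="Vitamin D",
    option_d="Vitamin K",
    correct_option=1,  # 1 -> "B"
    max_tokens=256,
)
print(output)  # model response followed by the Evaluation block, since correct_option is set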