# app.py - Gradio version (much simpler for HF Spaces)
import gradio as gr
import torch
import json
import logging
from transformers import AutoTokenizer, AutoModelForCausalLM

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Global variables for model and tokenizer
model = None
tokenizer = None


def load_model():
    """Load the trained LoRA adapter together with its base model."""
    global model, tokenizer

    try:
        from peft import AutoPeftModelForCausalLM  # Changed from SequenceClassification

        # Load the LoRA adapter model for text generation
        model = AutoPeftModelForCausalLM.from_pretrained(
            "./lora_adapter",  # Path to your adapter files
            torch_dtype=torch.float16,
            device_map="auto"
        )

        # Load tokenizer from the same directory
        tokenizer = AutoTokenizer.from_pretrained("./lora_adapter")

        # Fix padding token issue
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
            logger.info("Set pad_token to eos_token")

        logger.info("LoRA model loaded successfully")
        return "LoRA model loaded successfully!"

    except Exception as e:
        logger.error(f"Error loading LoRA model: {e}")

        # Fallback to a placeholder model for testing
        logger.warning("Using placeholder model loading - replace with your actual model!")
        model_name = "microsoft/DialoGPT-medium"  # Small causal LM stand-in; NOT the fine-tuned model
        tokenizer = AutoTokenizer.from_pretrained(model_name)

        # Fix padding token for the fallback model too
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        model = AutoModelForCausalLM.from_pretrained(model_name)
        return f"Fallback model loaded. LoRA error: {e}"


def get_system_prompt():
    """Generates the specific system prompt for the fine-tuning task."""
    return """You are a mathematics tutor. You are given a math word problem, and a solution written by a student. Analyze the solution carefully, line-by-line, and classify it into one of the following categories:

- Correct (All logic is correct, and all calculations are correct)
- Conceptual Error (There is an error in reasoning or logic somewhere in the solution)
- Computational Error (All logic and reasoning is correct, but the result of some calculation is incorrect)

Respond *only* with a valid JSON object that follows this exact schema:
```json
{
    "verdict": "must be one of 'correct', 'conceptual_error', or 'computational_error'",
    "erroneous_line": "the exact, verbatim text of the first incorrect line, or null if the verdict is 'correct'",
    "explanation": "a brief, one-sentence explanation of the error, or null if the verdict is 'correct'"
}
```
Do NOT add any text or explanations before or after the JSON object.
""" def classify_solution(question: str, solution: str): """ Classify the math solution using the exact training format Returns: (classification_label, confidence_score, explanation) """ if not question.strip() or not solution.strip(): return "Please fill in both fields", "", "" if not model or not tokenizer: return "Model not loaded", "", "" try: # Create the exact prompt format used in training system_prompt = get_system_prompt() user_message = f"Problem: {question}\n\nSolution:\n{solution}" # Format as chat messages (common for instruction-tuned models) messages = [ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_message} ] # Apply chat template text_input = tokenizer.apply_chat_template( messages, tokenize=False, add_generation_token=True ) # Tokenize input inputs = tokenizer( text_input, return_tensors="pt", truncation=True, padding=True, max_length=2048 # Increased for longer prompts ) # Generate response (not just classify) with torch.no_grad(): outputs = model.generate( **inputs, max_new_tokens=200, temperature=0.1, do_sample=True, pad_token_id=tokenizer.pad_token_id ) # Decode the generated response generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True) # Extract just the JSON response (after the input) response_start = generated_text.find(text_input) + len(text_input) json_response = generated_text[response_start:].strip() # Parse the JSON response import json try: result = json.loads(json_response) verdict = result.get("verdict", "unknown") erroneous_line = result.get("erroneous_line", "") explanation = result.get("explanation", "") # Map verdict to display format verdict_mapping = { "correct": "✅ Correct", "conceptual_error": "🤔 Conceptual Error", "computational_error": "🔢 Computational Error" } display_verdict = verdict_mapping.get(verdict, f"❓ {verdict}") return display_verdict, erroneous_line or "None", explanation or "Solution is correct" except json.JSONDecodeError: return f"Model response: {json_response}", "", "Could not parse JSON response" except Exception as e: logger.error(f"Error during classification: {e}") return f"Classification error: {str(e)}", "", "" # Load model on startup load_model() # Create Gradio interface with gr.Blocks(title="Math Solution Classifier", theme=gr.themes.Soft()) as app: gr.Markdown("# 🧮 Math Solution Classifier") gr.Markdown("Classify math solutions as correct, conceptually flawed, or computationally flawed.") with gr.Row(): with gr.Column(): question_input = gr.Textbox( label="Math Question", placeholder="e.g., Solve for x: 2x + 5 = 13", lines=3 ) solution_input = gr.Textbox( label="Proposed Solution", placeholder="e.g., 2x + 5 = 13\n2x = 13 - 5\n2x = 8\nx = 4", lines=5 ) classify_btn = gr.Button("Classify Solution", variant="primary") with gr.Column(): classification_output = gr.Textbox(label="Classification", interactive=False) erroneous_line_output = gr.Textbox(label="Erroneous Line", interactive=False) explanation_output = gr.Textbox(label="Explanation", interactive=False, lines=3) # Examples gr.Examples( examples=[ [ "Solve for x: 2x + 5 = 13", "2x + 5 = 13\n2x = 13 - 5\n2x = 8\nx = 4" ], [ "Find the derivative of f(x) = x²", "f'(x) = 2x + 1" # This should be computationally flawed ], [ "What is 15% of 200?", "15% = 15/100 = 0.15\n0.15 × 200 = 30" ] ], inputs=[question_input, solution_input] ) classify_btn.click( fn=classify_solution, inputs=[question_input, solution_input], outputs=[classification_output, erroneous_line_output, explanation_output] ) if __name__ == "__main__": 
    app.launch()
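
# --- Local smoke test (sketch, not run by the app) -------------------------
# A minimal way to exercise classify_solution() without the Gradio UI, e.g.
# from a Python REPL after the model has loaded. The question/solution
# strings below are illustrative only; any problem in the same format works.
#
#   verdict, bad_line, why = classify_solution(
#       "Solve for x: 2x + 5 = 13",
#       "2x + 5 = 13\n2x = 13 - 5\n2x = 8\nx = 4",
#   )
#   print(verdict, bad_line, why)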