# app.py ── Math-solution classifier on HF Spaces (Zero-GPU-safe)
#
# Pin in requirements.txt:
# gradio==4.44.0 torch==2.1.0 transformers==4.35.0 peft==0.7.1 accelerate==0.25.0 spaces

import os
import json
import logging
from typing import Tuple

import gradio as gr
import spaces  # <- Hugging Face Spaces SDK (Zero)

# ──────────────────────────────────────────────────────────────────────────────
# CONSTANTS (no CUDA use here)
# ──────────────────────────────────────────────────────────────────────────────
ADAPTER_PATH = os.getenv("ADAPTER_PATH", "./lora_adapter")  # local adapter dir
FALLBACK_MODEL = "distilbert-base-uncased"
LABELS = {0: "✅ Correct", 1: "🤔 Conceptual Error", 2: "🔢 Computational Error"}

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Globals that will live **inside the GPU worker**
model = None
tokenizer = None
model_ty = None  # "classification" | "causal_lm" | "baseline"


# ──────────────────────────────────────────────────────────────────────────────
# GPU-SIDE INITIALISATION & INFERENCE
# ──────────────────────────────────────────────────────────────────────────────
def _load_model_gpu():
    """
    Runs **inside the GPU worker**.
    Tries LoRA classification adapter → LoRA causal-LM adapter → plain baseline.
    """
    global model, tokenizer, model_ty

    import torch
    from transformers import (
        AutoTokenizer,
        AutoModelForSequenceClassification,
    )
    from peft import (
        AutoPeftModelForSequenceClassification,
        AutoPeftModelForCausalLM,
    )

    dtype = torch.float16

    if os.path.isdir(ADAPTER_PATH):
        logger.info(f"[GPU] Loading adapter from {ADAPTER_PATH}")
        try:
            # 1) classification adapter
            model = AutoPeftModelForSequenceClassification.from_pretrained(
                ADAPTER_PATH, torch_dtype=dtype, device_map="auto"
            )
            model_ty = "classification"
        except ValueError:
            # 2) causal-LM adapter
            logger.info("[GPU] Not a classifier, trying causal-LM")
            model = AutoPeftModelForCausalLM.from_pretrained(
                ADAPTER_PATH, torch_dtype=dtype, device_map="auto"
            )
            model_ty = "causal_lm"
        tokenizer = AutoTokenizer.from_pretrained(ADAPTER_PATH)
    else:
        # 3) no adapter on disk – fall back to an untuned baseline classifier
        logger.warning("[GPU] No adapter found – using baseline DistilBERT")
        tokenizer = AutoTokenizer.from_pretrained(FALLBACK_MODEL)
        model = AutoModelForSequenceClassification.from_pretrained(
            FALLBACK_MODEL, num_labels=3, ignore_mismatched_sizes=True
        )
        if torch.cuda.is_available():
            model = model.to("cuda")
        model_ty = "baseline"

    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token or tokenizer.sep_token

    model.eval()
    logger.info(f"[GPU] Model ready ({model_ty})")


def _classify_logits(question: str, solution: str) -> Tuple[str, str, str]:
    import torch

    text = f"Question: {question}\n\nSolution:\n{solution}"
    inputs = tokenizer(
        text, return_tensors="pt", padding=True, truncation=True, max_length=512
    ).to(model.device)

    with torch.no_grad():
        logits = model(**inputs).logits
    probs = torch.softmax(logits, dim=-1)[0]
    pred = int(torch.argmax(probs))
    conf = f"{probs[pred].item():.3f}"
    return LABELS[pred], conf, "—"


def _classify_generate(question: str, solution: str) -> Tuple[str, str, str]:
    import torch

    prompt = (
        "You are a mathematics tutor.\n"
        "You are given a math word problem and a student's solution. "
        "Decide whether the solution is correct.\n\n"
        "- Correct = all reasoning and calculations are correct.\n"
        "- Conceptual Error = reasoning is wrong.\n"
        "- Computational Error = reasoning okay but arithmetic off.\n\n"
        "Reply with ONLY one of these JSON lines:\n"
        '{"verdict": "correct"}\n'
        '{"verdict": "conceptual"}\n'
        '{"verdict": "computational"}\n\n'
        f"Question: {question}\n\nSolution:\n{solution}\n\nAnswer:"
    )

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        out_ids = model.generate(
            **inputs,
            max_new_tokens=32,
            pad_token_id=tokenizer.eos_token_id,
        )
    generated = tokenizer.decode(
        out_ids[0][inputs["input_ids"].shape[1]:],
        skip_special_tokens=True,
    ).strip()

    verdict = "Unparsed"
    try:
        data = json.loads(generated.splitlines()[-1])
        v = data.get("verdict", "").lower()
        if v.startswith("corr"):
            verdict = LABELS[0]
        elif v.startswith("conc"):
            verdict = LABELS[1]
        elif v.startswith("comp"):
            verdict = LABELS[2]
    except Exception:
        pass
    return verdict, "", generated


@spaces.GPU  # <-- every CUDA op happens inside here
def gpu_classify(question: str, solution: str):
    """
    Proxy target for Gradio. Executed in the GPU worker so CUDA is allowed.
    Returns (verdict, confidence, raw_output).
    """
    if model is None:
        _load_model_gpu()

    if not question.strip() or not solution.strip():
        return "Please fill both fields.", "", ""

    if model_ty in ("classification", "baseline"):
        return _classify_logits(question, solution)
    else:  # causal_lm
        return _classify_generate(question, solution)


# ──────────────────────────────────────────────────────────────────────────────
# CPU-SIDE UI (no torch.cuda here)
# ──────────────────────────────────────────────────────────────────────────────
def classify_proxy(q, s):
    """Simple wrapper so Gradio can call the GPU function."""
    return gpu_classify(q, s)


with gr.Blocks(title="Math Solution Classifier") as demo:
    gr.Markdown("# 🧮 Math Solution Classifier")
    gr.Markdown(
        "Classify a student's math solution as **correct**, **conceptually flawed**, "
        "or **computationally flawed**."
    )

    with gr.Row():
        with gr.Column():
            q_in = gr.Textbox(label="Math Question", lines=3)
            s_in = gr.Textbox(label="Proposed Solution", lines=6)
            btn = gr.Button("Classify", variant="primary")
        with gr.Column():
            verdict = gr.Textbox(label="Verdict", interactive=False)
            conf = gr.Textbox(label="Confidence", interactive=False)
            raw = gr.Textbox(label="Model Output", interactive=False)

    btn.click(classify_proxy, [q_in, s_in], [verdict, conf, raw])

    gr.Examples(
        [
            ["Solve for x: 2x + 5 = 13", "2x + 5 = 13\n2x = 8\nx = 4"],
            ["Find the derivative of f(x)=x²", "f'(x)=2x+1"],
            ["What is 15% of 200?", "0.15 × 200 = 30"],
        ],
        inputs=[q_in, s_in],
    )


if __name__ == "__main__":
    # The UI runs in the regular (CPU) process; the `spaces` SDK has no CPU
    # decorator, so the app is started the usual Gradio way.
    demo.launch()
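# ──────────────────────────────────────────────────────────────────────────────
# Optional local smoke test (a sketch, not part of the Space runtime).
# Assumption: outside a ZeroGPU Space the @spaces.GPU decorator simply runs the
# function, and the DistilBERT fallback (or your adapter) fits on the local
# device. From a Python shell in this directory, something like:
#
#     >>> from app import gpu_classify
#     >>> gpu_classify("Solve for x: 2x + 5 = 13", "2x + 5 = 13\n2x = 8\nx = 4")
#
# should return a (verdict, confidence, raw_output) tuple; with the untuned
# baseline the verdict itself is essentially arbitrary.
# ──────────────────────────────────────────────────────────────────────────────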