# app.py ── Math-solution classifier on HF Spaces (Zero-GPU-safe)
#
# Pin in requirements.txt:
# gradio==4.44.0 torch==2.1.0 transformers==4.35.0 peft==0.7.1 accelerate==0.25.0 spaces

import os
import json
import logging
from typing import Tuple

import gradio as gr
import spaces  # <- Hugging Face Spaces SDK (Zero)

# ──────────────────────────────────────────────────────────────────────────────
# CONSTANTS (no CUDA use here)
# ──────────────────────────────────────────────────────────────────────────────
ADAPTER_PATH = os.getenv("ADAPTER_PATH", "./lora_adapter")  # local adapter dir
FALLBACK_MODEL = "distilbert-base-uncased"
LABELS = {0: "✅ Correct", 1: "🤔 Conceptual Error", 2: "🔢 Computational Error"}

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Globals that will live **inside the GPU worker**
model = None
tokenizer = None
model_ty = None  # "classification" | "causal_lm" | "baseline"


# ──────────────────────────────────────────────────────────────────────────────
# GPU-SIDE INITIALISATION & INFERENCE
# ──────────────────────────────────────────────────────────────────────────────
def _load_model_gpu():
    """
    Runs **inside the GPU worker**.
    Tries LoRA classification adapter → LoRA causal-LM adapter → plain baseline.
    """
    global model, tokenizer, model_ty

    import torch
    from transformers import (
        AutoTokenizer,
        AutoModelForSequenceClassification,
    )
    from peft import (
        AutoPeftModelForSequenceClassification,
        AutoPeftModelForCausalLM,
    )

    dtype = torch.float16

    if os.path.isdir(ADAPTER_PATH):
        logger.info(f"[GPU] Loading adapter from {ADAPTER_PATH}")
        try:
            # 1) classification adapter
            model = AutoPeftModelForSequenceClassification.from_pretrained(
                ADAPTER_PATH, torch_dtype=dtype, device_map="auto"
            )
            model_ty = "classification"
        except ValueError:
            # 2) causal-LM adapter
            logger.info("[GPU] Not a classifier, trying causal-LM")
            model = AutoPeftModelForCausalLM.from_pretrained(
                ADAPTER_PATH, torch_dtype=dtype, device_map="auto"
            )
            model_ty = "causal_lm"
        tokenizer = AutoTokenizer.from_pretrained(ADAPTER_PATH)
    else:
        # 3) no adapter on disk – fall back to an untuned baseline classifier
        logger.warning("[GPU] No adapter found – using baseline DistilBERT")
        tokenizer = AutoTokenizer.from_pretrained(FALLBACK_MODEL)
        model = AutoModelForSequenceClassification.from_pretrained(
            FALLBACK_MODEL, num_labels=3, ignore_mismatched_sizes=True
        )
        if torch.cuda.is_available():
            model = model.to("cuda")
        model_ty = "baseline"

    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token or tokenizer.sep_token

    model.eval()
    logger.info(f"[GPU] Model ready ({model_ty})")


def _classify_logits(question: str, solution: str) -> Tuple[str, str, str]:
    import torch

    text = f"Question: {question}\n\nSolution:\n{solution}"
    inputs = tokenizer(
        text, return_tensors="pt", padding=True, truncation=True, max_length=512
    ).to(model.device)

    with torch.no_grad():
        logits = model(**inputs).logits
    probs = torch.softmax(logits, dim=-1)[0]
    pred = int(torch.argmax(probs))
    conf = f"{probs[pred].item():.3f}"
    return LABELS[pred], conf, "—"


def _classify_generate(question: str, solution: str) -> Tuple[str, str, str]:
    import torch

    prompt = (
        "You are a mathematics tutor.\n"
        "You are given a math word problem and a student's solution. "
        "Decide whether the solution is correct.\n\n"
        "- Correct = all reasoning and calculations are correct.\n"
        "- Conceptual Error = reasoning is wrong.\n"
        "- Computational Error = reasoning okay but arithmetic off.\n\n"
        "Reply with ONLY one of these JSON lines:\n"
        '{"verdict": "correct"}\n'
        '{"verdict": "conceptual"}\n'
        '{"verdict": "computational"}\n\n'
        f"Question: {question}\n\nSolution:\n{solution}\n\nAnswer:"
    )

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        out_ids = model.generate(
            **inputs,
            max_new_tokens=32,
            pad_token_id=tokenizer.eos_token_id,
        )
    generated = tokenizer.decode(
        out_ids[0][inputs["input_ids"].shape[1]:],
        skip_special_tokens=True,
    ).strip()

    verdict = "Unparsed"
    try:
        data = json.loads(generated.splitlines()[-1])
        v = data.get("verdict", "").lower()
        if v.startswith("corr"):
            verdict = LABELS[0]
        elif v.startswith("conc"):
            verdict = LABELS[1]
        elif v.startswith("comp"):
            verdict = LABELS[2]
    except Exception:
        pass
    return verdict, "", generated


@spaces.GPU  # <-- every CUDA op happens inside here
def gpu_classify(question: str, solution: str):
    """
    Proxy target for Gradio. Executed in the GPU worker so CUDA is allowed.
    Returns (verdict, confidence, raw_output).
    """
    if model is None:
        _load_model_gpu()

    if not question.strip() or not solution.strip():
        return "Please fill both fields.", "", ""

    if model_ty in ("classification", "baseline"):
        return _classify_logits(question, solution)
    else:  # causal_lm
        return _classify_generate(question, solution)


# ──────────────────────────────────────────────────────────────────────────────
# CPU-SIDE UI (no torch.cuda here)
# ──────────────────────────────────────────────────────────────────────────────
def classify_proxy(q, s):
    """Simple wrapper so Gradio can call the GPU function."""
    return gpu_classify(q, s)


with gr.Blocks(title="Math Solution Classifier") as demo:
    gr.Markdown("# 🧮 Math Solution Classifier")
    gr.Markdown(
        "Classify a student's math solution as **correct**, **conceptually flawed**, "
        "or **computationally flawed**."
    )

    with gr.Row():
        with gr.Column():
            q_in = gr.Textbox(label="Math Question", lines=3)
            s_in = gr.Textbox(label="Proposed Solution", lines=6)
            btn = gr.Button("Classify", variant="primary")
        with gr.Column():
            verdict = gr.Textbox(label="Verdict", interactive=False)
            conf = gr.Textbox(label="Confidence", interactive=False)
            raw = gr.Textbox(label="Model Output", interactive=False)

    btn.click(classify_proxy, [q_in, s_in], [verdict, conf, raw])

    gr.Examples(
        [
            ["Solve for x: 2x + 5 = 13", "2x + 5 = 13\n2x = 8\nx = 4"],
            ["Find the derivative of f(x)=x²", "f'(x)=2x+1"],
            ["What is 15% of 200?", "0.15 × 200 = 30"],
        ],
        inputs=[q_in, s_in],
    )


if __name__ == "__main__":
    # The UI runs in the regular (CPU) process; the `spaces` SDK has no CPU
    # decorator, so the app is started the usual Gradio way.
    demo.launch()
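# ──────────────────────────────────────────────────────────────────────────────
# Optional local smoke test (a sketch, not part of the Space runtime).
# Assumption: outside a ZeroGPU Space the @spaces.GPU decorator simply runs the
# function, and the DistilBERT fallback (or your adapter) fits on the local
# device. From a Python shell in this directory, something like:
#
#     >>> from app import gpu_classify
#     >>> gpu_classify("Solve for x: 2x + 5 = 13", "2x + 5 = 13\n2x = 8\nx = 4")
#
# should return a (verdict, confidence, raw_output) tuple; with the untuned
# baseline the verdict itself is essentially arbitrary.
# ──────────────────────────────────────────────────────────────────────────────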