# app.py - Gradio version (much simpler for HF Spaces)
import json
import logging

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Global variables for model and tokenizer
model = None
tokenizer = None
label_mapping = {0: "✅ Correct", 1: "🤔 Conceptually Flawed", 2: "🔢 Computationally Flawed"}
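
# load_model() tries the local LoRA adapter first and falls back to a small public
# model, so the Space still starts while the real adapter is being wired up.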
def load_model():
    """Load your trained LoRA adapter with base model"""
    global model, tokenizer
    try:
        from peft import AutoPeftModelForCausalLM  # Changed from SequenceClassification

        # Load the LoRA adapter model for text generation
        model = AutoPeftModelForCausalLM.from_pretrained(
            "./lora_adapter",  # Path to your adapter files
            torch_dtype=torch.float16,
            device_map="auto"
        )
        # Load tokenizer from the same directory
        tokenizer = AutoTokenizer.from_pretrained("./lora_adapter")

        # Fix padding token issue
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
            logger.info("Set pad_token to eos_token")

        logger.info("LoRA model loaded successfully")
        return "LoRA model loaded successfully!"
    except Exception as e:
        logger.error(f"Error loading LoRA model: {e}")
        # Fallback to a placeholder model for testing
        logger.warning("Using placeholder model loading - replace with your actual model!")
        model_name = "microsoft/DialoGPT-medium"  # Small causal LM used only as a stand-in
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        # Fix padding token for fallback model too
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        model = AutoModelForCausalLM.from_pretrained(model_name)
        return f"Fallback model loaded. LoRA error: {e}"
def get_system_prompt():
    """Generates the specific system prompt for the fine-tuning task."""
    return """You are a mathematics tutor.
You are given a math word problem, and a solution written by a student.
Analyze the solution carefully, line-by-line, and classify it into one of the following categories:
- Correct (All logic is correct, and all calculations are correct)
- Conceptual Error (There is an error in reasoning or logic somewhere in the solution)
- Computational Error (All logic and reasoning is correct, but the result of some calculation is incorrect)
Respond *only* with a valid JSON object that follows this exact schema:
```json
{
  "verdict": "must be one of 'correct', 'conceptual_error', or 'computational_error'",
  "erroneous_line": "the exact, verbatim text of the first incorrect line, or null if the verdict is 'correct'",
  "explanation": "a brief, one-sentence explanation of the error, or null if the verdict is 'correct'"
}
```
Do NOT add any text or explanations before or after the JSON object.
"""
def classify_solution(question: str, solution: str):
    """
    Classify the math solution using the exact training format.
    Returns: (classification_label, erroneous_line, explanation)
    """
    if not question.strip() or not solution.strip():
        return "Please fill in both fields", "", ""

    if model is None or tokenizer is None:
        return "Model not loaded", "", ""

    try:
        # Create the exact prompt format used in training
        system_prompt = get_system_prompt()
        user_message = f"Problem: {question}\n\nSolution:\n{solution}"

        # Format as chat messages (common for instruction-tuned models)
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message}
        ]

        # Apply chat template
        text_input = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )

        # Tokenize input
        inputs = tokenizer(
            text_input,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=2048  # Increased for longer prompts
        )
        # Move inputs to the model's device (device_map="auto" may have placed it on GPU)
        inputs = {k: v.to(model.device) for k, v in inputs.items()}

        # Generate response (not just classify)
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=200,
                temperature=0.1,
                do_sample=True,
                pad_token_id=tokenizer.pad_token_id
            )

        # Decode only the newly generated tokens (everything after the prompt)
        prompt_length = inputs["input_ids"].shape[1]
        json_response = tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        ).strip()
        # Strip a Markdown code fence if the model wrapped its JSON in one
        if json_response.startswith("```"):
            json_response = json_response.strip("`").removeprefix("json").strip()

        # Parse the JSON response
        try:
            result = json.loads(json_response)
            verdict = result.get("verdict", "unknown")
            erroneous_line = result.get("erroneous_line", "")
            explanation = result.get("explanation", "")

            # Map verdict to display format
            verdict_mapping = {
                "correct": "✅ Correct",
                "conceptual_error": "🤔 Conceptual Error",
                "computational_error": "🔢 Computational Error"
            }
            display_verdict = verdict_mapping.get(verdict, f"❓ {verdict}")

            return display_verdict, erroneous_line or "None", explanation or "Solution is correct"
        except json.JSONDecodeError:
            return f"Model response: {json_response}", "", "Could not parse JSON response"

    except Exception as e:
        logger.error(f"Error during classification: {e}")
        return f"Classification error: {str(e)}", "", ""
# Load model on startup
load_model()

# Create Gradio interface
with gr.Blocks(title="Math Solution Classifier", theme=gr.themes.Soft()) as app:
    gr.Markdown("# 🧮 Math Solution Classifier")
    gr.Markdown("Classify math solutions as correct, conceptually flawed, or computationally flawed.")

    with gr.Row():
        with gr.Column():
            question_input = gr.Textbox(
                label="Math Question",
                placeholder="e.g., Solve for x: 2x + 5 = 13",
                lines=3
            )
            solution_input = gr.Textbox(
                label="Proposed Solution",
                placeholder="e.g., 2x + 5 = 13\n2x = 13 - 5\n2x = 8\nx = 4",
                lines=5
            )
            classify_btn = gr.Button("Classify Solution", variant="primary")

        with gr.Column():
            classification_output = gr.Textbox(label="Classification", interactive=False)
            erroneous_line_output = gr.Textbox(label="Erroneous Line", interactive=False)
            explanation_output = gr.Textbox(label="Explanation", interactive=False, lines=3)

    # Examples
    gr.Examples(
        examples=[
            [
                "Solve for x: 2x + 5 = 13",
                "2x + 5 = 13\n2x = 13 - 5\n2x = 8\nx = 4"
            ],
            [
                "Find the derivative of f(x) = x²",
                "f'(x) = 2x + 1"  # This should be computationally flawed
            ],
            [
                "What is 15% of 200?",
                "15% = 15/100 = 0.15\n0.15 × 200 = 30"
            ]
        ],
        inputs=[question_input, solution_input]
    )

    classify_btn.click(
        fn=classify_solution,
        inputs=[question_input, solution_input],
        outputs=[classification_output, erroneous_line_output, explanation_output]
    )

if __name__ == "__main__":
    app.launch()