Upload app.py
app.py CHANGED
@@ -359,80 +359,137 @@ def analyze_solution(question: str, solution: str):
 
 
 
-
-
-    """
-    Classify the math solution
-    Returns: (classification_label, confidence_score, explanation)
-    """
+def classify_solution_stream(question: str, solution: str, progress=gr.Progress()):
+    # always yield EXACTLY THREE outputs (classification, explanation, status)
     if not question.strip() or not solution.strip():
-
-
+        yield "Please fill in both fields", "", "⚠️ Provide a question and a solution."
+        return
+
+    # lazy init if needed
     if not models_ready():
-
-
+        yield "⏳ Loading models…", "", "Booting models…"
+        msg = load_model()
+        progress(0.1, desc="Loading models")
+        if not models_ready():
+            yield "Models not loaded", "", f"❌ {msg}"
+            return
+
     try:
-
-
-
-
+        # Stage 1 — conceptual
+        progress(0.2, desc="Stage 1: Conceptual check")
+        yield "⏳ Working…", "Starting conceptual check…", "🔎 **Stage 1:** running classifier…"
+        conceptual = run_conceptual_check(question, solution, classifier_model, classifier_tokenizer)
+        conf = conceptual['probabilities'][conceptual['prediction']]
+        yield "⏳ Working…", f"Stage 1: model predicts **{conceptual['prediction']}** (confidence {conf:.2%}). Now checking calculations…", \
+              f"✅ **Stage 1 done** — prediction: **{conceptual['prediction']}** (p={conf:.2%})."
+
+        # Stage 2 — computational
+        progress(0.6, desc="Stage 2: Computational check")
+        yield "⏳ Working…", "Running computational check…", "🧮 **Stage 2:** extracting & evaluating equations…"
+        computational = run_computational_check(solution, gemma_model, gemma_tokenizer)
+
+        # Final verdict
+        if computational["error"]:
+            classification = "computational_error"
+            explanation = (
+                f"A calculation error was found.\n"
+                f"On the line: \"{computational['line_text']}\"\n"
+                f"The correct calculation should be: {computational['correct_calc']}"
+            )
+            status = "🟥 **Final:** computational error detected."
+        else:
+            if conceptual['prediction'] == 'correct':
+                classification = 'correct'
+                explanation = "All calculations are correct and the overall logic appears to be sound."
+                status = "🟩 **Final:** correct."
+            else:
+                classification = 'conceptual_error'
+                explanation = "All calculations are correct, but there appears to be a conceptual error in the logic or setup of the solution."
+                status = "🟨 **Final:** conceptual issue."
+
+        progress(1.0, desc="Done")
+        yield classification, explanation, status
+
+    except Exception as e:
         logger.exception("inference failed")
+        yield "Runtime error", f"{type(e).__name__}: {e}", "❌ Exception during inference."
+
 
 
 
 
-
-
-# Create Gradio interface
+# Create Gradio interface (streaming UI)
 with gr.Blocks(title="Math Solution Classifier", theme=gr.themes.Soft()) as app:
     gr.Markdown("# 🧮 Math Solution Classifier")
-    gr.Markdown("Classify math solutions as correct
-
+    gr.Markdown("Classify math solutions as **correct**, **conceptually flawed**, or **computationally flawed**. "
+                "Status updates stream below as the two-stage pipeline runs.")
+
     with gr.Row():
-
+        # ---------- Left: inputs ----------
+        with gr.Column(scale=1):
             question_input = gr.Textbox(
                 label="Math Question",
                 placeholder="e.g., Solve for x: 2x + 5 = 13",
-                lines=3
+                lines=3,
             )
-
             solution_input = gr.Textbox(
-                label="Proposed Solution",
+                label="Proposed Solution",
                 placeholder="e.g., 2x + 5 = 13\n2x = 13 - 5\n2x = 8\nx = 4",
-                lines=
+                lines=8,
             )
-
-
-
-
+            with gr.Row():
+                classify_btn = gr.Button("Classify Solution", variant="primary")
+                clear_btn = gr.Button("Clear")
+
+        # ---------- Right: outputs ----------
+        with gr.Column(scale=1):
             classification_output = gr.Textbox(label="Classification", interactive=False)
-
-
-
-    # Examples
+            explanation_output = gr.Textbox(label="Explanation", interactive=False, lines=6)
+            status_output = gr.Markdown(value="*(idle)*")  # live stage updates
+
+    # ---------- Examples ----------
     gr.Examples(
         examples=[
             [
                 "Solve for x: 2x + 5 = 13",
-                "2x + 5 = 13\n2x = 13 - 5\n2x = 8\nx = 4"
+                "2x + 5 = 13\n2x = 13 - 5\n2x = 8\nx = 4",
            ],
             [
-                "John has three apples and Mary has seven, how many apples do they have together?",
-                "They have 7 + 3 = 11 apples."
+                "John has three apples and Mary has seven, how many apples do they have together?",
+                "They have 7 + 3 = 11 apples.",
             ],
             [
                 "What is 15% of 200?",
-                "15% = 15/100 = 0.15\n0.15 × 200 = 30"
-            ]
+                "15% = 15/100 = 0.15\n0.15 × 200 = 30",
+            ],
         ],
-        inputs=[question_input, solution_input]
+        inputs=[question_input, solution_input],
     )
-
+
+    # ---------- Wiring ----------
     classify_btn.click(
-        fn=
+        fn=classify_solution_stream,  # <- your generator function
         inputs=[question_input, solution_input],
-        outputs=[classification_output, explanation_output]
+        outputs=[classification_output, explanation_output, status_output],
+        show_progress="full",
     )
 
+    # Clear everything
+    clear_btn.click(
+        lambda: ("", "", "", "", "*(idle)*"),
+        inputs=None,
+        outputs=[
+            question_input,
+            solution_input,
+            classification_output,
+            explanation_output,
+            status_output,
+        ],
+        queue=False,
+    )
+
+if __name__ == "__main__":
+    app.launch()
+
 if __name__ == "__main__":
     app.launch()
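
The heart of this change is that the click handler is now a generator: each yield supplies one value per wired output component, so Gradio pushes intermediate status to the page while the two-stage pipeline runs. Below is a minimal, self-contained sketch of that streaming pattern. It is illustrative only; every name in it (toy_pipeline, demo, the time.sleep stand-ins) is a placeholder, not code from app.py.

import time
import gradio as gr

def toy_pipeline(question: str, solution: str):
    # Generator event handler: every yield must provide one value per output
    # component, i.e. a (classification, explanation, status) triple here.
    yield "⏳ Working…", "", "Stage 1: pretending to run the classifier…"
    time.sleep(1)  # stand-in for real model inference
    yield "⏳ Working…", "Stage 1 finished.", "Stage 2: pretending to verify calculations…"
    time.sleep(1)
    yield "correct", "All checks passed in this toy example.", "Done."

with gr.Blocks() as demo:
    q = gr.Textbox(label="Math Question")
    s = gr.Textbox(label="Proposed Solution", lines=4)
    btn = gr.Button("Classify")
    cls = gr.Textbox(label="Classification", interactive=False)
    exp = gr.Textbox(label="Explanation", interactive=False)
    status = gr.Markdown("*(idle)*")
    # Gradio streams each yielded triple into the three outputs in order.
    btn.click(toy_pipeline, inputs=[q, s], outputs=[cls, exp, status])

if __name__ == "__main__":
    demo.launch()

This mirrors the wiring shape of classify_btn.click(fn=classify_solution_stream, ...) in the diff, with the real run_conceptual_check and run_computational_check stages replaced by sleeps.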
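
The handler also relies on models_ready() and load_model(), which are defined elsewhere in app.py and do not appear in this hunk. The sketch below is purely an assumption about how such lazy loading is commonly structured; the checkpoint id, the globals, and the function bodies are placeholders, not the Space's actual implementation.

from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Hypothetical globals mirroring the names referenced in the hunk.
classifier_model = None
classifier_tokenizer = None

def models_ready() -> bool:
    # Ready once the global model/tokenizer pair has been populated.
    return classifier_model is not None and classifier_tokenizer is not None

def load_model() -> str:
    # Load lazily on first request so the Space starts quickly; return a status
    # string the caller can surface in the UI instead of raising.
    global classifier_model, classifier_tokenizer
    try:
        name = "your-org/your-classifier"  # placeholder checkpoint id
        classifier_tokenizer = AutoTokenizer.from_pretrained(name)
        classifier_model = AutoModelForSequenceClassification.from_pretrained(name)
        return "Models loaded."
    except Exception as exc:
        return f"Model load failed: {exc}"

The real loader presumably also initializes the gemma_model/gemma_tokenizer pair used by the computational check.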