Upload app.py
Browse files
app.py
CHANGED
@@ -358,54 +358,86 @@ def analyze_solution(question: str, solution: str):
|
|
358 |
return final_verdict
|
359 |
|
360 |
def classify_solution_stream(question: str, solution: str):
|
361 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
362 |
if not question.strip() or not solution.strip():
|
363 |
-
|
|
|
364 |
return
|
365 |
|
|
|
366 |
if not models_ready():
|
367 |
-
|
|
|
368 |
msg = load_model()
|
369 |
if not models_ready():
|
370 |
-
|
|
|
371 |
return
|
|
|
372 |
|
373 |
try:
|
374 |
-
# Stage 1
|
375 |
-
|
|
|
|
|
376 |
conceptual = run_conceptual_check(question, solution, classifier_model, classifier_tokenizer)
|
377 |
-
|
378 |
-
|
379 |
-
|
|
|
|
|
|
|
|
|
|
|
380 |
|
381 |
-
# Stage 2 — computational
|
382 |
-
yield "⏳ Working…", "Running computational check…", "🧮 **Stage 2:** extracting & evaluating equations…"
|
383 |
computational = run_computational_check(solution, gemma_model, gemma_tokenizer)
|
384 |
|
385 |
-
# Final verdict
|
386 |
if computational["error"]:
|
|
|
|
|
|
|
|
|
387 |
classification = "computational_error"
|
388 |
explanation = (
|
389 |
-
|
390 |
-
f
|
391 |
-
f"The correct calculation should be: {
|
392 |
)
|
393 |
-
status = "🟥 **Final:** computational error detected."
|
394 |
else:
|
395 |
-
|
396 |
-
|
|
|
397 |
explanation = "All calculations are correct and the overall logic appears to be sound."
|
398 |
-
status = "🟩 **Final:** correct."
|
399 |
else:
|
400 |
-
classification =
|
401 |
-
explanation =
|
402 |
-
|
|
|
|
|
403 |
|
404 |
-
yield
|
|
|
405 |
|
406 |
except Exception as e:
|
407 |
logger.exception("inference failed")
|
408 |
-
|
|
|
|
|
409 |
|
410 |
# ---------------- UI: streaming, no progress bars ----------------
|
411 |
with gr.Blocks(title="Math Solution Classifier", theme=gr.themes.Soft()) as app:
|
|
|
358 |
return final_verdict
|
359 |
|
360 |
def classify_solution_stream(question: str, solution: str):
    """Stream the two-stage classification of a math solution.

    This is a generator. Every item it yields is a 3-tuple
    ``(classification, explanation, status_markdown)``; the last item
    yielded carries the final result (or the error that stopped the run).

    ``status_markdown`` is a growing bulleted checklist:
        ⏳ Stage 1 ...
        ✅ Stage 1 ... done
        ⏳ Stage 2 ...
        ✅ / 🟥 Stage 2 ... result

    Args:
        question: The math question text. ``None`` or whitespace-only is
            treated as missing input.
        solution: The proposed solution text. ``None`` or whitespace-only
            is treated as missing input.

    Yields:
        ``(classification, explanation, status_markdown)`` tuples of
        strings. While work is in progress ``classification`` is a
        placeholder such as ``"⏳ Working…"``; the final value is one of
        ``"correct"``, ``"conceptual_error"``, ``"computational_error"``,
        or a short message (``"Please fill in both fields"``,
        ``"Models not loaded"``, ``"Runtime error"``).

    Exceptions raised during inference are logged and reported through the
    last yielded tuple instead of propagating to the caller.
    """
    def render(log_lines):
        # Join as a bulleted markdown list; show a placeholder when empty.
        return "\n".join(f"- {line}" for line in log_lines) or "*(idle)*"

    log = []

    # Basic input check. ``or ""`` makes a None input behave like an empty
    # string instead of raising AttributeError on ``.strip()``.
    if not (question or "").strip() or not (solution or "").strip():
        log.append("⚠️ Provide a question and a solution.")
        yield "Please fill in both fields", "", render(log)
        return

    # Lazy-load the models if they are not ready yet.
    if not models_ready():
        log.append("⏳ Loading models…")
        yield "⏳ Working…", "", render(log)
        msg = load_model()
        if not models_ready():
            # Overwrite the pending line so the checklist shows the failure.
            log[-1] = f"🟥 Failed to load models — {msg}"
            yield "Models not loaded", "", render(log)
            return
        log[-1] = "✅ Models loaded."

    try:
        # ---------- Stage 1: Conceptual ----------
        log.append("⏳ **Stage 1: Conceptual check**")
        yield "⏳ Working…", "Starting conceptual check…", render(log)

        conceptual = run_conceptual_check(question, solution, classifier_model, classifier_tokenizer)
        pred = conceptual["prediction"]
        conf = conceptual["probabilities"][pred]
        log[-1] = f"✅ **Stage 1: Conceptual check** — prediction **{pred}** (p={conf:.2%})"
        yield "⏳ Working…", f"Stage 1: {pred} (p={conf:.2%}). Now checking calculations…", render(log)

        # ---------- Stage 2: Computational ----------
        log.append("⏳ **Stage 2: Computational check**")
        yield "⏳ Working…", "Running computational check…", render(log)

        computational = run_computational_check(solution, gemma_model, gemma_tokenizer)

        # ---------- Final verdict ----------
        # A computational error takes precedence over the Stage 1 prediction.
        if computational["error"]:
            # Mark stage 2 as failed.
            line_txt = computational["line_text"]
            corr = computational["correct_calc"]
            log[-1] = f"🟥 **Stage 2: Computational check** — error on line “{line_txt}” (correct: `{corr}`)"
            classification = "computational_error"
            explanation = (
                "A calculation error was found.\n"
                f'On the line: "{line_txt}"\n'
                f"The correct calculation should be: {corr}"
            )
        else:
            log[-1] = "✅ **Stage 2: Computational check** — no arithmetic issues found."
            if pred == "correct":
                classification = "correct"
                explanation = "All calculations are correct and the overall logic appears to be sound."
            else:
                classification = "conceptual_error"
                explanation = (
                    "All calculations are correct, but there appears to be a conceptual error "
                    "in the logic or setup of the solution."
                )

        # Final yield updates both result fields + the complete checklist.
        yield classification, explanation, render(log)

    except Exception as e:
        logger.exception("inference failed")
        # The stage that was running when the exception hit is still marked
        # "in progress"; flip it to failed so the checklist is not misleading.
        if log and log[-1].startswith("⏳"):
            log[-1] = "🟥" + log[-1][len("⏳"):]
        log.append(f"🟥 Exception during inference: **{type(e).__name__}** — {e}")
        yield "Runtime error", f"{type(e).__name__}: {e}", render(log)
|
440 |
+
|
441 |
|
442 |
# ---------------- UI: streaming, no progress bars ----------------
|
443 |
with gr.Blocks(title="Math Solution Classifier", theme=gr.themes.Soft()) as app:
|