Spaces:

mcamargo00
/

math-solution-classifier

Paused

App Files Files Community

mcamargo00 committed on 11 days ago

Commit

44ab37e

verified ·

1 Parent(s): 4599113

Upload app.py

Browse files

Files changed (1) hide show

app.py +372 -22

app.py CHANGED Viewed

@@ -389,7 +389,7 @@ def classify_solution_stream(question: str, solution: str):
             return
         log[-1] = "✅ Models loaded."
-    verdicts_mapping = {"correct": "Correct.", "conceptual_error": "Conceptual error.", "computational_error": "Computational error."}
     try:
         # ---------- Stage 1: Conceptual ----------
@@ -476,47 +476,397 @@ with gr.Blocks(title="Math Solution Classifier", theme=gr.themes.Soft()) as app:
             status_output        = gr.Markdown(value="*(idle)*")  # live stage updates
     # -------- Examples --------
     gr.Examples(
         examples=[
-        ["John has three apples and Mary has seven, how many apples do they have together?",
              "They have 7 + 3 = 11 apples."],
-        ["A tank holds 60 liters of fuel. A generator uses fuel at a rate of 5 liters per hour. After running for 9 hours, how many liters are still in the tank?",
-"The generator uses 5 L/h × 9 h = 45 L of fuel in 9 hours.\n Then, there remain 60 L + 45 L = 105 L in the tank.\n Final answer: 105 L"],
             ["What is 15% of 200?",
              "15% = 15/100 = 0.15\n0.15 × 200 = 30"],
-             ["A 24-meter rope is cut into 6 equal pieces. A climber uses 2 of those pieces. How many meters of rope are still unused?",
-             "The length of each piece is 24 / 6 = 4 m.\n The climber uses 2 × 4 m = 8 m of rope.\n There are 24 m − 8 m = 16 m of rope still unused."]
         ],
         inputs=[question_input, solution_input],
     )
-    # -------- Wiring --------
     classify_btn.click(
-        fn=classify_solution_stream,  # <- generator that yields (classification, explanation, status)
         inputs=[question_input, solution_input],
         outputs=[classification_output, explanation_output, status_output],
-        show_progress=False,          # <- no Gradio progress bars
-        concurrency_limit=1,          # <- per-event limit (good for GPU)
     )
-    clear_btn.click(
-        lambda: ("", "", "", "", "*(idle)*"),
         inputs=None,
-        outputs=[
-            question_input,
-            solution_input,
-            classification_output,
-            explanation_output,
-            status_output,
-        ],
         queue=False,
     )
-# enable queue for streaming (no deprecated args)
 app.queue()
 if __name__ == "__main__":
     app.launch()

             return
         log[-1] = "✅ Models loaded."
+    verdicts_mapping = {"correct": "Correct.", "conceptual_error": "Conceptually flawed.", "computational_error": "Computationally flawed."}
     try:
         # ---------- Stage 1: Conceptual ----------
             status_output        = gr.Markdown(value="*(idle)*")  # live stage updates
     # -------- Examples --------
+import csv, random
+from typing import Dict, Optional, List, Tuple
+# ---------- Data structures ----------
+class QAItem:
+    """One question with its optional correct solution, wrong solution, and error type.
+    Empty strings are stored as None so callers can test the fields for truthiness.
+    """
+    __slots__ = ("id", "question", "correct", "wrong", "error_type")
+    def __init__(self, id: int, question: str,
+                 correct: Optional[str], wrong: Optional[str], error_type: Optional[str]):
+        # Whitespace-only error types count as "not given".
+        cleaned_type = error_type.strip() if error_type else ""
+        self.id = id
+        self.question = question
+        self.correct = correct if correct else None
+        self.wrong = wrong if wrong else None
+        self.error_type = cleaned_type if cleaned_type else None  # e.g., "computational_error" / "conceptual_error"
+# ---------- CSV loader ----------
+def load_examples_csv(path: str) -> Dict[int, QAItem]:
+    """
+    Load the examples CSV and return a dict: {question_id: QAItem}.
+    Recognised columns (header matching is case-insensitive and ignores surrounding
+    whitespace): index, question, correct_answer, wrong_answer, error_type.
+    Accepts either 1 row per question (both solutions present) or several rows
+    sharing the same `index`, which are merged field by field (first non-empty
+    value wins). Rows whose `index` is not an integer are skipped, and questions
+    left with neither a correct nor a wrong solution are dropped.
+    """
+    def norm(s: Optional[str]) -> str:
+        return (s or "").strip()
+    # utf-8-sig strips a leading BOM (otherwise the first header would not match
+    # "index" and every row would be skipped); newline="" is what the csv module
+    # requires so quoted fields containing newlines are parsed correctly.
+    with open(path, "r", encoding="utf-8-sig", newline="") as f:
+        rdr = csv.DictReader(f)
+        # Normalize headers. DictReader stores overflow cells of over-long rows
+        # under the key None; those are not real columns, so ignore them.
+        rows = [
+            {k.strip().lower(): v for k, v in row.items() if isinstance(k, str)}
+            for row in rdr
+        ]
+    pool: Dict[int, QAItem] = {}
+    for r in rows:
+        try:
+            qid = int(norm(r.get("index")))
+        except ValueError:
+            # skip bad index rows
+            continue
+        q = norm(r.get("question"))
+        ca = norm(r.get("correct_answer"))
+        wa = norm(r.get("wrong_answer"))
+        et = norm(r.get("error_type"))
+        item = pool.get(qid)
+        if item is None:
+            pool[qid] = QAItem(qid, q, ca, wa, et)
+            continue
+        # merge if the CSV has multiple rows per id: only fill fields still empty
+        if q and not item.question:
+            item.question = q
+        if ca and not item.correct:
+            item.correct = ca
+        if wa and not item.wrong:
+            item.wrong = wa
+        if et and not item.error_type:
+            item.error_type = et
+    # drop questions that have neither solution
+    return {k: v for k, v in pool.items() if (v.correct or v.wrong)}
+# ---------- Selection state with balance ----------
+class ExampleSelector:
+    """
+    Serves examples from a question pool, one solution per question.
+    A question is handed out at most once until reset(); the choice between its
+    correct and wrong solution is balanced across picks; picks can be restricted
+    with a label filter.
+    filter_label ∈ {"any","correct","wrong","computational_error","conceptual_error"};
+    any other value matches nothing.
+    """
+    _ERROR_FILTERS = ("computational_error", "conceptual_error")
+    def __init__(self, pool: Dict[int, "QAItem"], seed: Optional[int] = None):
+        self.pool = pool
+        self._rng = random.Random(seed)
+        self.reset()
+    def reset(self):
+        """Reshuffle the question order and forget everything handed out so far."""
+        self.ids: List[int] = list(self.pool.keys())
+        self._rng.shuffle(self.ids)
+        self.cursor: int = 0
+        self.seen_ids: set[int] = set()
+        self.balance = {"correct": 0, "wrong": 0}
+    # ---- public API ----
+    def next_batch(self, k: int, filter_label: str = "any") -> List[Dict]:
+        """Return up to k rows (id, question, solution, label), updating internal state.
+        The shuffled id list is scanned cyclically, resuming where the previous call
+        stopped and visiting each id at most once per call. Ids that were skipped
+        because they did not match an earlier filter therefore stay eligible for
+        later calls instead of being lost behind the cursor.
+        """
+        out: List[Dict] = []
+        total = len(self.ids)
+        for _ in range(total):  # at most one full lap per call
+            if len(out) >= k:
+                break
+            if self.cursor >= total:
+                self.cursor = 0  # wrap around to revisit ids skipped earlier
+            qid = self.ids[self.cursor]
+            self.cursor += 1
+            if qid in self.seen_ids:
+                continue
+            item = self.pool[qid]
+            variant = self._choose_variant(item, filter_label)
+            if variant is None:
+                continue  # no variant matches filter
+            out.append(self._build_row(item, variant))
+            self._mark_used(item, variant)
+        return out
+    def surprise(self, filter_label: str = "any") -> Optional[Dict]:
+        """Pick a single unseen row at random (respecting filter & balance), or None."""
+        candidates = [
+            qid for qid in self.ids
+            if qid not in self.seen_ids and self._variant_available(self.pool[qid], filter_label)
+        ]
+        if not candidates:
+            return None
+        item = self.pool[self._rng.choice(candidates)]
+        variant = self._choose_variant(item, filter_label)
+        if variant is None:
+            return None
+        row = self._build_row(item, variant)
+        self._mark_used(item, variant)
+        return row
+    # ---- helpers ----
+    def _variant_available(self, item: "QAItem", filter_label: str) -> bool:
+        """Return True if the item has a solution matching the filter (no state is touched)."""
+        return self._choose_variant(item, filter_label, dry_run=True) is not None
+    def _choose_variant(self, item: "QAItem", filter_label: str, dry_run: bool = False) -> Optional[str]:
+        """
+        Returns 'correct' or 'wrong' or None given availability, filter, and current balance.
+        With dry_run=True the answer only indicates availability and the random
+        generator is never advanced, so probing does not perturb later picks.
+        """
+        has_correct = bool(item.correct)
+        has_wrong = bool(item.wrong)
+        # Build allowed set based on filter
+        if filter_label == "any":
+            allowed = [name for name, ok in (("correct", has_correct), ("wrong", has_wrong)) if ok]
+        elif filter_label == "correct":
+            allowed = ["correct"] if has_correct else []
+        elif filter_label == "wrong":
+            allowed = ["wrong"] if has_wrong else []
+        elif filter_label in self._ERROR_FILTERS:
+            # an error-type filter only matches wrong solutions tagged with that type
+            allowed = ["wrong"] if (has_wrong and item.error_type == filter_label) else []
+        else:
+            allowed = []
+        if not allowed:
+            return None
+        if len(allowed) == 1:
+            return allowed[0]
+        # Both variants are available (only possible under "any"):
+        # balance correct vs wrong across already-shown items.
+        c, w = self.balance["correct"], self.balance["wrong"]
+        if c > w:
+            return "wrong"
+        if w > c:
+            return "correct"
+        if dry_run:
+            return allowed[0]
+        return self._rng.choice(allowed)
+    def _build_row(self, item: "QAItem", variant: str) -> Dict:
+        """Build the row dict handed to the UI for the chosen variant of an item."""
+        if variant == "correct":
+            label = "correct"
+            sol = item.correct
+        else:
+            label = item.error_type or "wrong"
+            sol = item.wrong
+        return {
+            "id": item.id,
+            "question": item.question,
+            "solution": sol,
+            "label": label,  # "correct" | "computational_error" | "conceptual_error" | "wrong"
+        }
+    def _mark_used(self, item: "QAItem", variant: str):
+        """Record that an item was handed out and update the correct/wrong tally."""
+        # we mark the whole question as used so we never show both solutions
+        self.seen_ids.add(item.id)
+        self.balance["correct" if variant == "correct" else "wrong"] += 1
+# ===== CSV hookup (place near other imports / globals) =====
+from pathlib import Path
+import time
+# examples.csv is looked up in the same directory as this file, and the pool
+# is loaded once, when the module is imported.
+CSV_PATH = Path(__file__).resolve().parent / "examples.csv"
+POOL = load_examples_csv(str(CSV_PATH))
+def new_selector(seed: int | None = None):
+    """Create a fresh ExampleSelector over the module-level POOL.
+    Pass a seed for a reproducible order; every explicit seed, including 0, is
+    honoured. When seed is None, a 16-bit seed is derived from the current time.
+    """
+    if seed is None:
+        # `seed or ...` would have silently replaced an explicit seed of 0
+        seed = int(time.time()) & 0xFFFF
+    return ExampleSelector(POOL, seed=seed)
+# small helpers for UI
+def _truncate(s: str, n: int = 100) -> str:
+    """Return s unchanged if it fits in n characters, else its first n-1 characters plus an ellipsis."""
+    text = s if s else ""
+    if len(text) > n:
+        return text[: n - 1] + "…"
+    return text
+def _rows_to_table(rows: list[dict]) -> list[list[str]]:
+    """Convert example rows into Dataframe rows ordered [ID, Label, Question, Solution]."""
+    # Question and solution are shortened to 120 characters for display.
+    return [
+        [
+            str(row["id"]),
+            row["label"],
+            _truncate(row["question"], 120),
+            _truncate(row["solution"], 120),
+        ]
+        for row in rows
+    ]
+def _dropdown_choices(rows: list[dict]) -> list[tuple[str, int]]:
+    """Build (display label, id) pairs for the row picker, one per example row."""
+    return [
+        (f'#{row["id"]} — {row["label"]} — {_truncate(row["question"], 60)}', row["id"])
+        for row in rows
+    ]
+# ===== Gradio callbacks for examples =====
+def ui_see_more(selector, rows, filter_label):
+    """Fetch up to six more examples and return the extended rows plus table and picker updates."""
+    fresh = selector.next_batch(k=6, filter_label=filter_label)
+    existing = rows if rows else []
+    combined = existing + fresh
+    table_update = gr.update(value=_rows_to_table(combined))                      # examples_df
+    picker_update = gr.update(choices=_dropdown_choices(combined), value=None)    # row_picker
+    return combined, table_update, picker_update
+def ui_reset_examples():
+    """Return a brand-new selector together with an emptied row list, table, and picker."""
+    empty_rows: list[dict] = []
+    fresh_selector = new_selector()
+    table_update = gr.update(value=_rows_to_table(empty_rows))   # examples_df
+    picker_update = gr.update(choices=[], value=None)            # row_picker
+    return fresh_selector, empty_rows, table_update, picker_update
+def ui_load_selected(rows, selected_id):
+    """Return (question, solution) of the row whose id equals selected_id.
+    When nothing is selected or no row matches, both inputs are left unchanged.
+    """
+    if selected_id is not None and rows:
+        match = next((row for row in rows if row["id"] == selected_id), None)
+        if match is not None:
+            return match["question"], match["solution"]
+    return gr.update(), gr.update()
+def ui_surprise(selector, filter_label):
+    """Ask the selector for one example and return its (question, solution) for the inputs."""
+    picked = selector.surprise(filter_label=filter_label)
+    if picked:
+        return picked["question"], picked["solution"]
+    # no more examples; keep inputs unchanged
+    return gr.update(), gr.update()
+# ---------------- UI: add CSV-driven examples ----------------
+# Top-level UI definition: builds the Blocks app (inputs, outputs, static
+# examples, CSV example browser) and wires each button to its callback.
+with gr.Blocks(title="Math Solution Classifier", theme=gr.themes.Soft()) as app:
+    gr.Markdown("# 🧮 Math Solution Classifier")
+    gr.Markdown(
+        "Classify math solutions as **correct**, **conceptually flawed**, or **computationally flawed**. "
+        "Live status updates appear below as the two-stage pipeline runs."
+    )
+    # Per-session state
+    # new_selector() is called once here, while the UI is being built, to produce
+    # the initial state value.
+    selector_state = gr.State(new_selector())
+    rows_state     = gr.State([])  # list[dict] rows currently in the browser
+    with gr.Row():
+        # -------- Left: inputs --------
+        with gr.Column(scale=1):
+            question_input = gr.Textbox(
+                label="Math Question",
+                placeholder="e.g., Solve for x: 2x + 5 = 13",
+                lines=3,
+            )
+            solution_input = gr.Textbox(
+                label="Proposed Solution",
+                placeholder="e.g., 2x + 5 = 13\n2x = 13 - 5\n2x = 8\nx = 4",
+                lines=8,
+            )
+            with gr.Row():
+                classify_btn = gr.Button("Classify Solution", variant="primary")
+                surprise_btn = gr.Button("Surprise me")   # <- new
+                clear_btn    = gr.Button("Clear")
+        # -------- Right: outputs --------
+        with gr.Column(scale=1):
+            classification_output = gr.Textbox(label="Classification", interactive=False)
+            explanation_output   = gr.Textbox(label="Explanation",   interactive=False, lines=6)
+            status_output        = gr.Markdown(value="*(idle)*")  # live stage updates
+    # -------- Curated starter examples (static) --------
+    # Each entry is a [question, solution] pair. The apples solution is
+    # arithmetically wrong (7 + 3 is 10, not 11).
     gr.Examples(
         examples=[
+            ["Solve for x: 2x + 5 = 13",
+             "2x + 5 = 13\n2x = 13 - 5\n2x = 8\nx = 4"],
+            ["John has three apples and Mary has seven, how many apples do they have together?",
              "They have 7 + 3 = 11 apples."],
             ["What is 15% of 200?",
              "15% = 15/100 = 0.15\n0.15 × 200 = 30"],
         ],
         inputs=[question_input, solution_input],
     )
+    # -------- Dynamic browser (CSV) --------
+    with gr.Accordion("Browse more examples", open=False):
+        with gr.Row():
+            # Choices are (display text, value) pairs; the value is passed to the
+            # callbacks as filter_label.
+            filter_dd = gr.Dropdown(
+                label="Filter",
+                choices=[
+                    ("Any", "any"),
+                    ("Correct only", "correct"),
+                    ("Conceptual error only", "conceptual_error"),
+                    ("Computational error only", "computational_error"),
+                ],
+                value="any",
+                allow_custom_value=False,
+            )
+            see_more_btn   = gr.Button("See more")
+            reset_list_btn = gr.Button("Reset list")
+        # Column order matches the rows produced by _rows_to_table.
+        examples_df = gr.Dataframe(
+            headers=["ID", "Label", "Question", "Solution"],
+            value=[],
+            interactive=False,
+            row_count=(0, "dynamic"),
+            col_count=4,
+            wrap=True,
+            height=260,
+            label="Examples",
+        )
+        with gr.Row():
+            row_picker = gr.Dropdown(label="Select example to load", choices=[], value=None, scale=2)
+            load_btn   = gr.Button("Load to editor", scale=1)
+    # ---------- Wiring ----------
+    # Main classify (streaming)
     classify_btn.click(
+        fn=classify_solution_stream,
         inputs=[question_input, solution_input],
         outputs=[classification_output, explanation_output, status_output],
+        show_progress=False,
+        concurrency_limit=1,
+    )
+    # Surprise me → fills inputs from the CSV pool
+    # selector_state is only an input here; it is not listed in outputs.
+    surprise_btn.click(
+        fn=ui_surprise,
+        inputs=[selector_state, filter_dd],
+        outputs=[question_input, solution_input],
+        queue=True,
+    )
+    # See more → appends rows to the browser
+    see_more_btn.click(
+        fn=ui_see_more,
+        inputs=[selector_state, rows_state, filter_dd],
+        outputs=[rows_state, examples_df, row_picker],
+        queue=False,
     )
+    # Reset list → new selector + clear table
+    reset_list_btn.click(
+        fn=ui_reset_examples,
         inputs=None,
+        outputs=[selector_state, rows_state, examples_df, row_picker],
+        queue=False,
+    )
+    # Load selected row → fills main inputs
+    load_btn.click(
+        fn=ui_load_selected,
+        inputs=[rows_state, row_picker],
+        outputs=[question_input, solution_input],
         queue=False,
     )
+# Enable queue for streaming
 app.queue()
 if __name__ == "__main__":
     app.launch()