Spaces:

advaitgupta
/

MCQ_Comparison

Sleeping

App Files Files Community

advaitgupta committed about 1 month ago

Commit

3718d91

verified ·

1 Parent(s): 7295fa4

Update visualizer.py

Browse files

Files changed (1) hide show

visualizer.py +1 -202

visualizer.py CHANGED Viewed

@@ -1,204 +1,3 @@
-# import os
-# import glob
-# import json
-# import base64
-# import pandas as pd
-# import gradio as gr
-# import re
-# # --- Data Loading ---
-# ABS_DATA_PATH = "data"
-# if os.path.exists(ABS_DATA_PATH):
-#     os.chdir(ABS_DATA_PATH)
-# AITW_DATA_ROOT = "."
-# MODEL_DISPLAY_MAPPING = {
-#     "gpt": "GPT-4o",
-#     "gemini": "Gemini 2.5 Pro",
-#     "qwen": "Qwen 2.5 VL 72B"
-# }
-# MODELS_IN_ORDER = ["gpt", "gemini", "qwen"]
-# def image_to_base64_markdown(img_path):
-#     if not img_path or not os.path.exists(img_path):
-#         return "Image not found"
-#     try:
-#         with open(img_path, "rb") as f:
-#             encoded = base64.b64encode(f.read()).decode("utf-8")
-#             ext = os.path.splitext(img_path)[-1].lstrip(".").lower()
-#             if ext not in ['png', 'jpg', 'jpeg', 'gif', 'bmp']:
-#                 ext = 'png'
-#             return f"![image](data:image/{ext};base64,{encoded})"
-#     except Exception as e:
-#         print(f"Error encoding image {img_path}: {e}")
-#         return "Error loading image"
-# def load_and_prepare_data(data_root_path):
-#     primary_model_dir = os.path.join(data_root_path, MODELS_IN_ORDER[0])
-#     if not os.path.isdir(primary_model_dir):
-#         print(f"Error: Primary model directory not found at '{primary_model_dir}'")
-#         return pd.DataFrame()
-#     all_rows = []
-#     json_files = glob.glob(os.path.join(primary_model_dir, "*.json"))
-#     for json_path in json_files:
-#         with open(json_path, 'r', encoding='utf-8') as f:
-#             data = json.load(f)
-#         for episode_id, episode_data in data.items():
-#             episode_goal = episode_data.get("episode_goal", "N/A")
-#             for step in episode_data.get("steps", []):
-#                 question_block = step.get("questions", {})
-#                 question = question_block.get("question", "N/A")
-#                 options = question_block.get("options", [])
-#                 answer_index = question_block.get("correct_answer_index")
-#                 valid_answer_index = -1
-#                 if answer_index is not None:
-#                     try:
-#                         valid_answer_index = int(answer_index)
-#                     except (ValueError, TypeError):
-#                         pass
-#                 formatted_options = "\n".join(f"{i+1}. {opt}" for i, opt in enumerate(options))
-#                 correct_option_text = "N/A"
-#                 if 0 <= valid_answer_index < len(options):
-#                     correct_option_text = options[valid_answer_index]
-#                 image_markdown = {}
-#                 base_screenshot_path = step.get("screenshot_path", "").lstrip("/")
-#                 for model_key in MODELS_IN_ORDER:
-#                     img_path = os.path.join(data_root_path, model_key, base_screenshot_path)
-#                     image_markdown[model_key] = image_to_base64_markdown(img_path)
-#                 row = [
-#                     episode_goal,
-#                     question,
-#                     formatted_options,
-#                     correct_option_text,
-#                     image_markdown.get("gpt"),
-#                     image_markdown.get("gemini"),
-#                     image_markdown.get("qwen")
-#                 ]
-#                 all_rows.append(row)
-#     headers = [
-#         "Episode Goal", "Question", "Options", "Correct Option",
-#         MODEL_DISPLAY_MAPPING["gpt"],
-#         MODEL_DISPLAY_MAPPING["gemini"],
-#         MODEL_DISPLAY_MAPPING["qwen"]
-#     ]
-#     return pd.DataFrame(all_rows, columns=headers)
-# # --- CSS for the modal overlay ---
-# modal_css = """
-# #image-modal {
-#     position: fixed;
-#     top: 0;
-#     left: 0;
-#     width: 100%;
-#     height: 100%;
-#     background-color: rgba(0, 0, 0, 0.8);
-#     display: flex;
-#     justify-content: center;
-#     align-items: center;
-#     z-index: 9999;
-# }
-# #image-modal .modal-content {
-#     background-color: white;
-#     padding: 20px;
-#     border-radius: 10px;
-#     max-width: 90vw;
-#     max-height: 90vh;
-#     display: flex;
-#     flex-direction: column;
-#     align-items: center;
-# }
-# #image-modal .modal-content img {
-#     max-width: 100%;
-#     max-height: calc(90vh - 80px);
-#     object-fit: contain;
-# }
-# #close-modal-btn {
-#     margin-top: 15px;
-# }
-# """
-# # --- Gradio Interface ---
-# with gr.Blocks(theme=gr.themes.Default(spacing_size=gr.themes.sizes.spacing_sm),
-#                css=modal_css) as demo:
-#     gr.Markdown("# AITW Benchmark Visualizer")
-#     gr.Markdown("Visual comparison of model outputs for the Android in the Wild (AITW) benchmark.")
-#     full_df_state = gr.State()
-#     display_df = gr.DataFrame(
-#         headers=[
-#             "Episode Goal", "Question", "Options", "Correct Option",
-#             MODEL_DISPLAY_MAPPING["gpt"],
-#             MODEL_DISPLAY_MAPPING["gemini"],
-#             MODEL_DISPLAY_MAPPING["qwen"]
-#         ],
-#         datatype=["markdown", "markdown", "markdown", "markdown", "markdown", "markdown", "markdown"],
-#         interactive=False,
-#         row_count=(20, "dynamic")
-#     )
-#     # --- The hidden modal for displaying the zoomed image ---
-#     with gr.Column(visible=False, elem_id="image-modal") as modal:
-#         # --- FIX: Replaced gr.Box with gr.Column ---
-#         with gr.Column(elem_classes=["modal-content"]):
-#             modal_image = gr.Image(interactive=False)
-#             close_modal_btn = gr.Button("Close", elem_id="close-modal-btn")
-#     def load_initial_data():
-#         print(f"Current working directory: {os.getcwd()}")
-#         print("Loading and preparing AITW data...")
-#         prepared_df = load_and_prepare_data(AITW_DATA_ROOT)
-#         if prepared_df.empty:
-#             gr.Warning(f"No data loaded. Please check that the '{AITW_DATA_ROOT}' directory is structured correctly.")
-#         else:
-#             print(f"Successfully loaded {len(prepared_df)} steps.")
-#         return prepared_df, prepared_df
-#     def show_image_in_modal(df_state, evt: gr.SelectData):
-#         if evt.index is None or evt.value is None:
-#             return gr.update(visible=False), gr.update(visible=False)
-#         if evt.index[1] not in [4, 5, 6]:
-#             return gr.update(visible=False), gr.update(visible=False)
-#         match = re.search(r'\(data:image/[^)]+\)', evt.value)
-#         if not match:
-#             return gr.update(visible=False), gr.update(visible=False)
-#         image_data_uri = match.group(0).strip('()')
-#         return gr.update(visible=True), gr.update(value=image_data_uri, visible=True)
-#     def close_modal():
-#         return gr.update(visible=False), gr.update(visible=False)
-#     # --- Event Wiring ---
-#     demo.load(fn=load_initial_data, inputs=None, outputs=[display_df, full_df_state])
-#     display_df.select(fn=show_image_in_modal, inputs=[full_df_state], outputs=[modal, modal_image], show_progress=False)
-#     close_modal_btn.click(fn=close_modal, inputs=None, outputs=[modal, modal_image], show_progress=False)
-# if __name__ == "__main__":
-#     demo.launch(share=True, debug=True)
 import os
 import glob
@@ -217,7 +16,7 @@ if os.path.exists(ABS_DATA_PATH):
 AITW_DATA_ROOT = "."
 MODEL_DISPLAY_MAPPING = {
-    "gpt": "GPT-4o",
+    "gpt": "OpenAI o1",
     "gemini": "Gemini 2.5 Pro",
     "qwen": "Qwen 2.5 VL 72B"
 }