VLM_Comparison / aitw_o1_dataset.json
advaitgupta's picture
Update aitw_o1_dataset.json
d1f7285 verified
{
"12172380859428428757": {
"episode_goal": "Open a new Chrome private window",
"steps": [
{
"step_id": 0,
"screenshot_path": "./aitw_o1_images/episode_12172380859428428757_step_0.png",
"action_history": "",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [382, 838]",
"Tap: [450, 840]",
"Tap: [280, 840]",
"Swipe: Up"
],
"correct_answer_index": 0
}
},
{
"step_id": 1,
"screenshot_path": "./aitw_o1_images/episode_12172380859428428757_step_1.png",
"action_history": "Step 0: Tapped at pixel coordinates (x,y): [382, 838]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [224, 776]",
"Tap: [224, 660]",
"Tap: [400, 780]",
"Swipe: Down"
],
"correct_answer_index": 0
}
},
{
"step_id": 2,
"screenshot_path": "./aitw_o1_images/episode_12172380859428428757_step_2.png",
"action_history": "Step 0: Tapped at pixel coordinates (x,y): [382, 838]\nStep 1: Tapped at pixel coordinates (x,y): [224, 776]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Button: Press Back",
"Tap: [405, 715]",
"Tap: [420, 775]",
"Tap: [130, 770]"
],
"correct_answer_index": 2
}
},
{
"step_id": 3,
"screenshot_path": "./aitw_o1_images/episode_12172380859428428757_step_3.png",
"action_history": "Step 0: Tapped at pixel coordinates (x,y): [382, 838]\nStep 1: Tapped at pixel coordinates (x,y): [224, 776]\nStep 2: Tapped at pixel coordinates (x,y): [420, 775]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [468, 64]",
"Tap: [515, 530]",
"Type: 'secret mode'",
"Tap: [505, 64]"
],
"correct_answer_index": 0
}
},
{
"step_id": 4,
"screenshot_path": "./aitw_o1_images/episode_12172380859428428757_step_4.png",
"action_history": "Step 0: Tapped at pixel coordinates (x,y): [382, 838]\nStep 1: Tapped at pixel coordinates (x,y): [224, 776]\nStep 2: Tapped at pixel coordinates (x,y): [420, 775]\nStep 3: Tapped at pixel coordinates (x,y): [468, 64]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Swipe: Down",
"Tap: [80, 100]",
"Tap: [525, 64]",
"Tap: [44, 78]"
],
"correct_answer_index": 3
}
},
{
"step_id": 5,
"screenshot_path": "./aitw_o1_images/episode_12172380859428428757_step_5.png",
"action_history": "Step 0: Tapped at pixel coordinates (x,y): [382, 838]\nStep 1: Tapped at pixel coordinates (x,y): [224, 776]\nStep 2: Tapped at pixel coordinates (x,y): [420, 775]\nStep 3: Tapped at pixel coordinates (x,y): [468, 64]\nStep 4: Tapped at pixel coordinates (x,y): [44, 78]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [480, 62]",
"Tap: [270, 140]",
"Tap: [467, 62]",
"Swipe: Down"
],
"correct_answer_index": 2
}
},
{
"step_id": 6,
"screenshot_path": "./aitw_o1_images/episode_12172380859428428757_step_6.png",
"action_history": "Step 0: Tapped at pixel coordinates (x,y): [382, 838]\nStep 1: Tapped at pixel coordinates (x,y): [224, 776]\nStep 2: Tapped at pixel coordinates (x,y): [420, 775]\nStep 3: Tapped at pixel coordinates (x,y): [468, 64]\nStep 4: Tapped at pixel coordinates (x,y): [44, 78]\nStep 5: Tapped at pixel coordinates (x,y): [467, 62]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [514, 64]",
"Tap: [40, 65]",
"Tap: [505, 85]",
"Swipe: Up"
],
"correct_answer_index": 0
}
},
{
"step_id": 7,
"screenshot_path": "./aitw_o1_images/episode_12172380859428428757_step_7.png",
"action_history": "Step 0: Tapped at pixel coordinates (x,y): [382, 838]\nStep 1: Tapped at pixel coordinates (x,y): [224, 776]\nStep 2: Tapped at pixel coordinates (x,y): [420, 775]\nStep 3: Tapped at pixel coordinates (x,y): [468, 64]\nStep 4: Tapped at pixel coordinates (x,y): [44, 78]\nStep 5: Tapped at pixel coordinates (x,y): [467, 62]\nStep 6: Tapped at pixel coordinates (x,y): [514, 64]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [370, 127]",
"Swipe: Down",
"Tap: [370, 190]",
"Tap: [370, 310]"
],
"correct_answer_index": 0
}
},
{
"step_id": 8,
"screenshot_path": "./aitw_o1_images/episode_12172380859428428757_step_8.png",
"action_history": "Step 0: Tapped at pixel coordinates (x,y): [382, 838]\nStep 1: Tapped at pixel coordinates (x,y): [224, 776]\nStep 2: Tapped at pixel coordinates (x,y): [420, 775]\nStep 3: Tapped at pixel coordinates (x,y): [468, 64]\nStep 4: Tapped at pixel coordinates (x,y): [44, 78]\nStep 5: Tapped at pixel coordinates (x,y): [467, 62]\nStep 6: Tapped at pixel coordinates (x,y): [514, 64]\nStep 7: Tapped at pixel coordinates (x,y): [370, 127]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [530, 65]",
"Button: Unknown",
"Button: Press Home",
"Type: 'open private mode'"
],
"correct_answer_index": 1
}
}
]
},
"9027843537045096280": {
"episode_goal": "Open a new Chrome window",
"steps": [
{
"step_id": 0,
"screenshot_path": "./aitw_o1_images/episode_9027843537045096280_step_0.png",
"action_history": "",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Button: Press Home",
"Button: Press Back",
"Tap: [320, 1050]",
"Type: 'chrome'"
],
"correct_answer_index": 0
}
},
{
"step_id": 1,
"screenshot_path": "./aitw_o1_images/episode_9027843537045096280_step_1.png",
"action_history": "Step 0: Pressed Button: Press Home",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [460, 830]",
"Tap: [365, 826]",
"Tap: [340, 950]",
"Swipe: Down"
],
"correct_answer_index": 1
}
},
{
"step_id": 2,
"screenshot_path": "./aitw_o1_images/episode_9027843537045096280_step_2.png",
"action_history": "Step 0: Pressed Button: Press Home\nStep 1: Tapped at pixel coordinates (x,y): [365, 826]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Button: Press Home",
"Swipe: Up",
"Tap: [480, 130]",
"Button: Unknown"
],
"correct_answer_index": 3
}
}
]
},
"2247237893045084693": {
"episode_goal": "Open the calendar and show me this week's events?",
"steps": [
{
"step_id": 0,
"screenshot_path": "./aitw_o1_images/episode_2247237893045084693_step_0.png",
"action_history": "",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Swipe: Left",
"Button: Press Home",
"Tap: [360, 950]",
"Swipe: Swipe Up"
],
"correct_answer_index": 3
}
},
{
"step_id": 1,
"screenshot_path": "./aitw_o1_images/episode_2247237893045084693_step_1.png",
"action_history": "Step 0: Swiped: Swipe Up",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Swipe: Down",
"Tap: [320, 170]",
"Tap: [370, 400]",
"Tap: [379, 183]"
],
"correct_answer_index": 3
}
},
{
"step_id": 2,
"screenshot_path": "./aitw_o1_images/episode_2247237893045084693_step_2.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [379, 183]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Button: Press Home",
"Tap: [450, 950]",
"Tap: [499, 976]",
"Tap: [60, 70]"
],
"correct_answer_index": 2
}
},
{
"step_id": 3,
"screenshot_path": "./aitw_o1_images/episode_2247237893045084693_step_3.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [379, 183]\nStep 2: Tapped at pixel coordinates (x,y): [499, 976]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [330, 890]",
"Swipe: Down",
"Tap: [65, 120]",
"Tap: [302, 950]"
],
"correct_answer_index": 3
}
},
{
"step_id": 4,
"screenshot_path": "./aitw_o1_images/episode_2247237893045084693_step_4.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [379, 183]\nStep 2: Tapped at pixel coordinates (x,y): [499, 976]\nStep 3: Tapped at pixel coordinates (x,y): [302, 950]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Button: Press Back",
"Tap: [480, 72]",
"Swipe: Down",
"Button: Unknown"
],
"correct_answer_index": 3
}
}
]
},
"12224608131504749719": {
"episode_goal": "How much does a 2x4x8 board cost at Lowes?",
"steps": [
{
"step_id": 0,
"screenshot_path": "./aitw_o1_images/episode_12224608131504749719_step_0.png",
"action_history": "",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Swipe: Swipe Up",
"Tap: [90, 1050]",
"Swipe: Down",
"Type: 'How to cut a 2x4 board'"
],
"correct_answer_index": 0
}
},
{
"step_id": 1,
"screenshot_path": "./aitw_o1_images/episode_12224608131504749719_step_1.png",
"action_history": "Step 0: Swiped: Swipe Up",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Button: Press Home",
"Type: 'How much do 2x6 boards cost at Lowes?'",
"Button: Press Back",
"Tap: [300, 1050]"
],
"correct_answer_index": 0
}
},
{
"step_id": 2,
"screenshot_path": "./aitw_o1_images/episode_12224608131504749719_step_2.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Pressed Button: Press Home",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Swipe: Down",
"Tap: [430, 920]",
"Tap: [289, 950]",
"Tap: [320, 880]"
],
"correct_answer_index": 2
}
},
{
"step_id": 3,
"screenshot_path": "./aitw_o1_images/episode_12224608131504749719_step_3.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Pressed Button: Press Home\nStep 2: Tapped at pixel coordinates (x,y): [289, 950]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [450, 950]",
"Type: 'How much does a 2x4x8 board cost at Home Depot?'",
"Type: 'How much does a 2x4x8 board cost at Lowes?'",
"Swipe: Up"
],
"correct_answer_index": 2
}
},
{
"step_id": 4,
"screenshot_path": "./aitw_o1_images/episode_12224608131504749719_step_4.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Pressed Button: Press Home\nStep 2: Tapped at pixel coordinates (x,y): [289, 950]\nStep 3: Typed: 'How much does a 2x4x8 board cost at Lowes?'",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [280, 200]",
"Tap: [286, 129]",
"Swipe: Down",
"Tap: [515, 120]"
],
"correct_answer_index": 1
}
},
{
"step_id": 5,
"screenshot_path": "./aitw_o1_images/episode_12224608131504749719_step_5.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Pressed Button: Press Home\nStep 2: Tapped at pixel coordinates (x,y): [289, 950]\nStep 3: Typed: 'How much does a 2x4x8 board cost at Lowes?'\nStep 4: Tapped at pixel coordinates (x,y): [286, 129]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Button: Press Back",
"Button: Unknown",
"Swipe: Down",
"Tap: [450, 140]"
],
"correct_answer_index": 1
}
}
]
},
"1307957808436696980": {
"episode_goal": "Google the capital of Mexico",
"steps": [
{
"step_id": 0,
"screenshot_path": "./aitw_o1_images/episode_1307957808436696980_step_0.png",
"action_history": "",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [480, 1050]",
"Swipe: Up",
"Button: Press Back",
"Button: Press Home"
],
"correct_answer_index": 3
}
},
{
"step_id": 1,
"screenshot_path": "./aitw_o1_images/episode_1307957808436696980_step_1.png",
"action_history": "Step 0: Pressed Button: Press Home",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Swipe: Swipe Up",
"Tap: [60, 920]",
"Button: Press Back",
"Swipe: Down"
],
"correct_answer_index": 0
}
},
{
"step_id": 2,
"screenshot_path": "./aitw_o1_images/episode_1307957808436696980_step_2.png",
"action_history": "Step 0: Pressed Button: Press Home\nStep 1: Swiped: Swipe Up",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Button: Press Home",
"Button: Press Back",
"Tap: [250,1050]",
"Type: 'Check phone battery level'"
],
"correct_answer_index": 0
}
},
{
"step_id": 3,
"screenshot_path": "./aitw_o1_images/episode_1307957808436696980_step_3.png",
"action_history": "Step 0: Pressed Button: Press Home\nStep 1: Swiped: Swipe Up\nStep 2: Pressed Button: Press Home",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Swipe: Up",
"Tap: [320, 890]",
"Tap: [320, 974]",
"Tap: [400, 950]"
],
"correct_answer_index": 2
}
},
{
"step_id": 4,
"screenshot_path": "./aitw_o1_images/episode_1307957808436696980_step_4.png",
"action_history": "Step 0: Pressed Button: Press Home\nStep 1: Swiped: Swipe Up\nStep 2: Pressed Button: Press Home\nStep 3: Tapped at pixel coordinates (x,y): [320, 974]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Type: 'Google the capital of Mexico'",
"Type: 'Find the best Mexican restaurants in the area'",
"Tap: [340, 270]",
"Swipe: Down"
],
"correct_answer_index": 0
}
},
{
"step_id": 5,
"screenshot_path": "./aitw_o1_images/episode_1307957808436696980_step_5.png",
"action_history": "Step 0: Pressed Button: Press Home\nStep 1: Swiped: Swipe Up\nStep 2: Pressed Button: Press Home\nStep 3: Tapped at pixel coordinates (x,y): [320, 974]\nStep 4: Typed: 'Google the capital of Mexico'",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [500, 114]",
"Tap: [210, 190]",
"Tap: [185, 114]",
"Button: Press Back"
],
"correct_answer_index": 2
}
},
{
"step_id": 6,
"screenshot_path": "./aitw_o1_images/episode_1307957808436696980_step_6.png",
"action_history": "Step 0: Pressed Button: Press Home\nStep 1: Swiped: Swipe Up\nStep 2: Pressed Button: Press Home\nStep 3: Tapped at pixel coordinates (x,y): [320, 974]\nStep 4: Typed: 'Google the capital of Mexico'\nStep 5: Tapped at pixel coordinates (x,y): [185, 114]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [485, 1050]",
"Button: Unknown",
"Button: Press Back",
"Swipe: Down"
],
"correct_answer_index": 1
}
}
]
}
}