VLM_Comparison / aitw_gemini_dataset.json
advaitgupta's picture
Update aitw_gemini_dataset.json
ee4cbbd verified
{
"12172380859428428757": {
"episode_goal": "Open a new Chrome private window",
"steps": [
{
"step_id": 0,
"screenshot_path": "./aitw_gemini_images/episode_12172380859428428757_step_0.png",
"action_history": "",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Type: 'search'",
"Swipe: Down",
"Tap: [382, 838]",
"Button: Press Back"
],
"correct_answer_index": 2
}
},
{
"step_id": 1,
"screenshot_path": "./aitw_gemini_images/episode_12172380859428428757_step_1.png",
"action_history": "Step 0: Tapped at pixel coordinates (x,y): [382, 838]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Button: Press Back",
"Tap: [120, 625]",
"Swipe: Down",
"Tap: [224, 776]"
],
"correct_answer_index": 3
}
},
{
"step_id": 2,
"screenshot_path": "./aitw_gemini_images/episode_12172380859428428757_step_2.png",
"action_history": "Step 0: Tapped at pixel coordinates (x,y): [382, 838]\nStep 1: Tapped at pixel coordinates (x,y): [224, 776]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [500, 420]",
"Tap: [160, 775]",
"Button: Press Back",
"Tap: [420, 775]"
],
"correct_answer_index": 3
}
},
{
"step_id": 3,
"screenshot_path": "./aitw_gemini_images/episode_12172380859428428757_step_3.png",
"action_history": "Step 0: Tapped at pixel coordinates (x,y): [382, 838]\nStep 1: Tapped at pixel coordinates (x,y): [224, 776]\nStep 2: Tapped at pixel coordinates (x,y): [420, 775]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [468, 64]",
"Tap: [513, 64]",
"Tap: [518, 526]",
"Type: 'new incognito tab'"
],
"correct_answer_index": 0
}
},
{
"step_id": 4,
"screenshot_path": "./aitw_gemini_images/episode_12172380859428428757_step_4.png",
"action_history": "Step 0: Tapped at pixel coordinates (x,y): [382, 838]\nStep 1: Tapped at pixel coordinates (x,y): [224, 776]\nStep 2: Tapped at pixel coordinates (x,y): [420, 775]\nStep 3: Tapped at pixel coordinates (x,y): [468, 64]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Swipe: Right",
"Tap: [44, 78]",
"Type: 'search'",
"Button: Press Enter"
],
"correct_answer_index": 1
}
},
{
"step_id": 5,
"screenshot_path": "./aitw_gemini_images/episode_12172380859428428757_step_5.png",
"action_history": "Step 0: Tapped at pixel coordinates (x,y): [382, 838]\nStep 1: Tapped at pixel coordinates (x,y): [224, 776]\nStep 2: Tapped at pixel coordinates (x,y): [420, 775]\nStep 3: Tapped at pixel coordinates (x,y): [468, 64]\nStep 4: Tapped at pixel coordinates (x,y): [44, 78]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Type: 'new private window'",
"Tap: [515, 522]",
"Tap: [515, 62]",
"Tap: [467, 62]"
],
"correct_answer_index": 3
}
},
{
"step_id": 6,
"screenshot_path": "./aitw_gemini_images/episode_12172380859428428757_step_6.png",
"action_history": "Step 0: Tapped at pixel coordinates (x,y): [382, 838]\nStep 1: Tapped at pixel coordinates (x,y): [224, 776]\nStep 2: Tapped at pixel coordinates (x,y): [420, 775]\nStep 3: Tapped at pixel coordinates (x,y): [468, 64]\nStep 4: Tapped at pixel coordinates (x,y): [44, 78]\nStep 5: Tapped at pixel coordinates (x,y): [467, 62]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [490, 114]",
"Swipe: Down",
"Tap: [150, 78]",
"Tap: [514, 64]"
],
"correct_answer_index": 3
}
},
{
"step_id": 7,
"screenshot_path": "./aitw_gemini_images/episode_12172380859428428757_step_7.png",
"action_history": "Step 0: Tapped at pixel coordinates (x,y): [382, 838]\nStep 1: Tapped at pixel coordinates (x,y): [224, 776]\nStep 2: Tapped at pixel coordinates (x,y): [420, 775]\nStep 3: Tapped at pixel coordinates (x,y): [468, 64]\nStep 4: Tapped at pixel coordinates (x,y): [44, 78]\nStep 5: Tapped at pixel coordinates (x,y): [467, 62]\nStep 6: Tapped at pixel coordinates (x,y): [514, 64]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [370, 127]",
"Tap: [370, 298]",
"Button: Press Back",
"Tap: [370, 70]"
],
"correct_answer_index": 0
}
},
{
"step_id": 8,
"screenshot_path": "./aitw_gemini_images/episode_12172380859428428757_step_8.png",
"action_history": "Step 0: Tapped at pixel coordinates (x,y): [382, 838]\nStep 1: Tapped at pixel coordinates (x,y): [224, 776]\nStep 2: Tapped at pixel coordinates (x,y): [420, 775]\nStep 3: Tapped at pixel coordinates (x,y): [468, 64]\nStep 4: Tapped at pixel coordinates (x,y): [44, 78]\nStep 5: Tapped at pixel coordinates (x,y): [467, 62]\nStep 6: Tapped at pixel coordinates (x,y): [514, 64]\nStep 7: Tapped at pixel coordinates (x,y): [370, 127]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [270, 80]",
"Button: Press Back",
"Tap: [450, 80]",
"Button: Unknown"
],
"correct_answer_index": 3
}
}
]
},
"9027843537045096280": {
"episode_goal": "Open a new Chrome window",
"steps": [
{
"step_id": 0,
"screenshot_path": "./aitw_gemini_images/episode_9027843537045096280_step_0.png",
"action_history": "",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Button: Press Back",
"Type: 'search'",
"Swipe: Left",
"Button: Press Home"
],
"correct_answer_index": 3
}
},
{
"step_id": 1,
"screenshot_path": "./aitw_gemini_images/episode_9027843537045096280_step_1.png",
"action_history": "Step 0: Pressed Button: Press Home",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [90, 960]",
"Tap: [278, 826]",
"Tap: [365, 826]",
"Swipe: Up"
],
"correct_answer_index": 2
}
},
{
"step_id": 2,
"screenshot_path": "./aitw_gemini_images/episode_9027843537045096280_step_2.png",
"action_history": "Step 0: Pressed Button: Press Home\nStep 1: Tapped at pixel coordinates (x,y): [365, 826]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [46, 86]",
"Type: 'new tab'",
"Button: Unknown",
"Button: Press Back"
],
"correct_answer_index": 2
}
}
]
},
"2247237893045084693": {
"episode_goal": "Open the calendar and show me this week's events?",
"steps": [
{
"step_id": 0,
"screenshot_path": "./aitw_gemini_images/episode_2247237893045084693_step_0.png",
"action_history": "",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Swipe: Swipe Down",
"Swipe: Swipe Up",
"Tap: [270, 880]",
"Tap: [114, 153]"
],
"correct_answer_index": 1
}
},
{
"step_id": 1,
"screenshot_path": "./aitw_gemini_images/episode_2247237893045084693_step_1.png",
"action_history": "Step 0: Swiped: Swipe Up",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [379, 183]",
"Tap: [75, 345]",
"Tap: [275, 183]",
"Type: 'Calendar'"
],
"correct_answer_index": 0
}
},
{
"step_id": 2,
"screenshot_path": "./aitw_gemini_images/episode_2247237893045084693_step_2.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [379, 183]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [270, 315]",
"Tap: [440, 976]",
"Swipe: Left",
"Tap: [499, 976]"
],
"correct_answer_index": 3
}
},
{
"step_id": 3,
"screenshot_path": "./aitw_gemini_images/episode_2247237893045084693_step_3.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [379, 183]\nStep 2: Tapped at pixel coordinates (x,y): [499, 976]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [302, 950]",
"Swipe: Down",
"Tap: [425, 950]",
"Tap: [270, 600]"
],
"correct_answer_index": 0
}
},
{
"step_id": 4,
"screenshot_path": "./aitw_gemini_images/episode_2247237893045084693_step_4.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [379, 183]\nStep 2: Tapped at pixel coordinates (x,y): [499, 976]\nStep 3: Tapped at pixel coordinates (x,y): [302, 950]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Swipe: Right",
"Tap: [453, 86]",
"Tap: [140, 184]",
"Button: Unknown"
],
"correct_answer_index": 3
}
}
]
},
"12224608131504749719": {
"episode_goal": "How much does a 2x4x8 board cost at Lowes?",
"steps": [
{
"step_id": 0,
"screenshot_path": "./aitw_gemini_images/episode_12224608131504749719_step_0.png",
"action_history": "",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [375, 790]",
"Swipe: Swipe Up",
"Tap: [270, 882]",
"Swipe: Swipe Down"
],
"correct_answer_index": 1
}
},
{
"step_id": 1,
"screenshot_path": "./aitw_gemini_images/episode_12224608131504749719_step_1.png",
"action_history": "Step 0: Swiped: Swipe Up",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Button: Press Back",
"Button: Press Home",
"Type: 'how much does a 2x4x8 board cost at lowes'",
"Tap: [337, 362]"
],
"correct_answer_index": 1
}
},
{
"step_id": 2,
"screenshot_path": "./aitw_gemini_images/episode_12224608131504749719_step_2.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Pressed Button: Press Home",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [405, 853]",
"Type: 'How much does a 2x4x8 board cost at Lowes?'",
"Tap: [475, 950]",
"Tap: [289, 950]"
],
"correct_answer_index": 3
}
},
{
"step_id": 3,
"screenshot_path": "./aitw_gemini_images/episode_12224608131504749719_step_3.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Pressed Button: Press Home\nStep 2: Tapped at pixel coordinates (x,y): [289, 950]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [495, 85]",
"Type: 'how much does a 2 bedroom apartment rent for in house'",
"Type: 'How much does a 2x4x8 board cost at Lowes?'",
"Button: Press Back"
],
"correct_answer_index": 2
}
},
{
"step_id": 4,
"screenshot_path": "./aitw_gemini_images/episode_12224608131504749719_step_4.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Pressed Button: Press Home\nStep 2: Tapped at pixel coordinates (x,y): [289, 950]\nStep 3: Typed: 'How much does a 2x4x8 board cost at Lowes?'",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [280, 198]",
"Tap: [504, 82]",
"Button: Press Enter",
"Tap: [286, 129]"
],
"correct_answer_index": 3
}
},
{
"step_id": 5,
"screenshot_path": "./aitw_gemini_images/episode_12224608131504749719_step_5.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Pressed Button: Press Home\nStep 2: Tapped at pixel coordinates (x,y): [289, 950]\nStep 3: Typed: 'How much does a 2x4x8 board cost at Lowes?'\nStep 4: Tapped at pixel coordinates (x,y): [286, 129]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Swipe: Down",
"Tap: [270, 830]",
"Tap: [490, 129]",
"Button: Unknown"
],
"correct_answer_index": 3
}
}
]
},
"1307957808436696980": {
"episode_goal": "Google the capital of Mexico",
"steps": [
{
"step_id": 0,
"screenshot_path": "./aitw_gemini_images/episode_1307957808436696980_step_0.png",
"action_history": "",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [175, 178]",
"Button: Press Back",
"Button: Press Home",
"Tap: [52, 83]"
],
"correct_answer_index": 2
}
},
{
"step_id": 1,
"screenshot_path": "./aitw_gemini_images/episode_1307957808436696980_step_1.png",
"action_history": "Step 0: Pressed Button: Press Home",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [270, 915]",
"Swipe: Swipe Down",
"Tap: [373, 801]",
"Swipe: Swipe Up"
],
"correct_answer_index": 3
}
},
{
"step_id": 2,
"screenshot_path": "./aitw_gemini_images/episode_1307957808436696980_step_2.png",
"action_history": "Step 0: Pressed Button: Press Home\nStep 1: Swiped: Swipe Up",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Button: Press Home",
"Type: 'capital of Mexico'",
"Button: Press Back",
"Tap: [270, 100]"
],
"correct_answer_index": 0
}
},
{
"step_id": 3,
"screenshot_path": "./aitw_gemini_images/episode_1307957808436696980_step_3.png",
"action_history": "Step 0: Pressed Button: Press Home\nStep 1: Swiped: Swipe Up\nStep 2: Pressed Button: Press Home",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Type: 'capital of Mexico'",
"Swipe: Up",
"Tap: [320, 974]",
"Tap: [398, 866]"
],
"correct_answer_index": 2
}
},
{
"step_id": 4,
"screenshot_path": "./aitw_gemini_images/episode_1307957808436696980_step_4.png",
"action_history": "Step 0: Pressed Button: Press Home\nStep 1: Swiped: Swipe Up\nStep 2: Pressed Button: Press Home\nStep 3: Tapped at pixel coordinates (x,y): [320, 974]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Button: Press Back",
"Type: 'Google the capital of Mexico'",
"Tap: [484, 86]",
"Type: 'Mexico City'"
],
"correct_answer_index": 1
}
},
{
"step_id": 5,
"screenshot_path": "./aitw_gemini_images/episode_1307957808436696980_step_5.png",
"action_history": "Step 0: Pressed Button: Press Home\nStep 1: Swiped: Swipe Up\nStep 2: Pressed Button: Press Home\nStep 3: Tapped at pixel coordinates (x,y): [320, 974]\nStep 4: Typed: 'Google the capital of Mexico'",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Button: Press Enter",
"Swipe: Up",
"Type: 'search'",
"Tap: [185, 114]"
],
"correct_answer_index": 3
}
},
{
"step_id": 6,
"screenshot_path": "./aitw_gemini_images/episode_1307957808436696980_step_6.png",
"action_history": "Step 0: Pressed Button: Press Home\nStep 1: Swiped: Swipe Up\nStep 2: Pressed Button: Press Home\nStep 3: Tapped at pixel coordinates (x,y): [320, 974]\nStep 4: Typed: 'Google the capital of Mexico'\nStep 5: Tapped at pixel coordinates (x,y): [185, 114]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Type: 'search'",
"Button: Press Enter",
"Button: Unknown",
"Swipe: Left"
],
"correct_answer_index": 2
}
}
]
}
}