VLM_Comparison / aitw_o1_dataset1.json
advaitgupta's picture
Update aitw_o1_dataset1.json
2d04abb verified
{
"16849016505201470177": {
"episode_goal": "Check the settings for the Instagram app",
"steps": [
{
"step_id": 0,
"screenshot_path": "./aitw_o1_images1/episode_16849016505201470177_step_0.png",
"action_history": "",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Swipe: Swipe Up",
"Type: 'instagram'",
"Tap: [640, 70]",
"Swipe: Down"
],
"correct_answer_index": 0
}
},
{
"step_id": 1,
"screenshot_path": "./aitw_o1_images1/episode_16849016505201470177_step_1.png",
"action_history": "Step 0: Swiped: Swipe Up",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [455, 705]",
"Tap: [105, 1400]",
"Swipe: Down",
"Tap: [491, 753]"
],
"correct_answer_index": 3
}
},
{
"step_id": 2,
"screenshot_path": "./aitw_o1_images1/episode_16849016505201470177_step_2.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [491, 753]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Type: 'Check the setting for the Instagram app'",
"Tap: [365, 1450]",
"Type: 'Check the settings for the Instagram app'",
"Swipe: Down"
],
"correct_answer_index": 2
}
},
{
"step_id": 3,
"screenshot_path": "./aitw_o1_images1/episode_16849016505201470177_step_3.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [491, 753]\nStep 2: Typed: 'Check the settings for the Instagram app'",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [128, 313]",
"Tap: [128, 380]",
"Tap: [650, 100]",
"Type: 'Open Instagram Settings'"
],
"correct_answer_index": 0
}
},
{
"step_id": 4,
"screenshot_path": "./aitw_o1_images1/episode_16849016505201470177_step_4.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [491, 753]\nStep 2: Typed: 'Check the settings for the Instagram app'\nStep 3: Tapped at pixel coordinates (x,y): [128, 313]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [650, 80]",
"Type: 'Check the settings for the Telegram app'",
"Swipe: Down",
"Type: 'Check the settings for the Instagram app'"
],
"correct_answer_index": 3
}
},
{
"step_id": 5,
"screenshot_path": "./aitw_o1_images1/episode_16849016505201470177_step_5.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [491, 753]\nStep 2: Typed: 'Check the settings for the Instagram app'\nStep 3: Tapped at pixel coordinates (x,y): [128, 313]\nStep 4: Typed: 'Check the settings for the Instagram app'",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Type: 'Open Wi-Fi settings'",
"Tap: [194, 108]",
"Swipe: Down",
"Tap: [200, 145]"
],
"correct_answer_index": 1
}
},
{
"step_id": 6,
"screenshot_path": "./aitw_o1_images1/episode_16849016505201470177_step_6.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [491, 753]\nStep 2: Typed: 'Check the settings for the Instagram app'\nStep 3: Tapped at pixel coordinates (x,y): [128, 313]\nStep 4: Typed: 'Check the settings for the Instagram app'\nStep 5: Tapped at pixel coordinates (x,y): [194, 108]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Type: 'Check the settings for the Instagram app'",
"Tap: [100, 180]",
"Type: 'Check the battery usage for the Instagram app'",
"Swipe: Down"
],
"correct_answer_index": 0
}
},
{
"step_id": 7,
"screenshot_path": "./aitw_o1_images1/episode_16849016505201470177_step_7.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [491, 753]\nStep 2: Typed: 'Check the settings for the Instagram app'\nStep 3: Tapped at pixel coordinates (x,y): [128, 313]\nStep 4: Typed: 'Check the settings for the Instagram app'\nStep 5: Tapped at pixel coordinates (x,y): [194, 108]\nStep 6: Typed: 'Check the settings for the Instagram app'",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [660, 298]",
"Swipe: Left",
"Tap: [221, 296]",
"Tap: [222, 240]"
],
"correct_answer_index": 2
}
},
{
"step_id": 8,
"screenshot_path": "./aitw_o1_images1/episode_16849016505201470177_step_8.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [491, 753]\nStep 2: Typed: 'Check the settings for the Instagram app'\nStep 3: Tapped at pixel coordinates (x,y): [128, 313]\nStep 4: Typed: 'Check the settings for the Instagram app'\nStep 5: Tapped at pixel coordinates (x,y): [194, 108]\nStep 6: Typed: 'Check the settings for the Instagram app'\nStep 7: Tapped at pixel coordinates (x,y): [221, 296]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Button: Press Home",
"Type: 'Open Wi-Fi settings'",
"Tap: [690, 120]",
"Button: Unknown"
],
"correct_answer_index": 3
}
}
]
},
"9836837435576948503": {
"episode_goal": "What's on the menu at Five Guys?",
"steps": [
{
"step_id": 0,
"screenshot_path": "./aitw_o1_images1/episode_9836837435576948503_step_0.png",
"action_history": "",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Type: 'View McDonald\u2019s menu'",
"Swipe: Down",
"Swipe: Swipe Up",
"Tap: [350, 150]"
],
"correct_answer_index": 2
}
},
{
"step_id": 1,
"screenshot_path": "./aitw_o1_images1/episode_9836837435576948503_step_1.png",
"action_history": "Step 0: Swiped: Swipe Up",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [460, 760]",
"Tap: [495, 743]",
"Swipe: Down",
"Tap: [495, 900]"
],
"correct_answer_index": 1
}
},
{
"step_id": 2,
"screenshot_path": "./aitw_o1_images1/episode_9836837435576948503_step_2.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [495, 743]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [590, 760]",
"Tap: [530, 763]",
"Type: 'Burger King menu'",
"Swipe: Left"
],
"correct_answer_index": 1
}
},
{
"step_id": 3,
"screenshot_path": "./aitw_o1_images1/episode_9836837435576948503_step_3.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [495, 743]\nStep 2: Tapped at pixel coordinates (x,y): [530, 763]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Type: 'five guys phone number'",
"Swipe: Down",
"Tap: [620, 750]",
"Tap: [494, 754]"
],
"correct_answer_index": 3
}
},
{
"step_id": 4,
"screenshot_path": "./aitw_o1_images1/episode_9836837435576948503_step_4.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [495, 743]\nStep 2: Tapped at pixel coordinates (x,y): [530, 763]\nStep 3: Tapped at pixel coordinates (x,y): [494, 754]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [320, 282]",
"Tap: [220, 530]",
"Tap: [320, 330]",
"Type: 'What's the weather tomorrow?'"
],
"correct_answer_index": 0
}
},
{
"step_id": 5,
"screenshot_path": "./aitw_o1_images1/episode_9836837435576948503_step_5.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [495, 743]\nStep 2: Tapped at pixel coordinates (x,y): [530, 763]\nStep 3: Tapped at pixel coordinates (x,y): [494, 754]\nStep 4: Tapped at pixel coordinates (x,y): [320, 282]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [650, 120]",
"Tap: [95, 95]",
"Swipe: Right",
"Tap: [662, 103]"
],
"correct_answer_index": 3
}
},
{
"step_id": 6,
"screenshot_path": "./aitw_o1_images1/episode_9836837435576948503_step_6.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [495, 743]\nStep 2: Tapped at pixel coordinates (x,y): [530, 763]\nStep 3: Tapped at pixel coordinates (x,y): [494, 754]\nStep 4: Tapped at pixel coordinates (x,y): [320, 282]\nStep 5: Tapped at pixel coordinates (x,y): [662, 103]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Swipe: Down",
"Type: 'What's on the menu at Five Guys?'",
"Type: 'What's on the menu at Shake Shack?'",
"Tap: [660, 510]"
],
"correct_answer_index": 1
}
},
{
"step_id": 7,
"screenshot_path": "./aitw_o1_images1/episode_9836837435576948503_step_7.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [495, 743]\nStep 2: Tapped at pixel coordinates (x,y): [530, 763]\nStep 3: Tapped at pixel coordinates (x,y): [494, 754]\nStep 4: Tapped at pixel coordinates (x,y): [320, 282]\nStep 5: Tapped at pixel coordinates (x,y): [662, 103]\nStep 6: Typed: 'What's on the menu at Five Guys?'",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [660, 180]",
"Tap: [282, 180]",
"Swipe: Up",
"Tap: [220, 90]"
],
"correct_answer_index": 1
}
},
{
"step_id": 8,
"screenshot_path": "./aitw_o1_images1/episode_9836837435576948503_step_8.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [495, 743]\nStep 2: Tapped at pixel coordinates (x,y): [530, 763]\nStep 3: Tapped at pixel coordinates (x,y): [494, 754]\nStep 4: Tapped at pixel coordinates (x,y): [320, 282]\nStep 5: Tapped at pixel coordinates (x,y): [662, 103]\nStep 6: Typed: 'What's on the menu at Five Guys?'\nStep 7: Tapped at pixel coordinates (x,y): [282, 180]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [360, 420]",
"Swipe: Down",
"Tap: [130, 1145]",
"Tap: [194, 1092]"
],
"correct_answer_index": 3
}
},
{
"step_id": 9,
"screenshot_path": "./aitw_o1_images1/episode_9836837435576948503_step_9.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [495, 743]\nStep 2: Tapped at pixel coordinates (x,y): [530, 763]\nStep 3: Tapped at pixel coordinates (x,y): [494, 754]\nStep 4: Tapped at pixel coordinates (x,y): [320, 282]\nStep 5: Tapped at pixel coordinates (x,y): [662, 103]\nStep 6: Typed: 'What's on the menu at Five Guys?'\nStep 7: Tapped at pixel coordinates (x,y): [282, 180]\nStep 8: Tapped at pixel coordinates (x,y): [194, 1092]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [660, 100]",
"Swipe: Down",
"Tap: [540, 925]",
"Button: Unknown"
],
"correct_answer_index": 3
}
}
]
}
}