iruno's picture
Upload 245 files
498ffec verified
import base64
import html
import io
import json
import mimetypes

from PIL import Image
def image_to_base64_url(image: "str | Image.Image") -> str:
    """Encode an image as a base64 ``data:`` URL.

    Args:
        image: Either a filesystem path to an image file, whose bytes are
            embedded unchanged, or a PIL image, which is re-encoded as PNG
            (modes ``RGBA`` and ``LA`` are converted to ``RGB`` first).

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``. For a PIL
        image the MIME type is always ``image/png``. For a path it is the
        image type implied by the file extension, falling back to
        ``image/png`` when the extension is missing or not an image type.

    Raises:
        ValueError: If ``image`` is neither a ``str`` nor a PIL image.
    """
    if isinstance(image, str):
        # The file's bytes are embedded as-is, so the URL must advertise the
        # file's own format rather than assuming PNG.
        guessed, _ = mimetypes.guess_type(image)
        if guessed and guessed.startswith("image/"):
            mime_type = guessed
        else:
            mime_type = "image/png"
        with open(image, "rb") as f:
            payload = f.read()
    elif isinstance(image, Image.Image):
        if image.mode in ("RGBA", "LA"):
            image = image.convert("RGB")
        with io.BytesIO() as buffer:
            image.save(buffer, format="PNG")
            payload = buffer.getvalue()
        mime_type = "image/png"
    else:
        raise ValueError(f"Invalid image type: {type(image)}")
    return f"data:{mime_type};base64," + base64.b64encode(payload).decode("utf-8")
def load_json(file_path: str) -> dict:
    """Read and parse a JSON file.

    The file is decoded as UTF-8 regardless of the platform's default
    encoding.

    Args:
        file_path: Path to the JSON file.

    Returns:
        The parsed top-level JSON value as returned by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)
def save_json(data: dict, file_path: str):
    """Serialize ``data`` to ``file_path`` as JSON indented by four spaces.

    The file is opened as UTF-8 explicitly instead of relying on the
    platform's default encoding. ``json.dump`` is called with its default
    settings apart from ``indent=4``.

    Args:
        data: JSON-serializable value to write.
        file_path: Destination path; an existing file is overwritten.

    Raises:
        OSError: If the file cannot be opened for writing.
        TypeError: If ``data`` contains values ``json`` cannot serialize.
    """
    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4)
def str_to_bool(s: str) -> bool:
    """Interpret a textual flag as a boolean.

    Matching is case-insensitive and ignores leading/trailing whitespace, so
    values such as ``"True\\n"`` are accepted.

    Args:
        s: One of ``true``/``1``/``yes``/``y`` or ``false``/``0``/``no``/``n``.

    Returns:
        ``True`` or ``False`` according to the recognised value.

    Raises:
        ValueError: If ``s`` is not one of the recognised values.
    """
    normalized = s.strip().lower()
    if normalized in ("true", "1", "yes", "y"):
        return True
    if normalized in ("false", "0", "no", "n"):
        return False
    raise ValueError(f"Invalid boolean string: {s}")
# Static page prologue: document head, styles, and the start of the task selector.
_REPORT_HEAD = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Benchmark Results Report</title>
<style>
body { font-family: sans-serif; line-height: 1.6; padding: 20px; }
.task-step { border: 1px solid #ccc; margin-bottom: 20px; padding: 15px; border-radius: 5px; background-color: #f9f9f9; }
.task-step h2 { margin-top: 0; color: #333; border-bottom: 1px solid #eee; padding-bottom: 5px;}
.task-step h3 { color: #555; margin-top: 15px; margin-bottom: 5px; }
.task-step h4 { color: #777; margin-top: 10px; margin-bottom: 5px; font-style: italic;}
pre { background-color: #eee; padding: 10px; border-radius: 3px; white-space: pre-wrap; word-wrap: break-word; font-size: 0.9em; margin-top: 5px; }
details { margin-top: 10px; border: 1px solid #ddd; border-radius: 3px; background-color: #fff; }
summary { cursor: pointer; padding: 8px; background-color: #f8f9fa; font-weight: bold; border-bottom: 1px solid #ddd; }
details[open] summary { border-bottom: 1px solid #ddd; }
details > pre { border: none; background-color: #fff; padding: 10px 8px; }
.response-item-toggle { margin-top: 10px; }
.chosen-section { border-left: 5px solid #4CAF50; padding-left: 10px; margin-top: 15px; }
.rejected-section { border-left: 5px solid #f44336; padding-left: 10px; margin-top: 15px; }
hr { border: 0; border-top: 1px solid #eee; margin: 15px 0; }
.thought-action { background-color: #f0f0f0; padding: 10px; border-radius: 3px; margin-bottom: 10px; border: 1px solid #e0e0e0;}
.thought-action h4 { margin-top: 0; color: #666; }
.task-container { display: none; }
.filter-controls { margin-bottom: 20px; padding: 10px; background-color: #e9ecef; border-radius: 5px; }
.filter-controls label { margin-right: 10px; font-weight: bold; }
.filter-controls select { padding: 5px; border-radius: 3px; border: 1px solid #ced4da; }
</style>
</head>
<body>
<h1>Benchmark Results Report</h1>
<!-- Task ID Filter Dropdown -->
<div class="filter-controls">
<label for="taskSelector">Select Task ID:</label>
<select id="taskSelector">
<option value="">-- Show All --</option>
"""

# Closes the task selector and opens the container that holds every step.
_REPORT_RESULTS_OPEN = """
</select>
</div>
<!-- Results Display Area -->
<div id="resultsArea">
"""

# Closes one task/step entry.
_REPORT_STEP_CLOSE = """
</div> <!-- End task-step -->
</div> <!-- End task-container -->
"""

# Static page epilogue: closes the results area and adds the filtering script.
_REPORT_TAIL = """
</div> <!-- End resultsArea -->
<script>
document.addEventListener('DOMContentLoaded', function() {
const taskSelector = document.getElementById('taskSelector');
const taskContainers = document.querySelectorAll('.task-container');
function filterTasks() {
const selectedTaskId = taskSelector.value;
taskContainers.forEach(container => {
const containerTaskId = container.getAttribute('data-task-id');
// Show if no Task ID is selected (Show All) or if the container's Task ID matches
if (selectedTaskId === "" || containerTaskId === selectedTaskId) {
container.style.display = 'block';
} else {
// Otherwise, hide it
container.style.display = 'none';
}
});
}
// Run filter function on dropdown change
taskSelector.addEventListener('change', filterTasks);
// Run initial filtering on page load (default: Show All)
filterTasks();
});
</script>
</body>
</html>
"""


def _esc(value):
    """Return ``value`` converted to text and HTML-escaped."""
    # str() first: JSON fields may be lists, numbers or null, not only strings.
    return html.escape(str(value))


def _sorted_task_ids(data):
    """Return the unique non-null task IDs found in ``data``, sorted."""
    unique_ids = {item.get("task_id") for item in data if item.get("task_id") is not None}
    try:
        return sorted(unique_ids)
    except TypeError:
        # Mixed ID types (e.g. int and str) cannot be compared with each
        # other; fall back to ordering by their text form.
        return sorted(unique_ids, key=str)


def _render_response_section(blocks, css_class, heading):
    """Return the HTML fragments for one chosen/rejected response section."""
    fragments = [f'<div class="{css_class}"><h3>{heading}</h3>']
    for block in blocks:
        thought = _esc(block.get('thought', 'N/A'))
        action = _esc(block.get('action', 'N/A'))
        fragments.append(f"""
<div class="thought-action">
<h4>Thought:</h4>
<pre>{thought}</pre>
<h4>Action:</h4>
<pre>{action}</pre>
</div>""")
        # `or []` also covers keys that are present but null.
        responses = block.get('response') or []
        scores = block.get('score') or []
        # One collapsible toggle per (judge response, score) pair.
        for number, (response, score) in enumerate(zip(responses, scores), start=1):
            fragments.append(f"""
<details class="response-item-toggle">
<summary>Judge Response {number}: {_esc(score)}</summary>
<pre>{_esc(response)}</pre>
</details>""")
    fragments.append('</div>')
    return fragments


def _render_step(step_data, checklist_generation):
    """Return the HTML fragments for a single task/step record."""
    task_id = _esc(step_data.get("task_id", "N/A"))
    step_id = _esc(step_data.get("step_id", "N/A"))
    intent = _esc(step_data.get("intent", "N/A"))
    start_url = _esc(step_data.get("start_url", "N/A"))
    gt_checklist = _esc(step_data.get("gt_checklist", "N/A"))
    trajectory = _esc(step_data.get("trajectory", "N/A"))
    text_observation = _esc(step_data.get("text_observation", "N/A"))
    generated_checklist = step_data.get("generated_checklist")
    source_name = step_data.get("source_name", "")
    source_label = f"({_esc(source_name)})" if source_name else ""

    # Each record gets its own container; the stylesheet hides it until the
    # page script applies the current filter.
    fragments = [f"""
<div class="task-container" data-task-id="{task_id}">
<div class="task-step">
<h2>Task ID: {task_id} | Step ID: {step_id} {source_label}</h2>
<h3>Intent:</h3>
<p>{intent}</p>
<p><strong>Start URL:</strong> <a href="{start_url}" target="_blank">{start_url}</a></p>
<h3>Ground Truth Checklist:</h3>
<pre>{gt_checklist}</pre>
"""]
    if checklist_generation and generated_checklist is not None:
        fragments.append(f"""
<details>
<summary>Generated Checklist (Click to expand/collapse)</summary>
<pre>{_esc(generated_checklist)}</pre>
</details>
""")
    fragments.append(f"""
<details>
<summary>Trajectory (Click to expand/collapse)</summary>
<pre>{trajectory}</pre>
</details>
<details>
<summary>Text Observation (Click to expand/collapse)</summary>
<pre>{text_observation}</pre>
</details>
<hr>
""")
    chosen = step_data.get('chosen')
    if chosen:
        fragments.extend(_render_response_section(chosen, "chosen-section", "Chosen Responses:"))
    rejected = step_data.get('rejected')
    if rejected:
        fragments.extend(_render_response_section(rejected, "rejected-section", "Rejected Responses:"))
    fragments.append(_REPORT_STEP_CLOSE)
    return fragments


def create_html_report(json_path, html_path, checklist_generation=False):
    """
    Reads the given JSON result file and generates a filterable HTML report.

    The JSON file must contain a list of objects, one per task step. The
    report lists every step and offers a dropdown that filters steps by task
    ID. Failures while loading or writing are reported with ``print`` and the
    function returns without raising.

    Args:
        json_path (str): Path to the input JSON file.
        html_path (str): Path to the output HTML file.
        checklist_generation (bool): When true, each step that has a
            non-null ``generated_checklist`` also shows it in a collapsible
            section.

    Returns:
        None.
    """
    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"Error: JSON file not found - {json_path}")
        return
    except json.JSONDecodeError:
        print(f"Error: JSON file parsing error - {json_path}")
        return
    except Exception as e:
        print(f"Unexpected error during data loading: {e}")
        return

    # Every record is accessed with .get(), so anything other than a list of
    # objects is reported here instead of failing later with AttributeError.
    if not isinstance(data, list) or not all(isinstance(item, dict) for item in data):
        print(f"Error: Expected a JSON list of objects - {json_path}")
        return

    # Collect fragments and join once rather than growing one string.
    fragments = [_REPORT_HEAD]
    for tid in _sorted_task_ids(data):
        fragments.append(f' <option value="{_esc(tid)}">{_esc(tid)}</option>\n')
    fragments.append(_REPORT_RESULTS_OPEN)
    for step_data in data:
        fragments.extend(_render_step(step_data, checklist_generation))
    fragments.append(_REPORT_TAIL)
    html_content = "".join(fragments)

    # Save the HTML file
    try:
        with open(html_path, 'w', encoding='utf-8') as f:
            f.write(html_content)
        print(f"Completed: HTML report created at {html_path}")
    except IOError:
        print(f"Error: Failed to write HTML file - {html_path}")
    except Exception as e:
        print(f"Unexpected error during HTML file saving: {e}")
# --- Example Usage ---
# input_json_file = 'path/to/your/results.json'
# output_html_file = 'trajectory_report.html'
# create_html_report(input_json_file, output_html_file)