import json
import base64
import io
import html
from PIL import Image
# The annotation is quoted so that evaluating this ``def`` does not require
# ``X | Y`` union support at runtime (Python < 3.10 raises TypeError on it).
def image_to_base64_url(image: "str | Image.Image"):
    """Encode an image as a base64 ``data:`` URL.

    Args:
        image: Either a filesystem path to an image file, whose bytes are
            embedded unchanged, or a PIL image, which is re-encoded as PNG.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``. For a path the
        MIME type is guessed from the file extension and falls back to
        ``image/png`` when the extension is unknown or not an image type.
        PIL images are always reported as ``image/png``.

    Raises:
        ValueError: If ``image`` is neither a ``str`` nor a PIL image.
        OSError: If the file at ``image`` cannot be opened or read.
    """
    # Function-scope import keeps this block self-contained (stdlib only).
    import mimetypes

    mime_type = "image/png"
    if isinstance(image, str):
        # The file's bytes are passed through untouched, so the declared MIME
        # type has to describe the file on disk rather than always being PNG.
        guessed, _ = mimetypes.guess_type(image)
        if guessed is not None and guessed.startswith("image/"):
            mime_type = guessed
        with open(image, "rb") as f:
            payload = f.read()
    elif isinstance(image, Image.Image):
        # Images with an alpha channel are flattened to RGB before encoding,
        # i.e. transparency is discarded (behavior kept from the original).
        if image.mode in ("RGBA", "LA"):
            image = image.convert("RGB")
        with io.BytesIO() as buffer:
            image.save(buffer, format="PNG")
            payload = buffer.getvalue()
    else:
        raise ValueError(f"Invalid image type: {type(image)}")
    return f"data:{mime_type};base64," + base64.b64encode(payload).decode("utf-8")
def load_json(file_path: str) -> dict:
    """Read the JSON document stored at ``file_path`` and return the parsed value.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's default locale encoding.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)
def save_json(data: dict, file_path: str):
    """Write ``data`` to ``file_path`` as JSON indented by four spaces.

    Serialization happens before the file is opened, so a value that cannot be
    serialized raises without truncating or partially overwriting a file that
    already exists at ``file_path``.

    Raises:
        TypeError: If ``data`` contains a value that is not JSON-serializable.
        OSError: If the file cannot be opened for writing.
    """
    serialized = json.dumps(data, indent=4)
    with open(file_path, "w", encoding="utf-8") as f:
        f.write(serialized)
def str_to_bool(s: str) -> bool:
    """Interpret a textual flag as a boolean.

    Matching is case-insensitive and ignores leading/trailing whitespace, so
    values such as ``"True\\n"`` or ``" no "`` are accepted.

    Args:
        s: One of ``true/1/yes/y`` or ``false/0/no/n``.

    Returns:
        ``True`` or ``False`` according to the recognized word.

    Raises:
        ValueError: If ``s`` is not one of the recognized spellings.
    """
    normalized = s.strip().lower()
    if normalized in ("true", "1", "yes", "y"):
        return True
    if normalized in ("false", "0", "no", "n"):
        return False
    raise ValueError(f"Invalid boolean string: {s}")
def _escape_text(value):
    """Return ``value`` HTML-escaped, coercing non-strings (None, lists, numbers) with str()."""
    return html.escape(str(value))


def _collapsible(summary_html, body_html):
    """Wrap already-escaped content in a <details> element toggled by clicking its summary."""
    return (
        f"<details>\n<summary>{summary_html}</summary>\n"
        f"<pre>{body_html}</pre>\n</details>\n"
    )


def _response_section(title, css_class, blocks):
    """Render the candidate blocks of one step (chosen or rejected) as an HTML section."""
    parts = [f'<div class="{css_class}">\n<h3>{title}</h3>\n']
    for block in blocks:
        thought = block.get('thought', 'N/A')
        action = block.get('action', 'N/A')
        # ``or []`` tolerates an explicit null for either list.
        responses = block.get('response') or []
        scores = block.get('score') or []
        parts.append(
            '<div class="response-block">\n'
            f"<p><strong>Thought:</strong></p>\n<pre>{_escape_text(thought)}</pre>\n"
            f"<p><strong>Action:</strong></p>\n<pre>{_escape_text(action)}</pre>\n"
        )
        # One toggle per judge response; zip() stops at the shorter list.
        for idx, (response, score) in enumerate(zip(responses, scores)):
            parts.append(
                _collapsible(
                    f"Judge Response {idx + 1}: {_escape_text(score)}",
                    _escape_text(response),
                )
            )
        parts.append("</div>\n")
    parts.append("</div>\n")
    return "".join(parts)


def create_html_report(json_path, html_path, checklist_generation=False):
    """
    Reads the given JSON result file and generates a filterable HTML report.

    The JSON file is expected to hold a list of step records (dicts). Every
    value taken from a record is converted with ``str()`` and HTML-escaped
    before being embedded. Failures while loading or writing are reported with
    ``print`` and the function returns ``None`` instead of raising.

    NOTE(review): the string literals in the reviewed version carried no tags
    and some were unterminated; the markup, styles and filter script below are
    a reconstruction guided by the comments and by the fields that were read
    but never rendered. Confirm the layout against the intended design.

    Args:
        json_path (str): Path to the input JSON file.
        html_path (str): Path to the output HTML file.
        checklist_generation (bool): When true, a record's
            ``generated_checklist`` (if not None) is rendered as well.
    """
    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"Error: JSON file not found - {json_path}")
        return
    except json.JSONDecodeError:
        print(f"Error: JSON file parsing error - {json_path}")
        return
    except Exception as e:
        print(f"Unexpected error during data loading: {e}")
        return

    # Unique task IDs feed the filter drop-down.
    unique_ids = {
        item.get("task_id") for item in data if item.get("task_id") is not None
    }
    try:
        task_ids = sorted(unique_ids)
    except TypeError:
        # Mixed ID types (e.g. int and str) cannot be ordered directly.
        task_ids = sorted(unique_ids, key=str)

    # Collect fragments and join once instead of growing a string with ``+=``.
    parts = [
        '<!DOCTYPE html>\n<html lang="en">\n<head>\n<meta charset="utf-8">\n'
        '<title>Benchmark Results Report</title>\n'
        '<style>\n'
        'body { font-family: sans-serif; margin: 2em; }\n'
        'pre { white-space: pre-wrap; word-break: break-word; }\n'
        'summary { cursor: pointer; font-weight: bold; }\n'
        '.task-container { border: 1px solid #ccc; margin: 1em 0; padding: 1em; }\n'
        '.response-block { border-top: 1px dashed #ccc; margin-top: 0.5em; }\n'
        '</style>\n</head>\n<body>\n'
        '<h1>Benchmark Results Report</h1>\n'
        '<label for="task-filter">Task ID: </label>\n'
        '<select id="task-filter">\n<option value="">All</option>\n'
    ]
    for known_id in task_ids:
        escaped_id = _escape_text(known_id)
        parts.append(f'<option value="{escaped_id}">{escaped_id}</option>\n')
    parts.append('</select>\n')

    # Process each Task/Step data
    for i, step_data in enumerate(data):
        task_id = step_data.get("task_id", "N/A")
        step_id = step_data.get("step_id", "N/A")
        intent = step_data.get("intent", "N/A")
        start_url = step_data.get("start_url", "N/A")
        gt_checklist = step_data.get("gt_checklist", "N/A")
        generated_checklist = step_data.get("generated_checklist", None)
        trajectory = step_data.get("trajectory", "N/A")
        text_observation = step_data.get("text_observation", "N/A")
        source_name = step_data.get("source_name", "")

        # Each step lives in its own container; ``data-task-id`` is what the
        # filter script matches against the selected option.
        parts.append(
            f'<div class="task-container" id="task-step-{i}" '
            f'data-task-id="{_escape_text(task_id)}">\n'
            f"<h2>Task ID: {_escape_text(task_id)} | "
            f"Step ID: {_escape_text(step_id)}</h2>\n"
        )
        if source_name:
            parts.append(
                f"<p><strong>Source:</strong> {_escape_text(source_name)}</p>\n"
            )
        # The URL is emitted as plain text, not a link, so an untrusted value
        # cannot become a clickable ``javascript:`` target.
        parts.append(
            f"<p><strong>Intent:</strong> {_escape_text(intent)}</p>\n"
            f"<p><strong>Start URL:</strong> {_escape_text(start_url)}</p>\n"
        )
        parts.append(
            _collapsible(
                "GT Checklist (Click to expand/collapse)",
                _escape_text(gt_checklist),
            )
        )
        if checklist_generation and generated_checklist is not None:
            parts.append(
                _collapsible(
                    "Generated Checklist (Click to expand/collapse)",
                    _escape_text(generated_checklist),
                )
            )
        parts.append(
            _collapsible(
                "Trajectory (Click to expand/collapse)",
                _escape_text(trajectory),
            )
        )
        parts.append(
            _collapsible(
                "Text Observation (Click to expand/collapse)",
                _escape_text(text_observation),
            )
        )

        # Chosen / rejected sections are emitted only when non-empty.
        if step_data.get('chosen'):
            parts.append(
                _response_section(
                    'Chosen Responses:', 'chosen-section', step_data['chosen']
                )
            )
        if step_data.get('rejected'):
            parts.append(
                _response_section(
                    'Rejected Responses:', 'rejected-section', step_data['rejected']
                )
            )
        parts.append('</div>\n')

    # Finalize HTML and add JavaScript: selecting a task hides every
    # container whose data-task-id differs; "All" shows everything again.
    parts.append(
        '<script>\n'
        'document.getElementById("task-filter").addEventListener("change", function () {\n'
        '  var wanted = this.value;\n'
        '  document.querySelectorAll(".task-container").forEach(function (el) {\n'
        '    el.style.display = (!wanted || el.dataset.taskId === wanted) ? "" : "none";\n'
        '  });\n'
        '});\n'
        '</script>\n</body>\n</html>\n'
    )
    html_content = "".join(parts)

    # Save the HTML file
    try:
        with open(html_path, 'w', encoding='utf-8') as f:
            f.write(html_content)
        print(f"Completed: HTML report created at {html_path}")
    except IOError:
        print(f"Error: Failed to write HTML file - {html_path}")
    except Exception as e:
        print(f"Unexpected error during HTML file saving: {e}")
# --- Example Usage ---
# input_json_file = 'path/to/your/results.json'
# output_html_file = 'trajectory_report.html'
# create_html_report(input_json_file, output_html_file)