# Spaces: iruno / test_wprm3 (Sleeping)
# File size: 11,090 Bytes
# 498ffec

import json
import base64
import io
import html
from PIL import Image


def image_to_base64_url(image: "str | Image.Image") -> str:
    """Encode an image as a base64 ``data:`` URL.

    Args:
        image: Either a filesystem path to an image file (``str``) or an
            in-memory PIL image.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``. For a path,
        the file bytes are embedded unchanged and ``<mime>`` is guessed from
        the file extension (falling back to ``image/png`` when the extension
        is unknown or not an image type). For a PIL image, the image is
        re-encoded as PNG and ``<mime>`` is ``image/png``.

    Raises:
        ValueError: If ``image`` is neither a ``str`` nor a PIL image.
        OSError: If the path cannot be opened or read.
    """
    import mimetypes

    if isinstance(image, str):
        with open(image, "rb") as f:
            payload = f.read()
        # The bytes are embedded as-is, so the declared MIME type must follow
        # the actual file format instead of always claiming PNG.
        guessed, _ = mimetypes.guess_type(image)
        mime = guessed if guessed and guessed.startswith("image/") else "image/png"
    elif isinstance(image, Image.Image):
        # Alpha-carrying modes are converted to RGB before encoding.
        if image.mode in ("RGBA", "LA"):
            image = image.convert("RGB")
        with io.BytesIO() as buffer:
            image.save(buffer, format="PNG")
            payload = buffer.getvalue()
        mime = "image/png"
    else:
        raise ValueError(f"Invalid image type: {type(image)}")

    return f"data:{mime};base64," + base64.b64encode(payload).decode("utf-8")


def load_json(file_path: str) -> dict:
    """Read the file at ``file_path`` and return its parsed JSON content.

    Args:
        file_path: Path to a JSON file.

    Returns:
        Whatever ``json.load`` produces for the document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Decode as UTF-8 explicitly so non-ASCII content does not depend on the
    # platform's locale encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)
    
def save_json(data: dict, file_path: str):
    """Serialize ``data`` as JSON (4-space indent) into ``file_path``.

    The target file is opened in write mode, so existing content is replaced.
    """
    with open(file_path, mode="w") as out_file:
        json.dump(data, fp=out_file, indent=4)

def str_to_bool(s: str) -> bool:
    """Interpret a textual flag as a boolean, ignoring letter case.

    ``true``/``1``/``yes``/``y`` map to ``True`` and
    ``false``/``0``/``no``/``n`` map to ``False``.

    Raises:
        ValueError: If ``s`` is not one of the recognised spellings.
    """
    truthy = ("true", "1", "yes", "y")
    falsy = ("false", "0", "no", "n")

    token = s.lower()
    if token in truthy:
        return True
    if token in falsy:
        return False
    raise ValueError(f"Invalid boolean string: {s}")
    

# Static page prologue: document head, styles, and the opening of the task
# filter dropdown (the per-task <option> elements are appended after it).
_REPORT_HEAD = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Benchmark Results Report</title>
    <style>
        body { font-family: sans-serif; line-height: 1.6; padding: 20px; }
        .task-step { border: 1px solid #ccc; margin-bottom: 20px; padding: 15px; border-radius: 5px; background-color: #f9f9f9; }
        .task-step h2 { margin-top: 0; color: #333; border-bottom: 1px solid #eee; padding-bottom: 5px;}
        .task-step h3 { color: #555; margin-top: 15px; margin-bottom: 5px; }
        .task-step h4 { color: #777; margin-top: 10px; margin-bottom: 5px; font-style: italic;}
        pre { background-color: #eee; padding: 10px; border-radius: 3px; white-space: pre-wrap; word-wrap: break-word; font-size: 0.9em; margin-top: 5px; }
        details { margin-top: 10px; border: 1px solid #ddd; border-radius: 3px; background-color: #fff; }
        summary { cursor: pointer; padding: 8px; background-color: #f8f9fa; font-weight: bold; border-bottom: 1px solid #ddd; }
        details[open] summary { border-bottom: 1px solid #ddd; }
        details > pre { border: none; background-color: #fff; padding: 10px 8px; }
        .response-item-toggle { margin-top: 10px; }
        .chosen-section { border-left: 5px solid #4CAF50; padding-left: 10px; margin-top: 15px; }
        .rejected-section { border-left: 5px solid #f44336; padding-left: 10px; margin-top: 15px; }
        hr { border: 0; border-top: 1px solid #eee; margin: 15px 0; }
        .thought-action { background-color: #f0f0f0; padding: 10px; border-radius: 3px; margin-bottom: 10px; border: 1px solid #e0e0e0;}
        .thought-action h4 { margin-top: 0; color: #666; }
        .task-container { display: none; }
        .filter-controls { margin-bottom: 20px; padding: 10px; background-color: #e9ecef; border-radius: 5px; }
        .filter-controls label { margin-right: 10px; font-weight: bold; }
        .filter-controls select { padding: 5px; border-radius: 3px; border: 1px solid #ced4da; }
    </style>
</head>
<body>
    <h1>Benchmark Results Report</h1>

    <!-- Task ID Filter Dropdown -->
    <div class="filter-controls">
        <label for="taskSelector">Select Task ID:</label>
        <select id="taskSelector">
            <option value="">-- Show All --</option>
"""

# Closes the dropdown and opens the area that holds one container per step.
_REPORT_RESULTS_OPEN = """
        </select>
    </div>

    <!-- Results Display Area -->
    <div id="resultsArea">
"""

# Static page epilogue: closes the results area and adds the filter script.
_REPORT_TAIL = """
    </div> <!-- End resultsArea -->

    <script>
        document.addEventListener('DOMContentLoaded', function() {
            const taskSelector = document.getElementById('taskSelector');
            const taskContainers = document.querySelectorAll('.task-container');

            function filterTasks() {
                const selectedTaskId = taskSelector.value;

                taskContainers.forEach(container => {
                    const containerTaskId = container.getAttribute('data-task-id');
                    // Show if no Task ID is selected (Show All) or if the container's Task ID matches
                    if (selectedTaskId === "" || containerTaskId === selectedTaskId) {
                        container.style.display = 'block';
                    } else {
                        // Otherwise, hide it
                        container.style.display = 'none';
                    }
                });
            }

            // Run filter function on dropdown change
            taskSelector.addEventListener('change', filterTasks);

            // Run initial filtering on page load (default: Show All)
            filterTasks();
        });
    </script>

</body>
</html>
"""


def _field_text(record, key):
    """Return ``record[key]`` as HTML-escaped text ("N/A" if missing or null)."""
    value = record.get(key)
    return html.escape("N/A" if value is None else str(value))


def _as_list(value):
    """Normalize ``value`` to a list: None -> [], list/tuple -> list, else [value]."""
    if value is None:
        return []
    if isinstance(value, (list, tuple)):
        return list(value)
    return [value]


def _render_judged_section(blocks, css_class, heading):
    """Render the thought/action blocks and judge responses of one section."""
    from itertools import zip_longest

    parts = [f'<div class="{css_class}"><h3>{heading}</h3>']
    for block in blocks:
        thought = _field_text(block, "thought")
        action = _field_text(block, "action")
        parts.append(f"""
            <div class="thought-action">
                <h4>Thought:</h4>
                <pre>{thought}</pre>
                <h4>Action:</h4>
                <pre>{action}</pre>
            </div>""")

        # zip_longest (not zip) so a response without a matching score, or a
        # score without a response, is still shown instead of being dropped.
        judged = zip_longest(
            _as_list(block.get("response")),
            _as_list(block.get("score")),
            fillvalue="N/A",
        )
        for number, (response, score) in enumerate(judged, start=1):
            parts.append(f"""
            <details class="response-item-toggle">
                <summary>Judge Response {number}: {html.escape(str(score))}</summary>
                <pre>{html.escape(str(response))}</pre>
            </details>""")
    parts.append('</div>')
    return "".join(parts)


def create_html_report(json_path, html_path, checklist_generation=False):
    """
    Reads the given JSON result file and generates a filterable HTML report.

    The JSON root is expected to be a list of per-step records (dicts). Each
    record is rendered in its own container tagged with its task ID, and a
    dropdown at the top of the page filters containers by task ID. All record
    values are converted to text and HTML-escaped before being embedded.

    Failures (missing/invalid JSON, unexpected JSON root, write errors) are
    reported with ``print`` and the function returns without raising.

    Args:
        json_path (str): Path to the input JSON file.
        html_path (str): Path to the output HTML file.
        checklist_generation (bool): When true, a record's
            ``generated_checklist`` (if present and not null) is included in
            a collapsible section.

    Returns:
        None
    """
    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"Error: JSON file not found - {json_path}") # Error message in English
        return
    except json.JSONDecodeError:
        print(f"Error: JSON file parsing error - {json_path}") # Error message in English
        return
    except Exception as e:
        print(f"Unexpected error during data loading: {e}") # Error message in English
        return

    if not isinstance(data, list):
        print(f"Error: JSON root must be a list of step records - {json_path}")
        return
    # Only dict records can be rendered; anything else in the list is ignored.
    steps = [entry for entry in data if isinstance(entry, dict)]

    # Extract unique Task IDs and sort them
    unique_ids = {
        tid for step in steps if (tid := step.get("task_id")) is not None
    }
    try:
        task_ids = sorted(unique_ids)
    except TypeError:
        # Mixed types (e.g. int and str IDs) cannot be compared directly;
        # fall back to ordering by their text form.
        task_ids = sorted(unique_ids, key=str)

    # Collect fragments and join once at the end instead of repeated "+=".
    parts = [_REPORT_HEAD]

    # Add dropdown options
    for tid in task_ids:
        option = html.escape(str(tid))
        parts.append(f'            <option value="{option}">{option}</option>\n')

    parts.append(_REPORT_RESULTS_OPEN)

    # Process each Task/Step data
    for step in steps:
        task_id = _field_text(step, "task_id")
        step_id = _field_text(step, "step_id")
        intent = _field_text(step, "intent")
        start_url = _field_text(step, "start_url")
        gt_checklist = _field_text(step, "gt_checklist")
        trajectory = _field_text(step, "trajectory")
        text_observation = _field_text(step, "text_observation")
        generated_checklist = step.get("generated_checklist")
        source_name = step.get("source_name", "")
        source_suffix = f'({html.escape(str(source_name))})' if source_name else ''

        # Wrap each Task/Step in a container tagged with its task ID
        # (hidden by CSS until the filter script shows it)
        parts.append(f"""
    <div class="task-container" data-task-id="{task_id}">
        <div class="task-step">
            <h2>Task ID: {task_id} | Step ID: {step_id} {source_suffix}</h2>
            <h3>Intent:</h3>
            <p>{intent}</p>
            <p><strong>Start URL:</strong> <a href="{start_url}" target="_blank">{start_url}</a></p>

            <h3>Ground Truth Checklist:</h3>
            <pre>{gt_checklist}</pre>
""")
        if checklist_generation and generated_checklist is not None:
            parts.append(f"""
            <details>
                <summary>Generated Checklist (Click to expand/collapse)</summary>
                <pre>{html.escape(str(generated_checklist))}</pre>
            </details>
""")

        parts.append(f"""
            <details>
                <summary>Trajectory (Click to expand/collapse)</summary>
                <pre>{trajectory}</pre>
            </details>

            <details>
                <summary>Text Observation (Click to expand/collapse)</summary>
                <pre>{text_observation}</pre>
            </details>
            <hr>
""")

        # Chosen Responses
        chosen = step.get("chosen")
        if chosen:
            parts.append(
                _render_judged_section(_as_list(chosen), "chosen-section", "Chosen Responses:")
            )

        # Rejected Responses
        rejected = step.get("rejected")
        if rejected:
            parts.append(
                _render_judged_section(_as_list(rejected), "rejected-section", "Rejected Responses:")
            )

        parts.append("""
        </div> <!-- End task-step -->
    </div> <!-- End task-container -->
""")

    # Finalize HTML and add JavaScript
    parts.append(_REPORT_TAIL)
    html_content = "".join(parts)

    # Save the HTML file
    try:
        with open(html_path, 'w', encoding='utf-8') as f:
            f.write(html_content)
        print(f"Completed: HTML report created at {html_path}")
    except IOError:
        print(f"Error: Failed to write HTML file - {html_path}")
    except Exception as e:
        print(f"Unexpected error during HTML file saving: {e}")

# --- Example Usage ---
# input_json_file = 'path/to/your/results.json'
# output_html_file = 'trajectory_report.html'
# create_html_report(input_json_file, output_html_file)