# coding: utf-8

# Author: Du Mingzhe (mingzhe@nus.edu.sg)
# Date: 2025-04-01

import json
import random
from urllib.parse import quote

import pandas as pd
import streamlit as st
from datasets import get_dataset_config_names
from datasets import load_dataset

st.title("Code:blue[Arena]")


# Streamlit re-executes this script on every interaction, so the dataset
# loaders are wrapped in st.cache_data to avoid re-reading and re-iterating
# both datasets on each rerun.
@st.cache_data(show_spinner=False)
def _load_venus_problems():
    """Return the Venus (LeetCode) problems keyed by their "title" field.

    Every record is tagged with type "leetcode".
    """
    problems = {}
    for problem in load_dataset("Elfsong/leetcode_data", split='train'):
        problem['type'] = "leetcode"
        problems[problem["title"]] = problem
    return problems


@st.cache_data(show_spinner=False)
def _load_apps_problems():
    """Return the APPS problems keyed by "apps_<problem_id>".

    Every record is tagged with type "apps".
    """
    problems = {}
    for problem in load_dataset("Elfsong/APPS_Python", split='test'):
        problem['type'] = "apps"
        problems[f'apps_{problem["problem_id"]}'] = problem
    return problems


# Registry of all problems; the key is the value used in the "?problem="
# query parameter of the detail view.
problem_dict = dict()

# Venus Data
with st.spinner("Loading Venus data...", show_time=True):
    problem_dict.update(_load_venus_problems())

# APPS Data
with st.spinner("Loading APPS data...", show_time=True):
    problem_dict.update(_load_apps_problems())

problem_count = len(problem_dict)


if "problem" in st.query_params:
    problem_id = str(st.query_params["problem"])

    # The identifier comes straight from the URL, so it may not match any
    # loaded problem; report that instead of crashing with a KeyError.
    problem_instance = problem_dict.get(problem_id)
    if problem_instance is None:
        st.error(f"Problem not found: {problem_id!r}")
        st.stop()
    problem_type = problem_instance['type']

    st.header(problem_id)
    
    # Each dataset stores the problem statement under a different field name.
    description_field = {
        "leetcode": "question_content",
        "apps": "problem_content",
    }.get(problem_type)

    with st.expander("Problem Description"):
        # Any other problem type leaves the expander empty.
        if description_field is not None:
            st.markdown(problem_instance[description_field])

    with st.expander("Test Cases"):
        # "test_cases" holds a JSON string; every decoded entry is read
        # through its 'input' and 'output' keys.
        parsed_cases = json.loads(problem_instance["test_cases"])
        case_inputs = [case['input'] for case in parsed_cases]
        case_outputs = [case['output'] for case in parsed_cases]
        test_case_table = pd.DataFrame({"input": case_inputs, "output": case_outputs})

        text_columns = {
            "input": st.column_config.TextColumn("Input"),
            "output": st.column_config.TextColumn("Output"),
        }
        st.dataframe(
            test_case_table,
            column_config=text_columns,
            column_order=("input", "output"),
        )

    with st.expander("Test Case Generator"):
        if problem_type != "leetcode":
            st.code("Stay tuned!")
        else:
            # Show only the first 20 lines of the generator source, preceded
            # by a notice explaining the truncation.
            generator_lines = problem_instance["test_case_generator"].split("\n")
            notice = (
                "# For now, we only disclose the top 20 lines of the test case generator.\n"
                "# the full version will be released after the paper review process.\n"
            )
            preview = "\n".join(generator_lines[:20])
            st.code(notice + preview)

    
else:
    tab_problem, tab_submission, tab_model, tab_about = st.tabs(["Problems", "Submissions", "Models", "About"])

    # Problems tab: one row per loaded problem with a link to its detail view.
    with tab_problem:
        with st.spinner("Loading Framework...", show_time=True):
            link_prefix = "https://huggingface.co/spaces/Elfsong/CodeArena/?problem="
            # problem_dict is keyed by the identifier the detail view looks
            # up (the title for LeetCode problems, "apps_<id>" for APPS), so
            # the key is used directly when building the link.
            problems = list(problem_dict.items())
            df = pd.DataFrame(
                {
                    "problem_id": [int(problem['problem_id']) for _, problem in problems],
                    "difficulty": [str(problem['difficulty']) for _, problem in problems],
                    "type": [str(problem['type']) for _, problem in problems],
                    # Percent-encode the identifier: characters such as
                    # space, '+', '&' or '#' would otherwise be altered or
                    # cut off when the query string is parsed.
                    "problem_link": [link_prefix + quote(str(key), safe="") for key, _ in problems],
                    # Random placeholder series (20 points in 0..100), not
                    # real acceptance data.
                    "acceptance_rate": [[random.randint(0, 100) for _ in range(20)] for _ in problems],
                }
            )
        st.dataframe(
            df,
            column_config={
                "problem_id": st.column_config.NumberColumn("Problem ID", width='small'),
                "difficulty": st.column_config.TextColumn("Difficulty", width='small'),
                "type": st.column_config.TextColumn("Type", width='small'),
                "acceptance_rate": st.column_config.LineChartColumn("Acceptance Rate", y_min=0, y_max=100),
                "problem_link": st.column_config.LinkColumn("Link", display_text="Open", width='small'),
            },
            height=800,
            column_order=("problem_id", "difficulty", "type", "acceptance_rate", "problem_link"),
            hide_index=True,
        )

    # Submissions tab: list the stored solutions of one selected model.
    with tab_submission:
        st.header("Submissions")
        # Each dataset configuration name is offered as a selectable model.
        models = get_dataset_config_names("Elfsong/Venus_Model_Evaluation")
        model_name = st.selectbox("Which model you are looking for?", models, placeholder="Select a model...")
        st.write("You selected:", model_name)

        with st.spinner("Loading Data...", show_time=True):
            ds = load_dataset("Elfsong/Venus_Model_Evaluation", model_name, split='train')
            # Walk the dataset once and split the pairs into columns afterwards.
            submissions = [(int(row['problem_id']), str(row['solution'])) for row in ds]
            df = pd.DataFrame(
                {
                    "problem_id": [pid for pid, _ in submissions],
                    "solution": [solution for _, solution in submissions],
                }
            )
        st.dataframe(
            df,
            column_config={
                "problem_id": st.column_config.NumberColumn("Problem ID", width='small'),
                # Valid named widths are "small", "medium" and "large";
                # "big" is not one of them.
                "solution": st.column_config.TextColumn("Solution", width='large'),
            },
            height=800,
            column_order=("problem_id", "solution"),
            hide_index=True,
        )


    # Models tab: leaderboard skeleton. All metric columns are zero and the
    # progress value is random placeholder data.
    with tab_model:
        model_list = [
            "deepSeek-Coder",
            "GPT-4o",
            "Claude-3-5-sonnet",
            "Gemini-1.5-flash",
            "DeepSeek-Coder-V2-Lite",
            "Claude-3-Opus",
            "Gemini-1.5-pro",
            "Llama-3.1-8B",
            "Llama-3-8B",
            "GPT-4-Turbo",
            "GPT-3.5-Turbo",
            "Mistral-Nemo",
            "CodeLlama-13b",
            "Claude-3-Haiku",
            "Mistral-7B-v0.3",
            "Codestral-22B-v0.1",
            "Claude-3-sonnet",
            "CodeLlama-34b",
            "CodeLlama-7b"
        ]

        model_total = len(model_list)
        df = pd.DataFrame(
            {
                "model_name": list(model_list),
                "dynamic_point": [0] * model_total,
                "pass@1": [0] * model_total,
                "beyond@t": [0] * model_total,
                "beyond@m": [0] * model_total,
                # random.randint includes both endpoints, so the upper bound
                # is problem_count itself to stay within the progress
                # column's max_value.
                "model_progress": [random.randint(0, problem_count) for _ in model_list],
            }
        )

        st.dataframe(
            df,
            column_config={
                "model_name": st.column_config.TextColumn("Model Name"),
                "dynamic_point": st.column_config.NumberColumn("Dynamic Point"),
                "pass@1": st.column_config.NumberColumn("Pass@1"),
                "beyond@t": st.column_config.NumberColumn("Beyond@Time"),
                "beyond@m": st.column_config.NumberColumn("Beyond@Memory"),
                "model_progress": st.column_config.ProgressColumn("Progress", min_value=0, max_value=problem_count, format="compact"),
            },
            # column_order takes DataFrame column names, not display labels.
            column_order=("model_name", "dynamic_point", "pass@1", "beyond@t", "beyond@m", "model_progress"),
            height=800,
        )

    # About tab: static informational text, written in order.
    with tab_about:
        about_messages = (
            "Hello World!",
            "This is the new version of Code Arena. Refer to [Monolith](https://github.com/Elfsong/Monolith) for instructions on how to submit code.",
            "🚧 WIP: We will update real data very soon!",
        )
        for message in about_messages:
            st.write(message)