File size: 5,496 Bytes
41302a5
 
 
 
 
 
8c89a6d
6a914f4
3788f63
bcc3eb3
3788f63
293121e
 
83b66cb
 
7c2ff6d
 
83b66cb
 
7c2ff6d
 
83b66cb
7c2ff6d
87f8f5e
83b66cb
 
08dccaf
2e8cfe5
bcd8088
2e8cfe5
 
7368e62
 
 
bcd8088
7368e62
41302a5
7368e62
 
41302a5
 
7368e62
 
 
 
 
2b8f77a
 
7368e62
 
 
16d5f45
 
cd8b99e
63dcee3
 
16d5f45
 
83b66cb
35b35ab
8bc1087
b4a4ef7
f5e6a19
feb488c
 
 
2085233
 
 
13485f6
2085233
feb488c
 
35b35ab
 
 
dbe0366
 
7c2ff6d
90ad64b
dbe0366
35b35ab
f830b7d
60a4a0f
35b35ab
 
 
 
 
 
 
206cd4a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
feb488c
 
 
 
206cd4a
8c50270
 
 
f830b7d
feb488c
 
 
 
 
 
87f8f5e
206cd4a
90ad64b
8c50270
 
0f54608
feb488c
90ad64b
f830b7d
feb488c
8bc1087
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# coding: utf-8

# Author: Du Mingzhe (mingzhe@nus.edu.sg)
# Date: 2025-04-01

import json
import random
from urllib.parse import quote

import pandas as pd
import streamlit as st
from datasets import load_dataset

st.title("Code Arena")


@st.cache_data(show_spinner=False)
def load_problems() -> dict:
    """Load the LeetCode dataset and index its rows by problem title.

    Streamlit re-executes this script from the top on every user
    interaction; caching the result avoids calling ``load_dataset`` and
    walking the whole split again on each rerun.

    Returns:
        A dict mapping each row's ``title`` to a copy of that row with an
        extra ``"type"`` key set to ``"leetcode"``.
    """
    problems = {}
    for row in load_dataset("Elfsong/leetcode_data", split='train'):
        # Later rows with a duplicate title overwrite earlier ones,
        # exactly as a plain dict assignment would.
        problems[row["title"]] = {**row, "type": "leetcode"}
    return problems


# The built-in cache spinner is disabled above so only this one is shown.
with st.spinner("Loading data...", show_time=True):
    problem_dict = load_problems()
    problem_count = len(problem_dict)


if "problem" in st.query_params:
    # ---- Detail view: one problem selected via ``?problem=<title>`` ----
    problem_id = str(st.query_params["problem"])
    problem_instance = problem_dict.get(problem_id)
    if problem_instance is None:
        # An unknown or stale title must not crash the app with a KeyError.
        st.error(f"Problem not found: {problem_id}")
        st.stop()

    st.header(problem_id)

    with st.expander("Problem Description"):
        st.markdown(problem_instance["question_content"])

    with st.expander("Test Cases"):
        # ``test_cases`` is stored as a JSON string; each decoded entry is
        # read through its 'input' and 'output' keys.
        test_cases = json.loads(problem_instance["test_cases"])
        df = pd.DataFrame(
            {
                "input": [case['input'] for case in test_cases],
                "output": [case['output'] for case in test_cases],
            }
        )
        st.dataframe(
            df,
            column_config={
                "input": st.column_config.TextColumn("Input"),
                "output": st.column_config.TextColumn("Output"),
            },
            column_order=("input", "output"),
        )

    with st.expander("Test Case Generator"):
        # Only the first 20 lines of the generator source are shown.
        prompt = "# For now, we only disclose the top 20 lines of the test case generator.\n# the full version will be released after the paper review process.\n"
        generator_head = "\n".join(problem_instance["test_case_generator"].split("\n")[:20])
        st.code(prompt + generator_head)

else:
    # ---- Overview: tabbed landing page ----
    tab_problem, tab_submission, tab_model, tab_about = st.tabs(["Problems", "Submissions", "Models", "About"])

    with tab_problem:
        with st.spinner("Loading Framework...", show_time=True):
            problems = list(problem_dict.values())
            df = pd.DataFrame(
                {
                    "problem_id": [int(problem['problem_id']) for problem in problems],
                    "difficulty": [str(problem['difficulty']) for problem in problems],
                    "type": [str(problem['type']) for problem in problems],
                    # Percent-encode the title so spaces, '&', '#', '+' etc.
                    # survive the round trip through the query string.
                    "problem_link": [
                        "https://huggingface.co/spaces/Elfsong/CodeArena/?problem=" + quote(str(problem['title']), safe="")
                        for problem in problems
                    ],
                    # Placeholder data: 20 random points per problem.
                    "acceptance_rate": [[random.randint(0, 100) for _ in range(20)] for _ in problems],
                }
            )
        st.dataframe(
            df,
            column_config={
                "problem_id": st.column_config.NumberColumn("Problem ID", width='small'),
                "difficulty": st.column_config.TextColumn("Difficulty", width='small'),
                "type": st.column_config.TextColumn("Type", width='small'),
                "acceptance_rate": st.column_config.LineChartColumn("Acceptance Rate", y_min=0, y_max=100),
                "problem_link": st.column_config.LinkColumn("Link", display_text="Open", width='small'),
            },
            height=800,
            column_order=("problem_id", "difficulty", "type", "acceptance_rate", "problem_link"),
            hide_index=True,
        )

    with tab_submission:
        st.header("Submissions")

    with tab_model:
        model_list = [
            "deepSeek-Coder",
            "GPT-4o",
            "Claude-3-5-sonnet",
            "Gemini-1.5-flash",
            "DeepSeek-Coder-V2-Lite",
            "Claude-3-Opus",
            "Gemini-1.5-pro",
            "Llama-3.1-8B",
            "Llama-3-8B",
            "GPT-4-Turbo",
            "GPT-3.5-Turbo",
            "Mistral-Nemo",
            "CodeLlama-13b",
            "Claude-3-Haiku",
            "Mistral-7B-v0.3",
            "Codestral-22B-v0.1",
            "Claude-3-sonnet",
            "CodeLlama-34b",
            "CodeLlama-7b",
        ]
        model_total = len(model_list)

        # All metrics are placeholders (zeros); progress is random.
        df = pd.DataFrame(
            {
                "model_name": list(model_list),
                "dynamic_point": [0] * model_total,
                "pass@1": [0] * model_total,
                "beyond@t": [0] * model_total,
                "beyond@m": [0] * model_total,
                # randint is inclusive on both ends, so the upper bound is
                # problem_count itself to stay within the progress bar's
                # max_value.
                "model_progress": [random.randint(0, problem_count) for _ in model_list],
            }
        )

        st.dataframe(
            df,
            column_config={
                "model_name": st.column_config.TextColumn("Model Name"),
                "dynamic_point": st.column_config.NumberColumn("Dynamic Point"),
                "pass@1": st.column_config.NumberColumn("Pass@1"),
                "beyond@t": st.column_config.NumberColumn("Beyond@Time"),
                "beyond@m": st.column_config.NumberColumn("Beyond@Memory"),
                "model_progress": st.column_config.ProgressColumn("Progress", min_value=0, max_value=problem_count, format="compact"),
            },
            # column_order takes DataFrame column names, not display labels.
            column_order=("model_name", "dynamic_point", "pass@1", "beyond@t", "beyond@m", "model_progress"),
            height=800,
        )

    with tab_about:
        st.write("Hello World!")
        st.write("This is the new version of Code Arena. Refer to [Monolith](https://github.com/Elfsong/Monolith) for instructions on how to submit code.")