File size: 5,496 Bytes
41302a5 8c89a6d 6a914f4 3788f63 bcc3eb3 3788f63 293121e 83b66cb 7c2ff6d 83b66cb 7c2ff6d 83b66cb 7c2ff6d 87f8f5e 83b66cb 08dccaf 2e8cfe5 bcd8088 2e8cfe5 7368e62 bcd8088 7368e62 41302a5 7368e62 41302a5 7368e62 2b8f77a 7368e62 16d5f45 cd8b99e 63dcee3 16d5f45 83b66cb 35b35ab 8bc1087 b4a4ef7 f5e6a19 feb488c 2085233 13485f6 2085233 feb488c 35b35ab dbe0366 7c2ff6d 90ad64b dbe0366 35b35ab f830b7d 60a4a0f 35b35ab 206cd4a feb488c 206cd4a 8c50270 f830b7d feb488c 87f8f5e 206cd4a 90ad64b 8c50270 0f54608 feb488c 90ad64b f830b7d feb488c 8bc1087 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
# coding: utf-8
# Author: Du Mingzhe (mingzhe@nus.edu.sg)
# Date: 2025-04-01
import json
import random
from urllib.parse import quote
import pandas as pd
import streamlit as st
from datasets import load_dataset
st.title("Code Arena")


@st.cache_data(show_spinner=False)
def _load_problems():
    """Load the LeetCode dataset and index every problem by its title.

    The result is cached with ``st.cache_data`` so the dataset is not
    re-read and re-indexed each time the script is re-executed.

    Returns:
        dict: maps each problem's ``title`` field to the problem record,
        with an extra ``type`` key set to ``"leetcode"``. If two records
        share a title, the later one wins.
    """
    problems = {}
    # Leetcode Data
    for problem in load_dataset("Elfsong/leetcode_data", split='train'):
        problem['type'] = "leetcode"
        problems[problem["title"]] = problem
    return problems


# The cache's own spinner is disabled above; this one keeps the original
# "Loading data..." message and elapsed-time display.
with st.spinner("Loading data...", show_time=True):
    problem_dict = _load_problems()
    problem_count = len(problem_dict)
# Route on the "problem" query parameter: when it is present render the detail
# page for that single problem, otherwise render the tabbed overview.
if "problem" in st.query_params:
    problem_id = str(st.query_params["problem"])
    problem_instance = problem_dict.get(problem_id)
    if problem_instance is None:
        # The id comes straight from the URL, so it may not match any loaded
        # problem; report that instead of crashing with a KeyError.
        st.error("Problem not found. Please open it from the problem list.")
        st.stop()

    st.header(problem_id)

    with st.expander("Problem Description"):
        st.markdown(problem_instance["question_content"])

    with st.expander("Test Cases"):
        # The test cases are stored as a JSON string; each decoded entry is
        # read through its 'input' and 'output' keys.
        test_cases = json.loads(problem_instance["test_cases"])
        df = pd.DataFrame(
            {
                "input": [test_case['input'] for test_case in test_cases],
                "output": [test_case['output'] for test_case in test_cases],
            }
        )
        st.dataframe(
            df,
            column_config={
                "input": st.column_config.TextColumn("Input"),
                "output": st.column_config.TextColumn("Output"),
            },
            column_order=("input", "output"),
        )

    with st.expander("Test Case Generator"):
        test_case_generator = problem_instance["test_case_generator"]
        prompt = "# For now, we only disclose the top 20 lines of the test case generator.\n# the full version will be released after the paper review process.\n"
        # Only the first 20 lines of the generator source are shown.
        test_case_generator = "\n".join(test_case_generator.split("\n")[:20])
        st.code(prompt+test_case_generator)
else:
    tab_problem, tab_submission, tab_model, tab_about = st.tabs(["Problems", "Submissions", "Models", "About"])

    with tab_problem:
        with st.spinner("Loading Framework...", show_time=True):
            problems = list(problem_dict.values())
            df = pd.DataFrame(
                {
                    "problem_id": [int(problem['problem_id']) for problem in problems],
                    "difficulty": [str(problem['difficulty']) for problem in problems],
                    "type": [str(problem['type']) for problem in problems],
                    # Percent-encode the title so spaces and reserved characters
                    # (&, #, ?, +) survive the round trip through the URL.
                    "problem_link": [
                        "https://huggingface.co/spaces/Elfsong/CodeArena/?problem=" + quote(str(problem['title']), safe="")
                        for problem in problems
                    ],
                    # Random series of 20 values per problem, regenerated on
                    # every run; not derived from the dataset.
                    "acceptance_rate": [[random.randint(0, 100) for _ in range(20)] for _ in problems],
                }
            )
            st.dataframe(
                df,
                column_config={
                    "problem_id": st.column_config.NumberColumn("Problem ID", width='small'),
                    "difficulty": st.column_config.TextColumn("Difficulty", width='small'),
                    "type": st.column_config.TextColumn("Type", width='small'),
                    "acceptance_rate": st.column_config.LineChartColumn("Acceptance Rate", y_min=0, y_max=100),
                    "problem_link": st.column_config.LinkColumn("Link", display_text="Open", width='small'),
                },
                height=800,
                column_order=("problem_id", "difficulty", "type", "acceptance_rate", "problem_link"),
                hide_index=True,
            )

    with tab_submission:
        st.header("Submissions")

    with tab_model:
        model_list = [
            "deepSeek-Coder",
            "GPT-4o",
            "Claude-3-5-sonnet",
            "Gemini-1.5-flash",
            "DeepSeek-Coder-V2-Lite",
            "Claude-3-Opus",
            "Gemini-1.5-pro",
            "Llama-3.1-8B",
            "Llama-3-8B",
            "GPT-4-Turbo",
            "GPT-3.5-Turbo",
            "Mistral-Nemo",
            "CodeLlama-13b",
            "Claude-3-Haiku",
            "Mistral-7B-v0.3",
            "Codestral-22B-v0.1",
            "Claude-3-sonnet",
            "CodeLlama-34b",
            "CodeLlama-7b",
        ]
        model_count = len(model_list)
        df = pd.DataFrame(
            {
                "model_name": list(model_list),
                # The score columns are all zero for every model.
                "dynamic_point": [0] * model_count,
                "pass@1": [0] * model_count,
                "beyond@t": [0] * model_count,
                "beyond@m": [0] * model_count,
                # randint is inclusive on both ends, so the upper bound is
                # problem_count itself to stay within the progress bar's
                # max_value below.
                "model_progress": [random.randint(0, problem_count) for _ in model_list],
            }
        )
        st.dataframe(
            df,
            column_config={
                "model_name": st.column_config.TextColumn("Model Name"),
                "dynamic_point": st.column_config.NumberColumn("Dynamic Point"),
                "pass@1": st.column_config.NumberColumn("Pass@1"),
                "beyond@t": st.column_config.NumberColumn("Beyond@Time"),
                "beyond@m": st.column_config.NumberColumn("Beyond@Memory"),
                "model_progress": st.column_config.ProgressColumn("Progress", min_value=0, max_value=problem_count, format="compact"),
            },
            # column_order takes DataFrame column keys, not display labels.
            column_order=("model_name", "dynamic_point", "pass@1", "beyond@t", "beyond@m", "model_progress"),
            height=800,
        )

    with tab_about:
        st.write("Hello World!")
        st.write("This is the new version of Code Arena. Refer to [Monolith](https://github.com/Elfsong/Monolith) for instructions on how to submit code.")
|