CodeArena / app.py
Elfsong's picture
Update app.py
151c6ad verified
# coding: utf-8
# Author: Du Mingzhe (mingzhe@nus.edu.sg)
# Date: 2025-04-01
import json
import random
from urllib.parse import quote

import pandas as pd
import streamlit as st
from datasets import get_dataset_config_names
from datasets import load_dataset
st.title("Code:blue[Arena]")


@st.cache_resource(show_spinner=False)
def _load_problems(repo_id: str, split: str, problem_type: str) -> dict:
    """Load one problem dataset and index its records by page identifier.

    Streamlit re-executes this script on every user interaction; caching the
    result keeps the dataset from being re-read and re-iterated on each rerun.
    The returned mapping is shared between reruns, so callers must treat it
    (and the records inside it) as read-only.

    Args:
        repo_id: Hugging Face dataset repository to load.
        split: Dataset split to read.
        problem_type: Value stored under each record's "type" key. Records
            tagged "leetcode" are keyed by their title; all others are keyed
            by ``apps_<problem_id>``.

    Returns:
        Mapping from problem identifier to the tagged problem record.
    """
    problems = {}
    for problem in load_dataset(repo_id, split=split):
        problem["type"] = problem_type
        if problem_type == "leetcode":
            key = problem["title"]
        else:
            key = f'apps_{problem["problem_id"]}'
        problems[key] = problem
    return problems


# Every problem from every source, keyed by the identifier used in "?problem=".
# A fresh dict is built per run so the cached per-source mappings stay untouched.
problem_dict = dict()

# Venus Data
with st.spinner("Loading Venus data...", show_time=True):
    problem_dict.update(_load_problems("Elfsong/leetcode_data", "train", "leetcode"))

# APPS Data
with st.spinner("Loading APPS data...", show_time=True):
    problem_dict.update(_load_problems("Elfsong/APPS_Python", "test", "apps"))

problem_count = len(problem_dict)
# Base of the per-problem deep link; the problem identifier is appended to it.
PROBLEM_URL_PREFIX = "https://huggingface.co/spaces/Elfsong/CodeArena/?problem="


def _problem_key(problem):
    """Return the identifier under which ``problem`` is stored in ``problem_dict``."""
    if problem["type"] == "leetcode":
        return str(problem["title"])
    return f'apps_{problem["problem_id"]}'


def _render_problem_page(problem_id):
    """Render the detail view (description, test cases, generator) of one problem.

    Args:
        problem_id: Key into ``problem_dict`` taken from the ``problem`` query
            parameter. An unknown key shows an error instead of raising.
    """
    problem_instance = problem_dict.get(problem_id)
    if problem_instance is None:
        # A stale or hand-edited link must not crash the page with a KeyError.
        st.error(f"Unknown problem: {problem_id}")
        return

    problem_type = problem_instance["type"]
    st.header(problem_id)

    with st.expander("Problem Description"):
        # The statement lives under a different key for each source.
        if problem_type == "leetcode":
            st.markdown(problem_instance["question_content"])
        elif problem_type == "apps":
            st.markdown(problem_instance["problem_content"])

    with st.expander("Test Cases"):
        # Test cases are stored as a JSON-encoded list of input/output records.
        test_cases = json.loads(problem_instance["test_cases"])
        df = pd.DataFrame(
            {
                "input": [case["input"] for case in test_cases],
                "output": [case["output"] for case in test_cases],
            }
        )
        st.dataframe(
            df,
            column_config={
                "input": st.column_config.TextColumn("Input"),
                "output": st.column_config.TextColumn("Output"),
            },
            column_order=("input", "output"),
        )

    with st.expander("Test Case Generator"):
        if problem_type == "leetcode":
            prompt = (
                "# For now, we only disclose the top 20 lines of the test case generator.\n"
                "# the full version will be released after the paper review process.\n"
            )
            # Show only the first 20 lines of the generator source.
            preview = "\n".join(problem_instance["test_case_generator"].split("\n")[:20])
            st.code(prompt + preview)
        else:
            st.code("Stay tuned!")


def _render_problem_tab():
    """Render the table listing every loaded problem with a link to its page."""
    with st.spinner("Loading Framework...", show_time=True):
        problems = list(problem_dict.values())
        df = pd.DataFrame(
            {
                "problem_id": [int(p["problem_id"]) for p in problems],
                "difficulty": [str(p["difficulty"]) for p in problems],
                "type": [str(p["type"]) for p in problems],
                # Percent-encode the identifier: titles may contain spaces or
                # characters such as "&" and "#" that would break the query.
                "problem_link": [
                    PROBLEM_URL_PREFIX + quote(_problem_key(p), safe="") for p in problems
                ],
                # Randomly generated series; no real acceptance data is read here.
                "acceptance_rate": [
                    [random.randint(0, 100) for _ in range(20)] for _ in problems
                ],
            }
        )
    st.dataframe(
        df,
        column_config={
            "problem_id": st.column_config.NumberColumn("Problem ID", width='small'),
            "difficulty": st.column_config.TextColumn("Difficulty", width='small'),
            "type": st.column_config.TextColumn("Type", width='small'),
            "acceptance_rate": st.column_config.LineChartColumn("Acceptance Rate", y_min=0, y_max=100),
            "problem_link": st.column_config.LinkColumn("Link", display_text="Open", width='small'),
        },
        height=800,
        column_order=("problem_id", "difficulty", "type", "acceptance_rate", "problem_link"),
        hide_index=True,
    )


def _render_submission_tab():
    """Render the solutions stored for a user-selected model."""
    st.header("Submissions")
    # Each configuration of the evaluation dataset is offered as one model.
    models = get_dataset_config_names("Elfsong/Venus_Model_Evaluation")
    model_name = st.selectbox("Which model you are looking for?", models, placeholder="Select a model...")
    st.write("You selected:", model_name)

    with st.spinner("Loading Data...", show_time=True):
        ds = load_dataset("Elfsong/Venus_Model_Evaluation", model_name, split='train')
        df = pd.DataFrame(
            {
                "problem_id": [int(row["problem_id"]) for row in ds],
                "solution": [str(row["solution"]) for row in ds],
            }
        )
    st.dataframe(
        df,
        column_config={
            "problem_id": st.column_config.NumberColumn("Problem ID", width='small'),
            # "large" is the widest named size; "big" is not a valid width.
            "solution": st.column_config.TextColumn("Solution", width='large'),
        },
        height=800,
        column_order=("problem_id", "solution"),
        hide_index=True,
    )


def _render_model_tab():
    """Render the model leaderboard (scores are zeros, progress is random)."""
    model_list = [
        "deepSeek-Coder",
        "GPT-4o",
        "Claude-3-5-sonnet",
        "Gemini-1.5-flash",
        "DeepSeek-Coder-V2-Lite",
        "Claude-3-Opus",
        "Gemini-1.5-pro",
        "Llama-3.1-8B",
        "Llama-3-8B",
        "GPT-4-Turbo",
        "GPT-3.5-Turbo",
        "Mistral-Nemo",
        "CodeLlama-13b",
        "Claude-3-Haiku",
        "Mistral-7B-v0.3",
        "Codestral-22B-v0.1",
        "Claude-3-sonnet",
        "CodeLlama-34b",
        "CodeLlama-7b",
    ]
    zeros = [0] * len(model_list)
    df = pd.DataFrame(
        {
            "model_name": list(model_list),
            "dynamic_point": list(zeros),
            "pass@1": list(zeros),
            "beyond@t": list(zeros),
            "beyond@m": list(zeros),
            # randint is inclusive on both ends, so the upper bound must be
            # problem_count itself to stay within the progress bar's max_value.
            "model_progress": [random.randint(0, problem_count) for _ in model_list],
        }
    )
    st.dataframe(
        df,
        column_config={
            "model_name": st.column_config.TextColumn("Model Name"),
            "dynamic_point": st.column_config.NumberColumn("Dynamic Point"),
            "pass@1": st.column_config.NumberColumn("Pass@1"),
            "beyond@t": st.column_config.NumberColumn("Beyond@Time"),
            "beyond@m": st.column_config.NumberColumn("Beyond@Memory"),
            "model_progress": st.column_config.ProgressColumn("Progress", min_value=0, max_value=problem_count, format="compact"),
        },
        # column_order takes DataFrame column keys, not display labels.
        column_order=("model_name", "dynamic_point", "pass@1", "beyond@t", "beyond@m", "model_progress"),
        height=800,
    )


def _render_about_tab():
    """Render the static "About" text."""
    st.write("Hello World!")
    st.write("This is the new version of Code Arena. Refer to [Monolith](https://github.com/Elfsong/Monolith) for instructions on how to submit code.")
    st.write("🚧 WIP: We will update real data very soon!")


# A "problem" query parameter selects the single-problem view; otherwise the
# tabbed overview is shown.
if "problem" in st.query_params:
    _render_problem_page(str(st.query_params["problem"]))
else:
    tab_problem, tab_submission, tab_model, tab_about = st.tabs(["Problems", "Submissions", "Models", "About"])
    with tab_problem:
        _render_problem_tab()
    with tab_submission:
        _render_submission_tab()
    with tab_model:
        _render_model_tab()
    with tab_about:
        _render_about_tab()