# coding: utf-8

# Author: Du Mingzhe (mingzhe@nus.edu.sg)
# Date: 2025-04-01

"""Streamlit front end for Code Arena.

With a ``?problem=<id>`` query parameter the page shows a single problem
(description, test cases and a preview of the test case generator).
Without it, the page shows four tabs: Problems, Submissions, Models, About.
"""

import json
import random
import pandas as pd
import streamlit as st
from datasets import load_dataset
from datasets import get_dataset_config_names

# Base URL of the single-problem view; the problem key is appended to it.
PROBLEM_URL_PREFIX = "https://huggingface.co/spaces/Elfsong/CodeArena/?problem="


@st.cache_resource(show_spinner=False)
def _load_venus_problems():
    """Return the Venus (LeetCode) problems keyed by title, tagged type='leetcode'.

    Cached so the dataset is not re-iterated on every Streamlit rerun.
    The returned dict is shared between reruns and must not be mutated.
    """
    problems = {}
    for problem in load_dataset("Elfsong/leetcode_data", split='train'):
        problem['type'] = "leetcode"
        problems[problem["title"]] = problem
    return problems


@st.cache_resource(show_spinner=False)
def _load_apps_problems():
    """Return the APPS problems keyed by 'apps_<problem_id>', tagged type='apps'.

    Cached so the dataset is not re-iterated on every Streamlit rerun.
    The returned dict is shared between reruns and must not be mutated.
    """
    problems = {}
    for problem in load_dataset("Elfsong/APPS_Python", split='test'):
        problem['type'] = "apps"
        problems[f'apps_{problem["problem_id"]}'] = problem
    return problems


st.title("Code:blue[Arena]")

problem_dict = dict()

# Venus Data
with st.spinner("Loading Venus data...", show_time=True):
    problem_dict.update(_load_venus_problems())

# APPS Data
with st.spinner("Loading APPS data...", show_time=True):
    problem_dict.update(_load_apps_problems())

problem_count = len(problem_dict)

if "problem" in st.query_params:
    # ---- Single-problem view ----
    problem_id = str(st.query_params["problem"])
    problem_instance = problem_dict.get(problem_id)
    if problem_instance is None:
        # The query parameter is user-controlled; fail with a message
        # instead of an unhandled KeyError.
        st.error(f"Unknown problem: {problem_id}")
        st.stop()

    problem_type = problem_instance['type']

    st.header(problem_id)

    with st.expander("Problem Description"):
        # The two datasets store the statement under different keys.
        if problem_type == "leetcode":
            st.markdown(problem_instance["question_content"])
        elif problem_type == "apps":
            st.markdown(problem_instance["problem_content"])

    with st.expander("Test Cases"):
        # Test cases are stored as a JSON string holding a list of
        # objects with 'input' and 'output' entries.
        test_cases = json.loads(problem_instance["test_cases"])
        df = pd.DataFrame(
            {
                "input": [test_case['input'] for test_case in test_cases],
                "output": [test_case['output'] for test_case in test_cases],
            }
        )
        st.dataframe(
            df,
            column_config={
                "input": st.column_config.TextColumn("Input"),
                "output": st.column_config.TextColumn("Output"),
            },
            column_order=("input", "output"),
        )

    with st.expander("Test Case Generator"):
        if problem_type == "leetcode":
            test_case_generator = problem_instance["test_case_generator"]
            prompt = "# For now, we only disclose the top 20 lines of the test case generator.\n# the full version will be released after the paper review process.\n"
            # Only the first 20 lines of the generator are shown.
            test_case_generator = "\n".join(test_case_generator.split("\n")[:20])
            st.code(prompt+test_case_generator)
        else:
            st.code("Stay tuned!")
else:
    # ---- Overview: tabbed view ----
    tab_problem, tab_submission, tab_model, tab_about = st.tabs(["Problems", "Submissions", "Models", "About"])

    with tab_problem:
        with st.spinner("Loading Framework...", show_time=True):
            problems = list(problem_dict.values())
            df = pd.DataFrame(
                {
                    "problem_id": [int(problem['problem_id']) for problem in problems],
                    "difficulty": [str(problem['difficulty']) for problem in problems],
                    "type": [str(problem['type']) for problem in problems],
                    # The dict key is exactly the id the single-problem view
                    # looks up, so links are built from the keys directly.
                    "problem_link": [PROBLEM_URL_PREFIX + str(key) for key in problem_dict],
                    # Placeholder data: 20 random points per problem.
                    "acceptance_rate": [[random.randint(0, 100) for _ in range(20)] for _ in problems],
                }
            )
        st.dataframe(
            df,
            column_config={
                "problem_id": st.column_config.NumberColumn("Problem ID", width='small'),
                "difficulty": st.column_config.TextColumn("Difficulty", width='small'),
                "type": st.column_config.TextColumn("Type", width='small'),
                "acceptance_rate": st.column_config.LineChartColumn("Acceptance Rate", y_min=0, y_max=100),
                "problem_link": st.column_config.LinkColumn("Link", display_text="Open", width='small'),
            },
            height=800,
            column_order=("problem_id", "difficulty", "type", "acceptance_rate", "problem_link"),
            hide_index=True,
        )

    with tab_submission:
        st.header("Submissions")

        # Each config of the evaluation dataset is offered as one model.
        models = get_dataset_config_names("Elfsong/Venus_Model_Evaluation")
        model_name = st.selectbox("Which model you are looking for?", models, placeholder="Select a model...")
        st.write("You selected:", model_name)

        with st.spinner("Loading Data...", show_time=True):
            ds = load_dataset("Elfsong/Venus_Model_Evaluation", model_name, split='train')
            df = pd.DataFrame(
                {
                    "problem_id": [int(problem['problem_id']) for problem in ds],
                    "solution": [str(problem['solution']) for problem in ds],
                }
            )
        st.dataframe(
            df,
            column_config={
                "problem_id": st.column_config.NumberColumn("Problem ID", width='small'),
                # 'large' is the widest documented preset ('big' is not one).
                "solution": st.column_config.TextColumn("Solution", width='large'),
            },
            height=800,
            column_order=("problem_id", "solution"),
            hide_index=True,
        )

    with tab_model:
        model_list = [
            "deepSeek-Coder",
            "GPT-4o",
            "Claude-3-5-sonnet",
            "Gemini-1.5-flash",
            "DeepSeek-Coder-V2-Lite",
            "Claude-3-Opus",
            "Gemini-1.5-pro",
            "Llama-3.1-8B",
            "Llama-3-8B",
            "GPT-4-Turbo",
            "GPT-3.5-Turbo",
            "Mistral-Nemo",
            "CodeLlama-13b",
            "Claude-3-Haiku",
            "Mistral-7B-v0.3",
            "Codestral-22B-v0.1",
            "Claude-3-sonnet",
            "CodeLlama-34b",
            "CodeLlama-7b"
        ]
        model_count = len(model_list)
        # Placeholder leaderboard: scores are zero and progress is random.
        df = pd.DataFrame(
            {
                "model_name": list(model_list),
                "dynamic_point": [0] * model_count,
                "pass@1": [0] * model_count,
                "beyond@t": [0] * model_count,
                "beyond@m": [0] * model_count,
                # randint is inclusive on both ends; cap at problem_count so
                # the value never exceeds the progress bar's max_value.
                "model_progress": [random.randint(0, problem_count) for _ in model_list],
            }
        )
        st.dataframe(
            df,
            column_config={
                "model_name": st.column_config.TextColumn("Model Name"),
                "dynamic_point": st.column_config.NumberColumn("Dynamic Point"),
                "pass@1": st.column_config.NumberColumn("Pass@1"),
                "beyond@t": st.column_config.NumberColumn("Beyond@Time"),
                "beyond@m": st.column_config.NumberColumn("Beyond@Memory"),
                "model_progress": st.column_config.ProgressColumn("Progress", min_value=0, max_value=problem_count, format="compact"),
            },
            # column_order takes DataFrame column names, not display labels.
            column_order=("model_name", "dynamic_point", "pass@1", "beyond@t", "beyond@m", "model_progress"),
            height=800,
        )

    with tab_about:
        st.write("Hello World!")
        st.write("This is the new version of Code Arena. Refer to [Monolith](https://github.com/Elfsong/Monolith) for instructions on how to submit code.")
        st.write("🚧 WIP: We will update real data very soon!")