# coding: utf-8
# Author: Du Mingzhe (mingzhe@nus.edu.sg)
# Date: 2025-04-01
#
# Streamlit front-end for Code Arena.
#
# The script has two views, selected by the `problem` URL query parameter:
#   * with `?problem=<id>`: a detail page for one problem (description,
#     test cases and a truncated test-case generator);
#   * otherwise: a tabbed overview (Problems / Submissions / Models / About).

import json
import random
import pandas as pd
import streamlit as st
from datasets import load_dataset

st.title("Code Arena")

with st.spinner("Loading data...", show_time=True):
    problem_dict = dict()

    # Leetcode Data
    ds = load_dataset("Elfsong/leetcode_data", split='train')
    for problem in ds:
        # Key by the integer problem id: the detail view looks problems up
        # with int(st.query_params["problem"]) and the overview links carry
        # the problem id, so both sides must agree on the key.
        problem_id = int(problem["problem_id"])
        problem['type'] = "leetcode"
        problem_dict[problem_id] = problem

    problem_count = len(problem_dict)

if "problem" in st.query_params:
    # ---- Single-problem detail view ----
    requested_problem = st.query_params["problem"]

    # The query string is user-controlled: reject non-numeric or unknown ids
    # with a readable message instead of an unhandled exception.
    try:
        problem_id = int(requested_problem)
    except (TypeError, ValueError):
        problem_id = None

    problem_instance = problem_dict.get(problem_id)
    if problem_instance is None:
        st.error(f"Unknown problem: {requested_problem}")
        st.stop()

    with st.expander("Problem Description"):
        st.markdown(problem_instance["question_content"])

    with st.expander("Test Cases"):
        # `test_cases` is stored as a JSON string and decoded into a list of
        # records that are read via their 'input' and 'output' keys.
        test_cases = json.loads(problem_instance["test_cases"])
        df = pd.DataFrame(
            {
                "input": [test_case['input'] for test_case in test_cases],
                "output": [test_case['output'] for test_case in test_cases],
            }
        )
        st.dataframe(
            df,
            column_config={
                "input": st.column_config.TextColumn("Input"),
                "output": st.column_config.TextColumn("Output"),
            },
            column_order=("input", "output"),
        )

    with st.expander("Test Case Generator"):
        test_case_generator = problem_instance["test_case_generator"]
        prompt = "# For now, we only disclose the top 20 lines of the test case generator.\n# the full version will be released after the paper review process.\n"
        # Only the first 20 lines of the generator source are shown.
        test_case_generator = "\n".join(test_case_generator.split("\n")[:20])
        st.code(prompt + test_case_generator)
else:
    # ---- Tabbed overview ----
    tab_problem, tab_submission, tab_model, tab_about = st.tabs(["Problems", "Submissions", "Models", "About"])

    with tab_problem:
        with st.spinner("Loading Framework...", show_time=True):
            # Build the table from problem_dict rather than re-iterating `ds`:
            # the 'type' field was attached to the rows stored in the dict,
            # not to the rows a fresh iteration over the dataset yields.
            problem_ids = list(problem_dict)
            problems = list(problem_dict.values())
            df = pd.DataFrame(
                {
                    "problem_id": problem_ids,
                    "difficulty": [str(problem['difficulty']) for problem in problems],
                    "type": [str(problem['type']) for problem in problems],
                    "problem_link": [
                        "https://huggingface.co/spaces/Elfsong/CodeArena/?problem=" + str(pid)
                        for pid in problem_ids
                    ],
                    # Random placeholder series (20 points in [0, 100]) per problem.
                    "acceptance_rate": [[random.randint(0, 100) for _ in range(20)] for _ in problems],
                }
            )
            st.dataframe(
                df,
                column_config={
                    "problem_id": st.column_config.NumberColumn("Problem ID", width='small'),
                    "difficulty": st.column_config.TextColumn("Difficulty", width='small'),
                    "type": st.column_config.TextColumn("Type", width='small'),
                    "acceptance_rate": st.column_config.LineChartColumn("Acceptance Rate", y_min=0, y_max=100),
                    "problem_link": st.column_config.LinkColumn("Link", display_text="Open", width='small'),
                },
                height=800,
                column_order=("problem_id", "difficulty", "acceptance_rate", "problem_link"),
                hide_index=True,
            )

    with tab_submission:
        st.header("Submissions")

    with tab_model:
        model_list = [
            "deepSeek-Coder",
            "GPT-4o",
            "Claude-3-5-sonnet",
            "Gemini-1.5-flash",
            "DeepSeek-Coder-V2-Lite",
            "Claude-3-Opus",
            "Gemini-1.5-pro",
            "Llama-3.1-8B",
            "Llama-3-8B",
            "GPT-4-Turbo",
            "GPT-3.5-Turbo",
            "Mistral-Nemo",
            "CodeLlama-13b",
            "Claude-3-Haiku",
            "Mistral-7B-v0.3",
            "Codestral-22B-v0.1",
            "Claude-3-sonnet",
            "CodeLlama-34b",
            "CodeLlama-7b",
        ]
        model_count = len(model_list)
        df = pd.DataFrame(
            {
                "model_name": list(model_list),
                # The score columns are filled with zeros here.
                "dynamic_point": [0] * model_count,
                "pass@1": [0] * model_count,
                "beyond@t": [0] * model_count,
                "beyond@m": [0] * model_count,
                # random.randint is inclusive on both ends, so the upper bound
                # must be problem_count to stay within the progress bar's
                # max_value below.
                "model_progress": [random.randint(0, problem_count) for _ in model_list],
            }
        )
        st.dataframe(
            df,
            column_config={
                "model_name": st.column_config.TextColumn("Model Name"),
                "dynamic_point": st.column_config.NumberColumn("Dynamic Point"),
                "pass@1": st.column_config.NumberColumn("Pass@1"),
                "beyond@t": st.column_config.NumberColumn("Beyond@Time"),
                "beyond@m": st.column_config.NumberColumn("Beyond@Memory"),
                "model_progress": st.column_config.ProgressColumn("Progress", min_value=0, max_value=problem_count, format="compact"),
            },
            # column_order takes DataFrame column names, not display labels.
            column_order=("model_name", "dynamic_point", "pass@1", "beyond@t", "beyond@m", "model_progress"),
            height=800,
        )

    with tab_about:
        st.write("Hello World!")
        st.write("This is the new version of Code Arena. Refer to [Monolith](https://github.com/Elfsong/Monolith) for instructions on how to submit code.")