Spaces:

Elfsong
/

CodeArena

Running

App Files Files Community

CodeArena / app.py

Elfsong

Update app.py

151c6ad verified about 2 months ago

raw

history blame contribute delete

7.26 kB

	# coding: utf-8

	# Author: Du Mingzhe (mingzhe@nus.edu.sg)
	# Date: 2025-04-01

	import json
	import random
	from urllib.parse import quote

	import pandas as pd
	import streamlit as st
	from datasets import get_dataset_config_names
	from datasets import load_dataset

	st.title("Code:blue[Arena]")


	@st.cache_resource(show_spinner=False)
	def _load_problems(repo_id, split, problem_type, key_field, key_prefix=""):
	    """Load one problem dataset and index its rows by CodeArena problem key.

	    Streamlit re-executes this script on every user interaction; caching the
	    result keeps the dataset from being reloaded and re-iterated on each
	    rerun. The returned rows are shared between reruns, so callers must
	    treat them as read-only.

	    Args:
	        repo_id: Hugging Face dataset repository to load.
	        split: Dataset split to read.
	        problem_type: Value stored under each row's ``"type"`` key.
	        key_field: Row field used to build the problem key.
	        key_prefix: Optional string prepended to the key field's value.

	    Returns:
	        A dict mapping problem key -> row dict (tagged with ``"type"``),
	        in dataset order.
	    """
	    problems = {}
	    for problem in load_dataset(repo_id, split=split):
	        problem["type"] = problem_type
	        key = problem[key_field]
	        if key_prefix:
	            key = f"{key_prefix}{key}"
	        problems[key] = problem
	    return problems


	# All problems from every source, keyed by the id used in the "problem"
	# query parameter (LeetCode title, or "apps_<problem_id>" for APPS).
	problem_dict = dict()

	# Venus Data
	with st.spinner("Loading Venus data...", show_time=True):
	    problem_dict.update(
	        _load_problems("Elfsong/leetcode_data", "train", "leetcode", "title")
	    )

	# APPS Data
	with st.spinner("Loading APPS data...", show_time=True):
	    problem_dict.update(
	        _load_problems("Elfsong/APPS_Python", "test", "apps", "problem_id", "apps_")
	    )

	problem_count = len(problem_dict)


	# Route on the "problem" query parameter: when present, render the detail
	# page for that single problem; otherwise render the tabbed overview.
	if "problem" in st.query_params:
	    problem_id = str(st.query_params["problem"])
	    # The id comes straight from the URL and may not match any loaded
	    # problem; report that instead of crashing with a KeyError.
	    problem_instance = problem_dict.get(problem_id)
	    if problem_instance is None:
	        st.error(f"Unknown problem: {problem_id}")
	        st.stop()
	    problem_type = problem_instance['type']

	    st.header(problem_id)

	    with st.expander("Problem Description"):
	        # The statement is stored under a different key per problem type.
	        if problem_type == "leetcode":
	            st.markdown(problem_instance["question_content"])
	        elif problem_type == "apps":
	            st.markdown(problem_instance["problem_content"])

	    with st.expander("Test Cases"):
	        # "test_cases" is a JSON string decoding to a list of objects that
	        # each carry an "input" and an "output" entry.
	        test_cases = json.loads(problem_instance["test_cases"])
	        df = pd.DataFrame(
	            {
	                "input": [test_case['input'] for test_case in test_cases],
	                "output": [test_case['output'] for test_case in test_cases],
	            }
	        )
	        st.dataframe(
	            df,
	            column_config={
	                "input": st.column_config.TextColumn("Input"),
	                "output": st.column_config.TextColumn("Output"),
	            },
	            column_order=("input", "output"),
	        )

	    with st.expander("Test Case Generator"):
	        if problem_type == "leetcode":
	            test_case_generator = problem_instance["test_case_generator"]
	            prompt = "# For now, we only disclose the top 20 lines of the test case generator.\n# the full version will be released after the paper review process.\n"
	            # Only the first 20 lines of the generator source are shown.
	            test_case_generator = "\n".join(test_case_generator.split("\n")[:20])
	            st.code(prompt+test_case_generator)
	        else:
	            st.code("Stay tuned!")

	else:
	    tab_problem, tab_submission, tab_model, tab_about = st.tabs(["Problems", "Submissions", "Models", "About"])

	    with tab_problem:
	        with st.spinner("Loading Framework...", show_time=True):
	            problems = list(problem_dict.values())
	            problem_url_prefix = "https://huggingface.co/spaces/Elfsong/CodeArena/?problem="
	            df = pd.DataFrame(
	                {
	                    "problem_id": [int(problem['problem_id']) for problem in problems],
	                    "difficulty": [str(problem['difficulty']) for problem in problems],
	                    "type": [str(problem['type']) for problem in problems],
	                    # The keys of problem_dict are exactly the ids the detail
	                    # page looks up; percent-encode them so titles containing
	                    # spaces, "&", "#", etc. still produce a valid link.
	                    "problem_link": [problem_url_prefix + quote(str(key), safe="") for key in problem_dict],
	                    # Placeholder data: 20 random points per problem.
	                    "acceptance_rate": [[random.randint(0, 100) for _ in range(20)] for _ in problems],
	                }
	            )
	            st.dataframe(
	                df,
	                column_config={
	                    "problem_id": st.column_config.NumberColumn("Problem ID", width='small'),
	                    "difficulty": st.column_config.TextColumn("Difficulty", width='small'),
	                    "type": st.column_config.TextColumn("Type", width='small'),
	                    "acceptance_rate": st.column_config.LineChartColumn("Acceptance Rate", y_min=0, y_max=100),
	                    "problem_link": st.column_config.LinkColumn("Link", display_text="Open", width='small'),
	                },
	                height=800,
	                column_order=("problem_id", "difficulty", "type", "acceptance_rate", "problem_link"),
	                hide_index=True,
	            )

	    with tab_submission:
	        st.header("Submissions")
	        # Each dataset config name is offered as a selectable model.
	        models = get_dataset_config_names("Elfsong/Venus_Model_Evaluation")
	        model_name = st.selectbox("Which model you are looking for?", models, placeholder="Select a model...")
	        st.write("You selected:", model_name)

	        with st.spinner("Loading Data...", show_time=True):
	            ds = load_dataset("Elfsong/Venus_Model_Evaluation", model_name, split='train')
	            df = pd.DataFrame(
	                {
	                    "problem_id": [int(problem['problem_id']) for problem in ds],
	                    "solution": [str(problem['solution']) for problem in ds],
	                }
	            )
	            st.dataframe(
	                df,
	                column_config={
	                    "problem_id": st.column_config.NumberColumn("Problem ID", width='small'),
	                    # Valid preset widths are "small", "medium" and "large".
	                    "solution": st.column_config.TextColumn("Solution", width='large'),
	                },
	                height=800,
	                column_order=("problem_id", "solution"),
	                hide_index=True,
	            )

	    with tab_model:
	        model_list = [
	            "deepSeek-Coder",
	            "GPT-4o",
	            "Claude-3-5-sonnet",
	            "Gemini-1.5-flash",
	            "DeepSeek-Coder-V2-Lite",
	            "Claude-3-Opus",
	            "Gemini-1.5-pro",
	            "Llama-3.1-8B",
	            "Llama-3-8B",
	            "GPT-4-Turbo",
	            "GPT-3.5-Turbo",
	            "Mistral-Nemo",
	            "CodeLlama-13b",
	            "Claude-3-Haiku",
	            "Mistral-7B-v0.3",
	            "Codestral-22B-v0.1",
	            "Claude-3-sonnet",
	            "CodeLlama-34b",
	            "CodeLlama-7b",
	        ]

	        # Placeholder leaderboard: all metrics are zero and progress is random.
	        zeros = [0] * len(model_list)
	        df = pd.DataFrame(
	            {
	                "model_name": list(model_list),
	                "dynamic_point": list(zeros),
	                "pass@1": list(zeros),
	                "beyond@t": list(zeros),
	                "beyond@m": list(zeros),
	                # randint is inclusive on both ends, so the upper bound must
	                # be problem_count itself to stay within the progress range.
	                "model_progress": [random.randint(0, problem_count) for _ in model_list],
	            }
	        )

	        st.dataframe(
	            df,
	            column_config={
	                "model_name": st.column_config.TextColumn("Model Name"),
	                "dynamic_point": st.column_config.NumberColumn("Dynamic Point"),
	                "pass@1": st.column_config.NumberColumn("Pass@1"),
	                "beyond@t": st.column_config.NumberColumn("Beyond@Time"),
	                "beyond@m": st.column_config.NumberColumn("Beyond@Memory"),
	                "model_progress": st.column_config.ProgressColumn("Progress", min_value=0, max_value=problem_count, format="compact"),
	            },
	            # column_order takes DataFrame column keys, not display labels.
	            column_order=("model_name", "dynamic_point", "pass@1", "beyond@t", "beyond@m", "model_progress"),
	            height=800,
	        )

	    with tab_about:
	        st.write("Hello World!")
	        st.write("This is the new version of Code Arena. Refer to [Monolith](https://github.com/Elfsong/Monolith) for instructions on how to submit code.")
	        st.write("🚧 WIP: We will update real data very soon!")