Spaces:

allenai
/

reward-bench

Running

App Files Community

natolambert committed 21 days ago

Commit

96e55d5

1 Parent(s): 88c98d4

hard reset repo

Browse files

Files changed (11) hide show

.gitignore +5 -1
.pre-commit-config.yaml +0 -53
README.md +0 -34
app.py +7 -7
{src → leaderboard}/constants.py +0 -0
{src → leaderboard}/css.py +0 -0
{src → leaderboard}/logo.png +0 -0
{src → leaderboard}/md.py +0 -0
{src → leaderboard}/plt.py +0 -0
{src → leaderboard}/utils.py +0 -0
requirements.txt +2 -15

.gitignore CHANGED Viewed

@@ -11,5 +11,9 @@ eval-results/
 eval-queue-bk/
 eval-results-bk/
 logs/
 .gradio/
-.evals/

 eval-queue-bk/
 eval-results-bk/
 logs/
+evals/
 .gradio/
+.evals/
+__pycache__/*
+*.pyc

.pre-commit-config.yaml DELETED Viewed

@@ -1,53 +0,0 @@
-# Copyright (c) 2022, NVIDIA CORPORATION.  All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-default_language_version:
-  python: python3
-ci:
-  autofix_prs: true
-  autoupdate_commit_msg: '[pre-commit.ci] pre-commit suggestions'
-  autoupdate_schedule: quarterly
-repos:
-  - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.3.0
-    hooks:
-      - id: check-yaml
-      - id: check-case-conflict
-      - id: detect-private-key
-      - id: check-added-large-files
-        args: ['--maxkb=1000']
-      - id: requirements-txt-fixer
-      - id: end-of-file-fixer
-      - id: trailing-whitespace
-  - repo: https://github.com/PyCQA/isort
-    rev: 5.12.0
-    hooks:
-      - id: isort
-        name: Format imports
-  - repo: https://github.com/psf/black
-    rev: 22.12.0
-    hooks:
-      - id: black
-        name: Format code
-        additional_dependencies: ['click==8.0.2']
-  - repo: https://github.com/charliermarsh/ruff-pre-commit
-    # Ruff version.
-    rev: 'v0.0.267'
-    hooks:
-      - id: ruff

README.md CHANGED Viewed

@@ -11,37 +11,3 @@ license: apache-2.0
 tags:
   - leaderboard
 ---
-# Start the configuration
-Most of the variables to change for a default leaderboard are in `src/env.py` (replace the path for your leaderboard) and `src/about.py` (for tasks).
-Results files should have the following format and be stored as json files:
-```json
-{
-    "config": {
-        "model_dtype": "torch.float16", # or torch.bfloat16 or 8bit or 4bit
-        "model_name": "path of the model on the hub: org/model",
-        "model_sha": "revision on the hub",
-    },
-    "results": {
-        "task_name": {
-            "metric_name": score,
-        },
-        "task_name2": {
-            "metric_name": score,
-        }
-    }
-}
-```
-Request files are created automatically by this tool.
-If you encounter problem on the space, don't hesitate to restart it to remove the create eval-queue, eval-queue-bk, eval-results and eval-results-bk created folder.
-# Code logic for more complex edits
-You'll find
-- the main table' columns names and properties in `src/display/utils.py`
-- the logic to read all results and request files, then convert them in dataframe lines, in `src/leaderboard/read_evals.py`, and `src/populate.py`
-- the logic to allow or filter submissions in `src/submission/submit.py` and `src/submission/check_validity.py`

 tags:
   - leaderboard
 ---

app.py CHANGED Viewed

@@ -6,10 +6,10 @@ import numpy as np
 from datasets import load_dataset
 from huggingface_hub import HfApi, snapshot_download
-from src.constants import example_counts, length_categories, subset_mapping
-from src.css import custom_css
-from src.md import *
-from src.utils import load_all_data
 api = HfApi()
@@ -329,7 +329,7 @@ total_models = len(
         rewardbench_data_avg.copy(), "", ["Seq. Classifiers", "DPO", "Custom Classifiers", "Generative"], style=False
     ).values
 )
-assets = Path("src").resolve()  # absolute dir with the image
 # Using a string for a predefined color
 theme = gr.themes.Default(primary_hue="blue")
@@ -344,7 +344,7 @@ with gr.Blocks(theme=theme, css=custom_css) as app:
         #     # filter_button = gr.Checkbox(label="Include AI2 training runs (or type ai2 above).", interactive=True)
         #     # img = gr.Image(value="https://private-user-images.githubusercontent.com/10695622/310698241-24ed272a-0844-451f-b414-fde57478703e.png", width=500)
         #     gr.Markdown("""
-        #                 ![](/gradio_api/file=src/logo.png)
         #                 """)
     with gr.Tabs(elem_id="outer-tabs", elem_classes="tabs-big") as tabs_big:
@@ -358,7 +358,7 @@ with gr.Blocks(theme=theme, css=custom_css) as app:
                     # img = gr.Image(value="https://private-user-images.githubusercontent.com/10695622/310698241-24ed272a-0844-451f-b414-fde57478703e.png", width=500)
                     gr.Markdown(
                         """
-                                ![](/gradio_api/file=src/logo.png)
                                 """
                     )
             with gr.Tabs(elem_id="inner-tabs", elem_classes="tabs-small") as tabs:

 from datasets import load_dataset
 from huggingface_hub import HfApi, snapshot_download
+from leaderboard.constants import example_counts, length_categories, subset_mapping
+from leaderboard.css import custom_css
+from leaderboard.md import *
+from leaderboard.utils import load_all_data
 api = HfApi()
         rewardbench_data_avg.copy(), "", ["Seq. Classifiers", "DPO", "Custom Classifiers", "Generative"], style=False
     ).values
 )
+assets = Path("leaderboard").resolve()  # absolute dir with the image
 # Using a string for a predefined color
 theme = gr.themes.Default(primary_hue="blue")
         #     # filter_button = gr.Checkbox(label="Include AI2 training runs (or type ai2 above).", interactive=True)
         #     # img = gr.Image(value="https://private-user-images.githubusercontent.com/10695622/310698241-24ed272a-0844-451f-b414-fde57478703e.png", width=500)
         #     gr.Markdown("""
+        #                 ![](/gradio_api/file=leaderboard/logo.png)
         #                 """)
     with gr.Tabs(elem_id="outer-tabs", elem_classes="tabs-big") as tabs_big:
                     # img = gr.Image(value="https://private-user-images.githubusercontent.com/10695622/310698241-24ed272a-0844-451f-b414-fde57478703e.png", width=500)
                     gr.Markdown(
                         """
+                                ![](/gradio_api/file=leaderboard/logo.png)
                                 """
                     )
             with gr.Tabs(elem_id="inner-tabs", elem_classes="tabs-small") as tabs:

{src → leaderboard}/constants.py RENAMED Viewed

File without changes

{src → leaderboard}/css.py RENAMED Viewed

File without changes

{src → leaderboard}/logo.png RENAMED Viewed

File without changes

{src → leaderboard}/md.py RENAMED Viewed

File without changes

{src → leaderboard}/plt.py RENAMED Viewed

File without changes

{src → leaderboard}/utils.py RENAMED Viewed

File without changes

requirements.txt CHANGED Viewed

@@ -1,16 +1,3 @@
-APScheduler
-black
-datasets
-gradio
-gradio[oauth]
-gradio_leaderboard==0.0.13
-gradio_client
-huggingface-hub>=0.18.0
-matplotlib
-numpy
 pandas
-python-dateutil
-tqdm
-transformers
-tokenizers>=0.15.0
-sentencepiece

+apscheduler
 pandas
+datasets==2.21.0