Commit
·
4bec130
1
Parent(s):
d3e6b02
remove duplicates and change colors
Browse files
- app.py +4 -3
- draw_utils.py +49 -10
app.py
CHANGED
@@ -17,7 +17,8 @@ def draw_leaderboard():
|
|
17 |
st.markdown(PAGE_INFO)
|
18 |
st.subheader("Evaluation results:")
|
19 |
st.text('Each tab corresponds to a task, avg - averaged scores over qa1-5 tasks.')
|
20 |
-
st.markdown('Predictions of all evaluated models:
|
|
|
21 |
search_term = st.text_input("Search models:", "")
|
22 |
|
23 |
tabs = st.tabs([str(task) for task in tasks])
|
@@ -32,10 +33,10 @@ def draw_leaderboard():
|
|
32 |
row_height = 35
|
33 |
height = (len(task_df) + 1) * row_height
|
34 |
|
35 |
-
|
36 |
|
37 |
st.dataframe(
|
38 |
-
|
39 |
width=1030,
|
40 |
height=height,
|
41 |
)
|
|
|
17 |
st.markdown(PAGE_INFO)
|
18 |
st.subheader("Evaluation results:")
|
19 |
st.text('Each tab corresponds to a task, avg - averaged scores over qa1-5 tasks.')
|
20 |
+
st.markdown('Predictions of all evaluated models: '
|
21 |
+
'[BABILong evals](https://huggingface.co/datasets/RMT-team/babilong_evals)')
|
22 |
search_term = st.text_input("Search models:", "")
|
23 |
|
24 |
tabs = st.tabs([str(task) for task in tasks])
|
|
|
33 |
row_height = 35
|
34 |
height = (len(task_df) + 1) * row_height
|
35 |
|
36 |
+
styled_df = style_dataframe(task_df).format(precision=1)
|
37 |
|
38 |
st.dataframe(
|
39 |
+
styled_df,
|
40 |
width=1030,
|
41 |
height=height,
|
42 |
)
|
draw_utils.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import pandas as pd
|
2 |
import numpy as np
|
|
|
3 |
|
4 |
PAGE_MARKDOWN = """
|
5 |
<style>
|
@@ -26,7 +27,19 @@ def load_results():
|
|
26 |
old_results = pd.read_csv(old_results_path)
|
27 |
new_results = pd.read_csv(new_results_path)
|
28 |
|
29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
res.replace(-1, np.nan, inplace=True)
|
31 |
res['<=32k'] = res[LENGTHS_32k].mean(axis=1)
|
32 |
res['<=128k'] = res[LENGTHS_128k].mean(axis=1)
|
@@ -42,6 +55,30 @@ def load_results():
|
|
42 |
return res
|
43 |
|
44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
def style_dataframe(df):
|
46 |
"""
|
47 |
Style a pandas DataFrame with a color gradient.
|
@@ -50,15 +87,17 @@ def style_dataframe(df):
|
|
50 |
numeric_columns = styled_df.columns[1:]
|
51 |
|
52 |
def color_scale(val):
|
|
|
|
|
53 |
if pd.isna(val):
|
54 |
-
return 'background-color: white; color: white'
|
|
|
55 |
min_val = 0
|
56 |
max_val = 100
|
57 |
-
normalized = (val - min_val) / (max_val - min_val)
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
return styled
|
|
|
1 |
import pandas as pd
|
2 |
import numpy as np
|
3 |
+
from matplotlib.colors import LinearSegmentedColormap
|
4 |
|
5 |
PAGE_MARKDOWN = """
|
6 |
<style>
|
|
|
27 |
old_results = pd.read_csv(old_results_path)
|
28 |
new_results = pd.read_csv(new_results_path)
|
29 |
|
30 |
+
def normalize_model_name(name):
    """
    Return a case-insensitive key used to match a model across result files.

    Keeps only the text after the last '/' in ``name`` and lower-cases it.
    A name without '/' is simply lower-cased.
    """
    # str.split returns the whole string as the only element when the
    # separator is absent, so no separate "'/' in name" check is needed.
    return name.split('/')[-1].lower()
|
34 |
+
|
35 |
+
old_results['normalized_name'] = old_results['model_name'].apply(normalize_model_name)
|
36 |
+
new_results['normalized_name'] = new_results['model_name'].apply(normalize_model_name)
|
37 |
+
# drop models from the old (v0) results that also appear in the new results, so the new entry is the one kept
|
38 |
+
duplicate_models = set(old_results['normalized_name']).intersection(set(new_results['normalized_name']))
|
39 |
+
old_results_filtered = old_results[~old_results['normalized_name'].isin(duplicate_models)]
|
40 |
+
res = pd.concat([old_results_filtered, new_results])
|
41 |
+
res.drop('normalized_name', axis=1, inplace=True)
|
42 |
+
|
43 |
res.replace(-1, np.nan, inplace=True)
|
44 |
res['<=32k'] = res[LENGTHS_32k].mean(axis=1)
|
45 |
res['<=128k'] = res[LENGTHS_128k].mean(axis=1)
|
|
|
55 |
return res
|
56 |
|
57 |
|
58 |
+
# from pandas/io/formats/style.py
|
59 |
+
def relative_luminance(rgba) -> float:
    """
    Calculate relative luminance of a color.

    The calculation adheres to the W3C standards
    (https://www.w3.org/WAI/GL/wiki/Relative_luminance)

    Parameters
    ----------
    rgba : rgb or rgba tuple
        Color channels as floats in the range 0 to 1. Only the first three
        entries (red, green, blue) are used; an alpha entry is ignored.

    Returns
    -------
    float
        The relative luminance as a value from 0 to 1
    """
    # Convert each sRGB channel to its linear value before weighting:
    # small values use the linear segment, the rest the power curve.
    r, g, b = (
        x / 12.92 if x <= 0.04045 else ((x + 0.055) / 1.055) ** 2.4
        for x in rgba[:3]
    )
    return 0.2126 * r + 0.7152 * g + 0.0722 * b
|
80 |
+
|
81 |
+
|
82 |
def style_dataframe(df):
|
83 |
"""
|
84 |
Style a pandas DataFrame with a color gradient.
|
|
|
87 |
numeric_columns = styled_df.columns[1:]
|
88 |
|
89 |
def color_scale(val):
|
90 |
+
cmap = LinearSegmentedColormap.from_list('ryg', ["red", "yellow", "green"], N=256)
|
91 |
+
|
92 |
if pd.isna(val):
|
93 |
+
return 'background-color: white; color: white;'
|
94 |
+
|
95 |
min_val = 0
|
96 |
max_val = 100
|
97 |
+
normalized = (val - min_val) / (max_val - min_val)
|
98 |
+
rgba = cmap(normalized)
|
99 |
+
text_color = 'white' if relative_luminance(rgba) < 0.408 else 'black'
|
100 |
+
return f'background-color: rgba({rgba[0]*255},{rgba[1]*255},{rgba[2]*255},{rgba[3]}); color: {text_color}'
|
101 |
+
|
102 |
+
styled_df = styled_df.style.map(color_scale, subset=numeric_columns)
|
103 |
+
return styled_df
|
|