rzanoli committed on
Commit
97a28b9
·
1 Parent(s): b2119dc

Add the chart showing the model accuracy by task

Browse files
Files changed (1) hide show
  1. app.py +98 -0
app.py CHANGED
@@ -15,6 +15,7 @@ import matplotlib.pyplot as plt
15
  import re
16
  import plotly.express as px
17
  import plotly.graph_objects as go
 
18
 
19
 
20
  def mean_of_max_per_field(df):
@@ -29,6 +30,8 @@ def mean_of_max_per_field(df):
29
  """
30
  fields = ["TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]
31
 
 
 
32
  # Controlla che tutte le colonne esistano nel DataFrame
33
  missing = [f for f in fields if f not in df.columns]
34
  if missing:
@@ -43,6 +46,99 @@ def mean_of_max_per_field(df):
43
  return mean_max
44
 
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  def line_chart(dataframe):
47
  # Separiamo i dati in base a IS_FS
48
  df_true = dataframe[dataframe['IS_FS'] == True]
@@ -99,6 +195,7 @@ def line_chart(dataframe):
99
  # Disabilita lo zoom e altri controlli
100
  fig.update_xaxes(fixedrange=True, rangeslider_visible=False)
101
  fig.update_yaxes(fixedrange=True)
 
102
 
103
  return fig
104
 
@@ -405,6 +502,7 @@ with demo:
405
  #gr.Plot(value=line_chart(LEADERBOARD_DF), label="Andamento di esempio")
406
  #gr.Plot(value=line_chart_interactive_test(), label="Andamento interattivo")
407
  gr.Plot(value=line_chart(LEADERBOARD_DF))
 
408
 
409
  # About tab
410
  with gr.TabItem("📝 About"):
 
15
  import re
16
  import plotly.express as px
17
  import plotly.graph_objects as go
18
+ import numpy as np
19
 
20
 
21
  def mean_of_max_per_field(df):
 
30
  """
31
  fields = ["TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]
32
 
33
+ #print(df.columns)
34
+
35
  # Controlla che tutte le colonne esistano nel DataFrame
36
  missing = [f for f in fields if f not in df.columns]
37
  if missing:
 
46
  return mean_max
47
 
48
 
49
def boxplot_per_task(dataframe=None, baselines=None):
    """Build a Plotly figure with one accuracy box plot per task.

    Args:
        dataframe: DataFrame with one column of accuracy values per task
            code (the y-axis is fixed to the 0-100 range). Task columns that
            are absent are skipped. When ``None``, 20 random demo values per
            task are generated.
        baselines: Mapping from task code to a baseline value, drawn as a
            dashed black line (with a text label) over that task's box.
            Tasks with no entry, or a ``None`` entry, get no baseline. When
            the argument itself is ``None``, a random integer in [50, 70)
            per task is used as a placeholder.

    Returns:
        The assembled ``plotly.graph_objects.Figure``.
    """
    tasks = ["TE", "SA", "HS", "AT", "WIC", "FAQ", "LS", "SU", "NER", "REL"]

    if dataframe is None:
        # Demo data: fixed seed so the placeholder chart is reproducible.
        np.random.seed(42)
        dataframe = pd.DataFrame({
            task: np.random.uniform(0.4, 0.9, 20) * 100
            for task in tasks
        })

    # Baseline for each task (random between 50 and 70 when none is given).
    if baselines is None:
        baselines = {task: np.random.randint(50, 70) for task in tasks}

    colors = ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "#9467bd",
              "#8c564b", "#e377c2", "#7f7f7f", "#bcbd22", "#17becf"]

    fig = go.Figure()

    # Index of the next box on the x-axis. It is advanced only when a box is
    # actually drawn, so baseline shapes stay aligned with their box even
    # when some task columns are missing from the dataframe.
    position = 0

    for color, task in zip(colors, tasks):
        if task not in dataframe.columns:
            continue

        y_data = dataframe[task].dropna().tolist()

        # Box plot for this task.
        fig.add_trace(go.Box(
            y=y_data,
            name=task,
            boxmean="sd",
            marker=dict(color=color, line=dict(width=1)),
            line=dict(color=color),
            fillcolor=color,
            opacity=0.7,
            hovertemplate=f"<b>{task}</b><br>Accuracy: "+"%{y:.2f}%"+"<extra></extra>",
            width=0.6
        ))

        # Baseline for this task (if available).
        baseline = baselines.get(task)
        if baseline is not None:
            # Horizontal dashed line spanning the width of the box.
            fig.add_shape(
                type="line",
                x0=position - 0.3, x1=position + 0.3,
                y0=baseline, y1=baseline,
                line=dict(color="black", width=2, dash="dash"),
                xref="x", yref="y"
            )

            # Label showing the baseline value just above the line.
            fig.add_annotation(
                x=position, y=baseline,
                text=f"{baseline}%",
                showarrow=False,
                yshift=10,
                font=dict(size=10, color="black")
            )

        position += 1

    fig.update_layout(
        title="Distribution of Model Accuracy by Task.",
        xaxis_title="Task",
        yaxis_title="Accuracy (%)",
        template="plotly_white",
        boxmode="group",
        dragmode=False,
        font=dict(family="Arial", size=13),
        margin=dict(b=80),
    )

    # The caption is appended with add_annotation rather than passed as
    # update_layout(annotations=[...]): the latter modifies the annotations
    # list that already holds the baseline labels and would clobber them.
    fig.add_annotation(
        text=(
            "Boxplots show LLM accuracy in zero/few-shot settings. <br>"
            "Black dashed lines indicate the best-performing supervised models evaluated during EVALITA."
        ),
        xref="paper", yref="paper",
        x=0.5, y=-0.33,
        showarrow=False,
        font=dict(size=12, color="gray")
    )

    # Fixed 0-100 accuracy range with zooming disabled.
    fig.update_yaxes(range=[0, 100], fixedrange=True)

    return fig
131
+
132
+
133
# Example usage: per-task baseline values handed to boxplot_per_task().
# Presumably the best supervised EVALITA scores, as the chart caption says
# (TODO confirm). "FAQ" has no entry here.
BASELINES = dict(
    TE=71.00,
    SA=66.38,
    HS=80.88,
    AT=82.40,
    WIC=85.00,
    LS=38.82,
    SU=38.91,
    NER=88.00,
    REL=62.99,
)
138
+
139
+
140
+
141
+
142
  def line_chart(dataframe):
143
  # Separiamo i dati in base a IS_FS
144
  df_true = dataframe[dataframe['IS_FS'] == True]
 
195
  # Disabilita lo zoom e altri controlli
196
  fig.update_xaxes(fixedrange=True, rangeslider_visible=False)
197
  fig.update_yaxes(fixedrange=True)
198
+ #fig.update_yaxes(range=[0, 100], fixedrange=True)
199
 
200
  return fig
201
 
 
502
  #gr.Plot(value=line_chart(LEADERBOARD_DF), label="Andamento di esempio")
503
  #gr.Plot(value=line_chart_interactive_test(), label="Andamento interattivo")
504
  gr.Plot(value=line_chart(LEADERBOARD_DF))
505
+ gr.Plot(value=boxplot_per_task(LEADERBOARD_DF, BASELINES), interactive_plot_config={'displayModeBar': False })
506
 
507
  # About tab
508
  with gr.TabItem("📝 About"):