Spaces:
Running
Running
Optimization of the significance correction
Browse files
server.py
CHANGED
@@ -314,6 +314,7 @@ class LeaderboardServer:
|
|
314 |
self.fetch_existing_models()
|
315 |
|
316 |
tournament_results = self.load_tournament_results()
|
|
|
317 |
with self.var_lock.rw:
|
318 |
self.tournament_results = tournament_results
|
319 |
|
@@ -519,6 +520,9 @@ class LeaderboardServer:
|
|
519 |
renew_tournament_ended_time_elapsed = renew_tournament_ended_datetime - renew_tournament_began_datetime
|
520 |
print(f"Time elapsed: {renew_tournament_ended_time_elapsed}")
|
521 |
|
|
|
|
|
|
|
522 |
gr.Info('Uploading tournament results...', duration=5)
|
523 |
if self.tournament_results:
|
524 |
self._upload_tournament_results(self.tournament_results)
|
@@ -569,7 +573,6 @@ class LeaderboardServer:
|
|
569 |
|
570 |
with self.var_lock.ro:
|
571 |
tournament_results = pre_submit.tournament_results if pre_submit else self.tournament_results
|
572 |
-
tournament_results = self._correct_significance_in_tournament_results(tournament_results)
|
573 |
|
574 |
for competitor_id in tournament_results[submission_id].keys() - {submission_id}: # without self
|
575 |
if competitor_id not in self.submission_id_to_data:
|
@@ -638,17 +641,28 @@ class LeaderboardServer:
|
|
638 |
dataframe = dataframe.style.apply(self._model_tournament_table_highlight_true_and_false, axis=None)
|
639 |
return dataframe
|
640 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
641 |
def _correct_significance_in_tournament_results(self, tournament_results, alpha=0.05):
|
642 |
tournament_results = copy.deepcopy(tournament_results)
|
643 |
|
644 |
-
|
645 |
-
for
|
646 |
competitors = [competitor_id for competitor_id in tournament_results[submission_id].keys() - {submission_id}] # without self
|
647 |
-
|
648 |
-
|
649 |
-
|
650 |
-
|
651 |
-
|
|
|
652 |
|
653 |
return tournament_results
|
654 |
|
@@ -730,7 +744,6 @@ class LeaderboardServer:
|
|
730 |
def _get_leaderboard(self, pre_submit=None, category=None, to_csv=False):
|
731 |
with self.var_lock.ro:
|
732 |
tournament_results = pre_submit.tournament_results if pre_submit else self.tournament_results
|
733 |
-
tournament_results = self._correct_significance_in_tournament_results(tournament_results)
|
734 |
category = category if category else self.TASKS_CATEGORY_OVERALL
|
735 |
|
736 |
if len(tournament_results) == 0:
|
@@ -1051,6 +1064,7 @@ class LeaderboardServer:
|
|
1051 |
tournament_results = self.fake_tournament(submission_id, file)
|
1052 |
else:
|
1053 |
tournament_results = self.start_tournament(submission_id, file)
|
|
|
1054 |
|
1055 |
pre_submit = self.PreSubmit(
|
1056 |
tournament_results,
|
|
|
314 |
self.fetch_existing_models()
|
315 |
|
316 |
tournament_results = self.load_tournament_results()
|
317 |
+
tournament_results = self._correct_significance_in_tournament_results(tournament_results)
|
318 |
with self.var_lock.rw:
|
319 |
self.tournament_results = tournament_results
|
320 |
|
|
|
520 |
renew_tournament_ended_time_elapsed = renew_tournament_ended_datetime - renew_tournament_began_datetime
|
521 |
print(f"Time elapsed: {renew_tournament_ended_time_elapsed}")
|
522 |
|
523 |
+
gr.Info('Correcting significance in tournament results...', duration=5)
|
524 |
+
tournament_results = self._correct_significance_in_tournament_results(tournament_results)
|
525 |
+
|
526 |
gr.Info('Uploading tournament results...', duration=5)
|
527 |
if self.tournament_results:
|
528 |
self._upload_tournament_results(self.tournament_results)
|
|
|
573 |
|
574 |
with self.var_lock.ro:
|
575 |
tournament_results = pre_submit.tournament_results if pre_submit else self.tournament_results
|
|
|
576 |
|
577 |
for competitor_id in tournament_results[submission_id].keys() - {submission_id}: # without self
|
578 |
if competitor_id not in self.submission_id_to_data:
|
|
|
641 |
dataframe = dataframe.style.apply(self._model_tournament_table_highlight_true_and_false, axis=None)
|
642 |
return dataframe
|
643 |
|
644 |
+
def _is_correct_significance_in_tournament_results(self, tournament_results):
|
645 |
+
for submission_id in tournament_results:
|
646 |
+
competitors = [competitor_id for competitor_id in tournament_results[submission_id].keys() - {submission_id}] # without self
|
647 |
+
for task in self.TASKS_METADATA:
|
648 |
+
for competitor_id in competitors:
|
649 |
+
if "corrected_p_value" not in tournament_results[submission_id][competitor_id][task]:
|
650 |
+
return False
|
651 |
+
|
652 |
+
return True
|
653 |
+
|
654 |
def _correct_significance_in_tournament_results(self, tournament_results, alpha=0.05):
    """Return a deep copy of the results with corrected p-values filled in.

    The input is never mutated; all work happens on a deep copy. If
    ``_is_correct_significance_in_tournament_results`` reports that every
    entry already has a ``"corrected_p_value"``, the copy is returned
    unchanged. In that case ``alpha`` is not re-applied to the stored
    ``"significant"`` flags.

    Otherwise, for each submission and each task in ``self.TASKS_METADATA``,
    the ``"p_value"`` of every competitor (excluding the submission itself)
    is passed as one list to ``correct_pvals_for_fdr``. Each entry then gets:

    * ``"corrected_p_value"``: the value returned for that competitor,
    * ``"significant"``: ``bool(corrected_p_value < alpha)``.

    Args:
        tournament_results: Nested mapping indexed as
            ``[submission_id][competitor_id][task]``.
        alpha: Threshold the corrected p-value must be below to be
            flagged significant.

    Returns:
        The (possibly updated) deep copy of ``tournament_results``.
    """
    corrected = copy.deepcopy(tournament_results)

    # Guard clause: skip the expensive recomputation when already corrected.
    if self._is_correct_significance_in_tournament_results(corrected):
        return corrected

    for model_id in tqdm.tqdm(corrected):
        model_results = corrected[model_id]
        rivals = list(model_results.keys() - {model_id})  # without self

        for task in self.TASKS_METADATA:
            raw_pvals = [model_results[rival_id][task]["p_value"] for rival_id in rivals]
            adjusted_pvals = correct_pvals_for_fdr(raw_pvals)

            # zip keeps each adjusted value paired with the rival it came from.
            for rival_id, adjusted in zip(rivals, adjusted_pvals):
                entry = model_results[rival_id][task]
                entry["corrected_p_value"] = adjusted
                entry["significant"] = bool(adjusted < alpha)

    return corrected
|
668 |
|
|
|
744 |
def _get_leaderboard(self, pre_submit=None, category=None, to_csv=False):
|
745 |
with self.var_lock.ro:
|
746 |
tournament_results = pre_submit.tournament_results if pre_submit else self.tournament_results
|
|
|
747 |
category = category if category else self.TASKS_CATEGORY_OVERALL
|
748 |
|
749 |
if len(tournament_results) == 0:
|
|
|
1064 |
tournament_results = self.fake_tournament(submission_id, file)
|
1065 |
else:
|
1066 |
tournament_results = self.start_tournament(submission_id, file)
|
1067 |
+
tournament_results = self._correct_significance_in_tournament_results(tournament_results)
|
1068 |
|
1069 |
pre_submit = self.PreSubmit(
|
1070 |
tournament_results,
|