Update vqa_accuracy.py

vqa_accuracy.py CHANGED (+7 -18)

@@ -5,7 +5,7 @@ import re
 _DESCRIPTION = """
 VQA accuracy is a evaluation metric which is robust to inter-human variability in phrasing the answers:
 $$
-\\text{Acc}(
+\\text{Acc}(ans) = \\min \\left( \\frac{\\text{# humans that said }ans}{3}, 1 \\right)
 $$
 Where `ans` is answered by machine. In order to be consistent with 'human accuracies', machine accuracies are averaged over all 10 choose 9 sets of human annotators.
 """
@@ -17,9 +17,9 @@ Args:
     references (`list` of `str` lists): Ground truth answers.
     answer_types (`list` of `str`, *optional*): Answer types corresponding to each questions.
     questions_type (`list` of `str`, *optional*): Question types corresponding to each questions.
-
+
 Returns:
-    visual question answering accuracy (`float` or `int`): Accuracy accuracy. Minimum possible value is 0. Maximum possible value is
+    visual question answering accuracy (`float` or `int`): Accuracy accuracy. Minimum possible value is 0. Maximum possible value is 100.
 
 """
 
@@ -250,14 +250,7 @@ class VQAAccuracy(evaluate.Metric):
             ],
         )
 
-    def _compute(
-        self,
-        predictions,
-        references,
-        answer_types=None,
-        question_types=None,
-        precision=2,
-    ):
+    def _compute(self, predictions, references, answer_types=None, question_types=None):
         if answer_types is None:
             answer_types = [None] * len(predictions)
 
@@ -300,21 +293,17 @@ class VQAAccuracy(evaluate.Metric):
                 ques_type_dict[ques_type].append(vqa_acc)
 
         # the following key names follow the naming of the official evaluation results
-        result = {"overall": round(100 * sum(total) / len(total), precision)}
+        result = {"overall": 100 * sum(total) / len(total)}
 
         if len(ans_type_dict) > 0:
             result["perAnswerType"] = {
-                ans_type: round(
-                    100 * sum(accuracy_list) / len(accuracy_list), precision
-                )
+                ans_type: 100 * sum(accuracy_list) / len(accuracy_list)
                 for ans_type, accuracy_list in ans_type_dict.items()
             }
 
         if len(ques_type_dict) > 0:
             result["perQuestionType"] = {
-                ques_type: round(
-                    100 * sum(accuracy_list) / len(accuracy_list), precision
-                )
+                ques_type: 100 * sum(accuracy_list) / len(accuracy_list)
                 for ques_type, accuracy_list in ques_type_dict.items()
             }
 
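For context on what the restored docstring line describes: each predicted answer is scored min(#humans that gave that answer / 3, 1), and the score is averaged over the ten leave-one-annotator-out subsets of the ground-truth answers. The sketch below is only an illustration of that per-question computation; the helper name is hypothetical and it assumes the answers have already been normalized the way the metric's preprocessing normalizes them.

# Illustrative sketch of the per-question accuracy from _DESCRIPTION
# (hypothetical helper, not part of the file being diffed; assumes
# answers are already normalized).
def vqa_accuracy(prediction, references):
    """min(#matching humans / 3, 1), averaged over every leave-one-out
    subset of the (typically 10) human reference answers."""
    accs = []
    for i in range(len(references)):
        others = references[:i] + references[i + 1:]  # the remaining annotators
        matches = sum(ref == prediction for ref in others)
        accs.append(min(matches / 3, 1.0))
    return sum(accs) / len(accs)

# 3 of the 10 annotators said "2", so the leave-one-out scores average to ~0.9.
print(vqa_accuracy("2", ["2", "2", "2", "two", "3", "two", "4", "two", "5", "6"]))

Since the commit also drops the `precision` argument and the surrounding `round(...)` calls, `_compute` now returns unrounded percentages (the per-question scores above, averaged and scaled by 100); callers who want the old behaviour (two decimals by default) can round the returned values themselves.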