Spaces:

mixed-modality-search
/

Evaluation_for_MixBench

Running

App Files Files Community

mixed-modality-search committed on Jun 19

Commit

6eb346b

1 Parent(s): c63e6b2

update

Browse files

Files changed (1) hide show

main.py +24 -17

main.py CHANGED Viewed

@@ -24,11 +24,11 @@ def load_and_decrypt_qrel(secret_key):
     except Exception as e:
         raise ValueError(f"Failed to decrypt answer file: {str(e)}")
-def recall_at_k(rank_list, relevant_ids, k=1):
-    return int(any(item in relevant_ids for item in rank_list[:k]))
-def ndcg_at_k(rank_list, rel_dict, k):
-    all_items = list(dict.fromkeys(rank_list + list(rel_dict.keys())))
     y_true = [rel_dict.get(item, 0) for item in all_items]
@@ -46,14 +46,14 @@ def evaluate(pred_data, qrel_dict):
         for item in queries:
             qid = item["query_id"]
-            rank_list = item["rank_list"].split(",")
-            rank_list = [x.strip() for x in rank_list if x.strip()]
             rel_dict = qrel_dict[dataset].get(qid, {})
             relevant_ids = [cid for cid, score in rel_dict.items() if score > 0]
-            recall_1.append(recall_at_k(rank_list, relevant_ids, 1))
-            ndcg_10.append(ndcg_at_k(rank_list, rel_dict, 10))
-            ndcg_100.append(ndcg_at_k(rank_list, rel_dict, 100))
         results[dataset] = {
             "Recall@1": round(np.mean(recall_1) * 100, 2),
@@ -85,19 +85,26 @@ def process_json(file):
 # ==== Launch Gradio App ====
 def main_gradio():
     example_json = '''{
-  "mscoco": [
-    {"query_id": "1", "rank_list": "5, 2, 8"},
-    {"query_id": "2", "rank_list": "9, 1, 3"}
   ],
-  "google_wit": [
-    {"query_id": "3", "rank_list": "11, 5, 22"}
   ]
 }'''
     gr.Interface(
         fn=process_json,
-        inputs=gr.File(label="Upload Prediction JSON"),
-        outputs=gr.Textbox(label="Evaluation Metrics"),
-        title="Mixed-Modality Retrieval Evaluation",
         description="Upload a prediction JSON to evaluate Recall@1, NDCG@10, and NDCG@100 against encrypted qrels.\n\nExample input:\n" + example_json
     ).launch(share=True)

     except Exception as e:
         raise ValueError(f"Failed to decrypt answer file: {str(e)}")
+def recall_at_k(corpus_top_100_list, relevant_ids, k=1):
+    return int(any(item in relevant_ids for item in corpus_top_100_list[:k]))
+def ndcg_at_k(corpus_top_100_list, rel_dict, k):
+    all_items = list(dict.fromkeys(corpus_top_100_list + list(rel_dict.keys())))
     y_true = [rel_dict.get(item, 0) for item in all_items]
         for item in queries:
             qid = item["query_id"]
+            corpus_top_100_list = item["corpus_top_100_list"].split(",")
+            corpus_top_100_list = [x.strip() for x in corpus_top_100_list if x.strip()]
             rel_dict = qrel_dict[dataset].get(qid, {})
             relevant_ids = [cid for cid, score in rel_dict.items() if score > 0]
+            recall_1.append(recall_at_k(corpus_top_100_list, relevant_ids, 1))
+            ndcg_10.append(ndcg_at_k(corpus_top_100_list, rel_dict, 10))
+            ndcg_100.append(ndcg_at_k(corpus_top_100_list, rel_dict, 100))
         results[dataset] = {
             "Recall@1": round(np.mean(recall_1) * 100, 2),
 # ==== Launch Gradio App ====
 def main_gradio():
     example_json = '''{
+  "Google_WIT": [
+    {"query_id": "1", "corpus_top_100_list": "5, 2, 8, ..."},
+    {"query_id": "2", "corpus_top_100_list": "90, 13, 3, ..."}
   ],
+  "MSCOCO": [
+    {"query_id": "3", "corpus_top_100_list": "122, 35, 22, ..."},
+    {"query_id": "2", "corpus_top_100_list": "90, 19, 3, ..."}
+  ],
+  "OVEN": [
+    {"query_id": "3", "corpus_top_100_list": "11, 15, 22, ..."}
+  ],
+  "VisualNews": [
+    {"query_id": "3", "corpus_top_100_list": "101, 35, 22, ..."}
   ]
 }'''
     gr.Interface(
         fn=process_json,
+        inputs=gr.File(label="Upload Retrieval Result (JSON)"),
+        outputs=gr.Textbox(label="Results"),
+        title="Automated Evaluation of MixBench",
         description="Upload a prediction JSON to evaluate Recall@1, NDCG@10, and NDCG@100 against encrypted qrels.\n\nExample input:\n" + example_json
     ).launch(share=True)