Spaces:

darpanaswal
/

Patent_Retrieval

Configuration error

darpanaswal committed on Apr 11

Commit

f8a16e0

verified ·

1 Parent(s): 3a53014

Update cross_encoder_reranking_train.py

Files changed (1) hide show

cross_encoder_reranking_train.py CHANGED Viewed

@@ -137,6 +137,13 @@ def extract_text(content_dict, text_type="full"):
             if key == "features":
                 content += list(content_dict[key].values())
     elif text_type == "tac1":
         # Extract title, abstract, and first claim
         title = content_dict.get("title", "")
@@ -304,7 +311,7 @@ def main():
     parser.add_argument('--queries_list', type=str, default='test_queries.json',
                         help='Path to training queries JSON file')
     parser.add_argument('--text_type', type=str, default='TA',
-                        choices=['TA', 'claims', 'description', 'full', 'tac1', 'smart', 'smart2', 'claimfeat'],
                         help='Type of text to use for scoring')
     parser.add_argument('--model_name', type=str, default='intfloat/e5-large-v2',
                         help='Name of the cross-encoder model')

             if key == "features":
                 content += list(content_dict[key].values())
+    elif text_type == "feat":
+        # Extract only the values stored under the "features" key
+        content = []
+        for key, value in content_dict.items():
+            if key == "features":
+                content += list(content_dict[key].values())
     elif text_type == "tac1":
         # Extract title, abstract, and first claim
         title = content_dict.get("title", "")
     parser.add_argument('--queries_list', type=str, default='test_queries.json',
                         help='Path to training queries JSON file')
     parser.add_argument('--text_type', type=str, default='TA',
+                        choices=['TA', 'claims', 'description', 'full', 'tac1', 'smart', 'smart2', 'claimfeat', 'feat'],
                         help='Type of text to use for scoring')
     parser.add_argument('--model_name', type=str, default='intfloat/e5-large-v2',
                         help='Name of the cross-encoder model')