TheWeeeed committed on
Commit
23a0a11
·
verified ·
1 Parent(s): 78ded7b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -3
app.py CHANGED
@@ -361,10 +361,56 @@ def two_stage_qa(question, candidate_paragraphs_str, max_seq_len_mc=512, max_seq
361
  logger.error(f"從 qa_features_dataset 選擇列時出錯: {e}. Features: {qa_features_dataset.features}")
362
  return f"錯誤: 準備模型輸入時出錯 (列選擇)。 Error: {e}", "N/A", "N/A"
363
 
364
- logger.info(f"--- In two_stage_qa, about to create DataLoader for question_id: {log_question_id} ---")
365
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
366
  qa_dataloader = DataLoader(
367
- qa_features_dataset, collate_fn=default_data_collator, batch_size=8 # batch_size可以小一些
 
 
368
  )
369
 
370
  all_start_logits = []
 
361
  logger.error(f"從 qa_features_dataset 選擇列時出錯: {e}. Features: {qa_features_dataset.features}")
362
  return f"錯誤: 準備模型輸入時出錯 (列選擇)。 Error: {e}", "N/A", "N/A"
363
 
364
+ logger.info("--- 手動檢查 features_for_dataloader 以模擬 default_data_collator ---")
365
+ if len(features_for_dataloader) > 0:
366
+ # default_data_collator 會接收一個 features 列表,這裡我們模擬只有一個 feature 的情況
367
+ # 因為對於第一個 test_item,qa_features_dataset (以及 features_for_dataloader) 只有一行
368
+
369
+ # features_list_for_collator 將是 [features_for_dataloader[0]]
370
+ # 如果 qa_batch_size > 1 且 features_for_dataloader 行數也 > 1,這裡會更複雜
371
+ # 但錯誤發生在第一個批次,所以檢查第一個特徵就夠了。
372
+
373
+ single_feature_to_collate = features_for_dataloader[0]
374
+ keys_to_tensorize_by_collator = ["input_ids", "attention_mask", "token_type_ids"]
375
+
376
+ for k_collate in keys_to_tensorize_by_collator:
377
+ if k_collate in single_feature_to_collate:
378
+ value_to_tensorize = single_feature_to_collate[k_collate]
379
+ logger.info(f" 準備轉換鍵 '{k_collate}' 的值: {str(value_to_tensorize)[:100]}...") # 打印部分值
380
+ if value_to_tensorize is None:
381
+ logger.error(f" !!!!!! CRITICAL (Pre-Collate): {k_collate} 的值是 None!")
382
+ return f"錯誤: 預整理時發現 {k_collate} 為 None", "N/A", "N/A"
383
+ if not isinstance(value_to_tensorize, list):
384
+ logger.error(f" !!!!!! CRITICAL (Pre-Collate): {k_collate} 的值不是列表,類型為 {type(value_to_tensorize)}!")
385
+ return f"錯誤: 預整理時發現 {k_collate} 不是列表", "N/A", "N/A"
386
+ if not value_to_tensorize: # 空列表
387
+ logger.warning(f" Pre-Collate: {k_collate} 的值是空列表。")
388
+
389
+ problem_found_in_list = False
390
+ for elem_idx, elem_val in enumerate(value_to_tensorize):
391
+ if elem_val is None:
392
+ logger.error(f" !!!!!! CRITICAL (Pre-Collate): {k_collate}[{elem_idx}] 是 None!")
393
+ problem_found_in_list = True
394
+ break
395
+ if not isinstance(elem_val, int):
396
+ logger.error(f" !!!!!! CRITICAL (Pre-Collate): {k_collate}[{elem_idx}] 不是整數,值: {elem_val}, 類型: {type(elem_val)}!")
397
+ problem_found_in_list = True
398
+ break
399
+ if problem_found_in_list:
400
+ return f"錯誤: 預整理時在 {k_collate} 內部發現問題", "N/A", "N/A"
401
+
402
+ logger.info(f" 鍵 '{k_collate}' 的預整理檢查通過。")
403
+ else:
404
+ logger.warning(f" 鍵 '{k_collate}' 不在 features_for_dataloader[0] 中。")
405
+ else:
406
+ logger.error("features_for_dataloader 為空,無法進行手動檢查。")
407
+ return "錯誤: features_for_dataloader 為空", "N/A", "N/A"
408
+
409
+
410
  qa_dataloader = DataLoader(
411
+ features_for_dataloader,
412
+ collate_fn=default_data_collator,
413
+ batch_size=8 # 或者 args.qa_batch_size
414
  )
415
 
416
  all_start_logits = []