TheWeeeed committed on
Commit
4886d7e
·
verified ·
1 Parent(s): 4c26b67

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -2
app.py CHANGED
@@ -306,8 +306,40 @@ def two_stage_qa(question, candidate_paragraphs_str, max_seq_len_mc=512, max_seq
306
  if len(qa_features_dataset) == 0:
307
  return "錯誤: 無法為選定段落生成QA特徵 (可能段落太短或內容問題)。", f"選中的段落 (索引 {selected_idx}):\n{selected_paragraph}", "N/A"
308
 
309
- # 創建 DataLoader
310
- from transformers import default_data_collator # 需要導入
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311
  qa_dataloader = DataLoader(
312
  qa_features_dataset, collate_fn=default_data_collator, batch_size=8 # batch_size可以小一些
313
  )
 
306
  if len(qa_features_dataset) == 0:
307
  return "錯誤: 無法為選定段落生成QA特徵 (可能段落太短或內容問題)。", f"選中的段落 (索引 {selected_idx}):\n{selected_paragraph}", "N/A"
308
 
309
+ logger.info(f"--- In two_stage_qa, about to create DataLoader for question_id: {question_id} ---")
310
+ logger.info(f"Number of features in qa_features_dataset: {len(qa_features_dataset)}")
311
+
312
+ for i in range(len(qa_features_dataset)):
313
+ feature_item = qa_features_dataset[i]
314
+ logger.info(f" Inspecting feature {i} from qa_features_dataset:")
315
+ for key_to_check in ["input_ids", "attention_mask", "token_type_ids"]:
316
+ if key_to_check not in feature_item:
317
+ logger.error(f" !!!!!! CRITICAL: Key '{key_to_check}' is MISSING in feature {i}! Features: {feature_item.keys()}")
318
+ # 這是一個嚴重問題,會導致後續 collate 失敗
319
+ return f"錯誤: 特徵準備失敗,缺少 {key_to_check}", "N/A", "N/A"
320
+
321
+ val_list = feature_item[key_to_check]
322
+ if val_list is None:
323
+ logger.error(f" !!!!!! CRITICAL: Key '{key_to_check}' in feature {i} is None!")
324
+ return f"錯誤: 特徵準備失敗,{key_to_check} 為 None", "N/A", "N/A"
325
+
326
+ if not isinstance(val_list, list):
327
+ logger.error(f" !!!!!! CRITICAL: Key '{key_to_check}' in feature {i} is not a list, but {type(val_list)}!")
328
+ return f"錯誤: 特徵準備失敗,{key_to_check} 不是列表", "N/A", "N/A"
329
+
330
+ if not val_list: # 如果列表為空
331
+ logger.warning(f" Feature {i} has an empty list for '{key_to_check}'. This might be okay if handled by collator for padding, but check if intended.")
332
+
333
+ # 檢查列表內部元素
334
+ for elem_idx, elem in enumerate(val_list):
335
+ if elem is None:
336
+ logger.error(f" !!!!!! CRITICAL: Key '{key_to_check}' in feature {i} contains None at index {elem_idx}!")
337
+ return f"錯誤: 特徵準備失敗,{key_to_check} 內部有 None", "N/A", "N/A"
338
+ if not isinstance(elem, int):
339
+ logger.error(f" !!!!!! CRITICAL: Key '{key_to_check}' in feature {i} contains non-integer {elem} (type: {type(elem)}) at index {elem_idx}!")
340
+ return f"錯誤: 特徵準備失敗,{key_to_check} 內部有非整數", "N/A", "N/A"
341
+ logger.info(f" Feature {i}, key '{key_to_check}' passed inspection. Length: {len(val_list)}")
342
+
343
  qa_dataloader = DataLoader(
344
  qa_features_dataset, collate_fn=default_data_collator, batch_size=8 # batch_size可以小一些
345
  )