Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -306,8 +306,40 @@ def two_stage_qa(question, candidate_paragraphs_str, max_seq_len_mc=512, max_seq
|
|
306 |
if len(qa_features_dataset) == 0:
|
307 |
return "錯誤: 無法為選定段落生成QA特徵 (可能段落太短或內容問題)。", f"選中的段落 (索引 {selected_idx}):\n{selected_paragraph}", "N/A"
|
308 |
|
309 |
-
|
310 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
311 |
qa_dataloader = DataLoader(
|
312 |
qa_features_dataset, collate_fn=default_data_collator, batch_size=8 # batch_size可以小一些
|
313 |
)
|
|
|
306 |
if len(qa_features_dataset) == 0:
|
307 |
return "錯誤: 無法為選定段落生成QA特徵 (可能段落太短或內容問題)。", f"選中的段落 (索引 {selected_idx}):\n{selected_paragraph}", "N/A"
|
308 |
|
309 |
+
logger.info(f"--- In two_stage_qa, about to create DataLoader for question_id: {question_id} ---")
|
310 |
+
logger.info(f"Number of features in qa_features_dataset: {len(qa_features_dataset)}")
|
311 |
+
|
312 |
+
for i in range(len(qa_features_dataset)):
|
313 |
+
feature_item = qa_features_dataset[i]
|
314 |
+
logger.info(f" Inspecting feature {i} from qa_features_dataset:")
|
315 |
+
for key_to_check in ["input_ids", "attention_mask", "token_type_ids"]:
|
316 |
+
if key_to_check not in feature_item:
|
317 |
+
logger.error(f" !!!!!! CRITICAL: Key '{key_to_check}' is MISSING in feature {i}! Features: {feature_item.keys()}")
|
318 |
+
# 這是一個嚴重問題,會導致後續 collate 失敗
|
319 |
+
return f"錯誤: 特徵準備失敗,缺少 {key_to_check}", "N/A", "N/A"
|
320 |
+
|
321 |
+
val_list = feature_item[key_to_check]
|
322 |
+
if val_list is None:
|
323 |
+
logger.error(f" !!!!!! CRITICAL: Key '{key_to_check}' in feature {i} is None!")
|
324 |
+
return f"錯誤: 特徵準備失敗,{key_to_check} 為 None", "N/A", "N/A"
|
325 |
+
|
326 |
+
if not isinstance(val_list, list):
|
327 |
+
logger.error(f" !!!!!! CRITICAL: Key '{key_to_check}' in feature {i} is not a list, but {type(val_list)}!")
|
328 |
+
return f"錯誤: 特徵準備失敗,{key_to_check} 不是列表", "N/A", "N/A"
|
329 |
+
|
330 |
+
if not val_list: # 如果列表為空
|
331 |
+
logger.warning(f" Feature {i} has an empty list for '{key_to_check}'. This might be okay if handled by collator for padding, but check if intended.")
|
332 |
+
|
333 |
+
# 檢查列表內部元素
|
334 |
+
for elem_idx, elem in enumerate(val_list):
|
335 |
+
if elem is None:
|
336 |
+
logger.error(f" !!!!!! CRITICAL: Key '{key_to_check}' in feature {i} contains None at index {elem_idx}!")
|
337 |
+
return f"錯誤: 特徵準備失敗,{key_to_check} 內部有 None", "N/A", "N/A"
|
338 |
+
if not isinstance(elem, int):
|
339 |
+
logger.error(f" !!!!!! CRITICAL: Key '{key_to_check}' in feature {i} contains non-integer {elem} (type: {type(elem)}) at index {elem_idx}!")
|
340 |
+
return f"錯誤: 特徵準備失敗,{key_to_check} 內部有非整數", "N/A", "N/A"
|
341 |
+
logger.info(f" Feature {i}, key '{key_to_check}' passed inspection. Length: {len(val_list)}")
|
342 |
+
|
343 |
qa_dataloader = DataLoader(
|
344 |
qa_features_dataset, collate_fn=default_data_collator, batch_size=8 # batch_size可以小一些
|
345 |
)
|