leonarb committed on
Commit
8d1fa76
·
verified ·
1 Parent(s): 6ba101c

Fix invalid tokens

Browse files
Files changed (1) hide show
  1. app.py +11 -1
app.py CHANGED
@@ -73,7 +73,17 @@ def process_pdf_to_epub(pdf_file, title, author):
73
  do_sample=True,
74
  )
75
  prompt_length = inputs["input_ids"].shape[1]
76
- new_tokens = output[:, prompt_length:]
 
 
 
 
 
 
 
 
 
 
77
 
78
  except Exception as processing_error:
79
  decoded = f"[Processing error on page {page_num}: {str(processing_error)}]"
 
73
  do_sample=True,
74
  )
75
  prompt_length = inputs["input_ids"].shape[1]
76
+ new_tokens = output[:, prompt_length:].detach().cpu()
77
+
78
+ decoded = "[No output generated]"
79
+ if new_tokens.shape[1] > 0:
80
+ try:
81
+ decoded_list = processor.tokenizer.batch_decode(new_tokens, skip_special_tokens=True)
82
+ decoded = decoded_list[0].strip() if decoded_list else "[No output generated]"
83
+ except Exception as decode_error:
84
+ decoded = f"[Decoding error on page {page_num}: {str(decode_error)}]"
85
+ else:
86
+ decoded = "[Model returned no new tokens]"
87
 
88
  except Exception as processing_error:
89
  decoded = f"[Processing error on page {page_num}: {str(processing_error)}]"