pierreguillou committed on
Commit
51a692d
·
1 Parent(s): 399d04b

Update files/functions.py

Browse files
Files changed (1) hide show
  1. files/functions.py +11 -2
files/functions.py CHANGED
@@ -337,6 +337,7 @@ def sort_data_wo_labels(bboxes, texts):
337
 
338
  return sorted_bboxes, sorted_texts
339
 
 
340
  ## PDF Processing
341
 
342
  # get filename and images of PDF pages
@@ -371,6 +372,7 @@ def pdf_to_images(uploaded_pdf):
371
 
372
  return filename, msg, images
373
 
 
374
  # Extraction of image data (text and bounding boxes)
375
  def extraction_data_from_image(images):
376
 
@@ -435,6 +437,7 @@ def extraction_data_from_image(images):
435
 
436
  return dataset, texts_lines, texts_pars, texts_lines_par, row_indexes, par_boxes, line_boxes, lines_par_boxes
437
 
 
438
  ## Inference
439
 
440
  def prepare_inference_features_paragraph(example, cls_box = cls_box, sep_box = sep_box):
@@ -530,7 +533,8 @@ def prepare_inference_features_paragraph(example, cls_box = cls_box, sep_box = s
530
  "normalized_bboxes": bb_list,
531
  }
532
 
533
- from torch.utils.data import Dataset
 
534
 
535
  class CustomDataset(Dataset):
536
  def __init__(self, dataset, tokenizer):
@@ -552,7 +556,8 @@ class CustomDataset(Dataset):
552
 
553
  return encoding
554
 
555
- import torch.nn.functional as F
 
556
 
557
  # get predictions at token level
558
  def predictions_token_level(images, custom_encoded_dataset):
@@ -602,6 +607,7 @@ def predictions_token_level(images, custom_encoded_dataset):
602
  else:
603
  print("An error occurred while getting predictions!")
604
 
 
605
  from functools import reduce
606
 
607
  # Get predictions (line level)
@@ -699,6 +705,7 @@ def predictions_paragraph_level_gradio(dataset, outputs, images_ids_list, chunk_
699
  else:
700
  print("An error occurred while getting predictions!")
701
 
 
702
  # Get labeled images with lines bounding boxes
703
  def get_labeled_images_gradio(dataset, images_ids_list, bboxes_list_dict, probs_dict_dict):
704
 
@@ -731,6 +738,7 @@ def get_labeled_images_gradio(dataset, images_ids_list, bboxes_list_dict, probs_
731
 
732
  return labeled_images
733
 
 
734
  # get data of encoded chunk
735
  def get_encoded_chunk_inference(index_chunk=None):
736
 
@@ -783,6 +791,7 @@ def get_encoded_chunk_inference(index_chunk=None):
783
 
784
  return image, df, num_tokens, page_no, num_pages
785
 
 
786
  # display chunk of PDF image and its data
787
  def display_chunk_paragraphs_inference(index_chunk=None):
788
 
 
337
 
338
  return sorted_bboxes, sorted_texts
339
 
340
+
341
  ## PDF Processing
342
 
343
  # get filename and images of PDF pages
 
372
 
373
  return filename, msg, images
374
 
375
+
376
  # Extraction of image data (text and bounding boxes)
377
  def extraction_data_from_image(images):
378
 
 
437
 
438
  return dataset, texts_lines, texts_pars, texts_lines_par, row_indexes, par_boxes, line_boxes, lines_par_boxes
439
 
440
+
441
  ## Inference
442
 
443
  def prepare_inference_features_paragraph(example, cls_box = cls_box, sep_box = sep_box):
 
533
  "normalized_bboxes": bb_list,
534
  }
535
 
536
+
537
+ from torch.utils.data import Dataset
538
 
539
  class CustomDataset(Dataset):
540
  def __init__(self, dataset, tokenizer):
 
556
 
557
  return encoding
558
 
559
+
560
+ import torch.nn.functional as F
561
 
562
  # get predictions at token level
563
  def predictions_token_level(images, custom_encoded_dataset):
 
607
  else:
608
  print("An error occurred while getting predictions!")
609
 
610
+
611
  from functools import reduce
612
 
613
  # Get predictions (line level)
 
705
  else:
706
  print("An error occurred while getting predictions!")
707
 
708
+
709
  # Get labeled images with lines bounding boxes
710
  def get_labeled_images_gradio(dataset, images_ids_list, bboxes_list_dict, probs_dict_dict):
711
 
 
738
 
739
  return labeled_images
740
 
741
+
742
  # get data of encoded chunk
743
  def get_encoded_chunk_inference(index_chunk=None):
744
 
 
791
 
792
  return image, df, num_tokens, page_no, num_pages
793
 
794
+
795
  # display chunk of PDF image and its data
796
  def display_chunk_paragraphs_inference(index_chunk=None):
797