Inference-APP-Document-Understanding-at-paragraphlevel-v1

Runtime error

App Files Community

pierreguillou committed on Feb 15, 2023

Commit

51a692d

1 Parent(s): 399d04b

Update files/functions.py

Browse files

Files changed (1) hide show

files/functions.py +11 -2

files/functions.py CHANGED Viewed

@@ -337,6 +337,7 @@ def sort_data_wo_labels(bboxes, texts):
     return sorted_bboxes, sorted_texts
 ## PDf Processing
 # get filename and images of PDF pages
@@ -371,6 +372,7 @@ def pdf_to_images(uploaded_pdf):
     return filename, msg, images
 # Extraction of image data (text and bounding boxes)
 def extraction_data_from_image(images):
@@ -435,6 +437,7 @@ def extraction_data_from_image(images):
             return dataset, texts_lines, texts_pars, texts_lines_par, row_indexes, par_boxes, line_boxes, lines_par_boxes
 ## Inference
 def prepare_inference_features_paragraph(example, cls_box = cls_box, sep_box = sep_box):
@@ -530,7 +533,8 @@ def prepare_inference_features_paragraph(example, cls_box = cls_box, sep_box = s
       "normalized_bboxes": bb_list,
   }
-  from torch.utils.data import Dataset
 class CustomDataset(Dataset):
   def __init__(self, dataset, tokenizer):
@@ -552,7 +556,8 @@ class CustomDataset(Dataset):
     return encoding
-    import torch.nn.functional as F
 # get predictions at token level
 def predictions_token_level(images, custom_encoded_dataset):
@@ -602,6 +607,7 @@ def predictions_token_level(images, custom_encoded_dataset):
     else:
         print("An error occurred while getting predictions!")
 from functools import reduce
 # Get predictions (line level)
@@ -699,6 +705,7 @@ def predictions_paragraph_level_gradio(dataset, outputs, images_ids_list, chunk_
     else:
         print("An error occurred while getting predictions!")
 # Get labeled images with lines bounding boxes
 def get_labeled_images_gradio(dataset, images_ids_list, bboxes_list_dict, probs_dict_dict):
@@ -731,6 +738,7 @@ def get_labeled_images_gradio(dataset, images_ids_list, bboxes_list_dict, probs_
     return labeled_images
 # get data of encoded chunk
 def get_encoded_chunk_inference(index_chunk=None):
@@ -783,6 +791,7 @@ def get_encoded_chunk_inference(index_chunk=None):
   return image, df, num_tokens, page_no, num_pages
 # display chunk of PDF image and its data
 def display_chunk_paragraphs_inference(index_chunk=None):

     return sorted_bboxes, sorted_texts
 ## PDf Processing
 # get filename and images of PDF pages
     return filename, msg, images
 # Extraction of image data (text and bounding boxes)
 def extraction_data_from_image(images):
             return dataset, texts_lines, texts_pars, texts_lines_par, row_indexes, par_boxes, line_boxes, lines_par_boxes
 ## Inference
 def prepare_inference_features_paragraph(example, cls_box = cls_box, sep_box = sep_box):
       "normalized_bboxes": bb_list,
   }
+from torch.utils.data import Dataset
 class CustomDataset(Dataset):
   def __init__(self, dataset, tokenizer):
     return encoding
+import torch.nn.functional as F
 # get predictions at token level
 def predictions_token_level(images, custom_encoded_dataset):
     else:
         print("An error occurred while getting predictions!")
 from functools import reduce
 # Get predictions (line level)
     else:
         print("An error occurred while getting predictions!")
 # Get labeled images with lines bounding boxes
 def get_labeled_images_gradio(dataset, images_ids_list, bboxes_list_dict, probs_dict_dict):
     return labeled_images
 # get data of encoded chunk
 def get_encoded_chunk_inference(index_chunk=None):
   return image, df, num_tokens, page_no, num_pages
 # display chunk of PDF image and its data
 def display_chunk_paragraphs_inference(index_chunk=None):