from transformers import AutoTokenizer | |
import torchvision.transforms as transforms | |
# Module-level memo of tokenizers already loaded by get_tokenizer(), keyed by
# the exact ``name`` argument it was called with.
TOKENIZERS_CACHE = {}


def get_tokenizer(name):
    """Return the tokenizer for *name*, loading it on first request only.

    On a cache miss the tokenizer is created with
    ``AutoTokenizer.from_pretrained(name, use_fast=True)``, its ``pad_token``
    is overwritten with its ``eos_token``, and the instance is stored in
    ``TOKENIZERS_CACHE``. Later calls with the same *name* return that same
    stored object, so any mutation a caller makes to it is visible to every
    other caller.

    Args:
        name: Identifier forwarded to ``AutoTokenizer.from_pretrained`` and
            used as the cache key.

    Returns:
        The cached tokenizer instance for *name*.
    """
    if name not in TOKENIZERS_CACHE:
        tokenizer = AutoTokenizer.from_pretrained(name, use_fast=True)
        # Padding reuses the EOS token. This is applied unconditionally, so a
        # pad token shipped with the tokenizer is replaced as well.
        tokenizer.pad_token = tokenizer.eos_token
        TOKENIZERS_CACHE[name] = tokenizer
    return TOKENIZERS_CACHE[name]
def get_image_processor(img_size):
    """Build the image preprocessing pipeline for square inputs.

    The returned ``transforms.Compose`` applies two steps in order:
    ``transforms.Resize((img_size, img_size))`` followed by
    ``transforms.ToTensor()``. Because both target dimensions are given
    explicitly, the resize does not preserve the input's aspect ratio.

    Args:
        img_size: Edge length used for both the height and the width passed
            to ``transforms.Resize``.

    Returns:
        A ``transforms.Compose`` instance; a new one is built on every call.
    """
    return transforms.Compose([
        transforms.Resize((img_size, img_size)),
        transforms.ToTensor(),
    ])