Spaces:
Running
Running
File size: 2,111 Bytes
08ccc8e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
import argparse
import glob
import os
import sys
import torch
from transformers import AutoConfig, AutoTokenizer
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from models import ReactionT5Yield
def parse_args():
    """
    Parse command line arguments for the checkpoint conversion script.

    Returns:
        argparse.Namespace with the attributes ``model_name_or_path``,
        ``base_model_name_or_path``, ``output_dir`` and ``fc_dropout``.

    Raises:
        SystemExit: raised by argparse when a required option is missing or
            a value cannot be parsed.
    """
    parser = argparse.ArgumentParser(
        description="ReactionT5Yield model implemented with nn.Module with transformers' PreTrainedModel"
    )
    # Both model locations are handed straight to ``from_pretrained`` by the
    # script, so a missing value is reported here as a usage error instead of
    # failing later with ``None``.
    parser.add_argument(
        "--model_name_or_path",
        type=str,
        required=True,
        help="The name of a finetuned model or path to a model which you want to convert. You can use your local models or models uploaded to hugging face.",
    )
    parser.add_argument(
        "--base_model_name_or_path",
        type=str,
        required=True,
        help="The name of the base model of the finetuned model",
    )
    parser.add_argument(
        "--output_dir",
        type=str,
        default="./",
        help="Directory to save the converted model files (tokenizer, weights and config).",
    )
    # NOTE(review): presumably the dropout rate of the model's fully connected
    # head; the value is only forwarded to ReactionT5Yield -- confirm there.
    parser.add_argument(
        "--fc_dropout",
        type=float,
        default=0.0,
        help="fc_dropout value forwarded to ReactionT5Yield.",
    )
    return parser.parse_args()
if __name__ == "__main__":
    # Convert a ReactionT5Yield checkpoint stored as ``*.pth`` files into a
    # directory holding the tokenizer files, ``pytorch_model.bin`` and the
    # base model config.
    CFG = parse_args()

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(CFG.output_dir, exist_ok=True)

    CFG.tokenizer = AutoTokenizer.from_pretrained(
        CFG.model_name_or_path, return_tensors="pt"
    )

    config_path = os.path.join(CFG.model_name_or_path, "config.pth")
    model = ReactionT5Yield(
        CFG,
        config_path=config_path,
        pretrained=False,
    )

    # Try every checkpoint candidate until one matches the model's parameters.
    # Sorted so the file that gets picked does not depend on directory order.
    pth_files = sorted(glob.glob(os.path.join(CFG.model_name_or_path, "*.pth")))
    for pth_file in pth_files:
        # config.pth also matches the glob but is the model config, not weights.
        if os.path.abspath(pth_file) == os.path.abspath(config_path):
            continue
        # NOTE(security): torch.load unpickles the file; only run this script
        # on checkpoints from a trusted source.
        state = torch.load(
            pth_file,
            map_location=torch.device("cpu"),
        )
        try:
            model.load_state_dict(state)
        except (RuntimeError, TypeError, AttributeError, KeyError) as err:
            # Not a state dict for this model; report it and try the next file.
            print(f"Skipping {pth_file}: {err}", file=sys.stderr)
            continue
        break
    else:
        # Without this guard the untrained weights would be saved as if they
        # were the converted model.
        raise RuntimeError(
            f"No '*.pth' file in {CFG.model_name_or_path!r} could be loaded "
            "into ReactionT5Yield; nothing was converted."
        )

    config = AutoConfig.from_pretrained(CFG.base_model_name_or_path)
    # Resize the vocabulary entry to the tokenizer that is saved alongside it.
    config.vocab_size = len(CFG.tokenizer)

    CFG.tokenizer.save_pretrained(CFG.output_dir)
    torch.save(model.state_dict(), os.path.join(CFG.output_dir, "pytorch_model.bin"))
    config.save_pretrained(CFG.output_dir)
|