# Source: ReactionT5 / task_yield / convert_to_PreTrainedModel.py
# (Hugging Face Hub listing residue: uploaded by sagawa, "Upload 42 files", commit 08ccc8e verified)
import argparse
import glob
import os
import sys
import torch
from transformers import AutoConfig, AutoTokenizer
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from models import ReactionT5Yield
def parse_args():
    """
    Parse command line arguments.

    Returns:
        argparse.Namespace: Parsed arguments with attributes
        ``model_name_or_path``, ``base_model_name_or_path``,
        ``output_dir`` (default ``"./"``), and ``fc_dropout``
        (default ``0.0``).
    """
    parser = argparse.ArgumentParser(
        description="ReactionT5Yield model implemented with nn.Module with transformers' PreTrainedModel"
    )
    parser.add_argument(
        "--model_name_or_path",
        type=str,
        help="The name of a finetuned model or path to a model which you want to convert. You can use your local models or models uploaded to hugging face.",
    )
    parser.add_argument(
        "--base_model_name_or_path",
        type=str,
        help="The name of the base model of the finetuned model",
    )
    parser.add_argument(
        "--output_dir",
        type=str,
        default="./",
        help="Directory to save the prediction.",
    )
    parser.add_argument(
        "--fc_dropout",
        type=float,
        default=0.0,
        help="Dropout rate applied to the model's fully connected layers.",
    )
    return parser.parse_args()
if __name__ == "__main__":
    CFG = parse_args()
    # Create the output directory if needed (exist_ok avoids the
    # check-then-create race of os.path.exists + makedirs).
    os.makedirs(CFG.output_dir, exist_ok=True)
    # Reuse the tokenizer shipped with the finetuned checkpoint.
    CFG.tokenizer = AutoTokenizer.from_pretrained(
        CFG.model_name_or_path, return_tensors="pt"
    )
    # Rebuild the custom nn.Module architecture; weights are loaded below.
    model = ReactionT5Yield(
        CFG,
        config_path=os.path.join(CFG.model_name_or_path, "config.pth"),
        pretrained=False,
    )
    # The checkpoint directory may contain several *.pth files (including
    # config.pth, which is not a state_dict); try each until one loads.
    pth_files = glob.glob(os.path.join(CFG.model_name_or_path, "*.pth"))
    loaded = False
    for pth_file in pth_files:
        state = torch.load(
            pth_file,
            map_location=torch.device("cpu"),
        )
        try:
            model.load_state_dict(state)
            loaded = True
            break
        except Exception:
            # Not a compatible state_dict for this model; try the next file.
            # (Bare `except:` would also swallow KeyboardInterrupt/SystemExit.)
            continue
    if not loaded:
        # Without this check the script would silently export a
        # randomly-initialized model.
        raise RuntimeError(
            f"No loadable state_dict (*.pth) found in {CFG.model_name_or_path}"
        )
    config = AutoConfig.from_pretrained(CFG.base_model_name_or_path)
    # Keep vocab size in sync with the (possibly extended) tokenizer.
    config.vocab_size = len(CFG.tokenizer)
    # Write tokenizer, weights, and config in the layout expected by
    # transformers' from_pretrained.
    CFG.tokenizer.save_pretrained(CFG.output_dir)
    torch.save(model.state_dict(), os.path.join(CFG.output_dir, "pytorch_model.bin"))
    config.save_pretrained(CFG.output_dir)