Spaces:
Running
Running
import os | |
import shutil | |
from tempfile import TemporaryDirectory | |
from typing import List, Optional, Tuple, Dict, Any | |
from huggingface_hub import ( | |
CommitOperationAdd, | |
HfApi, | |
ModelCard, | |
Discussion, | |
CommitInfo, | |
) | |
from huggingface_hub.file_download import repo_folder_name | |
from optimum.exporters.neuron import main_export | |
from optimum.exporters.tasks import TasksManager | |
# Public URL of the neuron-export Space. Unused in this chunk; presumably
# referenced by UI/messaging code elsewhere — TODO confirm.
SPACES_URL = "https://huggingface.co/spaces/optimum/neuron-export"
def get_default_compiler_kwargs() -> Dict[str, Any]:
    """Return the default neuron compiler kwargs.

    Mirrors the defaults of the original ``infer_compiler_kwargs`` helper:
    no auto-casting is requested, so both entries are ``None``.  A fresh
    dict is returned on every call so callers may mutate it freely.
    """
    defaults: Dict[str, Any] = {
        "auto_cast": None,  # None is equivalent to "none"
        "auto_cast_type": None,
        # Compiler-specific flags (disable_fast_relayout, disable_fallback)
        # could be added here when the target compiler supports them.
    }
    return defaults
def previous_pr(api: "HfApi", model_id: str, pr_title: str) -> Optional["Discussion"]:
    """Return the first open pull request on *model_id* titled *pr_title*.

    Returns ``None`` when no such PR exists or when the repo's discussions
    cannot be listed at all (any listing error is swallowed deliberately).
    """
    try:
        candidates = api.get_repo_discussions(repo_id=model_id)
    except Exception:
        return None
    return next(
        (
            d
            for d in candidates
            if d.status == "open" and d.is_pull_request and d.title == pr_title
        ),
        None,
    )
def export_and_git_add(model_id: str, task: str, folder: str, token: str) -> List:
    """Export *model_id* to Neuron format into *folder* and stage commit ops.

    Parameters:
        model_id: Hub repo id of the model to export.
        task: task name, or ``"auto"`` to infer it from the model.
        folder: local directory that receives the exported files.
        token: HF token used for the export and model-card access.

    Returns a list of ``CommitOperationAdd`` operations: every exported file
    placed under the repo's ``neuron/`` prefix, plus an updated ``README.md``
    when the model card could be loaded and tagged.

    Raises ``Exception`` when task inference fails, and re-raises whatever
    ``main_export`` raises on export failure.
    """
    compiler_kwargs = get_default_compiler_kwargs()

    # Resolve "auto" into a concrete task before exporting.
    if task == "auto":
        try:
            task = TasksManager.infer_task_from_model(model_id)
        except Exception as e:
            # Chain the cause so the original traceback is preserved.
            raise Exception(f"Could not infer task for model {model_id}: {e}") from e

    print(f"Exporting model {model_id} with task: {task}")
    try:
        main_export(
            model_name_or_path=model_id,
            output=folder,
            compiler_kwargs=compiler_kwargs,
            task=task,
            token=token,
            # Sensible defaults mirroring the optimum-neuron CLI.
            torch_dtype=None,  # let the model's own dtype win
            dynamic_batch_size=False,
            do_validation=False,  # validation disabled to avoid runtime issues
            trust_remote_code=False,
            force_download=False,
            local_files_only=False,
            optlevel="2",  # O2 is the default optimization level
            tensor_parallel_size=1,
            disable_neuron_cache=False,
            inline_weights_to_neff=True,
            output_attentions=False,
            output_hidden_states=False,
            # Static input shapes for common text models.
            batch_size=1,
            sequence_length=128,
        )
        print("Export completed successfully")
    except Exception as e:
        print(f"Export failed with error: {e}")
        raise

    # Stage every exported file under the repo's neuron/ prefix.
    operations = [
        CommitOperationAdd(
            path_in_repo=os.path.join("neuron", file_name),
            path_or_fileobj=os.path.join(folder, file_name),
        )
        for file_name in os.listdir(folder)
        if os.path.isfile(os.path.join(folder, file_name))  # skip sub-directories
    ]

    # Best-effort: tag the model card with "neuron"; failure is non-fatal.
    try:
        card = ModelCard.load(model_id, token=token)
        if card.data.tags is None:
            card.data.tags = []
        if "neuron" not in card.data.tags:
            card.data.tags.append("neuron")
        readme_path = os.path.join(folder, "README.md")
        card.save(readme_path)
        operations.append(
            CommitOperationAdd(
                path_in_repo="README.md",
                path_or_fileobj=readme_path,
            )
        )
    except Exception as e:
        print(f"Warning: Could not update model card: {e}")
    return operations
def convert(
    api: "HfApi",
    model_id: str,
    task: str,
    force: bool = False,
    token: Optional[str] = None,
) -> Tuple[str, "CommitInfo"]:
    """Export *model_id* to Neuron format and open a PR with the files.

    Returns ``("0", CommitInfo)`` on success.  On any failure it returns
    ``("1", error_message_string)`` — note the second element is a plain
    string in that case, despite the declared ``CommitInfo`` type; callers
    must be prepared for both shapes.

    Unless *force* is set, conversion is refused (reported as an error
    return) when the repo already contains ``neuron/`` files or when an
    open PR with the same title already exists.
    """
    pr_title = "Adding Neuron-optimized model files"
    info = api.model_info(model_id, token=token)
    filenames = set(s.rfilename for s in info.siblings)
    requesting_user = api.whoami(token=token)["name"]

    with TemporaryDirectory() as d:
        folder = os.path.join(d, repo_folder_name(repo_id=model_id, repo_type="models"))
        os.makedirs(folder, exist_ok=True)
        new_pr = None
        try:
            pr = previous_pr(api, model_id, pr_title)
            if any(fname.startswith("neuron/") for fname in filenames) and not force:
                raise Exception(
                    f"Model {model_id} already has Neuron files, skipping export."
                )
            elif pr is not None and not force:
                url = f"https://huggingface.co/{model_id}/discussions/{pr.num}"
                new_pr = pr
                raise Exception(
                    f"Model {model_id} already has an open PR: [{url}]({url})"
                )
            else:
                operations = export_and_git_add(model_id, task, folder, token=token)
                if not operations:
                    raise Exception("No files were generated during export")
                commit_description = f"""
🤖 Neuron Export Bot: On behalf of [{requesting_user}](https://huggingface.co/{requesting_user}), adding AWS Neuron-optimized model files.
Neuron-optimized models can achieve high-performance inference on AWS Inferentia and Trainium chips. Learn more:
- [AWS Neuron Documentation](https://awsdocs-neuron.readthedocs-hosted.com)
- [🤗 Optimum Neuron Guide](https://huggingface.co/docs/optimum-neuron/index)
"""
                new_pr = api.create_commit(
                    repo_id=model_id,
                    operations=operations,
                    commit_message=pr_title,
                    commit_description=commit_description,
                    create_pr=True,
                    token=token,
                )
        except Exception as e:
            # No cleanup here: the finally block below (and TemporaryDirectory
            # itself) already removes the export folder.
            print(f"Conversion failed with error: {e}")
            return "1", str(e)
        finally:
            # Defensive cleanup; largely redundant with TemporaryDirectory's
            # own teardown but kept to free disk space as early as possible.
            if os.path.exists(folder):
                shutil.rmtree(folder, ignore_errors=True)
        return "0", new_pr