# neuron-export / optimum_neuron_export.py
# Uploaded by badaoui (HF Staff) — "Create optimum_neuron_export.py", commit a54d563 (verified)
import os
import shutil
from tempfile import TemporaryDirectory
from typing import List, Optional, Tuple, Dict, Any
from huggingface_hub import (
CommitOperationAdd,
HfApi,
ModelCard,
Discussion,
CommitInfo,
)
from huggingface_hub.file_download import repo_folder_name
from optimum.exporters.neuron import main_export
from optimum.exporters.tasks import TasksManager
# URL of the Hugging Face Space that hosts this exporter.
# NOTE(review): not referenced anywhere in the visible code — presumably used
# by another module (e.g. the Space UI) or intended for commit descriptions; confirm.
SPACES_URL = "https://huggingface.co/spaces/optimum/neuron-export"
def get_default_compiler_kwargs() -> Dict[str, Any]:
    """Build the default compiler keyword arguments for a Neuron export.

    Mirrors the defaults of the ``infer_compiler_kwargs`` helper from the
    original code: auto-casting is left unset (equivalent to ``"none"``).
    """
    defaults: Dict[str, Any] = {
        "auto_cast": None,  # default to None (equivalent to "none")
        "auto_cast_type": None,
    }
    # Compiler-specific switches such as "disable_fast_relayout" or
    # "disable_fallback" are only valid for certain compilers and are
    # intentionally not set here.
    return defaults
def previous_pr(api: "HfApi", model_id: str, pr_title: str) -> Optional["Discussion"]:
    """Return the first open pull request on *model_id* titled *pr_title*, or None.

    Any failure while listing the repo discussions (missing repo, auth error,
    network issue) is treated as "no matching PR" rather than raised.
    """
    try:
        all_discussions = api.get_repo_discussions(repo_id=model_id)
    except Exception:
        return None
    matching = (
        d
        for d in all_discussions
        if d.is_pull_request and d.status == "open" and d.title == pr_title
    )
    return next(matching, None)
def export_and_git_add(model_id: str, task: str, folder: str, token: str) -> List:
    """Export *model_id* to Neuron format into *folder* and build the commit operations.

    Runs ``optimum.exporters.neuron.main_export`` with fixed default settings
    (batch_size=1, sequence_length=128, O2 optimization, validation disabled),
    then returns a list of ``CommitOperationAdd`` operations placing each
    produced file under ``neuron/`` in the target repo. Best-effort: also tags
    the model card with "neuron" and stages the updated README.md.

    Raises:
        Exception: if the task cannot be inferred (when ``task == "auto"``),
            or if the export itself fails.
    """
    # Get default compiler kwargs
    compiler_kwargs = get_default_compiler_kwargs()
    # Infer task if it's "auto"
    if task == "auto":
        try:
            task = TasksManager.infer_task_from_model(model_id)
        except Exception as e:
            raise Exception(f"Could not infer task for model {model_id}: {e}")
    print(f"Exporting model {model_id} with task: {task}")
    try:
        # Call main_export with all required parameters
        main_export(
            model_name_or_path=model_id,
            output=folder,
            compiler_kwargs=compiler_kwargs,
            task=task,
            token=token,
            # Add other commonly needed parameters with sensible defaults
            torch_dtype=None,  # Let it use model's default
            dynamic_batch_size=False,
            do_validation=False,  # Disable validation for now to avoid issues
            trust_remote_code=False,
            force_download=False,
            local_files_only=False,
            # Default optimization level (O2 is the default from original code)
            optlevel="2",
            # Other defaults
            tensor_parallel_size=1,
            disable_neuron_cache=False,
            inline_weights_to_neff=True,
            output_attentions=False,
            output_hidden_states=False,
            # Add input shapes for common models
            batch_size=1,
            sequence_length=128,
        )
        print(f"Export completed successfully")
    except Exception as e:
        print(f"Export failed with error: {e}")
        raise
    # Stage every exported file under the "neuron/" prefix in the repo.
    operations = [
        CommitOperationAdd(
            path_in_repo=os.path.join("neuron", file_name),
            path_or_fileobj=os.path.join(folder, file_name),
        )
        for file_name in os.listdir(folder)
        if os.path.isfile(os.path.join(folder, file_name))  # Only add files, not directories
    ]
    # Best-effort model-card update: add the "neuron" tag and stage README.md.
    # A failure here must not abort the export, hence the broad catch below.
    try:
        card = ModelCard.load(model_id, token=token)
        if card.data.tags is None:
            card.data.tags = []
        if "neuron" not in card.data.tags:
            card.data.tags.append("neuron")
        card.save(os.path.join(folder, "README.md"))
        operations.append(
            CommitOperationAdd(
                path_in_repo="README.md",
                path_or_fileobj=os.path.join(folder, "README.md")
            )
        )
    except Exception as e:
        print(f"Warning: Could not update model card: {e}")
        pass
    return operations
def convert(
    api: "HfApi",
    model_id: str,
    task: str,
    force: bool = False,
    token: Optional[str] = None,
) -> Tuple[str, "CommitInfo"]:
    """Export *model_id* to Neuron format and open a PR adding the files.

    Skips the export (unless *force*) when the repo already contains files
    under ``neuron/`` or when an open PR with the same title already exists.

    Returns:
        ``("0", CommitInfo)`` on success, or ``("1", <error message str>)``
        on any failure — errors are reported via the return value rather
        than raised.
    """
    pr_title = "Adding Neuron-optimized model files"
    info = api.model_info(model_id, token=token)
    filenames = {s.rfilename for s in info.siblings}
    requesting_user = api.whoami(token=token)["name"]
    with TemporaryDirectory() as d:
        folder = os.path.join(d, repo_folder_name(repo_id=model_id, repo_type="models"))
        os.makedirs(folder, exist_ok=True)
        new_pr = None
        try:
            pr = previous_pr(api, model_id, pr_title)
            if any(fname.startswith("neuron/") for fname in filenames) and not force:
                raise Exception(
                    f"Model {model_id} already has Neuron files, skipping export."
                )
            elif pr is not None and not force:
                url = f"https://huggingface.co/{model_id}/discussions/{pr.num}"
                new_pr = pr
                raise Exception(
                    f"Model {model_id} already has an open PR: [{url}]({url})"
                )
            else:
                operations = export_and_git_add(model_id, task, folder, token=token)
                if not operations:
                    raise Exception("No files were generated during export")
                commit_description = f"""
🤖 Neuron Export Bot: On behalf of [{requesting_user}](https://huggingface.co/{requesting_user}), adding AWS Neuron-optimized model files.
Neuron-optimized models can achieve high-performance inference on AWS Inferentia and Trainium chips. Learn more:
- [AWS Neuron Documentation](https://awsdocs-neuron.readthedocs-hosted.com)
- [🤗 Optimum Neuron Guide](https://huggingface.co/docs/optimum-neuron/index)
"""
                new_pr = api.create_commit(
                    repo_id=model_id,
                    operations=operations,
                    commit_message=pr_title,
                    commit_description=commit_description,
                    create_pr=True,
                    token=token,
                )
        except Exception as e:
            # The finally block below (and TemporaryDirectory itself) handles
            # cleanup, so no rmtree is needed here.
            print(f"Conversion failed with error: {e}")  # Print the actual error
            return "1", str(e)  # Return error code and message
        finally:
            # Ensure the export folder is removed even on the success path.
            if os.path.exists(folder):
                shutil.rmtree(folder, ignore_errors=True)
    return "0", new_pr