Spaces:
Running
Running
import os | |
import shutil | |
from tempfile import TemporaryDirectory | |
from typing import List, Optional, Tuple, Dict, Any | |
from huggingface_hub import ( | |
CommitOperationAdd, | |
HfApi, | |
ModelCard, | |
Discussion, | |
CommitInfo, | |
) | |
from huggingface_hub.file_download import repo_folder_name | |
from optimum.exporters.neuron import main_export | |
from optimum.exporters.tasks import TasksManager | |
# Public URL of the neuron-export Space. Unused in this chunk; presumably
# referenced by UI/messaging code elsewhere — TODO confirm.
SPACES_URL = "https://huggingface.co/spaces/optimum/neuron-export"
def get_default_compiler_kwargs() -> Dict[str, Any]:
    """Return the default neuron compiler kwargs.

    Mirrors the defaults of the original ``infer_compiler_kwargs`` helper:
    no auto-casting is requested, so both entries are ``None``.  A fresh
    dict is returned on every call so callers may mutate it freely.
    """
    defaults: Dict[str, Any] = {
        "auto_cast": None,  # None is equivalent to "none"
        "auto_cast_type": None,
        # Compiler-specific flags (disable_fast_relayout, disable_fallback)
        # could be added here when the target compiler supports them.
    }
    return defaults
def previous_pr(api: "HfApi", model_id: str, pr_title: str) -> Optional["Discussion"]:
    """Return the first open pull request on *model_id* titled *pr_title*.

    Returns ``None`` when no such PR exists or when the repo's discussions
    cannot be listed at all (any listing error is swallowed deliberately).
    """
    try:
        candidates = api.get_repo_discussions(repo_id=model_id)
    except Exception:
        return None
    return next(
        (
            d
            for d in candidates
            if d.status == "open" and d.is_pull_request and d.title == pr_title
        ),
        None,
    )
def export_and_git_add(model_id: str, task: str, folder: str, token: str) -> List:
    """Export *model_id* to Neuron format into *folder* and stage commit ops.

    Parameters:
        model_id: Hub repo id of the model to export.
        task: task name, or ``"auto"`` to infer it from the model.
        folder: local directory that receives the exported files.
        token: HF token used for the export and model-card access.

    Returns a list of ``CommitOperationAdd`` operations: every exported file
    placed under the repo's ``neuron/`` prefix, plus an updated ``README.md``
    when the model card could be loaded and tagged.

    Raises ``Exception`` when task inference fails, and re-raises whatever
    ``main_export`` raises on export failure.
    """
    compiler_kwargs = get_default_compiler_kwargs()

    # Resolve "auto" into a concrete task before exporting.
    if task == "auto":
        try:
            task = TasksManager.infer_task_from_model(model_id)
        except Exception as e:
            # Chain the cause so the original traceback is preserved.
            raise Exception(f"Could not infer task for model {model_id}: {e}") from e

    print(f"Exporting model {model_id} with task: {task}")
    try:
        main_export(
            model_name_or_path=model_id,
            output=folder,
            compiler_kwargs=compiler_kwargs,
            task=task,
            token=token,
            # Sensible defaults mirroring the optimum-neuron CLI.
            torch_dtype=None,  # let the model's own dtype win
            dynamic_batch_size=False,
            do_validation=False,  # validation disabled to avoid runtime issues
            trust_remote_code=False,
            force_download=False,
            local_files_only=False,
            optlevel="2",  # O2 is the default optimization level
            tensor_parallel_size=1,
            disable_neuron_cache=False,
            inline_weights_to_neff=True,
            output_attentions=False,
            output_hidden_states=False,
            # Static input shapes for common text models.
            batch_size=1,
            sequence_length=128,
        )
        print("Export completed successfully")
    except Exception as e:
        print(f"Export failed with error: {e}")
        raise

    # Stage every exported file under the repo's neuron/ prefix.
    operations = [
        CommitOperationAdd(
            path_in_repo=os.path.join("neuron", file_name),
            path_or_fileobj=os.path.join(folder, file_name),
        )
        for file_name in os.listdir(folder)
        if os.path.isfile(os.path.join(folder, file_name))  # skip sub-directories
    ]

    # Best-effort: tag the model card with "neuron"; failure is non-fatal.
    try:
        card = ModelCard.load(model_id, token=token)
        if card.data.tags is None:
            card.data.tags = []
        if "neuron" not in card.data.tags:
            card.data.tags.append("neuron")
        readme_path = os.path.join(folder, "README.md")
        card.save(readme_path)
        operations.append(
            CommitOperationAdd(
                path_in_repo="README.md",
                path_or_fileobj=readme_path,
            )
        )
    except Exception as e:
        print(f"Warning: Could not update model card: {e}")
    return operations
def convert(
    api: "HfApi",
    model_id: str,
    task: str,
    force: bool = False,
    token: Optional[str] = None,
) -> Tuple[str, "CommitInfo"]:
    """Export *model_id* to Neuron format and open a PR with the files.

    Returns ``("0", CommitInfo)`` on success.  On any failure it returns
    ``("1", error_message_string)`` — note the second element is a plain
    string in that case, despite the declared ``CommitInfo`` type; callers
    must be prepared for both shapes.

    Unless *force* is set, conversion is refused (reported as an error
    return) when the repo already contains ``neuron/`` files or when an
    open PR with the same title already exists.
    """
    pr_title = "Adding Neuron-optimized model files"
    info = api.model_info(model_id, token=token)
    filenames = set(s.rfilename for s in info.siblings)
    requesting_user = api.whoami(token=token)["name"]

    with TemporaryDirectory() as d:
        folder = os.path.join(d, repo_folder_name(repo_id=model_id, repo_type="models"))
        os.makedirs(folder, exist_ok=True)
        new_pr = None
        try:
            pr = previous_pr(api, model_id, pr_title)
            if any(fname.startswith("neuron/") for fname in filenames) and not force:
                raise Exception(
                    f"Model {model_id} already has Neuron files, skipping export."
                )
            elif pr is not None and not force:
                url = f"https://huggingface.co/{model_id}/discussions/{pr.num}"
                new_pr = pr
                raise Exception(
                    f"Model {model_id} already has an open PR: [{url}]({url})"
                )
            else:
                operations = export_and_git_add(model_id, task, folder, token=token)
                if not operations:
                    raise Exception("No files were generated during export")
                commit_description = f"""
🤖 Neuron Export Bot: On behalf of [{requesting_user}](https://huggingface.co/{requesting_user}), adding AWS Neuron-optimized model files.
Neuron-optimized models can achieve high-performance inference on AWS Inferentia and Trainium chips. Learn more:
- [AWS Neuron Documentation](https://awsdocs-neuron.readthedocs-hosted.com)
- [🤗 Optimum Neuron Guide](https://huggingface.co/docs/optimum-neuron/index)
"""
                new_pr = api.create_commit(
                    repo_id=model_id,
                    operations=operations,
                    commit_message=pr_title,
                    commit_description=commit_description,
                    create_pr=True,
                    token=token,
                )
        except Exception as e:
            # No cleanup here: the finally block below (and TemporaryDirectory
            # itself) already removes the export folder.
            print(f"Conversion failed with error: {e}")
            return "1", str(e)
        finally:
            # Defensive cleanup; largely redundant with TemporaryDirectory's
            # own teardown but kept to free disk space as early as possible.
            if os.path.exists(folder):
                shutil.rmtree(folder, ignore_errors=True)
        return "0", new_pr