Spaces:
Running
Running
"""Hugging Face Hub integration utilities.""" | |
import re | |
import os | |
from typing import Optional | |
from huggingface_hub import HfApi, create_repo | |
from dotenv import load_dotenv | |
# Populate the process environment via python-dotenv, then read the Hub
# access token from it. HF_TOKEN is None when the variable is not set.
load_dotenv()
HF_TOKEN = os.environ.get("HUGGINGFACE_ACCESS_TOKEN")
def is_valid_repo_name(repo_name: str) -> bool:
    """Check if a repository name is valid for Hugging Face Hub.

    This is a character-set check only: the name must be non-empty and
    consist solely of ASCII letters, digits, and the characters
    ``_``, ``.``, ``/`` and ``-``.

    Args:
        repo_name: The repository name to validate

    Returns:
        True if the name is valid, False otherwise
    """
    # fullmatch instead of match + "$": "$" also matches just before a
    # trailing newline, which would let a name like "repo\n" through.
    return re.fullmatch(r"[A-Za-z0-9_./-]+", repo_name) is not None
def create_hf_repo(
    repo_name: str,
    repo_type: str = "dataset",
    private: bool = False
) -> str:
    """Create a new repository on Hugging Face Hub.

    The name is validated with ``is_valid_repo_name`` before any request is
    made. ``create_repo`` is called with ``exist_ok=True`` and the
    module-level ``HF_TOKEN``.

    Args:
        repo_name: Name of the repository to create
        repo_type: Type of repository (dataset, model, or space)
        private: Whether the repository should be private

    Returns:
        The repository ID (``repo_name`` exactly as it was passed in)

    Raises:
        Exception: If the repository name is invalid or creation fails.
            On a creation failure the original error is attached as
            ``__cause__``.
    """
    if not is_valid_repo_name(repo_name):
        # The message mirrors what the validator actually accepts
        # (it allows '/', e.g. for "namespace/name" style IDs).
        raise Exception(
            "Invalid repo name: only letters, digits, and the characters "
            "'-', '_', '.', '/' are allowed"
        )

    try:
        create_repo(
            repo_id=repo_name,
            repo_type=repo_type,
            private=private,
            exist_ok=True,
            token=HF_TOKEN,
        )
    except Exception as e:
        # Chain explicitly so the underlying Hub error stays in the traceback.
        raise Exception(f"Error creating repository: {str(e)}") from e
    return repo_name
def upload_to_hf(
    file_path: str,
    repo_name: str,
    repo_type: str = "dataset",
    private: bool = False
) -> str:
    """Upload a file to Hugging Face Hub.

    The target repository is created first via ``create_hf_repo``. The file
    is then uploaded to the repository root under its base name.

    Args:
        file_path: Path to the file to upload
        repo_name: Name of the repository to upload to
        repo_type: Type of repository
        private: Whether the repository should be private

    Returns:
        The repository ID

    Raises:
        Exception: If the local file does not exist, or if repository
            creation or the upload fails. The original error is attached
            as ``__cause__``.
    """
    try:
        # Fail before touching the Hub: otherwise a wrong path would still
        # leave a freshly created (empty) repository behind.
        if not os.path.isfile(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")

        # Create or get repository
        repo_id = create_hf_repo(repo_name, repo_type, private)

        # Upload file
        api = HfApi(token=HF_TOKEN)
        api.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=os.path.basename(file_path),
            repo_id=repo_id,
            repo_type=repo_type,
            token=HF_TOKEN,
        )
        return repo_id
    except Exception as e:
        raise Exception(f"Error uploading to Hugging Face Hub: {str(e)}") from e
def download_from_hf(
    repo_name: str,
    file_name: str,
    local_path: Optional[str] = None
) -> str:
    """Download a file from Hugging Face Hub.

    The file is fetched over HTTPS from the ``main`` revision of the dataset
    repository ``repo_name`` and written to disk. No access token is sent
    with the request.

    Args:
        repo_name: Name of the repository to download from
        file_name: Name of the file to download
        local_path: Optional local path to save the file to. Defaults to
            ``data/<file_name>``.

    Returns:
        Path to the downloaded file

    Raises:
        Exception: If the download fails (including a non-200 response) or
            the file cannot be written. The original error is attached as
            ``__cause__``.
    """
    try:
        import requests

        # Use the "resolve" endpoint rather than "raw": for Git LFS tracked
        # files "raw" returns the small pointer stub, not the file content.
        file_url = (
            f"https://huggingface.co/datasets/{repo_name}/resolve/main/{file_name}"
        )

        # Download the file; the timeout keeps a stalled connection from
        # blocking the caller forever.
        response = requests.get(file_url, timeout=60)
        if response.status_code != 200:
            raise Exception(f"Failed to download file: {response.status_code}")

        # Save the file
        if local_path is None:
            local_path = os.path.join("data", file_name)
        parent_dir = os.path.dirname(local_path)
        # A bare filename has no directory part, and os.makedirs("") raises.
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        with open(local_path, "wb") as f:
            f.write(response.content)
        return local_path
    except Exception as e:
        raise Exception(f"Error downloading from Hugging Face Hub: {str(e)}") from e