"""Hugging Face Hub integration utilities.""" import re import os from typing import Optional from huggingface_hub import HfApi, create_repo from dotenv import load_dotenv # Load environment variables load_dotenv() HF_TOKEN = os.getenv("HUGGINGFACE_ACCESS_TOKEN") def is_valid_repo_name(repo_name: str) -> bool: """Check if a repository name is valid for Hugging Face Hub. Args: repo_name: The repository name to validate Returns: True if the name is valid, False otherwise """ return bool(re.match(r'^[A-Za-z0-9_./-]+$', repo_name)) def create_hf_repo( repo_name: str, repo_type: str = "dataset", private: bool = False ) -> str: """Create a new repository on Hugging Face Hub. Args: repo_name: Name of the repository to create repo_type: Type of repository (dataset, model, or space) private: Whether the repository should be private Returns: The repository ID Raises: Exception: If the repository name is invalid or creation fails """ if not is_valid_repo_name(repo_name): raise Exception( "Invalid repo name: must not contain slashes, spaces, or special " "characters except '-', '_', '.'" ) try: api = HfApi(token=HF_TOKEN) create_repo( repo_id=repo_name, repo_type=repo_type, private=private, exist_ok=True, token=HF_TOKEN ) return repo_name except Exception as e: raise Exception(f"Error creating repository: {str(e)}") def upload_to_hf( file_path: str, repo_name: str, repo_type: str = "dataset", private: bool = False ) -> str: """Upload a file to Hugging Face Hub. Args: file_path: Path to the file to upload repo_name: Name of the repository to upload to repo_type: Type of repository private: Whether the repository should be private Returns: The repository ID Raises: Exception: If the upload fails """ try: # Create or get repository repo_id = create_hf_repo(repo_name, repo_type, private) # Upload file api = HfApi(token=HF_TOKEN) api.upload_file( path_or_fileobj=file_path, path_in_repo=os.path.basename(file_path), repo_id=repo_id, repo_type=repo_type, token=HF_TOKEN ) return repo_id except Exception as e: raise Exception(f"Error uploading to Hugging Face Hub: {str(e)}") def download_from_hf( repo_name: str, file_name: str, local_path: Optional[str] = None ) -> str: """Download a file from Hugging Face Hub. Args: repo_name: Name of the repository to download from file_name: Name of the file to download local_path: Optional local path to save the file to Returns: Path to the downloaded file Raises: Exception: If the download fails """ try: import requests # Construct the raw URL for the file raw_url = f"https://huggingface.co/datasets/{repo_name}/raw/main/{file_name}" # Download the file response = requests.get(raw_url) if response.status_code != 200: raise Exception(f"Failed to download file: {response.status_code}") # Save the file if local_path is None: local_path = os.path.join("data", file_name) os.makedirs(os.path.dirname(local_path), exist_ok=True) with open(local_path, "wb") as f: f.write(response.content) return local_path except Exception as e: raise Exception(f"Error downloading from Hugging Face Hub: {str(e)}")