nam pham
feat: improve ui/ux
a33a001
"""Hugging Face Hub integration utilities."""
import re
import os
from typing import Optional
from huggingface_hub import HfApi, create_repo
from dotenv import load_dotenv
# Load environment variables (python-dotenv) before the token is read below;
# the order matters because HF_TOKEN is resolved once, at import time.
load_dotenv()
# Access token taken from the HUGGINGFACE_ACCESS_TOKEN environment variable
# (None when the variable is not set). The functions in this module pass it
# as `token=` to the Hugging Face Hub client calls.
HF_TOKEN = os.getenv("HUGGINGFACE_ACCESS_TOKEN")
def is_valid_repo_name(repo_name: str) -> bool:
    """Check if a repository name is valid for Hugging Face Hub.

    A name is accepted when it is non-empty and made up only of ASCII
    letters, digits, and the characters ``_``, ``.``, ``/`` and ``-``.

    Args:
        repo_name: The repository name to validate

    Returns:
        True if the name is valid, False otherwise
    """
    # fullmatch instead of match with a trailing "$": "$" also matches just
    # before a final newline, which would let a name with a trailing line
    # break slip through.
    return re.fullmatch(r'[A-Za-z0-9_./-]+', repo_name) is not None
def create_hf_repo(
    repo_name: str,
    repo_type: str = "dataset",
    private: bool = False
) -> str:
    """Create a new repository on Hugging Face Hub.

    The name is validated with ``is_valid_repo_name`` first. The repository
    is then created with ``exist_ok=True``, using the module-level
    ``HF_TOKEN`` for authentication.

    Args:
        repo_name: Name of the repository to create
        repo_type: Type of repository (dataset, model, or space)
        private: Whether the repository should be private

    Returns:
        The repository ID (the ``repo_name`` that was passed in)

    Raises:
        ValueError: If the repository name is invalid
        Exception: If repository creation fails; the original error is
            attached as ``__cause__``
    """
    if not is_valid_repo_name(repo_name):
        # The message mirrors what is_valid_repo_name actually accepts
        # (it allows '/', so "namespace/name" IDs pass validation).
        raise ValueError(
            "Invalid repo name: only letters, digits, '-', '_', '.' and '/' "
            "are allowed"
        )
    try:
        create_repo(
            repo_id=repo_name,
            repo_type=repo_type,
            private=private,
            exist_ok=True,
            token=HF_TOKEN
        )
    except Exception as e:
        # Keep the historical message prefix, but chain the cause so the
        # underlying Hub error stays visible in tracebacks.
        raise Exception(f"Error creating repository: {str(e)}") from e
    return repo_name
def upload_to_hf(
    file_path: str,
    repo_name: str,
    repo_type: str = "dataset",
    private: bool = False
) -> str:
    """Upload a file to Hugging Face Hub.

    The repository is created first via ``create_hf_repo`` and the file is
    then uploaded to the repository root under its base name.

    Args:
        file_path: Path to the file to upload
        repo_name: Name of the repository to upload to
        repo_type: Type of repository
        private: Whether the repository should be private

    Returns:
        The repository ID

    Raises:
        Exception: If the local file is missing, or repository creation or
            the upload fails; the original error is attached as
            ``__cause__``
    """
    try:
        # Fail fast on a bad path so no repository is created remotely for
        # a file that cannot be uploaded anyway.
        if not os.path.isfile(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")

        # Create or get repository
        repo_id = create_hf_repo(repo_name, repo_type, private)

        # Upload file
        api = HfApi(token=HF_TOKEN)
        api.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=os.path.basename(file_path),
            repo_id=repo_id,
            repo_type=repo_type,
            token=HF_TOKEN
        )
        return repo_id
    except Exception as e:
        raise Exception(f"Error uploading to Hugging Face Hub: {str(e)}") from e
def download_from_hf(
    repo_name: str,
    file_name: str,
    local_path: Optional[str] = None
) -> str:
    """Download a file from Hugging Face Hub.

    The file is fetched over HTTP from the ``main`` revision of the dataset
    repository ``repo_name`` and written to disk in binary mode.

    Args:
        repo_name: Name of the repository to download from
        file_name: Name of the file to download
        local_path: Optional local path to save the file to; defaults to
            ``data/<file_name>``

    Returns:
        Path to the downloaded file

    Raises:
        Exception: If the download or the write to disk fails; the original
            error is attached as ``__cause__``
    """
    try:
        import requests

        # Construct the raw URL for the file
        # NOTE(review): the /raw/ endpoint may serve Git LFS pointer files
        # instead of the real content for LFS-tracked files -- confirm
        # whether /resolve/ should be used here.
        raw_url = f"https://huggingface.co/datasets/{repo_name}/raw/main/{file_name}"

        # Download the file; the timeout keeps an unresponsive server from
        # blocking the caller indefinitely.
        response = requests.get(raw_url, timeout=30)
        if response.status_code != 200:
            raise Exception(f"Failed to download file: {response.status_code}")

        # Save the file
        if local_path is None:
            local_path = os.path.join("data", file_name)
        # A bare filename has an empty dirname, and os.makedirs("") raises,
        # so only create the parent directory when there is one.
        parent_dir = os.path.dirname(local_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        with open(local_path, "wb") as f:
            f.write(response.content)
        return local_path
    except Exception as e:
        raise Exception(f"Error downloading from Hugging Face Hub: {str(e)}") from e