coyotte508 (HF Staff) committed
Commit 21dd449 · verified · 1 Parent(s): e22900f

Add 1 files

Note: this view is limited to 50 files because the commit contains too many changes.
Files changed (50):
  1. consts.ts +1 -0
  2. error.ts +49 -0
  3. index.ts +25 -0
  4. lib/cache-management.spec.ts +137 -0
  5. lib/cache-management.ts +265 -0
  6. lib/check-repo-access.spec.ts +34 -0
  7. lib/check-repo-access.ts +32 -0
  8. lib/commit.spec.ts +271 -0
  9. lib/commit.ts +609 -0
  10. lib/count-commits.spec.ts +16 -0
  11. lib/count-commits.ts +35 -0
  12. lib/create-branch.spec.ts +159 -0
  13. lib/create-branch.ts +54 -0
  14. lib/create-repo.spec.ts +103 -0
  15. lib/create-repo.ts +78 -0
  16. lib/dataset-info.spec.ts +56 -0
  17. lib/dataset-info.ts +61 -0
  18. lib/delete-branch.spec.ts +43 -0
  19. lib/delete-branch.ts +32 -0
  20. lib/delete-file.spec.ts +64 -0
  21. lib/delete-file.ts +35 -0
  22. lib/delete-files.spec.ts +81 -0
  23. lib/delete-files.ts +33 -0
  24. lib/delete-repo.ts +37 -0
  25. lib/download-file-to-cache-dir.spec.ts +306 -0
  26. lib/download-file-to-cache-dir.ts +138 -0
  27. lib/download-file.spec.ts +82 -0
  28. lib/download-file.ts +77 -0
  29. lib/file-download-info.spec.ts +59 -0
  30. lib/file-download-info.ts +151 -0
  31. lib/file-exists.spec.ts +30 -0
  32. lib/file-exists.ts +41 -0
  33. lib/index.ts +32 -0
  34. lib/list-commits.spec.ts +117 -0
  35. lib/list-commits.ts +70 -0
  36. lib/list-datasets.spec.ts +47 -0
  37. lib/list-datasets.ts +121 -0
  38. lib/list-files.spec.ts +173 -0
  39. lib/list-files.ts +94 -0
  40. lib/list-models.spec.ts +118 -0
  41. lib/list-models.ts +139 -0
  42. lib/list-spaces.spec.ts +40 -0
  43. lib/list-spaces.ts +111 -0
  44. lib/model-info.spec.ts +59 -0
  45. lib/model-info.ts +62 -0
  46. lib/oauth-handle-redirect.spec.ts +60 -0
  47. lib/oauth-handle-redirect.ts +334 -0
  48. lib/oauth-login-url.ts +166 -0
  49. lib/parse-safetensors-metadata.spec.ts +122 -0
  50. lib/parse-safetensors-metadata.ts +274 -0
consts.ts ADDED
@@ -0,0 +1 @@
export const HUB_URL = "https://huggingface.co";
error.ts ADDED
@@ -0,0 +1,49 @@
import type { JsonObject } from "./vendor/type-fest/basic";

export async function createApiError(
	response: Response,
	opts?: { requestId?: string; message?: string }
): Promise<never> {
	const error = new HubApiError(response.url, response.status, response.headers.get("X-Request-Id") ?? opts?.requestId);

	error.message = `Api error with status ${error.statusCode}${opts?.message ? `. ${opts.message}` : ""}`;

	const trailer = [`URL: ${error.url}`, error.requestId ? `Request ID: ${error.requestId}` : undefined]
		.filter(Boolean)
		.join(". ");

	if (response.headers.get("Content-Type")?.startsWith("application/json")) {
		const json = await response.json();
		error.message = json.error || json.message || error.message;
		if (json.error_description) {
			error.message = error.message ? error.message + `: ${json.error_description}` : json.error_description;
		}
		error.data = json;
	} else {
		error.data = { message: await response.text() };
	}

	error.message += `. ${trailer}`;

	throw error;
}

/**
 * Error thrown when an API call to the Hugging Face Hub fails.
 */
export class HubApiError extends Error {
	statusCode: number;
	url: string;
	requestId?: string;
	data?: JsonObject;

	constructor(url: string, statusCode: number, requestId?: string, message?: string) {
		super(message);

		this.statusCode = statusCode;
		this.requestId = requestId;
		this.url = url;
	}
}

export class InvalidApiResponseFormatError extends Error {}
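Below is a minimal consumption sketch (not part of the commit): it mirrors the `throw await createApiError(res)` pattern used throughout the other files in this diff. The endpoint URL and repo name are illustrative.

import { createApiError, HubApiError } from "./error";

async function getModelInfo(name: string): Promise<unknown> {
	const res = await fetch(`https://huggingface.co/api/models/${name}`);
	if (!res.ok) {
		// createApiError always throws: it builds a HubApiError carrying the
		// parsed error body, the URL and the X-Request-Id header.
		throw await createApiError(res);
	}
	return res.json();
}

try {
	await getModelInfo("openai-community/gpt2");
} catch (err) {
	if (err instanceof HubApiError) {
		console.error(`HTTP ${err.statusCode} on ${err.url} (request ${err.requestId ?? "n/a"})`);
	} else {
		throw err;
	}
}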
index.ts ADDED
@@ -0,0 +1,25 @@
export * from "./lib";
// Typescript 5 will add 'export type *'
export type {
	AccessToken,
	AccessTokenRole,
	AuthType,
	Credentials,
	PipelineType,
	RepoDesignation,
	RepoFullName,
	RepoId,
	RepoType,
	SpaceHardwareFlavor,
	SpaceResourceConfig,
	SpaceResourceRequirement,
	SpaceRuntime,
	SpaceSdk,
	SpaceStage,
} from "./types/public";
export { HubApiError, InvalidApiResponseFormatError } from "./error";
/**
 * Only exported for E2Es convenience
 */
export { sha256 as __internal_sha256 } from "./utils/sha256";
export { XetBlob as __internal_XetBlob } from "./utils/XetBlob";
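For context, consumers import from this entry point rather than from individual files; a hypothetical example, assuming this entry point is published under the package's usual name `@huggingface/hub`:

import { commit, createRepo, HubApiError } from "@huggingface/hub";
import type { RepoDesignation, RepoId } from "@huggingface/hub";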
lib/cache-management.spec.ts ADDED
@@ -0,0 +1,137 @@
import { describe, test, expect, vi, beforeEach } from "vitest";
import {
	scanCacheDir,
	scanCachedRepo,
	scanSnapshotDir,
	parseRepoType,
	getBlobStat,
	type CachedFileInfo,
} from "./cache-management";
import { stat, readdir, realpath, lstat } from "node:fs/promises";
import type { Dirent, Stats } from "node:fs";
import { join } from "node:path";

// Mocks
vi.mock("node:fs/promises");

beforeEach(() => {
	vi.resetAllMocks();
	vi.restoreAllMocks();
});

describe("scanCacheDir", () => {
	test("should throw an error if cacheDir is not a directory", async () => {
		vi.mocked(stat).mockResolvedValueOnce({
			isDirectory: () => false,
		} as Stats);

		await expect(scanCacheDir("/fake/dir")).rejects.toThrow("Scan cache expects a directory");
	});

	test("empty directory should return an empty set of repositories and no warnings", async () => {
		vi.mocked(stat).mockResolvedValueOnce({
			isDirectory: () => true,
		} as Stats);

		// mock empty cache folder
		vi.mocked(readdir).mockResolvedValue([]);

		const result = await scanCacheDir("/fake/dir");

		// cacheDir must have been read
		expect(readdir).toHaveBeenCalledWith("/fake/dir");

		expect(result.warnings.length).toBe(0);
		expect(result.repos).toHaveLength(0);
		expect(result.size).toBe(0);
	});
});

describe("scanCachedRepo", () => {
	test("should throw an error for invalid repo path", async () => {
		await expect(() => {
			return scanCachedRepo("/fake/repo_path");
		}).rejects.toThrow("Repo path is not a valid HuggingFace cache directory");
	});

	test("should throw an error if the snapshot folder does not exist", async () => {
		vi.mocked(readdir).mockResolvedValue([]);
		vi.mocked(stat).mockResolvedValue({
			isDirectory: () => false,
		} as Stats);

		await expect(() => {
			return scanCachedRepo("/fake/cacheDir/models--hello-world--name");
		}).rejects.toThrow("Snapshots dir doesn't exist in cached repo");
	});

	test("should properly parse the repository name", async () => {
		const repoPath = "/fake/cacheDir/models--hello-world--name";
		vi.mocked(readdir).mockResolvedValue([]);
		vi.mocked(stat).mockResolvedValue({
			isDirectory: () => true,
		} as Stats);

		const result = await scanCachedRepo(repoPath);
		expect(readdir).toHaveBeenCalledWith(join(repoPath, "refs"), {
			withFileTypes: true,
		});

		expect(result.id.name).toBe("hello-world/name");
		expect(result.id.type).toBe("model");
	});
});

describe("scanSnapshotDir", () => {
	test("should scan a valid snapshot directory", async () => {
		const cachedFiles: CachedFileInfo[] = [];
		const blobStats = new Map<string, Stats>();
		vi.mocked(readdir).mockResolvedValueOnce([{ name: "file1", isDirectory: () => false } as Dirent]);

		vi.mocked(realpath).mockResolvedValueOnce("/fake/realpath");
		vi.mocked(lstat).mockResolvedValueOnce({ size: 1024, atimeMs: Date.now(), mtimeMs: Date.now() } as Stats);

		await scanSnapshotDir("/fake/revision", cachedFiles, blobStats);

		expect(cachedFiles).toHaveLength(1);
		expect(blobStats.size).toBe(1);
	});
});

describe("getBlobStat", () => {
	test("should retrieve blob stat if already cached", async () => {
		const blobStats = new Map<string, Stats>([["/fake/blob", { size: 1024 } as Stats]]);
		const result = await getBlobStat("/fake/blob", blobStats);

		expect(lstat).not.toHaveBeenCalled();
		expect(result.size).toBe(1024);
	});

	test("should fetch and cache blob stat if not cached", async () => {
		const blobStats = new Map();
		vi.mocked(lstat).mockResolvedValueOnce({ size: 2048 } as Stats);

		const result = await getBlobStat("/fake/blob", blobStats);

		expect(result.size).toBe(2048);
		expect(blobStats.size).toBe(1);
	});
});

describe("parseRepoType", () => {
	test("should parse models repo type", () => {
		expect(parseRepoType("models")).toBe("model");
	});

	test("should parse dataset repo type", () => {
		expect(parseRepoType("datasets")).toBe("dataset");
	});

	test("should parse space repo type", () => {
		expect(parseRepoType("spaces")).toBe("space");
	});

	test("should throw an error for invalid repo type", () => {
		expect(() => parseRepoType("invalid")).toThrowError("Invalid repo type: invalid");
	});
});
lib/cache-management.ts ADDED
@@ -0,0 +1,265 @@
import { homedir } from "node:os";
import { join, basename } from "node:path";
import { stat, readdir, readFile, realpath, lstat } from "node:fs/promises";
import type { Stats } from "node:fs";
import type { RepoType, RepoId } from "../types/public";

function getDefaultHome(): string {
	return join(homedir(), ".cache");
}

function getDefaultCachePath(): string {
	return join(process.env["HF_HOME"] ?? join(process.env["XDG_CACHE_HOME"] ?? getDefaultHome(), "huggingface"), "hub");
}

function getHuggingFaceHubCache(): string {
	return process.env["HUGGINGFACE_HUB_CACHE"] ?? getDefaultCachePath();
}

export function getHFHubCachePath(): string {
	return process.env["HF_HUB_CACHE"] ?? getHuggingFaceHubCache();
}

const FILES_TO_IGNORE: string[] = [".DS_Store"];

export const REPO_ID_SEPARATOR: string = "--";

export function getRepoFolderName({ name, type }: RepoId): string {
	const parts = [`${type}s`, ...name.split("/")];
	return parts.join(REPO_ID_SEPARATOR);
}

export interface CachedFileInfo {
	path: string;
	/**
	 * Underlying file - which `path` is symlinked to
	 */
	blob: {
		size: number;
		path: string;
		lastModifiedAt: Date;
		lastAccessedAt: Date;
	};
}

export interface CachedRevisionInfo {
	commitOid: string;
	path: string;
	size: number;
	files: CachedFileInfo[];
	refs: string[];

	lastModifiedAt: Date;
}

export interface CachedRepoInfo {
	id: RepoId;
	path: string;
	size: number;
	filesCount: number;
	revisions: CachedRevisionInfo[];

	lastAccessedAt: Date;
	lastModifiedAt: Date;
}

export interface HFCacheInfo {
	size: number;
	repos: CachedRepoInfo[];
	warnings: Error[];
}

export async function scanCacheDir(cacheDir: string | undefined = undefined): Promise<HFCacheInfo> {
	if (!cacheDir) cacheDir = getHFHubCachePath();

	const s = await stat(cacheDir);
	if (!s.isDirectory()) {
		throw new Error(
			`Scan cache expects a directory but found a file: ${cacheDir}. Please use \`cacheDir\` argument or set \`HF_HUB_CACHE\` environment variable.`
		);
	}

	const repos: CachedRepoInfo[] = [];
	const warnings: Error[] = [];

	const directories = await readdir(cacheDir);
	for (const repo of directories) {
		// skip .locks folder
		if (repo === ".locks") continue;

		// get the absolute path of the repo
		const absolute = join(cacheDir, repo);

		// ignore non-directory element
		const s = await stat(absolute);
		if (!s.isDirectory()) {
			continue;
		}

		try {
			const cached = await scanCachedRepo(absolute);
			repos.push(cached);
		} catch (err: unknown) {
			warnings.push(err as Error);
		}
	}

	return {
		repos: repos,
		size: [...repos.values()].reduce((sum, repo) => sum + repo.size, 0),
		warnings: warnings,
	};
}

export async function scanCachedRepo(repoPath: string): Promise<CachedRepoInfo> {
	// get the directory name
	const name = basename(repoPath);
	if (!name.includes(REPO_ID_SEPARATOR)) {
		throw new Error(`Repo path is not a valid HuggingFace cache directory: ${name}`);
	}

	// parse the repoId from directory name
	const [type, ...remaining] = name.split(REPO_ID_SEPARATOR);
	const repoType = parseRepoType(type);
	const repoId = remaining.join("/");

	const snapshotsPath = join(repoPath, "snapshots");
	const refsPath = join(repoPath, "refs");

	const snapshotStat = await stat(snapshotsPath);
	if (!snapshotStat.isDirectory()) {
		throw new Error(`Snapshots dir doesn't exist in cached repo ${snapshotsPath}`);
	}

	// Check if the refs directory exists and scan it
	const refsByHash: Map<string, string[]> = new Map();
	const refsStat = await stat(refsPath);
	if (refsStat.isDirectory()) {
		await scanRefsDir(refsPath, refsByHash);
	}

	// Scan snapshots directory and collect cached revision information
	const cachedRevisions: CachedRevisionInfo[] = [];
	const blobStats: Map<string, Stats> = new Map(); // Store blob stats

	const snapshotDirs = await readdir(snapshotsPath);
	for (const dir of snapshotDirs) {
		if (FILES_TO_IGNORE.includes(dir)) continue; // Ignore unwanted files

		const revisionPath = join(snapshotsPath, dir);
		const revisionStat = await stat(revisionPath);
		if (!revisionStat.isDirectory()) {
			throw new Error(`Snapshots folder corrupted. Found a file: ${revisionPath}`);
		}

		const cachedFiles: CachedFileInfo[] = [];
		await scanSnapshotDir(revisionPath, cachedFiles, blobStats);

		const revisionLastModified =
			cachedFiles.length > 0
				? Math.max(...[...cachedFiles].map((file) => file.blob.lastModifiedAt.getTime()))
				: revisionStat.mtimeMs;

		cachedRevisions.push({
			commitOid: dir,
			files: cachedFiles,
			refs: refsByHash.get(dir) || [],
			size: [...cachedFiles].reduce((sum, file) => sum + file.blob.size, 0),
			path: revisionPath,
			lastModifiedAt: new Date(revisionLastModified),
		});

		refsByHash.delete(dir);
	}

	// Verify that all refs refer to a valid revision
	if (refsByHash.size > 0) {
		throw new Error(
			`Reference(s) refer to missing commit hashes: ${JSON.stringify(Object.fromEntries(refsByHash))} (${repoPath})`
		);
	}

	const repoStats = await stat(repoPath);
	const repoLastAccessed =
		blobStats.size > 0 ? Math.max(...[...blobStats.values()].map((stat) => stat.atimeMs)) : repoStats.atimeMs;

	const repoLastModified =
		blobStats.size > 0 ? Math.max(...[...blobStats.values()].map((stat) => stat.mtimeMs)) : repoStats.mtimeMs;

	// Return the constructed CachedRepoInfo object
	return {
		id: {
			name: repoId,
			type: repoType,
		},
		path: repoPath,
		filesCount: blobStats.size,
		revisions: cachedRevisions,
		size: [...blobStats.values()].reduce((sum, stat) => sum + stat.size, 0),
		lastAccessedAt: new Date(repoLastAccessed),
		lastModifiedAt: new Date(repoLastModified),
	};
}

export async function scanRefsDir(refsPath: string, refsByHash: Map<string, string[]>): Promise<void> {
	const refFiles = await readdir(refsPath, { withFileTypes: true });
	for (const refFile of refFiles) {
		const refFilePath = join(refsPath, refFile.name);
		if (refFile.isDirectory()) continue; // Skip directories

		const commitHash = await readFile(refFilePath, "utf-8");
		const refName = refFile.name;
		if (!refsByHash.has(commitHash)) {
			refsByHash.set(commitHash, []);
		}
		refsByHash.get(commitHash)?.push(refName);
	}
}

export async function scanSnapshotDir(
	revisionPath: string,
	cachedFiles: CachedFileInfo[],
	blobStats: Map<string, Stats>
): Promise<void> {
	const files = await readdir(revisionPath, { withFileTypes: true });
	for (const file of files) {
		if (file.isDirectory()) continue; // Skip directories

		const filePath = join(revisionPath, file.name);
		const blobPath = await realpath(filePath);
		const blobStat = await getBlobStat(blobPath, blobStats);

		cachedFiles.push({
			path: filePath,
			blob: {
				path: blobPath,
				size: blobStat.size,
				lastAccessedAt: new Date(blobStat.atimeMs),
				lastModifiedAt: new Date(blobStat.mtimeMs),
			},
		});
	}
}

export async function getBlobStat(blobPath: string, blobStats: Map<string, Stats>): Promise<Stats> {
	const blob = blobStats.get(blobPath);
	if (!blob) {
		const statResult = await lstat(blobPath);
		blobStats.set(blobPath, statResult);
		return statResult;
	}
	return blob;
}

export function parseRepoType(type: string): RepoType {
	switch (type) {
		case "models":
			return "model";
		case "datasets":
			return "dataset";
		case "spaces":
			return "space";
		default:
			throw new TypeError(`Invalid repo type: ${type}`);
	}
}
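A rough usage sketch (assuming an in-package relative import; the report format is illustrative). With no argument, `scanCacheDir` resolves the cache root from `HF_HUB_CACHE`, `HUGGINGFACE_HUB_CACHE` or `HF_HOME`, and collects per-repo scan failures into `warnings` instead of aborting:

import { scanCacheDir } from "./lib/cache-management";

const info = await scanCacheDir(); // defaults to e.g. ~/.cache/huggingface/hub
console.log(`${info.repos.length} cached repo(s), ${info.size} bytes on disk`);
for (const repo of info.repos) {
	console.log(
		`- ${repo.id.type} ${repo.id.name}: ${repo.revisions.length} revision(s), ` +
			`${repo.filesCount} blob(s), last accessed ${repo.lastAccessedAt.toISOString()}`
	);
}
for (const warning of info.warnings) {
	console.warn(`skipped: ${warning.message}`);
}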
lib/check-repo-access.spec.ts ADDED
@@ -0,0 +1,34 @@
import { assert, describe, expect, it } from "vitest";
import { checkRepoAccess } from "./check-repo-access";
import { HubApiError } from "../error";
import { TEST_ACCESS_TOKEN, TEST_HUB_URL } from "../test/consts";

describe("checkRepoAccess", () => {
	it("should throw 401 when accessing a nonexistent repo while unauthenticated", async () => {
		try {
			await checkRepoAccess({ repo: { name: "i--d/dont", type: "model" } });
			assert(false, "should have thrown");
		} catch (err) {
			expect(err).toBeInstanceOf(HubApiError);
			expect((err as HubApiError).statusCode).toBe(401);
		}
	});

	it("should throw 404 when accessing a nonexistent repo while authenticated", async () => {
		try {
			await checkRepoAccess({
				repo: { name: "i--d/dont", type: "model" },
				hubUrl: TEST_HUB_URL,
				accessToken: TEST_ACCESS_TOKEN,
			});
			assert(false, "should have thrown");
		} catch (err) {
			expect(err).toBeInstanceOf(HubApiError);
			expect((err as HubApiError).statusCode).toBe(404);
		}
	});

	it("should not throw when accessing a public repo", async () => {
		await checkRepoAccess({ repo: { name: "openai-community/gpt2", type: "model" } });
	});
});
lib/check-repo-access.ts ADDED
@@ -0,0 +1,32 @@
import { HUB_URL } from "../consts";
// eslint-disable-next-line @typescript-eslint/no-unused-vars
import { createApiError, type HubApiError } from "../error";
import type { CredentialsParams, RepoDesignation } from "../types/public";
import { checkCredentials } from "../utils/checkCredentials";
import { toRepoId } from "../utils/toRepoId";

/**
 * Check if we have read access to a repository.
 *
 * Throw a {@link HubApiError} error if we don't have access. HubApiError.statusCode will be 401, 403 or 404.
 */
export async function checkRepoAccess(
	params: {
		repo: RepoDesignation;
		hubUrl?: string;
		fetch?: typeof fetch;
	} & Partial<CredentialsParams>
): Promise<void> {
	const accessToken = params && checkCredentials(params);
	const repoId = toRepoId(params.repo);

	const response = await (params.fetch || fetch)(`${params?.hubUrl || HUB_URL}/api/${repoId.type}s/${repoId.name}`, {
		headers: {
			...(accessToken ? { Authorization: `Bearer ${accessToken}` } : {}),
		},
	});

	if (!response.ok) {
		throw await createApiError(response);
	}
}
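A minimal calling sketch, following the doc comment above (the token and gated repo name are placeholders):

import { checkRepoAccess } from "./lib/check-repo-access";
import { HubApiError } from "./error";

try {
	await checkRepoAccess({
		repo: { type: "model", name: "some-org/some-gated-model" }, // placeholder
		accessToken: "hf_xxx", // placeholder
	});
	console.log("read access granted");
} catch (err) {
	if (err instanceof HubApiError) {
		// 401 = not authenticated, 403 = not authorized, 404 = repo not found
		console.error(`no access: HTTP ${err.statusCode}`);
	} else {
		throw err;
	}
}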
lib/commit.spec.ts ADDED
@@ -0,0 +1,271 @@
import { assert, it, describe } from "vitest";

import { TEST_HUB_URL, TEST_ACCESS_TOKEN, TEST_USER } from "../test/consts";
import type { RepoId } from "../types/public";
import type { CommitFile } from "./commit";
import { commit } from "./commit";
import { createRepo } from "./create-repo";
import { deleteRepo } from "./delete-repo";
import { downloadFile } from "./download-file";
import { fileDownloadInfo } from "./file-download-info";
import { insecureRandomString } from "../utils/insecureRandomString";
import { isFrontend } from "../utils/isFrontend";

const lfsContent = "O123456789".repeat(100_000);

describe("commit", () => {
	it("should commit to a repo with blobs", async function () {
		const tokenizerJsonUrl = new URL(
			"https://huggingface.co/spaces/aschen/push-model-from-web/raw/main/mobilenet/model.json"
		);
		const repoName = `${TEST_USER}/TEST-${insecureRandomString()}`;
		const repo: RepoId = {
			name: repoName,
			type: "model",
		};

		await createRepo({
			accessToken: TEST_ACCESS_TOKEN,
			hubUrl: TEST_HUB_URL,
			repo,
			license: "mit",
		});

		try {
			const readme1 = await downloadFile({ repo, path: "README.md", hubUrl: TEST_HUB_URL });
			assert(readme1, "Readme doesn't exist");

			const nodeOperation: CommitFile[] = isFrontend
				? []
				: [
						{
							operation: "addOrUpdate",
							path: "tsconfig.json",
							content: (await import("node:url")).pathToFileURL("./tsconfig.json") as URL,
						},
				  ];

			await commit({
				repo,
				title: "Some commit",
				accessToken: TEST_ACCESS_TOKEN,
				hubUrl: TEST_HUB_URL,
				operations: [
					{
						operation: "addOrUpdate",
						content: new Blob(["This is me"]),
						path: "test.txt",
					},
					{
						operation: "addOrUpdate",
						content: new Blob([lfsContent]),
						path: "test.lfs.txt",
					},
					...nodeOperation,
					{
						operation: "addOrUpdate",
						content: tokenizerJsonUrl,
						path: "lamaral.json",
					},
					{
						operation: "delete",
						path: "README.md",
					},
				],
				// To test web workers in the front-end
				useWebWorkers: { minSize: 5_000 },
			});

			const fileContent = await downloadFile({ repo, path: "test.txt", hubUrl: TEST_HUB_URL });
			assert.strictEqual(await fileContent?.text(), "This is me");

			const lfsFileContent = await downloadFile({ repo, path: "test.lfs.txt", hubUrl: TEST_HUB_URL });
			assert.strictEqual(await lfsFileContent?.text(), lfsContent);

			const lfsFileUrl = `${TEST_HUB_URL}/${repoName}/raw/main/test.lfs.txt`;
			const lfsFilePointer = await fetch(lfsFileUrl);
			assert.strictEqual(lfsFilePointer.status, 200);
			assert.strictEqual(
				(await lfsFilePointer.text()).trim(),
				`
version https://git-lfs.github.com/spec/v1
oid sha256:a3bbce7ee1df7233d85b5f4d60faa3755f93f537804f8b540c72b0739239ddf8
size ${lfsContent.length}
				`.trim()
			);

			if (!isFrontend) {
				const fileUrlContent = await downloadFile({ repo, path: "tsconfig.json", hubUrl: TEST_HUB_URL });
				assert.strictEqual(
					await fileUrlContent?.text(),
					(await import("node:fs")).readFileSync("./tsconfig.json", "utf-8")
				);
			}

			const webResourceContent = await downloadFile({ repo, path: "lamaral.json", hubUrl: TEST_HUB_URL });
			assert.strictEqual(await webResourceContent?.text(), await (await fetch(tokenizerJsonUrl)).text());

			const readme2 = await downloadFile({ repo, path: "README.md", hubUrl: TEST_HUB_URL });
			assert.strictEqual(readme2, null);
		} finally {
			await deleteRepo({
				repo: {
					name: repoName,
					type: "model",
				},
				hubUrl: TEST_HUB_URL,
				credentials: { accessToken: TEST_ACCESS_TOKEN },
			});
		}
	}, 60_000);

	it("should commit a full repo from HF with web urls", async function () {
		const repoName = `${TEST_USER}/TEST-${insecureRandomString()}`;
		const repo: RepoId = {
			name: repoName,
			type: "model",
		};

		await createRepo({
			accessToken: TEST_ACCESS_TOKEN,
			repo,
			hubUrl: TEST_HUB_URL,
		});

		try {
			const FILES_TO_UPLOAD = [
				`https://huggingface.co/spaces/huggingfacejs/push-model-from-web/resolve/main/mobilenet/model.json`,
				`https://huggingface.co/spaces/huggingfacejs/push-model-from-web/resolve/main/mobilenet/group1-shard1of2`,
				`https://huggingface.co/spaces/huggingfacejs/push-model-from-web/resolve/main/mobilenet/group1-shard2of2`,
				`https://huggingface.co/spaces/huggingfacejs/push-model-from-web/resolve/main/mobilenet/coffee.jpg`,
				`https://huggingface.co/spaces/huggingfacejs/push-model-from-web/resolve/main/mobilenet/README.md`,
			];

			const operations: CommitFile[] = await Promise.all(
				FILES_TO_UPLOAD.map(async (file) => {
					return {
						operation: "addOrUpdate",
						path: file.slice(file.indexOf("main/") + "main/".length),
						// upload remote file
						content: new URL(file),
					};
				})
			);
			await commit({
				repo,
				accessToken: TEST_ACCESS_TOKEN,
				hubUrl: TEST_HUB_URL,
				title: "upload model",
				operations,
			});

			const LFSSize = (await fileDownloadInfo({ repo, path: "mobilenet/group1-shard1of2", hubUrl: TEST_HUB_URL }))
				?.size;

			assert.strictEqual(LFSSize, 4_194_304);

			const pointerFile = await downloadFile({
				repo,
				path: "mobilenet/group1-shard1of2",
				raw: true,
				hubUrl: TEST_HUB_URL,
			});

			// Make sure SHA is computed properly as well
			assert.strictEqual(
				(await pointerFile?.text())?.trim(),
				`
version https://git-lfs.github.com/spec/v1
oid sha256:3fb621eb9b37478239504ee083042d5b18699e8b8618e569478b03b119a85a69
size 4194304
				`.trim()
			);
		} finally {
			await deleteRepo({
				repo: {
					name: repoName,
					type: "model",
				},
				hubUrl: TEST_HUB_URL,
				credentials: { accessToken: TEST_ACCESS_TOKEN },
			});
		}
		// https://huggingfacejs-push-model-from-web.hf.space/
	}, 60_000);

	it("should be able to create a PR and then commit to it", async function () {
		const repoName = `${TEST_USER}/TEST-${insecureRandomString()}`;
		const repo: RepoId = {
			name: repoName,
			type: "model",
		};

		await createRepo({
			credentials: {
				accessToken: TEST_ACCESS_TOKEN,
			},
			repo,
			hubUrl: TEST_HUB_URL,
		});

		try {
			const pr = await commit({
				repo,
				credentials: {
					accessToken: TEST_ACCESS_TOKEN,
				},
				hubUrl: TEST_HUB_URL,
				title: "Create PR",
				isPullRequest: true,
				operations: [
					{
						operation: "addOrUpdate",
						content: new Blob(["This is me"]),
						path: "test.txt",
					},
				],
			});

			if (!pr) {
				throw new Error("PR creation failed");
			}

			if (!pr.pullRequestUrl) {
				throw new Error("No pull request url");
			}

			const prNumber = pr.pullRequestUrl.split("/").pop();
			const prRef = `refs/pr/${prNumber}`;

			await commit({
				repo,
				credentials: {
					accessToken: TEST_ACCESS_TOKEN,
				},
				hubUrl: TEST_HUB_URL,
				branch: prRef,
				title: "Some commit",
				operations: [
					{
						operation: "addOrUpdate",
						content: new URL(
							`https://huggingface.co/spaces/huggingfacejs/push-model-from-web/resolve/main/mobilenet/group1-shard1of2`
						),
						path: "mobilenet/group1-shard1of2",
					},
				],
			});

			assert(commit, "PR commit failed");
		} finally {
			await deleteRepo({
				repo: {
					name: repoName,
					type: "model",
				},
				hubUrl: TEST_HUB_URL,
				credentials: { accessToken: TEST_ACCESS_TOKEN },
			});
		}
	}, 60_000);
});
lib/commit.ts ADDED
@@ -0,0 +1,609 @@
import { HUB_URL } from "../consts";
import { HubApiError, createApiError, InvalidApiResponseFormatError } from "../error";
import type {
	ApiCommitHeader,
	ApiCommitLfsFile,
	ApiCommitOperation,
	ApiLfsBatchRequest,
	ApiLfsBatchResponse,
	ApiLfsCompleteMultipartRequest,
	ApiPreuploadRequest,
	ApiPreuploadResponse,
} from "../types/api/api-commit";
import type { CredentialsParams, RepoDesignation } from "../types/public";
import { checkCredentials } from "../utils/checkCredentials";
import { chunk } from "../utils/chunk";
import { promisesQueue } from "../utils/promisesQueue";
import { promisesQueueStreaming } from "../utils/promisesQueueStreaming";
import { sha256 } from "../utils/sha256";
import { toRepoId } from "../utils/toRepoId";
import { WebBlob } from "../utils/WebBlob";
import { eventToGenerator } from "../utils/eventToGenerator";
import { base64FromBytes } from "../utils/base64FromBytes";
import { isFrontend } from "../utils/isFrontend";
import { createBlobs } from "../utils/createBlobs";

const CONCURRENT_SHAS = 5;
const CONCURRENT_LFS_UPLOADS = 5;
const MULTIPART_PARALLEL_UPLOAD = 5;

export interface CommitDeletedEntry {
	operation: "delete";
	path: string;
}

export type ContentSource = Blob | URL;

export interface CommitFile {
	operation: "addOrUpdate";
	path: string;
	content: ContentSource;
	// forceLfs?: boolean
}

type CommitBlob = Omit<CommitFile, "content"> & { content: Blob };

// TODO: find a nice way to handle LFS & non-LFS files in a uniform manner, see https://github.com/huggingface/moon-landing/issues/4370
// export type CommitRenameFile = {
//   operation: "rename";
//   path: string;
//   oldPath: string;
//   content?: ContentSource;
// };

export type CommitOperation = CommitDeletedEntry | CommitFile /* | CommitRenameFile */;
type CommitBlobOperation = Exclude<CommitOperation, CommitFile> | CommitBlob;

export type CommitParams = {
	title: string;
	description?: string;
	repo: RepoDesignation;
	operations: CommitOperation[];
	/** @default "main" */
	branch?: string;
	/**
	 * Parent commit. Optional
	 *
	 * - When opening a PR: will use parentCommit as the parent commit
	 * - When committing on a branch: Will make sure that there were no intermediate commits
	 */
	parentCommit?: string;
	isPullRequest?: boolean;
	hubUrl?: string;
	/**
	 * Whether to use web workers to compute SHA256 hashes.
	 *
	 * @default false
	 */
	useWebWorkers?: boolean | { minSize?: number; poolSize?: number };
	/**
	 * Maximum depth of folders to upload. Files deeper than this will be ignored
	 *
	 * @default 5
	 */
	maxFolderDepth?: number;
	/**
	 * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
	 */
	fetch?: typeof fetch;
	abortSignal?: AbortSignal;
	// Credentials are optional due to custom fetch functions or cookie auth
} & Partial<CredentialsParams>;

export interface CommitOutput {
	pullRequestUrl?: string;
	commit: {
		oid: string;
		url: string;
	};
	hookOutput: string;
}

function isFileOperation(op: CommitOperation): op is CommitBlob {
	const ret = op.operation === "addOrUpdate";

	if (ret && !(op.content instanceof Blob)) {
		throw new TypeError("Precondition failed: op.content should be a Blob");
	}

	return ret;
}

export type CommitProgressEvent =
	| {
			event: "phase";
			phase: "preuploading" | "uploadingLargeFiles" | "committing";
	  }
	| {
			event: "fileProgress";
			path: string;
			progress: number;
			state: "hashing" | "uploading";
	  };

/**
 * Internal function for now, used by commit.
 *
 * Can be exposed later to offer fine-tuned progress info
 */
export async function* commitIter(params: CommitParams): AsyncGenerator<CommitProgressEvent, CommitOutput> {
	const accessToken = checkCredentials(params);
	const repoId = toRepoId(params.repo);
	yield { event: "phase", phase: "preuploading" };

	const lfsShas = new Map<string, string | null>();

	const abortController = new AbortController();
	const abortSignal = abortController.signal;

	// Polyfill see https://discuss.huggingface.co/t/why-cant-i-upload-a-parquet-file-to-my-dataset-error-o-throwifaborted-is-not-a-function/62245
	if (!abortSignal.throwIfAborted) {
		abortSignal.throwIfAborted = () => {
			if (abortSignal.aborted) {
				throw new DOMException("Aborted", "AbortError");
			}
		};
	}

	if (params.abortSignal) {
		params.abortSignal.addEventListener("abort", () => abortController.abort());
	}

	try {
		const allOperations = (
			await Promise.all(
				params.operations.map(async (operation) => {
					if (operation.operation !== "addOrUpdate") {
						return operation;
					}

					if (!(operation.content instanceof URL)) {
						/** TS trick to enforce `content` to be a `Blob` */
						return { ...operation, content: operation.content };
					}

					const lazyBlobs = await createBlobs(operation.content, operation.path, {
						fetch: params.fetch,
						maxFolderDepth: params.maxFolderDepth,
					});

					abortSignal?.throwIfAborted();

					return lazyBlobs.map((blob) => ({
						...operation,
						content: blob.blob,
						path: blob.path,
					}));
				})
			)
		).flat(1);

		const gitAttributes = allOperations.filter(isFileOperation).find((op) => op.path === ".gitattributes")?.content;

		for (const operations of chunk(allOperations.filter(isFileOperation), 100)) {
			const payload: ApiPreuploadRequest = {
				gitAttributes: gitAttributes && (await gitAttributes.text()),
				files: await Promise.all(
					operations.map(async (operation) => ({
						path: operation.path,
						size: operation.content.size,
						sample: base64FromBytes(new Uint8Array(await operation.content.slice(0, 512).arrayBuffer())),
					}))
				),
			};

			abortSignal?.throwIfAborted();

			const res = await (params.fetch ?? fetch)(
				`${params.hubUrl ?? HUB_URL}/api/${repoId.type}s/${repoId.name}/preupload/${encodeURIComponent(
					params.branch ?? "main"
				)}` + (params.isPullRequest ? "?create_pr=1" : ""),
				{
					method: "POST",
					headers: {
						...(accessToken && { Authorization: `Bearer ${accessToken}` }),
						"Content-Type": "application/json",
					},
					body: JSON.stringify(payload),
					signal: abortSignal,
				}
			);

			if (!res.ok) {
				throw await createApiError(res);
			}

			const json: ApiPreuploadResponse = await res.json();

			for (const file of json.files) {
				if (file.uploadMode === "lfs") {
					lfsShas.set(file.path, null);
				}
			}
		}

		yield { event: "phase", phase: "uploadingLargeFiles" };

		for (const operations of chunk(
			allOperations.filter(isFileOperation).filter((op) => lfsShas.has(op.path)),
			100
		)) {
			const shas = yield* eventToGenerator<
				{ event: "fileProgress"; state: "hashing"; path: string; progress: number },
				string[]
			>((yieldCallback, returnCallback, rejectCallback) => {
				return promisesQueue(
					operations.map((op) => async () => {
						const iterator = sha256(op.content, { useWebWorker: params.useWebWorkers, abortSignal: abortSignal });
						let res: IteratorResult<number, string>;
						do {
							res = await iterator.next();
							if (!res.done) {
								yieldCallback({ event: "fileProgress", path: op.path, progress: res.value, state: "hashing" });
							}
						} while (!res.done);
						const sha = res.value;
						lfsShas.set(op.path, res.value);
						return sha;
					}),
					CONCURRENT_SHAS
				).then(returnCallback, rejectCallback);
			});

			abortSignal?.throwIfAborted();

			const payload: ApiLfsBatchRequest = {
				operation: "upload",
				// multipart is a custom protocol for HF
				transfers: ["basic", "multipart"],
				hash_algo: "sha_256",
				...(!params.isPullRequest && {
					ref: {
						name: params.branch ?? "main",
					},
				}),
				objects: operations.map((op, i) => ({
					oid: shas[i],
					size: op.content.size,
				})),
			};

			const res = await (params.fetch ?? fetch)(
				`${params.hubUrl ?? HUB_URL}/${repoId.type === "model" ? "" : repoId.type + "s/"}${
					repoId.name
				}.git/info/lfs/objects/batch`,
				{
					method: "POST",
					headers: {
						...(accessToken && { Authorization: `Bearer ${accessToken}` }),
						Accept: "application/vnd.git-lfs+json",
						"Content-Type": "application/vnd.git-lfs+json",
					},
					body: JSON.stringify(payload),
					signal: abortSignal,
				}
			);

			if (!res.ok) {
				throw await createApiError(res);
			}

			const json: ApiLfsBatchResponse = await res.json();
			const batchRequestId = res.headers.get("X-Request-Id") || undefined;

			const shaToOperation = new Map(operations.map((op, i) => [shas[i], op]));

			yield* eventToGenerator<CommitProgressEvent, void>((yieldCallback, returnCallback, rejectCallback) => {
				return promisesQueueStreaming(
					json.objects.map((obj) => async () => {
						const op = shaToOperation.get(obj.oid);

						if (!op) {
							throw new InvalidApiResponseFormatError("Unrequested object ID in response");
						}

						abortSignal?.throwIfAborted();

						if (obj.error) {
							const errorMessage = `Error while doing LFS batch call for ${operations[shas.indexOf(obj.oid)].path}: ${
								obj.error.message
							}${batchRequestId ? ` - Request ID: ${batchRequestId}` : ""}`;
							throw new HubApiError(res.url, obj.error.code, batchRequestId, errorMessage);
						}
						if (!obj.actions?.upload) {
							// Already uploaded
							yieldCallback({
								event: "fileProgress",
								path: op.path,
								progress: 1,
								state: "uploading",
							});
							return;
						}
						yieldCallback({
							event: "fileProgress",
							path: op.path,
							progress: 0,
							state: "uploading",
						});
						const content = op.content;
						const header = obj.actions.upload.header;
						if (header?.chunk_size) {
							const chunkSize = parseInt(header.chunk_size);

							// multipart upload
							// parts are in upload.header['00001'] to upload.header['99999']

							const completionUrl = obj.actions.upload.href;
							const parts = Object.keys(header).filter((key) => /^[0-9]+$/.test(key));

							if (parts.length !== Math.ceil(content.size / chunkSize)) {
								throw new Error("Invalid server response to upload large LFS file, wrong number of parts");
							}

							const completeReq: ApiLfsCompleteMultipartRequest = {
								oid: obj.oid,
								parts: parts.map((part) => ({
									partNumber: +part,
									etag: "",
								})),
							};

							// Defined here so that it's not redefined at each iteration (and the caller can tell it's for the same file)
							const progressCallback = (progress: number) =>
								yieldCallback({ event: "fileProgress", path: op.path, progress, state: "uploading" });

							await promisesQueueStreaming(
								parts.map((part) => async () => {
									abortSignal?.throwIfAborted();

									const index = parseInt(part) - 1;
									const slice = content.slice(index * chunkSize, (index + 1) * chunkSize);

									const res = await (params.fetch ?? fetch)(header[part], {
										method: "PUT",
										/** Unfortunately, browsers don't support our inherited version of Blob in fetch calls */
										body: slice instanceof WebBlob && isFrontend ? await slice.arrayBuffer() : slice,
										signal: abortSignal,
										...({
											progressHint: {
												path: op.path,
												part: index,
												numParts: parts.length,
												progressCallback,
											},
											// eslint-disable-next-line @typescript-eslint/no-explicit-any
										} as any),
									});

									if (!res.ok) {
										throw await createApiError(res, {
											requestId: batchRequestId,
											message: `Error while uploading part ${part} of ${
												operations[shas.indexOf(obj.oid)].path
											} to LFS storage`,
										});
									}

									const eTag = res.headers.get("ETag");

									if (!eTag) {
										throw new Error("Cannot get ETag of part during multipart upload");
									}

									completeReq.parts[Number(part) - 1].etag = eTag;
								}),
								MULTIPART_PARALLEL_UPLOAD
							);

							abortSignal?.throwIfAborted();

							const res = await (params.fetch ?? fetch)(completionUrl, {
								method: "POST",
								body: JSON.stringify(completeReq),
								headers: {
									Accept: "application/vnd.git-lfs+json",
									"Content-Type": "application/vnd.git-lfs+json",
								},
								signal: abortSignal,
							});

							if (!res.ok) {
								throw await createApiError(res, {
									requestId: batchRequestId,
									message: `Error completing multipart upload of ${
										operations[shas.indexOf(obj.oid)].path
									} to LFS storage`,
								});
							}

							yieldCallback({
								event: "fileProgress",
								path: op.path,
								progress: 1,
								state: "uploading",
							});
						} else {
							const res = await (params.fetch ?? fetch)(obj.actions.upload.href, {
								method: "PUT",
								headers: {
									...(batchRequestId ? { "X-Request-Id": batchRequestId } : undefined),
								},
								/** Unfortunately, browsers don't support our inherited version of Blob in fetch calls */
								body: content instanceof WebBlob && isFrontend ? await content.arrayBuffer() : content,
								signal: abortSignal,
								...({
									progressHint: {
										path: op.path,
										progressCallback: (progress: number) =>
											yieldCallback({
												event: "fileProgress",
												path: op.path,
												progress,
												state: "uploading",
											}),
									},
									// eslint-disable-next-line @typescript-eslint/no-explicit-any
								} as any),
							});

							if (!res.ok) {
								throw await createApiError(res, {
									requestId: batchRequestId,
									message: `Error while uploading ${operations[shas.indexOf(obj.oid)].path} to LFS storage`,
								});
							}

							yieldCallback({
								event: "fileProgress",
								path: op.path,
								progress: 1,
								state: "uploading",
							});
						}
					}),
					CONCURRENT_LFS_UPLOADS
				).then(returnCallback, rejectCallback);
			});
		}

		abortSignal?.throwIfAborted();

		yield { event: "phase", phase: "committing" };

		return yield* eventToGenerator<CommitProgressEvent, CommitOutput>(
			async (yieldCallback, returnCallback, rejectCallback) =>
				(params.fetch ?? fetch)(
					`${params.hubUrl ?? HUB_URL}/api/${repoId.type}s/${repoId.name}/commit/${encodeURIComponent(
						params.branch ?? "main"
					)}` + (params.isPullRequest ? "?create_pr=1" : ""),
					{
						method: "POST",
						headers: {
							...(accessToken && { Authorization: `Bearer ${accessToken}` }),
							"Content-Type": "application/x-ndjson",
						},
						body: [
							{
								key: "header",
								value: {
									summary: params.title,
									description: params.description,
									parentCommit: params.parentCommit,
								} satisfies ApiCommitHeader,
							},
							...((await Promise.all(
								allOperations.map((operation) => {
									if (isFileOperation(operation)) {
										const sha = lfsShas.get(operation.path);
										if (sha) {
											return {
												key: "lfsFile",
												value: {
													path: operation.path,
													algo: "sha256",
													size: operation.content.size,
													oid: sha,
												} satisfies ApiCommitLfsFile,
											};
										}
									}

									return convertOperationToNdJson(operation);
								})
							)) satisfies ApiCommitOperation[]),
						]
							.map((x) => JSON.stringify(x))
							.join("\n"),
						signal: abortSignal,
						...({
							progressHint: {
								progressCallback: (progress: number) => {
									// For now, we display equal progress for all files
									// We could compute the progress based on the size of `convertOperationToNdJson` for each of the files instead
									for (const op of allOperations) {
										if (isFileOperation(op) && !lfsShas.has(op.path)) {
											yieldCallback({
												event: "fileProgress",
												path: op.path,
												progress,
												state: "uploading",
											});
										}
									}
								},
							},
							// eslint-disable-next-line @typescript-eslint/no-explicit-any
						} as any),
					}
				)
					.then(async (res) => {
						if (!res.ok) {
							throw await createApiError(res);
						}

						const json = await res.json();

						returnCallback({
							pullRequestUrl: json.pullRequestUrl,
							commit: {
								oid: json.commitOid,
								url: json.commitUrl,
							},
							hookOutput: json.hookOutput,
						});
					})
					.catch(rejectCallback)
		);
	} catch (err) {
		// For parallel requests, cancel them all if one fails
		abortController.abort();
		throw err;
	}
}

export async function commit(params: CommitParams): Promise<CommitOutput> {
	const iterator = commitIter(params);
	let res = await iterator.next();
	while (!res.done) {
		res = await iterator.next();
	}
	return res.value;
}

async function convertOperationToNdJson(operation: CommitBlobOperation): Promise<ApiCommitOperation> {
	switch (operation.operation) {
		case "addOrUpdate": {
			// todo: handle LFS
			return {
				key: "file",
				value: {
					content: base64FromBytes(new Uint8Array(await operation.content.arrayBuffer())),
					path: operation.path,
					encoding: "base64",
				},
			};
		}
		// case "rename": {
		//   // todo: detect when remote file is already LFS, and in that case rename as LFS
		//   return {
		//     key: "file",
		//     value: {
		//       content: operation.content,
		//       path: operation.path,
		//       oldPath: operation.oldPath
		//     }
		//   };
		// }
		case "delete": {
			return {
				key: "deletedFile",
				value: {
					path: operation.path,
				},
			};
		}
		default:
			throw new TypeError("Unknown operation: " + (operation as { operation: string }).operation);
	}
}
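A sketch of driving `commitIter` directly to surface the progress events (repo name, token and file contents are placeholders); per the code above, `commit()` runs the same loop while discarding the events:

import { commitIter } from "./lib/commit";

const iterator = commitIter({
	repo: { type: "model", name: "my-user/my-model" }, // placeholder
	title: "Upload weights",
	accessToken: "hf_xxx", // placeholder
	operations: [
		{ operation: "addOrUpdate", path: "weights.bin", content: new Blob(["..."]) },
		{ operation: "delete", path: "obsolete.bin" },
	],
});

let step = await iterator.next();
while (!step.done) {
	const event = step.value;
	if (event.event === "phase") {
		// "preuploading" -> "uploadingLargeFiles" -> "committing"
		console.log(`phase: ${event.phase}`);
	} else {
		console.log(`${event.path}: ${(event.progress * 100).toFixed(0)}% (${event.state})`);
	}
	step = await iterator.next();
}
console.log("commit created:", step.value.commit.url);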
lib/count-commits.spec.ts ADDED
@@ -0,0 +1,16 @@
import { assert, it, describe } from "vitest";
import { countCommits } from "./count-commits";

describe("countCommits", () => {
	it("should fetch paginated commits from the repo", async () => {
		const count = await countCommits({
			repo: {
				name: "openai-community/gpt2",
				type: "model",
			},
			revision: "607a30d783dfa663caf39e06633721c8d4cfcd7e",
		});

		assert.equal(count, 26);
	});
});
lib/count-commits.ts ADDED
@@ -0,0 +1,35 @@
import { HUB_URL } from "../consts";
import { createApiError } from "../error";
import type { CredentialsParams, RepoDesignation } from "../types/public";
import { checkCredentials } from "../utils/checkCredentials";
import { toRepoId } from "../utils/toRepoId";

export async function countCommits(
	params: {
		repo: RepoDesignation;
		/**
		 * Revision to list commits from. Defaults to the default branch.
		 */
		revision?: string;
		hubUrl?: string;
		fetch?: typeof fetch;
	} & Partial<CredentialsParams>
): Promise<number> {
	const accessToken = checkCredentials(params);
	const repoId = toRepoId(params.repo);

	// Could upgrade to 1000 commits per page
	const url: string | undefined = `${params.hubUrl ?? HUB_URL}/api/${repoId.type}s/${repoId.name}/commits/${
		params.revision ?? "main"
	}?limit=1`;

	const res: Response = await (params.fetch ?? fetch)(url, {
		headers: accessToken ? { Authorization: `Bearer ${accessToken}` } : {},
	});

	if (!res.ok) {
		throw await createApiError(res);
	}

	return parseInt(res.headers.get("x-total-count") ?? "0", 10);
}
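A short usage sketch. Note that the implementation issues a single `?limit=1` request and reads the total from the `x-total-count` response header, so the cost is one request regardless of history length:

import { countCommits } from "./lib/count-commits";

const count = await countCommits({
	repo: { type: "model", name: "openai-community/gpt2" },
	// revision defaults to "main"
});
console.log(`history contains ${count} commit(s)`);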
lib/create-branch.spec.ts ADDED
@@ -0,0 +1,159 @@
import { assert, it, describe } from "vitest";
import { TEST_ACCESS_TOKEN, TEST_HUB_URL, TEST_USER } from "../test/consts";
import type { RepoId } from "../types/public";
import { insecureRandomString } from "../utils/insecureRandomString";
import { createRepo } from "./create-repo";
import { deleteRepo } from "./delete-repo";
import { createBranch } from "./create-branch";
import { uploadFile } from "./upload-file";
import { downloadFile } from "./download-file";

describe("createBranch", () => {
	it("should create a new branch from the default branch", async () => {
		const repoName = `${TEST_USER}/TEST-${insecureRandomString()}`;
		const repo = { type: "model", name: repoName } satisfies RepoId;

		try {
			await createRepo({
				accessToken: TEST_ACCESS_TOKEN,
				hubUrl: TEST_HUB_URL,
				repo,
			});

			await uploadFile({
				repo,
				accessToken: TEST_ACCESS_TOKEN,
				hubUrl: TEST_HUB_URL,
				file: {
					path: "file.txt",
					content: new Blob(["file content"]),
				},
			});

			await createBranch({
				repo,
				branch: "new-branch",
				accessToken: TEST_ACCESS_TOKEN,
				hubUrl: TEST_HUB_URL,
			});

			const content = await downloadFile({
				repo,
				accessToken: TEST_ACCESS_TOKEN,
				hubUrl: TEST_HUB_URL,
				path: "file.txt",
				revision: "new-branch",
			});

			assert.equal(await content?.text(), "file content");
		} finally {
			await deleteRepo({
				repo,
				accessToken: TEST_ACCESS_TOKEN,
				hubUrl: TEST_HUB_URL,
			});
		}
	});

	it("should create an empty branch", async () => {
		const repoName = `${TEST_USER}/TEST-${insecureRandomString()}`;
		const repo = { type: "model", name: repoName } satisfies RepoId;

		try {
			await createRepo({
				accessToken: TEST_ACCESS_TOKEN,
				hubUrl: TEST_HUB_URL,
				repo,
			});

			await uploadFile({
				repo,
				accessToken: TEST_ACCESS_TOKEN,
				hubUrl: TEST_HUB_URL,
				file: {
					path: "file.txt",
					content: new Blob(["file content"]),
				},
			});

			await createBranch({
				repo,
				branch: "empty-branch",
				empty: true,
				accessToken: TEST_ACCESS_TOKEN,
				hubUrl: TEST_HUB_URL,
			});

			const content = await downloadFile({
				repo,
				accessToken: TEST_ACCESS_TOKEN,
				hubUrl: TEST_HUB_URL,
				path: "file.txt",
				revision: "empty-branch",
			});

			assert.equal(content, null);
		} finally {
			await deleteRepo({
				repo,
				accessToken: TEST_ACCESS_TOKEN,
				hubUrl: TEST_HUB_URL,
			});
		}
	});

	it("should overwrite an existing branch", async () => {
		const repoName = `${TEST_USER}/TEST-${insecureRandomString()}`;
		const repo = { type: "model", name: repoName } satisfies RepoId;

		try {
			await createRepo({
				accessToken: TEST_ACCESS_TOKEN,
				hubUrl: TEST_HUB_URL,
				repo,
			});

			await uploadFile({
				repo,
				accessToken: TEST_ACCESS_TOKEN,
				hubUrl: TEST_HUB_URL,
				file: {
					path: "file.txt",
					content: new Blob(["file content"]),
				},
			});

			await createBranch({
				repo,
				branch: "overwrite-branch",
				accessToken: TEST_ACCESS_TOKEN,
				hubUrl: TEST_HUB_URL,
			});

			await createBranch({
				repo,
				branch: "overwrite-branch",
				overwrite: true,
				empty: true,
				accessToken: TEST_ACCESS_TOKEN,
				hubUrl: TEST_HUB_URL,
			});

			const content = await downloadFile({
				repo,
				accessToken: TEST_ACCESS_TOKEN,
				hubUrl: TEST_HUB_URL,
				path: "file.txt",
				revision: "overwrite-branch",
			});

			assert.equal(content, null);
		} finally {
			await deleteRepo({
				repo,
				accessToken: TEST_ACCESS_TOKEN,
				hubUrl: TEST_HUB_URL,
			});
		}
	});
});
lib/create-branch.ts ADDED
@@ -0,0 +1,54 @@
+ import { HUB_URL } from "../consts";
+ import { createApiError } from "../error";
+ import type { AccessToken, RepoDesignation } from "../types/public";
+ import { toRepoId } from "../utils/toRepoId";
+
+ export async function createBranch(params: {
+   repo: RepoDesignation;
+   /**
+    * Revision to create the branch from. Defaults to the default branch.
+    *
+    * Use empty: true to create an empty branch.
+    */
+   revision?: string;
+   hubUrl?: string;
+   accessToken?: AccessToken;
+   fetch?: typeof fetch;
+   /**
+    * The name of the branch to create
+    */
+   branch: string;
+   /**
+    * Use this to create an empty branch, with no commits.
+    */
+   empty?: boolean;
+   /**
+    * Use this to overwrite the branch if it already exists.
+    *
+    * If you only specify `overwrite` and no `revision`/`empty`, and the branch already exists, it will be a no-op.
+    */
+   overwrite?: boolean;
+ }): Promise<void> {
+   const repoId = toRepoId(params.repo);
+   const res = await (params.fetch ?? fetch)(
+     `${params.hubUrl ?? HUB_URL}/api/${repoId.type}s/${repoId.name}/branch/${encodeURIComponent(params.branch)}`,
+     {
+       method: "POST",
+       headers: {
+         "Content-Type": "application/json",
+         ...(params.accessToken && {
+           Authorization: `Bearer ${params.accessToken}`,
+         }),
+       },
+       body: JSON.stringify({
+         startingPoint: params.revision,
+         ...(params.empty && { emptyBranch: true }),
+         overwrite: params.overwrite,
+       }),
+     }
+   );
+
+   if (!res.ok) {
+     throw await createApiError(res);
+   }
+ }
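For context, a minimal usage sketch of `createBranch`, assuming the module is consumed as the published `@huggingface/hub` package; the repo name, branch names, and token below are placeholders:

import { createBranch } from "@huggingface/hub";

// create "dev" from the default branch (placeholder repo and token)
await createBranch({ repo: "my-user/my-model", branch: "dev", accessToken: "hf_..." });

// create an orphan branch with no commits
await createBranch({ repo: "my-user/my-model", branch: "scratch", empty: true, accessToken: "hf_..." });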
lib/create-repo.spec.ts ADDED
@@ -0,0 +1,103 @@
+ import { assert, it, describe, expect } from "vitest";
+
+ import { TEST_HUB_URL, TEST_ACCESS_TOKEN, TEST_USER } from "../test/consts";
+ import { insecureRandomString } from "../utils/insecureRandomString";
+ import { createRepo } from "./create-repo";
+ import { deleteRepo } from "./delete-repo";
+ import { downloadFile } from "./download-file";
+
+ describe("createRepo", () => {
+   it("should create a repo", async () => {
+     const repoName = `${TEST_USER}/TEST-${insecureRandomString()}`;
+
+     const result = await createRepo({
+       accessToken: TEST_ACCESS_TOKEN,
+       repo: {
+         name: repoName,
+         type: "model",
+       },
+       hubUrl: TEST_HUB_URL,
+       files: [{ path: ".gitattributes", content: new Blob(["*.html filter=lfs diff=lfs merge=lfs -text"]) }],
+     });
+
+     assert.deepStrictEqual(result, {
+       repoUrl: `${TEST_HUB_URL}/${repoName}`,
+     });
+
+     const content = await downloadFile({
+       repo: {
+         name: repoName,
+         type: "model",
+       },
+       path: ".gitattributes",
+       hubUrl: TEST_HUB_URL,
+     });
+
+     assert(content);
+     assert.strictEqual(await content.text(), "*.html filter=lfs diff=lfs merge=lfs -text");
+
+     await deleteRepo({
+       repo: {
+         name: repoName,
+         type: "model",
+       },
+       credentials: { accessToken: TEST_ACCESS_TOKEN },
+       hubUrl: TEST_HUB_URL,
+     });
+   });
+
+   it("should throw a client error when trying to create a repo without a fully-qualified name", async () => {
+     const tryCreate = createRepo({
+       repo: { name: "canonical", type: "model" },
+       credentials: { accessToken: TEST_ACCESS_TOKEN },
+       hubUrl: TEST_HUB_URL,
+     });
+
+     await expect(tryCreate).rejects.toBeInstanceOf(TypeError);
+   });
+
+   it("should create a model with a string as name", async () => {
+     const repoName = `${TEST_USER}/TEST-${insecureRandomString()}`;
+
+     const result = await createRepo({
+       accessToken: TEST_ACCESS_TOKEN,
+       hubUrl: TEST_HUB_URL,
+       repo: repoName,
+       files: [{ path: ".gitattributes", content: new Blob(["*.html filter=lfs diff=lfs merge=lfs -text"]) }],
+     });
+
+     assert.deepStrictEqual(result, {
+       repoUrl: `${TEST_HUB_URL}/${repoName}`,
+     });
+
+     await deleteRepo({
+       repo: {
+         name: repoName,
+         type: "model",
+       },
+       hubUrl: TEST_HUB_URL,
+       credentials: { accessToken: TEST_ACCESS_TOKEN },
+     });
+   });
+
+   it("should create a dataset with a string as name", async () => {
+     const repoName = `datasets/${TEST_USER}/TEST-${insecureRandomString()}`;
+
+     const result = await createRepo({
+       accessToken: TEST_ACCESS_TOKEN,
+       hubUrl: TEST_HUB_URL,
+       repo: repoName,
+       files: [{ path: ".gitattributes", content: new Blob(["*.html filter=lfs diff=lfs merge=lfs -text"]) }],
+     });
+
+     assert.deepStrictEqual(result, {
+       repoUrl: `${TEST_HUB_URL}/${repoName}`,
+     });
+
+     await deleteRepo({
+       repo: repoName,
+       hubUrl: TEST_HUB_URL,
+       credentials: { accessToken: TEST_ACCESS_TOKEN },
+     });
+   });
+ });
lib/create-repo.ts ADDED
@@ -0,0 +1,78 @@
+ import { HUB_URL } from "../consts";
+ import { createApiError } from "../error";
+ import type { ApiCreateRepoPayload } from "../types/api/api-create-repo";
+ import type { CredentialsParams, RepoDesignation, SpaceSdk } from "../types/public";
+ import { base64FromBytes } from "../utils/base64FromBytes";
+ import { checkCredentials } from "../utils/checkCredentials";
+ import { toRepoId } from "../utils/toRepoId";
+
+ export async function createRepo(
+   params: {
+     repo: RepoDesignation;
+     /**
+      * If unset, will follow the organization's default setting. (typically public, except for some Enterprise organizations)
+      */
+     private?: boolean;
+     license?: string;
+     /**
+      * Only a few lightweight files are supported at repo creation
+      */
+     files?: Array<{ content: ArrayBuffer | Blob; path: string }>;
+     /** @required for when {@link repo.type} === "space" */
+     sdk?: SpaceSdk;
+     hubUrl?: string;
+     /**
+      * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
+      */
+     fetch?: typeof fetch;
+   } & CredentialsParams
+ ): Promise<{ repoUrl: string }> {
+   const accessToken = checkCredentials(params);
+   const repoId = toRepoId(params.repo);
+   const [namespace, repoName] = repoId.name.split("/");
+
+   if (!namespace || !repoName) {
+     throw new TypeError(
+       `"${repoId.name}" is not a fully qualified repo name. It should be of the form "{namespace}/{repoName}".`
+     );
+   }
+
+   const res = await (params.fetch ?? fetch)(`${params.hubUrl ?? HUB_URL}/api/repos/create`, {
+     method: "POST",
+     body: JSON.stringify({
+       name: repoName,
+       private: params.private,
+       organization: namespace,
+       license: params.license,
+       ...(repoId.type === "space"
+         ? {
+             type: "space",
+             sdk: "static",
+           }
+         : {
+             type: repoId.type,
+           }),
+       files: params.files
+         ? await Promise.all(
+             params.files.map(async (file) => ({
+               encoding: "base64",
+               path: file.path,
+               content: base64FromBytes(
+                 new Uint8Array(file.content instanceof Blob ? await file.content.arrayBuffer() : file.content)
+               ),
+             }))
+           )
+         : undefined,
+     } satisfies ApiCreateRepoPayload),
+     headers: {
+       Authorization: `Bearer ${accessToken}`,
+       "Content-Type": "application/json",
+     },
+   });
+
+   if (!res.ok) {
+     throw await createApiError(res);
+   }
+   const output = await res.json();
+   return { repoUrl: output.url };
+ }
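A minimal usage sketch of `createRepo`, assuming consumption as the `@huggingface/hub` package; the repo name, token, and file content are placeholders:

import { createRepo } from "@huggingface/hub";

const { repoUrl } = await createRepo({
  repo: { type: "model", name: "my-user/my-model" }, // placeholder name
  accessToken: "hf_...", // placeholder token
  private: true,
  files: [{ path: "README.md", content: new Blob(["# my model"]) }],
});
console.log(repoUrl);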
lib/dataset-info.spec.ts ADDED
@@ -0,0 +1,56 @@
+ import { describe, expect, it } from "vitest";
+ import { datasetInfo } from "./dataset-info";
+ import type { DatasetEntry } from "./list-datasets";
+ import type { ApiDatasetInfo } from "../types/api/api-dataset";
+
+ describe("datasetInfo", () => {
+   it("should return the dataset info", async () => {
+     const info = await datasetInfo({
+       name: "nyu-mll/glue",
+     });
+     expect(info).toEqual({
+       id: "621ffdd236468d709f181e3f",
+       downloads: expect.any(Number),
+       gated: false,
+       name: "nyu-mll/glue",
+       updatedAt: expect.any(Date),
+       likes: expect.any(Number),
+       private: false,
+     });
+   });
+
+   it("should return the dataset info with author", async () => {
+     const info: DatasetEntry & Pick<ApiDatasetInfo, "author"> = await datasetInfo({
+       name: "nyu-mll/glue",
+       additionalFields: ["author"],
+     });
+     expect(info).toEqual({
+       id: "621ffdd236468d709f181e3f",
+       downloads: expect.any(Number),
+       gated: false,
+       name: "nyu-mll/glue",
+       updatedAt: expect.any(Date),
+       likes: expect.any(Number),
+       private: false,
+       author: "nyu-mll",
+     });
+   });
+
+   it("should return the dataset info for a specific revision", async () => {
+     const info: DatasetEntry & Pick<ApiDatasetInfo, "sha"> = await datasetInfo({
+       name: "nyu-mll/glue",
+       revision: "cb2099c76426ff97da7aa591cbd317d91fb5fcb7",
+       additionalFields: ["sha"],
+     });
+     expect(info).toEqual({
+       id: "621ffdd236468d709f181e3f",
+       downloads: expect.any(Number),
+       gated: false,
+       name: "nyu-mll/glue",
+       updatedAt: expect.any(Date),
+       likes: expect.any(Number),
+       private: false,
+       sha: "cb2099c76426ff97da7aa591cbd317d91fb5fcb7",
+     });
+   });
+ });
lib/dataset-info.ts ADDED
@@ -0,0 +1,61 @@
+ import { HUB_URL } from "../consts";
+ import { createApiError } from "../error";
+ import type { ApiDatasetInfo } from "../types/api/api-dataset";
+ import type { CredentialsParams } from "../types/public";
+ import { checkCredentials } from "../utils/checkCredentials";
+ import { pick } from "../utils/pick";
+ import { type DATASET_EXPANDABLE_KEYS, DATASET_EXPAND_KEYS, type DatasetEntry } from "./list-datasets";
+
+ export async function datasetInfo<
+   const T extends Exclude<(typeof DATASET_EXPANDABLE_KEYS)[number], (typeof DATASET_EXPAND_KEYS)[number]> = never,
+ >(
+   params: {
+     name: string;
+     hubUrl?: string;
+     additionalFields?: T[];
+     /**
+      * An optional Git revision id which can be a branch name, a tag, or a commit hash.
+      */
+     revision?: string;
+     /**
+      * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
+      */
+     fetch?: typeof fetch;
+   } & Partial<CredentialsParams>
+ ): Promise<DatasetEntry & Pick<ApiDatasetInfo, T>> {
+   const accessToken = params && checkCredentials(params);
+
+   const search = new URLSearchParams([
+     ...DATASET_EXPAND_KEYS.map((val) => ["expand", val] satisfies [string, string]),
+     ...(params?.additionalFields?.map((val) => ["expand", val] satisfies [string, string]) ?? []),
+   ]).toString();
+
+   const response = await (params.fetch || fetch)(
+     `${params?.hubUrl || HUB_URL}/api/datasets/${params.name}/revision/${encodeURIComponent(
+       params.revision ?? "HEAD"
+     )}?${search.toString()}`,
+     {
+       headers: {
+         ...(accessToken ? { Authorization: `Bearer ${accessToken}` } : {}),
+         Accept: "application/json",
+       },
+     }
+   );
+
+   if (!response.ok) {
+     throw await createApiError(response);
+   }
+
+   const data = await response.json();
+
+   return {
+     ...(params?.additionalFields && pick(data, params.additionalFields)),
+     id: data._id,
+     name: data.id,
+     private: data.private,
+     downloads: data.downloads,
+     likes: data.likes,
+     gated: data.gated,
+     updatedAt: new Date(data.lastModified),
+   } as DatasetEntry & Pick<ApiDatasetInfo, T>;
+ }
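A minimal usage sketch of `datasetInfo`, assuming consumption as the `@huggingface/hub` package (the dataset name matches the one used in the spec above):

import { datasetInfo } from "@huggingface/hub";

const info = await datasetInfo({ name: "nyu-mll/glue", additionalFields: ["author"] });
console.log(info.name, info.downloads, info.author);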
lib/delete-branch.spec.ts ADDED
@@ -0,0 +1,43 @@
+ import { it, describe } from "vitest";
+ import { TEST_ACCESS_TOKEN, TEST_HUB_URL, TEST_USER } from "../test/consts";
+ import type { RepoId } from "../types/public";
+ import { insecureRandomString } from "../utils/insecureRandomString";
+ import { createRepo } from "./create-repo";
+ import { deleteRepo } from "./delete-repo";
+ import { createBranch } from "./create-branch";
+ import { deleteBranch } from "./delete-branch";
+
+ describe("deleteBranch", () => {
+   it("should delete an existing branch", async () => {
+     const repoName = `${TEST_USER}/TEST-${insecureRandomString()}`;
+     const repo = { type: "model", name: repoName } satisfies RepoId;
+
+     try {
+       await createRepo({
+         accessToken: TEST_ACCESS_TOKEN,
+         hubUrl: TEST_HUB_URL,
+         repo,
+       });
+
+       await createBranch({
+         repo,
+         branch: "branch-to-delete",
+         accessToken: TEST_ACCESS_TOKEN,
+         hubUrl: TEST_HUB_URL,
+       });
+
+       await deleteBranch({
+         repo,
+         branch: "branch-to-delete",
+         accessToken: TEST_ACCESS_TOKEN,
+         hubUrl: TEST_HUB_URL,
+       });
+     } finally {
+       await deleteRepo({
+         repo,
+         accessToken: TEST_ACCESS_TOKEN,
+         hubUrl: TEST_HUB_URL,
+       });
+     }
+   });
+ });
lib/delete-branch.ts ADDED
@@ -0,0 +1,32 @@
+ import { HUB_URL } from "../consts";
+ import { createApiError } from "../error";
+ import type { AccessToken, RepoDesignation } from "../types/public";
+ import { toRepoId } from "../utils/toRepoId";
+
+ export async function deleteBranch(params: {
+   repo: RepoDesignation;
+   /**
+    * The name of the branch to delete
+    */
+   branch: string;
+   hubUrl?: string;
+   accessToken?: AccessToken;
+   fetch?: typeof fetch;
+ }): Promise<void> {
+   const repoId = toRepoId(params.repo);
+   const res = await (params.fetch ?? fetch)(
+     `${params.hubUrl ?? HUB_URL}/api/${repoId.type}s/${repoId.name}/branch/${encodeURIComponent(params.branch)}`,
+     {
+       method: "DELETE",
+       headers: {
+         ...(params.accessToken && {
+           Authorization: `Bearer ${params.accessToken}`,
+         }),
+       },
+     }
+   );
+
+   if (!res.ok) {
+     throw await createApiError(res);
+   }
+ }
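A minimal usage sketch of `deleteBranch` (package name, repo, branch, and token are placeholders, assuming the `@huggingface/hub` package):

import { deleteBranch } from "@huggingface/hub";

await deleteBranch({ repo: "my-user/my-model", branch: "dev", accessToken: "hf_..." });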
lib/delete-file.spec.ts ADDED
@@ -0,0 +1,64 @@
+ import { assert, it, describe } from "vitest";
+
+ import { TEST_ACCESS_TOKEN, TEST_HUB_URL, TEST_USER } from "../test/consts";
+ import type { RepoId } from "../types/public";
+ import { insecureRandomString } from "../utils/insecureRandomString";
+ import { createRepo } from "./create-repo";
+ import { deleteRepo } from "./delete-repo";
+ import { deleteFile } from "./delete-file";
+ import { downloadFile } from "./download-file";
+
+ describe("deleteFile", () => {
+   it("should delete a file", async () => {
+     const repoName = `${TEST_USER}/TEST-${insecureRandomString()}`;
+     const repo = { type: "model", name: repoName } satisfies RepoId;
+
+     try {
+       const result = await createRepo({
+         accessToken: TEST_ACCESS_TOKEN,
+         hubUrl: TEST_HUB_URL,
+         repo,
+         files: [
+           { path: "file1", content: new Blob(["file1"]) },
+           { path: "file2", content: new Blob(["file2"]) },
+         ],
+       });
+
+       assert.deepStrictEqual(result, {
+         repoUrl: `${TEST_HUB_URL}/${repoName}`,
+       });
+
+       let content = await downloadFile({
+         hubUrl: TEST_HUB_URL,
+         repo,
+         path: "file1",
+       });
+
+       assert.strictEqual(await content?.text(), "file1");
+
+       await deleteFile({ path: "file1", repo, accessToken: TEST_ACCESS_TOKEN, hubUrl: TEST_HUB_URL });
+
+       content = await downloadFile({
+         repo,
+         path: "file1",
+         hubUrl: TEST_HUB_URL,
+       });
+
+       assert.strictEqual(content, null);
+
+       content = await downloadFile({
+         repo,
+         path: "file2",
+         hubUrl: TEST_HUB_URL,
+       });
+
+       assert.strictEqual(await content?.text(), "file2");
+     } finally {
+       await deleteRepo({
+         repo,
+         accessToken: TEST_ACCESS_TOKEN,
+         hubUrl: TEST_HUB_URL,
+       });
+     }
+   });
+ });
lib/delete-file.ts ADDED
@@ -0,0 +1,35 @@
+ import type { CredentialsParams } from "../types/public";
+ import type { CommitOutput, CommitParams } from "./commit";
+ import { commit } from "./commit";
+
+ export function deleteFile(
+   params: {
+     repo: CommitParams["repo"];
+     path: string;
+     commitTitle?: CommitParams["title"];
+     commitDescription?: CommitParams["description"];
+     hubUrl?: CommitParams["hubUrl"];
+     fetch?: CommitParams["fetch"];
+     branch?: CommitParams["branch"];
+     isPullRequest?: CommitParams["isPullRequest"];
+     parentCommit?: CommitParams["parentCommit"];
+   } & CredentialsParams
+ ): Promise<CommitOutput> {
+   return commit({
+     ...(params.accessToken ? { accessToken: params.accessToken } : { credentials: params.credentials }),
+     repo: params.repo,
+     operations: [
+       {
+         operation: "delete",
+         path: params.path,
+       },
+     ],
+     title: params.commitTitle ?? `Delete ${params.path}`,
+     description: params.commitDescription,
+     hubUrl: params.hubUrl,
+     branch: params.branch,
+     isPullRequest: params.isPullRequest,
+     parentCommit: params.parentCommit,
+     fetch: params.fetch,
+   });
+ }
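A minimal usage sketch of `deleteFile`, which under the hood creates a single-operation commit (placeholder repo, path, and token, assuming the `@huggingface/hub` package):

import { deleteFile } from "@huggingface/hub";

await deleteFile({ repo: "my-user/my-model", path: "old-weights.bin", accessToken: "hf_..." });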
lib/delete-files.spec.ts ADDED
@@ -0,0 +1,81 @@
+ import { assert, it, describe } from "vitest";
+
+ import { TEST_HUB_URL, TEST_ACCESS_TOKEN, TEST_USER } from "../test/consts";
+ import type { RepoId } from "../types/public";
+ import { insecureRandomString } from "../utils/insecureRandomString";
+ import { createRepo } from "./create-repo";
+ import { deleteRepo } from "./delete-repo";
+ import { deleteFiles } from "./delete-files";
+ import { downloadFile } from "./download-file";
+
+ describe("deleteFiles", () => {
+   it("should delete multiple files", async () => {
+     const repoName = `${TEST_USER}/TEST-${insecureRandomString()}`;
+     const repo = { type: "model", name: repoName } satisfies RepoId;
+
+     try {
+       const result = await createRepo({
+         accessToken: TEST_ACCESS_TOKEN,
+         repo,
+         files: [
+           { path: "file1", content: new Blob(["file1"]) },
+           { path: "file2", content: new Blob(["file2"]) },
+           { path: "file3", content: new Blob(["file3"]) },
+         ],
+         hubUrl: TEST_HUB_URL,
+       });
+
+       assert.deepStrictEqual(result, {
+         repoUrl: `${TEST_HUB_URL}/${repoName}`,
+       });
+
+       let content = await downloadFile({
+         repo,
+         path: "file1",
+         hubUrl: TEST_HUB_URL,
+       });
+
+       assert.strictEqual(await content?.text(), "file1");
+
+       content = await downloadFile({
+         repo,
+         path: "file2",
+         hubUrl: TEST_HUB_URL,
+       });
+
+       assert.strictEqual(await content?.text(), "file2");
+
+       await deleteFiles({ paths: ["file1", "file2"], repo, accessToken: TEST_ACCESS_TOKEN, hubUrl: TEST_HUB_URL });
+
+       content = await downloadFile({
+         repo,
+         path: "file1",
+         hubUrl: TEST_HUB_URL,
+       });
+
+       assert.strictEqual(content, null);
+
+       content = await downloadFile({
+         repo,
+         path: "file2",
+         hubUrl: TEST_HUB_URL,
+       });
+
+       assert.strictEqual(content, null);
+
+       content = await downloadFile({
+         repo,
+         path: "file3",
+         hubUrl: TEST_HUB_URL,
+       });
+
+       assert.strictEqual(await content?.text(), "file3");
+     } finally {
+       await deleteRepo({
+         repo,
+         accessToken: TEST_ACCESS_TOKEN,
+         hubUrl: TEST_HUB_URL,
+       });
+     }
+   });
+ });
lib/delete-files.ts ADDED
@@ -0,0 +1,33 @@
+ import type { CredentialsParams } from "../types/public";
+ import type { CommitOutput, CommitParams } from "./commit";
+ import { commit } from "./commit";
+
+ export function deleteFiles(
+   params: {
+     repo: CommitParams["repo"];
+     paths: string[];
+     commitTitle?: CommitParams["title"];
+     commitDescription?: CommitParams["description"];
+     hubUrl?: CommitParams["hubUrl"];
+     branch?: CommitParams["branch"];
+     isPullRequest?: CommitParams["isPullRequest"];
+     parentCommit?: CommitParams["parentCommit"];
+     fetch?: CommitParams["fetch"];
+   } & CredentialsParams
+ ): Promise<CommitOutput> {
+   return commit({
+     ...(params.accessToken ? { accessToken: params.accessToken } : { credentials: params.credentials }),
+     repo: params.repo,
+     operations: params.paths.map((path) => ({
+       operation: "delete",
+       path,
+     })),
+     title: params.commitTitle ?? `Deletes ${params.paths.length} files`,
+     description: params.commitDescription,
+     hubUrl: params.hubUrl,
+     branch: params.branch,
+     isPullRequest: params.isPullRequest,
+     parentCommit: params.parentCommit,
+     fetch: params.fetch,
+   });
+ }
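A minimal usage sketch of `deleteFiles`, which batches all deletions into one commit (placeholder repo, paths, and token, assuming the `@huggingface/hub` package):

import { deleteFiles } from "@huggingface/hub";

await deleteFiles({
  repo: "my-user/my-model",
  paths: ["file1.txt", "file2.txt"],
  commitTitle: "Remove temporary files", // optional; defaults to "Deletes N files"
  accessToken: "hf_...",
});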
lib/delete-repo.ts ADDED
@@ -0,0 +1,37 @@
+ import { HUB_URL } from "../consts";
+ import { createApiError } from "../error";
+ import type { CredentialsParams, RepoDesignation } from "../types/public";
+ import { checkCredentials } from "../utils/checkCredentials";
+ import { toRepoId } from "../utils/toRepoId";
+
+ export async function deleteRepo(
+   params: {
+     repo: RepoDesignation;
+     hubUrl?: string;
+     /**
+      * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
+      */
+     fetch?: typeof fetch;
+   } & CredentialsParams
+ ): Promise<void> {
+   const accessToken = checkCredentials(params);
+   const repoId = toRepoId(params.repo);
+   const [namespace, repoName] = repoId.name.split("/");
+
+   const res = await (params.fetch ?? fetch)(`${params.hubUrl ?? HUB_URL}/api/repos/delete`, {
+     method: "DELETE",
+     body: JSON.stringify({
+       name: repoName,
+       organization: namespace,
+       type: repoId.type,
+     }),
+     headers: {
+       Authorization: `Bearer ${accessToken}`,
+       "Content-Type": "application/json",
+     },
+   });
+
+   if (!res.ok) {
+     throw await createApiError(res);
+   }
+ }
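A minimal usage sketch of `deleteRepo` (placeholder repo and token, assuming the `@huggingface/hub` package):

import { deleteRepo } from "@huggingface/hub";

await deleteRepo({ repo: { type: "dataset", name: "my-user/my-dataset" }, accessToken: "hf_..." });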
lib/download-file-to-cache-dir.spec.ts ADDED
@@ -0,0 +1,306 @@
+ import { expect, test, describe, vi, beforeEach } from "vitest";
+ import type { RepoDesignation, RepoId } from "../types/public";
+ import { dirname, join } from "node:path";
+ import { lstat, mkdir, stat, symlink, rename } from "node:fs/promises";
+ import { pathsInfo } from "./paths-info";
+ import { createWriteStream, type Stats } from "node:fs";
+ import { getHFHubCachePath, getRepoFolderName } from "./cache-management";
+ import { toRepoId } from "../utils/toRepoId";
+ import { downloadFileToCacheDir } from "./download-file-to-cache-dir";
+ import { createSymlink } from "../utils/symlink";
+
+ vi.mock("node:fs/promises", () => ({
+   rename: vi.fn(),
+   symlink: vi.fn(),
+   lstat: vi.fn(),
+   mkdir: vi.fn(),
+   stat: vi.fn(),
+ }));
+
+ vi.mock("node:fs", () => ({
+   createWriteStream: vi.fn(),
+ }));
+
+ vi.mock("./paths-info", () => ({
+   pathsInfo: vi.fn(),
+ }));
+
+ vi.mock("../utils/symlink", () => ({
+   createSymlink: vi.fn(),
+ }));
+
+ const DUMMY_REPO: RepoId = {
+   name: "hello-world",
+   type: "model",
+ };
+
+ const DUMMY_ETAG = "dummy-etag";
+
+ // utility test method to get the blob file path
+ function _getBlobFile(params: {
+   repo: RepoDesignation;
+   etag: string;
+   cacheDir?: string; // defaults to {@link getHFHubCachePath}
+ }) {
+   return join(params.cacheDir ?? getHFHubCachePath(), getRepoFolderName(toRepoId(params.repo)), "blobs", params.etag);
+ }
+
+ // utility test method to get the snapshot file path
+ function _getSnapshotFile(params: {
+   repo: RepoDesignation;
+   path: string;
+   revision: string;
+   cacheDir?: string; // defaults to {@link getHFHubCachePath}
+ }) {
+   return join(
+     params.cacheDir ?? getHFHubCachePath(),
+     getRepoFolderName(toRepoId(params.repo)),
+     "snapshots",
+     params.revision,
+     params.path
+   );
+ }
+
+ describe("downloadFileToCacheDir", () => {
+   const fetchMock: typeof fetch = vi.fn();
+   beforeEach(() => {
+     vi.resetAllMocks();
+     // mock a 200 response
+     vi.mocked(fetchMock).mockResolvedValue(
+       new Response("dummy-body", {
+         status: 200,
+         headers: {
+           etag: DUMMY_ETAG,
+           "Content-Range": "bytes 0-54/55",
+         },
+       })
+     );
+
+     // prevent cache hits by default
+     vi.mocked(stat).mockRejectedValue(new Error("Does not exist"));
+     vi.mocked(lstat).mockRejectedValue(new Error("Does not exist"));
+   });
+
+   test("should throw an error if fileDownloadInfo returns nothing", async () => {
+     await expect(async () => {
+       await downloadFileToCacheDir({
+         repo: DUMMY_REPO,
+         path: "/README.md",
+         fetch: fetchMock,
+       });
+     }).rejects.toThrowError("cannot get path info for /README.md");
+
+     expect(pathsInfo).toHaveBeenCalledWith(
+       expect.objectContaining({
+         repo: DUMMY_REPO,
+         paths: ["/README.md"],
+         fetch: fetchMock,
+       })
+     );
+   });
+
+   test("existing symlink and blob should not be re-downloaded", async () => {
+     // <cache>/<repo>/<revision>/snapshots/README.md
+     const expectPointer = _getSnapshotFile({
+       repo: DUMMY_REPO,
+       path: "/README.md",
+       revision: "dd4bc8b21efa05ec961e3efc4ee5e3832a3679c7",
+     });
+     // stat ensures the symlink and the pointed file exist
+     vi.mocked(stat).mockResolvedValue({} as Stats); // prevent default mocked reject
+
+     const output = await downloadFileToCacheDir({
+       repo: DUMMY_REPO,
+       path: "/README.md",
+       fetch: fetchMock,
+       revision: "dd4bc8b21efa05ec961e3efc4ee5e3832a3679c7",
+     });
+
+     expect(stat).toHaveBeenCalledOnce();
+     // get the call argument for stat
+     const statArg = vi.mocked(stat).mock.calls[0][0];
+
+     expect(statArg).toBe(expectPointer);
+     expect(fetchMock).not.toHaveBeenCalled();
+
+     expect(output).toBe(expectPointer);
+   });
+
+   test("existing symlink and blob with default revision should not be re-downloaded", async () => {
+     // <cache>/<repo>/<revision>/snapshots/README.md
+     const expectPointer = _getSnapshotFile({
+       repo: DUMMY_REPO,
+       path: "/README.md",
+       revision: "main",
+     });
+     // stat ensures the symlink and the pointed file exist
+     vi.mocked(stat).mockResolvedValue({} as Stats); // prevent default mocked reject
+     vi.mocked(lstat).mockResolvedValue({} as Stats);
+     vi.mocked(pathsInfo).mockResolvedValue([
+       {
+         oid: DUMMY_ETAG,
+         size: 55,
+         path: "README.md",
+         type: "file",
+         lastCommit: {
+           date: new Date(),
+           id: "main",
+           title: "Commit msg",
+         },
+       },
+     ]);
+
+     const output = await downloadFileToCacheDir({
+       repo: DUMMY_REPO,
+       path: "/README.md",
+       fetch: fetchMock,
+     });
+
+     expect(stat).toHaveBeenCalledOnce();
+     expect(symlink).not.toHaveBeenCalled();
+     // get the call argument for stat
+     const statArg = vi.mocked(stat).mock.calls[0][0];
+
+     expect(statArg).toBe(expectPointer);
+     expect(fetchMock).not.toHaveBeenCalled();
+
+     expect(output).toBe(expectPointer);
+   });
+
+   test("existing blob should only create the symlink", async () => {
+     // <cache>/<repo>/<revision>/snapshots/README.md
+     const expectPointer = _getSnapshotFile({
+       repo: DUMMY_REPO,
+       path: "/README.md",
+       revision: "dummy-commit-hash",
+     });
+     // <cache>/<repo>/blobs/<etag>
+     const expectedBlob = _getBlobFile({
+       repo: DUMMY_REPO,
+       etag: DUMMY_ETAG,
+     });
+
+     // mock existing blob only, no symlink
+     vi.mocked(lstat).mockResolvedValue({} as Stats);
+     // mock pathsInfo resolved content
+     vi.mocked(pathsInfo).mockResolvedValue([
+       {
+         oid: DUMMY_ETAG,
+         size: 55,
+         path: "README.md",
+         type: "file",
+         lastCommit: {
+           date: new Date(),
+           id: "dummy-commit-hash",
+           title: "Commit msg",
+         },
+       },
+     ]);
+
+     const output = await downloadFileToCacheDir({
+       repo: DUMMY_REPO,
+       path: "/README.md",
+       fetch: fetchMock,
+     });
+
+     // should have checked for the blob
+     expect(lstat).toHaveBeenCalled();
+     expect(vi.mocked(lstat).mock.calls[0][0]).toBe(expectedBlob);
+
+     // symlink should have been created
+     expect(createSymlink).toHaveBeenCalledOnce();
+     // no download done
+     expect(fetchMock).not.toHaveBeenCalled();
+
+     expect(output).toBe(expectPointer);
+   });
+
+   test("expect resolve value to be the pointer path of the downloaded file", async () => {
+     // <cache>/<repo>/<revision>/snapshots/README.md
+     const expectPointer = _getSnapshotFile({
+       repo: DUMMY_REPO,
+       path: "/README.md",
+       revision: "dummy-commit-hash",
+     });
+     // <cache>/<repo>/blobs/<etag>
+     const expectedBlob = _getBlobFile({
+       repo: DUMMY_REPO,
+       etag: DUMMY_ETAG,
+     });
+
+     vi.mocked(pathsInfo).mockResolvedValue([
+       {
+         oid: DUMMY_ETAG,
+         size: 55,
+         path: "README.md",
+         type: "file",
+         lastCommit: {
+           date: new Date(),
+           id: "dummy-commit-hash",
+           title: "Commit msg",
+         },
+       },
+     ]);
+
+     // eslint-disable-next-line @typescript-eslint/no-explicit-any
+     vi.mocked(createWriteStream).mockReturnValue(async function* () {} as any);
+
+     const output = await downloadFileToCacheDir({
+       repo: DUMMY_REPO,
+       path: "/README.md",
+       fetch: fetchMock,
+     });
+
+     // expect the blobs and snapshots folders to have been created
+     expect(vi.mocked(mkdir).mock.calls[0][0]).toBe(dirname(expectedBlob));
+     expect(vi.mocked(mkdir).mock.calls[1][0]).toBe(dirname(expectPointer));
+
+     expect(output).toBe(expectPointer);
+   });
+
+   test("should write fetch response to blob", async () => {
+     // <cache>/<repo>/<revision>/snapshots/README.md
+     const expectPointer = _getSnapshotFile({
+       repo: DUMMY_REPO,
+       path: "/README.md",
+       revision: "dummy-commit-hash",
+     });
+     // <cache>/<repo>/blobs/<etag>
+     const expectedBlob = _getBlobFile({
+       repo: DUMMY_REPO,
+       etag: DUMMY_ETAG,
+     });
+
+     // mock pathsInfo resolved content
+     vi.mocked(pathsInfo).mockResolvedValue([
+       {
+         oid: DUMMY_ETAG,
+         size: 55,
+         path: "README.md",
+         type: "file",
+         lastCommit: {
+           date: new Date(),
+           id: "dummy-commit-hash",
+           title: "Commit msg",
+         },
+       },
+     ]);
+
+     // eslint-disable-next-line @typescript-eslint/no-explicit-any
+     vi.mocked(createWriteStream).mockReturnValue(async function* () {} as any);
+
+     await downloadFileToCacheDir({
+       repo: DUMMY_REPO,
+       path: "/README.md",
+       fetch: fetchMock,
+     });
+
+     const incomplete = `${expectedBlob}.incomplete`;
+     // 1. should write fetch#response#body to the incomplete file
+     expect(createWriteStream).toHaveBeenCalledWith(incomplete);
+     // 2. should rename the incomplete file to the expected blob name
+     expect(rename).toHaveBeenCalledWith(incomplete, expectedBlob);
+     // 3. should create a symlink pointing to the blob
+     expect(createSymlink).toHaveBeenCalledWith({ sourcePath: expectedBlob, finalPath: expectPointer });
+   });
+ });
lib/download-file-to-cache-dir.ts ADDED
@@ -0,0 +1,138 @@
+ import { getHFHubCachePath, getRepoFolderName } from "./cache-management";
+ import { dirname, join } from "node:path";
+ import { rename, lstat, mkdir, stat } from "node:fs/promises";
+ import type { CommitInfo, PathInfo } from "./paths-info";
+ import { pathsInfo } from "./paths-info";
+ import type { CredentialsParams, RepoDesignation } from "../types/public";
+ import { toRepoId } from "../utils/toRepoId";
+ import { downloadFile } from "./download-file";
+ import { createSymlink } from "../utils/symlink";
+ import { Readable } from "node:stream";
+ import type { ReadableStream } from "node:stream/web";
+ import { pipeline } from "node:stream/promises";
+ import { createWriteStream } from "node:fs";
+
+ export const REGEX_COMMIT_HASH: RegExp = new RegExp("^[0-9a-f]{40}$");
+
+ function getFilePointer(storageFolder: string, revision: string, relativeFilename: string): string {
+   const snapshotPath = join(storageFolder, "snapshots");
+   return join(snapshotPath, revision, relativeFilename);
+ }
+
+ /**
+  * Handy method to check that a file exists, or that the target of a symlink exists
+  * @param path
+  * @param followSymlinks
+  */
+ async function exists(path: string, followSymlinks?: boolean): Promise<boolean> {
+   try {
+     if (followSymlinks) {
+       await stat(path);
+     } else {
+       await lstat(path);
+     }
+     return true;
+   } catch {
+     return false;
+   }
+ }
+
+ /**
+  * Download a given file if it's not already present in the local cache.
+  * @param params
+  * @return the symlink to the blob object
+  */
+ export async function downloadFileToCacheDir(
+   params: {
+     repo: RepoDesignation;
+     path: string;
+     /**
+      * If true, will download the raw git file.
+      *
+      * For example, when calling on a file stored with Git LFS, the pointer file will be downloaded instead.
+      */
+     raw?: boolean;
+     /**
+      * An optional Git revision id which can be a branch name, a tag, or a commit hash.
+      *
+      * @default "main"
+      */
+     revision?: string;
+     hubUrl?: string;
+     cacheDir?: string;
+     /**
+      * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
+      */
+     fetch?: typeof fetch;
+   } & Partial<CredentialsParams>
+ ): Promise<string> {
+   // get the revision provided or default to main
+   const revision = params.revision ?? "main";
+   const cacheDir = params.cacheDir ?? getHFHubCachePath();
+   // get repo id
+   const repoId = toRepoId(params.repo);
+   // get storage folder
+   const storageFolder = join(cacheDir, getRepoFolderName(repoId));
+
+   let commitHash: string | undefined;
+
+   // if the user provides a commitHash as revision, and they already have the file on disk, shortcut everything.
+   if (REGEX_COMMIT_HASH.test(revision)) {
+     commitHash = revision;
+     const pointerPath = getFilePointer(storageFolder, revision, params.path);
+     if (await exists(pointerPath, true)) return pointerPath;
+   }
+
+   const pathsInformation: (PathInfo & { lastCommit: CommitInfo })[] = await pathsInfo({
+     ...params,
+     paths: [params.path],
+     revision: revision,
+     expand: true,
+   });
+   if (!pathsInformation || pathsInformation.length !== 1) throw new Error(`cannot get path info for ${params.path}`);
+
+   let etag: string;
+   if (pathsInformation[0].lfs) {
+     etag = pathsInformation[0].lfs.oid; // get the oid of the file pointed to by LFS
+   } else {
+     etag = pathsInformation[0].oid; // get the repo file if not an LFS pointer
+   }
+
+   const pointerPath = getFilePointer(storageFolder, commitHash ?? pathsInformation[0].lastCommit.id, params.path);
+   const blobPath = join(storageFolder, "blobs", etag);
+
+   // if we have the pointer file, we can shortcut the download
+   if (await exists(pointerPath, true)) return pointerPath;
+
+   // mkdir the parent directories of the blob and pointer paths
+   await mkdir(dirname(blobPath), { recursive: true });
+   await mkdir(dirname(pointerPath), { recursive: true });
+
+   // We might already have the blob but not the pointer
+   // shortcut the download if needed
+   if (await exists(blobPath)) {
+     // create a symlink in the snapshot folder to the blob object
+     await createSymlink({ sourcePath: blobPath, finalPath: pointerPath });
+     return pointerPath;
+   }
+
+   const incomplete = `${blobPath}.incomplete`;
+   console.debug(`Downloading ${params.path} to ${incomplete}`);
+
+   const blob: Blob | null = await downloadFile({
+     ...params,
+     revision: commitHash,
+   });
+
+   if (!blob) {
+     throw new Error(`invalid response for file ${params.path}`);
+   }
+
+   await pipeline(Readable.fromWeb(blob.stream() as ReadableStream), createWriteStream(incomplete));
+
+   // rename the .incomplete file to the expected blob name
+   await rename(incomplete, blobPath);
+   // create a symlink in the snapshot folder to the blob object
+   await createSymlink({ sourcePath: blobPath, finalPath: pointerPath });
+   return pointerPath;
+ }
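A minimal usage sketch of `downloadFileToCacheDir` in Node.js, assuming the `@huggingface/hub` package; the resolved path is a symlink inside the snapshot folder pointing at the cached blob, under the default HF hub cache directory unless `cacheDir` is given:

import { downloadFileToCacheDir } from "@huggingface/hub";

const localPath = await downloadFileToCacheDir({
  repo: "openai-community/gpt2",
  path: "config.json",
});
console.log(localPath); // symlink to the cached blob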
lib/download-file.spec.ts ADDED
@@ -0,0 +1,82 @@
+ import { expect, test, describe, assert } from "vitest";
+ import { downloadFile } from "./download-file";
+ import { deleteRepo } from "./delete-repo";
+ import { createRepo } from "./create-repo";
+ import { TEST_ACCESS_TOKEN, TEST_HUB_URL, TEST_USER } from "../test/consts";
+ import { insecureRandomString } from "../utils/insecureRandomString";
+
+ describe("downloadFile", () => {
+   test("should download regular file", async () => {
+     const blob = await downloadFile({
+       repo: {
+         type: "model",
+         name: "openai-community/gpt2",
+       },
+       path: "README.md",
+     });
+
+     const text = await blob?.slice(0, 1000).text();
+     assert(
+       text?.includes(`---
+ language: en
+ tags:
+ - exbert
+
+ license: mit
+ ---
+
+
+ # GPT-2
+
+ Test the whole generation capabilities here: https://transformer.huggingface.co/doc/gpt2-large`)
+     );
+   });
+
+   test("should download xet file", async () => {
+     const blob = await downloadFile({
+       repo: {
+         type: "model",
+         name: "celinah/xet-experiments",
+       },
+       path: "large_text.txt",
+     });
+
+     const text = await blob?.slice(0, 100).text();
+     expect(text).toMatch("this is a text file.".repeat(10).slice(0, 100));
+   });
+
+   test("should download private file", async () => {
+     const repoName = `datasets/${TEST_USER}/TEST-${insecureRandomString()}`;
+
+     const result = await createRepo({
+       accessToken: TEST_ACCESS_TOKEN,
+       hubUrl: TEST_HUB_URL,
+       private: true,
+       repo: repoName,
+       files: [{ path: ".gitattributes", content: new Blob(["*.html filter=lfs diff=lfs merge=lfs -text"]) }],
+     });
+
+     assert.deepStrictEqual(result, {
+       repoUrl: `${TEST_HUB_URL}/${repoName}`,
+     });
+
+     try {
+       const blob = await downloadFile({
+         repo: repoName,
+         path: ".gitattributes",
+         hubUrl: TEST_HUB_URL,
+         accessToken: TEST_ACCESS_TOKEN,
+       });
+
+       assert(blob, "File should be found");
+
+       const text = await blob?.text();
+       assert.strictEqual(text, "*.html filter=lfs diff=lfs merge=lfs -text");
+     } finally {
+       await deleteRepo({
+         repo: repoName,
+         hubUrl: TEST_HUB_URL,
+         accessToken: TEST_ACCESS_TOKEN,
+       });
+     }
+   });
+ });
lib/download-file.ts ADDED
@@ -0,0 +1,77 @@
+ import type { CredentialsParams, RepoDesignation } from "../types/public";
+ import { checkCredentials } from "../utils/checkCredentials";
+ import { WebBlob } from "../utils/WebBlob";
+ import { XetBlob } from "../utils/XetBlob";
+ import type { FileDownloadInfoOutput } from "./file-download-info";
+ import { fileDownloadInfo } from "./file-download-info";
+
+ /**
+  * @returns null when the file doesn't exist
+  */
+ export async function downloadFile(
+   params: {
+     repo: RepoDesignation;
+     path: string;
+     /**
+      * If true, will download the raw git file.
+      *
+      * For example, when calling on a file stored with Git LFS, the pointer file will be downloaded instead.
+      */
+     raw?: boolean;
+     /**
+      * An optional Git revision id which can be a branch name, a tag, or a commit hash.
+      *
+      * @default "main"
+      */
+     revision?: string;
+     hubUrl?: string;
+     /**
+      * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
+      */
+     fetch?: typeof fetch;
+     /**
+      * Whether to use the xet protocol to download the file (if applicable).
+      *
+      * Currently there's experimental support for it, so it's not enabled by default.
+      *
+      * It will be enabled automatically in a future minor version.
+      *
+      * @default false
+      */
+     xet?: boolean;
+     /**
+      * Can save an HTTP request if provided
+      */
+     downloadInfo?: FileDownloadInfoOutput;
+   } & Partial<CredentialsParams>
+ ): Promise<Blob | null> {
+   const accessToken = checkCredentials(params);
+
+   const info =
+     params.downloadInfo ??
+     (await fileDownloadInfo({
+       accessToken,
+       repo: params.repo,
+       path: params.path,
+       revision: params.revision,
+       hubUrl: params.hubUrl,
+       fetch: params.fetch,
+       raw: params.raw,
+     }));
+
+   if (!info) {
+     return null;
+   }
+
+   if (info.xet && params.xet) {
+     return new XetBlob({
+       refreshUrl: info.xet.refreshUrl.href,
+       reconstructionUrl: info.xet.reconstructionUrl.href,
+       fetch: params.fetch,
+       accessToken,
+       size: info.size,
+     });
+   }
+
+   return new WebBlob(new URL(info.url), 0, info.size, "", true, params.fetch ?? fetch, accessToken);
+ }
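A minimal usage sketch of `downloadFile`, assuming the `@huggingface/hub` package; the returned `Blob` is lazy, so bytes are only fetched when read, and `null` means the file does not exist:

import { downloadFile } from "@huggingface/hub";

const blob = await downloadFile({ repo: "openai-community/gpt2", path: "README.md" });
if (blob) {
  console.log(await blob.text());
}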
lib/file-download-info.spec.ts ADDED
@@ -0,0 +1,59 @@
+ import { assert, it, describe } from "vitest";
+ import { fileDownloadInfo } from "./file-download-info";
+
+ describe("fileDownloadInfo", () => {
+   it("should fetch LFS file info", async () => {
+     const info = await fileDownloadInfo({
+       repo: {
+         name: "bert-base-uncased",
+         type: "model",
+       },
+       path: "tf_model.h5",
+       revision: "dd4bc8b21efa05ec961e3efc4ee5e3832a3679c7",
+     });
+
+     assert.strictEqual(info?.size, 536063208);
+     assert.strictEqual(info?.etag, '"a7a17d6d844b5de815ccab5f42cad6d24496db3850a2a43d8258221018ce87d2"');
+   });
+
+   it("should fetch raw LFS pointer info", async () => {
+     const info = await fileDownloadInfo({
+       repo: {
+         name: "bert-base-uncased",
+         type: "model",
+       },
+       path: "tf_model.h5",
+       revision: "dd4bc8b21efa05ec961e3efc4ee5e3832a3679c7",
+       raw: true,
+     });
+
+     assert.strictEqual(info?.size, 134);
+     assert.strictEqual(info?.etag, '"9eb98c817f04b051b3bcca591bcd4e03cec88018"');
+   });
+
+   it("should fetch non-LFS file info", async () => {
+     const info = await fileDownloadInfo({
+       repo: {
+         name: "bert-base-uncased",
+         type: "model",
+       },
+       path: "tokenizer_config.json",
+       revision: "1a7dd4986e3dab699c24ca19b2afd0f5e1a80f37",
+     });
+
+     assert.strictEqual(info?.size, 28);
+     assert.strictEqual(info?.etag, '"a661b1a138dac6dc5590367402d100765010ffd6"');
+   });
+
+   it("should fetch xet file info", async () => {
+     const info = await fileDownloadInfo({
+       repo: {
+         type: "model",
+         name: "celinah/xet-experiments",
+       },
+       path: "large_text.txt",
+     });
+     assert.strictEqual(info?.size, 62914580);
+     assert.strictEqual(info?.etag, '"c27f98578d9363b27db0bc1cbd9c692f8e6e90ae98c38cee7bc0a88829debd17"');
+   });
+ });
lib/file-download-info.ts ADDED
@@ -0,0 +1,151 @@
+ import { HUB_URL } from "../consts";
+ import { createApiError, InvalidApiResponseFormatError } from "../error";
+ import type { CredentialsParams, RepoDesignation } from "../types/public";
+ import { checkCredentials } from "../utils/checkCredentials";
+ import { parseLinkHeader } from "../utils/parseLinkHeader";
+ import { toRepoId } from "../utils/toRepoId";
+
+ export interface XetFileInfo {
+   hash: string;
+   refreshUrl: URL;
+   /**
+    * Can be directly used instead of the hash.
+    */
+   reconstructionUrl: URL;
+ }
+
+ export interface FileDownloadInfoOutput {
+   size: number;
+   etag: string;
+   xet?: XetFileInfo;
+   // URL to fetch (with the access token if private file)
+   url: string;
+ }
+
+ /**
+  * @returns null when the file doesn't exist
+  */
+ export async function fileDownloadInfo(
+   params: {
+     repo: RepoDesignation;
+     path: string;
+     revision?: string;
+     hubUrl?: string;
+     /**
+      * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
+      */
+     fetch?: typeof fetch;
+     /**
+      * To get the raw pointer file behind an LFS file
+      */
+     raw?: boolean;
+     /**
+      * To avoid the content-disposition header in the returned URL for LFS files
+      *
+      * So that on browsers you can use the URL in an iframe, for example
+      */
+     noContentDisposition?: boolean;
+   } & Partial<CredentialsParams>
+ ): Promise<FileDownloadInfoOutput | null> {
+   const accessToken = checkCredentials(params);
+   const repoId = toRepoId(params.repo);
+
+   const hubUrl = params.hubUrl ?? HUB_URL;
+   const url =
+     `${hubUrl}/${repoId.type === "model" ? "" : `${repoId.type}s/`}${repoId.name}/${
+       params.raw ? "raw" : "resolve"
+     }/${encodeURIComponent(params.revision ?? "main")}/${params.path}` +
+     (params.noContentDisposition ? "?noContentDisposition=1" : "");
+
+   const resp = await (params.fetch ?? fetch)(url, {
+     method: "GET",
+     headers: {
+       ...(accessToken && {
+         Authorization: `Bearer ${accessToken}`,
+       }),
+       Range: "bytes=0-0",
+       Accept: "application/vnd.xet-fileinfo+json, */*",
+     },
+   });
+
+   if (resp.status === 404 && resp.headers.get("X-Error-Code") === "EntryNotFound") {
+     return null;
+   }
+
+   if (!resp.ok) {
+     throw await createApiError(resp);
+   }
+
+   let size: number | undefined;
+   let xetInfo: XetFileInfo | undefined;
+
+   if (resp.headers.get("Content-Type")?.includes("application/vnd.xet-fileinfo+json")) {
+     size = parseInt(resp.headers.get("X-Linked-Size") ?? "invalid");
+     if (isNaN(size)) {
+       throw new InvalidApiResponseFormatError("Invalid file size received in X-Linked-Size header");
+     }
+
+     const hash = resp.headers.get("X-Xet-Hash");
+     const links = parseLinkHeader(resp.headers.get("Link") ?? "");
+
+     const reconstructionUrl = (() => {
+       try {
+         return new URL(links["xet-reconstruction-info"]);
+       } catch {
+         return null;
+       }
+     })();
+     const refreshUrl = (() => {
+       try {
+         return new URL(links["xet-auth"]);
+       } catch {
+         return null;
+       }
+     })();
+
+     if (!hash) {
+       throw new InvalidApiResponseFormatError("No hash received in X-Xet-Hash header");
+     }
+
+     if (!reconstructionUrl || !refreshUrl) {
+       throw new InvalidApiResponseFormatError("No xet-reconstruction-info or xet-auth link header");
+     }
+     xetInfo = {
+       hash,
+       refreshUrl,
+       reconstructionUrl,
+     };
+   }
+
+   if (size === undefined || isNaN(size)) {
+     const contentRangeHeader = resp.headers.get("content-range");
+
+     if (!contentRangeHeader) {
+       throw new InvalidApiResponseFormatError("Expected size information");
+     }
+
+     const [, parsedSize] = contentRangeHeader.split("/");
+     size = parseInt(parsedSize);
+
+     if (isNaN(size)) {
+       throw new InvalidApiResponseFormatError("Invalid file size received");
+     }
+   }
+
+   const etag = resp.headers.get("X-Linked-ETag") ?? resp.headers.get("ETag") ?? undefined;
+
+   if (!etag) {
+     throw new InvalidApiResponseFormatError("Expected ETag");
+   }
+
+   return {
+     etag,
+     size,
+     xet: xetInfo,
+     // Cannot use resp.url in case it's a S3 url and the user adds an Authorization header to it.
+     url:
+       resp.url &&
+       (new URL(resp.url).origin === new URL(hubUrl).origin || resp.headers.get("X-Cache")?.endsWith(" cloudfront"))
+         ? resp.url
+         : url,
+   };
+ }
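A minimal usage sketch of `fileDownloadInfo`, assuming the `@huggingface/hub` package; it issues a single ranged GET and returns metadata without downloading the file body:

import { fileDownloadInfo } from "@huggingface/hub";

const info = await fileDownloadInfo({
  repo: { type: "model", name: "bert-base-uncased" },
  path: "tf_model.h5",
});
console.log(info?.size, info?.etag);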
lib/file-exists.spec.ts ADDED
@@ -0,0 +1,30 @@
+ import { assert, it, describe } from "vitest";
+ import { fileExists } from "./file-exists";
+
+ describe("fileExists", () => {
+   it("should return true for file that exists", async () => {
+     const info = await fileExists({
+       repo: {
+         name: "bert-base-uncased",
+         type: "model",
+       },
+       path: "tf_model.h5",
+       revision: "dd4bc8b21efa05ec961e3efc4ee5e3832a3679c7",
+     });
+
+     assert(info, "file should exist");
+   });
+
+   it("should return false for file that does not exist", async () => {
+     const info = await fileExists({
+       repo: {
+         name: "bert-base-uncased",
+         type: "model",
+       },
+       path: "tf_model.h5dadazdzazd",
+       revision: "dd4bc8b21efa05ec961e3efc4ee5e3832a3679c7",
+     });
+
+     assert(!info, "file should not exist");
+   });
+ });
lib/file-exists.ts ADDED
@@ -0,0 +1,41 @@
+ import { HUB_URL } from "../consts";
+ import { createApiError } from "../error";
+ import type { CredentialsParams, RepoDesignation } from "../types/public";
+ import { checkCredentials } from "../utils/checkCredentials";
+ import { toRepoId } from "../utils/toRepoId";
+
+ export async function fileExists(
+   params: {
+     repo: RepoDesignation;
+     path: string;
+     revision?: string;
+     hubUrl?: string;
+     /**
+      * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
+      */
+     fetch?: typeof fetch;
+   } & Partial<CredentialsParams>
+ ): Promise<boolean> {
+   const accessToken = checkCredentials(params);
+   const repoId = toRepoId(params.repo);
+
+   const hubUrl = params.hubUrl ?? HUB_URL;
+   const url = `${hubUrl}/${repoId.type === "model" ? "" : `${repoId.type}s/`}${repoId.name}/raw/${encodeURIComponent(
+     params.revision ?? "main"
+   )}/${params.path}`;
+
+   const resp = await (params.fetch ?? fetch)(url, {
+     method: "HEAD",
+     headers: accessToken ? { Authorization: `Bearer ${accessToken}` } : {},
+   });
+
+   if (resp.status === 404) {
+     return false;
+   }
+
+   if (!resp.ok) {
+     throw await createApiError(resp);
+   }
+
+   return true;
+ }
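A minimal usage sketch of `fileExists`, assuming the `@huggingface/hub` package; it performs a cheap HEAD request and never downloads the file:

import { fileExists } from "@huggingface/hub";

const exists = await fileExists({
  repo: { type: "model", name: "bert-base-uncased" },
  path: "tf_model.h5",
});
console.log(exists); // true or false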
lib/index.ts ADDED
@@ -0,0 +1,32 @@
+ export * from "./cache-management";
+ export * from "./check-repo-access";
+ export * from "./commit";
+ export * from "./count-commits";
+ export * from "./create-repo";
+ export * from "./create-branch";
+ export * from "./dataset-info";
+ export * from "./delete-branch";
+ export * from "./delete-file";
+ export * from "./delete-files";
+ export * from "./delete-repo";
+ export * from "./download-file";
+ export * from "./download-file-to-cache-dir";
+ export * from "./file-download-info";
+ export * from "./file-exists";
+ export * from "./list-commits";
+ export * from "./list-datasets";
+ export * from "./list-files";
+ export * from "./list-models";
+ export * from "./list-spaces";
+ export * from "./model-info";
+ export * from "./oauth-handle-redirect";
+ export * from "./oauth-login-url";
+ export * from "./parse-safetensors-metadata";
+ export * from "./paths-info";
+ export * from "./repo-exists";
+ export * from "./snapshot-download";
+ export * from "./space-info";
+ export * from "./upload-file";
+ export * from "./upload-files";
+ export * from "./upload-files-with-progress";
+ export * from "./who-am-i";
lib/list-commits.spec.ts ADDED
@@ -0,0 +1,117 @@
+ import { assert, it, describe } from "vitest";
+ import type { CommitData } from "./list-commits";
+ import { listCommits } from "./list-commits";
+
+ describe("listCommits", () => {
+   it("should fetch paginated commits from the repo", async () => {
+     const commits: CommitData[] = [];
+     for await (const commit of listCommits({
+       repo: {
+         name: "openai-community/gpt2",
+         type: "model",
+       },
+       revision: "607a30d783dfa663caf39e06633721c8d4cfcd7e",
+       batchSize: 5,
+     })) {
+       commits.push(commit);
+     }
+
+     assert.equal(commits.length, 26);
+     assert.deepEqual(commits.slice(0, 6), [
+       {
+         oid: "607a30d783dfa663caf39e06633721c8d4cfcd7e",
+         title: "Adds the tokenizer configuration file (#80)",
+         message: "\n\n\n- Adds tokenizer_config.json file (db6d57930088fb63e52c010bd9ac77c955ac55e7)\n\n",
+         authors: [
+           {
+             username: "lysandre",
+             avatarUrl:
+               "https://cdn-avatars.huggingface.co/v1/production/uploads/5e3aec01f55e2b62848a5217/PMKS0NNB4MJQlTSFzh918.jpeg",
+           },
+         ],
+         date: new Date("2024-02-19T10:57:45.000Z"),
+       },
+       {
+         oid: "11c5a3d5811f50298f278a704980280950aedb10",
+         title: "Adding ONNX file of this model (#60)",
+         message: "\n\n\n- Adding ONNX file of this model (9411f419c589519e1a46c94ac7789ea20fd7c322)\n\n",
+         authors: [
+           {
+             username: "fxmarty",
+             avatarUrl:
+               "https://cdn-avatars.huggingface.co/v1/production/uploads/1651743336129-624c60cba8ec93a7ac188b56.png",
+           },
+         ],
+         date: new Date("2023-06-30T02:19:43.000Z"),
+       },
+       {
+         oid: "e7da7f221d5bf496a48136c0cd264e630fe9fcc8",
+         title: "Update generation_config.json",
+         message: "",
+         authors: [
+           {
+             username: "joaogante",
+             avatarUrl: "https://cdn-avatars.huggingface.co/v1/production/uploads/1641203017724-noauth.png",
+           },
+         ],
+         date: new Date("2022-12-16T15:44:21.000Z"),
+       },
+       {
+         oid: "f27b190eeac4c2302d24068eabf5e9d6044389ae",
+         title: "Add note that this is the smallest version of the model (#18)",
+         message:
+           "\n\n\n- Add note that this is the smallest version of the model (611838ef095a5bb35bf2027d05e1194b7c9d37ac)\n\n\nCo-authored-by: helen <mathemakitten@users.noreply.huggingface.co>\n",
+         authors: [
+           {
+             username: "sgugger",
+             avatarUrl:
+               "https://cdn-avatars.huggingface.co/v1/production/uploads/1593126474392-5ef50182b71947201082a4e5.jpeg",
+           },
+           {
+             username: "mathemakitten",
+             avatarUrl:
+               "https://cdn-avatars.huggingface.co/v1/production/uploads/1658248499901-6079afe2d2cd8c150e6ae05e.jpeg",
+           },
+         ],
+         date: new Date("2022-11-23T12:55:26.000Z"),
+       },
+       {
+         oid: "0dd7bcc7a64e4350d8859c9a2813132fbf6ae591",
+         title: "Our very first generation_config.json (#17)",
+         message:
+           "\n\n\n- Our very first generation_config.json (671851b7e9d56ef062890732065d7bd5f4628bd6)\n\n\nCo-authored-by: Joao Gante <joaogante@users.noreply.huggingface.co>\n",
+         authors: [
+           {
+             username: "sgugger",
+             avatarUrl:
+               "https://cdn-avatars.huggingface.co/v1/production/uploads/1593126474392-5ef50182b71947201082a4e5.jpeg",
+           },
+           {
+             username: "joaogante",
+             avatarUrl: "https://cdn-avatars.huggingface.co/v1/production/uploads/1641203017724-noauth.png",
+           },
+         ],
+         date: new Date("2022-11-18T18:19:30.000Z"),
+       },
+       {
+         oid: "75e09b43581151bd1d9ef6700faa605df408979f",
+         title: "Upload model.safetensors with huggingface_hub (#12)",
+         message:
+           "\n\n\n- Upload model.safetensors with huggingface_hub (ba2f794b2e4ea09ef932a6628fa0815dfaf09661)\n\n\nCo-authored-by: Nicolas Patry <Narsil@users.noreply.huggingface.co>\n",
+         authors: [
+           {
+             username: "julien-c",
+             avatarUrl:
+               "https://cdn-avatars.huggingface.co/v1/production/uploads/5dd96eb166059660ed1ee413/NQtzmrDdbG0H8qkZvRyGk.jpeg",
+           },
+           {
+             username: "Narsil",
+             avatarUrl:
+               "https://cdn-avatars.huggingface.co/v1/production/uploads/1608285816082-5e2967b819407e3277369b95.png",
+           },
+         ],
+         date: new Date("2022-10-20T09:34:54.000Z"),
+       },
+     ]);
+   });
+ });
lib/list-commits.ts ADDED
@@ -0,0 +1,70 @@
+import { HUB_URL } from "../consts";
+import { createApiError } from "../error";
+import type { ApiCommitData } from "../types/api/api-commit";
+import type { CredentialsParams, RepoDesignation } from "../types/public";
+import { checkCredentials } from "../utils/checkCredentials";
+import { parseLinkHeader } from "../utils/parseLinkHeader";
+import { toRepoId } from "../utils/toRepoId";
+
+export interface CommitData {
+  oid: string;
+  title: string;
+  message: string;
+  authors: Array<{ username: string; avatarUrl: string }>;
+  date: Date;
+}
+
+export async function* listCommits(
+  params: {
+    repo: RepoDesignation;
+    /**
+     * Revision to list commits from. Defaults to the default branch.
+     */
+    revision?: string;
+    hubUrl?: string;
+    /**
+     * Number of commits to fetch from the Hub per HTTP call. Defaults to 100. Can be set up to 1000.
+     */
+    batchSize?: number;
+    /**
+     * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
+     */
+    fetch?: typeof fetch;
+  } & Partial<CredentialsParams>
+): AsyncGenerator<CommitData> {
+  const accessToken = checkCredentials(params);
+  const repoId = toRepoId(params.repo);
+
+  // Could upgrade to 1000 commits per page
+  let url: string | undefined = `${params.hubUrl ?? HUB_URL}/api/${repoId.type}s/${repoId.name}/commits/${
+    params.revision ?? "main"
+  }?limit=${params.batchSize ?? 100}`;
+
+  while (url) {
+    const res: Response = await (params.fetch ?? fetch)(url, {
+      headers: accessToken ? { Authorization: `Bearer ${accessToken}` } : {},
+    });
+
+    if (!res.ok) {
+      throw await createApiError(res);
+    }
+
+    const resJson: ApiCommitData[] = await res.json();
+    for (const commit of resJson) {
+      yield {
+        oid: commit.id,
+        title: commit.title,
+        message: commit.message,
+        authors: commit.authors.map((author) => ({
+          username: author.user,
+          avatarUrl: author.avatar,
+        })),
+        date: new Date(commit.date),
+      };
+    }
+
+    const linkHeader = res.headers.get("Link");
+
+    url = linkHeader ? parseLinkHeader(linkHeader).next : undefined;
+  }
+}
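For context, a minimal consumption sketch: `listCommits` is an async generator that follows the `Link` response header across pages, so callers just iterate. This assumes the package is imported as `@huggingface/hub` (as in the library's own OAuth docs); the repo name is only an example.

```ts
import { listCommits } from "@huggingface/hub";

// Iterate lazily; each HTTP call fetches `batchSize` commits (default 100).
for await (const commit of listCommits({
  repo: { type: "model", name: "openai-community/gpt2" },
  batchSize: 100,
})) {
  console.log(commit.oid, commit.title, commit.date.toISOString());
}
```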
lib/list-datasets.spec.ts ADDED
@@ -0,0 +1,47 @@
+import { describe, expect, it } from "vitest";
+import type { DatasetEntry } from "./list-datasets";
+import { listDatasets } from "./list-datasets";
+
+describe("listDatasets", () => {
+  it("should list datasets from hf-doc-build", async () => {
+    const results: DatasetEntry[] = [];
+
+    for await (const entry of listDatasets({ search: { owner: "hf-doc-build" } })) {
+      if (entry.name === "hf-doc-build/doc-build-dev-test") {
+        continue;
+      }
+      if (typeof entry.downloads === "number") {
+        entry.downloads = 0;
+      }
+      if (typeof entry.likes === "number") {
+        entry.likes = 0;
+      }
+      if (entry.updatedAt instanceof Date && !isNaN(entry.updatedAt.getTime())) {
+        entry.updatedAt = new Date(0);
+      }
+
+      results.push(entry);
+    }
+
+    expect(results).deep.equal([
+      {
+        id: "6356b19985da6f13863228bd",
+        name: "hf-doc-build/doc-build",
+        private: false,
+        gated: false,
+        downloads: 0,
+        likes: 0,
+        updatedAt: new Date(0),
+      },
+      {
+        id: "636a1b69f2f9ec4289c4c19e",
+        name: "hf-doc-build/doc-build-dev",
+        gated: false,
+        private: false,
+        downloads: 0,
+        likes: 0,
+        updatedAt: new Date(0),
+      },
+    ]);
+  });
+});
lib/list-datasets.ts ADDED
@@ -0,0 +1,121 @@
+import { HUB_URL } from "../consts";
+import { createApiError } from "../error";
+import type { ApiDatasetInfo } from "../types/api/api-dataset";
+import type { CredentialsParams } from "../types/public";
+import { checkCredentials } from "../utils/checkCredentials";
+import { parseLinkHeader } from "../utils/parseLinkHeader";
+import { pick } from "../utils/pick";
+
+export const DATASET_EXPAND_KEYS = [
+  "private",
+  "downloads",
+  "gated",
+  "likes",
+  "lastModified",
+] as const satisfies readonly (keyof ApiDatasetInfo)[];
+
+export const DATASET_EXPANDABLE_KEYS = [
+  "author",
+  "cardData",
+  "citation",
+  "createdAt",
+  "disabled",
+  "description",
+  "downloads",
+  "downloadsAllTime",
+  "gated",
+  "gitalyUid",
+  "lastModified",
+  "likes",
+  "paperswithcode_id",
+  "private",
+  // "siblings",
+  "sha",
+  "tags",
+] as const satisfies readonly (keyof ApiDatasetInfo)[];
+
+export interface DatasetEntry {
+  id: string;
+  name: string;
+  private: boolean;
+  downloads: number;
+  gated: false | "auto" | "manual";
+  likes: number;
+  updatedAt: Date;
+}
+
+export async function* listDatasets<
+  const T extends Exclude<(typeof DATASET_EXPANDABLE_KEYS)[number], (typeof DATASET_EXPAND_KEYS)[number]> = never,
+>(
+  params?: {
+    search?: {
+      /**
+       * Will search in the dataset name for matches
+       */
+      query?: string;
+      owner?: string;
+      tags?: string[];
+    };
+    hubUrl?: string;
+    additionalFields?: T[];
+    /**
+     * Set to limit the number of datasets returned.
+     */
+    limit?: number;
+    /**
+     * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
+     */
+    fetch?: typeof fetch;
+  } & Partial<CredentialsParams>
+): AsyncGenerator<DatasetEntry & Pick<ApiDatasetInfo, T>> {
+  const accessToken = params && checkCredentials(params);
+  let totalToFetch = params?.limit ?? Infinity;
+  const search = new URLSearchParams([
+    ...Object.entries({
+      limit: String(Math.min(totalToFetch, 500)),
+      ...(params?.search?.owner ? { author: params.search.owner } : undefined),
+      ...(params?.search?.query ? { search: params.search.query } : undefined),
+    }),
+    ...(params?.search?.tags?.map((tag) => ["filter", tag]) ?? []),
+    ...DATASET_EXPAND_KEYS.map((val) => ["expand", val] satisfies [string, string]),
+    ...(params?.additionalFields?.map((val) => ["expand", val] satisfies [string, string]) ?? []),
+  ]).toString();
+  let url: string | undefined = `${params?.hubUrl || HUB_URL}/api/datasets` + (search ? "?" + search : "");
+
+  while (url) {
+    const res: Response = await (params?.fetch ?? fetch)(url, {
+      headers: {
+        accept: "application/json",
+        ...(accessToken ? { Authorization: `Bearer ${accessToken}` } : undefined),
+      },
+    });
+
+    if (!res.ok) {
+      throw await createApiError(res);
+    }
+
+    const items: ApiDatasetInfo[] = await res.json();
+
+    for (const item of items) {
+      yield {
+        ...(params?.additionalFields && pick(item, params.additionalFields)),
+        id: item._id,
+        name: item.id,
+        private: item.private,
+        downloads: item.downloads,
+        likes: item.likes,
+        gated: item.gated,
+        updatedAt: new Date(item.lastModified),
+      } as DatasetEntry & Pick<ApiDatasetInfo, T>;
+      totalToFetch--;
+      if (totalToFetch <= 0) {
+        return;
+      }
+    }
+
+    const linkHeader = res.headers.get("Link");
+
+    url = linkHeader ? parseLinkHeader(linkHeader).next : undefined;
+    // Could update limit in url to fetch fewer items if not all items of the next page are needed.
+  }
+}
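A minimal usage sketch of the expand mechanism above: `additionalFields` must be drawn from `DATASET_EXPANDABLE_KEYS`, and each extra key is sent as an `expand` query parameter. Assumes the package is imported as `@huggingface/hub`; the owner is only an example.

```ts
import { listDatasets } from "@huggingface/hub";

// Fetch up to 10 datasets from one owner, with the optional "description" field expanded.
for await (const dataset of listDatasets({
  search: { owner: "hf-doc-build" },
  additionalFields: ["description"],
  limit: 10,
})) {
  console.log(dataset.name, dataset.downloads, dataset.description);
}
```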
lib/list-files.spec.ts ADDED
@@ -0,0 +1,173 @@
+import { assert, it, describe } from "vitest";
+import type { ListFileEntry } from "./list-files";
+import { listFiles } from "./list-files";
+
+describe("listFiles", () => {
+  it("should fetch the list of files from the repo", async () => {
+    const cursor = listFiles({
+      repo: {
+        name: "bert-base-uncased",
+        type: "model",
+      },
+      revision: "dd4bc8b21efa05ec961e3efc4ee5e3832a3679c7",
+    });
+
+    const files: ListFileEntry[] = [];
+
+    for await (const entry of cursor) {
+      files.push(entry);
+    }
+
+    assert.deepStrictEqual(files, [
+      {
+        oid: "dc08351d4dc0732d9c8af04070ced089b201ce2f",
+        path: ".gitattributes",
+        size: 345,
+        type: "file",
+      },
+      {
+        oid: "fca794a5f07ff8f963fe8b61e3694b0fb7f955df",
+        path: "config.json",
+        size: 313,
+        type: "file",
+      },
+      {
+        lfs: {
+          oid: "097417381d6c7230bd9e3557456d726de6e83245ec8b24f529f60198a67b203a",
+          size: 440473133,
+          pointerSize: 134,
+        },
+        xetHash: "2d8408d3a894d02517d04956e2f7546ff08362594072f3527ce144b5212a3296",
+        oid: "ba5d19791be1dd7992e33bd61f20207b0f7f50a5",
+        path: "pytorch_model.bin",
+        size: 440473133,
+        type: "file",
+      },
+      {
+        lfs: {
+          oid: "a7a17d6d844b5de815ccab5f42cad6d24496db3850a2a43d8258221018ce87d2",
+          size: 536063208,
+          pointerSize: 134,
+        },
+        xetHash: "879c5715c18a0b7f051dd33f70f0a5c8dd1522e0a43f6f75520f16167f29279b",
+        oid: "9eb98c817f04b051b3bcca591bcd4e03cec88018",
+        path: "tf_model.h5",
+        size: 536063208,
+        type: "file",
+      },
+      {
+        oid: "fb140275c155a9c7c5a3b3e0e77a9e839594a938",
+        path: "vocab.txt",
+        size: 231508,
+        type: "file",
+      },
+    ]);
+  });
+
+  it("should fetch the list of files from the repo, including last commit", async () => {
+    const cursor = listFiles({
+      repo: {
+        name: "bert-base-uncased",
+        type: "model",
+      },
+      revision: "dd4bc8b21efa05ec961e3efc4ee5e3832a3679c7",
+      expand: true,
+    });
+
+    const files: ListFileEntry[] = [];
+
+    for await (const entry of cursor) {
+      delete entry.securityFileStatus; // flaky
+      files.push(entry);
+    }
+
+    assert.deepStrictEqual(files, [
+      {
+        lastCommit: {
+          date: "2018-11-14T23:35:08.000Z",
+          id: "504939aa53e8ce310dba3dd2296dbe266c575de4",
+          title: "initial commit",
+        },
+        oid: "dc08351d4dc0732d9c8af04070ced089b201ce2f",
+        path: ".gitattributes",
+        size: 345,
+        type: "file",
+      },
+      {
+        lastCommit: {
+          date: "2019-06-18T09:06:51.000Z",
+          id: "bb3c1c3256d2598217df9889a14a2e811587891d",
+          title: "Update config.json",
+        },
+        oid: "fca794a5f07ff8f963fe8b61e3694b0fb7f955df",
+        path: "config.json",
+        size: 313,
+        type: "file",
+      },
+      {
+        lastCommit: {
+          date: "2019-06-18T09:06:34.000Z",
+          id: "3d2477d72b675a999d1b13ca822aaaf4908634ad",
+          title: "Update pytorch_model.bin",
+        },
+        lfs: {
+          oid: "097417381d6c7230bd9e3557456d726de6e83245ec8b24f529f60198a67b203a",
+          size: 440473133,
+          pointerSize: 134,
+        },
+        xetHash: "2d8408d3a894d02517d04956e2f7546ff08362594072f3527ce144b5212a3296",
+        oid: "ba5d19791be1dd7992e33bd61f20207b0f7f50a5",
+        path: "pytorch_model.bin",
+        size: 440473133,
+        type: "file",
+      },
+      {
+        lastCommit: {
+          date: "2019-09-23T19:48:44.000Z",
+          id: "dd4bc8b21efa05ec961e3efc4ee5e3832a3679c7",
+          title: "Update tf_model.h5",
+        },
+        lfs: {
+          oid: "a7a17d6d844b5de815ccab5f42cad6d24496db3850a2a43d8258221018ce87d2",
+          size: 536063208,
+          pointerSize: 134,
+        },
+        xetHash: "879c5715c18a0b7f051dd33f70f0a5c8dd1522e0a43f6f75520f16167f29279b",
+        oid: "9eb98c817f04b051b3bcca591bcd4e03cec88018",
+        path: "tf_model.h5",
+        size: 536063208,
+        type: "file",
+      },
+      {
+        lastCommit: {
+          date: "2018-11-14T23:35:08.000Z",
+          id: "2f07d813ca87c8c709147704c87210359ccf2309",
+          title: "Update vocab.txt",
+        },
+        oid: "fb140275c155a9c7c5a3b3e0e77a9e839594a938",
+        path: "vocab.txt",
+        size: 231508,
+        type: "file",
+      },
+    ]);
+  });
+
+  it("should fetch the list of files from the repo, including subfolders", async () => {
+    const cursor = listFiles({
+      repo: {
+        name: "xsum",
+        type: "dataset",
+      },
+      revision: "0f3ea2f2b55fcb11e71fb1e3aec6822e44ddcb0f",
+      recursive: true,
+    });
+
+    const files: ListFileEntry[] = [];
+
+    for await (const entry of cursor) {
+      files.push(entry);
+    }
+
+    assert(files.some((file) => file.path === "data/XSUM-EMNLP18-Summary-Data-Original.tar.gz"));
+  });
+});
lib/list-files.ts ADDED
@@ -0,0 +1,94 @@
+import { HUB_URL } from "../consts";
+import { createApiError } from "../error";
+import type { ApiIndexTreeEntry } from "../types/api/api-index-tree";
+import type { CredentialsParams, RepoDesignation } from "../types/public";
+import { checkCredentials } from "../utils/checkCredentials";
+import { parseLinkHeader } from "../utils/parseLinkHeader";
+import { toRepoId } from "../utils/toRepoId";
+
+export interface ListFileEntry {
+  type: "file" | "directory" | "unknown";
+  size: number;
+  path: string;
+  oid: string;
+  lfs?: {
+    oid: string;
+    size: number;
+    /** Size of the raw pointer file, 100~200 bytes */
+    pointerSize: number;
+  };
+  /**
+   * Xet-backed hash, a new protocol replacing LFS for big files.
+   */
+  xetHash?: string;
+  /**
+   * Only fetched if `expand` is set to `true` in the `listFiles` call.
+   */
+  lastCommit?: {
+    date: string;
+    id: string;
+    title: string;
+  };
+  /**
+   * Only fetched if `expand` is set to `true` in the `listFiles` call.
+   */
+  securityFileStatus?: unknown;
+}
+
+/**
+ * List files in a folder. To list ALL files in the directory, call it
+ * with {@link params.recursive} set to `true`.
+ */
+export async function* listFiles(
+  params: {
+    repo: RepoDesignation;
+    /**
+     * Do we want to list files in subdirectories?
+     */
+    recursive?: boolean;
+    /**
+     * E.g. 'data' for listing all files in the 'data' folder. Leave it empty to list all
+     * files in the repo.
+     */
+    path?: string;
+    /**
+     * Fetch `lastCommit` and `securityFileStatus` for each file.
+     */
+    expand?: boolean;
+    revision?: string;
+    hubUrl?: string;
+    /**
+     * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
+     */
+    fetch?: typeof fetch;
+  } & Partial<CredentialsParams>
+): AsyncGenerator<ListFileEntry> {
+  const accessToken = checkCredentials(params);
+  const repoId = toRepoId(params.repo);
+  let url: string | undefined = `${params.hubUrl || HUB_URL}/api/${repoId.type}s/${repoId.name}/tree/${
+    params.revision || "main"
+  }${params.path ? "/" + params.path : ""}?recursive=${!!params.recursive}&expand=${!!params.expand}`;
+
+  while (url) {
+    const res: Response = await (params.fetch ?? fetch)(url, {
+      headers: {
+        accept: "application/json",
+        ...(accessToken ? { Authorization: `Bearer ${accessToken}` } : undefined),
+      },
+    });
+
+    if (!res.ok) {
+      throw await createApiError(res);
+    }
+
+    const items: ApiIndexTreeEntry[] = await res.json();
+
+    for (const item of items) {
+      yield item;
+    }
+
+    const linkHeader = res.headers.get("Link");
+
+    url = linkHeader ? parseLinkHeader(linkHeader).next : undefined;
+  }
+}
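A minimal usage sketch, mirroring the spec above: walk a repo tree recursively and distinguish LFS-backed files (which carry an `lfs` field) from regular blobs. Assumes the package is imported as `@huggingface/hub`.

```ts
import { listFiles } from "@huggingface/hub";

// Recursively list every file in the repo at the default revision.
for await (const file of listFiles({
  repo: { type: "model", name: "bert-base-uncased" },
  recursive: true,
})) {
  if (file.type === "file") {
    console.log(file.path, file.lfs ? `LFS, ${file.lfs.size} bytes` : `${file.size} bytes`);
  }
}
```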
lib/list-models.spec.ts ADDED
@@ -0,0 +1,118 @@
+import { describe, expect, it } from "vitest";
+import type { ModelEntry } from "./list-models";
+import { listModels } from "./list-models";
+
+describe("listModels", () => {
+  it("should list models for depth estimation", async () => {
+    const results: ModelEntry[] = [];
+
+    for await (const entry of listModels({
+      search: { owner: "Intel", task: "depth-estimation" },
+    })) {
+      if (typeof entry.downloads === "number") {
+        entry.downloads = 0;
+      }
+      if (typeof entry.likes === "number") {
+        entry.likes = 0;
+      }
+      if (entry.updatedAt instanceof Date && !isNaN(entry.updatedAt.getTime())) {
+        entry.updatedAt = new Date(0);
+      }
+
+      if (!["Intel/dpt-large", "Intel/dpt-hybrid-midas"].includes(entry.name)) {
+        expect(entry.task).to.equal("depth-estimation");
+        continue;
+      }
+
+      results.push(entry);
+    }
+
+    results.sort((a, b) => a.id.localeCompare(b.id));
+
+    expect(results).deep.equal([
+      {
+        id: "621ffdc136468d709f17e709",
+        name: "Intel/dpt-large",
+        private: false,
+        gated: false,
+        downloads: 0,
+        likes: 0,
+        task: "depth-estimation",
+        updatedAt: new Date(0),
+      },
+      {
+        id: "638f07977559bf9a2b2b04ac",
+        name: "Intel/dpt-hybrid-midas",
+        gated: false,
+        private: false,
+        downloads: 0,
+        likes: 0,
+        task: "depth-estimation",
+        updatedAt: new Date(0),
+      },
+    ]);
+  });
+
+  it("should list Indonesian models with gguf format", async () => {
+    let count = 0;
+    for await (const entry of listModels({
+      search: { tags: ["gguf", "id"] },
+      additionalFields: ["tags"],
+      limit: 2,
+    })) {
+      count++;
+      expect(entry.tags).to.include("gguf");
+      expect(entry.tags).to.include("id");
+    }
+
+    expect(count).to.equal(2);
+  });
+
+  it("should search model by name", async () => {
+    let count = 0;
+    for await (const entry of listModels({
+      search: { query: "t5" },
+      limit: 10,
+    })) {
+      count++;
+      expect(entry.name.toLocaleLowerCase()).to.include("t5");
+    }
+
+    expect(count).to.equal(10);
+  });
+
+  it("should search model by inference provider", async () => {
+    let count = 0;
+    for await (const entry of listModels({
+      search: { inferenceProviders: ["together"] },
+      additionalFields: ["inferenceProviderMapping"],
+      limit: 10,
+    })) {
+      count++;
+      if (Array.isArray(entry.inferenceProviderMapping)) {
+        expect(entry.inferenceProviderMapping.map(({ provider }) => provider)).to.include("together");
+      }
+    }
+
+    expect(count).to.equal(10);
+  });
+
+  it("should search model by several inference providers", async () => {
+    let count = 0;
+    const inferenceProviders = ["together", "replicate"];
+    for await (const entry of listModels({
+      search: { inferenceProviders },
+      additionalFields: ["inferenceProviderMapping"],
+      limit: 10,
+    })) {
+      count++;
+      if (Array.isArray(entry.inferenceProviderMapping)) {
+        expect(
+          entry.inferenceProviderMapping.filter(({ provider }) => inferenceProviders.includes(provider)).length
+        ).toBeGreaterThan(0);
+      }
+    }
+
+    expect(count).to.equal(10);
+  });
+});
lib/list-models.ts ADDED
@@ -0,0 +1,139 @@
+import { HUB_URL } from "../consts";
+import { createApiError } from "../error";
+import type { ApiModelInfo } from "../types/api/api-model";
+import type { CredentialsParams, PipelineType } from "../types/public";
+import { checkCredentials } from "../utils/checkCredentials";
+import { parseLinkHeader } from "../utils/parseLinkHeader";
+import { pick } from "../utils/pick";
+
+export const MODEL_EXPAND_KEYS = [
+  "pipeline_tag",
+  "private",
+  "gated",
+  "downloads",
+  "likes",
+  "lastModified",
+] as const satisfies readonly (keyof ApiModelInfo)[];
+
+export const MODEL_EXPANDABLE_KEYS = [
+  "author",
+  "cardData",
+  "config",
+  "createdAt",
+  "disabled",
+  "downloads",
+  "downloadsAllTime",
+  "gated",
+  "gitalyUid",
+  "inferenceProviderMapping",
+  "lastModified",
+  "library_name",
+  "likes",
+  "model-index",
+  "pipeline_tag",
+  "private",
+  "safetensors",
+  "sha",
+  // "siblings",
+  "spaces",
+  "tags",
+  "transformersInfo",
+] as const satisfies readonly (keyof ApiModelInfo)[];
+
+export interface ModelEntry {
+  id: string;
+  name: string;
+  private: boolean;
+  gated: false | "auto" | "manual";
+  task?: PipelineType;
+  likes: number;
+  downloads: number;
+  updatedAt: Date;
+}
+
+export async function* listModels<
+  const T extends Exclude<(typeof MODEL_EXPANDABLE_KEYS)[number], (typeof MODEL_EXPAND_KEYS)[number]> = never,
+>(
+  params?: {
+    search?: {
+      /**
+       * Will search in the model name for matches
+       */
+      query?: string;
+      owner?: string;
+      task?: PipelineType;
+      tags?: string[];
+      /**
+       * Will search for models that have one of the inference providers in the list.
+       */
+      inferenceProviders?: string[];
+    };
+    hubUrl?: string;
+    additionalFields?: T[];
+    /**
+     * Set to limit the number of models returned.
+     */
+    limit?: number;
+    /**
+     * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
+     */
+    fetch?: typeof fetch;
+  } & Partial<CredentialsParams>
+): AsyncGenerator<ModelEntry & Pick<ApiModelInfo, T>> {
+  const accessToken = params && checkCredentials(params);
+  let totalToFetch = params?.limit ?? Infinity;
+  const search = new URLSearchParams([
+    ...Object.entries({
+      limit: String(Math.min(totalToFetch, 500)),
+      ...(params?.search?.owner ? { author: params.search.owner } : undefined),
+      ...(params?.search?.task ? { pipeline_tag: params.search.task } : undefined),
+      ...(params?.search?.query ? { search: params.search.query } : undefined),
+      ...(params?.search?.inferenceProviders
+        ? { inference_provider: params.search.inferenceProviders.join(",") }
+        : undefined),
+    }),
+    ...(params?.search?.tags?.map((tag) => ["filter", tag]) ?? []),
+    ...MODEL_EXPAND_KEYS.map((val) => ["expand", val] satisfies [string, string]),
+    ...(params?.additionalFields?.map((val) => ["expand", val] satisfies [string, string]) ?? []),
+  ]).toString();
+  let url: string | undefined = `${params?.hubUrl || HUB_URL}/api/models?${search}`;
+
+  while (url) {
+    const res: Response = await (params?.fetch ?? fetch)(url, {
+      headers: {
+        accept: "application/json",
+        ...(accessToken ? { Authorization: `Bearer ${accessToken}` } : undefined),
+      },
+    });
+
+    if (!res.ok) {
+      throw await createApiError(res);
+    }
+
+    const items: ApiModelInfo[] = await res.json();
+
+    for (const item of items) {
+      yield {
+        ...(params?.additionalFields && pick(item, params.additionalFields)),
+        id: item._id,
+        name: item.id,
+        private: item.private,
+        task: item.pipeline_tag,
+        downloads: item.downloads,
+        gated: item.gated,
+        likes: item.likes,
+        updatedAt: new Date(item.lastModified),
+      } as ModelEntry & Pick<ApiModelInfo, T>;
+      totalToFetch--;
+
+      if (totalToFetch <= 0) {
+        return;
+      }
+    }
+
+    const linkHeader = res.headers.get("Link");
+
+    url = linkHeader ? parseLinkHeader(linkHeader).next : undefined;
+    // Could update url to reduce the limit if we don't need the whole 500 of the next batch.
+  }
+}
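A minimal search sketch combining the filters above: owner plus pipeline task, with the optional `tags` field expanded. Assumes the package is imported as `@huggingface/hub`; the owner and task are only examples.

```ts
import { listModels } from "@huggingface/hub";

// Up to 5 depth-estimation models from one owner, with tags included in each entry.
for await (const model of listModels({
  search: { owner: "Intel", task: "depth-estimation" },
  additionalFields: ["tags"],
  limit: 5,
})) {
  console.log(model.name, model.likes, model.tags);
}
```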
lib/list-spaces.spec.ts ADDED
@@ -0,0 +1,40 @@
+import { describe, expect, it } from "vitest";
+import type { SpaceEntry } from "./list-spaces";
+import { listSpaces } from "./list-spaces";
+
+describe("listSpaces", () => {
+  it("should list spaces for Microsoft", async () => {
+    const results: SpaceEntry[] = [];
+
+    for await (const entry of listSpaces({
+      search: { owner: "microsoft" },
+      additionalFields: ["subdomain"],
+    })) {
+      if (entry.name !== "microsoft/visual_chatgpt") {
+        continue;
+      }
+      if (typeof entry.likes === "number") {
+        entry.likes = 0;
+      }
+      if (entry.updatedAt instanceof Date && !isNaN(entry.updatedAt.getTime())) {
+        entry.updatedAt = new Date(0);
+      }
+
+      results.push(entry);
+    }
+
+    results.sort((a, b) => a.id.localeCompare(b.id));
+
+    expect(results).deep.equal([
+      {
+        id: "6409a392bbc73d022c58c980",
+        name: "microsoft/visual_chatgpt",
+        private: false,
+        likes: 0,
+        sdk: "gradio",
+        subdomain: "microsoft-visual-chatgpt",
+        updatedAt: new Date(0),
+      },
+    ]);
+  });
+});
lib/list-spaces.ts ADDED
@@ -0,0 +1,111 @@
+import { HUB_URL } from "../consts";
+import { createApiError } from "../error";
+import type { ApiSpaceInfo } from "../types/api/api-space";
+import type { CredentialsParams, SpaceSdk } from "../types/public";
+import { checkCredentials } from "../utils/checkCredentials";
+import { parseLinkHeader } from "../utils/parseLinkHeader";
+import { pick } from "../utils/pick";
+
+export const SPACE_EXPAND_KEYS = [
+  "sdk",
+  "likes",
+  "private",
+  "lastModified",
+] as const satisfies readonly (keyof ApiSpaceInfo)[];
+export const SPACE_EXPANDABLE_KEYS = [
+  "author",
+  "cardData",
+  "datasets",
+  "disabled",
+  "gitalyUid",
+  "lastModified",
+  "createdAt",
+  "likes",
+  "private",
+  "runtime",
+  "sdk",
+  // "siblings",
+  "sha",
+  "subdomain",
+  "tags",
+  "models",
+] as const satisfies readonly (keyof ApiSpaceInfo)[];
+
+export interface SpaceEntry {
+  id: string;
+  name: string;
+  sdk?: SpaceSdk;
+  likes: number;
+  private: boolean;
+  updatedAt: Date;
+  // Use additionalFields to fetch the fields from ApiSpaceInfo
+}
+
+export async function* listSpaces<
+  const T extends Exclude<(typeof SPACE_EXPANDABLE_KEYS)[number], (typeof SPACE_EXPAND_KEYS)[number]> = never,
+>(
+  params?: {
+    search?: {
+      /**
+       * Will search in the space name for matches
+       */
+      query?: string;
+      owner?: string;
+      tags?: string[];
+    };
+    hubUrl?: string;
+    /**
+     * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
+     */
+    fetch?: typeof fetch;
+    /**
+     * Additional fields to fetch from huggingface.co.
+     */
+    additionalFields?: T[];
+  } & Partial<CredentialsParams>
+): AsyncGenerator<SpaceEntry & Pick<ApiSpaceInfo, T>> {
+  const accessToken = params && checkCredentials(params);
+  const search = new URLSearchParams([
+    ...Object.entries({
+      limit: "500",
+      ...(params?.search?.owner ? { author: params.search.owner } : undefined),
+      ...(params?.search?.query ? { search: params.search.query } : undefined),
+    }),
+    ...(params?.search?.tags?.map((tag) => ["filter", tag]) ?? []),
+    ...[...SPACE_EXPAND_KEYS, ...(params?.additionalFields ?? [])].map(
+      (val) => ["expand", val] satisfies [string, string]
+    ),
+  ]).toString();
+  let url: string | undefined = `${params?.hubUrl || HUB_URL}/api/spaces?${search}`;
+
+  while (url) {
+    const res: Response = await (params?.fetch ?? fetch)(url, {
+      headers: {
+        accept: "application/json",
+        ...(accessToken ? { Authorization: `Bearer ${accessToken}` } : undefined),
+      },
+    });
+
+    if (!res.ok) {
+      throw await createApiError(res);
+    }
+
+    const items: ApiSpaceInfo[] = await res.json();
+
+    for (const item of items) {
+      yield {
+        ...(params?.additionalFields && pick(item, params.additionalFields)),
+        id: item._id,
+        name: item.id,
+        sdk: item.sdk,
+        likes: item.likes,
+        private: item.private,
+        updatedAt: new Date(item.lastModified),
+      } as SpaceEntry & Pick<ApiSpaceInfo, T>;
+    }
+
+    const linkHeader = res.headers.get("Link");
+
+    url = linkHeader ? parseLinkHeader(linkHeader).next : undefined;
+  }
+}
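A minimal usage sketch, following the spec above. Note that unlike `listModels` and `listDatasets`, this generator takes no `limit` parameter, so stop iterating yourself when you have enough. Assumes the package is imported as `@huggingface/hub`.

```ts
import { listSpaces } from "@huggingface/hub";

// List an owner's Spaces, expanding the optional "subdomain" field.
for await (const space of listSpaces({
  search: { owner: "microsoft" },
  additionalFields: ["subdomain"],
})) {
  console.log(space.name, space.sdk, space.subdomain);
}
```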
lib/model-info.spec.ts ADDED
@@ -0,0 +1,59 @@
+import { describe, expect, it } from "vitest";
+import { modelInfo } from "./model-info";
+import type { ModelEntry } from "./list-models";
+import type { ApiModelInfo } from "../types/api/api-model";
+
+describe("modelInfo", () => {
+  it("should return the model info", async () => {
+    const info = await modelInfo({
+      name: "openai-community/gpt2",
+    });
+    expect(info).toEqual({
+      id: "621ffdc036468d709f17434d",
+      downloads: expect.any(Number),
+      gated: false,
+      name: "openai-community/gpt2",
+      updatedAt: expect.any(Date),
+      likes: expect.any(Number),
+      task: "text-generation",
+      private: false,
+    });
+  });
+
+  it("should return the model info with author", async () => {
+    const info: ModelEntry & Pick<ApiModelInfo, "author"> = await modelInfo({
+      name: "openai-community/gpt2",
+      additionalFields: ["author"],
+    });
+    expect(info).toEqual({
+      id: "621ffdc036468d709f17434d",
+      downloads: expect.any(Number),
+      author: "openai-community",
+      gated: false,
+      name: "openai-community/gpt2",
+      updatedAt: expect.any(Date),
+      likes: expect.any(Number),
+      task: "text-generation",
+      private: false,
+    });
+  });
+
+  it("should return the model info for a specific revision", async () => {
+    const info: ModelEntry & Pick<ApiModelInfo, "sha"> = await modelInfo({
+      name: "openai-community/gpt2",
+      additionalFields: ["sha"],
+      revision: "f27b190eeac4c2302d24068eabf5e9d6044389ae",
+    });
+    expect(info).toEqual({
+      id: "621ffdc036468d709f17434d",
+      downloads: expect.any(Number),
+      gated: false,
+      name: "openai-community/gpt2",
+      updatedAt: expect.any(Date),
+      likes: expect.any(Number),
+      task: "text-generation",
+      private: false,
+      sha: "f27b190eeac4c2302d24068eabf5e9d6044389ae",
+    });
+  });
+});
lib/model-info.ts ADDED
@@ -0,0 +1,62 @@
+import { HUB_URL } from "../consts";
+import { createApiError } from "../error";
+import type { ApiModelInfo } from "../types/api/api-model";
+import type { CredentialsParams } from "../types/public";
+import { checkCredentials } from "../utils/checkCredentials";
+import { pick } from "../utils/pick";
+import { MODEL_EXPAND_KEYS, type MODEL_EXPANDABLE_KEYS, type ModelEntry } from "./list-models";
+
+export async function modelInfo<
+  const T extends Exclude<(typeof MODEL_EXPANDABLE_KEYS)[number], (typeof MODEL_EXPAND_KEYS)[number]> = never,
+>(
+  params: {
+    name: string;
+    hubUrl?: string;
+    additionalFields?: T[];
+    /**
+     * An optional Git revision id which can be a branch name, a tag, or a commit hash.
+     */
+    revision?: string;
+    /**
+     * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
+     */
+    fetch?: typeof fetch;
+  } & Partial<CredentialsParams>
+): Promise<ModelEntry & Pick<ApiModelInfo, T>> {
+  const accessToken = params && checkCredentials(params);
+
+  const search = new URLSearchParams([
+    ...MODEL_EXPAND_KEYS.map((val) => ["expand", val] satisfies [string, string]),
+    ...(params?.additionalFields?.map((val) => ["expand", val] satisfies [string, string]) ?? []),
+  ]).toString();
+
+  const response = await (params.fetch || fetch)(
+    `${params?.hubUrl || HUB_URL}/api/models/${params.name}/revision/${encodeURIComponent(
+      params.revision ?? "HEAD"
+    )}?${search}`,
+    {
+      headers: {
+        ...(accessToken ? { Authorization: `Bearer ${accessToken}` } : {}),
+        Accept: "application/json",
+      },
+    }
+  );
+
+  if (!response.ok) {
+    throw await createApiError(response);
+  }
+
+  const data = await response.json();
+
+  return {
+    ...(params?.additionalFields && pick(data, params.additionalFields)),
+    id: data._id,
+    name: data.id,
+    private: data.private,
+    task: data.pipeline_tag,
+    downloads: data.downloads,
+    gated: data.gated,
+    likes: data.likes,
+    updatedAt: new Date(data.lastModified),
+  } as ModelEntry & Pick<ApiModelInfo, T>;
+}
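A minimal usage sketch: fetch metadata for a single model, pinned to a revision, with an extra expandable field. Assumes the package is imported as `@huggingface/hub`; `"sha"` is one of the `MODEL_EXPANDABLE_KEYS` defined in `list-models.ts`.

```ts
import { modelInfo } from "@huggingface/hub";

// Resolve one model at a given revision and expand its commit SHA.
const info = await modelInfo({
  name: "openai-community/gpt2",
  revision: "main",
  additionalFields: ["sha"],
});
console.log(info.task, info.downloads, info.sha);
```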
lib/oauth-handle-redirect.spec.ts ADDED
@@ -0,0 +1,60 @@
+import { describe, expect, it } from "vitest";
+import { TEST_COOKIE, TEST_HUB_URL } from "../test/consts";
+import { oauthLoginUrl } from "./oauth-login-url";
+import { oauthHandleRedirect } from "./oauth-handle-redirect";
+
+describe("oauthHandleRedirect", () => {
+  it("should work", async () => {
+    const localStorage = {
+      nonce: undefined,
+      codeVerifier: undefined,
+    };
+    const url = await oauthLoginUrl({
+      clientId: "dummy-app",
+      redirectUrl: "http://localhost:3000",
+      localStorage,
+      scopes: "openid profile email",
+      hubUrl: TEST_HUB_URL,
+    });
+    const resp = await fetch(url, {
+      method: "POST",
+      headers: {
+        Cookie: `token=${TEST_COOKIE}`,
+      },
+      redirect: "manual",
+    });
+    if (resp.status !== 303) {
+      throw new Error(`Failed to fetch url ${url}: ${resp.status} ${resp.statusText}`);
+    }
+    const location = resp.headers.get("Location");
+    if (!location) {
+      throw new Error(`No location header in response`);
+    }
+    const result = await oauthHandleRedirect({
+      redirectedUrl: location,
+      codeVerifier: localStorage.codeVerifier,
+      nonce: localStorage.nonce,
+      hubUrl: TEST_HUB_URL,
+    });
+
+    if (!result) {
+      throw new Error("Expected result to be defined");
+    }
+    expect(result.accessToken).toEqual(expect.any(String));
+    expect(result.accessTokenExpiresAt).toBeInstanceOf(Date);
+    expect(result.accessTokenExpiresAt.getTime()).toBeGreaterThan(Date.now());
+    expect(result.scope).toEqual(expect.any(String));
+    expect(result.userInfo).toEqual({
+      sub: "62f264b9f3c90f4b6514a269",
+      name: "@huggingface/hub CI bot",
+      preferred_username: "hub.js",
+      email_verified: true,
+      email: "eliott@huggingface.co",
+      isPro: false,
+      picture: "https://hub-ci.huggingface.co/avatars/934b830e9fdaa879487852f79eef7165.svg",
+      profile: "https://hub-ci.huggingface.co/hub.js",
+      website: "https://github.com/huggingface/hub.js",
+      orgs: [],
+    });
+  });
+});
lib/oauth-handle-redirect.ts ADDED
@@ -0,0 +1,334 @@
+import { HUB_URL } from "../consts";
+import { createApiError } from "../error";
+
+export interface UserInfo {
+  /**
+   * OpenID Connect field. Unique identifier for the user, even in case of rename.
+   */
+  sub: string;
+  /**
+   * OpenID Connect field. The user's full name.
+   */
+  name: string;
+  /**
+   * OpenID Connect field. The user's username.
+   */
+  preferred_username: string;
+  /**
+   * OpenID Connect field, available if scope "email" was granted.
+   */
+  email_verified?: boolean;
+  /**
+   * OpenID Connect field, available if scope "email" was granted.
+   */
+  email?: string;
+  /**
+   * OpenID Connect field. The user's profile picture URL.
+   */
+  picture: string;
+  /**
+   * OpenID Connect field. The user's profile URL.
+   */
+  profile: string;
+  /**
+   * OpenID Connect field. The user's website URL.
+   */
+  website?: string;
+
+  /**
+   * Hugging Face field. Whether the user is a pro user.
+   */
+  isPro: boolean;
+  /**
+   * Hugging Face field. Whether the user has a payment method set up. Needs "read-billing" scope.
+   */
+  canPay?: boolean;
+  /**
+   * Hugging Face field. The user's orgs
+   */
+  orgs?: Array<{
+    /**
+     * OpenID Connect field. Unique identifier for the org.
+     */
+    sub: string;
+    /**
+     * OpenID Connect field. The org's full name.
+     */
+    name: string;
+    /**
+     * OpenID Connect field. The org's username.
+     */
+    preferred_username: string;
+    /**
+     * OpenID Connect field. The org's profile picture URL.
+     */
+    picture: string;
+
+    /**
+     * Hugging Face field. Whether the org is an enterprise org.
+     */
+    isEnterprise: boolean;
+    /**
+     * Hugging Face field. Whether the org has a payment method set up. Needs "read-billing" scope, and the user needs to approve access to the org in the OAuth page.
+     */
+    canPay?: boolean;
+    /**
+     * Hugging Face field. The user's role in the org. The user needs to approve access to the org in the OAuth page.
+     */
+    roleInOrg?: string;
+    /**
+     * Hugging Face field. When the user granted the oauth app access to the org, but didn't complete SSO.
+     *
+     * Should never happen directly after the oauth flow.
+     */
+    pendingSSO?: boolean;
+    /**
+     * Hugging Face field. When the user granted the oauth app access to the org, but didn't complete MFA.
+     *
+     * Should never happen directly after the oauth flow.
+     */
+    missingMFA?: boolean;
+  }>;
+}
+
+export interface OAuthResult {
+  accessToken: string;
+  accessTokenExpiresAt: Date;
+  userInfo: UserInfo;
+  /**
+   * State passed to the OAuth provider in the original request to the OAuth provider.
+   */
+  state?: string;
+  /**
+   * Granted scope
+   */
+  scope: string;
+}
+
+/**
+ * To call after the OAuth provider redirects back to the app.
+ *
+ * There is also a helper function {@link oauthHandleRedirectIfPresent}, which will call `oauthHandleRedirect` if the URL contains an oauth code
+ * in the query parameters and return `false` otherwise.
+ */
+export async function oauthHandleRedirect(opts?: {
+  /**
+   * The URL of the hub. Defaults to {@link HUB_URL}.
+   */
+  hubUrl?: string;
+  /**
+   * The URL to analyze.
+   *
+   * @default window.location.href
+   */
+  redirectedUrl?: string;
+  /**
+   * nonce generated by oauthLoginUrl
+   *
+   * @default localStorage.getItem("huggingface.co:oauth:nonce")
+   */
+  nonce?: string;
+  /**
+   * codeVerifier generated by oauthLoginUrl
+   *
+   * @default localStorage.getItem("huggingface.co:oauth:code_verifier")
+   */
+  codeVerifier?: string;
+}): Promise<OAuthResult> {
+  if (typeof window === "undefined" && !opts?.redirectedUrl) {
+    throw new Error("oauthHandleRedirect is only available in the browser, unless you provide redirectedUrl");
+  }
+  if (typeof localStorage === "undefined" && (!opts?.nonce || !opts?.codeVerifier)) {
+    throw new Error(
+      "oauthHandleRedirect requires localStorage to be available, unless you provide nonce and codeVerifier"
+    );
+  }
+
+  const redirectedUrl = opts?.redirectedUrl ?? window.location.href;
+  const searchParams = (() => {
+    try {
+      return new URL(redirectedUrl).searchParams;
+    } catch (err) {
+      throw new Error("Failed to parse redirected URL: " + redirectedUrl);
+    }
+  })();
+
+  const [error, errorDescription] = [searchParams.get("error"), searchParams.get("error_description")];
+
+  if (error) {
+    throw new Error(`${error}: ${errorDescription}`);
+  }
+
+  const code = searchParams.get("code");
+  const nonce = opts?.nonce ?? localStorage.getItem("huggingface.co:oauth:nonce");
+
+  if (!code) {
+    throw new Error("Missing oauth code from query parameters in redirected URL: " + redirectedUrl);
+  }
+
+  if (!nonce) {
+    throw new Error("Missing oauth nonce from localStorage");
+  }
+
+  const codeVerifier = opts?.codeVerifier ?? localStorage.getItem("huggingface.co:oauth:code_verifier");
+
+  if (!codeVerifier) {
+    throw new Error("Missing oauth code_verifier from localStorage");
+  }
+
+  const state = searchParams.get("state");
+
+  if (!state) {
+    throw new Error("Missing oauth state from query parameters in redirected URL");
+  }
+
+  let parsedState: { nonce: string; redirectUri: string; state?: string };
+
+  try {
+    parsedState = JSON.parse(state);
+  } catch {
+    throw new Error("Invalid oauth state in redirected URL, unable to parse JSON: " + state);
+  }
+
+  if (parsedState.nonce !== nonce) {
+    throw new Error("Invalid oauth state in redirected URL");
+  }
+
+  const hubUrl = opts?.hubUrl || HUB_URL;
+
+  const openidConfigUrl = `${new URL(hubUrl).origin}/.well-known/openid-configuration`;
+  const openidConfigRes = await fetch(openidConfigUrl, {
+    headers: {
+      Accept: "application/json",
+    },
+  });
+
+  if (!openidConfigRes.ok) {
+    throw await createApiError(openidConfigRes);
+  }
+
+  const openidConfig: {
+    authorization_endpoint: string;
+    token_endpoint: string;
+    userinfo_endpoint: string;
+  } = await openidConfigRes.json();
+
+  const tokenRes = await fetch(openidConfig.token_endpoint, {
+    method: "POST",
+    headers: {
+      "Content-Type": "application/x-www-form-urlencoded",
+    },
+    body: new URLSearchParams({
+      grant_type: "authorization_code",
+      code,
+      redirect_uri: parsedState.redirectUri,
+      code_verifier: codeVerifier,
+    }).toString(),
+  });
+
+  if (!opts?.codeVerifier) {
+    localStorage.removeItem("huggingface.co:oauth:code_verifier");
+  }
+  if (!opts?.nonce) {
+    localStorage.removeItem("huggingface.co:oauth:nonce");
+  }
+
+  if (!tokenRes.ok) {
+    throw await createApiError(tokenRes);
+  }
+
+  const token: {
+    access_token: string;
+    expires_in: number;
+    id_token: string;
+    // refresh_token: string;
+    scope: string;
+    token_type: string;
+  } = await tokenRes.json();
+
+  const accessTokenExpiresAt = new Date(Date.now() + token.expires_in * 1000);
+
+  const userInfoRes = await fetch(openidConfig.userinfo_endpoint, {
+    headers: {
+      Authorization: `Bearer ${token.access_token}`,
+    },
+  });
+
+  if (!userInfoRes.ok) {
+    throw await createApiError(userInfoRes);
+  }
+
+  const userInfo: UserInfo = await userInfoRes.json();
+
+  return {
+    accessToken: token.access_token,
+    accessTokenExpiresAt,
+    userInfo: userInfo,
+    state: parsedState.state,
+    scope: token.scope,
+  };
+}
+
+// if (code && !nonce) {
+//   console.warn("Missing oauth nonce from localStorage");
+// }
+
+/**
+ * To call after the OAuth provider redirects back to the app.
+ *
+ * It returns false if the URL does not contain an oauth code in the query parameters, otherwise
+ * it calls {@link oauthHandleRedirect}.
+ *
+ * Depending on your app, you may want to call {@link oauthHandleRedirect} directly instead.
+ */
+export async function oauthHandleRedirectIfPresent(opts?: {
+  /**
+   * The URL of the hub. Defaults to {@link HUB_URL}.
+   */
+  hubUrl?: string;
+  /**
+   * The URL to analyze.
+   *
+   * @default window.location.href
+   */
+  redirectedUrl?: string;
+  /**
+   * nonce generated by oauthLoginUrl
+   *
+   * @default localStorage.getItem("huggingface.co:oauth:nonce")
+   */
+  nonce?: string;
+  /**
+   * codeVerifier generated by oauthLoginUrl
+   *
+   * @default localStorage.getItem("huggingface.co:oauth:code_verifier")
+   */
+  codeVerifier?: string;
+}): Promise<OAuthResult | false> {
+  if (typeof window === "undefined" && !opts?.redirectedUrl) {
+    throw new Error("oauthHandleRedirect is only available in the browser, unless you provide redirectedUrl");
+  }
+  if (typeof localStorage === "undefined" && (!opts?.nonce || !opts?.codeVerifier)) {
+    throw new Error(
+      "oauthHandleRedirect requires localStorage to be available, unless you provide nonce and codeVerifier"
+    );
+  }
+  const searchParams = new URLSearchParams(opts?.redirectedUrl ?? window.location.search);
+
+  if (searchParams.has("error")) {
+    return oauthHandleRedirect(opts);
+  }
+
+  if (searchParams.has("code")) {
+    if (!localStorage.getItem("huggingface.co:oauth:nonce")) {
+      console.warn(
+        "Missing oauth nonce from localStorage. This can happen when the user refreshes the page after logging in, without changing the URL."
+      );
+      return false;
+    }
+
+    return oauthHandleRedirect(opts);
+  }
+
+  return false;
+}
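A minimal browser-side flow sketch, adapted from the `@example` in `oauth-login-url.ts`: handle the redirect if one is pending, otherwise start the login. The client ID is a hypothetical placeholder.

```ts
import { oauthLoginUrl, oauthHandleRedirectIfPresent } from "@huggingface/hub";

// Returns false when the current URL carries no oauth code.
const oauth = await oauthHandleRedirectIfPresent();
if (!oauth) {
  // Redirect the user to the Hub's authorization endpoint.
  window.location.href = await oauthLoginUrl({ clientId: "your-client-id", scopes: "openid profile" });
} else {
  console.log(oauth.userInfo.preferred_username, oauth.accessTokenExpiresAt.toISOString());
}
```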
lib/oauth-login-url.ts ADDED
@@ -0,0 +1,166 @@
+import { HUB_URL } from "../consts";
+import { createApiError } from "../error";
+import { base64FromBytes } from "../utils/base64FromBytes";
+
+/**
+ * Use "Sign in with Hub" to authenticate a user, and get oauth user info / access token.
+ *
+ * Returns a URL to redirect to. After the user is redirected back to your app, call `oauthHandleRedirect` to get the oauth user info / access token.
+ *
+ * When called from inside a static Space with OAuth enabled, it will load the config from the space, otherwise you need to at least specify
+ * the client ID of your OAuth App.
+ *
+ * @example
+ * ```ts
+ * import { oauthLoginUrl, oauthHandleRedirectIfPresent } from "@huggingface/hub";
+ *
+ * const oauthResult = await oauthHandleRedirectIfPresent();
+ *
+ * if (!oauthResult) {
+ *   // If the user is not logged in, redirect to the login page
+ *   window.location.href = await oauthLoginUrl();
+ * }
+ *
+ * // You can use oauthResult.accessToken, oauthResult.accessTokenExpiresAt and oauthResult.userInfo
+ * console.log(oauthResult);
+ * ```
+ *
+ * (Theoretically, this function could be used to authenticate a user for any OAuth provider supporting PKCE and OpenID Connect by changing `hubUrl`,
+ * but it is currently only tested with the Hugging Face Hub.)
+ */
+export async function oauthLoginUrl(opts?: {
+  /**
+   * OAuth client ID.
+   *
+   * For static Spaces, you can omit this and it will be loaded from the Space config, as long as `hf_oauth: true` is present in the README.md's metadata.
+   * For other Spaces, it is available to the backend in the OAUTH_CLIENT_ID environment variable, as long as `hf_oauth: true` is present in the README.md's metadata.
+   *
+   * You can also create a Developer Application at https://huggingface.co/settings/connected-applications and use its client ID.
+   */
+  clientId?: string;
+  hubUrl?: string;
+  /**
+   * OAuth scope, a list of space-separated scopes.
+   *
+   * For static Spaces, you can omit this and it will be loaded from the Space config, as long as `hf_oauth: true` is present in the README.md's metadata.
+   * For other Spaces, it is available to the backend in the OAUTH_SCOPES environment variable, as long as `hf_oauth: true` is present in the README.md's metadata.
+   *
+   * Defaults to "openid profile".
+   *
+   * You can also create a Developer Application at https://huggingface.co/settings/connected-applications and use its scopes.
+   *
+   * See https://huggingface.co/docs/hub/oauth for a list of available scopes.
+   */
+  scopes?: string;
+  /**
+   * Redirect URI, defaults to the current URL.
+   *
+   * For Spaces, any URL within the Space is allowed.
+   *
+   * For Developer Applications, you can add any URL you want to the list of allowed redirect URIs at https://huggingface.co/settings/connected-applications.
+   */
+  redirectUrl?: string;
+  /**
+   * State to pass to the OAuth provider, which will be returned in the call to `oauthLogin` after the redirect.
+   */
+  state?: string;
+  /**
+   * If provided, will be filled with the code verifier and nonce used for the OAuth flow,
+   * instead of using localStorage.
+   *
+   * When calling {@link `oauthHandleRedirectIfPresent`} or {@link `oauthHandleRedirect`} you will need to provide the same values.
+   */
+  localStorage?: {
+    codeVerifier?: string;
+    nonce?: string;
+  };
+}): Promise<string> {
+  if (typeof window === "undefined" && (!opts?.redirectUrl || !opts?.clientId)) {
+    throw new Error("oauthLogin is only available in the browser, unless you provide clientId and redirectUrl");
+  }
+  if (typeof localStorage === "undefined" && !opts?.localStorage) {
+    throw new Error(
+      "oauthLogin requires localStorage to be available in the context, unless you provide a localStorage empty object as argument"
+    );
+  }
+
+  const hubUrl = opts?.hubUrl || HUB_URL;
+  const openidConfigUrl = `${new URL(hubUrl).origin}/.well-known/openid-configuration`;
+  const openidConfigRes = await fetch(openidConfigUrl, {
+    headers: {
+      Accept: "application/json",
+    },
+  });
+
+  if (!openidConfigRes.ok) {
+    throw await createApiError(openidConfigRes);
+  }
+
+  const openidConfig: {
+    authorization_endpoint: string;
+    token_endpoint: string;
+    userinfo_endpoint: string;
+  } = await openidConfigRes.json();
+
+  const newNonce = globalThis.crypto.randomUUID();
+  // Two random UUIDs concatenated together, because min length is 43 and max length is 128
+  const newCodeVerifier = globalThis.crypto.randomUUID() + globalThis.crypto.randomUUID();
+
+  if (opts?.localStorage) {
+    if (opts.localStorage.codeVerifier !== undefined && opts.localStorage.codeVerifier !== null) {
+      throw new Error(
+        "localStorage.codeVerifier must be initially set to null or undefined, and will be filled by oauthLoginUrl"
+      );
+    }
+    if (opts.localStorage.nonce !== undefined && opts.localStorage.nonce !== null) {
+      throw new Error(
+        "localStorage.nonce must be initially set to null or undefined, and will be filled by oauthLoginUrl"
+      );
+    }
+    opts.localStorage.codeVerifier = newCodeVerifier;
+    opts.localStorage.nonce = newNonce;
+  } else {
+    localStorage.setItem("huggingface.co:oauth:nonce", newNonce);
+    localStorage.setItem("huggingface.co:oauth:code_verifier", newCodeVerifier);
+  }
+
+  const redirectUri = opts?.redirectUrl || (typeof window !== "undefined" ? window.location.href : undefined);
+  if (!redirectUri) {
+    throw new Error("Missing redirectUrl");
+  }
+  const state = JSON.stringify({
+    nonce: newNonce,
+    redirectUri,
+    state: opts?.state,
+  });
+
+  const variables: Record<string, string> | null =
+    // @ts-expect-error window.huggingface is defined inside static Spaces.
+    typeof window !== "undefined" ? window.huggingface?.variables ?? null : null;
+
+  const clientId = opts?.clientId || variables?.OAUTH_CLIENT_ID;
+
+  if (!clientId) {
+    if (variables) {
+      throw new Error("Missing clientId, please add hf_oauth: true to the README.md's metadata in your static Space");
+    }
+    throw new Error("Missing clientId");
+  }
+
+  const challenge = base64FromBytes(
+    new Uint8Array(await globalThis.crypto.subtle.digest("SHA-256", new TextEncoder().encode(newCodeVerifier)))
+  )
+    .replace(/[+]/g, "-")
+    .replace(/[/]/g, "_")
+    .replace(/=/g, "");
+
+  return `${openidConfig.authorization_endpoint}?${new URLSearchParams({
+    client_id: clientId,
+    scope: opts?.scopes || variables?.OAUTH_SCOPES || "openid profile",
+    response_type: "code",
+    redirect_uri: redirectUri,
+    state,
+    code_challenge: challenge,
+    code_challenge_method: "S256",
+  }).toString()}`;
+}
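Outside the browser, the `localStorage` parameter lets the caller hold the PKCE state itself, the same pattern the spec for `oauth-handle-redirect` uses. A minimal sketch under that assumption; the client ID and redirect URL are hypothetical placeholders.

```ts
import { oauthLoginUrl, oauthHandleRedirect } from "@huggingface/hub";

// Pass an empty object so the nonce and code verifier are captured for you
// instead of being written to window.localStorage.
const stash: { nonce?: string; codeVerifier?: string } = {};
const url = await oauthLoginUrl({
  clientId: "your-client-id", // hypothetical client ID
  redirectUrl: "http://localhost:3000/callback",
  localStorage: stash,
});
// ...send the user to `url`, then on the callback, pass the same values back:
// await oauthHandleRedirect({ redirectedUrl, nonce: stash.nonce, codeVerifier: stash.codeVerifier });
```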
lib/parse-safetensors-metadata.spec.ts ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { assert, it, describe } from "vitest";
+ import { parseSafetensorsMetadata, parseSafetensorsShardFilename } from "./parse-safetensors-metadata";
+ import { sum } from "../utils/sum";
+
+ describe("parseSafetensorsMetadata", () => {
+   it("fetch info for single-file (with the default conventional filename)", async () => {
+     const parse = await parseSafetensorsMetadata({
+       repo: "bert-base-uncased",
+       computeParametersCount: true,
+       revision: "86b5e0934494bd15c9632b12f734a8a67f723594",
+     });
+
+     assert(!parse.sharded);
+     assert.deepStrictEqual(parse.header.__metadata__, { format: "pt" });
+
+     // Example of one tensor (the header contains many tensors)
+     assert.deepStrictEqual(parse.header["bert.embeddings.LayerNorm.beta"], {
+       dtype: "F32",
+       shape: [768],
+       data_offsets: [0, 3072],
+     });
+
+     assert.deepStrictEqual(parse.parameterCount, { F32: 110_106_428 });
+     assert.deepStrictEqual(sum(Object.values(parse.parameterCount)), 110_106_428);
+     // total params = 110m
+   });
+
+   it("fetch info for sharded (with the default conventional filename)", async () => {
+     const parse = await parseSafetensorsMetadata({
+       repo: "bigscience/bloom",
+       computeParametersCount: true,
+       revision: "053d9cd9fbe814e091294f67fcfedb3397b954bb",
+     });
+
+     assert(parse.sharded);
+
+     assert.strictEqual(Object.keys(parse.headers).length, 72);
+     // This model has 72 shards!
+
+     // Example of one tensor inside one file
+     assert.deepStrictEqual(parse.headers["model_00012-of-00072.safetensors"]["h.10.input_layernorm.weight"], {
+       dtype: "BF16",
+       shape: [14336],
+       data_offsets: [3288649728, 3288678400],
+     });
+
+     assert.deepStrictEqual(parse.parameterCount, { BF16: 176_247_271_424 });
+     assert.deepStrictEqual(sum(Object.values(parse.parameterCount)), 176_247_271_424);
+     // total params = 176B
+   });
+
+   it("fetch info for single-file with multiple dtypes", async () => {
+     const parse = await parseSafetensorsMetadata({
+       repo: "roberta-base",
+       computeParametersCount: true,
+       revision: "e2da8e2f811d1448a5b465c236feacd80ffbac7b",
+     });
+
+     assert(!parse.sharded);
+
+     assert.deepStrictEqual(parse.parameterCount, { F32: 124_697_433, I64: 514 });
+     assert.deepStrictEqual(sum(Object.values(parse.parameterCount)), 124_697_947);
+     // total params = 124m
+   });
+
+   it("fetch info for single-file with file path", async () => {
+     const parse = await parseSafetensorsMetadata({
+       repo: "CompVis/stable-diffusion-v1-4",
+       computeParametersCount: true,
+       path: "unet/diffusion_pytorch_model.safetensors",
+       revision: "133a221b8aa7292a167afc5127cb63fb5005638b",
+     });
+
+     assert(!parse.sharded);
+     assert.deepStrictEqual(parse.header.__metadata__, { format: "pt" });
+
+     // Example of one tensor (the header contains many tensors)
+     assert.deepStrictEqual(parse.header["up_blocks.3.resnets.0.norm2.bias"], {
+       dtype: "F32",
+       shape: [320],
+       data_offsets: [3_409_382_416, 3_409_383_696],
+     });
+
+     assert.deepStrictEqual(parse.parameterCount, { F32: 859_520_964 });
+     assert.deepStrictEqual(sum(Object.values(parse.parameterCount)), 859_520_964);
+   });
+
+   it("fetch info for sharded (with the default conventional filename) with file path", async () => {
+     const parse = await parseSafetensorsMetadata({
+       repo: "Alignment-Lab-AI/ALAI-gemma-7b",
+       computeParametersCount: true,
+       path: "7b/1/model.safetensors.index.json",
+       revision: "37e307261fe97bbf8b2463d61dbdd1a10daa264c",
+     });
+
+     assert(parse.sharded);
+
+     assert.strictEqual(Object.keys(parse.headers).length, 4);
+
+     assert.deepStrictEqual(parse.headers["model-00004-of-00004.safetensors"]["model.layers.24.mlp.up_proj.weight"], {
+       dtype: "BF16",
+       shape: [24576, 3072],
+       data_offsets: [301996032, 452990976],
+     });
+
+     assert.deepStrictEqual(parse.parameterCount, { BF16: 8_537_680_896 });
+     assert.deepStrictEqual(sum(Object.values(parse.parameterCount)), 8_537_680_896);
+   });
+
+   it("should detect sharded safetensors filename", async () => {
+     const safetensorsFilename = "model_00005-of-00072.safetensors"; // https://huggingface.co/bigscience/bloom/blob/4d8e28c67403974b0f17a4ac5992e4ba0b0dbb6f/model_00005-of-00072.safetensors
+     const safetensorsShardFileInfo = parseSafetensorsShardFilename(safetensorsFilename);
+
+     assert.strictEqual(safetensorsShardFileInfo?.prefix, "model_");
+     assert.strictEqual(safetensorsShardFileInfo?.basePrefix, "model");
+     assert.strictEqual(safetensorsShardFileInfo?.shard, "00005");
+     assert.strictEqual(safetensorsShardFileInfo?.total, "00072");
+   });
+ });
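The `sum` helper imported from `../utils/sum` is not part of this excerpt; a minimal sketch of what it presumably looks like, assuming it simply totals an array of numbers:

```ts
// Hypothetical reconstruction of ../utils/sum (not shown in this commit view):
// reduce an array of numbers to their total, starting from 0 so that sum([]) === 0.
export function sum(arr: number[]): number {
  return arr.reduce((a, b) => a + b, 0);
}
```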
lib/parse-safetensors-metadata.ts ADDED
@@ -0,0 +1,274 @@
+ import type { CredentialsParams, RepoDesignation } from "../types/public";
+ import { omit } from "../utils/omit";
+ import { toRepoId } from "../utils/toRepoId";
+ import { typedEntries } from "../utils/typedEntries";
+ import { downloadFile } from "./download-file";
+ import { fileExists } from "./file-exists";
+ import { promisesQueue } from "../utils/promisesQueue";
+ import type { SetRequired } from "../vendor/type-fest/set-required";
+
+ export const SAFETENSORS_FILE = "model.safetensors";
+ export const SAFETENSORS_INDEX_FILE = "model.safetensors.index.json";
+ /// We advise model/library authors to use the filenames above as a convention inside model repos,
+ /// but in some situations safetensors weights have different filenames.
+ export const RE_SAFETENSORS_FILE = /\.safetensors$/;
+ export const RE_SAFETENSORS_INDEX_FILE = /\.safetensors\.index\.json$/;
+ export const RE_SAFETENSORS_SHARD_FILE =
+   /^(?<prefix>(?<basePrefix>.*?)[_-])(?<shard>\d{5})-of-(?<total>\d{5})\.safetensors$/;
+ export interface SafetensorsShardFileInfo {
+   prefix: string;
+   basePrefix: string;
+   shard: string;
+   total: string;
+ }
+ export function parseSafetensorsShardFilename(filename: string): SafetensorsShardFileInfo | null {
+   const match = RE_SAFETENSORS_SHARD_FILE.exec(filename);
+   if (match && match.groups) {
+     return {
+       prefix: match.groups["prefix"],
+       basePrefix: match.groups["basePrefix"],
+       shard: match.groups["shard"],
+       total: match.groups["total"],
+     };
+   }
+   return null;
+ }
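For illustration, the regex accepts either `_` or `-` as the separator before the five-digit shard counter, and anything else yields `null`:

```ts
parseSafetensorsShardFilename("model_00005-of-00072.safetensors");
// => { prefix: "model_", basePrefix: "model", shard: "00005", total: "00072" }

parseSafetensorsShardFilename("model-00001-of-00004.safetensors");
// => { prefix: "model-", basePrefix: "model", shard: "00001", total: "00004" }

parseSafetensorsShardFilename("model.safetensors");
// => null (a single-file checkpoint, not a shard)
```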
+
+ const PARALLEL_DOWNLOADS = 20;
+ const MAX_HEADER_LENGTH = 25_000_000;
+
+ class SafetensorParseError extends Error {}
+
+ type FileName = string;
+
+ export type TensorName = string;
+ export type Dtype = "F64" | "F32" | "F16" | "BF16" | "I64" | "I32" | "I16" | "I8" | "U8" | "BOOL";
+
+ export interface TensorInfo {
+   dtype: Dtype;
+   shape: number[];
+   data_offsets: [number, number];
+ }
+
+ export type SafetensorsFileHeader = Record<TensorName, TensorInfo> & {
+   __metadata__: Record<string, string>;
+ };
+
+ export interface SafetensorsIndexJson {
+   dtype?: string;
+   /// ^there's sometimes a dtype, but it looks inconsistent.
+   metadata?: Record<string, string>;
+   /// ^ why the naming inconsistency?
+   weight_map: Record<TensorName, FileName>;
+ }
+
+ export type SafetensorsShardedHeaders = Record<FileName, SafetensorsFileHeader>;
+
+ export type SafetensorsParseFromRepo =
+   | {
+       sharded: false;
+       header: SafetensorsFileHeader;
+       parameterCount?: Partial<Record<Dtype, number>>;
+     }
+   | {
+       sharded: true;
+       index: SafetensorsIndexJson;
+       headers: SafetensorsShardedHeaders;
+       parameterCount?: Partial<Record<Dtype, number>>;
+     };
+
+ async function parseSingleFile(
+   path: string,
+   params: {
+     repo: RepoDesignation;
+     revision?: string;
+     hubUrl?: string;
+     /**
+      * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
+      */
+     fetch?: typeof fetch;
+   } & Partial<CredentialsParams>
+ ): Promise<SafetensorsFileHeader> {
+   const blob = await downloadFile({ ...params, path });
+
+   if (!blob) {
+     throw new SafetensorParseError(`Failed to parse file ${path}: failed to fetch safetensors header length.`);
+   }
+
+   const bufLengthOfHeaderLE = await blob.slice(0, 8).arrayBuffer();
+   const lengthOfHeader = new DataView(bufLengthOfHeaderLE).getBigUint64(0, true);
+   // ^little-endian
+   if (lengthOfHeader <= 0) {
+     throw new SafetensorParseError(`Failed to parse file ${path}: safetensors header is malformed.`);
+   }
+   if (lengthOfHeader > MAX_HEADER_LENGTH) {
+     throw new SafetensorParseError(
+       `Failed to parse file ${path}: safetensors header is too big. Maximum supported size is ${MAX_HEADER_LENGTH} bytes.`
+     );
+   }
+
+   try {
+     // no validation for now, we assume it's a valid FileHeader.
+     const header: SafetensorsFileHeader = JSON.parse(await blob.slice(8, 8 + Number(lengthOfHeader)).text());
+     return header;
+   } catch (err) {
+     throw new SafetensorParseError(`Failed to parse file ${path}: safetensors header is not valid JSON.`);
+   }
+ }
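The byte layout `parseSingleFile` relies on is the standard safetensors framing: an unsigned 64-bit little-endian header length, then that many bytes of JSON, then the raw tensor data. A self-contained sketch with a fabricated one-tensor payload:

```ts
// Build a tiny in-memory safetensors payload and read its header back,
// using the same 8-byte little-endian length prefix the parser expects.
const headerJson = JSON.stringify({
  __metadata__: { format: "pt" },
  "layer.weight": { dtype: "F32", shape: [2, 2], data_offsets: [0, 16] },
});
const headerBytes = new TextEncoder().encode(headerJson);

const file = new Uint8Array(8 + headerBytes.length + 16); // 16 bytes of F32 tensor data
new DataView(file.buffer).setBigUint64(0, BigInt(headerBytes.length), true); // little-endian
file.set(headerBytes, 8);

// Reading it back mirrors the logic in parseSingleFile:
const length = new DataView(file.buffer).getBigUint64(0, true);
const header = JSON.parse(new TextDecoder().decode(file.slice(8, 8 + Number(length))));
console.log(header["layer.weight"].shape); // [2, 2]
```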
+
+ async function parseShardedIndex(
+   path: string,
+   params: {
+     repo: RepoDesignation;
+     revision?: string;
+     hubUrl?: string;
+     /**
+      * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
+      */
+     fetch?: typeof fetch;
+   } & Partial<CredentialsParams>
+ ): Promise<{ index: SafetensorsIndexJson; headers: SafetensorsShardedHeaders }> {
+   const indexBlob = await downloadFile({
+     ...params,
+     path,
+   });
+
+   if (!indexBlob) {
+     throw new SafetensorParseError(`Failed to parse file ${path}: failed to fetch safetensors index.`);
+   }
+
+   // no validation for now, we assume it's a valid IndexJson.
+   let index: SafetensorsIndexJson;
+   try {
+     index = JSON.parse(await indexBlob.slice(0, 10_000_000).text());
+   } catch (error) {
+     throw new SafetensorParseError(`Failed to parse file ${path}: not valid JSON.`);
+   }
+
+   const pathPrefix = path.slice(0, path.lastIndexOf("/") + 1);
+   const filenames = [...new Set(Object.values(index.weight_map))];
+   const shardedMap: SafetensorsShardedHeaders = Object.fromEntries(
+     await promisesQueue(
+       filenames.map(
+         (filename) => async () =>
+           [filename, await parseSingleFile(pathPrefix + filename, params)] satisfies [string, SafetensorsFileHeader]
+       ),
+       PARALLEL_DOWNLOADS
+     )
+   );
+   return { index, headers: shardedMap };
+ }
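`promisesQueue` (imported from `../utils/promisesQueue`, not shown in this commit view) runs the shard-header fetches with bounded concurrency, keeping at most `PARALLEL_DOWNLOADS` (20) requests in flight. A hypothetical minimal version of such a helper:

```ts
// Hypothetical sketch of a concurrency-limited queue: run the task factories
// with at most `concurrency` promises pending at once, and resolve with the
// results in their original order.
async function promisesQueueSketch<T>(factories: Array<() => Promise<T>>, concurrency: number): Promise<T[]> {
  const results: T[] = new Array(factories.length);
  let next = 0;

  async function worker(): Promise<void> {
    while (next < factories.length) {
      const index = next++; // safe: JS is single-threaded between awaits
      results[index] = await factories[index]();
    }
  }

  // Start up to `concurrency` workers that drain the shared queue.
  await Promise.all(Array.from({ length: Math.min(concurrency, factories.length) }, worker));
  return results;
}
```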
161
+
162
+ /**
163
+ * Analyze model.safetensors.index.json or model.safetensors from a model hosted
164
+ * on Hugging Face using smart range requests to extract its metadata.
165
+ */
166
+ export async function parseSafetensorsMetadata(
167
+ params: {
168
+ /** Only models are supported */
169
+ repo: RepoDesignation;
170
+ /**
171
+ * Relative file path to safetensors file inside `repo`. Defaults to `SAFETENSORS_FILE` or `SAFETENSORS_INDEX_FILE` (whichever one exists).
172
+ */
173
+ path?: string;
174
+ /**
175
+ * Will include SafetensorsParseFromRepo["parameterCount"], an object containing the number of parameters for each DType
176
+ *
177
+ * @default false
178
+ */
179
+ computeParametersCount: true;
180
+ hubUrl?: string;
181
+ revision?: string;
182
+ /**
183
+ * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
184
+ */
185
+ fetch?: typeof fetch;
186
+ } & Partial<CredentialsParams>
187
+ ): Promise<SetRequired<SafetensorsParseFromRepo, "parameterCount">>;
188
+ export async function parseSafetensorsMetadata(
189
+ params: {
190
+ /** Only models are supported */
191
+ repo: RepoDesignation;
192
+ /**
193
+ * Will include SafetensorsParseFromRepo["parameterCount"], an object containing the number of parameters for each DType
194
+ *
195
+ * @default false
196
+ */
197
+ path?: string;
198
+ computeParametersCount?: boolean;
199
+ hubUrl?: string;
200
+ revision?: string;
201
+ /**
202
+ * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
203
+ */
204
+ fetch?: typeof fetch;
205
+ } & Partial<CredentialsParams>
206
+ ): Promise<SafetensorsParseFromRepo>;
207
+ export async function parseSafetensorsMetadata(
208
+ params: {
209
+ repo: RepoDesignation;
210
+ path?: string;
211
+ computeParametersCount?: boolean;
212
+ hubUrl?: string;
213
+ revision?: string;
214
+ /**
215
+ * Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
216
+ */
217
+ fetch?: typeof fetch;
218
+ } & Partial<CredentialsParams>
219
+ ): Promise<SafetensorsParseFromRepo> {
220
+ const repoId = toRepoId(params.repo);
221
+
222
+ if (repoId.type !== "model") {
223
+ throw new TypeError("Only model repos should contain safetensors files.");
224
+ }
225
+
226
+ if (RE_SAFETENSORS_FILE.test(params.path ?? "") || (await fileExists({ ...params, path: SAFETENSORS_FILE }))) {
227
+ const header = await parseSingleFile(params.path ?? SAFETENSORS_FILE, params);
228
+ return {
229
+ sharded: false,
230
+ header,
231
+ ...(params.computeParametersCount && {
232
+ parameterCount: computeNumOfParamsByDtypeSingleFile(header),
233
+ }),
234
+ };
235
+ } else if (
236
+ RE_SAFETENSORS_INDEX_FILE.test(params.path ?? "") ||
237
+ (await fileExists({ ...params, path: SAFETENSORS_INDEX_FILE }))
238
+ ) {
239
+ const { index, headers } = await parseShardedIndex(params.path ?? SAFETENSORS_INDEX_FILE, params);
240
+ return {
241
+ sharded: true,
242
+ index,
243
+ headers,
244
+ ...(params.computeParametersCount && {
245
+ parameterCount: computeNumOfParamsByDtypeSharded(headers),
246
+ }),
247
+ };
248
+ } else {
249
+ throw new Error("model id does not seem to contain safetensors weights");
250
+ }
251
+ }
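A usage sketch, mirroring the calls in the spec file above (the repo name and parameter total come from those tests; the import path assumes the published `@huggingface/hub` package):

```ts
import { parseSafetensorsMetadata } from "@huggingface/hub";

const parse = await parseSafetensorsMetadata({
  repo: "bert-base-uncased",
  computeParametersCount: true,
});

if (!parse.sharded) {
  // Single-file checkpoint: one header mapping tensor names to dtype/shape/offsets.
  console.log(Object.keys(parse.header).length, "tensors");
}
console.log(parse.parameterCount); // e.g. { F32: 110_106_428 } for bert-base-uncased
```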
+
+ function computeNumOfParamsByDtypeSingleFile(header: SafetensorsFileHeader): Partial<Record<Dtype, number>> {
+   const counter: Partial<Record<Dtype, number>> = {};
+   const tensors = omit(header, "__metadata__");
+
+   for (const [, v] of typedEntries(tensors)) {
+     if (v.shape.length === 0) {
+       continue;
+     }
+     counter[v.dtype] = (counter[v.dtype] ?? 0) + v.shape.reduce((a, b) => a * b);
+   }
+   return counter;
+ }
+
+ function computeNumOfParamsByDtypeSharded(shardedMap: SafetensorsShardedHeaders): Partial<Record<Dtype, number>> {
+   const counter: Partial<Record<Dtype, number>> = {};
+   for (const header of Object.values(shardedMap)) {
+     for (const [k, v] of typedEntries(computeNumOfParamsByDtypeSingleFile(header))) {
+       counter[k] = (counter[k] ?? 0) + (v ?? 0);
+     }
+   }
+   return counter;
+ }
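A tensor's parameter count is simply the product of its shape dimensions, accumulated per dtype. A worked example using two tensor shapes from the tests above, applying the same reduction as `computeNumOfParamsByDtypeSingleFile`:

```ts
// [768] contributes 768 F32 params; [24576, 3072] contributes
// 24576 * 3072 = 75_497_472 BF16 params.
const header: Record<string, { dtype: string; shape: number[] }> = {
  "bert.embeddings.LayerNorm.beta": { dtype: "F32", shape: [768] },
  "model.layers.24.mlp.up_proj.weight": { dtype: "BF16", shape: [24576, 3072] },
};

const counts: Record<string, number> = {};
for (const { dtype, shape } of Object.values(header)) {
  counts[dtype] = (counts[dtype] ?? 0) + shape.reduce((a, b) => a * b, 1);
}
console.log(counts); // { F32: 768, BF16: 75497472 }
```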