```python
import os
from io import BytesIO
from multiprocessing import Pool, cpu_count

import fiftyone as fo
from datasets import load_dataset
from PIL import Image

# Load the dataset
imagenet_hard_dataset = load_dataset("taesiri/imagenet-hard", split="validation")

os.makedirs("dataset", exist_ok=True)


def process_image(i):
    image = imagenet_hard_dataset[i]["image"].convert("RGB")
    image_path = f"dataset/{i}.JPEG"
    image.save(image_path, "JPEG", quality=80)
    return {
        "file_path": image_path,
        "labels": imagenet_hard_dataset[i]["english_label"],
    }


def create_fiftyone_sample(sample):
    classifications = [
        fo.Classification(label=str(label)) for label in sample["labels"]
    ]
    return fo.Sample(
        filepath=sample["file_path"],
        labels=fo.Classifications(classifications=classifications),
    )


if __name__ == "__main__":
    # Process images in parallel and get the list of images with their labels
    with Pool(cpu_count()) as pool:
        samples_data = pool.map(process_image, range(len(imagenet_hard_dataset)))

    # Create a FiftyOne dataset
    dataset = fo.Dataset(name="imagenet-hard")

    # Add images and labels to the FiftyOne dataset
    samples = [create_fiftyone_sample(sample_data) for sample_data in samples_data]
    dataset.add_samples(samples)

    session = fo.launch_app(dataset, port=8888, remote=True, address="0.0.0.0")
    session.wait()
```
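
Once the samples are added, the same FiftyOne dataset can be filtered by class name before launching the App. Below is a minimal sketch, assuming the script above has already run; `fo.load_dataset()` only works across sessions if the dataset was marked persistent, and the label string `"hammerhead"` is just a placeholder class, not a guaranteed value of `english_label`:

```python
# Minimal follow-up sketch: view only the samples matching one class.
# Assumes the "imagenet-hard" dataset was built by the script above.
import fiftyone as fo
from fiftyone import ViewField as F

# Requires dataset.persistent = True if loading in a new Python session
dataset = fo.load_dataset("imagenet-hard")

# Keep only samples whose label list contains the placeholder class name
view = dataset.filter_labels("labels", F("label") == "hammerhead")
print(f"{len(view)} samples match")

# Point the App at the filtered view instead of the full dataset
session = fo.launch_app(view, port=8888, remote=True, address="0.0.0.0")
session.wait()
```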