from datasets import load_dataset, concatenate_datasets

# Full-dataset pipeline (currently disabled): concatenate the train splits of
# taiko-2023-1.1 through taiko-2023-1.7, then drop index 1079, which has a
# file problem. Uncomment this section (and remove the local-test assignment
# below) to work on the complete dataset.
# ds1 = load_dataset("JacobLinCool/taiko-2023-1.1", split="train")
# ds2 = load_dataset("JacobLinCool/taiko-2023-1.2", split="train")
# ds3 = load_dataset("JacobLinCool/taiko-2023-1.3", split="train")
# ds4 = load_dataset("JacobLinCool/taiko-2023-1.4", split="train")
# ds5 = load_dataset("JacobLinCool/taiko-2023-1.5", split="train")
# ds6 = load_dataset("JacobLinCool/taiko-2023-1.6", split="train")
# ds7 = load_dataset("JacobLinCool/taiko-2023-1.7", split="train")
# ds = concatenate_datasets([ds1, ds2, ds3, ds4, ds5, ds6, ds7]).with_format("torch")
# good = list(range(len(ds)))
# good.remove(1079)  # 1079 has file problem
# ds = ds.select(good)

# For local testing: use only the first 10 rows of the taiko-2023-1.6 train
# split, with the "torch" output format applied.
ds = (
    load_dataset("JacobLinCool/taiko-2023-1.6", split="train")
    .with_format("torch")
    .select(range(10))
)