Spaces:
Running
on
Zero
Running
on
Zero
File size: 838 Bytes
812b01c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 |
from datasets import load_dataset, concatenate_datasets
# Fetch the "train" split of each of the seven taiko-2023 dataset parts,
# in order, keeping one module-level handle per part.
ds1, ds2, ds3, ds4, ds5, ds6, ds7 = (
    load_dataset(repo_id, split="train")
    for repo_id in (
        "JacobLinCool/taiko-2023-1.1",
        "JacobLinCool/taiko-2023-1.2",
        "JacobLinCool/taiko-2023-1.3",
        "JacobLinCool/taiko-2023-1.4",
        "JacobLinCool/taiko-2023-1.5",
        "JacobLinCool/taiko-2023-1.6",
        "JacobLinCool/taiko-2023-1.7",
    )
)

# Join the parts into a single dataset, then switch its output format to
# "torch".
ds = concatenate_datasets([ds1, ds2, ds3, ds4, ds5, ds6, ds7])
ds = ds.with_format("torch")

# Keep every row index of the combined dataset except 1079, which has a
# file problem.
good = [*range(len(ds))]
good.remove(1079)
ds = ds.select(good)
# for local test
# ds = (
# load_dataset("JacobLinCool/taiko-2023-1.6", split="train")
# .with_format("torch")
# .select(range(10))
# )
|