# File size: 850 Bytes
# 812b01c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
from datasets import load_dataset, concatenate_datasets

# ds1 = load_dataset("JacobLinCool/taiko-2023-1.1", split="train")
# ds2 = load_dataset("JacobLinCool/taiko-2023-1.2", split="train")
# ds3 = load_dataset("JacobLinCool/taiko-2023-1.3", split="train")
# ds4 = load_dataset("JacobLinCool/taiko-2023-1.4", split="train")
# ds5 = load_dataset("JacobLinCool/taiko-2023-1.5", split="train")
# ds6 = load_dataset("JacobLinCool/taiko-2023-1.6", split="train")
# ds7 = load_dataset("JacobLinCool/taiko-2023-1.7", split="train")
# ds = concatenate_datasets([ds1, ds2, ds3, ds4, ds5, ds6, ds7]).with_format("torch")

# good = list(range(len(ds)))
# good.remove(1079)  # 1079 has file problem
# ds = ds.select(good)

# Local-test configuration: use only a small slice of a single dataset repo
# so the script can be exercised quickly.
ds = load_dataset("JacobLinCool/taiko-2023-1.6", split="train")
# Request the "torch" output format for rows read from the dataset.
ds = ds.with_format("torch")
# Keep only the rows at indices 0..9.
ds = ds.select(range(10))