Spaces:
Running
on
Zero
Running
on
Zero
import json | |
import pyarrow | |
import typer | |
from rich.progress import track | |
from bytelatent.data.iterators.multiprocess_iterator import MultiprocessIteratorState | |
from bytelatent.logger import init_logger | |
def main(state_file: str, num_batches: int = 1_000):
    """Rebuild a packing iterator from a saved train state and draw batches from it.

    The train state is read from ``state_file`` as JSON. Its
    ``"data_loader_state"`` entry is turned into a
    ``MultiprocessIteratorState``, whose ``base_iterator_state`` is then
    built and iterated directly in this process.

    Args:
        state_file: Path to the JSON train-state file.
        num_batches: Number of batches to pull from the iterator. Defaults
            to 1000, the value that used to be hard-coded.

    Raises:
        StopIteration: If the iterator runs out before ``num_batches``
            batches have been drawn.
    """
    init_logger()

    # Set pyarrow's I/O and CPU thread pool sizes to 4 each.
    pyarrow.set_io_thread_count(4)
    pyarrow.set_cpu_count(4)

    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(state_file, encoding="utf-8") as f:
        train_state = json.load(f)

    dl_state = MultiprocessIteratorState(**train_state["data_loader_state"])
    packing_iterator_state = dl_state.base_iterator_state

    print("building")
    packing_iterator = packing_iterator_state.build()

    print("iter")
    batch_iter = packing_iterator.create_iter()

    print("looping")
    # The batches themselves are discarded; only the act of producing them
    # (with a progress bar from ``track``) matters here.
    for _ in track(range(num_batches)):
        next(batch_iter)
# Script entry point: hand ``main`` to typer, which builds the command-line
# interface from the function's signature and invokes it.
if __name__ == "__main__":
    typer.run(main)