python3 -m torch.distributed.run --nproc_per_node 8 --rdzv_backend=c10d --rdzv_endpoint=localhost:0 --nnodes=1 pretrain.py \
    arch=trm \
    data_paths="[data/arc2concept-aug-1000]" \
    arch.L_layers=2 \
    arch.H_cycles=3 arch.L_cycles=4 \
    +run_name=trm_arc2_8gpu_resume_step115815_plus100k_v2 \
    ema=True \
    checkpoint_every_eval=True \
    epochs=24000 eval_interval=100 \
    global_batch_size=768 \
    +load_checkpoint="/workspace/TinyRecursiveModels/checkpoints/Arc2concept-aug-1000-ACT-torch/trm_arc2_8gpu_resume_plus100k/step_115815"
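
This launches an 8-GPU resume of the TRM run on the arc2concept-aug-1000 data, picking up from the step_115815 checkpoint via +load_checkpoint. Before launching, it can be worth sanity-checking that path. The snippet below is a minimal sketch that assumes the step_115815 file is a plain torch.save'd state_dict (the repo's actual checkpoint format may differ); the path is simply copied from the command above.

# sanity_check_ckpt.py -- quick look at a checkpoint before resuming
# (sketch; assumes the file is a raw state_dict saved with torch.save)
import sys
import torch

ckpt_path = sys.argv[1] if len(sys.argv) > 1 else (
    "/workspace/TinyRecursiveModels/checkpoints/Arc2concept-aug-1000-ACT-torch/"
    "trm_arc2_8gpu_resume_plus100k/step_115815"
)

# Load on CPU so this also works on a node without GPUs
state = torch.load(ckpt_path, map_location="cpu")

# If the file really is a raw state_dict, every value should be a tensor
n_params = sum(v.numel() for v in state.values() if torch.is_tensor(v))
print(f"{len(state)} entries, {n_params / 1e6:.2f}M tensor parameters")
for name in list(state)[:5]:
    val = state[name]
    print("  ", name, tuple(val.shape) if torch.is_tensor(val) else type(val))

If the load fails or the keys look wrong (e.g. a wrapper dict rather than parameter tensors), that usually means the checkpoint path or format assumption is off, which is cheaper to discover before occupying 8 GPUs.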