% Conference paper entry. Case-sensitive tokens in the title (SPARe, LLM, GPUs)
% are brace-protected so that sentence-casing styles do not lowercase them.
@inproceedings{lee2026spare,
  author    = {Lee, Jin and Chen, Zhonghao and He, Xuhang and Underwood, Robert and Nicolae, Bogdan and Cappello, Franck and Lu, Xiaoyi and Di, Sheng and Zhang, Zheng},
  title     = {{SPARe}: Stacked Parallelism with Adaptive Reordering for Fault-Tolerant {LLM} Pretraining Systems with 100k+ {GPUs}},
  booktitle = {Proceedings of the 43rd International Conference on Machine Learning},
  publisher = {JMLR.org},
  year      = {2026},
}

