Junfei Xiao

I am a Member of Technical Staff at xAI, where I work on Grok Imagine Video Generation.

I lead efforts on video pretraining and post-training data mixtures, captioning, and training recipes. I build scalable, high-quality in-house dataset pipelines for video generation, supporting the Grok Imagine Video base model and downstream tasks (editing, reference-guided generation, super-resolution, etc.).

I received my Ph.D. and M.S.E. degrees in Computer Science from Johns Hopkins University, where I was advised by Bloomberg Distinguished Professor Dr. Alan Yuille. Before that, I received a B.E. in Mechanical Engineering and a double degree in Mathematics from Beihang University.



CV / Google Scholar / GitHub

profile photo
Recent News

Publications

Captain Safari: A World Engine
Yu-Cheng Chou, Xingrui Wang, Yitong Li, Jiahao Wang, Hanting Liu, Cihang Xie, Alan Yuille, Junfei Xiao*
(*: Project Lead)
CVPR, 2026

arXiv / project page / code / bibtex
@inproceedings{chou2026captainsafari,
  author    = {Chou, Yu-Cheng and Wang, Xingrui and Li, Yitong and Wang, Jiahao and Liu, Hanting and Xie, Cihang and Yuille, Alan and Xiao, Junfei},
  title     = {Captain Safari: A World Engine},
  booktitle = {CVPR},
  year      = {2026},
}

Captain Cinema: Towards Short Movie Generation
Junfei Xiao*, Ceyuan Yang, Lvmin Zhang, Shengqu Cai, Yang Zhao, Yuwei Guo, Gordon Wetzstein, Maneesh Agrawala, Alan Yuille,
Lu Jiang
(*: Project Lead)
ICLR, 2025

arXiv / project page / bibtex
@inproceedings{xiao2025captain,
  author    = {Xiao, Junfei and Yang, Ceyuan and Zhang, Lvmin and Cai, Shengqu and Zhao, Yang and Guo, Yuwei and Wetzstein, Gordon and Agrawala, Maneesh and Yuille, Alan and Jiang, Lu},
  title     = {Captain Cinema: Towards Short Movie Generation},
  booktitle = {ICLR},
  year      = {2025},
}

Mixture of Contexts for Long Video Generation
Shengqu Cai, Ceyuan Yang, Lvmin Zhang, Yuwei Guo, Junfei Xiao, Ziyan Yang, Yinghao Xu, Zhenheng Yang, Alan Yuille,
Leonidas Guibas, Maneesh Agrawala, Lu Jiang, Gordon Wetzstein
ICLR, 2025

project page / bibtex
@inproceedings{cai2025moc,
  author    = {Cai, Shengqu and Yang, Ceyuan and Zhang, Lvmin and Guo, Yuwei and Xiao, Junfei and Yang, Ziyan and Xu, Yinghao and Yang, Zhenheng and Yuille, Alan and Guibas, Leonidas and Agrawala, Maneesh and Jiang, Lu and Wetzstein, Gordon},
  title     = {Mixture of Contexts for Long Video Generation},
  booktitle = {ICLR},
  year      = {2025},
}

VLV: Vision-Language-Vision Auto-Encoder: Scalable Knowledge Distillation from Diffusion Models
Tiezheng Zhang, Yitong Li, Yu-cheng Chou, Jieneng Chen,
Alan L. Yuille, Chen Wei, Junfei Xiao*
(*: Project Lead)
NeurIPS, 2025

arXiv / code / huggingface / dataset / bibtex
@inproceedings{zhang2025vlv,
  author    = {Zhang, Tiezheng and Li, Yitong and Chou, Yu-cheng and Chen, Jieneng and Yuille, Alan L. and Wei, Chen and Xiao, Junfei},
  title     = {{VLV}: Vision-Language-Vision Auto-Encoder: Scalable Knowledge Distillation from Diffusion Models},
  booktitle = {NeurIPS},
  year      = {2025},
}

Play to Generalize: Learning to Reason Through Game Play
Yunfei Xie, Yinsong Ma, Shiyi Lan, Alan Yuille,
Junfei Xiao*, Chen Wei
(*: Project Lead)
arXiv preprint, 2025

arXiv / project page / code / bibtex
@article{xie2025vigal,
  author  = {Xie, Yunfei and Ma, Yinsong and Lan, Shiyi and Yuille, Alan and Xiao, Junfei and Wei, Chen},
  title   = {Play to Generalize: Learning to Reason Through Game Play},
  journal = {arXiv preprint arXiv:2506.08011},
  year    = {2025},
}

VideoAuteur: Towards Long Narrative Video Generation
Junfei Xiao, Feng Cheng, Lu Qi, Liangke Gui, Jiepeng Cen, Zhibei Ma, Alan Yuille, Lu Jiang
ICCV, 2025

paper / project page / bibtex
% Citation key kept as xiao2024narrative so existing \cite commands still resolve.
@inproceedings{xiao2024narrative,
  author    = {Xiao, Junfei and Cheng, Feng and Qi, Lu and Gui, Liangke and Cen, Jiepeng and Ma, Zhibei and Yuille, Alan and Jiang, Lu},
  title     = {{VideoAuteur}: Towards Long Narrative Video Generation},
  booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision},
  year      = {2025},
}

GenEx: Generating an Explorable World
Taiming Lu*, Tianmin Shu*, Junfei Xiao*, Luoxin Ye, Jiahao Wang, Cheng Peng, Chen Wei, Daniel Khashabi, Rama Chellappa, Alan Yuille, Jieneng Chen
(*: Core Contributors)
Tech Report, 2024

arXiv / project page / bibtex
@article{lu2024genex,
  author  = {Lu, Taiming and Shu, Tianmin and Xiao, Junfei and Ye, Luoxin and Wang, Jiahao and Peng, Cheng and Wei, Chen and Khashabi, Daniel and Chellappa, Rama and Yuille, Alan and Chen, Jieneng},
  title   = {GenEx: Generating an Explorable World},
  journal = {arXiv preprint arXiv:2412.09624},
  year    = {2024},
}

PaLM2-VAdapter: Progressively Aligned Language Model Makes a Strong Vision-language Adapter
Junfei Xiao, Zheng Xu, Alan Yuille, Shen Yan, Boyu Wang
arXiv preprint, 2024

arXiv / slides / bibtex
                    @article{palm2vadapter2024,
                      author  = {Xiao, Junfei and Xu, Zheng and Yuille, Alan and Yan, Shen and Wang, Boyu},
                      title   = {PaLM2-VAdapter: Progressively Aligned Language Model Makes a Strong Vision-language Adapter},
                      journal = {arXiv preprint arXiv:2402.10896},
                      year    = {2024},
                    }

A Semantic Space is Worth 256 Language Descriptions: Make Stronger Segmentation Models with Descriptive Properties
Junfei Xiao, Ziqi Zhou, Wenxuan Li, Shiyi Lan, Jieru Mei, Zhiding Yu,
Bingchen Zhao, Alan Yuille, Yuyin Zhou, Cihang Xie
ECCV, 2024

arXiv / code / bibtex
                    % Citation key kept as xiao2023semantic so existing \cite commands still resolve.
                    @inproceedings{xiao2023semantic,
                      author        = {Xiao, Junfei and Zhou, Ziqi and Li, Wenxuan and Lan, Shiyi and Mei, Jieru and Yu, Zhiding and Zhao, Bingchen and Yuille, Alan and Zhou, Yuyin and Xie, Cihang},
                      title         = {A Semantic Space is Worth 256 Language Descriptions: Make Stronger Segmentation Models with Descriptive Properties},
                      booktitle     = {Proceedings of the European Conference on Computer Vision (ECCV)},
                      year          = {2024},
                      eprint        = {2312.13764},
                      archiveprefix = {arXiv},
                    }

CLIP-Driven Universal Model for Organ Segmentation and Tumor Detection
Jie Liu, Yixiao Zhang, Jieneng Chen, Junfei Xiao, Yongyi Lu, Bennett A. Landman, Yixuan Yuan, Alan Yuille, Yucheng Tang, Zongwei Zhou
ICCV, 2023

arXiv / code / bibtex
                  @inproceedings{liu2023clip,
                    author    = {Liu, Jie and Zhang, Yixiao and Chen, Jieneng and Xiao, Junfei and Lu, Yongyi and Landman, Bennett A. and Yuan, Yixuan and Yuille, Alan and Tang, Yucheng and Zhou, Zongwei},
                    title     = {{CLIP}-Driven Universal Model for Organ Segmentation and Tumor Detection},
                    booktitle = {Proceedings of the IEEE/CVF International Conference on Computer Vision},
                    pages     = {21152--21164},
                    year      = {2023},
                  }



Webpage template borrowed from Jon Barron