A Roadmap for Big Model

% arXiv preprint: the identifier is stored in eprint/archiveprefix (not journal/volume)
% so that eprint-aware styles can format and link it correctly.
@misc{Yuan2022ARF,
  author        = {Sha Yuan and Hanyu Zhao and Shuai Zhao and Jiahong Leng and Yangxiao Liang and Xiaozhi Wang and Jifan Yu and Xin Lv and Zhou Shao and Jiaao He and Yankai Lin and Xu Han and Zhenghao Liu and Ning Ding and Yongming Rao and Yizhao Gao and L. Zhang and Ming Ding and Cong Fang and Yisen Wang and Mingsheng Long and Jing Zhang and Yinpeng Dong and Tianyu Pang and Peng Cui and Lingxiao Huang and Zhengxuan Liang and Huawei Shen and Hui Zhang and Quanshi Zhang and Qingxiu Dong and Zhixing Tan and Mingxuan Wang and Shuo Wang and Longxi Zhou and Haoran Li and Junwei Bao and Yingwei Pan and Weinan Zhang and Zhou Yu and Rui Yan and Chence Shi and Minghao Xu and Zuobai Zhang and Guoqiang Wang and Xiang Pan and Meng-Jie Li and Xiaoyu Chu and Zijun Yao and Fangwei Zhu and Shulin Cao and Weicheng Xue and Zixuan Ma and Zhengyan Zhang and Shengding Hu and Yujia Qin and Chaojun Xiao and Zheni Zeng and Ganqu Cui and Weize Chen and Weilin Zhao and Yuan Yao and Peng Li and Wenzhao Zheng and Wenliang Zhao and Ziyi Wang and Bo Zhang and Nanyi Fei and Anwen Hu and Zenan Ling and Hao Li and Boxi Cao and Xi Han and Weidong Zhan and Baobao Chang and Hao Sun and Jia Deng and Juan-Zi Li and Lei Hou and Xi-Ming Cao and Jidong Zhai and Zhiyuan Liu and Maosong Sun and Jiwen Lu and Zhi-Gang Lu and Qingyu Jin and Ruihua Song and Jiaxin Wen and Zhou Lin and Liwei Wang and Hang Su and Junting Zhu and Zhifang Sui and Jiaju Zhang and Yang Liu and Xiao He and Min Huang and Jianxi Tang and Jie Tang},
  title         = {A Roadmap for Big Model},
  year          = {2022},
  eprint        = {2203.14101},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2203.14101},
}
With the rapid development of deep learning, training Big Models (BMs) for multiple downstream tasks becomes a popular paradigm. Researchers have achieved various outcomes in the construction of BMs and the BM application in many fields. At present, there is a lack of research work that sorts out the overall progress of BMs and guides the follow-up research. In this paper, we cover not only the BM technologies themselves but also the prerequisites for BM training and applications with BMs… 

Big Learning: A Universal Machine Learning Paradigm?

TLDR
It is revealed that big learning is what existing foundation models are implicitly doing; accordingly, the newly introduced big learning provides high-level guidance for flexible design and improvements of foundation models, accelerating the true self-learning on the Internet.

The Bearable Lightness of Big Data: Towards Massive Public Datasets in Scientific Machine Learning

TLDR
This paper demonstrates that deep learning models, trained and tested on data from a petascale CFD simulation, are robust to errors introduced during lossy compression in a semantic segmentation problem, and that lossy compression algorithms offer a realistic pathway for exposing high-fidelity scientific data to open-source data repositories for building community datasets.