% Conference paper on large-message broadcast with NCCL and CUDA-aware MPI,
% presented at EuroMPI 2016.
% Review notes:
%   - Citation key newly assigned (lastnameYYYYkeyword); the entry previously had none.
%   - Author given names are kept as initials because only initials were recorded;
%     TODO: expand to full given names once confirmed against the paper.
%   - "venue" carries the conference location (biblatex event field; classic
%     BibTeX styles ignore it).
%   - "url" points at the lab's publication index, not at the paper itself;
%     TODO: replace with a DOI when available.
@inproceedings{awan2016efficient,
  author    = {Awan, A. and Hamidouche, K. and Venkatesh, A. and Panda, D.},
  title     = {Efficient Large Message Broadcast using {NCCL} and {CUDA}-Aware {MPI} for Deep Learning},
  booktitle = {The 23rd European {MPI} Users' Group Meeting ({EuroMPI} 16)},
  year      = {2016},
  month     = sep,
  venue     = {Edinburgh, Scotland},
  url       = {http://nowlab.cse.ohio-state.edu/publications/},
}