% ICASSP 2020 conference paper on adaptive distributed SGD with stragglers.
% Funding and copyright statements are stored in the custom `funding` and
% `copyright` fields (unknown to the standard styles, hence not printed)
% rather than in `note`, which styles print verbatim in the reference.
% The conference name and dates are stored in `eventtitle` / `eventdate`.
@inproceedings{c755c5f7c1cf40ee94dcebb07be59897,
  author     = {Hanna, {Serge Kas} and Bitar, Rawad and Parag, Parimal and Dasari, Venkat and {El Rouayheb}, Salim},
  title      = {Adaptive Distributed Stochastic Gradient Descent for Minimizing Delay in the Presence of Stragglers},
  booktitle  = {2020 {IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2020 - Proceedings},
  series     = {{ICASSP}, {IEEE} International Conference on Acoustics, Speech and Signal Processing - Proceedings},
  publisher  = {Institute of Electrical and Electronics Engineers Inc.},
  address    = {United States},
  year       = {2020},
  month      = may,
  pages      = {4262--4266},
  doi        = {10.1109/ICASSP40776.2020.9053961},
  language   = {English (US)},
  keywords   = {Distributed SGD, adaptive policy, stragglers},
  abstract   = {We consider the setting where a master wants to run a distributed stochastic gradient descent (SGD) algorithm on n workers each having a subset of the data. Distributed SGD may suffer from the effect of stragglers, i.e., slow or unresponsive workers who cause delays. One solution studied in the literature is to wait at each iteration for the responses of the fastest k < n workers before updating the model, where k is a fixed parameter. The choice of the value of k presents a trade-off between the runtime (i.e., convergence rate) of SGD and the error of the model. Towards optimizing the error-runtime trade-off, we investigate distributed SGD with adaptive k. We first design an adaptive policy for varying k that optimizes this trade-off based on an upper bound on the error as a function of the wallclock time which we derive. Then, we propose an algorithm for adaptive distributed SGD that is based on a statistical heuristic. We implement our algorithm and provide numerical simulations which confirm our intuition and theoretical analysis.},
  eventtitle = {2020 {IEEE} International Conference on Acoustics, Speech, and Signal Processing, {ICASSP} 2020},
  eventdate  = {2020-05-04/2020-05-08},
  funding    = {The work of the third author was supported in part by the Science and Engineering Research Board under Grant DSTO-1677, the Department of Telecommunications, Government of India, under Grant DOTC-0001, the Robert Bosch Center for Cyber-Physical Systems, and the Centre for Networked Intelligence (a Cisco CSR initiative) of the Indian Institute of Science, Bangalore. The work of the first and last authors was supported in part by ARL Grant W911NF-17-1-0032.},
  copyright  = {{\textcopyright} 2020 IEEE.},
}