@InCollection{BR93,
  author    = {Avrim Blum and Ronald L. Rivest},
  title     = {Training a 3-node neural network is {NP}-complete},
  booktitle = {Machine Learning: From Theory to Applications},
  editor    = {Stephen Jos\'{e} Hanson and Werner Remmele and Ronald L. Rivest},
  series    = {Lecture Notes in Computer Science},
  volume    = {661},
  pages     = {9--28},
  publisher = {Springer},
  year      = {1993},
  isbn      = {978-3-540-56483-6},
  doi       = {10.1007/3-540-56483-7_20},
  url       = {http://dx.doi.org/10.1007/3-540-56483-7_20},
  note      = {(Reprinted with permission from Neural Networks 5,1 (January 1992), Avrim Blum and Ronald L. Rivest, ``Training a three-node neural net is NP-complete''.)},
  abstract  = {We show for many simple two-layer networks whose nodes compute linear threshold functions of their inputs that training is NP-complete. For any training algorithm for one of these networks there will be some sets of training data on which it performs poorly, either by running for more than an amount of time polynomial in the input length, or by producing sub-optimal weights. Thus, these networks differ fundamentally from the perceptron in a worst-case computational sense. \par The theorems and proofs are in a sense fragile; they do not imply that training is necessarily hard for networks other than those specifically mentioned. They do, however, suggest that one cannot escape computational difficulties simply by considering only very simple or very regular networks. \par On a somewhat more positive note, we present two networks such that the second is both more powerful than the first and can be trained in polynomial time, even though the first is NP-complete to train. This shows that computational intractability does not depend directly on network power and provides theoretical support for the idea that finding an appropriate network and input encoding for one's training problem is an important part of the training process. \par An open problem is whether the NP-completeness results can be extended to neural networks that use the differentiable logistic linear functions. We conjecture that training remains NP-complete when these functions are used since their use does not seem to alter significantly the expressive power of a neural network. However, our proof techniques break down. Note that Judd [9], for the networks he considers, shows NP-completeness for a wide variety of node functions including logistic linear functions.},
}