@InCollection{BR93,
author = { Avrim Blum and Ronald L. Rivest },
title = { Training a 3-node neural network is {NP}-complete },
pages = { 9--28 },
booktitle = { Machine Learning: From Theory to Applications },
isbn = { 978-3-540-56483-6 },
editor = { Stephen Jos\'{e} Hanson and Werner Remmele and Ronald L. Rivest },
publisher = { Springer },
year = { 1993 },
series = { Lecture Notes in Computer Science },
volume = { 661 },
url = { http://dx.doi.org/10.1007/3-540-56483-7_20 },
doi = { 10.1007/3-540-56483-7_20 },
note = { (Reprinted with permission from Neural Networks 5(1), January 1992,
Avrim Blum and Ronald L. Rivest, ``Training a three-node
neural net is NP-complete''.)
},
abstract = { We show for many simple two-layer networks whose
nodes compute linear threshold functions of their
inputs that training is NP-complete. For any
training algorithm for one of these networks there
will be some sets of training data on which it
performs poorly, either by running for more than an
amount of time polynomial in the input length, or by
producing sub-optimal weights. Thus, these networks
differ fundamentally from the perceptron in a
worst-case computational sense.
\par
The theorems
and proofs are in a sense fragile; they do not imply
that training is necessarily hard for networks other
than those specifically mentioned. They do, however,
suggest that one cannot escape computational
difficulties simply by considering only very simple
or very regular networks.
\par
On a somewhat more
positive note, we present two networks such that the
second is both more powerful than the first and can
be trained in polynomial time, even though the first
is NP-complete to train. This shows that
computational intractability does not depend
directly on network power and provides theoretical
support for the idea that finding an appropriate
network and input encoding for one's training
problem is an important part of the training
process.
\par
An open problem is whether the
NP-completeness results can be extended to neural
networks that use differentiable logistic linear
functions. We conjecture that training remains
NP-complete when these functions are used since
their use does not seem to alter significantly the
expressive power of a neural network. However, our
proof techniques break down. Note that Judd [9], for
the networks he considers, shows NP-completeness for
a wide variety of node functions including logistic
linear functions.
},
}