% Change: Platt98 -> Platt99,
%         Lewis94b -> Lewis94
% 
% Keywords:
% [MODELS]
% bigram
% back-off
% mixture
%
% [APPLICATIONS]
% focused crawling
% random process
% text
% e-mail
% structured documents
%
% [PROBLEM TYPES]
% bias
% classification
% clustering
% compression
% factorization (soft clustering)
% fast optimization
% hierarchy
% information extraction
% language modeling
% lexical attraction
% model selection
% multiclass
% multilabel
% multitask
% network structure
% nlp
% on-line
% parameter estimation
% parsing
% ranking
% semi-supervised
% supervised
% terminology extraction
% topic detection
% unsupervised
%
% [ALGORITHMS]
% aic
% bayesian network
% bch
% bic
% boosting
% compression
% decision tree
% ecoc
% exponential (family)
% exponential gradient
% gradient descent
% hmm
% knn
% latent variables
% linear
% linear programming
% llsf
% logistic
% mdl
% mml (minimum message length)
% mixture of experts
% multinomial
% naive bayes
% n-grams
% nnet (neural network)
% ppm (compression algorithm)
% regression
% rocchio
% smoothing
% svm
% subsampling
% widrow-hoff
% winnow
%
% [EVALUATION]
% pr breakeven
% precision
% recall
% roc curve
%
% [DATA SETS]
% 20 newsgroups
% ap newswire
% industry sector
% ohsumed
% rcv1
% reuters
% reuters-21450
% reuters-21578
% reuters-22173
% webkb

% Year inferred from the citation key and the report number "91/151" -- verify against the report.
@techreport{Patrick91
,author = "J. D. Patrick"
,title = "A program for discriminating between classes"
,number = "91/151"
,institution = "School of Computer Science and Software Engineering, Monash University"
,year = 1991
,keywords = "compression, classification, clustering"
}

@article{Klautau03
,author = "Aldebaro Klautau and Nikola Jevti{\'c} and Alon Orlitsky"
,title = "On Nearest-Neighbor Error-Correcting Output Codes with Application
to All-Pairs Multiclass Support Vector Machines"
,year = 2003
,journal = "Journal of Machine Learning Research"
,volume = 4
,keywords = "ecoc, classification"
}

@inproceedings{Pratt93
,author = "Lorien Y. Pratt"
,title = "Discriminability-Based Transfer between Neural Networks"
,booktitle = "Advances in Neural Information Processing Systems"
,volume = "5"
,publisher = "Morgan Kaufmann"
,address = "San Mateo, CA"
,editor = "Stephen Jos{\'e} Hanson and Jack D. Cowan and C. Lee Giles"
,pages = "204--211"
,year = "1993"
,keywords = "multitask, classification"
,note = "use weights from one task as starting point for neural network for another task"
}

@inproceedings{Cohen97,
  author    = {William W. Cohen and Daniel Kudenko},
  title     = {Transferring and Retraining Learned Information Filters},
  booktitle = {Proceedings of AAAI-97},
  year      = {1997},
  keywords  = {multitask, classification, text},
}

@inproceedings{Bollacker97,
  author    = {Kurt D. Bollacker and Joydeep Ghosh},
  title     = {A scalable method for classifier knowledge reuse},
  booktitle = {Proceedings of the 1997 International Conference on Neural Networks},
  year      = {1997},
  keywords  = {multitask, classification},
}

@inproceedings{Caruana96,
  author    = {Rich Caruana},
  title     = {Algorithms and Applications for Multitask Learning},
  booktitle = {Proceedings of the 13th International Conference on Machine Learning},
  year      = {1996},
  keywords  = {multitask, classification},
}

@inproceedings{Collins00,
  author    = {Michael Collins},
  title     = {Discriminative Reranking for Natural Language Parsing},
  booktitle = {Proceedings of the 17th International Conference on Machine Learning},
  year      = {2000},
  keywords  = {boosting, parsing, nlp},
}

@inproceedings{Crammer02sigir
,author = "Koby Crammer and Yoram Singer"
,title = "A New Family of Online Algorithms for Category Ranking"
,booktitle = "25th Conference on Research and Development in Information Retrieval"
,year = 2002
,keywords = "ranking, text"
}

@inproceedings{Crammer02nips,
  author    = {Koby Crammer and Yoram Singer},
  title     = {P{R}anking with Ranking},
  booktitle = {Advances in Neural Information Processing Systems 14},
  year      = {2002},
  keywords  = {ranking},
}

@article{Cleary97
,author = "John G. Cleary and William J. Teahan"
,title = "Unbounded length contexts for PPM"
,journal = "Computer Journal"
,volume = 40
,number = "2/3"
,pages = "67--75"
,year = 1997
,keywords = "text, compression, ppm"
}

% NOTE: the citation key is spelled with three t's; it is kept as-is so existing citations still resolve.
@inproceedings{Wittten99,
  author    = {I. H. Witten and Z. Bray and M. Mahoui and W. J. Teahan},
  title     = {Text mining: a new frontier for lossless compression},
  booktitle = {Proceedings of the Data Compression Conference},
  year      = {1999},
  keywords  = {compression, ppm, text, classification, information extraction},
}

@mastersthesis{Thaper01
,author = "Nitin Thaper"
,title = "Using Compression For Source Based Classification Of Text"
,year = 2001
,school = "Massachusetts Institute of Technology"
,keywords = "text classification, compression, clustering, language modeling"
}

@inproceedings{Teahan01,
  author    = {William J. Teahan and David J. Harper},
  title     = {Using compression based language models for text categorization},
  booktitle = {Workshop on Language Modeling and Information Retrieval},
  year      = {2001},
  keywords  = {text classification, compression},
}

@article{Demiriz02,
  author   = {Ayhan Demiriz and Kristin P. Bennett and John Shawe-Taylor},
  title    = {Linear Programming Boosting via Column Generation},
  journal  = {Machine Learning},
  volume   = {46},
  pages    = {225--254},
  year     = {2002},
  keywords = {boosting, linear programming},
}

% Year set to 2000 to match the citation key and the MIT Press publication date -- verify against the volume.
@incollection{Mason00
,author = "Llew Mason and Jonathan Baxter and Peter L. Bartlett and Marcus Frean"
,title = "Functional Gradient Techniques for Combining Hypotheses"
,booktitle = "Advances in Large Margin Classifiers"
,year = 2000
,editor = {Smola and Bartlett and Sch{\"o}lkopf and Schuurmans}
,pages = "221--246"
,publisher = "MIT Press"
,keywords = "boosting, gradient descent"
}

@inproceedings{Mason00a,
  author    = {Llew Mason and Jonathan Baxter and Peter Bartlett and Marcus Frean},
  title     = {Boosting algorithms as gradient descent},
  booktitle = {Neural Information Processing Systems 12},
  year      = {2000},
  keywords  = {boosting, gradient descent},
}

@article{Freund97a
,author = "Yoav Freund and Robert E. Schapire"
,title = "A decision-theoretic generalization of on-line learning and an application to boosting"
,journal = "Journal of Computer and System Sciences"
,volume = 55
,number = 1
,pages = "119--139"
,year = 1997
,keywords = "boosting"
}

@article{Damashek95,
  author   = {Marc Damashek},
  title    = {Gauging similarity with $n$-grams: {L}anguage independent categorization of text},
  journal  = {Science},
  volume   = {267},
  pages    = {843--848},
  year     = {1995},
  keywords = {n-grams, information retrieval},
}

@article{Cohen95
,title = "Highlights: {L}anguage- and domain-independent automatic indexing terms for abstracting"
,author = "Jonathan Cohen"
,year = 1995
,volume = 46
,keywords = "n-grams, information retrieval"
,journal = "Journal of the American Society for Information Science"
}

@article{Pearce96
,title = "TELLTALE: {E}xperiments in a dynamic hypertext environment for degraded and multilingual data"
,author = "Claudia Pearce and Charles K. Nicholas"
,year = 1996
,volume = 47
,number = 4
,keywords = "n-grams, information retrieval"
,journal = "Journal of the American Society for Information Science"
}

@phdthesis{Pearce94
,title = "A dynamic hypertext environment through $n$-gram analysis"
,author = "Claudia Pearce"
,year = 1994
,school = "University of Maryland, Baltimore County"
,keywords = "n-grams, information retrieval"
}

@inproceedings{Kulyukin96,
  author    = {Vladimir A. Kulyukin and Kristian Hammond and Robin Burke},
  title     = {Automated Analysis of Structured Online Documents},
  booktitle = {Proceedings of the AAAI-96 Workshop on Internet Agents},
  year      = {1996},
  keywords  = {structured documents, information extraction},
}

@incollection{McDiarmid89,
  author    = {Colin McDiarmid},
  title     = {On the method of bounded differences},
  booktitle = {Surveys in Combinatorics 1989},
  publisher = {Cambridge University Press},
  pages     = {148--188},
  year      = {1989},
}

@unpublished{Lugosi03
,author = "G{\'a}bor Lugosi"
,title = "Concentration-of-measure inequalities"
,note = "http://www.econ.upf.es/$\sim$lugosi/anu.ps"
,year = 2003
}

@inproceedings{Chen98,
  author    = {Hao Chen and Jianying Hu and Richard Sproat},
  title     = {E-mail signature block analysis},
  booktitle = {Proceedings of the 14th International Conference on Pattern Recognition},
  year      = {1998},
  keywords  = {text analysis, e-mail},
}

@article{Salton88,
  author  = {Gerald Salton and Chris Buckley},
  title   = {Term-weighting approaches in automatic text retrieval},
  journal = {Information Processing and Management},
  volume  = {24},
  number  = {5},
  pages   = {513--523},
  year    = {1988},
}

@article{church95,
  author  = {K. Church and W. Gale},
  title   = {Poisson Mixtures},
  journal = {Natural Language Engineering},
  volume  = {1},
  number  = {2},
  pages   = {163--190},
  year    = {1995},
}

@article{katz96,
  author  = {Slava Katz},
  title   = {Distribution of content words and phrases in text and language modelling},
  journal = {Natural Language Engineering},
  volume  = {2},
  number  = {1},
  pages   = {15--60},
  year    = {1996},
}

% Author is given in "Last, First" form because the surname is the two-word "Sparck Jones".
@article{ sj72,
    author = "Sparck Jones, Karen",
    title = "A statistical interpretation of term specificity and its application in retrieval",
    journal = "Journal of Documentation",
    volume = "28",
    pages = "11--21",
    year = "1972"
}

@inproceedings{mckeown99,
  author    = {Kathleen McKeown and Judith Klavans and Vasileios Hatzivassiloglou and Regina Barzilay and Eleazar Eskin},
  title     = {Towards Multidocument Summarization by Reformulation: Progress and Prospects},
  booktitle = {{AAAI}},
  year      = {1999},
}

@book{Manning99
,author = {C. Manning and H. Sch{\"u}tze}
,title = "Foundations of Statistical Natural Language Processing"
,year = 1999
,publisher = "MIT Press"
}

% Keyed Chen98tr because Chen98 is already taken by the Chen/Hu/Sproat entry earlier in this file;
% BibTeX rejects a repeated key and keeps only the first entry.
@techreport{Chen98tr
,author = "Stanley F. Chen and Joshua Goodman"
,title = "An empirical study of smoothing techniques for language modeling"
,year = 1998
,number = "TR-10-98"
,institution = "Harvard University, Center for Research in Computing Technology"
}

@inproceedings{Peng03
,author = "Fuchun Peng and Dale Schuurmans"
,title = "Combining naive Bayes and n-gram language models for text classification"
,year = 2003
,booktitle = "Proceedings of the Twenty-Fifth European Conference on Information Retrieval Research (ECIR-03)"
}

@book{Zipf49
,author = "G. K. Zipf"
,title = "Human Behavior and the Principle of Least Effort: An Introduction to Human Ecology"
,year = 1949
,publisher = "Addison-Wesley"
}

@inproceedings{Lewis94b,
  author    = {David D. Lewis and William A. Gale},
  title     = {A sequential algorithm for training text classifiers},
  booktitle = {SIGIR 94: Proceedings of the Seventeenth Annual International ACM-SIGIR Conference on Research and Development in Information Retrieval},
  year      = {1994},
  keywords  = {naive bayes, text, classification, logistic regression, bias, on-line, ap newswire},
}

@inproceedings{Musick93,
  author    = {R. Musick and J. Catlett and S. Russell},
  title     = {Decision theoretic subsampling for induction on large databases},
  booktitle = {Proceedings of the Tenth International Conference on Machine Learning},
  year      = {1993},
  keywords  = {decision trees, subsampling},
}

@techreport{Breiman94,
  author      = {Leo Breiman},
  title       = {Bagging Predictors},
  institution = {Department of Statistics, University of California, Berkeley},
  number      = {421},
  year        = {1994},
}

@article{Dietterich97
,author = "Thomas G. Dietterich"
,title = "Machine-Learning Research: Four Current Directions"
,journal = "{AI} Magazine"
,volume = "18"
,number = "4"
,pages = "97--136"
,year = "1997"
,keywords = "feature selection, winnow, adaboost, bagging, reinforcement learning, mixtures of experts, baum-welch, hmm"
}

@inproceedings{Weston00,
  author    = {J. Weston and S. Mukherjee and O. Chapelle and M. Pontil and T. Poggio and V. Vapnik},
  title     = {Feature Selection for SVMs},
  booktitle = {Proceedings of Neural Information Processing Systems 13},
  year      = {2001},
  keywords  = {feature selection, svm, classification},
}

@inproceedings{Lewis94,
  author    = {David Lewis and Marc Ringuette},
  title     = {A Comparison of Two Learning Algorithms for Text Categorization},
  booktitle = {The Third Annual Symposium on Document Analysis and Information Retrieval},
  year      = {1994},
  keywords  = {text classification, feature selection, naive bayes, decision tree, reuters-21450},
}

@inproceedings{Osuna97,
  author    = {Edgar Osuna and Robert Freund and Federico Girosi},
  title     = {An Improved Training Algorithm for Support Vector Machines},
  booktitle = {Proceedings of the 1997 IEEE Workshop on Neural Networks for Signal Processing},
  year      = {1997},
  keywords  = {svm, fast optimization},
}

@incollection{Joachims99a
,author = "Thorsten Joachims"
,title = "Making Large-Scale SVM Learning Practical"
,booktitle = "Advances in Kernel Methods - Support Vector Learning"
,year = 1999
,editor = {B. Sch{\"o}lkopf and C. Burges and A. Smola}
,publisher = "MIT Press"
}

@article{Domingos02,
  author  = {Pedro Domingos},
  title   = {When and How to Subsample: Report on the KDD-2001 Panel},
  journal = {SIGKDD Explorations},
  volume  = {3},
  number  = {2},
  year    = {2002},
}

@article{Weiss99,
  author  = {S. M. Weiss and C. Apte and F. J. Damerau and D. E. Johnson and J. F. Oles and T. Goetz and T. Hampp},
  title   = {Maximizing text-mining performance},
  journal = {IEEE Intelligent Systems},
  volume  = {14},
  number  = {4},
  pages   = {63--69},
  year    = {1999},
}

@inproceedings{Rennie99,
  author    = {Jason Rennie and Andrew McCallum},
  title     = {Using Reinforcement Learning to Spider the Web Efficiently},
  booktitle = {Proceedings of the 16th International Conference on Machine Learning},
  year      = {1999},
  keywords  = {focused crawling, text classification},
}

@unpublished{Elkan97,
  author = {Charles Elkan},
  title  = {Naive Bayesian Learning},
  note   = {Based on 1997 UCSD TR},
}

@article{Kass95
,author = "Robert E. Kass and Adrian E. Raftery"
,title = "Bayes Factors"
,journal = "Journal of the American Statistical Association"
,volume = 90
,number = 430
,pages = "773--795"
,year = 1995
,keywords = "mdl, bic, model selection"
}

@article{Wallace87,
  author   = {C. S. Wallace and P. R. Freeman},
  title    = {Estimation and inference by compact coding},
  journal  = {Journal of the Royal Statistical Society, series B (Methodological)},
  volume   = {49},
  number   = {3},
  pages    = {240--265},
  year     = {1987},
  keywords = {mml},
}

@article{Rissanen87
,author = "Jorma Rissanen"
,title = "Stochastic Complexity"
,year = 1987
,volume = 49
,number = 3
,pages = "223--239"
,journal = "Journal of the Royal Statistical Society, series B (Methodological)"
}

@article{Rissanen83
,author = "Jorma Rissanen"
,title = "A universal prior for integers and estimation by minimum description length"
,year = 1983
,journal = "Annals of Statistics"
,volume = 11
,number = 2
,pages = "416--431"
,keywords = "mdl, model selection"
}

% Personal communication; the title is a placeholder.
@unpublished{Jaakkola01
,author = "Tommi Jaakkola"
,title = "none"
,note = "personal communication"
,year = 2001
,month = may
,keywords = "clustering, factorization, em"
}

@inproceedings{Basu02,
  author    = {Sugato Basu and Arindam Banerjee and Raymond Mooney},
  title     = {Semi-supervised clustering by seeding},
  booktitle = {Machine Learning: Proceedings of the Nineteenth International Conference},
  year      = {2002},
  keywords  = {clustering, classification, semi-supervised},
}

@phdthesis{McCallum96,
  author   = {Andrew K. McCallum},
  title    = {Reinforcement Learning with Selective Perception and Hidden State},
  school   = {University of Rochester},
  year     = {1996},
  keywords = {reinforcement learning, feature selection},
}

@inproceedings{Lodhi01,
  author    = {Huma Lodhi and John Shawe-Taylor and Nello Cristianini and Christopher J. C. H. Watkins},
  title     = {Text Classification using String Kernels},
  booktitle = {Advances in Neural Information Processing Systems 13},
  year      = {2001},
}

@inproceedings{Slonim02,
  author    = {Noam Slonim and Gill Bejerano and Shai Fine and Naftali Tishby},
  title     = {Discriminative Feature Selection via Multiclass Variable Memory Markov Model},
  booktitle = {Machine Learning: Proceedings of the Nineteenth International Conference},
  year      = {2002},
}

@inproceedings{Kudenko98,
  author    = {Daniel Kudenko and Haym Hirsh},
  title     = {Feature Generation for Sequence Categorization},
  booktitle = {Proceedings of the Fifteenth National Conference on Artificial Intelligence},
  year      = {1998},
}

@inproceedings{Aha91,
  author    = {David W. Aha},
  title     = {Incremental constructive induction: an instance-based approach},
  booktitle = {Proceedings of the Eighth International Workshop on Machine Learning},
  pages     = {117--121},
  year      = {1991},
}

% Keyed Flach00: this entry was keyed Matheus89, which duplicated and shadowed the
% Matheus and Rendell (1989) entry that follows it.
@inproceedings{Flach00
,author = "Peter A. Flach and Nada Lavra{\v{c}}"
,title = "The role of feature construction in inductive rule learning"
,year = 2000
,booktitle = "Proceedings of the ICML2000 Workshop on Attribute-Value and Relational Learning: Crossing the Boundaries"
}

@inproceedings{Matheus89
,author = "Christopher J. Matheus and Larry A. Rendell"
,title = "Constructive Induction On Decision Trees"
,year = 1989
,booktitle = "Proceedings of the Eleventh International Joint Conference on Artificial Intelligence"
,pages = "645--650"
}

@article{Bell89
,author = "T. C. Bell and I. H. Witten and J. G. Cleary"
,title = "Modeling for text compression"
,journal = "ACM Computing Surveys"
,volume = 21
,number = 4
,pages = "557--591"
,year = 1989
}

@inproceedings{Blei02uai,
  author    = {David M. Blei and J. Andrew Bagnell and Andrew K. McCallum},
  title     = {Learning with scope, with application to information extraction and classification},
  booktitle = {Proceedings of the Eighteenth Annual Conference on Uncertainty in Artificial Intelligence},
  year      = {2002},
}

@phdthesis{deMarcken96,
  author = {Carl G. de Marcken},
  title  = {Unsupervised Language Acquisition},
  school = {Massachusetts Institute of Technology},
  year   = {1996},
}

@article{Rosenfeld00,
  author  = {Ronald Rosenfeld},
  title   = {Two decades of Statistical Language Modeling: Where Do We Go From Here?},
  journal = {Proceedings of the IEEE},
  volume  = {88},
  number  = {8},
  year    = {2000},
}

@inproceedings{Scheffer02,
  author    = {Tobias Scheffer and Stefan Wrobel},
  title     = {Text classification beyond the bag-of-words representation},
  booktitle = {Proceedings of the ICML Workshop on Text Learning},
  year      = {2002},
  keywords  = {text, classification, mixture, HMM},
}

@inproceedings{Ueda02,
  author    = {Naonori Ueda and Kazumi Saito},
  title     = {Single-shot Detection of Multiple Categories of Text using Parametric Mixture Models},
  booktitle = {Proceedings of the Eighth ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
  year      = {2002},
  keywords  = {text, classification, multilabel, mixture},
}

@unpublished{Yuret99,
  author   = {Deniz Yuret},
  title    = {Lexical Attraction Models of Language},
  note     = {http://www.ai.mit.edu/people/deniz/publications/aaai99.html},
  year     = {1999},
  keywords = {nlp, lexical attraction, parsing},
}

@inproceedings{Beeferman97,
  author    = {Doug Beeferman and Adam Berger and John Lafferty},
  title     = {A model of lexical attraction and repulsion},
  booktitle = {Proceedings of the ACL-EACL'97 Joint Conference},
  year      = {1997},
  keywords  = {nlp, lexical attraction, parsing},
}

@inproceedings{Nymble,
  author    = {Daniel M. Bikel and Scott Miller and Richard Schwartz and Ralph Weischedel},
  title     = {Nymble: a high-performance learning name-finder},
  booktitle = {Proceedings of Fifth Conference on Applied Natural Language Processing},
  pages     = {194--201},
  year      = {1997},
  keywords  = {nlp, hmm, back-off, information extraction},
}

@inproceedings{Elisseeff02
,title = "A kernel method for multi-labelled classification"
,author = "Andr{\'e} Elisseeff and Jason Weston"
,year = 2002
,booktitle = "Advances in Neural Information Processing Systems 14"
,editor = "Thomas G. Dietterich and Susan Becker and Zoubin Ghahramani"
,keywords = "classification, multilabel, text, svm"
}

@inproceedings{McCallum99,
  author    = {Andrew McCallum},
  title     = {Multi-Label Text Classification with a Mixture Model Trained by {EM}},
  booktitle = {Proceedings of the AAAI'99 Workshop on Text Learning},
  year      = {1999},
  keywords  = {classification, multilabel, text, mixture model},
}

@article{Nigam00
,title = "Text Classification from Labeled and Unlabeled Documents using {EM}"
,author = "Kamal Nigam and Andrew McCallum and Sebastian Thrun and Tom Mitchell"
,journal = "Machine Learning"
,volume = 39
,number = "2/3"
,pages = "103--134"
,year = 2000
,keywords = "language modeling, classification, clustering, latent variables"
}

@article{Hofmann01,
  author   = {Thomas Hofmann},
  title    = {Unsupervised Learning by Probabilistic Latent Semantic Analysis},
  journal  = {Machine Learning},
  volume   = {42},
  number   = {1},
  pages    = {177--196},
  year     = {2001},
  keywords = {language modeling, classification, clustering, latent variables},
}

@techreport{Hofmann98,
  author      = {Thomas Hofmann and Jan Puzicha},
  title       = {Statistical models for co-occurrence data},
  institution = {MIT AI Lab},
  number      = {AI Memo 1625},
  year        = {1998},
  keywords    = {language modeling, classification, clustering, latent variables},
}

@article{Saund95,
  author  = {Eric Saund},
  title   = {A Multiple Cause Mixture Model for Unsupervised Learning},
  journal = {Neural Computation},
  volume  = {7},
  number  = {1},
  pages   = {51--71},
  year    = {1995},
}

@inproceedings{Sahami96,
  author    = {Mehran Sahami and Marti Hearst and Eric Saund},
  title     = {Applying the multiple cause mixture model to text categorization},
  booktitle = {Machine Learning: Proceedings of the Thirteenth International Conference},
  year      = {1996},
  keywords  = {mixture model, text, classification, naive bayes},
}

@article{Kageura96,
  author   = {Kyo Kageura and Bin Umino},
  title    = {Methods of automatic term recognition: a review},
  journal  = {Terminology},
  volume   = {3},
  number   = {2},
  pages    = {259--289},
  year     = {1996},
  keywords = {terminology extraction, nlp},
}

@inproceedings{Joachims01,
  author    = {Thorsten Joachims},
  title     = {A Statistical Learning Model of Text Classification with Support Vector Machines},
  booktitle = {Proceedings of SIGIR},
  year      = {2001},
  keywords  = {SVM, text, language modeling},
}

@inproceedings{Collins02,
  author    = {Michael Collins},
  title     = {Discriminative Training Methods for Hidden Markov Models: Theory and Experiments with Perceptron Algorithms},
  booktitle = {Proceedings of EMNLP},
  year      = {2002},
}

% Volume number follows the other NIPS entries in this file, which pair volume 14 with 2002
% and volume 13 with 2001 -- verify against the proceedings.
@inproceedings{Blei02
,author = "David M. Blei and Andrew Y. Ng and Michael I. Jordan"
,title = "Latent Dirichlet Allocation"
,booktitle = "Advances in Neural Information Processing Systems 14"
,year = 2002
,keywords = "language modeling, classification, clustering, latent variables"
}

@inproceedings{Friedman96,
  author    = {Nir Friedman and Moises Goldszmidt},
  title     = {Building classifiers using bayesian networks},
  booktitle = {Proceedings of the Thirteenth National Conference on Artificial Intelligence},
  year      = {1996},
  keywords  = {bayesian network},
}

@inproceedings{Craven98a,
  author    = {Mark Craven and Dan DiPasquo and Dayne Freitag and Andrew McCallum and Tom Mitchell and Kamal Nigam and Sean Slattery},
  title     = {Learning to extract symbolic knowledge from the {W}orld {W}ide {W}eb},
  booktitle = {Proceedings of the Fifteenth National Conference on Artificial Intelligence (AAAI-98)},
  year      = {1998},
  keywords  = {information extraction, webkb},
}

@book{KalbFleisch79,
  author    = {J. G. Kalbfleisch},
  title     = {Probability and Statistical Inference, Volume 2: Statistical Inference},
  publisher = {Springer-Verlag},
  year      = {1979},
}

@incollection{Mukherjee02,
  author    = {S. Mukherjee and R. Rifkin and T. Poggio},
  title     = {Regression and Classification with Regularization},
  booktitle = {Uncertainty in Geometric Computations},
  publisher = {Kluwer},
  year      = {2002},
}

@article{MacKay94,
  author   = {David J. C. MacKay and Linda C. Bauman Peto},
  title    = {A hierarchical dirichlet language model},
  journal  = {Natural Language Engineering},
  year     = {1994},
  keywords = {language modeling, multinomial, parameter estimation, bigram model},
}

@phdthesis{Adamic01
,author = "Lada Adamic"
,title = "Network Dynamics: The World Wide Web"
,school = "Stanford University"
,year = 2001
}

@inproceedings{Nigam99,
  author    = {Kamal Nigam and John Lafferty and Andrew McCallum},
  title     = {Using Maximum Entropy for Text Classification},
  booktitle = {IJCAI-99 Workshop on Machine Learning for Information Filtering},
  year      = {1999},
}

@unpublished{Teevan02,
  author   = {Jaime Teevan and David Karger},
  title    = {Finding an exponential model for text retrieval through textual analysis},
  note     = {Jaime's RQE paper},
  keywords = {information retrieval, exponential, naive bayes},
}

% Year inferred from the citation key and the report number "9503" -- verify against the report.
@techreport{Jordan95
,author = "Michael I. Jordan"
,title = "Why the logistic function?  A tutorial discussion on probabilities and neural networks"
,number = "9503"
,institution = "Massachusetts Institute of Technology, Department of Computational Cognitive Science"
,year = 1995
,keywords = "logistic, exponential, classification"
}

@inproceedings{Lewis96,
  author    = {David D. Lewis and Robert E. Schapire and James P. Callan and Ron Papka},
  title     = {Training algorithms for linear text classifiers},
  booktitle = {Proceedings of the Nineteenth Annual International ACM-SIGIR Conference},
  year      = {1996},
  keywords  = {widrow-hoff, eg, rocchio, text, classification, linear},
}

@inproceedings{Lewis01,
  author    = {David D. Lewis},
  title     = {Applying support vector machines to the TREC-2001 batch filtering and routing tasks},
  booktitle = {The Tenth Text REtrieval Conference (TREC 2001)},
  year      = {2001},
  keywords  = {svm, text, classification, rcv1},
}

@inproceedings{Robertson95
,author = "S. E. Robertson and S. Walker and S. Jones and M. M. Hancock-Beaulieu and M. Gatford"
,title = "Okapi at TREC-3"
,booktitle = "Proceedings of the Third Text REtrieval Conference (TREC-3)"
,year = 1995
}

% Year inferred from the citation key and the report number prefix "02" -- verify against the report.
@techreport{Kleinberg02
,author = "Jon Kleinberg"
,title = "Bursty and Hierarchical Structure in Streams"
,institution = "Department of Computer Science, Cornell University"
,number = "02--1863"
,year = 2002
,keywords = "e-mail, text, random process, topic detection"
}

@inproceedings{Apte94,
  author    = {C. Apte and F. Damerau and S. Weiss},
  title     = {Towards language independent automated learning of text categorization models},
  booktitle = {Proceedings of the 17th annual ACM/SIGIR conference},
  year      = {1994},
  keywords  = {text, categorization, reuters-21450},
}

% Year inferred from the citation key and the report number "MSR-TR-97-07".
@techreport{Chickering97
,author = "D. Chickering and D. Heckerman and C. Meek"
,title = "A Bayesian Approach to Learning Bayesian Networks with Local Structure"
,institution = "Microsoft Research"
,number = "MSR-TR-97-07"
,year = 1997
}

% Year inferred from the citation key and the file name in the URL ("irj99") -- verify against the journal issue.
@article{Yang99a
,author = "Yiming Yang"
,title = "An evaluation of statistical approaches to text categorization"
,journal = "Information Retrieval"
,volume = 1
,number = 1
,pages = "67--88"
,year = 1999
,url = "http://www.cs.cmu.edu/~yiming/papers.yy/irj99.ps.gz"
,keywords = "reuters, text, classification, survey"
}

@techreport{Yang97a,
  author      = {Yiming Yang},
  title       = {An evaluation of statistical approaches to text categorization},
  institution = {Department of Computer Science, Carnegie Mellon},
  number      = {CMU-CS-97-127},
  year        = {1997},
  keywords    = {reuters, text, classification, survey, ohsumed},
}

@article{Sebastiani02,
  author   = {Fabrizio Sebastiani},
  title    = {Machine learning in automated text categorization},
  journal  = {ACM Computing Surveys},
  volume   = {34},
  number   = {1},
  pages    = {1--47},
  year     = {2002},
  url      = {http://faure.iei.pi.cnr.it/~fabrizio/Publications/ACMCS02.pdf},
  keywords = {text, classification, reuters, survey},
}

@article{Jain99,
  author   = {Anil K. Jain and M. Narasimha Murty  and Patrick J. Flynn},
  title    = {Data Clustering: A Review},
  journal  = {ACM Computing Surveys},
  volume   = {31},
  number   = {3},
  pages    = {264--323},
  year     = {1999},
  url      = {http://web.mit.edu/jorlin/www/15099Papers/jain.pdf},
  keywords = {clustering, hierarchy},
}

@article{Schapire00,
  author   = {Robert E. Schapire and Yoram Singer},
  title    = {BoosTexter: A Boosting-based System for Text Categorization},
  journal  = {Machine Learning},
  volume   = {39},
  pages    = {135--168},
  year     = {2000},
  keywords = {boosting, text, classification, reuters, 20 newsgroups},
}

@techreport{Minka01,
  author      = {Thomas P. Minka},
  title       = {Algorithms for maximum-likelihood logistic regression},
  institution = {Carnegie Mellon University; Department of Statistics},
  number      = {758},
  year        = {2001},
  keywords    = {logistic regression},
}

@article{Zhang01,
  author   = {Tong Zhang and Frank J. Oles},
  title    = {Text Categorization based on regularized linear classification methods},
  journal  = {Information Retrieval},
  volume   = {4},
  pages    = {5--31},
  year     = {2001},
  keywords = {logistic regression, SVM, Naive Bayes, text classification},
}

@phdthesis{LewisPhd,
  author   = {David Dolan Lewis},
  title    = {Representation and Learning in Information Retrieval},
  school   = {Department of Computer Science; Univ. of Massachusetts},
  address  = {Amherst, MA},
  year     = {1991},
  keywords = {P-R breakeven, precision, recall},
}

@book{Shanmugan88
,author = "K. Sam Shanmugan and Arthur M. Breipohl"
,title = "Random Signals: Detection, Estimation and Data Analysis"
,publisher = "John Wiley \& Sons"
,year = 1988
,keywords = "ROC Curve"
}

@book{Peterson72,
  author    = {William Wesley Peterson and E. J. Weldon},
  title     = {Error-Correcting Codes},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {1972},
  keywords  = {ECOC, BCH},
}

@book{Hill86,
  author    = {Raymond Hill},
  title     = {A First Course in Coding Theory},
  publisher = {Oxford University Press},
  year      = {1986},
  keywords  = {ECOC, BCH},
}

@book{Pless89,
  author    = {V. Pless},
  title     = {Introduction to the Theory of Error-Correcting Codes},
  publisher = {John Wiley and Sons},
  year      = {1989},
  keywords  = {ECOC, BCH},
}

@inproceedings{Chakrabarti02,
  author    = {Soumen Chakrabarti and Shourya Roy and Mahesh V. Soundalgekar},
  title     = {Fast and accurate text classification via multiple linear discriminant projections},
  booktitle = {Proceedings of VLDB-2002},
  year      = {2002},
  keywords  = {naive bayes, svm, text classification},
}

% NOTE: a second entry with this same key appears later in the file; BibTeX keeps only the
% first, so this is the entry a citation of Ruiz02 resolves to. The journal here now agrees
% with that later entry (same volume, number and pages).
@article{Ruiz02
,author = "Miguel E. Ruiz and Padmini Srinivasan"
,title = "Hierarchical Text Categorization Using Neural Networks"
,journal = "Information Retrieval"
,volume = 5
,number = 1
,year = 2002
,pages = "87--118"
,keywords = "text, classification, mixture of experts, hierarchy, OHSUMED"
}

@article{Dietterich98,
  author   = {Thomas G. Dietterich},
  title    = {Approximate Statistical Tests for Comparing Supervised Classification Learning Algorithms},
  journal  = {Neural Computation},
  volume   = {10},
  number   = {7},
  pages    = {1895--1924},
  year     = {1998},
  keywords = {classification, error, performance evaluation, McNemar's test},
}

@article{Chakrabarti98c,
  author  = {Soumen Chakrabarti and Byron E. Dom and Rakesh Agrawal and Prabhakar Raghavan},
  title   = {Scalable feature selection, classification and signature generation for organizing large text databases into hierarchical topic taxonomies},
  journal = {Journal of Very Large Data Bases},
  volume  = {7},
  number  = {3},
  pages   = {163--178},
  year    = {1998},
  url     = {http://www.cs.berkeley.edu/~soumen/VLDB54_3.PDF},
}

@inProceedings{Dalessio00,
    author       = {Stephen D'Alessio and Keitha Murray and Robert Schiaffino and Aaron Kershenbaum},
    title        = {The effect of using Hierarchical classifiers in Text Categorization},
    booktitle    = {Proceedings of RIAO-00, 6th International Conference ``Recherche d'Information Assist{\'e}e par Ordinateur''},
    address      = {Paris, FR},
    year         = {2000},
    pages        = {302--313},
    url          = {http://www.iona.edu/cs/FacultyPublications/riao2000New.pdf},
}

@inproceedings{Dumais00,
  author    = {Susan T. Dumais and Hao Chen},
  title     = {Hierarchical classification of {W}eb content},
  booktitle = {Proceedings of SIGIR-00, 23rd ACM International Conference on Research and Development in Information Retrieval},
  editor    = {Nicholas J. Belkin and Peter Ingwersen and Mun-Kew Leong},
  publisher = {ACM Press, New York, US},
  address   = {Athens, GR},
  pages     = {256--263},
  year      = {2000},
  url       = {http://research.microsoft.com/~sdumais/sigir00.pdf},
}

@inproceedings{Frommholz01,
  author    = {Ingo Frommholz},
  title     = {Categorizing {W}eb Documents in Hierarchical Catalogues},
  booktitle = {Proceedings of ECIR-01, 23rd European Colloquium on Information Retrieval Research},
  editor    = {},
  publisher = {},
  address   = {Darmstadt, DE},
  pages     = {},
  year      = {2001},
}

% The entry used to carry two note fields; classic BibTeX keeps only the
% first and reports the extra one.  The first note is kept as the note and
% the second is preserved under the unrendered field name oldnote.
@inproceedings{Gaussier02,
  author    = {Eric Gaussier and Cyril Goutte and Kris Popat and Francine Chen},
  title     = {A hierarchical model for clustering and categorising documents},
  booktitle = {Proceedings of ECIR-02, 24th European Colloquium on Information Retrieval Research},
  editor    = {Fabio Crestani and Mark Girolami and Cornelis J. van Rijsbergen},
  year      = {2002},
  address   = {Glasgow, UK},
  publisher = {Springer Verlag, Heidelberg, DE},
  note      = {Published in the ``Lecture Notes in Computer Science'' series, number 2291},
  oldnote   = {Forthcoming},
  pages     = {},
  url       = {},
  abstract  = {},
}

@inproceedings{McCallum98b,
  author    = {Andrew K. McCallum and Ronald Rosenfeld and Tom M. Mitchell and Andrew Y. Ng},
  title     = {Improving text classification by shrinkage in a hierarchy of classes},
  booktitle = {Proceedings of ICML-98, 15th International Conference on Machine Learning},
  editor    = {Jude W. Shavlik},
  publisher = {Morgan Kaufmann Publishers, San Francisco, US},
  address   = {Madison, US},
  pages     = {359--367},
  year      = {1998},
  url       = {http://www.cs.cmu.edu/~mccallum/papers/hier-icml98.ps.gz},
}

@article{Ruiz02,
  author  = {Miguel Ruiz and Padmini Srinivasan},
  title   = {Hierarchical text classification using neural networks},
  journal = {Information Retrieval},
  volume  = {5},
  number  = {1},
  pages   = {87--118},
  year    = {2002},
  url     = {http://www.wkap.nl/article.pdf?383232},
}

@inproceedings{Toutanova01,
  author    = {Kristina Toutanova and Francine Chen and Kris Popat and Thomas Hofmann},
  title     = {Text Classification in a Hierarchical Mixture Model for Small Training Sets},
  booktitle = {Proceedings of CIKM-01, 10th ACM International Conference on Information and Knowledge Management},
  editor    = {Henrique Paques and Ling Liu and David Grossman},
  publisher = {ACM Press, New York, US},
  address   = {Atlanta, US},
  pages     = {105--113},
  year      = {2001},
  url       = {http://www.stanford.edu/~krist/papers/cikm2001.pdf},
}

@inproceedings{Vinokourov01,
  author    = {Alexei Vinokourov and Mark Girolami},
  title     = {Document Classification Employing the {F}isher Kernel Derived from Probabilistic Hierarchic Corpus Representations},
  booktitle = {Proceedings of ECIR-01, 23rd European Colloquium on Information Retrieval Research},
  editor    = {},
  publisher = {},
  address   = {Darmstadt, DE},
  pages     = {24--40},
  year      = {2001},
  url       = {http://cis.paisley.ac.uk/vino-ci0/fisher_hierarchic.ps},
}

@article{Vinokourov02,
  author  = {Alexei Vinokourov and Mark Girolami},
  title   = {A Probabilistic Framework for the Hierarchic Organisation and Classification of Document Collections},
  journal = {Journal of Intelligent Information Systems},
  volume  = {18},
  number  = {2/3},
  pages   = {153--172},
  year    = {2002},
  note    = {Special Issue on Automated Text Categorization},
  url     = {http://www.wkap.nl/article.pdf?391244},
}

@inproceedings{Rubinstein97,
  author    = {Y. Dan Rubinstein and Trevor Hastie},
  title     = {Discriminative vs. Informative Learning},
  booktitle = {Third International Conference on Knowledge Discovery and Data Mining},
  year      = {1997},
}

@techreport{Dietterich95,
  author      = {Thomas G. Dietterich and Eun Bae Kong},
  title       = {Machine Learning Bias, Statistical Bias, and Statistical Variance of Decision Tree Algorithms},
  institution = {Department of Computer Science, Oregon State University},
  year        = {1995},
}

@inproceedings{Ng02,
  author    = {Andrew Y. Ng and Michael I. Jordan},
  title     = {On Discriminative vs. Generative classifiers: A comparison of logistic regression and naive bayes},
  booktitle = {Advances in Neural Information Processing Systems 14 (NIPS*01)},
  year      = {2002},
}

% NOTE(review): the publisher field is empty although book entries require
% one, and the single-word author name looks incomplete -- verify both
% against the actual book before citing.
@book{Fedov75,
  author    = {Fedov},
  title     = {Optimal Experiment Design},
  publisher = {},
  year      = {1975},
}

@inproceedings{Cooley99,
  author    = {Robert Cooley},
  title     = {Classification of News Stories Using Support Vector Machines},
  booktitle = {IJCAI-99 Text Mining Workshop},
  year      = {1999},
}

@article{Kaplan58,
  author  = {E. L. Kaplan and Paul Meier},
  title   = {Nonparametric estimation from incomplete observations},
  journal = {Journal of the American Statistical Association},
  volume  = {53},
  number  = {282},
  pages   = {457--481},
  year    = {1958},
}

@article{Efron81,
  author  = {Bradley Efron},
  title   = {Censored data and the bootstrap},
  journal = {Journal of the American Statistical Association},
  volume  = {76},
  number  = {374},
  pages   = {312--319},
  year    = {1981},
}

@article{Efron79t,
  author  = {Bradley Efron},
  title   = {Computers and the Theory of Statistics: Thinking the Unthinkable},
  journal = {SIAM Review},
  volume  = {21},
  number  = {4},
  pages   = {460--480},
  year    = {1979},
}

@article{Efron79,
  author  = {Bradley Efron},
  title   = {Bootstrap methods: another look at the jackknife},
  journal = {Annals of Statistics},
  volume  = {7},
  number  = {1},
  pages   = {1--26},
  year    = {1979},
}

@inproceedings{Koller97,
  author    = {Daphne Koller and Mehran Sahami},
  title     = {Hierarchically classifying documents using very few words},
  booktitle = {Proceedings of the Fourteenth International Conference on Machine Learning},
  pages     = {170--178},
  year      = {1997},
}

@article{Evgeniou00,
  author  = {Theodoros Evgeniou and Massimiliano Pontil and Tomaso Poggio},
  title   = {Regularization Networks and Support Vector Machines},
  journal = {Advances in Computational Mathematics},
  volume  = {13},
  pages   = {1--50},
  year    = {2000},
}

@book{Vapnik98,
  author    = {Vladimir N. Vapnik},
  title     = {Statistical Learning Theory},
  publisher = {John Wiley \& Sons},
  year      = {1998},
}

@book{Apostol67,
  author    = {Tom M. Apostol},
  title     = {Calculus},
  publisher = {John Wiley and Sons},
  volume    = {I},
  edition   = {second},
  year      = {1967},
}

@techreport{Rifkin01,
  author      = {Jason D. M. Rennie and Ryan Rifkin},
  title       = {Improving multiclass text classification with the {S}upport {V}ector {M}achine},
  institution = {Massachusetts Institute of Technology, Artificial Intelligence Laboratory},
  number      = {AIM-2001-026},
  year        = {2001},
  url         = {http://www.ai.mit.edu/~jrennie/papers/aimemo2001.ps.gz},
}

@inproceedings{Crammer01,
  author    = {Koby Crammer and Yoram Singer},
  title     = {Improved output coding for classification using continuous relaxation},
  booktitle = {Advances in Neural Information Processing Systems 13 (NIPS*00)},
  year      = {2001},
  note      = {Relaxes a particular code matrix.  (1) Train binary classifiers on discrete matrix, then (2) relax matrix using learned classifiers.},
}

@article{Ferguson73,
  author  = {T. S. Ferguson},
  title   = {A Bayesian analysis of some nonparametric problems},
  journal = {Annals of Statistics},
  volume  = {1},
  pages   = {209--230},
  year    = {1973},
}

@book{Cristianini00,
  author    = {Nello Cristianini and John Shawe-Taylor},
  title     = {An introduction to support vector machines},
  publisher = {Cambridge University Press},
  year      = {2000},
}

@mastersthesis{Rennie01,
  author = {Jason D. M. Rennie},
  title  = {Improving Multi-class Text Classification with Naive Bayes},
  school = {Massachusetts Institute of Technology},
  year   = {2001},
}

@inproceedings{Yang97,
  author    = {Yiming Yang and J. O. Pedersen},
  title     = {A comparative study on feature selection in text categorization},
  booktitle = {Proceedings of the Fourteenth International Conference on Machine Learning},
  year      = {1997},
}

@inproceedings{Joachims97a,
  author    = {Thorsten Joachims},
  title     = {A probabilistic analysis of the Rocchio algorithm with TFIDF for text categorization},
  booktitle = {Proceedings of the Fourteenth International Conference on Machine Learning},
  year      = {1997},
  keywords  = {20 newsgroups, reuters-22173},
}

@inproceedings{Freitag99,
  author    = {Dayne Freitag and Andrew McCallum},
  title     = {Information Extraction with HMMs and Shrinkage},
  booktitle = {Proceedings of the AAAI'99 Workshop on Machine Learning for Information Extraction},
  year      = {1999},
}

@inproceedings{Lewis98,
  author    = {David D. Lewis},
  title     = {Naive ({B}ayes) at forty: {T}he independence assumption in information retrieval},
  booktitle = {Proceedings of the Tenth European Conference on Machine Learning},
  year      = {1998},
}

@book{DudaHart,
  author    = {Richard O. Duda and Peter E. Hart and David G. Stork},
  title     = {Pattern Classification},
  publisher = {Wiley-Interscience},
  year      = {2000},
}

@book{DudaHart73,
  author    = {Richard O. Duda and Peter E. Hart},
  title     = {Pattern Classification and Scene Analysis},
  publisher = {Wiley and Sons, Inc.},
  year      = {1973},
}

@techreport{Ristad95,
  author      = {Eric Sven Ristad},
  title       = {A natural law of succession},
  institution = {Princeton University},
  number      = {CS-TR-495-95},
  year        = {1995},
}

@book{Laplace95,
  author    = {Pierre-Simon Laplace},
  title     = {Philosophical Essays on Probabilities},
  publisher = {Springer-Verlag},
  address   = {New York},
  year      = {1995},
  comment   = {Translated by Andrew I. Dale from the 5th French edition of 1825},
}

@inproceedings{Lewis94a,
  author    = {David D. Lewis and William A. Gale},
  title     = {A sequential algorithm for training text classifiers},
  booktitle = {Proceedings of the Seventeenth Annual International ACM-SIGIR Conference on Research and Development in Information Retrieval},
  year      = {1994},
}

@inproceedings{Chakrabarti97,
  author    = {Soumen Chakrabarti and Byron Dom and Rakesh Agrawal and Prabhakar Raghavan},
  title     = {Using taxonomy, discriminants and signatures for navigating in text databases},
  booktitle = {Proceedings of the 23rd VLDB Conference},
  year      = {1997},
}

@inproceedings{Domingos96,
  author    = {Pedro Domingos and Michael Pazzani},
  title     = {Beyond independence: conditions for the optimality of the simple bayesian classifier},
  booktitle = {Machine Learning: Proceedings of the Thirteenth International Conference},
  year      = {1996},
}

@book{Vapnik95,
  author    = {Vladimir Vapnik},
  title     = {The Nature of Statistical Learning Theory},
  publisher = {Springer-Verlag},
  year      = {1995},
}

@inproceedings{Dumais98,
  author    = {Susan Dumais and John Platt and David Heckerman and Mehran Sahami},
  title     = {Inductive Learning Algorithms and Representations for Text Classification},
  booktitle = {Proceedings of the Seventh International Conference on Information and Knowledge Management},
  year      = {1998},
}

@inproceedings{Yang99,
  author    = {Yiming Yang and Xin Liu},
  title     = {A re-examination of text categorization methods},
  booktitle = {Proceedings of the ACM SIGIR Conference on Research and Development in Information Retrieval},
  year      = {1999},
  keywords  = {text, classification, reuters-21578, svm, naive bayes, llsf, nnet, knn},
}

@inproceedings{Wiener95,
  author    = {Erik Wiener and Jan O. Pedersen and Andreas S. Weigend},
  title     = {A neural network approach to topic spotting},
  booktitle = {Fourth Annual Symposium on Document Analysis and Information Retrieval (SDAIR-95)},
  year      = {1995},
  keywords  = {text, categorization, reuters-22173},
}

@article{Masulli00,
  author  = {Francesco Masulli and Giorgio Valentini},
  title   = {Effectiveness of error correcting output codes in multiclass learning problems},
  journal = {Lecture notes in computer science},
  volume  = {1857},
  pages   = {107--116},
  year    = {2000},
}

@inproceedings{CohnNIPS01,
  author    = {David Cohn and Thomas Hofmann},
  title     = {The missing link - a probabilistic model of document content and hypertext connectivity},
  booktitle = {Neural Information Processing Systems 13},
  year      = {2001},
}

% NOTE(review): the year was missing (required for article entries); 1972
% is inferred from the citation key -- confirm against the journal issue.
@article{Lucas72,
  author  = {William F. Lucas},
  title   = {An Overview of the Mathematical Theory of Games},
  journal = {Management Science},
  volume  = {18},
  number  = {5},
  year    = {1972},
}

@article{Lee99,
  author  = {Daniel D. Lee and H. Sebastian Seung},
  title   = {Learning the parts of objects by non-negative matrix factorization},
  journal = {Nature},
  volume  = {401},
  pages   = {788--791},
  year    = {1999},
}

@inproceedings{Lee00,
  author    = {Daniel D. Lee and H. Sebastian Seung},
  title     = {Algorithms for non-negative matrix factorization},
  booktitle = {Advances in Neural Information Processing Systems 13},
  year      = {2000},
}

@article{MacKay94,
  author  = {David J. C. MacKay and Linda C. Bauman Peto},
  title   = {A hierarchical dirichlet language model},
  journal = {Natural Language Engineering},
  year    = {1994},
}

@inproceedings{Kindermann00,
  author    = {J{\"o}rg Kindermann and Edda Leopold and Gerhard Paa{\ss}},
  title     = {Multi-class Classification with Error Correcting Codes},
  booktitle = {Treffen der GI-Fachgruppe 1.1.3, Maschinelles Lernen},
  year      = {2000},
}

% NOTE(review): the year (1994) and the report number (HPL-BRIMS-1999-04)
% appear to disagree -- confirm which one is correct.
@techreport{Ganesh94,
  author      = {A. Ganesh and B. M. Hambly and Neil O'Connell and Dudley Stark and P. J. Upton},
  title       = {Poissonian Behavior of Ising Spin Systems in an External Field},
  institution = {HP Laboratories},
  number      = {HPL-BRIMS-1999-04},
  year        = {1994},
}

% NOTE(review): article entries require a journal field, which is missing
% here; add the venue (or switch the entry type) once confirmed.
@article{Hsu01,
  author = {Chih-Wei Hsu and Chih-Jen Lin},
  title  = {A comparison of methods for multi-class support vector machines},
  year   = {2001},
}

@techreport{Fung91,
  author      = {Glenn Fung and O. L. Mangasarian},
  title       = {Proximal Support Vector Classifiers},
  institution = {Data Mining Institute},
  number      = {01-02},
  year        = {2001},
}

@inproceedings{Kivinen95,
  author    = {Jyrki Kivinen and Manfred K. Warmuth},
  title     = {The perceptron algorithm vs. Winnow: linear vs. logarithmic mistake bounds when few input variables are relevant},
  booktitle = {Proceedings of the eighth annual conference on Computational learning theory},
  year      = {1995},
}

@inproceedings{Freund96,
  author    = {Yoav Freund and Robert E. Schapire},
  title     = {Experiments with a new boosting algorithm},
  booktitle = {Machine Learning: Proceedings of the Thirteenth International Conference},
  year      = {1996},
}

@article{Freund99,
  author  = {Yoav Freund and Robert E. Schapire},
  title   = {A short introduction to boosting},
  journal = {Journal of Japanese Society for Artificial Intelligence},
  volume  = {14},
  number  = {5},
  pages   = {771--780},
  year    = {1999},
}

@techreport{Weston98,
  author      = {Jason Weston and Chris Watkins},
  title       = {Multi-class Support Vector Machines},
  institution = {Royal Holloway University of London},
  number      = {CSD-TR-98-04},
  year        = {1998},
}


@book{Cover91,
  author    = {Thomas Cover and Joy Thomas},
  title     = {Elements of Information Theory},
  publisher = {John Wiley \& Sons, Inc.},
  year      = {1991},
}

@incollection{Platt99,
  author    = {John Platt},
  title     = {Fast Training of Support Vector Machines using Sequential Minimal Optimization},
  booktitle = {Advances in Kernel Methods---Support Vector Learning},
  editor    = {B. Sch{\"o}lkopf and C. Burges and A. Smola},
  publisher = {MIT Press},
  year      = {1999},
}

@misc{20Newsgroups,
  author       = {Ken Lang},
  title        = {20 Newsgroups},
  howpublished = {http://www.ai.mit.edu/people/jrennie/20Newsgroups},
  year         = {1995},
}

@misc{Sector105,
  author       = {Kamal Nigam},
  title        = {Industry Sector Data},
  howpublished = {http://www.cs.cmu.edu/$\sim$TextLearning/datasets.html},
  year         = {2000},
}

@misc{Reuters21578,
  author       = {David Lewis},
  title        = {Reuters-21578},
  howpublished = {http://www.daviddlewis.com/resources/testcollections/reuters21578/},
  year         = {1996},
}

@misc{SvmFu,
  author       = {Ryan Rifkin},
  title        = {SvmFu},
  howpublished = {http://five-percent-nation.mit.edu/SvmFu/},
  year         = {2000},
}

@inproceedings{Glickman99AAAI,
  author    = {Oren Glickman and Rosie Jones},
  title     = {Examining machine learning for adaptable end-to-end information extraction systems},
  booktitle = {AAAI-99 Workshop on Machine Learning for Information Extraction},
  year      = {1999},
}

@inproceedings{Frietag98ICML,
  author    = {Dayne Freitag},
  title     = {Multistrategy learning for information extraction},
  booktitle = {Machine Learning: Proceedings of the Fifteenth International Conference},
  year      = {1998},
}

@inproceedings{Freitag98AAAI,
  author    = {Dayne Freitag},
  title     = {Information extraction from HTML: application of a general machine learning approach},
  booktitle = {Proceedings of the Fifteenth National Conference on Artificial Intelligence},
  year      = {1998},
}

@inproceedings{Ghani00,
  author    = {Rayid Ghani},
  title     = {Using Error-Correcting Codes for Text Classification},
  booktitle = {Machine Learning: Proceedings of the Seventeenth International Conference},
  year      = {2000},
}

@article{Drucker99,
  author  = {Harris Drucker and Donghui Wu and Vladimir N. Vapnik},
  title   = {Support vector machines for spam categorization},
  journal = {IEEE Transactions on Neural Networks},
  volume  = {10},
  number  = {5},
  pages   = {1048--1055},
  year    = {1999},
}

@inproceedings{Tishby01NIPS,
  author    = {Naftali Tishby and Noam Slonim},
  title     = {Data clustering by markovian relaxation and the information bottleneck method},
  booktitle = {Advances in Neural Information Processing Systems 13},
  year      = {2000},
}

@inproceedings{Jaakkola98NIPS,
  author    = {Tommi Jaakkola and David Haussler},
  title     = {Exploiting generative models in discriminative classifiers},
  booktitle = {Advances in Neural Information Processing Systems 11},
  year      = {1998},
}

@techreport{Scholkopf99,
  author      = {Bernhard Sch{\"o}lkopf and John C. Platt and John Shawe-Taylor and Alex J. Smola and Robert C. Williamson},
  title       = {Estimating the Support of a High-Dimensional Distribution},
  institution = {Microsoft Research},
  number      = {MSR-TR-99-87},
  year        = {1999},
}

@inproceedings{Katz97,
  author    = {Boris Katz},
  title     = {From Sentence Processing to Information Access on the World Wide Web},
  booktitle = {Proceedings of AAAI Spring Symposium on Natural Language Processing for the World Wide Web},
  year      = {1997},
}

@techreport{Haussler99,
  author      = {David Haussler},
  title       = {Convolution kernels on discrete structures},
  institution = {University of California, Santa Cruz},
  number      = {UCSC-CRL-99-10},
  year        = {1999},
}

@inproceedings{Kong95,
  author    = {Eun Bae Kong and Thomas G. Dietterich},
  title     = {Error-Correcting Output Coding Corrects Bias and Variance},
  booktitle = {Machine Learning: Proceedings of the Twelfth International Conference},
  publisher = {Morgan Kaufmann},
  address   = {Tahoe City, CA},
  pages     = {313--321},
  year      = {1995},
}

@inproceedings{Guruswami99,
  author    = {Venkatesan Guruswami and Amit Sahai},
  title     = {Multiclass Learning, Boosting and Error-Correcting Codes},
  booktitle = {Proceedings of the Twelfth Annual Conference on Computational Learning Theory},
  year      = {1999},
}

@incollection{mackay98,
  author    = {D. J. C. MacKay},
  title     = {Introduction to Monte Carlo Methods},
  booktitle = {Learning in Graphical Models},
  editor    = {Michael I. Jordan},
  publisher = {Kluwer Academic Publishers},
  address   = {Netherlands},
  pages     = {175--204},
  year      = {1998},
}

@article{Allwein00,
  author   = {Erin L. Allwein and Robert E. Schapire and Yoram Singer},
  title    = {Reducing Multiclass to Binary: A Unifying Approach for Margin Classifiers},
  journal  = {Journal of Machine Learning Research},
  volume   = {1},
  pages    = {113--141},
  year     = {2000},
  keywords = {ecoc, classification},
}

@inproceedings{Crammer00,
  author    = {Koby Crammer and Yoram Singer},
  title     = {On the Learnability and Design of Output Codes for Multiclass Problems},
  booktitle = {Thirteenth Annual Conference on Computational Learning Theory},
  year      = {2000},
  note      = {assumes the existence of a set of fixed, binary classifiers},
  keywords  = {code matrix, multiclass, classification},
}

@inproceedings{Schapire97,
  author    = {Robert E. Schapire},
  title     = {Using output codes to boost multiclass learning problems},
  booktitle = {Machine Learning: Proceedings of the Fourteenth International Conference},
  year      = {1997},
}

@inproceedings{Dietterich91,
  author    = {Tom G. Dietterich and Ghulum Bakiri},
  title     = {Error-correcting output codes: A general method for improving multiclass inductive learning programs},
  booktitle = {Proceedings of the Ninth National Conference on Artificial Intelligence},
  publisher = {AAAI Press},
  address   = {Anaheim, CA},
  pages     = {572--577},
  year      = {1991},
}

@article{Blum97AI,
  author  = {Avrim L. Blum and Pat Langley},
  title   = {Selection of Relevant Features and Examples in Machine Learning},
  journal = {Artificial Intelligence},
  volume  = {97},
  pages   = {294--302},
  year    = {1997},
}

@inproceedings{Koller96ML,
  author    = {Daphne Koller and Mehran Sahami},
  title     = {Toward Optimal Feature Selection},
  booktitle = {Machine Learning: Proceedings of the Thirteenth International Conference},
  year      = {1996},
}

@inproceedings{Caruana94ML,
  author    = {Rich Caruana and Dayne Freitag},
  title     = {Greedy Attribute Selection},
  booktitle = {Machine Learning: Proceedings of the Eleventh International Conference},
  year      = {1994},
}

@article{Dietterich98,
  author  = {Thomas G. Dietterich},
  title   = {Approximate Statistical Tests for Comparing Supervised Classification Learning Algorithms},
  journal = {Neural Computation},
  volume  = {10},
  number  = {7},
  pages   = {1895--1924},
  year    = {1998},
}

@inproceedings{Craven98,
  author    = {Mark Craven and Sean Slattery and Kamal Nigam},
  title     = {First-Order Learning for Web Mining},
  booktitle = {Proceedings of the Tenth European Machine Learning Conference},
  year      = {1998},
}

@inproceedings{Jaakkola99,
  author    = {Tommi Jaakkola and Marina Meila and Tony Jebara},
  title     = {Maximum entropy discrimination},
  booktitle = {Advances in Neural Information Processing Systems 12},
  year      = {1999},
}

@inproceedings{Scott99,
  author    = {Sam Scott and Stan Matwin},
  title     = {Feature Engineering for Text Classification},
  booktitle = {Machine Learning: Proceedings of Sixteenth International Conference},
  year      = {1999},
}

@inproceedings{Berger99,
  author    = {Adam Berger},
  title     = {Error-correcting output coding for text classification},
  booktitle = {Proceedings of IJCAI-99 Workshop on Machine Learning for Information Filtering},
  address   = {Stockholm, Sweden},
  year      = {1999},
}

@unpublished{ishmail,
  author = {John Helfman and Charles Isbell},
  title  = {Ishmail: Immediate Identification of Important Information},
  note   = {http://www.research.att.com/$\sim$jon/ishmail},
  year   = {1995},
}

@unpublished{rainbow,
  author = {Andrew Kachites McCallum},
  title  = {Bow: A toolkit for statistical language modeling, text retrieval, classification and clustering},
  note   = {http://www.cs.cmu.edu/$\sim$mccallum/bow},
  year   = {1996},
}

@inproceedings{Segal99,
  author    = {Richard B. Segal and Jeffrey O. Kephart},
  title     = {MailCat: An Intelligent Assistant for Organizing E-Mail},
  booktitle = {Proceedings of the Third International Conference on Autonomous Agents},
  year      = {1999},
}

@inproceedings{yang00,
  author    = {Yiming Yang and Thomas Ault and Thomas Pierce and Charles W. Lattimer},
  title     = {Improving text categorization methods for event tracking},
  booktitle = {Proceedings of ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR)},
  year      = {2000},
}

@inproceedings{allan98,
  author    = {James Allan and Ron Papka and Victor Lavrenko},
  title     = {On-line New Event Detection and Tracking},
  booktitle = {Proceedings of ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR)},
  year      = {1998},
}

@inproceedings{cohen97,
  author    = {William W. Cohen and Daniel Kudenko},
  title     = {Transferring and Retraining Learned Information Filters},
  booktitle = {Proceedings of AAAI-97},
  year      = {1997},
}

@book{Raiffa61,
  author    = {Howard Raiffa and Robert Schlaifer},
  title     = {Applied Statistical Decision Theory},
  publisher = {Harvard University},
  year      = {1961},
  misc      = {Division of Research Graduate School of Business Administration Harvard University},
}

@inproceedings{Slonim00,
  author    = {Noam Slonim and Naftali Tishby},
  title     = {Document Clustering using Word Clusters via the Information Bottleneck Method},
  booktitle = {Proceedings of SIGIR-2000},
  year      = {2000},
}

@article{Baxter91,
  author  = {Jonathan Baxter},
  title   = {The Canonical Distortion Measure for Vector Quantization and Function Approximation},
  journal = {Machine Learning},
  year    = {1992},
}

@article{Herbster98,
  author  = {Mark Herbster and Manfred K. Warmuth},
  title   = {Tracking the Best Expert},
  journal = {Machine Learning},
  year    = {1998},
}

@article{DellaPietra97,
  author   = {Stephen Della Pietra and Vincent Della Pietra and John Lafferty},
  title    = {Inducing Features of Random Fields},
  journal  = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume   = {19},
  number   = {4},
  year     = {1997},
  keywords = {improved iterative scaling, feature selection, text},
}

@techreport{DellaPietra95,
  author      = {Stephen Della Pietra and Vincent Della Pietra and John Lafferty},
  title       = {Inducing Features of Random Fields},
  institution = {Carnegie Mellon University},
  number      = {CMU-CS-95-144},
  year        = {1995},
  keywords    = {improved iterative scaling, feature selection, text},
}

@article{Lai85,
  author  = {T. L. Lai and Herbert Robbins},
  title   = {Asymptotically Efficient Adaptive Allocation Rules},
  journal = {Advances in Applied Mathematics},
  volume  = {6},
  year    = {1985},
}

@article{Robbins52,
  author  = {Herbert Robbins},
  title   = {Some Aspects of the Sequential Design of Experiments},
  journal = {Bulletin of the American Mathematical Society},
  year    = {1952},
}

@inproceedings{Oliver96,
  author    = {Jonathan J. Oliver and Rohan A. Baxter and Chris S. Wallace},
  title     = {Unsupervised {L}earning Using {MML}},
  booktitle = {Machine Learning: Proceedings of the Thirteenth International Conference},
  year      = {1996},
  url       = {http://www.cs.monash.edu.au/~jono/},
}

@techreport{Friedman98,
  author      = {Jerome Friedman and Trevor Hastie and Robert Tibshirani},
  title       = {Additive logistic regression: a statistical view of boosting},
  institution = {Stanford University},
  year        = {1998},
  url         = {http://www-stat.stanford.edu/~hastie/Papers/boost.ps},
}

@inproceedings{Menczer97,
  author    = {Filippo Menczer},
  title     = {{ARACHNID}: Adaptive Retrieval Agents Choosing Heuristic Neighborhoods for Information Discovery},
  booktitle = {Machine Learning: Proceedings of the Fourteenth International Conference},
  pages     = {227--235},
  year      = {1997},
  url       = {http://dollar.biz.uiowa.edu/~fil/papers.html},
}

@inproceedings{Webwatcher,
  author       = {T. Joachims and D. Freitag and T. Mitchell},
  title        = {WebWatcher: A Tour Guide for the {W}orld {W}ide {W}eb},
  booktitle    = {Proceedings of the Fifteenth International Joint Conference on Artificial Intelligence (IJCAI-97)},
  oldbooktitle = {Proceedings of IJCAI-97},
  pages        = {770--777},
  year         = {1997},
  url          = {http://www.cs.cmu.edu/afs/cs/user/dayne/www/cv.html},
}

@article{Cohn94,
  author  = {David Cohn and Les Atlas and Richard Ladner},
  title   = {Improving generalization with active learning},
  journal = {Machine Learning},
  volume  = {15},
  number  = {2},
  year    = {1994},
  url     = {http://www.cs.cmu.edu/~cohn/psyche/selsampling.ps.Z},
}

@article{Cohn96,
  author  = {David Cohn and Zoubin Ghahramani and Michael Jordan},
  title   = {Active learning with statistical models},
  journal = {Journal of Artificial Intelligence Research},
  volume  = {4},
  year    = {1996},
  url     = {http://www.cs.washington.edu/research/jair/abstracts/cohn96a.html},
}

@article{Nash51,
  author  = {John Nash},
  title   = {Non-cooperative Games},
  journal = {Annals of Mathematics},
  volume  = {54},
  number  = {2},
  pages   = {286--295},
  year    = {1951},
  url     = "http://www.jstor.org/fcgi-bin/jstor/viewitem.fcg/0003486x/di961724/96p0127a/0?currentResult=0003486x%2bdi961724%2b96p0127a%2b0%2c01%2b19510900%2b9991%2b80489099&searchID=8dd5531e.9474529030&nextHit=01&frame=frame&userID=803425c8@mit.edu/018dd5531e00502efe76&displayChunk=10&psearchExp=&viewContent=Article&config=jstor&dpi=3",
  summary = {},
}

@inproceedings{CaruanaICML94,
  author    = {Rich Caruana and Dayne Freitag},
  title     = {Greedy Attribute Selection},
  booktitle = {Machine Learning: Proceedings of the Eleventh International Conference},
  editor    = {William Cohen and Haym Hirsh},
  publisher = {Morgan Kaufmann Publishers},
  address   = {San Francisco, CA},
  year      = {1994},
  url       = {http://www.cs.cmu.edu/afs/cs/user/caruana/pub/papers/ml94.ps},
  summary   = {},
}

@inproceedings{JohnICML94,
  author    = {George John and Ron Kohavi and Karl Pfleger},
  title     = {Irrelevant Features and the Subset Selection Problem},
  booktitle = {Machine Learning: Proceedings of the Eleventh International Conference},
  editor    = {William Cohen and Haym Hirsh},
  publisher = {Morgan Kaufmann Publishers},
  address   = {San Francisco, CA},
  pages     = {121--129},
  year      = {1994},
  url       = {http://robotics.stanford.edu/~ronnyk/ml94.ps},
  summary   = {},
}

@inproceedings{LewisSNL92,
  author    = {David D. Lewis},
  title     = {Feature Selection and Feature Extraction for Text Categorization},
  booktitle = {Proceedings of Speech and Natural Language workshop},
  year      = {1992},
  url       = {http://www.research.att.com/~lewis/papers/lewis92e.ps},
  summary   = {Feature selection yields improved text classification on data sets such as Reuters.  Optimal number of features is very small (10 to 15).},
}

@inproceedings{MladenicECML98,
  author    = {Dunja Mladenic},
  title     = {Feature subset selection in text-learning},
  booktitle = {Proceedings of the Tenth European Conference on Machine Learning},
  year      = {1998},
}

@inproceedings{McCallum98,
  author    = {Andrew McCallum and Kamal Nigam},
  title     = {A Comparison of Event Models for Naive Bayes Text Classification},
  booktitle = {Proceedings of the AAAI-98 workshop on Learning for Text Categorization},
  year      = {1998},
  url       = {http://www.cs.cmu.edu/~mccallum/papers/multinomial-aaai98w.ps},
}

@inproceedings{BakerSIGIR98,
  author    = {L. Douglas Baker and Andrew Kachites McCallum},
  title     = {Distributional Clustering of Words for Text Classification},
  booktitle = {Proceedings of SIGIR'98},
  year      = {1998},
  url       = {http://www.cs.cmu.edu/~mccallum/papers/clustering-sigir98.ps.gz},
}

@techreport{Lin99,
  author      = {Yi Lin},
  title       = {Support Vector Machines and the Bayes Rule in Classification},
  institution = {University of Wisconsin},
  number      = {1014},
  year        = {1999},
  url         = {http://www.stat.wisc.edu/~yilin/tr1014.ps},
}

@article{Freund97,
  author  = {Yoav Freund and H. Sebastian Seung and Eli Shamir and Naftali Tishby},
  title   = {Selective Sampling Using the Query by Committee Algorithm},
  journal = {Machine Learning},
  volume  = {28},
  pages   = {133--168},
  year    = {1997},
  url     = {},
}

@article{Burges98,
  author  = {Christopher J. C. Burges},
  title   = {A Tutorial on Support Vector Machines for Pattern Recognition},
  journal = {Data Mining and Knowledge Discovery},
  volume  = {2},
  number  = {2},
  pages   = {121--167},
  year    = {1998},
  url     = {http://svm.research.bell-labs.com/papers/tutorial_web_page.ps.gz},
}

% NOTE(review): the journal field is empty although article entries require
% one, and the year (1998) does not match the 99 in the key -- confirm the
% venue and date before citing.
@article{Campbell99,
  author  = {Colin Campbell and Nello Cristianini},
  title   = {Simple Training Algorithms for Support Vector Machines},
  journal = {},
  year    = {1998},
  url     = {http://lara.enm.bris.ac.uk/cig/gzipped/KA-ieee.ps.gz},
}

@inproceedings{Dasgupta99,
  author    = {Sanjoy Dasgupta},
  title     = {Learning Mixtures of Gaussians},
  booktitle = {Foundations of Computer Science},
  year      = {1999},
  url       = {http://www.cs.berkeley.edu/~dasgupta/focs2.ps},
}

% "Machine Learning" is a journal, so this is an article rather than a
% conference paper.
% NOTE(review): volume, number and pages were added from memory of the
% published article -- confirm against the journal.
@article{HeckermanML95,
  author  = {David Heckerman and Dan Geiger and David Chickering},
  title   = {Learning Bayesian Networks: The Combination of Knowledge and Statistical Data},
  journal = {Machine Learning},
  volume  = {20},
  number  = {3},
  pages   = {197--243},
  year    = {1995},
  url     = {ftp://ftp.research.microsoft.com/pub/tr/tr-94-09.ps},
}

% NOTE(review): booktitle is not a field that technical-report entries use
% in the standard styles; it looks like residue from a neighbouring entry --
% confirm and drop if so.
@techreport{HeckermanTR95,
  author      = {David Heckerman},
  title       = {A Tutorial on Learning With Bayesian Networks},
  institution = {Microsoft Research},
  number      = {MSR-TR-95-06},
  year        = {1995},
  booktitle   = {Machine Learning},
  url         = {ftp://ftp.research.microsoft.com/pub/tr/tr-95-06.ps},
}

@inproceedings{BoykovICCV99,
  author    = {Yuri Boykov and Olga Veksler and Ramin Zabih},
  title     = {Fast Approximate Energy Minimization via Graph Cuts},
  booktitle = {International Conference on Computer Vision},
  year      = {1999},
  url       = {http://www2.cs.cornell.edu/html/rdz/papers/iccv.pdf},
}

% Boykov, Veksler and Zabih: 1999 workshop paper on energy minimization.
@inproceedings{BoykovEMM99
,author    = {Yuri Boykov and Olga Veksler and Ramin Zabih}
,title     = {A New Algorithm for Energy Minimization With Discontinuities}
,booktitle = {International Workshop on Energy Minimization Methods in Computer Vision and Pattern Recognition}
,year      = {1999}
,url       = {http://www2.cs.cornell.edu/html/rdz/papers/emmcvpr99.pdf}
}

% Journal article (Computational Statistics Quarterly), hence the article
% type with journal/volume/pages rather than a booktitle.
@article{Jensen90
,author = "Finn Jensen and Steffen Lauritzen and Kristian Olesen"
,title = "{Bayesian} Updating in Causal Probabilistic Networks by Local Computations"
,year = 1990
,journal = "Computational Statistics Quarterly"
,volume = 4
,pages = "269--282"
,url = "http://www.cs.auc.dk/research/DSS/abstracts/jensen:laurtizen:olesen:90.html"
}

% Rennie's ifile paper; the leading "i" is braced so it stays lowercase.
@inproceedings{ifile00
,author    = {Jason D. M. Rennie}
,title     = {{i}file: An Application of Machine Learning to Mail Filtering}
,booktitle = {Proceedings of the KDD-2000 Workshop on Text Mining}
,year      = {2000}
}

% Boyan, Freitag and Joachims: AAAI workshop paper, 1996.
@inproceedings{Boyan96
,author    = {Justin Boyan and Dayne Freitag and Thorsten Joachims}
,title     = {A Machine Learning Architecture for Optimizing Web Search Engines}
,booktitle = {AAAI workshop on Internet-Based Information Systems}
,year      = {1996}
,url       = {http://www.cs.cmu.edu/~jab/pubs/boyan.laser.ps}
}

% "Scatter/Gather" is a system name; it is brace-protected so sentence-casing
% styles do not render it as "Scatter/gather".
@inproceedings{Cutting92
,author = "Douglass Cutting and David Karger and Jan Pedersen and John W. Tukey"
,title = "{Scatter/Gather}: A Cluster-based Approach to Browsing Large Document Collections"
,booktitle = "ACM SIGIR conference on research and development in information retrieval"
,year = 1992
,url = "ftp://parcftp.xerox.com/pub/qca/papers/scattergather.ps.gz"
}

% An entry may carry only one url field; the ACM citation page stays in url
% and the PostScript copy is kept in psurl, a field that styles ignore.
@inproceedings{Yang98
,author = "Yiming Yang and Tom Pierce and Jaime Carbonell"
,title = "A study of retrospective and on-line event detection"
,booktitle = "ACM SIGIR conference on research and development in information retrieval"
,year = 1998
,pages = "28--36"
,url = "http://www.acm.org/pubs/citations/proceedings/ir/290941/p28-yang/"
,psurl = "http://www.cs.cmu.edu/~yiming/papers.yy/sigir98.ps"
}

% Slonim and Tishby, NIPS-99.
@inproceedings{Slonim99
,author    = {Noam Slonim and Naftali Tishby}
,title     = {Agglomerative Information Bottleneck}
,booktitle = {Neural Information Processing Systems 12 (NIPS-99)}
,year      = {1999}
,url       = {http://www.cs.huji.ac.il/labs/learning/Papers/AIB.ps.gz}
}

% Conference (ECML) version; Joachims97b carries the same title as a
% technical report.
@inproceedings{Joachims98a
,author    = {Thorsten Joachims}
,title     = {Text Categorization with Support Vector Machines: Learning with Many Relevant Features}
,booktitle = {Proceedings of the Tenth European Conference on Machine Learning}
,year      = {1998}
,keywords  = {svm, text, classification, reuters-21578, ohsumed}
}

% Technical-report version; Joachims98a carries the same title as a
% conference paper. No report number is recorded.
@techreport{Joachims97b
,author      = {Thorsten Joachims}
,title       = {Text Categorization with Support Vector Machines: Learning with Many Relevant Features}
,institution = {University of Dortmund, Computer Science Department}
,year        = {1997}
}

% Vaithyanathan and Dom: model selection for document clustering, 1999.
@inproceedings{Vaithyanathan99
,author    = {Shivakumar Vaithyanathan and Byron Dom}
,title     = {Model Selection in Unsupervised Learning With Applications to Document Clustering}
,booktitle = {Machine Learning: Proceedings of the Sixteenth International Conference}
,year      = {1999}
,url       = {http://www.almaden.ibm.com/cs/k53/irpapers/dom.ps}
}

% ACL 1993 paper by Pereira, Tishby and Lee. The url is a paper-list page,
% not a direct link to the document.
@inproceedings{Pereira93
,author = "Fernando Pereira and Naftali Tishby and Lillian Lee"
,title = "Distributional Clustering of {English} Words"
,booktitle = "Association for Computational Linguistics"
,year = 1993
,url = "http://www.cs.huji.ac.il/labs/learning/Papers/NLP_list.html"
}

% Information Bottleneck paper (Allerton 1999). The url is a paper-list page,
% not a direct link to the document.
@inproceedings{Tishby99
,author = "Naftali Tishby and Fernando Pereira and William Bialek"
,title = "The Information Bottleneck Method"
,booktitle = "Allerton Conference on Communication, Control and Computing"
,year = 1999
,url = "http://www.cs.huji.ac.il/labs/learning/Papers/MLT_list.html"
}

% "Re:Agent" is a system name; the braces stop sentence-casing styles from
% lowercasing it.
@inproceedings{Boone98
,author = "Gary Boone"
,title = "Concept Features in {Re:Agent}, an Intelligent Email Agent"
,booktitle = "International Conference on Autonomous Agents"
,year = 1998
}

% NOTE(review): the proceedings title was left empty in the original, so this
% is typed as misc until the venue is confirmed. It is possibly an AAAI
% Spring Symposium paper -- verify the venue and the year, then retype as
% inproceedings with a real booktitle.
@misc{Marko94
,author = "Marko Balabanovi{\'c} and Yoav Shoham"
,title = "Learning Information Retrieval Agents: Experiments with Automated Web Browsing"
,year = 1994
}

% Junk e-mail filtering paper from the AAAI-98 text categorization workshop.
% Given names are stored in full; styles abbreviate them when required.
@inproceedings{Sahami98
,author = "Mehran Sahami and Susan Dumais and David Heckerman and Eric Horvitz"
,title = "A {Bayesian} Approach to Filtering Junk E-Mail"
,booktitle = "Learning for Text Categorization: Papers from the 1998 Workshop. AAAI Technical Report WS-98-05"
,year = 1998
}

% Mitchell's Machine Learning textbook.
@book{Mitchell97
,author    = {Tom Mitchell}
,title     = {Machine Learning}
,publisher = {McGraw-Hill Companies, Inc.}
,year      = {1997}
}

% The thesis label, department and country live in type/school/address, so
% nothing is repeated in a free-text note. "Magi" is a system name and is
% brace-protected.
@mastersthesis{Payne94
,author = "Terry Payne"
,title = "Learning Email Filtering Rules with {Magi}, A Mail Agent Interface"
,year = 1994
,type = "{MSc} Thesis"
,school = "Department of Computing Science, University of Aberdeen"
,address = "Scotland"
}

% Volume and issue are stored separately (volume 33, number 2).
@article{Lewis97
,author = "David D. Lewis and Kimberly A. Knowles"
,title = "Threading Electronic Mail: A Preliminary Study"
,year = 1997
,journal = "Information Processing and Management"
,volume = 33
,number = 2
,pages = "209--217"
}

% Cohen: rule learning for e-mail classification, AAAI-96 Spring Symposium.
@inproceedings{Cohen96
,author    = {William Cohen}
,title     = {Learning Rules that Classify E-Mail}
,booktitle = {Proceedings of the AAAI-96 Spring Symposium on Machine Learning in Information Access}
,year      = {1996}
}

% "NewsWeeder" is a system name with an internal capital; the braces keep it
% intact under sentence-casing styles.
@inproceedings{Lang95
,author = "Ken Lang"
,title = "{NewsWeeder}: Learning to filter netnews"
,year = 1995
,booktitle = "Machine Learning: Proceedings of the Twelfth International Conference"
,pages = "331--339"
}

% IJCAI-95 was the Fourteenth IJCAI. "WordNet" is braced as a whole word so
% that its leading W also survives sentence casing.
@inproceedings{Li95
,author = "Xiaobin Li and Stan Szpakowicz and Stan Matwin"
,title = "A {WordNet}-based algorithm for word sense disambiguation"
,year = 1995
,booktitle = "Proceedings of the Fourteenth International Joint Conference on Artificial Intelligence (IJCAI-95)"
,comment = "Heuristic rules for making use of WordNet in WSD.  For ex: if verbs match and we know one noun sense, assume that the other noun sense is the same.  57% of the time (when disambiguating w/ multiple senses) get exactly the correct solution.  Additional 15% of the time, algorithm guesses multiple senses , all of which seemed reasonable in the context of the verb (no sentence context here?!?!)"
}

% Freitag, COLING-ACL-98; the comment field holds the reader's summary.
@inproceedings{Freitag98
,author    = {Dayne Freitag}
,title     = {Toward General-Purpose Learning for Information Extraction}
,booktitle = {Proceedings of the Seventeenth International Conference on Computational Linguistics (COLING-ACL-98)}
,year      = {1998}
,comment   = {Do information extraction on reuters acquisition set; compare simple learners (rote/kn1, naive Bayes) with SRV (relational rule learner) and SRV plus linguistic information (link grammar parser, Wordnet synonym/hypernym relations).  Linguistics generally provide higher accuracy for loss of coverage.  Bayes does well in dollar amount, seller abbreviation and seller.  SRV+ling does well in acquired party abbreviation (reason: high frequency in linguistically rich text).}
}

% Mihalcea and Moldovan, COLING-ACL 1998 WordNet workshop. The url is a
% research page, not a direct link to the paper.
@inproceedings{Mihalcea98COLING
,author = "Rada Mihalcea and Dan Moldovan"
,title = "Word Sense Disambiguation Based on Semantic Density"
,year = 1998
,booktitle = "Proceedings of COLING-ACL Workshop on Usage of Word{N}et in Natural Language Processing Systems"
,url = "http://www.seas.smu.edu/~rada/research.html"
}

% Mihalcea and Moldovan, ACL 1999. The url is a research page, not a direct
% link to the paper; the comment field holds the reader's summary.
@inproceedings{Mihalcea99ACL
,author = "Rada Mihalcea and Dan Moldovan"
,title = "A Method for Word Sense Disambiguation of Unrestricted Text"
,year = 1999
,booktitle = "Proceedings of ACL '99"
,url = "http://www.seas.smu.edu/~rada/research.html"
,comment = "Disambiguate word pairs by doing a search on all possible sense combinations (using hypernyms, synsets to distinguish) and selecting the sense pair with the greatest number of hits.  Reported accuracy is 80\%.  As usual, he complains that labeled data isn't always labeled correctly and that WordNet is too fine-grained.  Not all that interesting, but supposedly this is the paper to beat."
}

% "WordNet" is braced as a whole word in the title so sentence casing keeps
% both capitals; the accent uses the braced special-character form so the
% name sorts correctly.
@inproceedings{Gonzalo98
,author = "Julio Gonzalo and Felisa Verdejo and Irina Chugur and Juan Cigarr{\'a}n"
,title = "Indexing with {WordNet} synsets can improve text retrieval"
,year = 1998
,booktitle = "Proceedings of COLING-ACL Workshop on Usage of Word{N}et in Natural Language Processing Systems"
,url = "http://www.ai.sri.com/~harabagi/coling-acl98/acl_work/acl_work.html"
,comment = "Do IR w/ WordNet synset expansion with varying degrees of WSD.  WSD done by hand -- random variation added to measure degrees.  Used SEMCOR documents, used short summaries of documents as queries.  Indexing by word senses (w/o synsets) improves precision, hurts recall."
}

% The Brown Corpus book by Kucera and Francis. The citation key is kept
% as-is so existing citations continue to resolve.
@book{Francis67
,author = "Henry Ku{\v{c}}era and W. Nelson Francis"
,title = "Computational Analysis of Present-day {American} {English}"
,publisher = "Brown University Press"
,year = 1967
,address = "Providence, RI"
}

% Agirre and Rigau, COLING 1996.
@inproceedings{Agirre96
,author    = {Eneko Agirre and German Rigau}
,title     = {Word Sense Disambiguation Using Conceptual Density}
,booktitle = {COLING}
,year      = {1996}
}

% Introductory article of the WordNet "five papers" collection; the url
% points at the combined PostScript file (5papers.ps).
% NOTE(review): the published author list for this article is usually given
% as Miller, Beckwith, Fellbaum, Gross and Miller -- verify the author field
% against the journal before relying on it.
@article{fivepapers
,author = "George A. Miller and Christiane Fellbaum and Judy Kegl and Katherine J. Miller"
,title = "Introduction to {WordNet}: an on-line lexical database"
,journal = "International Journal of Lexicography"
,year = 1990
,volume = 3
,number = 4
,pages = "235--244"
,url = "ftp://ftp.cogsci.princeton.edu/pub/wordnet/5papers.ps"
,comment = "Five papers that provide a WordNet overview as given by the creators"
}

% Syed, Liu and Sung: support vectors for example selection, 1999.
@inproceedings{Syed99
,author    = {Nadeem Ahmed Syed and Huan Liu and Kah Kay Sung}
,title     = {A Study of Support Vectors on Model Independent Example Selection}
,booktitle = {Knowledge Discovery and Data Mining}
,year      = {1999}
,keywords  = {subsampling, svm, active learning}
}

% Aha, Kibler and Albert: instance-based learning, Machine Learning vol. 6.
@article{Aha91a
,author   = {David W. Aha and Dennis Kibler and Marc K. Albert}
,title    = {Instance-Based Learning Algorithms}
,journal  = {Machine Learning}
,volume   = {6}
,pages    = {37--66}
,year     = {1991}
,keywords = {active learning}
}

% Mangasarian and Musicant, JMLR volume 1. The key says 00 while the
% recorded year is 2001.
@article{Mangasarian00
,author   = {O. L. Mangasarian and David R. Musicant}
,title    = {Lagrangian Support Vector Machines}
,journal  = {Journal of Machine Learning Research}
,volume   = {1}
,pages    = {161--177}
,year     = {2001}
,keywords = {svm}
}