hierarchical-clusterer.h

Go to the documentation of this file.
00001 // Copyright 2007, Massachusetts Institute of Technology.
00002 // The use of this code is permitted for research only. There is
00003 // absolutely no warranty for this software.
00004 //
00005 // Author: John Lee (jjl@mit.edu)
00006 //
00007 // TODO(jjl): Allow users to pass in arbitrary Clusterer objects, not just
00008 // hard-coded KMeansClusterer.
00009 //
00010 
00011 #ifndef CLUSTERING_HIERARCHICAL_CLUSTERER_H
00012 #define CLUSTERING_HIERARCHICAL_CLUSTERER_H
00013 
00014 #include <vector>
00015 #include <list>
00016 #include <iostream>
00017 #include "clustering/clusterer.h"
00018 #include "point_set/point-ref.h"
00019 #include "tree/point-tree-node.h"
00020 #include "tree/tree.cc"
00021 #include "util/distance-computer.h"
00022 
00023 using namespace std;
00024 
00025 namespace libpmk {
00026 
00027 template class Tree<PointTreeNode>;
00028 
00030 
00039 class HierarchicalClusterer {
00040 public:
00041   HierarchicalClusterer();
00042 
00043 
00045 
00051   const Tree<PointTreeNode>& centers() const;
00052 
00054 
00060   int membership(int index) const;
00061 
00063   int membership_size() const { return (int)membership_.size(); }
00064 
00066 
00076   void IdentifyMemberIDPath(int index, list<int>* out) const;
00077 
00079 
00094   void IdentifyMemberTreePath(int index, list<int>* out) const;
00095 
00096 
00098   void Cluster(int num_levels, int branch_factor,
00099                const vector<PointRef>& points,
00100                const DistanceComputer& distance_computer);
00101 
00103 
00117   void WriteToStream(ostream& output_stream) const;
00118 
00120   void WriteToFile(const char* output_filename) const;
00121 
00123 
00129   void ReadFromStream(istream& input_stream);
00130 
00132   void ReadFromFile(const char* input_filename);
00133 
00135   static const int MAX_ITERATIONS;
00136 
00137 private:
00138   int num_levels_;
00139   int point_dim_;
00140 
00141   // RecursiveCluster() takes as input which level it's going to get
00142   // clusters for, as well as a list of ints. This list of ints gives
00143   // *indices into all_points_* to cluster (note that all_points_ itself
00144   // is a list of indices into the actual data).
00145   void RecursiveCluster(int level, const vector<int>& indices,
00146                         int num_levels, int branch_factor,
00147                         const vector<PointRef>& points,
00148                         const DistanceComputer& distance_computer,
00149                         int parent_index);
00150 
00151   // The cluster centers.
00152   Tree<PointTreeNode> centers_;
00153 
00154   // A vector with size equal to the number of points being clustered.
00155   // membership_[ii] gives the node ID of the leaf node that the <ii>th
00156   // point belongs to.
00157   vector<int> membership_;
00158 
00159   bool done_;
00160 };
00161 }  // namespace libpmk
00162 
00163 #endif  // CLUSTERING_HIERARCHICAL_CLUSTERER_H

Generated on Fri Sep 21 11:39:04 2007 for libpmk2 by  doxygen 1.5.1