hierarchical-clusterer.h

Go to the documentation of this file.
00001 // Copyright 2007, Massachusetts Institute of Technology.
00002 // The use of this code is permitted for research only. There is
00003 // absolutely no warranty for this software.
00004 //
00005 // Author: John Lee (jjl@mit.edu)
00006 //
00007 // TODO(jjl): Allow users to pass in arbitrary Clusterer objects, not just
00008 // hard-coded KMeansClusterer.
00009 //
00010 #ifndef CLUSTERING_HIERARCHICAL_CLUSTERER_H
00011 #define CLUSTERING_HIERARCHICAL_CLUSTERER_H
00012 
00013 #include <vector>
00014 #include <iostream>
00015 #include "clustering/clusterer.h"
00016 #include "point_set/point-ref.h"
00017 #include "point_set/point-set-list.h"
00018 #include "point_set/mutable-point-set-list.h"
00019 #include "util/sparse-vector.h"
00020 #include "util/distance-computer.h"
00021 
00022 using namespace std;
00023 
00024 namespace libpmk {
00026 
00035 class HierarchicalClusterer {
00036  public:
00037    HierarchicalClusterer();
00038 
00039 
00041 
00052    const PointSetList& GetClusterCenters() const;
00053 
00055 
00078    const vector<LargeIndex>& GetMemberships() const;
00079 
00081    void Cluster(int num_levels, int branch_factor,
00082                 const vector<PointRef>& points,
00083                 const DistanceComputer& distance_computer);
00084    
00086 
00094    pair<int, int> GetChildRange(int level, int index) const;
00095 
00097 
00105    int GetParentIndex(int level, int index) const;
00106 
00107    
00109 
00132    void WriteToStream(ostream& output_stream) const;
00133 
00135    void WriteToFile(const char* output_filename) const;
00136 
00138 
00143    void ReadFromStream(istream& input_stream);
00144 
00146    void ReadFromFile(const char* input_filename);
00147    
00149    static const int MAX_ITERATIONS;
00150 
00151  private:
00152    // RecursiveCluster() takes as input which level it's going to get
00153    // clusters for, as well as a list of ints. This list of ints gives
00154    // *indices into all_points_* to cluster (note that all_points_ itself
00155    // is a list of indices into the actual data).
00156    void RecursiveCluster(int level, const vector<int>& indices,
00157                          int num_levels, int branch_factor,
00158                          const vector<PointRef>& points,
00159                          const DistanceComputer& distance_computer,
00160                          int parent_index);
00161 
00162 
00163    MutablePointSetList centers_;
00164 
00165    // Has the same structure as centers_, except the features are
00166    // 3-dim.  They tell you the index of the parent, and the start and
00167    // end indices of its children in the next level. -1 if N/A, like
00168    // if it has no children or if it has no parent.
00169    MutablePointSetList tree_indices_;
00170 
00171    // A vector of size num_levels. For each level, this tells you
00172    // how many centers are in that group. We need this beecause centers_
00173    // is sparsely done, meaning if there are clusters with 0 points,
00174    // they simply don't show up in centers_.
00175    vector<LargeIndex> memberships_;
00176 
00177 
00178    bool done_;
00179 };
00180 }  // namespace libpmk
00181 
00182 #endif  // CLUSTERING_HIERARCHICAL_CLUSTERER_H

Generated on Wed May 2 11:17:12 2007 for libpmk by  doxygen 1.5.1