00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #ifndef CLUSTERING_HIERARCHICAL_CLUSTERER_H
00011 #define CLUSTERING_HIERARCHICAL_CLUSTERER_H
00012
00013 #include <vector>
00014 #include <iostream>
00015 #include "clustering/clusterer.h"
00016 #include "point_set/point-ref.h"
00017 #include "point_set/point-set-list.h"
00018 #include "point_set/mutable-point-set-list.h"
00019 #include "util/sparse-vector.h"
00020 #include "util/distance-computer.h"
00021
00022 using namespace std;
00023
00024 namespace libpmk {
00026
00035 class HierarchicalClusterer {
00036 public:
00037 HierarchicalClusterer();
00038
00039
00041
00052 const PointSetList& GetClusterCenters() const;
00053
00055
00078 const vector<LargeIndex>& GetMemberships() const;
00079
00081 void Cluster(int num_levels, int branch_factor,
00082 const vector<PointRef>& points,
00083 const DistanceComputer& distance_computer);
00084
00086
00094 pair<int, int> GetChildRange(int level, int index) const;
00095
00097
00105 int GetParentIndex(int level, int index) const;
00106
00107
00109
00132 void WriteToStream(ostream& output_stream) const;
00133
00135 void WriteToFile(const char* output_filename) const;
00136
00138
00143 void ReadFromStream(istream& input_stream);
00144
00146 void ReadFromFile(const char* input_filename);
00147
00149 static const int MAX_ITERATIONS;
00150
00151 private:
00152
00153
00154
00155
00156 void RecursiveCluster(int level, const vector<int>& indices,
00157 int num_levels, int branch_factor,
00158 const vector<PointRef>& points,
00159 const DistanceComputer& distance_computer,
00160 int parent_index);
00161
00162
00163 MutablePointSetList centers_;
00164
00165
00166
00167
00168
00169 MutablePointSetList tree_indices_;
00170
00171
00172
00173
00174
00175 vector<LargeIndex> memberships_;
00176
00177
00178 bool done_;
00179 };
00180 }
00181
00182 #endif // CLUSTERING_HIERARCHICAL_CLUSTERER_H