00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #ifndef CLUSTERING_HIERARCHICAL_CLUSTERER_H
00012 #define CLUSTERING_HIERARCHICAL_CLUSTERER_H
00013
00014 #include <vector>
00015 #include <list>
00016 #include <iostream>
00017 #include "clustering/clusterer.h"
00018 #include "point_set/point-ref.h"
00019 #include "tree/point-tree-node.h"
00020 #include "tree/tree.cc"
00021 #include "util/distance-computer.h"
00022
00023 using namespace std;
00024
00025 namespace libpmk {
00026
00027 template class Tree<PointTreeNode>;
00028
00030
00039 class HierarchicalClusterer {
00040 public:
00041 HierarchicalClusterer();
00042
00043
00045
00051 const Tree<PointTreeNode>& centers() const;
00052
00054
00060 int membership(int index) const;
00061
00063 int membership_size() const { return (int)membership_.size(); }
00064
00066
00076 void IdentifyMemberIDPath(int index, list<int>* out) const;
00077
00079
00094 void IdentifyMemberTreePath(int index, list<int>* out) const;
00095
00096
00098 void Cluster(int num_levels, int branch_factor,
00099 const vector<PointRef>& points,
00100 const DistanceComputer& distance_computer);
00101
00103
00117 void WriteToStream(ostream& output_stream) const;
00118
00120 void WriteToFile(const char* output_filename) const;
00121
00123
00129 void ReadFromStream(istream& input_stream);
00130
00132 void ReadFromFile(const char* input_filename);
00133
00135 static const int MAX_ITERATIONS;
00136
00137 private:
00138 int num_levels_;
00139 int point_dim_;
00140
00141
00142
00143
00144
00145 void RecursiveCluster(int level, const vector<int>& indices,
00146 int num_levels, int branch_factor,
00147 const vector<PointRef>& points,
00148 const DistanceComputer& distance_computer,
00149 int parent_index);
00150
00151
00152 Tree<PointTreeNode> centers_;
00153
00154
00155
00156
00157 vector<int> membership_;
00158
00159 bool done_;
00160 };
00161 }
00162
00163 #endif // CLUSTERING_HIERARCHICAL_CLUSTERER_H