00001 #ifndef INDII_ML_AUX_KERNELDENSITYPDF_HPP
00002 #define INDII_ML_AUX_KERNELDENSITYPDF_HPP
00003
00004 #include "Pdf.hpp"
00005 #include "PartitionTree.hpp"
00006 #include "Almost2Norm.hpp"
00007 #include "AlmostGaussianKernel.hpp"
00008 #include "kde.hpp"
00009
00010 namespace indii {
00011 namespace ml {
00012 namespace aux {
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037 template <class NT = Almost2Norm, class KT = AlmostGaussianKernel>
00038 class KernelDensityPdf : public Pdf {
00039 public:
00040
00041
00042
00043
00044
00045
00046
00047 KernelDensityPdf();
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057 KernelDensityPdf(PartitionTree* tree, const NT& N, const KT& K);
00058
00059
00060
00061
00062 virtual ~KernelDensityPdf();
00063
00064
00065
00066
00067
00068
00069 virtual void setDimensions(const unsigned int N,
00070 const bool preserve = false);
00071
00072 virtual const vector& getExpectation();
00073
00074 virtual const symmetric_matrix& getCovariance();
00075
00076 virtual vector sample();
00077
00078 virtual double densityAt(const vector& x);
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091 vector densityAt(PartitionTree& tree);
00092
00093 private:
00094
00095
00096
00097 PartitionTree* tree;
00098
00099
00100
00101
00102 NT N;
00103
00104
00105
00106
00107 KT K;
00108
00109
00110
00111
00112 vector mu;
00113
00114
00115
00116
00117 symmetric_matrix sigma;
00118
00119
00120
00121
00122 bool haveMu;
00123
00124
00125
00126
00127 bool haveSigma;
00128
00129
00130
00131
00132 void calculateExpectation();
00133
00134
00135
00136
00137 void calculateCovariance();
00138
00139
00140
00141
00142 template<class Archive>
00143 void save(Archive& ar, const unsigned int version) const;
00144
00145
00146
00147
00148 template<class Archive>
00149 void load(Archive& ar, const unsigned int version);
00150
00151
00152
00153
00154 BOOST_SERIALIZATION_SPLIT_MEMBER()
00155 friend class boost::serialization::access;
00156
00157 };
00158
00159 }
00160 }
00161 }
00162
00163 #include "boost/serialization/base_object.hpp"
00164
00165 #include <stack>
00166
00167 template <class NT, class KT>
00168 indii::ml::aux::KernelDensityPdf<NT,KT>::KernelDensityPdf() : tree(NULL),
00169 mu(0), sigma(0), haveMu(false), haveSigma(false) {
00170
00171 }
00172
00173 template <class NT, class KT>
00174 indii::ml::aux::KernelDensityPdf<NT,KT>::KernelDensityPdf(
00175 PartitionTree* tree, const NT& N, const KT& K) :
00176 Pdf(tree->getData()->getDimensions()), tree(tree), N(N), K(K),
00177 mu(tree->getData()->getDimensions()),
00178 sigma(tree->getData()->getDimensions()), haveMu(false), haveSigma(false) {
00179
00180 }
00181
00182 template <class NT, class KT>
00183 indii::ml::aux::KernelDensityPdf<NT,KT>::~KernelDensityPdf() {
00184
00185 }
00186
00187 template <class NT, class KT>
00188 void indii::ml::aux::KernelDensityPdf<NT,KT>::setDimensions(
00189 const unsigned int N, const bool preserve) {
00190 assert (false);
00191 }
00192
00193 template <class NT, class KT>
00194 const indii::ml::aux::vector&
00195 indii::ml::aux::KernelDensityPdf<NT,KT>::getExpectation() {
00196 if (!haveMu) {
00197 calculateExpectation();
00198 assert (haveMu);
00199 }
00200 return mu;
00201 }
00202
00203 template <class NT, class KT>
00204 const indii::ml::aux::symmetric_matrix&
00205 indii::ml::aux::KernelDensityPdf<NT,KT>::getCovariance() {
00206 if (!haveSigma) {
00207 calculateCovariance();
00208 assert (haveSigma);
00209 }
00210 return sigma;
00211 }
00212
00213 template <class NT, class KT>
00214 indii::ml::aux::vector indii::ml::aux::KernelDensityPdf<NT,KT>::sample() {
00215
00216 assert (tree != NULL);
00217
00218
00219
00220 return tree->getData()->sample() + K.sample() * N.sample(getDimensions());
00221 }
00222
00223 template <class NT, class KT>
00224 double indii::ml::aux::KernelDensityPdf<NT,KT>::densityAt(const vector& x) {
00225 if (tree->getRoot() == NULL) {
00226 return 0.0;
00227 }
00228
00229 PartitionTreeNode* node = tree->getRoot();
00230 DiracMixturePdf* p = tree->getData();
00231 std::stack<PartitionTreeNode*> nodes;
00232 vector d(x.size());
00233 double result = 0.0;
00234 std::vector<unsigned int> is;
00235 unsigned int i;
00236
00237 nodes.push(node);
00238 while (!nodes.empty()) {
00239 node = nodes.top();
00240 nodes.pop();
00241
00242 if (node->isLeaf()) {
00243 i = node->getIndex();
00244 noalias(d) = p->get(i) - x;
00245 result += p->getWeight(i) * K(N(d));
00246 } else if (node->isPrune()) {
00247 is = node->getIndices();
00248 for (i = 0; i < is.size(); i++) {
00249 noalias(d) = p->get(is[i]) - x;
00250 result += p->getWeight(is[i]) * K(N(d));
00251 }
00252 } else {
00253
00254 node->difference(x, d);
00255 if (K(N(d)) > 0.0) {
00256 nodes.push(node->getLeft());
00257 nodes.push(node->getRight());
00258 }
00259 }
00260 }
00261 return result / p->getTotalWeight();
00262 }
00263
00264 template <class NT, class KT>
00265 indii::ml::aux::vector
00266 indii::ml::aux::KernelDensityPdf<NT,KT>::densityAt(PartitionTree& tree) {
00267
00268 assert (getDimensions() == tree.getData()->getDimensions());
00269
00270 return aux::dualTreeDensity(tree, *this->tree,
00271 this->tree->getData()->getWeights(), N, K);
00272 }
00273
00274 template <class NT, class KT>
00275 void indii::ml::aux::KernelDensityPdf<NT,KT>::calculateExpectation() {
00276 std::vector<PartitionTreeNode*> nodes;
00277 PartitionTreeNode* node = tree->getRoot();
00278 DiracMixturePdf* p = tree->getData();
00279 assert (node != NULL);
00280
00281 std::vector<unsigned int> is;
00282 unsigned int i;
00283
00284 mu.clear();
00285 nodes.push_back(node);
00286 while (!nodes.empty()) {
00287 node = nodes.back();
00288 nodes.pop_back();
00289 if (node->isLeaf()) {
00290 i = node->getIndex();
00291 mu += p->getWeight(i) * p->get(i);
00292 } else if (node->isPrune()) {
00293 is = node->getIndices();
00294 for (i = 0; i < is.size(); i++) {
00295 mu += p->getWeight(is[i]) * p->get(is[i]);
00296 }
00297 } else {
00298
00299 nodes.push_back(node->getLeft());
00300 nodes.push_back(node->getRight());
00301 }
00302 }
00303
00304 mu /= p->getTotalWeight();
00305 haveMu = true;
00306 }
00307
00308 template <class NT, class KT>
00309 void indii::ml::aux::KernelDensityPdf<NT,KT>::calculateCovariance() {
00310 if (!haveMu) {
00311 calculateExpectation();
00312 assert (haveMu);
00313 }
00314
00315 std::vector<PartitionTreeNode*> nodes;
00316 PartitionTreeNode* node = tree->getRoot();
00317 DiracMixturePdf* p = tree->getData();
00318 assert (node != NULL);
00319
00320 std::vector<unsigned int> is;
00321 unsigned int i;
00322
00323 sigma.clear();
00324 nodes.push_back(node);
00325 while (!nodes.empty()) {
00326 node = nodes.back();
00327 nodes.pop_back();
00328
00329 if (node->isLeaf()) {
00330 i = node->getIndex();
00331 vector& x = p->get(i);
00332 sigma += p->getWeight(i) * outer_prod(x, x);
00333 } else if (node->isPrune()) {
00334 is = node->getIndices();
00335 for (i = 0; i < is.size(); i++) {
00336 vector& x = p->get(is[i]);
00337 sigma += p->getWeight(is[i]) * outer_prod(x, x);
00338 }
00339 } else {
00340
00341 nodes.push_back(node->getLeft());
00342 nodes.push_back(node->getRight());
00343 }
00344 }
00345 sigma /= p->getTotalWeight();
00346 noalias(sigma) -= outer_prod(mu, mu);
00347 haveSigma = true;
00348 }
00349
00350 template <class NT, class KT>
00351 template <class Archive>
00352 void indii::ml::aux::KernelDensityPdf<NT,KT>::save(Archive& ar,
00353 const unsigned int version) const {
00354 ar & boost::serialization::base_object<Pdf>(*this);
00355 ar & tree;
00356 ar & N;
00357 ar & K;
00358 }
00359
00360 template <class NT, class KT>
00361 template <class Archive>
00362 void indii::ml::aux::KernelDensityPdf<NT,KT>::load(Archive& ar,
00363 const unsigned int version) {
00364 ar & boost::serialization::base_object<Pdf>(*this);
00365 ar & tree;
00366 ar & N;
00367 ar & K;
00368
00369 mu.resize(tree->getData()->getDimensions(), false);
00370 sigma.resize(tree->getData()->getDimensions(), false);
00371 haveMu = false;
00372 haveSigma = false;
00373 }
00374
00375 #endif
00376