indii/ml/aux/KernelDensityPdf.hpp

00001 #ifndef INDII_ML_AUX_KERNELDENSITYPDF_HPP
00002 #define INDII_ML_AUX_KERNELDENSITYPDF_HPP
00003 
00004 #include "Pdf.hpp"
00005 #include "PartitionTree.hpp"
00006 #include "Almost2Norm.hpp"
00007 #include "AlmostGaussianKernel.hpp"
00008 #include "kde.hpp"
00009 
00010 namespace indii {
00011   namespace ml {
00012     namespace aux {
00013 /**
00014  * %Kernel density estimator.
00015  *
00016  * @author Lawrence Murray <lawrence@indii.org>
00017  * @version $Rev: 584 $
00018  * @date $Date: 2008-12-15 17:26:36 +0000 (Mon, 15 Dec 2008) $
00019  *
00020  * @param NT Norm type.
00021  * @param KT Kernel type.
00022  *
00023  * The kernel density estimator is constructed over a KDTree for efficient
00024  * evaluations. After construction, it acts as any other Pdf.
00025  *
00026  * @section Serialization
00027  *
00028  * This class supports serialization through the Boost.Serialization
00029  * library.
00030  *
00031  * @section KernelDensityPdf_references References
00032  * 
00033  * @anchor Silverman1986
00034  * Silverman, B.W. <i>Density Estimation for Statistics and Data
00035  * Analysis</i>. Chapman and Hall, <b>1986</b>.
00036  */
00037 template <class NT = Almost2Norm, class KT = AlmostGaussianKernel>
00038 class KernelDensityPdf : public Pdf {
00039 public:
00040   /**
00041    * Default constructor.
00042    *
00043    * Initialises the distribution with zero dimensions. This should
00044    * generally only be used when the object is to be restored from a
00045    * serialization.
00046    */
00047   KernelDensityPdf();
00048 
00049   /**
00050    * Constructor.
00051    *
00052    * @param tree Partition tree over which to define distribution. Caller
00053    * has ownership.
00054    * @param N \f$\|\mathbf{x}\|_p\f$; a norm.
00055    * @param K \f$K(\|\mathbf{x}\|_p) \f$; density kernel.
00056    */
00057   KernelDensityPdf(PartitionTree* tree, const NT& N, const KT& K);
00058   
00059   /**
00060    * Destructor.
00061    */
00062   virtual ~KernelDensityPdf();
00063 
00064   /**
00065    * Not supported.
00066    *
00067    * @see Pdf::setDimensions()
00068    */
00069   virtual void setDimensions(const unsigned int N,
00070       const bool preserve = false);
00071 
00072   virtual const vector& getExpectation();
00073 
00074   virtual const symmetric_matrix& getCovariance();
00075 
00076   virtual vector sample();
00077 
00078   virtual double densityAt(const vector& x);
00079 
00080   /**
00081    * Calculate the density for all points in a tree.
00082    *
00083    * @param tree Query tree.
00084    *
00085    * @return Density at all points in the tree, ordered according to the 
00086    * underlying DiracMixturePdf ordering.
00087    *
00088    * Uses a dual-tree algorithm to efficiently calculate the density at
00089    * all points in the query tree.
00090    */
00091   vector densityAt(PartitionTree& tree);
00092 
00093 private:
00094   /**
00095    * Partition tree.
00096    */
00097   PartitionTree* tree;
00098   
00099   /**
00100    * \f$\|\mathbf{x}\|_p\f$; the norm.
00101    */
00102   NT N;
00103   
00104   /**
00105    * \f$K(\|\mathbf{x}\|_p) \f$; the density kernel.
00106    */
00107   KT K;
00108   
00109   /**
00110    * \f$\mathbf{\mu}\f$; the mean.
00111    */
00112   vector mu;
00113   
00114   /**
00115    * \f$\Sigma\f$; the covariance.
00116    */
00117   symmetric_matrix sigma;
00118 
00119   /**
00120    * Has the mean been calculated?
00121    */
00122   bool haveMu;
00123   
00124   /**
00125    * Has the covariance been calculated?
00126    */
00127   bool haveSigma;
00128   
00129   /**
00130    * Calculate the mean.
00131    */
00132   void calculateExpectation();
00133   
00134   /**
00135    * Calculate the covariance.
00136    */
00137   void calculateCovariance();
00138   
00139   /**
00140    * Serialize.
00141    */
00142   template<class Archive>
00143   void save(Archive& ar, const unsigned int version) const;
00144 
00145   /**
00146    * Restore from serialization.
00147    */
00148   template<class Archive>
00149   void load(Archive& ar, const unsigned int version);
00150 
00151   /*
00152    * Boost.Serialization requirements.
00153    */
00154   BOOST_SERIALIZATION_SPLIT_MEMBER()
00155   friend class boost::serialization::access;
00156 
00157 };
00158 
00159     }
00160   }
00161 }
00162 
00163 #include "boost/serialization/base_object.hpp"
00164 
00165 #include <stack>
00166 
00167 template <class NT, class KT>
00168 indii::ml::aux::KernelDensityPdf<NT,KT>::KernelDensityPdf() : tree(NULL),
00169     mu(0), sigma(0), haveMu(false), haveSigma(false) {
00170   //
00171 }
00172 
00173 template <class NT, class KT>
00174 indii::ml::aux::KernelDensityPdf<NT,KT>::KernelDensityPdf(
00175     PartitionTree* tree, const NT& N, const KT& K) :
00176     Pdf(tree->getData()->getDimensions()), tree(tree), N(N), K(K),
00177     mu(tree->getData()->getDimensions()),
00178     sigma(tree->getData()->getDimensions()), haveMu(false), haveSigma(false) {
00179   //
00180 }
00181   
00182 template <class NT, class KT>
00183 indii::ml::aux::KernelDensityPdf<NT,KT>::~KernelDensityPdf() {
00184   //
00185 }
00186 
00187 template <class NT, class KT>
00188 void indii::ml::aux::KernelDensityPdf<NT,KT>::setDimensions(
00189     const unsigned int N, const bool preserve) {
00190   assert (false);
00191 }
00192 
00193 template <class NT, class KT>
00194 const indii::ml::aux::vector&
00195     indii::ml::aux::KernelDensityPdf<NT,KT>::getExpectation() {
00196   if (!haveMu) {
00197     calculateExpectation();
00198     assert (haveMu);
00199   }
00200   return mu;
00201 }
00202 
00203 template <class NT, class KT>
00204 const indii::ml::aux::symmetric_matrix&
00205     indii::ml::aux::KernelDensityPdf<NT,KT>::getCovariance() {
00206   if (!haveSigma) {
00207     calculateCovariance();
00208     assert (haveSigma);
00209   }
00210   return sigma;
00211 }
00212 
00213 template <class NT, class KT>
00214 indii::ml::aux::vector indii::ml::aux::KernelDensityPdf<NT,KT>::sample() {
00215   /* pre-condition */
00216   assert (tree != NULL);
00217 
00218   /* sampling from the underlying weighted sample set and adding a kernel
00219    * sample is more efficient than working down the tree. */
00220   return tree->getData()->sample() + K.sample() * N.sample(getDimensions());
00221 }
00222 
00223 template <class NT, class KT>
00224 double indii::ml::aux::KernelDensityPdf<NT,KT>::densityAt(const vector& x) {
00225   if (tree->getRoot() == NULL) {
00226     return 0.0;
00227   }
00228 
00229   PartitionTreeNode* node = tree->getRoot();
00230   DiracMixturePdf* p = tree->getData();
00231   std::stack<PartitionTreeNode*> nodes;
00232   vector d(x.size());
00233   double result = 0.0;
00234   std::vector<unsigned int> is;
00235   unsigned int i;
00236 
00237   nodes.push(node);
00238   while (!nodes.empty()) {
00239     node = nodes.top();
00240     nodes.pop();
00241     
00242     if (node->isLeaf()) {
00243       i = node->getIndex();
00244       noalias(d) = p->get(i) - x;
00245       result += p->getWeight(i) * K(N(d));
00246     } else if (node->isPrune()) {
00247       is = node->getIndices();
00248       for (i = 0; i < is.size(); i++) {
00249         noalias(d) = p->get(is[i]) - x;
00250         result += p->getWeight(is[i]) * K(N(d));
00251       }
00252     } else {
00253       /* should we recurse? */
00254       node->difference(x, d);
00255       if (K(N(d)) > 0.0) {
00256         nodes.push(node->getLeft());
00257         nodes.push(node->getRight());
00258       } 
00259     }
00260   }  
00261   return result / p->getTotalWeight();
00262 }
00263 
00264 template <class NT, class KT>
00265 indii::ml::aux::vector
00266     indii::ml::aux::KernelDensityPdf<NT,KT>::densityAt(PartitionTree& tree) {
00267   /* pre-condition */
00268   assert (getDimensions() == tree.getData()->getDimensions());
00269 
00270   return aux::dualTreeDensity(tree, *this->tree,
00271       this->tree->getData()->getWeights(), N, K);
00272 }
00273 
00274 template <class NT, class KT>
00275 void indii::ml::aux::KernelDensityPdf<NT,KT>::calculateExpectation() {
00276   std::vector<PartitionTreeNode*> nodes;
00277   PartitionTreeNode* node = tree->getRoot();
00278   DiracMixturePdf* p = tree->getData();
00279   assert (node != NULL);
00280 
00281   std::vector<unsigned int> is;
00282   unsigned int i;
00283 
00284   mu.clear();
00285   nodes.push_back(node);
00286   while (!nodes.empty()) {
00287     node = nodes.back();
00288     nodes.pop_back();
00289     if (node->isLeaf()) {
00290       i = node->getIndex();
00291       mu += p->getWeight(i) * p->get(i);
00292     } else if (node->isPrune()) {
00293       is = node->getIndices();
00294       for (i = 0; i < is.size(); i++) {
00295         mu += p->getWeight(is[i]) * p->get(is[i]);
00296       }
00297     } else {
00298       /* recurse */
00299       nodes.push_back(node->getLeft());
00300       nodes.push_back(node->getRight());
00301     }
00302   }
00303   
00304   mu /= p->getTotalWeight();
00305   haveMu = true;
00306 }
00307 
00308 template <class NT, class KT>
00309 void indii::ml::aux::KernelDensityPdf<NT,KT>::calculateCovariance() {
00310   if (!haveMu) {
00311     calculateExpectation();
00312     assert (haveMu);
00313   }
00314  
00315   std::vector<PartitionTreeNode*> nodes;
00316   PartitionTreeNode* node = tree->getRoot();
00317   DiracMixturePdf* p = tree->getData();
00318   assert (node != NULL);
00319 
00320   std::vector<unsigned int> is;
00321   unsigned int i;
00322 
00323   sigma.clear();
00324   nodes.push_back(node);
00325   while (!nodes.empty()) {
00326     node = nodes.back();
00327     nodes.pop_back();
00328 
00329     if (node->isLeaf()) {
00330       i = node->getIndex();
00331       vector& x = p->get(i);
00332       sigma += p->getWeight(i) * outer_prod(x, x);
00333     } else if (node->isPrune()) {
00334       is = node->getIndices();
00335       for (i = 0; i < is.size(); i++) {
00336         vector& x = p->get(is[i]);
00337         sigma += p->getWeight(is[i]) * outer_prod(x, x);
00338       }
00339     } else {
00340       /* recurse */
00341       nodes.push_back(node->getLeft());
00342       nodes.push_back(node->getRight());
00343     }
00344   }  
00345   sigma /= p->getTotalWeight();
00346   noalias(sigma) -= outer_prod(mu, mu);
00347   haveSigma = true;
00348 }
00349 
00350 template <class NT, class KT>
00351 template <class Archive>
00352 void indii::ml::aux::KernelDensityPdf<NT,KT>::save(Archive& ar,
00353     const unsigned int version) const {
00354   ar & boost::serialization::base_object<Pdf>(*this);
00355   ar & tree;
00356   ar & N;
00357   ar & K;
00358 }
00359 
00360 template <class NT, class KT>
00361 template <class Archive>
00362 void indii::ml::aux::KernelDensityPdf<NT,KT>::load(Archive& ar,
00363     const unsigned int version) {
00364   ar & boost::serialization::base_object<Pdf>(*this);
00365   ar & tree;
00366   ar & N;
00367   ar & K;
00368 
00369   mu.resize(tree->getData()->getDimensions(), false);
00370   sigma.resize(tree->getData()->getDimensions(), false);
00371   haveMu = false;
00372   haveSigma = false;
00373 }
00374 
00375 #endif
00376 

Generated on Wed Dec 17 15:11:57 2008 for dysii Dynamical Systems Library by  doxygen 1.5.3