indii/ml/aux/DiracMixturePdf.cpp

00001 //#if defined(__GNUC__) && defined(GCC_PCH)
00002 //  #include "aux.hpp"
00003 //#else
00004   #include "DiracMixturePdf.hpp"
00005 //#endif
00006 
00007 #include "KDTreeNode.hpp"
00008 #include "DistributedPartitioner.hpp"
00009 
00010 #include "boost/numeric/bindings/traits/ublas_matrix.hpp"
00011 #include "boost/numeric/bindings/traits/ublas_vector.hpp"
00012 #include "boost/numeric/bindings/traits/ublas_symmetric.hpp"
00013 #include "boost/numeric/bindings/lapack/lapack.hpp"
00014 
00015 #include <stack>
00016 
00017 using namespace indii::ml::aux;
00018 
00019 namespace ublas = boost::numeric::ublas;
00020 namespace lapack = boost::numeric::bindings::lapack;
00021 
00022 DiracMixturePdf::DiracMixturePdf() : MixturePdf<DiracPdf>(0), sigma(0),
00023     Zsigma(0), haveSigma(false) {
00024   //
00025 }
00026 
00027 DiracMixturePdf::DiracMixturePdf(Pdf& o, const unsigned int P) :
00028     MixturePdf<DiracPdf>(o.getDimensions()), sigma(o.getDimensions()),
00029     Zsigma(o.getDimensions()), haveSigma(false) {
00030   unsigned int i;
00031 
00032   for (i = 0; i < P; i++) {
00033     add(o.sample());
00034   }
00035 }
00036 
00037 DiracMixturePdf::DiracMixturePdf(const unsigned int N) :
00038     MixturePdf<DiracPdf>(N), sigma(N), Zsigma(N), haveSigma(false) {
00039   //
00040 }
00041 
00042 DiracMixturePdf::~DiracMixturePdf() {
00043   //
00044 }
00045 
00046 double DiracMixturePdf::calculateEss() {
00047   const double W = getTotalWeight();
00048   const vector& ws = getWeights();
00049   double ess = 0.0;
00050   unsigned int i;
00051   
00052   for (i = 0; i < ws.size(); i++) {
00053     ess += ws(i)*ws(i);
00054   }
00055   ess = W*W / ess;
00056 
00057   return ess;
00058 }
00059 
00060 double DiracMixturePdf::calculateDistributedEss() {
00061   boost::mpi::communicator world;
00062 
00063   const double W = getDistributedTotalWeight();
00064   const vector& ws = getWeights();
00065   double ess = 0.0;
00066   unsigned int i;
00067   
00068   for (i = 0; i < ws.size(); i++) {
00069     ess += ws(i)*ws(i);
00070   }
00071   ess = W*W / boost::mpi::all_reduce(world, ess, std::plus<double>());
00072 
00073   return ess;
00074 }
00075 
00076 void DiracMixturePdf::standardise() {
00077   standardise(getExpectation(), getStandardDeviation());
00078 }
00079 
00080 void DiracMixturePdf::standardise(const vector& mu,
00081     const lower_triangular_matrix& sd) {
00082   const unsigned int N = getDimensions();
00083   unsigned int i;
00084 
00085   /* prepare inverse standard deviation */
00086   matrix inv_sd(N,N), tmp(sd);
00087   inv(tmp, inv_sd);
00088   
00089   /* standardise */
00090   for (i = 0; i < getSize(); i++) {  
00091     get(i) = DiracPdf(prod(inv_sd, get(i) - mu));
00092   }
00093 
00094   dirty();    
00095 }
00096 
00097 void DiracMixturePdf::standardise(const vector& mu,
00098     const symmetric_matrix& sigma) {
00099   symmetric_matrix tmp(sigma);
00100   #ifdef NDEBUG
00101   lapack::pptrf(tmp); // avoids compiler warning about unused variable
00102   #else
00103   int err = lapack::pptrf(tmp);
00104   assert (err == 0);
00105   #endif
00106   
00107   lower_triangular_matrix sd(sigma.size1(), sigma.size2());
00108   noalias(sd) = ublas::triangular_adaptor<symmetric_matrix,ublas::lower>(tmp);
00109   standardise(mu, sd);
00110 }
00111 
00112 void DiracMixturePdf::distributedStandardise() {
00113   standardise(getDistributedExpectation(),
00114       getDistributedStandardDeviation());
00115 }
00116 
00117 void DiracMixturePdf::setDimensions(const unsigned int N,
00118     const bool preserve) {
00119   MixturePdf<DiracPdf>::setDimensions(N, preserve);
00120 
00121   Zsigma.resize(N, false);
00122   sigma.resize(N, false);
00123 }
00124 
00125 const symmetric_matrix& DiracMixturePdf::getCovariance() {
00126   if (!haveSigma) {
00127     calculateCovariance();
00128   }
00129   return sigma;
00130 }
00131 
00132 symmetric_matrix DiracMixturePdf::getDistributedCovariance() {
00133   boost::mpi::communicator world;
00134   const unsigned int size = world.size();
00135   
00136   if (size == 1) {
00137     return getCovariance();
00138   } else {
00139     const unsigned int N = getDimensions();
00140 
00141     boost::mpi::communicator world;
00142     matrix Zsigma_d(N,N), sigma_d(N,N);
00143     vector mu_d(N);
00144 
00145     if (getTotalWeight() > 0.0) {
00146       if (!haveSigma) {
00147         calculateCovariance();
00148       }
00149     } else {
00150       Zsigma.clear();
00151     }
00152 
00153     noalias(mu_d) = getDistributedExpectation();
00154     noalias(Zsigma_d) = boost::mpi::all_reduce(world, matrix(Zsigma),
00155         std::plus<matrix>());
00156     noalias(sigma_d) = Zsigma_d / getDistributedTotalWeight() -
00157         outer_prod(mu_d, mu_d);
00158   
00159     return ublas::symmetric_adaptor<matrix, ublas::lower>(sigma_d);
00160   }
00161 }
00162 
00163 lower_triangular_matrix DiracMixturePdf::getStandardDeviation() {
00164   const unsigned int N = this->getDimensions();
00165   lower_triangular_matrix sd(N,N);
00166   
00167   if (getSize() > 1) {
00168     symmetric_matrix sigma(getCovariance());
00169     int err;
00170 
00171     err = lapack::pptrf(sigma);
00172     assert (err == 0);
00173     noalias(sd) = ublas::triangular_adaptor<symmetric_matrix,
00174         ublas::lower>(sigma);
00175   } else {
00176     sd.clear();
00177   }
00178 
00179   return sd;
00180 }
00181 
00182 lower_triangular_matrix DiracMixturePdf::getDistributedStandardDeviation() {
00183   boost::mpi::communicator world;
00184   const unsigned int size = world.size();
00185   lower_triangular_matrix sd_d(N,N);
00186   
00187   if (size == 1) {
00188     return getStandardDeviation();
00189   } else if (getDistributedSize() > 1) {
00190     symmetric_matrix sigma_d(getDistributedCovariance());
00191     int err;
00192     
00193     err = lapack::pptrf(sigma_d);
00194     assert (err == 0);
00195     noalias(sd_d) = ublas::triangular_adaptor<symmetric_matrix,
00196         ublas::lower>(sigma_d);
00197   } else {
00198     sd_d.clear();
00199   }
00200 
00201   return sd_d;
00202 }
00203 
00204 void DiracMixturePdf::redistributeBySpace() {
00205   boost::mpi::communicator world;
00206   unsigned int rank = world.rank();
00207   unsigned int size = world.size();
00208   
00209   unsigned int i, j;
00210   std::vector<unsigned int> is(getSize());
00211 
00212   for (i = 0; i < is.size(); i++) {
00213     is[i] = i;
00214   }
00215 
00216   /* build pruned kd tree of evenly distributed components */
00217   KDTreeNode* root = distributedBuild(is, 0, size);
00218   
00219   /* traverse tree and redistribute components */
00220   PartitionTreeNode* node = root;
00221   std::stack<PartitionTreeNode*> nodes;
00222   std::vector<PartitionTreeNode*> prunes;
00223   
00224   nodes.push(node);
00225   while (!nodes.empty()) {
00226     node = nodes.top();
00227     nodes.pop();
00228 
00229     if (node->isInternal()) {
00230       nodes.push(node->getLeft());
00231       nodes.push(node->getRight());
00232     } else if (node->isPrune()) {
00233       prunes.push_back(node);
00234     }
00235   }
00236 
00237   /* redistribute */
00238   std::vector<std::vector<DiracPdf> > recvXs;
00239   std::vector<vector> recvWeights;
00240   std::vector<DiracPdf> sendXs;
00241   vector sendWeights;
00242 
00243   for (i = 0; i < prunes.size(); i++) {
00244     /* prepare components at ith node in tree */
00245     is = prunes[i]->getIndices();
00246     sendXs.clear();
00247     sendWeights.resize(prunes[i]->getSize(), false);
00248     
00249     for (j = 0; j < is.size(); j++) {
00250       sendXs.push_back(get(is[j]));
00251       sendWeights(j) = getWeight(is[j]);
00252     }
00253     
00254     /* gather components to rank i */
00255     if (rank == i) {
00256       boost::mpi::gather(world, sendXs, recvXs, i);
00257       boost::mpi::gather(world, sendWeights, recvWeights, i);
00258     } else {
00259       boost::mpi::gather(world, sendXs, i);
00260       boost::mpi::gather(world, sendWeights, i);
00261     }
00262   }
00263   
00264   /* reconstruct */
00265   clear();
00266   for (i = 0; i < recvXs.size(); i++) {
00267     for (j = 0; j < recvXs[i].size(); j++) {
00268       add(recvXs[i][j], recvWeights[i](j));
00269     }
00270   }
00271 
00272   /* clean up */
00273   delete root;
00274 }
00275 
00276 void DiracMixturePdf::dirty() {
00277   MixturePdf<DiracPdf>::dirty();
00278   haveSigma = false;
00279 }
00280 
00281 void DiracMixturePdf::calculateCovariance() {
00282   /* pre-condition */
00283   assert (getTotalWeight() > 0.0);
00284 
00285   const vector& mu = getExpectation();
00286   unsigned int i;
00287 
00288   Zsigma.clear();
00289   for (i = 0; i < getSize(); i++) {
00290     noalias(Zsigma) += getWeight(i) * outer_prod(get(i), get(i));
00291   }
00292   noalias(sigma) = Zsigma / getTotalWeight() - outer_prod(mu, mu);
00293   haveSigma = true;
00294 }
00295 
00296 KDTreeNode* DiracMixturePdf::distributedBuild(
00297     const std::vector<unsigned int>& is, const unsigned int depth,
00298     const unsigned int nodes) {
00299   /* pre-condition */
00300   assert (nodes > 0);
00301   
00302   boost::mpi::communicator world;
00303   
00304   KDTreeNode* result;
00305   unsigned int i, P, nth;
00306   
00307   if (nodes == 1) {
00308     /* prune node */
00309     result = new KDTreeNode(this, is, depth);
00310   } else {
00311     P = boost::mpi::all_reduce(world, is.size(), std::plus<unsigned int>());
00312     nth = P * (nodes / 2) / nodes;
00313     
00314     /* internal node */
00315     DistributedPartitioner partitioner(nth);
00316     KDTreeNode *left, *right; // child nodes
00317     std::vector<unsigned int> ls, rs; // indices of left, right components
00318     if (partitioner.init(this, is)) {
00319       ls.reserve(is.size() / 2);
00320       rs.reserve(is.size() / 2);
00321       
00322       for (i = 0; i < is.size(); i++) {
00323         if (partitioner.assign(get(is[i])) == Partitioner::LEFT) {
00324           ls.push_back(is[i]);
00325         } else {
00326           rs.push_back(is[i]);
00327         }
00328       }
00329       
00330       left = distributedBuild(ls, depth + 1, nodes / 2);
00331       right = distributedBuild(rs, depth + 1, nodes - nodes / 2);    
00332 
00333       result = new KDTreeNode(left, right, depth);
00334     } else {
00335       result = new KDTreeNode(this, is, depth);
00336     }
00337   }
00338   
00339   return result;
00340 }

Generated on Wed Dec 17 15:11:57 2008 for dysii Dynamical Systems Library by  doxygen 1.5.3