00001
00002
00003
00004 #include "DiracMixturePdf.hpp"
00005
00006
00007 #include "KDTreeNode.hpp"
00008 #include "DistributedPartitioner.hpp"
00009
00010 #include "boost/numeric/bindings/traits/ublas_matrix.hpp"
00011 #include "boost/numeric/bindings/traits/ublas_vector.hpp"
00012 #include "boost/numeric/bindings/traits/ublas_symmetric.hpp"
00013 #include "boost/numeric/bindings/lapack/lapack.hpp"
00014
00015 #include <stack>
00016
00017 using namespace indii::ml::aux;
00018
00019 namespace ublas = boost::numeric::ublas;
00020 namespace lapack = boost::numeric::bindings::lapack;
00021
00022 DiracMixturePdf::DiracMixturePdf() : MixturePdf<DiracPdf>(0), sigma(0),
00023 Zsigma(0), haveSigma(false) {
00024
00025 }
00026
00027 DiracMixturePdf::DiracMixturePdf(Pdf& o, const unsigned int P) :
00028 MixturePdf<DiracPdf>(o.getDimensions()), sigma(o.getDimensions()),
00029 Zsigma(o.getDimensions()), haveSigma(false) {
00030 unsigned int i;
00031
00032 for (i = 0; i < P; i++) {
00033 add(o.sample());
00034 }
00035 }
00036
00037 DiracMixturePdf::DiracMixturePdf(const unsigned int N) :
00038 MixturePdf<DiracPdf>(N), sigma(N), Zsigma(N), haveSigma(false) {
00039
00040 }
00041
00042 DiracMixturePdf::~DiracMixturePdf() {
00043
00044 }
00045
00046 double DiracMixturePdf::calculateEss() {
00047 const double W = getTotalWeight();
00048 const vector& ws = getWeights();
00049 double ess = 0.0;
00050 unsigned int i;
00051
00052 for (i = 0; i < ws.size(); i++) {
00053 ess += ws(i)*ws(i);
00054 }
00055 ess = W*W / ess;
00056
00057 return ess;
00058 }
00059
00060 double DiracMixturePdf::calculateDistributedEss() {
00061 boost::mpi::communicator world;
00062
00063 const double W = getDistributedTotalWeight();
00064 const vector& ws = getWeights();
00065 double ess = 0.0;
00066 unsigned int i;
00067
00068 for (i = 0; i < ws.size(); i++) {
00069 ess += ws(i)*ws(i);
00070 }
00071 ess = W*W / boost::mpi::all_reduce(world, ess, std::plus<double>());
00072
00073 return ess;
00074 }
00075
00076 void DiracMixturePdf::standardise() {
00077 standardise(getExpectation(), getStandardDeviation());
00078 }
00079
00080 void DiracMixturePdf::standardise(const vector& mu,
00081 const lower_triangular_matrix& sd) {
00082 const unsigned int N = getDimensions();
00083 unsigned int i;
00084
00085
00086 matrix inv_sd(N,N), tmp(sd);
00087 inv(tmp, inv_sd);
00088
00089
00090 for (i = 0; i < getSize(); i++) {
00091 get(i) = DiracPdf(prod(inv_sd, get(i) - mu));
00092 }
00093
00094 dirty();
00095 }
00096
00097 void DiracMixturePdf::standardise(const vector& mu,
00098 const symmetric_matrix& sigma) {
00099 symmetric_matrix tmp(sigma);
00100 #ifdef NDEBUG
00101 lapack::pptrf(tmp);
00102 #else
00103 int err = lapack::pptrf(tmp);
00104 assert (err == 0);
00105 #endif
00106
00107 lower_triangular_matrix sd(sigma.size1(), sigma.size2());
00108 noalias(sd) = ublas::triangular_adaptor<symmetric_matrix,ublas::lower>(tmp);
00109 standardise(mu, sd);
00110 }
00111
00112 void DiracMixturePdf::distributedStandardise() {
00113 standardise(getDistributedExpectation(),
00114 getDistributedStandardDeviation());
00115 }
00116
00117 void DiracMixturePdf::setDimensions(const unsigned int N,
00118 const bool preserve) {
00119 MixturePdf<DiracPdf>::setDimensions(N, preserve);
00120
00121 Zsigma.resize(N, false);
00122 sigma.resize(N, false);
00123 }
00124
00125 const symmetric_matrix& DiracMixturePdf::getCovariance() {
00126 if (!haveSigma) {
00127 calculateCovariance();
00128 }
00129 return sigma;
00130 }
00131
00132 symmetric_matrix DiracMixturePdf::getDistributedCovariance() {
00133 boost::mpi::communicator world;
00134 const unsigned int size = world.size();
00135
00136 if (size == 1) {
00137 return getCovariance();
00138 } else {
00139 const unsigned int N = getDimensions();
00140
00141 boost::mpi::communicator world;
00142 matrix Zsigma_d(N,N), sigma_d(N,N);
00143 vector mu_d(N);
00144
00145 if (getTotalWeight() > 0.0) {
00146 if (!haveSigma) {
00147 calculateCovariance();
00148 }
00149 } else {
00150 Zsigma.clear();
00151 }
00152
00153 noalias(mu_d) = getDistributedExpectation();
00154 noalias(Zsigma_d) = boost::mpi::all_reduce(world, matrix(Zsigma),
00155 std::plus<matrix>());
00156 noalias(sigma_d) = Zsigma_d / getDistributedTotalWeight() -
00157 outer_prod(mu_d, mu_d);
00158
00159 return ublas::symmetric_adaptor<matrix, ublas::lower>(sigma_d);
00160 }
00161 }
00162
00163 lower_triangular_matrix DiracMixturePdf::getStandardDeviation() {
00164 const unsigned int N = this->getDimensions();
00165 lower_triangular_matrix sd(N,N);
00166
00167 if (getSize() > 1) {
00168 symmetric_matrix sigma(getCovariance());
00169 int err;
00170
00171 err = lapack::pptrf(sigma);
00172 assert (err == 0);
00173 noalias(sd) = ublas::triangular_adaptor<symmetric_matrix,
00174 ublas::lower>(sigma);
00175 } else {
00176 sd.clear();
00177 }
00178
00179 return sd;
00180 }
00181
00182 lower_triangular_matrix DiracMixturePdf::getDistributedStandardDeviation() {
00183 boost::mpi::communicator world;
00184 const unsigned int size = world.size();
00185 lower_triangular_matrix sd_d(N,N);
00186
00187 if (size == 1) {
00188 return getStandardDeviation();
00189 } else if (getDistributedSize() > 1) {
00190 symmetric_matrix sigma_d(getDistributedCovariance());
00191 int err;
00192
00193 err = lapack::pptrf(sigma_d);
00194 assert (err == 0);
00195 noalias(sd_d) = ublas::triangular_adaptor<symmetric_matrix,
00196 ublas::lower>(sigma_d);
00197 } else {
00198 sd_d.clear();
00199 }
00200
00201 return sd_d;
00202 }
00203
00204 void DiracMixturePdf::redistributeBySpace() {
00205 boost::mpi::communicator world;
00206 unsigned int rank = world.rank();
00207 unsigned int size = world.size();
00208
00209 unsigned int i, j;
00210 std::vector<unsigned int> is(getSize());
00211
00212 for (i = 0; i < is.size(); i++) {
00213 is[i] = i;
00214 }
00215
00216
00217 KDTreeNode* root = distributedBuild(is, 0, size);
00218
00219
00220 PartitionTreeNode* node = root;
00221 std::stack<PartitionTreeNode*> nodes;
00222 std::vector<PartitionTreeNode*> prunes;
00223
00224 nodes.push(node);
00225 while (!nodes.empty()) {
00226 node = nodes.top();
00227 nodes.pop();
00228
00229 if (node->isInternal()) {
00230 nodes.push(node->getLeft());
00231 nodes.push(node->getRight());
00232 } else if (node->isPrune()) {
00233 prunes.push_back(node);
00234 }
00235 }
00236
00237
00238 std::vector<std::vector<DiracPdf> > recvXs;
00239 std::vector<vector> recvWeights;
00240 std::vector<DiracPdf> sendXs;
00241 vector sendWeights;
00242
00243 for (i = 0; i < prunes.size(); i++) {
00244
00245 is = prunes[i]->getIndices();
00246 sendXs.clear();
00247 sendWeights.resize(prunes[i]->getSize(), false);
00248
00249 for (j = 0; j < is.size(); j++) {
00250 sendXs.push_back(get(is[j]));
00251 sendWeights(j) = getWeight(is[j]);
00252 }
00253
00254
00255 if (rank == i) {
00256 boost::mpi::gather(world, sendXs, recvXs, i);
00257 boost::mpi::gather(world, sendWeights, recvWeights, i);
00258 } else {
00259 boost::mpi::gather(world, sendXs, i);
00260 boost::mpi::gather(world, sendWeights, i);
00261 }
00262 }
00263
00264
00265 clear();
00266 for (i = 0; i < recvXs.size(); i++) {
00267 for (j = 0; j < recvXs[i].size(); j++) {
00268 add(recvXs[i][j], recvWeights[i](j));
00269 }
00270 }
00271
00272
00273 delete root;
00274 }
00275
00276 void DiracMixturePdf::dirty() {
00277 MixturePdf<DiracPdf>::dirty();
00278 haveSigma = false;
00279 }
00280
00281 void DiracMixturePdf::calculateCovariance() {
00282
00283 assert (getTotalWeight() > 0.0);
00284
00285 const vector& mu = getExpectation();
00286 unsigned int i;
00287
00288 Zsigma.clear();
00289 for (i = 0; i < getSize(); i++) {
00290 noalias(Zsigma) += getWeight(i) * outer_prod(get(i), get(i));
00291 }
00292 noalias(sigma) = Zsigma / getTotalWeight() - outer_prod(mu, mu);
00293 haveSigma = true;
00294 }
00295
00296 KDTreeNode* DiracMixturePdf::distributedBuild(
00297 const std::vector<unsigned int>& is, const unsigned int depth,
00298 const unsigned int nodes) {
00299
00300 assert (nodes > 0);
00301
00302 boost::mpi::communicator world;
00303
00304 KDTreeNode* result;
00305 unsigned int i, P, nth;
00306
00307 if (nodes == 1) {
00308
00309 result = new KDTreeNode(this, is, depth);
00310 } else {
00311 P = boost::mpi::all_reduce(world, is.size(), std::plus<unsigned int>());
00312 nth = P * (nodes / 2) / nodes;
00313
00314
00315 DistributedPartitioner partitioner(nth);
00316 KDTreeNode *left, *right;
00317 std::vector<unsigned int> ls, rs;
00318 if (partitioner.init(this, is)) {
00319 ls.reserve(is.size() / 2);
00320 rs.reserve(is.size() / 2);
00321
00322 for (i = 0; i < is.size(); i++) {
00323 if (partitioner.assign(get(is[i])) == Partitioner::LEFT) {
00324 ls.push_back(is[i]);
00325 } else {
00326 rs.push_back(is[i]);
00327 }
00328 }
00329
00330 left = distributedBuild(ls, depth + 1, nodes / 2);
00331 right = distributedBuild(rs, depth + 1, nodes - nodes / 2);
00332
00333 result = new KDTreeNode(left, right, depth);
00334 } else {
00335 result = new KDTreeNode(this, is, depth);
00336 }
00337 }
00338
00339 return result;
00340 }