Skip to content

Commit 5c678e3

Browse files
manuelschillerguitargeek
authored andcommitted
[smatrix] CholeskyDecompGenDim: use std::vector for internal workspace
CholeskyDecompGenDim: move to std::vector for internal storage, provide releaseWorkspace method.
1 parent e145bd8 commit 5c678e3

File tree

1 file changed

+93
-63
lines changed

1 file changed

+93
-63
lines changed

math/smatrix/inc/Math/CholeskyDecomp.h

Lines changed: 93 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -23,20 +23,20 @@
2323
* provide routines to access the result of the decomposition L and its
2424
* inverse
2525
* @date January 20th 2024
26-
* CholeskyDecompGenDim allowed copying and moving, but did not provide
27-
* copy and move constructors or assignment operators, resulting in
28-
* incorrect code and potential memory corruption, if somebody tried to
29-
* copy/move a CholeskyDecompGenDim instance. Since we're in a C++17
30-
* world now, use std::unique_ptr<F[]> and std::make_unique to get rid of
31-
* explicit memory management in CholeskyDecompGenDim, and provide correct
32-
* code for copy construction and assignment (the default constructor and
33-
* destructor, and the move construction and assignment code auto-generated
34-
* by the compiler are fine, and these methods are defaulted).
26+
* CholeskyDecompGenDim allowed copying and moving, but did not provide copy
27+
* and move constructors or assignment operators, resulting in incorrect
28+
* code and potential memory corruption, if somebody tried to copy/move a
29+
* CholeskyDecompGenDim instance. Since we're in a C++17 world now, use the
30+
* excellent movable std::vector<F> to get rid of explicit memory management
31+
* in CholeskyDecompGenDim, and thus provide correct code for copy/move
32+
* construction and assignment. Moreover, the provided releaseWorkspace
33+
* method allows reuse of the allocated vector inside a CholeskyDecompGenDim
34+
* to reduce the cost of repeated matrix decomposition.
3535
*/
3636

3737
#include <cmath>
38+
#include <vector>
3839
#include <algorithm>
39-
#include <memory>
4040

4141
namespace ROOT {
4242

@@ -103,7 +103,7 @@ template<class F, unsigned N> class CholeskyDecomp
103103
* operator()(int i, int j) for access to elements)
104104
*/
105105
template<class M> CholeskyDecomp(const M& m) :
106-
fL( ), fOk(false)
106+
fOk(false)
107107
{
108108
using CholeskyDecompHelpers::_decomposer;
109109
fOk = _decomposer<F, N, M>()(fL, m);
@@ -121,7 +121,7 @@ template<class F, unsigned N> class CholeskyDecomp
121121
* (i * (i + 1)) / 2 + j
122122
*/
123123
template<typename G> CholeskyDecomp(G* m) :
124-
fL(), fOk(false)
124+
fOk(false)
125125
{
126126
using CholeskyDecompHelpers::_decomposer;
127127
using CholeskyDecompHelpers::PackedArrayAdapter;
@@ -327,50 +327,29 @@ template<class F> class CholeskyDecompGenDim
327327
/// lower triangular matrix L
328328
/** lower triangular matrix L, packed storage, with diagonal
329329
* elements pre-inverted */
330-
std::unique_ptr<F[]> fL;
330+
std::vector<F> fL;
331331
/// flag indicating a successful decomposition
332332
bool fOk = false;
333333
public:
334-
// move construction and assignment, and destruction do the right thing, so
335-
// use the versions that the compiler generates for us...
336-
CholeskyDecompGenDim(CholeskyDecompGenDim&&) = default;
337-
CholeskyDecompGenDim& operator=(CholeskyDecompGenDim&&) = default;
338-
~CholeskyDecompGenDim() = default;
339-
340-
/// copy constructor
341-
CholeskyDecompGenDim(const CholeskyDecompGenDim& other)
342-
: fN(other.fN), fL(std::make_unique<F[]>((fN * (fN + 1)) / 2)),
343-
fOk(other.fOk)
344-
{
345-
std::copy(other.fL, other.fL + (fN * (fN + 1)) / 2, fL);
346-
}
347-
/// (copy) assignment
348-
CholeskyDecompGenDim& operator=(const CholeskyDecompGenDim& other)
349-
{
350-
if (this != &other) {
351-
// check if fL is large enough to hold other, if not, reallocate
352-
if (fN < other.fN)
353-
fL = std::make_unique<F[]>((other.fN * (other.fN + 1)) / 2);
354-
fN = other.fN;
355-
std::copy(other.fL, other.fL + (fN * (fN + 1)) / 2, fL);
356-
fOk = other.fOk;
357-
}
358-
return *this;
359-
}
360-
361334
/// perform a Cholesky decomposition
362335
/** perform a Cholesky decomposition of a symmetric positive
363336
* definite matrix m
364337
*
365338
* this is the constructor to uses with an SMatrix (and objects
366339
* that behave like an SMatrix in terms of using
367340
* operator()(int i, int j) for access to elements)
341+
*
342+
* The optional wksp parameter that can be used to avoid (re-)allocations
343+
* on repeated decompositions of the same size (by passing a workspace of
344+
* size N * (N + 1), or reusing one returned from releaseWorkspace by a
345+
* previous decomposition).
368346
*/
369-
template<class M> CholeskyDecompGenDim(unsigned N, const M& m) :
370-
fN(N), fL(std::make_unique<F[]>((fN * (fN + 1)) / 2))
347+
template<class M> CholeskyDecompGenDim(unsigned N, const M& m, std::vector<F> wksp = {}) :
348+
fN(N), fL(std::move(wksp))
371349
{
372350
using CholeskyDecompHelpers::_decomposerGenDim;
373-
fOk = _decomposerGenDim<F, M>()(fL.get(), m, fN);
351+
if (fL.size() < fN * (fN + 1) / 2) fL.resize(fN * (fN + 1) / 2);
352+
fOk = _decomposerGenDim<F, M>()(fL.data(), m, fN);
374353
}
375354

376355
/// perform a Cholesky decomposition
@@ -383,14 +362,60 @@ template<class F> class CholeskyDecompGenDim
383362
* NOTE: the matrix is given in packed representation, matrix
384363
* element m(i,j) (j <= i) is supposed to be in array element
385364
* (i * (i + 1)) / 2 + j
365+
*
366+
* The optional wksp parameter that can be used to avoid (re-)allocations
367+
* on repeated decompositions of the same size (by passing a workspace of
368+
* size N * (N + 1), or reusing one returned from releaseWorkspace by a
369+
* previous decomposition).
386370
*/
387-
template<typename G> CholeskyDecompGenDim(unsigned N, G* m) :
388-
fN(N), fL(std::make_unique<F[]>((fN * (fN + 1)) / 2))
371+
template<typename G> CholeskyDecompGenDim(unsigned N, G* m, std::vector<F> wksp = {}) :
372+
fN(N), fL(std::move(wksp))
389373
{
390374
using CholeskyDecompHelpers::_decomposerGenDim;
391375
using CholeskyDecompHelpers::PackedArrayAdapter;
376+
if (fL.size() < fN * (fN + 1) / 2) fL.resize(fN * (fN + 1) / 2);
392377
fOk = _decomposerGenDim<F, PackedArrayAdapter<G> >()(
393-
fL.get(), PackedArrayAdapter<G>(m), fN);
378+
fL.data(), PackedArrayAdapter<G>(m), fN);
379+
}
380+
381+
/// release the workspace of the decomposition for reuse
382+
/** release the workspace of the decomposition for reuse
383+
*
384+
* If you use CholeskyDecompGenDim repeatedly with the same size N, it is
385+
* possible to avoid repeatedly allocating the internal workspace. The
386+
* constructors take an optional wksp argument, and this routine can be
387+
* called to get the workspace out of a decomposition when you are done
388+
* with it.
389+
*
390+
* Please note that once you call releaseWorkspace, you cannot do further
391+
* operations on the decomposition object (and this is indicated by having
392+
* it return false when you call ok()).
393+
*
394+
* Here is an example snippet of (pseudo-)code which illustrates the use of
395+
* releaseWorkspace for inverting the covariance matrices of a number of
396+
* items (which must all be of the same size):
397+
*
398+
* @code
399+
* std::vector<float> wksp;
400+
* for (const auto& item: items) {
401+
* CholeskyDecompGenDim decomp(item.cov().size(), item.cov(),
402+
* std::move(wksp));
403+
* if (decomp.ok()) {
404+
* decomp.Invert(item.invcov());
405+
* }
406+
* wksp = std::move(decomp.releaseWorkspace());
407+
* }
408+
* @endcode
409+
*
410+
* In the above code snippet, only the first CholeskyDecompGenDim
411+
* constructor call will allocate memory. Subsequent calls in the loop will
412+
* reuse the buffer from the first iteration.
413+
*/
414+
std::vector<F> releaseWorkspace()
415+
{
416+
fN = 0;
417+
fOk = false;
418+
return std::move(fL);
394419
}
395420

396421
/// returns true if decomposition was successful
@@ -411,7 +436,7 @@ template<class F> class CholeskyDecompGenDim
411436
template<class V> bool Solve(V& rhs) const
412437
{
413438
using CholeskyDecompHelpers::_solverGenDim;
414-
if (fOk) _solverGenDim<F,V>()(rhs, fL.get(), fN);
439+
if (fOk) _solverGenDim<F,V>()(rhs, fL.data(), fN);
415440
return fOk;
416441
}
417442

@@ -424,7 +449,10 @@ template<class F> class CholeskyDecompGenDim
424449
template<class M> bool Invert(M& m) const
425450
{
426451
using CholeskyDecompHelpers::_inverterGenDim;
427-
if (fOk) _inverterGenDim<F,M>()(m, fL.get(), fN);
452+
if (fOk) {
453+
auto wksp = fL;
454+
_inverterGenDim<F,M>()(m, fN, wksp.data());
455+
}
428456
return fOk;
429457
}
430458

@@ -443,8 +471,9 @@ template<class F> class CholeskyDecompGenDim
443471
using CholeskyDecompHelpers::_inverterGenDim;
444472
using CholeskyDecompHelpers::PackedArrayAdapter;
445473
if (fOk) {
474+
auto wksp = fL;
446475
PackedArrayAdapter<G> adapted(m);
447-
_inverterGenDim<F,PackedArrayAdapter<G> >()(adapted, fL.get(), fN);
476+
_inverterGenDim<F,PackedArrayAdapter<G> >()(adapted, fN, wksp.data());
448477
}
449478
return fOk;
450479
}
@@ -458,7 +487,7 @@ template<class F> class CholeskyDecompGenDim
458487
template<class M> bool getL(M& m) const
459488
{
460489
if (!fOk) return false;
461-
const F* L = fL.get();
490+
const F* L = fL.data();
462491
for (unsigned i = 0; i < fN; ++i) {
463492
// zero upper half of matrix
464493
for (unsigned j = i + 1; j < fN; ++j)
@@ -484,7 +513,7 @@ template<class F> class CholeskyDecompGenDim
484513
template<typename G> bool getL(G* m) const
485514
{
486515
if (!fOk) return false;
487-
const F* L = fL.get();
516+
const F* L = fL.data();
488517
// copy L
489518
for (unsigned i = 0; i < (fN * (fN + 1)) / 2; ++i)
490519
m[i] = L[i];
@@ -504,7 +533,7 @@ template<class F> class CholeskyDecompGenDim
504533
template<class M> bool getLi(M& m) const
505534
{
506535
if (!fOk) return false;
507-
const F* L = fL.get();
536+
const F* L = fL.data();
508537
for (unsigned i = 0; i < fN; ++i) {
509538
// zero lower half of matrix
510539
for (unsigned j = i + 1; j < fN; ++j)
@@ -536,7 +565,7 @@ template<class F> class CholeskyDecompGenDim
536565
template<typename G> bool getLi(G* m) const
537566
{
538567
if (!fOk) return false;
539-
const F* L = fL.get();
568+
const F* L = fL.data();
540569
// copy L
541570
for (unsigned i = 0; i < (fN * (fN + 1)) / 2; ++i)
542571
m[i] = L[i];
@@ -626,18 +655,14 @@ namespace CholeskyDecompHelpers {
626655
template<class F, class M> struct _inverterGenDim
627656
{
628657
/// method to do the inversion
629-
void operator()(M& dst, const F* src, unsigned N) const
658+
void operator()(M& dst, unsigned N, F* wksp) const
630659
{
631-
// make working copy
632-
auto l_ = std::make_unique<F[]>(N * (N + 1) / 2);
633-
F* l = l_.get();
634-
std::copy(src, src + ((N * (N + 1)) / 2), l);
635-
// ok, next step: invert off-diagonal part of matrix
636-
F* base1 = &l[1];
660+
// invert off-diagonal part of matrix
661+
F* base1 = &wksp[1];
637662
for (unsigned i = 1; i < N; base1 += ++i) {
638663
for (unsigned j = 0; j < i; ++j) {
639664
F tmp = F(0.0);
640-
const F *base2 = &l[(i * (i - 1)) / 2];
665+
const F *base2 = &wksp[(i * (i - 1)) / 2];
641666
for (unsigned k = i; k-- > j; base2 -= k)
642667
tmp -= base1[k] * base2[j];
643668
base1[j] = tmp * base1[i];
@@ -648,7 +673,7 @@ namespace CholeskyDecompHelpers {
648673
for (unsigned i = N; i--; ) {
649674
for (unsigned j = i + 1; j--; ) {
650675
F tmp = F(0.0);
651-
base1 = &l[(N * (N - 1)) / 2];
676+
base1 = &wksp[(N * (N - 1)) / 2];
652677
for (unsigned k = N; k-- > i; base1 -= k)
653678
tmp += base1[i] * base1[j];
654679
dst(i, j) = tmp;
@@ -662,7 +687,12 @@ namespace CholeskyDecompHelpers {
662687
{
663688
/// method to do the inversion
664689
void operator()(M& dst, const F* src) const
665-
{ return _inverterGenDim<F, M>()(dst, src, N); }
690+
{
691+
// make working copy
692+
F wksp[N * (N + 1) / 2];
693+
std::copy(src, src + ((N * (N + 1)) / 2), wksp);
694+
return _inverterGenDim<F, M>()(dst, N, wksp);
695+
}
666696
};
667697

668698
/// struct to solve a linear system using its Cholesky decomposition (generalised dimensionality)

0 commit comments

Comments
 (0)