-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathassembly.h
1730 lines (1575 loc) · 79.7 KB
/
assembly.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#pragma once
#include "sparse_matrix_math.h"
#include <unordered_set>
#include <grid.h>
#include "error_code.h"
#include "static_matrix.h"
#include "kd_tree.cuh"
#include <string>
#include <nlohmann/json.hpp>
#include <opencv2/imgproc.hpp>
#include <filesystem>
#include "timer.h"
#include "tbb/blocked_range.h"
#include "tbb/parallel_for.h"
#include "tbb/task_group.h"
#include "kd_tree_builder.h"
// If this is defined GPU devices will be initialized and load all available kernels
// #define GPU_SETUP
// If this is defined the KD tree will be uploaded to the GPU and the advection phase will be performed on the GPU.
// This requires SETUP_GPU to be defined
// #define GPU_ADVECTION
// If this is defined all matrices will be uploaded to the GPU. Conjugate Gradient will use GPU implementation
// of the vector matrix product.
// #define GPU_CONJUGATE_GRADIENT
#if (defined(GPU_CONJUGATE_GRADIENT) || defined(GPU_ADVECTION)) && !defined(GPU_SETUP)
#define GPU_SETUP
#endif
// If this is defined Preconditioned Conjugate Gradient will be used and IC0 preconditioning matrices
// for diffusion, pressure and velocity will be computed
// #define USE_PRECONDITIONING
#ifdef GPU_SETUP
#include "gpu_simulation_device.h"
#endif
namespace NSFem {
/// Draw 2D vector plot of the velocity field and save it as an image in the specified path
/// @param[in] grid The grid where the simulation was done
/// @param[in] uVec The velocity in u direction. Ordered the same way as the nodes in the grid are.
/// @param[in] vVec The velocity in v direction. Ordered the same way as the nodes in the grid are.
/// @param[in] pressure The pressure which will be plotted. Ordered the same way as the nodes in the grid are.
/// @param[in] path Path where the image must be saved. (Will not create missing folders on the path)
/// @param[in] width The width of the resulting image in pixels
/// @param[in] height The height of the resulting image in pixels
/// @param[in] maxArrowLength The max length of the arrows in the produced vector plot. The largest velocity will have
/// this length and all others will be scaled accordingly
void drawVectorPlot(
cv::Mat& outputImage,
const FemGrid2D& grid,
const real* const uVec,
const real* const vVec,
const real* const pressure,
const std::string& path,
const int width,
const int height,
const int maxArrowLengthInPixels
);
void drawVectorPlotSVG(
const FemGrid2D& grid,
const real* const uVec,
const real* const vVec,
const real* const pressure,
const std::string& path,
const int width,
const int height,
const int maxArrowLengthInPixels
);
/// Find the smapplest triangle side all of all triangles. Used to scale the vector plot points.
float findSmallestSide(const FemGrid2D& grid);
/// Find all nodes which are part a boundary. The boundary has imposed condition for one channel (velocity, pressure, etc.)
/// @tparam IteratorT The type of the iterator which iterates over all boundaries for the given channel
/// @param[in] begin (Forward) Iterator to the first boundary
/// @param[in] end Iterator to the ending of the boundary. (One element past the last element. It will not be dereferenced)
/// @param[out] allBoundaryNodesOut The indices of all boundary nodes in all boundaries which [begin;end) spans.
template<typename IteratorT>
void collectBoundaryNodes(
IteratorT begin,
IteratorT end,
std::unordered_set<int>& allBoundaryNodesOut
) {
while(begin != end) {
const int* boundary = begin->getNodeIndexes();
const int boundarySize = begin->getSize();
allBoundaryNodesOut.insert(boundary, boundary + boundarySize);
++begin;
}
}
/// Find the determinant of the Jacobi matrix for a linear transformation of random triangle to the unit one
/// @param[in] elementNodes List of (x, y) coordinates in world space of the points which are going to be transformed.
/// The order is (0, 0), (1, 1), (0, 1)
/// @return The determinant of the Jacobi matrix which transforms k, l, m to the unit triangle
inline real linTriangleTmJacobian(const real* elementNodes) {
const real xk = elementNodes[0];
const real yk = elementNodes[1];
const real xl = elementNodes[2];
const real yl = elementNodes[3];
const real xm = elementNodes[4];
const real ym = elementNodes[5];
// The Jacobi matrix is given by
// |xl - xk yl - yk|
// |xm - xk ym - yk|
const real a = xl - xk;
const real b = yl - yk;
const real c = xm - xk;
const real d = ym - yk;
return a * d - b * c;
}
/// Compute the differential operator:
/// B
/// |d/dx| |ym - yk yk - yl| |d/dxi |
/// | | = 1/det(J) | | | |
/// |d/dy| |xk - xm xl - xk| |d/deta|
/// Where J is the Jacobi matrix:
/// |dx/dxi dy/dxi | |xl - xk yl - yk|
/// | | = | |
/// |dx/deta dy/deta| |xm - xk ym - yk|
/// @param[in] nodes The coordinates of the tirangle vertices. The array is expected to be of the form
/// (x0, y0), (x1, y1), (x2, y2) and they are supposed to be remapped to (0, 0), (1, 0), (0, 1) in the unit tirangle
/// @param[out] outDetJ The determinant of the Jacobi matrix
/// @param[out] outB The matrix B used in the operator as illustrated above it must be of size 4
template<typename TMatrix>
inline void differentialOperator(const real* nodes, real& outDetJ, TMatrix& outB) {
const real xk = nodes[0];
const real yk = nodes[1];
const real xl = nodes[2];
const real yl = nodes[3];
const real xm = nodes[4];
const real ym = nodes[5];
outB[0][0] = ym - yk;
outB[0][1] = yk - yl;
outB[1][0] = xk - xm;
outB[1][1] = xl - xk;
// Important note that the Jacobi matrix and matrix (B * 1/detJ) are actually the inverse of each other
// The inverse of 2d matrix given by:
// |a b| |d -b|
// | | = a*d * c*b | |
// |c d| |-c a|
// Looking at the above picture we can find the determinant of matrix B instead of Jacobi and it will be the same
outDetJ = outB[0][0] * outB[1][1] - outB[0][1] * outB[1][0];
}
/// Structure to represent first order polynomial shape functions for triangular elements
struct P1 {
/// The dimension of the element space
static constexpr int dim = 2;
/// The number of shape functions (in eval)
static constexpr int size = 3;
/// The total number of functions evaluated by the del operator
static constexpr int delSize = P1::dim * P1::size;
/// Shape functions for 2D triangluar element with degrees of freedom in each triangle node
/// @param[in] xi - Coordinate in the (transformed) unit triangle along the xi (aka x) axis
/// @param[in] eta - Coordinate in the (transformed) unit triangle anong the eta (aka y) axis
/// @param[out] out - The value of each shape function at (xi, eta). The array must have at least 6 elements.
/// Order: [0, 1, 2] - The values at the nodes of the triangle
static constexpr void eval(const real xi, const real eta, real (&out)[P1::size]) {
out[0] = 1 - xi - eta;
out[1] = xi;
out[2] = eta;
}
static constexpr void eval(const real xi, const real eta, StaticMatrix<real, 1, size>& out) {
out[0][0] = 1 - xi - eta;
out[0][1] = xi;
out[0][2] = eta;
}
/// Compute the derivatives in xi and eta directions, at point (xi, eta) of the shape functions for triangular
/// element with degrees of freedom in each triangle node
/// @param[in] xi - Coordinate in the (transformed) unit triangle along the xi (aka x) axis
/// @param[in] eta - Coordinate in the (transformed) unit triangle anong the eta (aka y) axis
/// @param[out] out - This will hold the gradient. The first 3 elements are the derivatives of the shape functions with
/// respect to xi, next 3 elements are the derivatives of the shape functions with respect to eta
static constexpr void del(
[[maybe_unused]]const real xi,
[[maybe_unused]]const real eta,
StaticMatrix<real, dim, size>& out
) {
// dpsi/dxi
out[0][0] = -1.0f;
out[0][1] = 1.0f;
out[0][2] = 0.0f;
// dpsi/deta
out[1][0] = -1.0f;
out[1][1] = 0.0f;
out[1][2] = 1.0f;
}
};
/// Structure to represent second order polynomial shape functions for triangular elements
struct P2 {
/// The dimension of the element space
static constexpr int dim = 2;
/// The number of shape functions (in eval)
static constexpr int size = 6;
/// The total number of functions evaluated by the del operator
static constexpr int delSize = P2::dim * P2::size;
/// Shape functions for 2D triangluar element with degrees of freedom in each triangle node and in the middle of each side
/// @param[in] xi - Coordinate in the (transformed) unit triangle along the xi (aka x) axis
/// @param[in] eta - Coordinate in the (transformed) unit triangle anong the eta (aka y) axis
/// @param[out] out - The value of each shape function at (xi, eta). The array must have at least 6 elements.
/// Order is as follows:
/// [0, 1, 2] - The values at the nodes of the triangle
/// 3 - the value at the point between 1 and 2
/// 4 - the value at the point between 0 and 2
/// 5 - the value at the point between 0 and 1
static constexpr void eval(const real xi, const real eta, real (&out)[P2::size]) {
out[0] = 1 - 3 * xi - 3 * eta + 2 * xi * xi + 4 * xi * eta + 2 * eta * eta;
out[1] = 2 * xi * xi - xi;
out[2] = 2 * eta * eta - eta;
out[3] = 4 * xi * eta;
out[4] = 4 * eta - 4 * xi * eta - 4 * eta * eta;
out[5] = 4 * xi - 4 * xi * xi - 4 * xi * eta;
}
static constexpr void eval(const real xi, const real eta, StaticMatrix<real, 1, size>& out) {
out[0][0] = 1 - 3 * xi - 3 * eta + 2 * xi * xi + 4 * xi * eta + 2 * eta * eta;
out[0][1] = 2 * xi * xi - xi;
out[0][2] = 2 * eta * eta - eta;
out[0][3] = 4 * xi * eta;
out[0][4] = 4 * eta - 4 * xi * eta - 4 * eta * eta;
out[0][5] = 4 * xi - 4 * xi * xi - 4 * xi * eta;
}
/// Compute the derivatives in xi and eta directions, at point (xi, eta) of the shape functions for triangular
///element with degrees of freedom in each triangle node and in the middle of each side
/// @param[in] xi - Coordinate in the (transformed) unit triangle along the xi (aka x) axis
/// @param[in] eta - Coordinate in the (transformed) unit triangle anong the eta (aka y) axis
/// @param[out] out - This will hold the gradient. The first 6 elements are the derivatives of the shape functions with
/// respect to xi, next 6 elements are the derivatives of the shape functions with respect to eta
static constexpr void del(const real xi, const real eta, StaticMatrix<real, dim, size>& out) {
// dpsi/dxi
out[0][0] = -3 + 4 * eta + 4 * xi;
out[0][1] = -1 + 4 * xi;
out[0][2] = 0.0f;
out[0][3] = 4 * eta;
out[0][4] = -4 * eta;
out[0][5] = 4 - 4 * eta - 8 * xi;
// dpsi/deta
out[1][0] = -3 + 4 * eta + 4 * xi;
out[1][1] = 0;
out[1][2] = -1 + 4 * eta;
out[1][3] = 4 * xi;
out[1][4] = 4 - 8 * eta - 4 * xi;
out[1][5] = -4 * xi;
}
};
struct TriangleIntegrator {
/// The integration will be exact for polynomials which have order less or equal TriangleIntegrator::order
static constexpr int order = 3;
template<typename TFunctor, int size>
constexpr static void integrate(const TFunctor& f, real (&out)[size]) {
assert(std::all_of(&out[0], &out[0] + size, [](const real x){return x == real(0);}));
real tmp[size] = {};
for(int i = 0; i < numIntegrationPoints; ++i) {
const real x = nodes[2 * i];
const real y = nodes[2 * i + 1];
f(x, y, tmp);
for(int j = 0; j < size; ++j) {
out[j] += tmp[j] * weights[i];
}
}
}
template<typename TFunctor, int rows, int cols>
constexpr static void integrate(const TFunctor& f, StaticMatrix<real, rows, cols>& out) {
assert(std::all_of(out.begin(), out.end(), [](const real x){return x == real(0);}));
StaticMatrix<real, rows, cols> tmp;
for(int i = 0; i < numIntegrationPoints; ++i) {
const real x = nodes[2 * i];
const real y = nodes[2 * i + 1];
f(x, y, tmp);
out += tmp * weights[i];
}
}
private:
static const int numIntegrationPoints = 8;
static constexpr real nodes[2 * TriangleIntegrator::numIntegrationPoints] = {
0.0, 0.0,
1.0, 0.0,
0.0, 1.0,
0.5, 0.0,
0.5, 0.5,
0.0, 0.5,
1.0/3.0, 1.0/3.0
};
static constexpr real weights[TriangleIntegrator::numIntegrationPoints] = {
3.0 / 120.0, 3.0 / 120.0, 3.0 / 120.0,
8.0 / 120.0, 8.0 / 120.0, 8.0 / 120.0,
27.0 / 120.0
};
};
template<typename VelocityShape, typename PressureShape>
class NavierStokesAssembly {
public:
NavierStokesAssembly();
EC::ErrorCode init(const char* simDescriptionPath, const char* outPath);
void solve();
EC::ErrorCode semiLagrangianSolve();
void setTimeStep(const real dt);
void setOutputDir(std::string outputDir);
void setOutputDir(std::string&& outputDir);
private:
enum class VelocityChannel {
U,
V
};
/// Unstructured triangluar grid where the fulid simulation will be computed
FemGrid2D grid;
/// KDTree which is used semi-Lagrangian solver is used.
KDTreeCPUOwner kdTreeCPUOwner;
/// Mass matrix for the velocity formed by (fi_i, fi_j) : forall i, j in 0...numVelocityNodes - 1
/// Where fi_i is the i-th velocity basis function. This matrix is the same for the u and v components of the velocity,
/// thus we will assemble it only once and use the same matrix to compute all velocity components.
/// Used to compute the tentative velocity at step i + 1/2. This matrix is constant for the given mesh and does not change
/// when the time changes.
SMM::CSRMatrix<real> velocityMassMatrix;
/// Stiffness matrix for the velocity formed by (del(fi_i), del(fi_j)) : forall i, j in 0...numVelocityNodes - 1
/// Where fi_i is the i-th velocity basis function and viscosity is the fluid viscosity. This matrix is the same for the u and v
/// components of the velocity, thus we will assemble it only once and use the same matrix to compute all velocity components.
/// Used to compute the tentative velocity at step i + 1/2. This matrix is constant for the given mesh and does not change
/// when the time changes.
SMM::CSRMatrix<real> velocityStiffnessMatrix;
/// Stiffness matrix for the velocity formed by (del(fi_i), del(fi_j)) : forall i, j in 0...numPressureNodes - 1
/// Where fi_i is the i-th pressure basis function. This matrix is constant for the given mesh and does not change
/// when the time changes.
SMM::CSRMatrix<real> pressureStiffnessMatrix;
/// Divergence matrices formed by (dfi_i/dx, chi_j) and (dfi_i/dy, chi_j) : forall i in numVelocityNodes - 1, j in 0...numPressureNodes - 1
/// Where fi_i is the i-th velocity basis function and chi_j is the j-th pressure basis function
/// These are used when pressure is found from the tentative velocity. These matrices are constant for the given mesh and do not change
/// when the time changes.
SMM::CSRMatrix<real> velocityDivergenceMatrix;
/// Diffusion matrix used in the last step of the three way split, when solving for the velocity. This matrix is result solving
/// du/dt = viscosity * div(grad(u)) using the finite element method for the spatial discretizaion and implicit Euler method for
/// the temporal discretization. The matrix is formed by the sum of two matrices: M + viscosity * K, where M is the velocity mass
/// matrix and K is the velocity sitffness matrix. This matrix is constant for the given mesh and does not change when the time changes.
SMM::CSRMatrix<real> diffusionMatrix;
/// This is solved when solving for the diffusion. The diffusion is the last step of the operator split and it solves equation for
/// the velocity. When there is Dirichlet boundary condition for the velocity it's imposed at this step. It cannot be imposed after
/// solving the system, so it's imposed in the system itself. This is done by putting 1 on the main diagonal where the value is
/// imposed and then zeroing out all other elements in the same row and in the same column. The zeroing out is done in order to
/// preserve symmetry of the matrix. The elements which were zeroed out in the row do not matter since we need to impose a value
/// in the system, however the ones in the column do matter. This matrix holds all the columns which were zeroed out. They are
/// multiplied by the boundary condition and are then subtracted from the right hand side in order so that the system is not altered.
SMM::CSRMatrix<real> velocityDirichletWeights;
/// Divergence matrices formed by (fi_i, dchi_j/dx) and (fi_i, dchi_j/dy) : forall i in numVelocityNodes - 1, j in 0...numPressureNodes - 1
/// Where fi_i is the i-th velocity basis function and chi_j is the j-th pressure basis function
/// These are used when pressure is found from the tentative velocity. These matrices are constant for the given mesh and do not change
/// when the time changes.
SMM::CSRMatrix<real> pressureDivergenceMatrix;
/// When solving the pressure poisson equaiton we might need to impose Dirichlet boundary conditions. In that case we cannot
/// impose them after the system is solved. They have to be imposed in the system itself. This is done by putting 1 on the
/// main diagonal where the value is imposed and then zeroing out all other elements in the same row and in the same column.
/// The zeroing out is done in order to preserve symmetry of the matrix. The elements which were zeroed out in the row do not
/// matter since we need to impose a value in the system, however the ones in the column do matter. This matrix holds all the columns
/// which were zeroed out. They are multiplied by the boundary condition and are then subtracted from the right hand side in order
/// so that the system is not altered.
SMM::CSRMatrix<real> pressureDirichletWeights;
/// Vector containing the approximate solution at each mesh node for the current time step
/// First are the values in u direction for all nodes and the the values in v direction for all nodes
/// When using P2-P1 elements the pressure is at the verices of the triangle and the midpoints of each side
SMM::Vector<real> currentVelocitySolution;
/// Vector containing the approximate solution for the pressure at each pressure nodes
/// When using P2-P1 elements the pressure is only at the vertices of the triangle
SMM::Vector<real> currentPressureSolution;
/// Path to a folder where the result for each iteration will be saved
std::string outFolder;
/// Viscosity of the fluid
real viscosity;
/// Size of the time step used when approximating derivatives with respect to time
real dt;
/// The total time over the simulation is going to be executed
real totalTime;
/// The width of the output image in pixels
int outputImageWidth;
/// The height of the output image in pixels
int outputImageHeight;
/// OpenCV matrix which will is used when exporting image results. It will be filled with
/// the corresponding colors and then written to an image file on the hard disk inside the output folder.
cv::Mat outputImage;
/// Assemble all matrices used by the FEM. This is called one time in the beggining of the semiLagrangianSolve
/// function to setup all of the matrices.
EC::ErrorCode assembleAllMatrices();
template<int localRows, int localCols, typename TLocalF, typename Triplet>
void assembleMatrix(const TLocalF& localFunction, Triplet& triplet);
/// Assemble global matrix which will be used to solve problem with Dirichlet conditions. Rows and colums of the
/// out matrix with indexes matching a boundary nodes will be filled with zero (except the diagonal elements).
/// The out matrix will have value one on the diagonal elements corresponding to the boundary nodes.
/// The elements which were dropped from the out matrix will be added to outBondaryWeights, these weights must
/// be subtracted from the right hand side when the linear system is solved
/// @tparam TLocalF Type of the functor which will compute the local matrix
/// @tparam localRows Number of rows in the local matrix
/// @tparam localCols Number of columns in the local matrix
/// @param[in] localFunction Functor which will compute the local matrix
/// @param[in] boundaryNodes Map between boundary node and the boundary to which it belogns
/// @param[out] out The result of the assembling. Ones will appear on the main diagonal elements corresponding
/// to each bondary nodes
/// @param[out] outBondaryWeights For each boundary this holds the columns which were zeroed out when
/// the diagonal element was set to 1. The rows corresponding to boundary nodes are ommitted as they are
/// filled with 0 anyway
template<int localRows, int localCols, typename TLocalF>
void assembleBCMatrix(
const TLocalF& localFunction,
const std::unordered_set<int>& boundaryNodes,
SMM::TripletMatrix<real>& out,
SMM::TripletMatrix<real>& outBondaryWeights
);
/// Export the current solutions for velocity and pressure to a file
void exportSolution(const int timeStep);
/// Handles assembling of a general stiffness matrix. It precomputes the integrals of each pair
/// shape function and then calls assembleMatrix with functor which takes advantage of this optimization
/// @tparam[Shape] The class representing the shape functions which are going to be used to assemble this matrix
/// @param[out] out The resulting stiffness matrix
template<typename Shape>
void assembleStiffnessMatrix(SMM::CSRMatrix<real>& out);
/// Handles assembling of the convection matrix. It does it directly by the formula and does not use
/// precomputed integrals. In theory it's possible, but it would make the code too complicated as it
/// would require combined integral of 3 basis functions (i,j,k forall i,j,k). The convection matrix
/// depends on the solution at the current time step. It changes at each time step.
/// @param[out] outConvectionMatrix The resulting convection matrix
template<typename Triplet>
void assembleConvectionMatrix(Triplet& triplet);
/// This handles the assembling of the divergence matrix. This function looks a lot like assembleMatrix.
/// In fact we could split the divergence matrix into two matrices (one for x direction and one for y direction)
/// each with it's own local function. This way assembleMatrix could be used, but this would mean that we have
/// to iterate over all elements twice and also offset all indexes by the number of nodes for the second (y drection)
/// matrix.
template<typename RegularShape, typename DelShape, bool sideBySide>
void assembleDivergenceMatrix(SMM::TripletMatrix<real>& out);
/// Impose Dirichlet Boundary conditions on the velocity vector passed as an input
/// @param[in, out] velocityVector Velocity vector where the Dirichlet BC will be imposed
/// The vector must have all its u-velocity components at the begining, followed by all
/// v-velocity components.
void imposeVelocityDirichlet(SMM::Vector<real>& velocityVector);
/// Use the semi-Lagrangian method to find the approximate value for the advectable quantities (velocity)
/// The semi-Lagrangian method approximates directly the material derivative D()/Dt=0. For this method we are
/// thinking in Lagrangian perspective. We imagine that the fluid as particles and at each grid point we measure
/// the velocity of some particle, so to find the velocity at some point in space (x, y) at time step i+1, would
/// require us to find a particle which at time point i was at some position (x1, y1), but has moved to (x, y) in
/// time step i+1. Since each particle has a velocity and it "moves with the particle", the velocity of the particle
/// which has moved to (x, y) at i+1 will be the velocity which we want to find.
EC::ErrorCode advect(
const real* const uVelocity,
const real* const vVelocity,
real* const uVelocityOut,
real* const vVelocityOut
);
EC::ErrorCode diffusionSolve(
real* velocityOut,
real* velocityRhs,
real* advectedVelocity,
VelocityChannel ch,
const real eps
) {
#ifdef GPU_SETUP
auto& gpuDevice = getSelectedGPUDevice();
#endif
std::unordered_map<char, float> velocityVars;
// Compute the right hand side for the diffused channel
velocityMassMatrix.rMult(advectedVelocity, velocityRhs);
// Take in to accound Dirchled condition and add them to the right hand side for the diffused channel
FemGrid2D::VelocityDirichletConstIt velocityDrichiletIt = grid.getVelocityDirichletBegin();
const FemGrid2D::VelocityDirichletConstIt velocityDrichiletEnd = grid.getVelocityDirichletEnd();
for(;velocityDrichiletIt != velocityDrichiletEnd; ++velocityDrichiletIt) {
const FemGrid2D::VelocityDirichlet& boundary = *velocityDrichiletIt;
for(int boundaryNodeIndex = 0; boundaryNodeIndex < boundary.getSize(); ++boundaryNodeIndex) {
const int nodeIndex = boundary.getNodeIndexes()[boundaryNodeIndex];
const real x = grid.getNodesBuffer()[nodeIndex * 2];
const real y = grid.getNodesBuffer()[nodeIndex * 2 + 1];
velocityVars['x'] = x;
velocityVars['y'] = y;
float uBoundary = 0, vBoundary = 0;
boundary.eval(&velocityVars, uBoundary, vBoundary);
const float boundaryValue = ch == VelocityChannel::U ? uBoundary : vBoundary;
velocityRhs[nodeIndex] = boundaryValue;
SMM::CSRMatrix<real>::ConstRowIterator it = velocityDirichletWeights.rowBegin(nodeIndex);
const SMM::CSRMatrix<real>::ConstRowIterator end = velocityDirichletWeights.rowEnd(nodeIndex);
while(it != end) {
velocityRhs[it->getCol()] -= it->getValue() * boundaryValue;
++it;
}
}
}
{
PROFILING_SCOPED_TIMER_CUSTOM("Solve with diffusion matrix");
PROFILING_SCOPED_TIMER_CUSTOM("Conjugate Gradient");
#ifdef GPU_CONJUGATE_GRADIENT
{
RETURN_ON_ERROR_CODE(gpuDevice.conjugateGradient(
GPUSimulation::GPUSimulationDevice::diffusion,
static_cast<real*>(velocityRhs),
static_cast<real*>(advectedVelocity),
static_cast<real*>(velocityOut),
-1,
eps
));
}
#else
// Find the final velocity at the current time step
SMM::SolverStatus status = SMM::ConjugateGradient(
diffusionMatrix,
static_cast<real*>(velocityRhs),
static_cast<real*>(advectedVelocity),
static_cast<real*>(velocityOut),
-1,
eps
#ifdef USE_PRECONDITIONING
,diffusionIC0
#endif
);
if(status != SMM::SolverStatus::SUCCESS) {
return EC::ErrorCode(int(status), "Failed to solve diffusionMatrix * currentVelocitySolution = velocityRhs");
}
#endif
}
return EC::ErrorCode();
};
EC::ErrorCode applyPressure(
const real* velocityIn,
real* velocityOut,
real* pressureRhs,
real* velocityRhs,
const real eps
) {
#ifdef GPU_SETUP
auto& gpuDevice = getSelectedGPUDevice();
#endif
std::unordered_map<char, float> velocityVars;
const int nodesCount = grid.getNodesCount();
// Solve for the pressure. As pressure is "implicitly" stepped Dirchlet boundary conditions cannot be imposed after
// solving the linear system. For this reason the pressure stiffness matrix was tweaked before time iterations begin.
// Now at each time step the right hand side must be tweaked as well.
// Find the right hand side
std::unordered_map<char, float> pressureVars;
velocityDivergenceMatrix.rMult(velocityIn, pressureRhs);
// Now impose the Dirichlet Boundary Conditions
FemGrid2D::PressureDirichletConstIt pressureDrichiletIt = grid.getPressureDirichletBegin();
const FemGrid2D::PressureDirichletConstIt pressureDrichiletEnd = grid.getPressureDirichletEnd();
for(;pressureDrichiletIt != pressureDrichiletEnd; ++pressureDrichiletIt) {
const FemGrid2D::PressureDirichlet& boundary = *pressureDrichiletIt;
for(int boundaryNodeIndex = 0; boundaryNodeIndex < boundary.getSize(); ++boundaryNodeIndex) {
const int nodeIndex = boundary.getNodeIndexes()[boundaryNodeIndex];
const real x = grid.getNodesBuffer()[nodeIndex * 2];
const real y = grid.getNodesBuffer()[nodeIndex * 2 + 1];
pressureVars['x'] = x;
pressureVars['y'] = y;
float pBoundary = 0;
boundary.eval(&pressureVars, pBoundary);
pressureRhs[nodeIndex] = pBoundary;
SMM::CSRMatrix<real>::ConstRowIterator it = pressureDirichletWeights.rowBegin(nodeIndex);
const SMM::CSRMatrix<real>::ConstRowIterator end = pressureDirichletWeights.rowEnd(nodeIndex);
while(it != end) {
pressureRhs[it->getCol()] -= it->getValue() * pBoundary;
++it;
}
}
}
{
PROFILING_SCOPED_TIMER_CUSTOM("Solve for pressure stiffness");
PROFILING_SCOPED_TIMER_CUSTOM("Conjugate Gradient");
#ifdef GPU_CONJUGATE_GRADIENT
RETURN_ON_ERROR_CODE(gpuDevice.conjugateGradient(
GPUSimulation::GPUSimulationDevice::pressureSitffness,
static_cast<real*>(pressureRhs),
static_cast<real*>(currentPressureSolution),
static_cast<real*>(currentPressureSolution),
-1,
eps
));
#else
// Finally solve the linear system for the pressure
SMM::SolverStatus solveStatus = SMM::ConjugateGradient(
pressureStiffnessMatrix,
static_cast<real*>(pressureRhs),
static_cast<real*>(currentPressureSolution),
static_cast<real*>(currentPressureSolution),
-1,
eps
#ifdef USE_PRECONDITIONING
,pressureStiffnessIC0
#endif
);
if(solveStatus != SMM::SolverStatus::SUCCESS) {
return EC::ErrorCode(int(solveStatus), "Failed to solve: pressureStiffness * currentPressureSolution = pressureRhs");
}
#endif
}
// After the pressure is found, we must use it to find the "tentative" velocity.
// First find the right hand side of the tentative velocity.
pressureDivergenceMatrix.rMult(currentPressureSolution, velocityRhs);
{
PROFILING_SCOPED_TIMER_CUSTOM("Solve with velocity mass matrix");
PROFILING_SCOPED_TIMER_CUSTOM("Conjugate Gradient");
#ifdef GPU_CONJUGATE_GRADIENT
RETURN_ON_ERROR_CODE(gpuDevice.conjugateGradient(
GPUSimulation::GPUSimulationDevice::velocityMass,
velocityRhs,
velocityIn,
velocityOut,
-1,
eps
));
RETURN_ON_ERROR_CODE(gpuDevice.conjugateGradient(
GPUSimulation::GPUSimulationDevice::velocityMass,
velocityRhs + nodesCount,
velocityIn + nodesCount,
velocityOut + nodesCount,
-1,
eps
));
#else
// Apply the pressure to both velocity channels
SMM::SolverStatus status = SMM::ConjugateGradient(
velocityMassMatrix,
velocityRhs,
velocityIn,
velocityOut,
-1,
eps
#ifdef USE_PRECONDITIONING
,velocityMassIC0
#endif
);
status = SMM::ConjugateGradient(
velocityMassMatrix,
(real*)velocityRhs + nodesCount,
velocityIn + nodesCount,
velocityOut + nodesCount,
-1,
eps
#ifdef USE_PRECONDITIONING
,velocityMassIC0
#endif
);
if(status != SMM::SolverStatus::SUCCESS) {
return EC::ErrorCode(int(status), "Failed to solve velocityMassMatrix * advectedVelocity = velocityRhs");
}
#endif
}
tbb::parallel_for(tbb::blocked_range<int>(0, 2 * nodesCount), [&](const tbb::blocked_range<int>& range) {
for(int i = range.begin(); i < range.end(); ++i) {
velocityOut[i] += velocityIn[i];
}
});
FemGrid2D::VelocityDirichletConstIt velocityDrichiletIt = grid.getVelocityDirichletBegin();
const FemGrid2D::VelocityDirichletConstIt velocityDrichiletEnd = grid.getVelocityDirichletEnd();
for(;velocityDrichiletIt != velocityDrichiletEnd; ++velocityDrichiletIt) {
const FemGrid2D::VelocityDirichlet& boundary = *velocityDrichiletIt;
for(int boundaryNodeIndex = 0; boundaryNodeIndex < boundary.getSize(); ++boundaryNodeIndex) {
const int nodeIndex = boundary.getNodeIndexes()[boundaryNodeIndex];
const real x = grid.getNodesBuffer()[nodeIndex * 2];
const real y = grid.getNodesBuffer()[nodeIndex * 2 + 1];
velocityVars['x'] = x;
velocityVars['y'] = y;
float uBoundary = 0, vBoundary = 0;
boundary.eval(&velocityVars, uBoundary, vBoundary);
velocityOut[nodeIndex] = uBoundary;
velocityOut[nodeIndex + nodesCount] = vBoundary;
}
}
return EC::ErrorCode();
};
#ifdef GPU_SETUP
/// A Device manager which owns all GPU devices which will be used for simulation purposes.
/// It loads the simulation kernels for each device and is used to call each kernel.
/// @note Multi device simulation is not supported at this moment.
GPUSimulation::GPUSimulationDeviceManager gpuDevman;
GPUSimulation::GPUSimulationDevice& getSelectedGPUDevice() {
return gpuDevman.getDevice(0);
}
#endif
template<typename Shape>
struct LocalStiffnessFunctor {
LocalStiffnessFunctor() {
// Compute the integral of each pair shape function derivatives.
const auto squareDelP = [](
const real xi,
const real eta,
StaticMatrix<real, Shape::delSize, Shape::delSize>& out
) -> void {
StaticMatrix<real, 2, Shape::size> del;
Shape::del(xi, eta, del);
auto it = out.begin();
for(const auto i : del) {
for(const auto j : del) {
*it = i * j;
++it;
}
}
};
TriangleIntegrator::integrate(squareDelP, delPSq);
}
void operator()(
[[maybe_unused]]const int* elementIndexes,
const real* elementNodes,
StaticMatrix<real, Shape::size, Shape::size>& localMatrixOut
) const {
// Compute the mass matrix. Local stiffness matrix is of the form Integral(Transpose(B.DPSI(xi, eta)/|J|).B.DPSI(xi, eta)/|J| * abs(|J|) dxi, deta),
// |J| cancel out to produce more readable result 1/abs(|J|) * Integral(Transpose(B.PSI(xi, eta)).B.PSI(xi, eta) * dxi, deta)
// Where DPSI(xi, eta) = {dpsi_1(xi, eta)/dxi, ..., dpsi_n(xi, eta)/dxi, dpsi_1(xi, eta)/deta ... dpsi_n(xi, eta)/deta} is a
// row vector containing the gradient of each shape function, first are the derivatives in the xi direction then are the derivatives
// in the eta direction |J| is the determinant of the linear transformation to the unit triangle and B/|J| is a 2x2 matrix which
// represents the Grad operator in terms of xi and eta. As with mass matrix note, that B and |J| do not depend on xi and eta, only
// the gradient of the shape functions dependon xi and eta. Thus we can precompute all pairs of shape function integrals and reuse
// them in each element. The main complexity comes from the matrix B, which multiples the shape functions.
StaticMatrix<real, Shape::dim, Shape::dim> b;
real J;
differentialOperator(elementNodes, J, b);
J = real(1.0) / std::abs(J);
for(int i = 0; i < Shape::size; ++i) {
for(int j = 0; j < Shape::size; ++j) {
localMatrixOut[i][j] = real(0);
const real sq[4] = {
delPSq[i][j],
delPSq[i][Shape::size + j],
delPSq[Shape::size + i][j],
delPSq[Shape::size + i][Shape::size + j]
};
for(int k = 0; k < 2; ++k) {
// This function takes advantage that in the integral for the local matrix only del(DPSI) depends on xi and eta
// The problem is the matrix B which represents the Grad operator. We have Transpose(B.DPSI(xi, eta)).B.DPSI(xi, eta) =
// Transpose(DPSI).Transpose(B).B.DPSI. Let us denote U = Transpose(DPSI).Transpose(B) and V = B.DPSI
// U[i][j] = Sum(B[j][k]*DPSI[k][i], k=0, 1) = Sum(Transpose(DPSI)[i][k]*Transpose(B)[k][j], k = 0, 1) =
// V[i][j] = Sum(B[i][k]*DPSI[k][j], k=0, 1)
// Let us denote the result - localMatrixOut with R = U.V
// R[i][j] = Sum(U[i][k]*V[k][j], k=0, 1)
// R[i][j] = Sum(Sum(DPSI[k'][i]*B[k][k'], k'=0, 1) * Sum(B[k][k'] * DPSI[k']j[],k'=0, 1), k=0, 1)
// When we expand the two sums and the multiplication we get the expression for localMatrixOut
// This way we have separated the pairs of shape function derivatives and we can use the precomputed values
localMatrixOut[i][j] += sq[0]*b[k][0]*b[k][0] + sq[1]*b[k][0]*b[k][1] + sq[2]*b[k][0]*b[k][1] + sq[3]*b[k][1]*b[k][1];
}
localMatrixOut[i][j] *= J;
}
}
}
private:
StaticMatrix<real, Shape::delSize, Shape::delSize> delPSq;
};
template<typename RegularShape, typename DelShape>
class LocalDivergenceFunctor {
public:
LocalDivergenceFunctor() {
auto combine = [](
const real xi,
const real eta,
StaticMatrix<real, RegularShape::size, DelShape::delSize>& out
) -> void {
real p1Res[RegularShape::size] = {};
RegularShape::eval(xi, eta, p1Res);
StaticMatrix<real, 2, DelShape::size> delP2Res;
DelShape::del(xi, eta, delP2Res);
for(int i = 0; i < RegularShape::size; ++i) {
for(int j = 0; j < DelShape::size; ++j) {
for(int k = 0; k < 2; ++k) {
out[i][j + k * DelShape::size] = p1Res[i] * delP2Res[k][j];
}
}
}
};
TriangleIntegrator::integrate(combine, combined);
}
void operator()(
[[maybe_unused]]const int* elementIndexes,
const real* elementNodes,
StaticMatrix<real, RegularShape::size, DelShape::size>& delXShape,
StaticMatrix<real, RegularShape::size, DelShape::size>& delYShape
) const {
real J;
StaticMatrix<real, 2, 2> B;
differentialOperator(elementNodes, J, B);
// Compute local matrices
for(int p = 0; p < RegularShape::size; ++p) {
for(int q = 0; q < DelShape::size; ++q) {
delXShape[p][q] = B[0][0] * combined[p][q] + B[0][1] * combined[p][q + DelShape::size];
delYShape[p][q] = B[1][0] * combined[p][q] + B[1][1] * combined[p][q + DelShape::size];
}
}
}
private:
/// Matrix to hold combined values for the integrals: Integrate(psi_i(xi, eta) * dpsi_j(xi, eta)/dxi * dxi * deta) and
/// Integrate(psi(xi, eta) * dpsi(xi, eta)/deta * dxi * deta). Where i is in [0;p1Size-1] and j is in [0;p2Size-1]
/// The first p2Size entries in each row are the first integral and the second represent the second integral
StaticMatrix<real, RegularShape::size, DelShape::delSize> combined;
};
/// Handles assembling of the velocity mass matrix. It precomputes the integrals of each pair shape functions
template<typename Shape>
struct LocalMassFunctor {
LocalMassFunctor() {
const auto squareShape = [](
const real xi,
const real eta,
StaticMatrix<real, VelocityShape::size, VelocityShape::size>& out
) -> void {
real p2Res[VelocityShape::size];
VelocityShape::eval(xi, eta, p2Res);
for(int i = 0; i < VelocityShape::size; ++i) {
for(int j = 0; j < VelocityShape::size; ++j) {
out[i][j] = p2Res[i]*p2Res[j];
}
}
};
TriangleIntegrator::integrate(squareShape, shapeSquared);
}
void operator()(
[[maybe_unused]]const int* elementIndexes,
const real* elementNodes,
StaticMatrix<real, Shape::size, Shape::size>& localMatrixOut
) const {
const real jDetAbs = std::abs(linTriangleTmJacobian(elementNodes));
for(int i = 0; i < VelocityShape::size; ++i) {
for(int j = 0; j < VelocityShape::size; ++j) {
localMatrixOut[i][j] = shapeSquared[i][j] * jDetAbs;
}
}
}
private:
// Local mass matrix is of the form Integral(dot(Transpose(PSI(xi, eta)), PSI(xi, eta)) * abs(|J|) dxi * deta).
// Where PSI(xi, eta) = {psi1(xi, eta), psi2(xi, eta), psi3(xi, eta), ...} is a row vector containing all shape functions and
// |J| is the determinant of Jacobi matrix for the transformation to the unit triangle. Not that |J| is scalar which does not
// depend of xi and eta, thus we can write the formula as |J| * integral(psi_i(xi, eta) * psi_j(xi, eta) * dxi * deta). So
// there is no need to integrate the shape funcions for each element. We shall precompute the integral and then for each element
// find |J| and multiply the precompute integral by it.
StaticMatrix<real, Shape::size, Shape::size> shapeSquared;
};
};
template<typename VelocityShape, typename PressureShape>
EC::ErrorCode NavierStokesAssembly<VelocityShape, PressureShape>::init(
const char* simDescriptionPath,
const char* outPath
) {
std::fstream simDescriptionFile(simDescriptionPath);
if(!simDescriptionFile.is_open()) {
return EC::ErrorCode(1, "Could not find desciption file: %s", simDescriptionPath);
}
nlohmann::basic_json simJson;
simDescriptionFile >> simJson;
if(!simJson["mesh_path"].is_string()) {
return EC::ErrorCode(2, "Missing path to mesh.");
} else {
std::filesystem::path meshPath(*(simJson["mesh_path"].get_ptr<std::string*>()));
if (meshPath.is_relative()) {
std::filesystem::path base =
std::filesystem::path(simDescriptionPath).parent_path();
meshPath = std::filesystem::canonical(base / meshPath);
}
EC::ErrorCode error = grid.loadJSON(meshPath.string().c_str());
if(error.hasError()) {
return error;
}
}
if(simJson.contains("dt") && simJson["dt"].is_number()) {
dt = simJson["dt"];
} else {
return EC::ErrorCode(3, "Missing required param: dt");
}
if(simJson.contains("viscosity") && simJson["viscosity"].is_number()) {
viscosity = simJson["viscosity"];
} else {
return EC::ErrorCode(3, "Missing required param: viscosity");
}
if(simJson.contains("total_time") && simJson["total_time"].is_number()) {
totalTime = simJson["total_time"];
} else {
return EC::ErrorCode(3, "Missing required param: total_time");
}
if(outPath != nullptr) {
outFolder = outPath;
}
KDTreeBuilder builder;
kdTreeCPUOwner = builder.buildCPUOwner(&grid);
#ifdef GPU_SETUP
RETURN_ON_ERROR_CODE(gpuDevman.init());
#endif
return EC::ErrorCode();
}
template<typename VelocityShape, typename PressureShape>
void NavierStokesAssembly<VelocityShape, PressureShape>::setTimeStep(const real dt) {
this->dt = dt;
}
template<typename VelocityShape, typename PressureShape>
void NavierStokesAssembly<VelocityShape, PressureShape>::setOutputDir(std::string outputDir) {
this->outFolder = std::move(outputDir);
}
template<typename VelocityShape, typename PressureShape>
void NavierStokesAssembly<VelocityShape, PressureShape>::setOutputDir(std::string&& outputDir) {
this->outFolder = std::move(outputDir);
}
template<typename VelocityShape, typename PressureShape>
void NavierStokesAssembly<VelocityShape, PressureShape>::exportSolution(const int timeStep) {
if(outFolder.empty()) {
return;
}
const int nodesCount = grid.getNodesCount();
nlohmann::json outJSON = {
{"u", nlohmann::json::array()},
{"v", nlohmann::json::array()},
{"timeStep", timeStep}
};
for(int i = 0; i < nodesCount; ++i) {
outJSON["u"].push_back(currentVelocitySolution[i]);
outJSON["v"].push_back(currentVelocitySolution[i + nodesCount]);
}
for(int i = 0; i < grid.getPressureNodesCount(); ++i) {
outJSON["p"].push_back(currentPressureSolution[i]);
}
// outJSON["timeStep"] = timeStep;
const std::string& path = outFolder + "/out_" + std::to_string(timeStep) + ".json";
std::ofstream outFile(path);
if(outFile.is_open()) {
outFile << std::setw(4) << outJSON << std::endl;
} else {
assert(false && "Failed to open file for writing the result");
}
const std::string& velocityFieldPath = outFolder + "/velocity_field_" + std::to_string(timeStep) + ".svg";
drawVectorPlotSVG(
grid,
currentVelocitySolution,
currentVelocitySolution + grid.getNodesCount(),
currentPressureSolution,
velocityFieldPath.c_str(),
outputImageWidth,
outputImageHeight,
50
);
}
template<typename VelocityShape, typename PressureShape>
template<int localRows, int localCols, typename TLocalF>
void NavierStokesAssembly<VelocityShape, PressureShape>::assembleBCMatrix(
const TLocalF& localFunction,
const std::unordered_set<int>& boundaryNodes,
SMM::TripletMatrix<real>& out,
SMM::TripletMatrix<real>& outBondaryWeights