@@ -103,6 +103,8 @@ BEGIN_C_DECLS
103
103
Process-local Gauss-Seidel
104
104
\var CS_SLES_P_SYM_GAUSS_SEIDEL
105
105
Process-local symmetric Gauss-Seidel
106
+ \var CS_SLES_P_B_GAUSS_SEIDEL
107
+ Process-local backward Gauss-Seidel (smoother, no convergence test)
106
108
\var CS_SLES_PCR3
107
109
3-layer conjugate residual
108
110
@@ -290,6 +292,7 @@ const char *cs_sles_it_type_name[]
290
292
N_ ("GMRES" ),
291
293
N_ ("Local Gauss-Seidel" ),
292
294
N_ ("Local symmetric Gauss-Seidel" ),
295
+ N_ ("Local backwards Gauss-Seidel" ),
293
296
N_ ("3-layer conjugate residual" )};
294
297
295
298
/*============================================================================
@@ -3672,25 +3675,33 @@ _p_gauss_seidel_msr(cs_sles_it_t *c,
3672
3675
3673
3676
}
3674
3677
3678
+ if (convergence -> precision > 0. || c -> plot != NULL ) {
3679
+
3675
3680
#if defined(HAVE_MPI )
3676
3681
3677
- if (c -> comm != MPI_COMM_NULL ) {
3678
- double _sum ;
3679
- MPI_Allreduce (& res2 , & _sum , 1 , MPI_DOUBLE , MPI_SUM ,
3680
- c -> comm );
3681
- res2 = _sum ;
3682
- }
3682
+ if (c -> comm != MPI_COMM_NULL ) {
3683
+ double _sum ;
3684
+ MPI_Allreduce (& res2 , & _sum , 1 , MPI_DOUBLE , MPI_SUM ,
3685
+ c -> comm );
3686
+ res2 = _sum ;
3687
+ }
3683
3688
3684
3689
#endif /* defined(HAVE_MPI) */
3685
3690
3686
- residue = sqrt (res2 ); /* Actually, residue of previous iteration */
3691
+ residue = sqrt (res2 ); /* Actually, residue of previous iteration */
3687
3692
3688
- /* Convergence test */
3693
+ /* Convergence test */
3689
3694
3690
- if (n_iter == 1 )
3691
- c -> setup_data -> initial_residue = residue ;
3695
+ if (n_iter == 1 )
3696
+ c -> setup_data -> initial_residue = residue ;
3692
3697
3693
- cvg = _convergence_test (c , n_iter , residue , convergence );
3698
+ cvg = _convergence_test (c , n_iter , residue , convergence );
3699
+
3700
+ }
3701
+ else if (n_iter >= convergence -> n_iterations_max ) {
3702
+ convergence -> n_iterations = n_iter ;
3703
+ cvg = CS_SLES_MAX_ITERATION ;
3704
+ }
3694
3705
3695
3706
}
3696
3707
@@ -3925,6 +3936,128 @@ _p_sym_gauss_seidel_msr(cs_sles_it_t *c,
3925
3936
return cvg ;
3926
3937
}
3927
3938
3939
+ /*----------------------------------------------------------------------------
3940
+ * Solution of A.vx = Rhs using Process-local backward Gauss-Seidel.
3941
+ *
3942
+ * This variant is intended for smoothing with a fixed number of
3943
+ * iterations, so does not compute a residue or run a convergence test.
3944
+ *
3945
+ * parameters:
3946
+ * c <-- pointer to solver context info
3947
+ * a <-- linear equation matrix
3948
+ * diag_block_size <-- diagonal block size
3949
+ * rotation_mode <-- halo update option for rotational periodicity
3950
+ * convergence <-- convergence information structure
3951
+ * rhs <-- right hand side
3952
+ * vx <-> system solution
3953
+ *
3954
+ * returns:
3955
+ * convergence state
3956
+ *----------------------------------------------------------------------------*/
3957
+
3958
+ static cs_sles_convergence_state_t
3959
+ _p_b_gauss_seidel_msr (cs_sles_it_t * c ,
3960
+ const cs_matrix_t * a ,
3961
+ int diag_block_size ,
3962
+ cs_halo_rotation_t rotation_mode ,
3963
+ cs_sles_it_convergence_t * convergence ,
3964
+ const cs_real_t * rhs ,
3965
+ cs_real_t * restrict vx )
3966
+ {
3967
+ cs_sles_convergence_state_t cvg ;
3968
+
3969
+ unsigned n_iter = 0 ;
3970
+
3971
+ const cs_lnum_t n_rows = cs_matrix_get_n_rows (a );
3972
+
3973
+ const cs_halo_t * halo = cs_matrix_get_halo (a );
3974
+
3975
+ const cs_real_t * restrict ad_inv = c -> setup_data -> ad_inv ;
3976
+
3977
+ const cs_lnum_t * a_row_index , * a_col_id ;
3978
+ const cs_real_t * a_d_val , * a_x_val ;
3979
+
3980
+ const int * db_size = cs_matrix_get_diag_block_size (a );
3981
+ cs_matrix_get_msr_arrays (a , & a_row_index , & a_col_id , & a_d_val , & a_x_val );
3982
+
3983
+ cvg = CS_SLES_ITERATING ;
3984
+
3985
+ /* Current iteration */
3986
+ /*-------------------*/
3987
+
3988
+ while (cvg == CS_SLES_ITERATING ) {
3989
+
3990
+ n_iter += 1 ;
3991
+
3992
+ /* Synchronize ghost cells first */
3993
+
3994
+ if (halo != NULL )
3995
+ cs_matrix_pre_vector_multiply_sync (rotation_mode , a , vx );
3996
+
3997
+ /* Compute Vx <- Vx - (A-diag).Rk */
3998
+
3999
+ if (diag_block_size == 1 ) {
4000
+
4001
+ # pragma omp parallel for if(n_rows > CS_THR_MIN && !_thread_debug)
4002
+ for (cs_lnum_t ii = n_rows - 1 ; ii > - 1 ; ii -- ) {
4003
+
4004
+ const cs_lnum_t * restrict col_id = a_col_id + a_row_index [ii ];
4005
+ const cs_real_t * restrict m_row = a_x_val + a_row_index [ii ];
4006
+ const cs_lnum_t n_cols = a_row_index [ii + 1 ] - a_row_index [ii ];
4007
+
4008
+ cs_real_t vx0 = rhs [ii ];
4009
+
4010
+ for (cs_lnum_t jj = n_cols - 1 ; jj > -1 ; jj -- )
4011
+ vx0 -= (m_row [jj ]* vx [col_id [jj ]]);
4012
+
4013
+ vx0 *= ad_inv [ii ];
4014
+
4015
+ vx [ii ] = vx0 ;
4016
+ }
4017
+
4018
+ }
4019
+ else {
4020
+
4021
+ # pragma omp parallel for if(n_rows > CS_THR_MIN && !_thread_debug)
4022
+ for (cs_lnum_t ii = n_rows - 1 ; ii > - 1 ; ii -- ) {
4023
+
4024
+ const cs_lnum_t * restrict col_id = a_col_id + a_row_index [ii ];
4025
+ const cs_real_t * restrict m_row = a_x_val + a_row_index [ii ];
4026
+ const cs_lnum_t n_cols = a_row_index [ii + 1 ] - a_row_index [ii ];
4027
+
4028
+ cs_real_t vx0 [DB_SIZE_MAX ], _vx [DB_SIZE_MAX ];
4029
+
4030
+ for (cs_lnum_t kk = 0 ; kk < db_size [0 ]; kk ++ ) {
4031
+ vx0 [kk ] = rhs [ii * db_size [1 ] + kk ];
4032
+ }
4033
+
4034
+ for (cs_lnum_t jj = n_cols - 1 ; jj > -1 ; jj -- ) {
4035
+ for (cs_lnum_t kk = 0 ; kk < db_size [0 ]; kk ++ )
4036
+ vx0 [kk ] -= (m_row [jj ]* vx [col_id [jj ]* db_size [1 ] + kk ]);
4037
+ }
4038
+
4039
+ _fw_and_bw_lu_gs (ad_inv + db_size [3 ]* ii ,
4040
+ db_size [0 ],
4041
+ _vx ,
4042
+ vx0 );
4043
+
4044
+ for (cs_lnum_t kk = 0 ; kk < db_size [0 ]; kk ++ ) {
4045
+ vx [ii * db_size [1 ] + kk ] = _vx [kk ];
4046
+ }
4047
+
4048
+ }
4049
+
4050
+ }
4051
+
4052
+ if (n_iter >= convergence -> n_iterations_max ) {
4053
+ convergence -> n_iterations = n_iter ;
4054
+ cvg = CS_SLES_MAX_ITERATION ;
4055
+ }
4056
+ }
4057
+
4058
+ return cvg ;
4059
+ }
4060
+
3928
4061
/*----------------------------------------------------------------------------
3929
4062
* Solution of A.vx = Rhs using Process-local symmetric Gauss-Seidel.
3930
4063
*
@@ -4189,6 +4322,7 @@ cs_sles_it_create(cs_sles_it_type_t solver_type,
4189
4322
case CS_SLES_JACOBI :
4190
4323
case CS_SLES_P_GAUSS_SEIDEL :
4191
4324
case CS_SLES_P_SYM_GAUSS_SEIDEL :
4325
+ case CS_SLES_P_B_GAUSS_SEIDEL :
4192
4326
c -> _pc = NULL ;
4193
4327
break ;
4194
4328
default :
@@ -4460,8 +4594,8 @@ cs_sles_it_setup(void *context,
4460
4594
}
4461
4595
4462
4596
if ( c -> type == CS_SLES_JACOBI
4463
- || c -> type = = CS_SLES_P_GAUSS_SEIDEL
4464
- || c -> type == CS_SLES_P_SYM_GAUSS_SEIDEL ) {
4597
+ || ( c -> type > = CS_SLES_P_GAUSS_SEIDEL
4598
+ && c -> type <= CS_SLES_P_B_GAUSS_SEIDEL ) ) {
4465
4599
/* Force to Jacobi in case matrix type is not adapted */
4466
4600
if (cs_matrix_get_type (a ) != CS_MATRIX_MSR )
4467
4601
c -> type = CS_SLES_JACOBI ;
@@ -4730,6 +4864,15 @@ cs_sles_it_solve(void *context,
4730
4864
rhs ,
4731
4865
vx );
4732
4866
break ;
4867
+ case CS_SLES_P_B_GAUSS_SEIDEL :
4868
+ cvg = _p_b_gauss_seidel_msr (c ,
4869
+ a ,
4870
+ _diag_block_size ,
4871
+ rotation_mode ,
4872
+ & convergence ,
4873
+ rhs ,
4874
+ vx );
4875
+ break ;
4733
4876
default :
4734
4877
bft_error
4735
4878
(__FILE__ , __LINE__ , 0 ,
0 commit comments