Skip to content

Commit e36a834

Browse files
committed
Add backwards Gauss-Seidel solver (for use as multigrid smoother only).
1 parent 479696d commit e36a834

File tree

2 files changed

+157
-13
lines changed

2 files changed

+157
-13
lines changed

src/alge/cs_sles_it.c

Lines changed: 156 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,8 @@ BEGIN_C_DECLS
103103
Process-local Gauss-Seidel
104104
\var CS_SLES_P_SYM_GAUSS_SEIDEL
105105
Process-local symmetric Gauss-Seidel
106+
\var CS_SLES_P_B_GAUSS_SEIDEL
107+
Process-local backward Gauss-Seidel (smoother, no convergence test)
106108
\var CS_SLES_PCR3
107109
3-layer conjugate residual
108110
@@ -290,6 +292,7 @@ const char *cs_sles_it_type_name[]
290292
N_("GMRES"),
291293
N_("Local Gauss-Seidel"),
292294
N_("Local symmetric Gauss-Seidel"),
295+
N_("Local backwards Gauss-Seidel"),
293296
N_("3-layer conjugate residual")};
294297

295298
/*============================================================================
@@ -3672,25 +3675,33 @@ _p_gauss_seidel_msr(cs_sles_it_t *c,
36723675

36733676
}
36743677

3678+
if (convergence->precision > 0. || c->plot != NULL) {
3679+
36753680
#if defined(HAVE_MPI)
36763681

3677-
if (c->comm != MPI_COMM_NULL) {
3678-
double _sum;
3679-
MPI_Allreduce(&res2, &_sum, 1, MPI_DOUBLE, MPI_SUM,
3680-
c->comm);
3681-
res2 = _sum;
3682-
}
3682+
if (c->comm != MPI_COMM_NULL) {
3683+
double _sum;
3684+
MPI_Allreduce(&res2, &_sum, 1, MPI_DOUBLE, MPI_SUM,
3685+
c->comm);
3686+
res2 = _sum;
3687+
}
36833688

36843689
#endif /* defined(HAVE_MPI) */
36853690

3686-
residue = sqrt(res2); /* Actually, residue of previous iteration */
3691+
residue = sqrt(res2); /* Actually, residue of previous iteration */
36873692

3688-
/* Convergence test */
3693+
/* Convergence test */
36893694

3690-
if (n_iter == 1)
3691-
c->setup_data->initial_residue = residue;
3695+
if (n_iter == 1)
3696+
c->setup_data->initial_residue = residue;
36923697

3693-
cvg = _convergence_test(c, n_iter, residue, convergence);
3698+
cvg = _convergence_test(c, n_iter, residue, convergence);
3699+
3700+
}
3701+
else if (n_iter >= convergence->n_iterations_max) {
3702+
convergence->n_iterations = n_iter;
3703+
cvg = CS_SLES_MAX_ITERATION;
3704+
}
36943705

36953706
}
36963707

@@ -3925,6 +3936,128 @@ _p_sym_gauss_seidel_msr(cs_sles_it_t *c,
39253936
return cvg;
39263937
}
39273938

3939+
/*----------------------------------------------------------------------------
3940+
* Solution of A.vx = Rhs using Process-local backward Gauss-Seidel.
3941+
*
3942+
* This variant is intended for smoothing with a fixed number of
3943+
* iterations, so does not compute a residue or run a convergence test.
3944+
*
3945+
* parameters:
3946+
* c <-- pointer to solver context info
3947+
* a <-- linear equation matrix
3948+
* diag_block_size <-- diagonal block size
3949+
* rotation_mode <-- halo update option for rotational periodicity
3950+
* convergence <-- convergence information structure
3951+
* rhs <-- right hand side
3952+
* vx <-> system solution
3953+
*
3954+
* returns:
3955+
* convergence state
3956+
*----------------------------------------------------------------------------*/
3957+
3958+
static cs_sles_convergence_state_t
3959+
_p_b_gauss_seidel_msr(cs_sles_it_t *c,
3960+
const cs_matrix_t *a,
3961+
int diag_block_size,
3962+
cs_halo_rotation_t rotation_mode,
3963+
cs_sles_it_convergence_t *convergence,
3964+
const cs_real_t *rhs,
3965+
cs_real_t *restrict vx)
3966+
{
3967+
cs_sles_convergence_state_t cvg;
3968+
3969+
unsigned n_iter = 0;
3970+
3971+
const cs_lnum_t n_rows = cs_matrix_get_n_rows(a);
3972+
3973+
const cs_halo_t *halo = cs_matrix_get_halo(a);
3974+
3975+
const cs_real_t *restrict ad_inv = c->setup_data->ad_inv;
3976+
3977+
const cs_lnum_t *a_row_index, *a_col_id;
3978+
const cs_real_t *a_d_val, *a_x_val;
3979+
3980+
const int *db_size = cs_matrix_get_diag_block_size(a);
3981+
cs_matrix_get_msr_arrays(a, &a_row_index, &a_col_id, &a_d_val, &a_x_val);
3982+
3983+
cvg = CS_SLES_ITERATING;
3984+
3985+
/* Current iteration */
3986+
/*-------------------*/
3987+
3988+
while (cvg == CS_SLES_ITERATING) {
3989+
3990+
n_iter += 1;
3991+
3992+
/* Synchronize ghost cells first */
3993+
3994+
if (halo != NULL)
3995+
cs_matrix_pre_vector_multiply_sync(rotation_mode, a, vx);
3996+
3997+
/* Compute Vx <- Vx - (A-diag).Rk */
3998+
3999+
if (diag_block_size == 1) {
4000+
4001+
# pragma omp parallel for if(n_rows > CS_THR_MIN && !_thread_debug)
4002+
for (cs_lnum_t ii = n_rows - 1; ii > - 1; ii--) {
4003+
4004+
const cs_lnum_t *restrict col_id = a_col_id + a_row_index[ii];
4005+
const cs_real_t *restrict m_row = a_x_val + a_row_index[ii];
4006+
const cs_lnum_t n_cols = a_row_index[ii+1] - a_row_index[ii];
4007+
4008+
cs_real_t vx0 = rhs[ii];
4009+
4010+
for (cs_lnum_t jj = n_cols-1; jj > -1; jj--)
4011+
vx0 -= (m_row[jj]*vx[col_id[jj]]);
4012+
4013+
vx0 *= ad_inv[ii];
4014+
4015+
vx[ii] = vx0;
4016+
}
4017+
4018+
}
4019+
else {
4020+
4021+
# pragma omp parallel for if(n_rows > CS_THR_MIN && !_thread_debug)
4022+
for (cs_lnum_t ii = n_rows - 1; ii > - 1; ii--) {
4023+
4024+
const cs_lnum_t *restrict col_id = a_col_id + a_row_index[ii];
4025+
const cs_real_t *restrict m_row = a_x_val + a_row_index[ii];
4026+
const cs_lnum_t n_cols = a_row_index[ii+1] - a_row_index[ii];
4027+
4028+
cs_real_t vx0[DB_SIZE_MAX], _vx[DB_SIZE_MAX];
4029+
4030+
for (cs_lnum_t kk = 0; kk < db_size[0]; kk++) {
4031+
vx0[kk] = rhs[ii*db_size[1] + kk];
4032+
}
4033+
4034+
for (cs_lnum_t jj = n_cols-1; jj > -1; jj--) {
4035+
for (cs_lnum_t kk = 0; kk < db_size[0]; kk++)
4036+
vx0[kk] -= (m_row[jj]*vx[col_id[jj]*db_size[1] + kk]);
4037+
}
4038+
4039+
_fw_and_bw_lu_gs(ad_inv + db_size[3]*ii,
4040+
db_size[0],
4041+
_vx,
4042+
vx0);
4043+
4044+
for (cs_lnum_t kk = 0; kk < db_size[0]; kk++) {
4045+
vx[ii*db_size[1] + kk] = _vx[kk];
4046+
}
4047+
4048+
}
4049+
4050+
}
4051+
4052+
if (n_iter >= convergence->n_iterations_max) {
4053+
convergence->n_iterations = n_iter;
4054+
cvg = CS_SLES_MAX_ITERATION;
4055+
}
4056+
}
4057+
4058+
return cvg;
4059+
}
4060+
39284061
/*----------------------------------------------------------------------------
39294062
* Solution of A.vx = Rhs using Process-local symmetric Gauss-Seidel.
39304063
*
@@ -4189,6 +4322,7 @@ cs_sles_it_create(cs_sles_it_type_t solver_type,
41894322
case CS_SLES_JACOBI:
41904323
case CS_SLES_P_GAUSS_SEIDEL:
41914324
case CS_SLES_P_SYM_GAUSS_SEIDEL:
4325+
case CS_SLES_P_B_GAUSS_SEIDEL:
41924326
c->_pc = NULL;
41934327
break;
41944328
default:
@@ -4460,8 +4594,8 @@ cs_sles_it_setup(void *context,
44604594
}
44614595

44624596
if ( c->type == CS_SLES_JACOBI
4463-
|| c->type == CS_SLES_P_GAUSS_SEIDEL
4464-
|| c->type == CS_SLES_P_SYM_GAUSS_SEIDEL) {
4597+
|| ( c->type >= CS_SLES_P_GAUSS_SEIDEL
4598+
&& c->type <= CS_SLES_P_B_GAUSS_SEIDEL)) {
44654599
/* Force to Jacobi in case matrix type is not adapted */
44664600
if (cs_matrix_get_type(a) != CS_MATRIX_MSR)
44674601
c->type = CS_SLES_JACOBI;
@@ -4730,6 +4864,15 @@ cs_sles_it_solve(void *context,
47304864
rhs,
47314865
vx);
47324866
break;
4867+
case CS_SLES_P_B_GAUSS_SEIDEL:
4868+
cvg = _p_b_gauss_seidel_msr(c,
4869+
a,
4870+
_diag_block_size,
4871+
rotation_mode,
4872+
&convergence,
4873+
rhs,
4874+
vx);
4875+
break;
47334876
default:
47344877
bft_error
47354878
(__FILE__, __LINE__, 0,

src/alge/cs_sles_it.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ typedef enum {
6464
CS_SLES_GMRES, /* Generalized minimal residual */
6565
CS_SLES_P_GAUSS_SEIDEL, /* Process-local Gauss-Seidel */
6666
CS_SLES_P_SYM_GAUSS_SEIDEL, /* Process-local symmetric Gauss-Seidel */
67+
CS_SLES_P_B_GAUSS_SEIDEL, /* Process-local backward Gauss-Seidel */
6768
CS_SLES_PCR3, /* 3-layer conjugate residual */
6869
CS_SLES_N_IT_TYPES /* Number of resolution algorithms */
6970

0 commit comments

Comments
 (0)