7.2
general documentation
cs_sles_it_priv.h
Go to the documentation of this file.
1 #ifndef __CS_SLES_IT_PRIV_H__
2 #define __CS_SLES_IT_PRIV_H__
3 
4 /*============================================================================
5  * Sparse Linear Equation Solvers: private elements.
6  *
7  * These elements are shared between iterative solvers and smoother
8  * both for host and device implementations, but are not accessible to
9  * calling code.
10  *============================================================================*/
11 
12 /*
13  This file is part of code_saturne, a general-purpose CFD tool.
14 
15  Copyright (C) 1998-2022 EDF S.A.
16 
17  This program is free software; you can redistribute it and/or modify it under
18  the terms of the GNU General Public License as published by the Free Software
19  Foundation; either version 2 of the License, or (at your option) any later
20  version.
21 
22  This program is distributed in the hope that it will be useful, but WITHOUT
23  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
24  FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
25  details.
26 
27  You should have received a copy of the GNU General Public License along with
28  this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
29  Street, Fifth Floor, Boston, MA 02110-1301, USA.
30 */
31 
32 /*----------------------------------------------------------------------------*/
33 
34 #include "cs_defs.h"
35 
36 /*----------------------------------------------------------------------------
37  * Standard C library headers
38  *----------------------------------------------------------------------------*/
39 
40 #include <stdarg.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <assert.h>
45 #include <math.h>
46 
47 #if defined(HAVE_MPI)
48 #include <mpi.h>
49 #endif
50 
51 /*----------------------------------------------------------------------------
52  * Local headers
53  *----------------------------------------------------------------------------*/
54 
55 #include "bft_mem.h"
56 #include "bft_error.h"
57 #include "bft_printf.h"
58 
59 #include "cs_base.h"
60 #include "cs_blas.h"
61 #include "cs_file.h"
62 #include "cs_log.h"
63 #include "cs_halo.h"
64 #include "cs_mesh.h"
65 #include "cs_matrix.h"
66 #include "cs_matrix_default.h"
67 #include "cs_matrix_util.h"
68 #include "cs_post.h"
69 #include "cs_timer.h"
70 #include "cs_time_plot.h"
71 
72 /*----------------------------------------------------------------------------
73  * Header for the current file
74  *----------------------------------------------------------------------------*/
75 
76 #include "cs_sles.h"
77 #include "cs_sles_it.h"
78 #include "cs_sles_pc.h"
79 
80 /*----------------------------------------------------------------------------*/
81 
83 
86 /*=============================================================================
87  * Local Macro Definitions
88  *============================================================================*/
89 
90 #if !defined(HUGE_VAL)
91 #define HUGE_VAL 1.E+12
92 #endif
93 
94 #define DB_SIZE_MAX 9
95 
96 /*=============================================================================
97  * Local Structure Definitions
98  *============================================================================*/
99 
100 /*----------------------------------------------------------------------------
101  * Function pointer for actual resolution of a linear system.
102  *
103  * parameters:
104  * c <-- pointer to solver context info
105  * a <-- linear equation matrix
106  * convergence <-- convergence information structure
107  * rhs <-- right hand side
108  * vx --> system solution
109  * aux_size <-- number of elements in aux_vectors (in bytes)
110  * aux_vectors --- optional working area (allocation otherwise)
111  *
112  * returns:
113  * convergence status
114  *----------------------------------------------------------------------------*/
115 
117 (cs_sles_it_solve_t) (cs_sles_it_t *c,
118  const cs_matrix_t *a,
119  cs_lnum_t diag_block_size,
120  cs_sles_it_convergence_t *convergence,
121  const cs_real_t *rhs,
122  cs_real_t *restrict vx,
123  size_t aux_size,
124  void *aux_vectors);
125 
126 /* Solver setup data */
127 /*-------------------*/
128 
129 typedef struct _cs_sles_it_setup_t {
130 
131  double initial_residue; /* last initial residue value */
132 
133  cs_lnum_t n_rows; /* number of associated rows */
134 
135  const cs_real_t *ad_inv; /* pointer to diagonal inverse */
136  cs_real_t *_ad_inv; /* private pointer to
137  diagonal inverse */
138 
139  void *pc_context; /* preconditioner context */
140  cs_sles_pc_apply_t *pc_apply; /* preconditioner apply */
141 
142 } cs_sles_it_setup_t;
143 
144 /* Solver additional data */
145 /*------------------------*/
146 
147 typedef struct _cs_sles_it_add_t {
148 
149  cs_lnum_t *order; /* ordering */
150 
151 } cs_sles_it_add_t;
152 
153 /* Basic per linear system options and logging */
154 /*---------------------------------------------*/
155 
156 struct _cs_sles_it_t {
157 
158  /* Base settings */
159 
160  cs_sles_it_type_t type; /* Solver type */
161 
162  bool on_device; /* SpMV on device ? */
163 
164  bool update_stats; /* do stats need to be updated ? */
165  bool ignore_convergence; /* ignore convergence for some
166  solvers used as preconditioners */
167 
168  int n_max_iter; /* maximum number of iterations */
169  int restart_interval; /* maximum number of iterations
170  before restarting the algorithm
171  (only applicable for GMRES or GCR
172  algorithm up to now) */
173 
174  cs_sles_it_solve_t *solve; /* pointer to solve function */
175 
176  cs_sles_pc_t *pc; /* pointer to possibly shared
177  preconditioner object */
178  cs_sles_pc_t *_pc; /* pointer to owned
179  preconditioner object */
180 
181  /* Performance data */
182 
183  unsigned n_setups; /* Number of times system setup */
184  unsigned n_solves; /* Number of times system solved */
185 
186  unsigned n_iterations_last; /* Number of iterations for last
187  system resolution */
188  unsigned n_iterations_min; /* Minimum number ot iterations
189  in system resolution history */
190  unsigned n_iterations_max; /* Maximum number ot iterations
191  in system resolution history */
192  unsigned long long n_iterations_tot; /* Total accumulated number of
193  iterations */
194 
195  cs_timer_counter_t t_setup; /* Total setup */
196  cs_timer_counter_t t_solve; /* Total time used */
197 
198  /* Plot info */
199 
200  int plot_time_stamp; /* Plot time stamp */
201  cs_time_plot_t *plot; /* Pointer to plot structure,
202  which may be owned or shared */
203  cs_time_plot_t *_plot; /* Pointer to own plot structure */
204 
205  /* Communicator used for reduction operations
206  (if left at NULL, main communicator will be used) */
207 
208 # if defined(HAVE_MPI)
209  MPI_Comm comm;
210  MPI_Comm caller_comm;
211  int caller_n_ranks;
212 # endif
213 
214  /* Solver setup */
215 
216  const struct _cs_sles_it_t *shared; /* pointer to context sharing some
217  setup and preconditioner data,
218  or NULL */
219 
220  cs_sles_it_add_t *add_data; /* additional data */
221 
222  cs_sles_it_setup_t *setup_data; /* setup data */
223 
224  /* Alternative solvers (fallback or heuristics) */
225 
226  cs_sles_convergence_state_t fallback_cvg; /* threshold for fallback
227  convergence */
228  cs_sles_it_t *fallback; /* fallback solver */
229 
230 };
231 
232 /* Convergence testing and tracking */
233 /*----------------------------------*/
234 
235 struct _cs_sles_it_convergence_t {
236 
237  const char *name; /* Pointer to name string */
238 
239  int verbosity; /* Verbosity level */
240 
241  unsigned n_iterations; /* Current number of iterations */
242  unsigned n_iterations_max; /* Maximum number of iterations */
243 
244  double precision; /* Precision limit */
245  double r_norm; /* Residue normalization */
246  double residue; /* Current residue */
247 
248 };
249 
250 /*============================================================================
251  * Inline static function definitions
252  *============================================================================*/
253 
254 /*----------------------------------------------------------------------------
255  * Compute dot product, summing result over all ranks.
256  *
257  * parameters:
258  * c <-- pointer to solver context info
259  * x <-- first vector in s = x.y
260  * y <-- second vector in s = x.y
261  *
262  * returns:
263  * result of s = x.y
264  *----------------------------------------------------------------------------*/
265 
266 inline static double
267 _dot_product(const cs_sles_it_t *c,
268  const cs_real_t *x,
269  const cs_real_t *y)
270 {
271  double s = cs_dot(c->setup_data->n_rows, x, y);
272 
273 #if defined(HAVE_MPI)
274 
275  if (c->comm != MPI_COMM_NULL) {
276  double _sum;
277  MPI_Allreduce(&s, &_sum, 1, MPI_DOUBLE, MPI_SUM, c->comm);
278  s = _sum;
279  }
280 
281 #endif /* defined(HAVE_MPI) */
282 
283  return s;
284 }
285 
286 /*----------------------------------------------------------------------------
287  * Compute dot product x.x, summing result over all ranks.
288  *
289  * parameters:
290  * c <-- pointer to solver context info
291  * x <-- vector in s = x.x
292  *
293  * returns:
294  * result of s = x.x
295  *----------------------------------------------------------------------------*/
296 
297 inline static double
298 _dot_product_xx(const cs_sles_it_t *c,
299  const cs_real_t *x)
300 {
301  double s;
302 
303  s = cs_dot_xx(c->setup_data->n_rows, x);
304 
305 #if defined(HAVE_MPI)
306 
307  if (c->comm != MPI_COMM_NULL) {
308  double _sum;
309  MPI_Allreduce(&s, &_sum, 1, MPI_DOUBLE, MPI_SUM, c->comm);
310  s = _sum;
311  }
312 
313 #endif /* defined(HAVE_MPI) */
314 
315  return s;
316 }
317 
318 /*----------------------------------------------------------------------------
319  * Compute 2 dot products x.x and x.y, summing result over all ranks.
320  *
321  * parameters:
322  * c <-- pointer to solver context info
323  * x <-- vector in s1 = x.x and s2 = x.y
324  * y <-- vector in s2 = x.y
325  * s1 --> result of s1 = x.x
326  * s2 --> result of s2 = x.y
327  *----------------------------------------------------------------------------*/
328 
329 inline static void
330 _dot_products_xx_xy(const cs_sles_it_t *c,
331  const cs_real_t *x,
332  const cs_real_t *y,
333  double *s1,
334  double *s2)
335 {
336  double s[2];
337 
338  cs_dot_xx_xy(c->setup_data->n_rows, x, y, s, s+1);
339 
340 #if defined(HAVE_MPI)
341 
342  if (c->comm != MPI_COMM_NULL) {
343  double _sum[2];
344  MPI_Allreduce(s, _sum, 2, MPI_DOUBLE, MPI_SUM, c->comm);
345  s[0] = _sum[0];
346  s[1] = _sum[1];
347  }
348 
349 #endif /* defined(HAVE_MPI) */
350 
351  *s1 = s[0];
352  *s2 = s[1];
353 }
354 
355 /*----------------------------------------------------------------------------
356  * Compute 2 dot products x.x and x.y, summing result over all ranks.
357  *
358  * parameters:
359  * c <-- pointer to solver context info
360  * x <-- vector in s1 = x.y
361  * y <-- vector in s1 = x.y and s2 = y.z
362  * z <-- vector in s2 = y.z
363  * s1 --> result of s1 = x.y
364  * s2 --> result of s2 = y.z
365  *----------------------------------------------------------------------------*/
366 
367 inline static void
368 _dot_products_xy_yz(const cs_sles_it_t *c,
369  const cs_real_t *x,
370  const cs_real_t *y,
371  const cs_real_t *z,
372  double *s1,
373  double *s2)
374 {
375  double s[2];
376 
377  cs_dot_xy_yz(c->setup_data->n_rows, x, y, z, s, s+1);
378 
379 #if defined(HAVE_MPI)
380 
381  if (c->comm != MPI_COMM_NULL) {
382  double _sum[2];
383  MPI_Allreduce(s, _sum, 2, MPI_DOUBLE, MPI_SUM, c->comm);
384  s[0] = _sum[0];
385  s[1] = _sum[1];
386  }
387 
388 #endif /* defined(HAVE_MPI) */
389 
390  *s1 = s[0];
391  *s2 = s[1];
392 }
393 
394 /*----------------------------------------------------------------------------
395  * Compute 3 dot products, summing result over all ranks.
396  *
397  * parameters:
398  * c <-- pointer to solver context info
399  * x <-- first vector
400  * y <-- second vector
401  * z <-- third vector
402  * s1 --> result of s1 = x.x
403  * s2 --> result of s2 = x.y
404  * s3 --> result of s3 = y.z
405  *----------------------------------------------------------------------------*/
406 
407 inline static void
408 _dot_products_xx_xy_yz(const cs_sles_it_t *c,
409  const cs_real_t *x,
410  const cs_real_t *y,
411  const cs_real_t *z,
412  double *s1,
413  double *s2,
414  double *s3)
415 {
416  double s[3];
417 
418  cs_dot_xx_xy_yz(c->setup_data->n_rows, x, y, z, s, s+1, s+2);
419 
420 #if defined(HAVE_MPI)
421 
422  if (c->comm != MPI_COMM_NULL) {
423  double _sum[3];
424 
425  MPI_Allreduce(s, _sum, 3, MPI_DOUBLE, MPI_SUM, c->comm);
426  s[0] = _sum[0];
427  s[1] = _sum[1];
428  s[2] = _sum[2];
429  }
430 
431 #endif /* defined(HAVE_MPI) */
432 
433  *s1 = s[0];
434  *s2 = s[1];
435  *s3 = s[2];
436 }
437 
438 /*----------------------------------------------------------------------------
439  * Compute 5 dot products, summing result over all ranks.
440  *
441  * parameters:
442  * c <-- pointer to solver context info
443  * x <-- first vector
444  * y <-- second vector
445  * z <-- third vector
446  * xx --> result of x.x
447  * yy --> result of y.y
448  * xy --> result of x.y
449  * xz --> result of x.z
450  * yz --> result of y.z
451  *----------------------------------------------------------------------------*/
452 
453 inline static void
454 _dot_products_xx_yy_xy_xz_yz(const cs_sles_it_t *c,
455  const cs_real_t *x,
456  const cs_real_t *y,
457  const cs_real_t *z,
458  double *xx,
459  double *yy,
460  double *xy,
461  double *xz,
462  double *yz)
463 {
464  double s[5];
465 
466  cs_dot_xx_yy_xy_xz_yz(c->setup_data->n_rows, x, y, z, s, s+1, s+2, s+3, s+4);
467 
468 #if defined(HAVE_MPI)
469 
470  if (c->comm != MPI_COMM_NULL) {
471  double _sum[5];
472  MPI_Allreduce(s, _sum, 5, MPI_DOUBLE, MPI_SUM, c->comm);
473  memcpy(s, _sum, 5*sizeof(double));
474  }
475 
476 #endif /* defined(HAVE_MPI) */
477 
478  *xx = s[0];
479  *yy = s[1];
480  *xy = s[2];
481  *xz = s[3];
482  *yz = s[4];
483 }
484 
485 /*----------------------------------------------------------------------------
486  * Compute 4 dot products, summing result over all ranks.
487  *
488  * parameters:
489  * c <-- pointer to solver context info
490  * v <-- first vector
491  * r <-- second vector
492  * w <-- third vector
493  * q <-- fourth vector
494  * s1 --> result of s1 = v.r
495  * s2 --> result of s2 = v.w
496  * s3 --> result of s3 = v.q
497  * s4 --> result of s4 = r.r
498  *----------------------------------------------------------------------------*/
499 
500 inline static void
501 _dot_products_vr_vw_vq_rr(const cs_sles_it_t *c,
502  const cs_real_t *v,
503  const cs_real_t *r,
504  const cs_real_t *w,
505  const cs_real_t *q,
506  double *s1,
507  double *s2,
508  double *s3,
509  double *s4)
510 {
511  double s[4];
512 
513  /* Use two separate call as cs_blas.c does not yet hav matching call */
514 
515  cs_dot_xy_yz(c->setup_data->n_rows, w, v, q, s+1, s+2);
516  cs_dot_xx_xy(c->setup_data->n_rows, r, v, s+3, s);
517 
518 #if defined(HAVE_MPI)
519 
520  if (c->comm != MPI_COMM_NULL) {
521  double _sum[4];
522  MPI_Allreduce(s, _sum, 4, MPI_DOUBLE, MPI_SUM, c->comm);
523  memcpy(s, _sum, 4*sizeof(double));
524  }
525 
526 #endif /* defined(HAVE_MPI) */
527 
528  *s1 = s[0];
529  *s2 = s[1];
530  *s3 = s[2];
531  *s4 = s[3];
532 }
533 
534 /*----------------------------------------------------------------------------
535  * Block Jacobi utilities.
536  * Compute forward and backward to solve an LU 3*3 system.
537  *
538  * parameters:
539  * mat <-- 3*3*dim matrix
540  * x --> solution
541  * b --> 1st part of RHS (c - b)
542  * c --> 2nd part of RHS (c - b)
543  *----------------------------------------------------------------------------*/
544 
545 inline static void
546 _fw_and_bw_lu33(const cs_real_t mat[],
547  cs_real_t x[restrict],
548  const cs_real_t b[restrict],
549  const cs_real_t c[restrict])
550 {
551  cs_real_t aux[3];
552 
553  aux[0] = (c[0] - b[0]);
554  aux[1] = (c[1] - b[1]) - aux[0]*mat[3];
555  aux[2] = (c[2] - b[2]) - aux[0]*mat[6] - aux[1]*mat[7];
556 
557  x[2] = aux[2]/mat[8];
558  x[1] = (aux[1] - mat[5]*x[2])/mat[4];
559  x[0] = (aux[0] - mat[1]*x[1] - mat[2]*x[2])/mat[0];
560 }
561 
562 /*----------------------------------------------------------------------------
563  * Block Jacobi utilities.
564  * Compute forward and backward to solve an LU P*P system.
565  *
566  * parameters:
567  * mat <-- P*P*dim matrix
568  * db_size <-- matrix size
569  * x --> solution
570  * b --> 1st part of RHS (c - b)
571  * c --> 2nd part of RHS (c - b)
572  *----------------------------------------------------------------------------*/
573 
574 inline static void
575 _fw_and_bw_lu(const cs_real_t mat[],
576  int db_size,
577  cs_real_t x[restrict],
578  const cs_real_t b[restrict],
579  const cs_real_t c[restrict])
580 {
581  assert(db_size <= DB_SIZE_MAX);
582  cs_real_t aux[DB_SIZE_MAX];
583 
584  /* forward */
585  for (int ii = 0; ii < db_size; ii++) {
586  aux[ii] = (c[ii] - b[ii]);
587  for (int jj = 0; jj < ii; jj++) {
588  aux[ii] -= aux[jj]*mat[ii*db_size + jj];
589  }
590  }
591 
592  /* backward */
593  for (int ii = db_size - 1; ii >= 0; ii-=1) {
594  x[ii] = aux[ii];
595  for (int jj = db_size - 1; jj > ii; jj-=1) {
596  x[ii] -= x[jj]*mat[ii*db_size + jj];
597  }
598  x[ii] /= mat[ii*(db_size + 1)];
599  }
600 }
601 
602 /*----------------------------------------------------------------------------
603  * Block Gauss-Seidel utilities.
604  * Compute forward and backward to solve an LU P*P system.
605  *
606  * parameters:
607  * mat <-- P*P*dim matrix
608  * db_size <-- matrix size
609  * x --> solution
610  * b <-> RHS in, work array
611  *----------------------------------------------------------------------------*/
612 
613 inline static void
614 _fw_and_bw_lu_gs(const cs_real_t mat[],
615  int db_size,
616  cs_real_t x[restrict],
617  const cs_real_t b[restrict])
618 {
619  assert(db_size <= DB_SIZE_MAX);
620 
621  /* forward */
622  for (int ii = 0; ii < db_size; ii++) {
623  x[ii] = b[ii];
624  for (int jj = 0; jj < ii; jj++)
625  x[ii] -= x[jj]*mat[ii*db_size + jj];
626  }
627 
628  /* backward */
629  for (int ii = db_size - 1; ii >= 0; ii--) {
630  for (int jj = db_size - 1; jj > ii; jj--)
631  x[ii] -= x[jj]*mat[ii*db_size + jj];
632  x[ii] /= mat[ii*(db_size + 1)];
633  }
634 }
635 
636 /*============================================================================
637  * Public function definitions
638  *============================================================================*/
639 
640 /*----------------------------------------------------------------------------*/
654 /*----------------------------------------------------------------------------*/
655 
656 void
657 cs_sles_it_convergence_init(cs_sles_it_convergence_t *convergence,
658  const char *solver_name,
659  int verbosity,
660  unsigned n_iter_max,
661  double precision,
662  double r_norm,
663  double *residue);
664 
665 /*----------------------------------------------------------------------------
666  * Setup context for iterative linear solver.
667  *
668  * This function is common to most solvers
669  *
670  * parameters:
671  * c <-> pointer to solver context info
672  * name <-- pointer to system name
673  * a <-- matrix
674  * verbosity <-- verbosity level
675  * diag_block_size <-- diagonal block size
676  * block_nn_inverse <-- if diagonal block size is 3 or 6, compute inverse of
677  * block if true, inverse of block diagonal otherwise
678  *----------------------------------------------------------------------------*/
679 
680 void
681 cs_sles_it_setup_priv(cs_sles_it_t *c,
682  const char *name,
683  const cs_matrix_t *a,
684  int verbosity,
685  int diag_block_size,
686  bool block_nn_inverse);
687 
690 /*----------------------------------------------------------------------------*/
691 
693 
694 #endif /* __CS_SLES_IT_PRIV_H__ */
#define restrict
Definition: cs_defs.h:142
void cs_dot_xx_xy_yz(cs_lnum_t n, const cs_real_t *restrict x, const cs_real_t *restrict y, const cs_real_t *restrict z, double *xx, double *xy, double *yz)
Return 3 dot products of 3 vectors: x.x, x.y, and y.z.
Definition: cs_blas.c:1622
void cs_dot_xy_yz(cs_lnum_t n, const cs_real_t *restrict x, const cs_real_t *restrict y, const cs_real_t *restrict z, double *xy, double *yz)
Return 2 dot products of 3 vectors: x.y, and y.z.
Definition: cs_blas.c:1593
cs_sles_it_type_t
Definition: cs_sles_it.h:55
double cs_dot(cs_lnum_t n, const cs_real_t *x, const cs_real_t *y)
Return the dot product of 2 vectors: x.y.
Definition: cs_blas.c:1465
#define BEGIN_C_DECLS
Definition: cs_defs.h:510
struct _cs_sles_pc_t cs_sles_pc_t
Definition: cs_sles_pc.h:66
struct _cs_sles_it_t cs_sles_it_t
Definition: cs_sles_it.h:86
double cs_real_t
Floating-point value.
Definition: cs_defs.h:322
void cs_dot_xx_xy(cs_lnum_t n, const cs_real_t *restrict x, const cs_real_t *restrict y, double *xx, double *xy)
Return 2 dot products of 2 vectors: x.x, and x.y.
Definition: cs_blas.c:1566
cs_sles_pc_state_t() cs_sles_pc_apply_t(void *context, const cs_real_t *x_in, cs_real_t *x_out)
Function pointer for application of a preconditioner.
Definition: cs_sles_pc.h:145
struct _cs_matrix_t cs_matrix_t
Definition: cs_matrix.h:110
struct _cs_time_plot_t cs_time_plot_t
Definition: cs_time_plot.h:48
cs_sles_convergence_state_t
Convergence status indicator.
Definition: cs_sles.h:56
double precision, dimension(:,:,:), allocatable v
Definition: atimbr.f90:114
void cs_dot_xx_yy_xy_xz_yz(cs_lnum_t n, const cs_real_t *restrict x, const cs_real_t *restrict y, const cs_real_t *restrict z, double *xx, double *yy, double *xy, double *xz, double *yz)
Return 5 dot products of 3 vectors: x.x, y.y, x.y, x.z, and y.z.
Definition: cs_blas.c:1654
struct _cs_sles_it_convergence_t cs_sles_it_convergence_t
Definition: cs_sles_it.h:90
double precision, save a
Definition: cs_fuel_incl.f90:146
double cs_dot_xx(cs_lnum_t n, const cs_real_t *x)
Return dot products of a vector with itself: x.x.
Definition: cs_blas.c:1486
int cs_lnum_t
local mesh entity id
Definition: cs_defs.h:316
#define END_C_DECLS
Definition: cs_defs.h:511
Definition: cs_timer.h:55
double precision, save b
Definition: cs_fuel_incl.f90:146