8.1
general documentation
cs_parall.h
Go to the documentation of this file.
1 #ifndef __CS_PARALL_H__
2 #define __CS_PARALL_H__
3 
4 /*============================================================================
5  * Functions dealing with parallelism
6  *============================================================================*/
7 
8 /*
9  This file is part of code_saturne, a general-purpose CFD tool.
10 
11  Copyright (C) 1998-2023 EDF S.A.
12 
13  This program is free software; you can redistribute it and/or modify it under
14  the terms of the GNU General Public License as published by the Free Software
15  Foundation; either version 2 of the License, or (at your option) any later
16  version.
17 
18  This program is distributed in the hope that it will be useful, but WITHOUT
19  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
20  FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
21  details.
22 
23  You should have received a copy of the GNU General Public License along with
24  this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
25  Street, Fifth Floor, Boston, MA 02110-1301, USA.
26 */
27 
28 /*----------------------------------------------------------------------------*/
29 
30 /*----------------------------------------------------------------------------
31  * Local headers
32  *----------------------------------------------------------------------------*/
33 
34 #include "cs_defs.h"
35 
36 /*----------------------------------------------------------------------------*/
37 
39 
40 /*============================================================================
41  * General types and macros used throughout code_saturne
42  *============================================================================*/
43 
44 /*----------------------------------------------------------------------------
45  * Variable value type.
46  *----------------------------------------------------------------------------*/
47 
51 typedef enum {
52 
62 
63 /*============================================================================
64  * Global variables
65  *============================================================================*/
66 
67 /* Preferred indexed sum option, adapted to shared-memory parallelism */
68 
70 
71 /*=============================================================================
72  * Public function prototypes
73  *============================================================================*/
74 
75 /*----------------------------------------------------------------------------*/
82 /*----------------------------------------------------------------------------*/
83 
84 #if defined(HAVE_MPI_IN_PLACE)
85 
86 inline static void
88  const int n)
89 {
90  if (cs_glob_n_ranks > 1) {
91  MPI_Allreduce(MPI_IN_PLACE, cpt, n, CS_MPI_GNUM, MPI_SUM,
93  }
94 }
95 
96 #elif defined(HAVE_MPI)
97 
98 void
100  const int n);
101 
102 #else
103 
104 #define cs_parall_counter(_cpt, _n)
105 
106 #endif
107 
108 /*----------------------------------------------------------------------------*/
115 /*----------------------------------------------------------------------------*/
116 
117 #if defined(HAVE_MPI_IN_PLACE)
118 
119 inline static void
121  const int n)
122 {
123  if (cs_glob_n_ranks > 1) {
124  MPI_Allreduce(MPI_IN_PLACE, cpt, n, CS_MPI_LNUM, MPI_MAX,
126  }
127 }
128 
129 #elif defined(HAVE_MPI)
130 
131 void
133  const int n);
134 
135 #else
136 
137 #define cs_parall_counter_max(_cpt, _n)
138 
139 #endif
140 
141 /*----------------------------------------------------------------------------*/
149 /*----------------------------------------------------------------------------*/
150 
151 #if defined(HAVE_MPI_IN_PLACE)
152 
153 inline static void
155  cs_datatype_t datatype,
156  void *val)
157 {
158  if (cs_glob_n_ranks > 1) {
159  MPI_Allreduce(MPI_IN_PLACE, val, n, cs_datatype_to_mpi[datatype], MPI_SUM,
161  }
162 }
163 
164 #elif defined(HAVE_MPI)
165 
166 void
167 cs_parall_sum(int n,
168  cs_datatype_t datatype,
169  void *val);
170 
171 #else
172 
173 #define cs_parall_sum(_n, _datatype, _val) { };
174 
175 #endif
176 
177 /*----------------------------------------------------------------------------*/
186 /*----------------------------------------------------------------------------*/
187 
188 #if defined(HAVE_MPI_IN_PLACE)
189 
190 inline static void
192  cs_datatype_t datatype,
193  void *val)
194 {
195  if (cs_glob_n_ranks > 1) {
196  MPI_Allreduce(MPI_IN_PLACE, val, n, cs_datatype_to_mpi[datatype], MPI_MAX,
198  }
199 }
200 
201 #elif defined(HAVE_MPI)
202 
203 void
204 cs_parall_max(int n,
205  cs_datatype_t datatype,
206  void *val);
207 
208 #else
209 
210 #define cs_parall_max(_n, _datatype, _val);
211 
212 #endif
213 
214 /*----------------------------------------------------------------------------*/
223 /*----------------------------------------------------------------------------*/
224 
225 #if defined(HAVE_MPI_IN_PLACE)
226 
227 inline static void
229  cs_datatype_t datatype,
230  void *val)
231 {
232  if (cs_glob_n_ranks > 1) {
233  MPI_Allreduce(MPI_IN_PLACE, val, n, cs_datatype_to_mpi[datatype], MPI_MIN,
235  }
236 }
237 
238 #elif defined(HAVE_MPI)
239 
240 void
241 cs_parall_min(int n,
242  cs_datatype_t datatype,
243  void *val);
244 
245 #else
246 
247 #define cs_parall_min(_n, _datatype, _val);
248 
249 #endif
250 
251 /*----------------------------------------------------------------------------*/
262 /*----------------------------------------------------------------------------*/
263 
264 #if defined(HAVE_MPI)
265 
266 inline static void
267 cs_parall_bcast(int root_rank,
268  int n,
269  cs_datatype_t datatype,
270  void *val)
271 {
272  if (cs_glob_n_ranks > 1)
273  MPI_Bcast(val, n, cs_datatype_to_mpi[datatype], root_rank,
275 }
276 
277 #else
278 
279 #define cs_parall_bcast(_root_rank, _n, _datatype, _val);
280 
281 #endif
282 
283 /*----------------------------------------------------------------------------*/
299 /*----------------------------------------------------------------------------*/
300 
301 void
302 cs_parall_allgather_r(int n_elts,
303  int n_g_elts,
304  cs_real_t array[],
305  cs_real_t g_array[]);
306 
307 /*----------------------------------------------------------------------------*/
327 /*----------------------------------------------------------------------------*/
328 
329 void
331  int n_g_elts,
332  int stride,
333  cs_real_t o_key[],
334  cs_real_t array[],
335  cs_real_t g_array[]);
336 
337 /*----------------------------------------------------------------------------*/
355 /*----------------------------------------------------------------------------*/
356 
357 void
358 cs_parall_gather_r(int root_rank,
359  int n_elts,
360  int n_g_elts,
361  const cs_real_t array[],
362  cs_real_t g_array[]);
363 
364 /*----------------------------------------------------------------------------*/
386 /*----------------------------------------------------------------------------*/
387 
388 void
389 cs_parall_gather_ordered_r(int root_rank,
390  int n_elts,
391  int n_g_elts,
392  int stride,
393  cs_real_t o_key[],
394  cs_real_t array[],
395  cs_real_t g_array[]);
396 
397 /*----------------------------------------------------------------------------*/
415 /*----------------------------------------------------------------------------*/
416 
417 void
418 cs_parall_scatter_r(int root_rank,
419  int n_elts,
420  int n_g_elts,
421  const cs_real_t g_array[],
422  cs_real_t array[]);
423 
424 /*----------------------------------------------------------------------------*/
443 /*----------------------------------------------------------------------------*/
444 
445 void
446 cs_parall_gather_f(int root_rank,
447  int n_elts,
448  int n_g_elts,
449  const float array[],
450  float g_array[]);
451 
452 /*----------------------------------------------------------------------------*/
471 /*----------------------------------------------------------------------------*/
472 
473 void
474 cs_parall_scatter_f(int root_rank,
475  int n_elts,
476  int n_g_elts,
477  const float g_array[],
478  float array[]);
479 
480 /*----------------------------------------------------------------------------*/
490 /*----------------------------------------------------------------------------*/
491 
492 void
494  cs_real_t *max,
495  cs_real_t max_loc_vals[]);
496 
497 /*----------------------------------------------------------------------------*/
507 /*----------------------------------------------------------------------------*/
508 
509 void
511  cs_real_t *min,
512  cs_real_t min_loc_vals[]);
513 
514 /*----------------------------------------------------------------------------*/
525 /*----------------------------------------------------------------------------*/
526 
527 void
529  int *rank_id,
530  cs_real_t val);
531 
532 /*----------------------------------------------------------------------------*/
541 /*----------------------------------------------------------------------------*/
542 
543 size_t
545 
546 /*----------------------------------------------------------------------------*/
556 /*----------------------------------------------------------------------------*/
557 
558 void
559 cs_parall_set_min_coll_buf_size(size_t buffer_size);
560 
561 /*----------------------------------------------------------------------------*/
574 /*----------------------------------------------------------------------------*/
575 
576 inline static void
578  size_t type_size,
579  cs_lnum_t *s_id,
580  cs_lnum_t *e_id)
581 {
582 #if defined(HAVE_OPENMP)
583  const int t_id = omp_get_thread_num();
584  const int n_t = omp_get_num_threads();
585  const cs_lnum_t t_n = (n + n_t - 1) / n_t;
586  const cs_lnum_t cl_m = CS_CL_SIZE / type_size; /* Cache line multiple */
587 
588  *s_id = t_id * t_n;
589  *e_id = (t_id+1) * t_n;
590  *s_id = cs_align(*s_id, cl_m);
591  *e_id = cs_align(*e_id, cl_m);
592  if (*e_id > n) *e_id = n;
593 #else
594  CS_UNUSED(type_size); /* avoid compiler warning */
595  *s_id = 0;
596  *e_id = n;
597 #endif
598 }
599 
600 /*----------------------------------------------------------------------------*/
619 /*----------------------------------------------------------------------------*/
620 
621 inline static void
623  size_t type_size,
624  cs_lnum_t *s_id,
625  cs_lnum_t *e_id)
626 {
627 #if defined(HAVE_OPENMP)
628  const int t_id = omp_get_thread_num();
629  const double n_t = omp_get_num_threads();
630  const cs_lnum_t cl_m = CS_CL_SIZE / type_size; /* Cache line multiple */
631 
632  double r0 = (double)t_id / (double)n_t;
633  double r1 = (double)(t_id+1) / (double)n_t;
634 
635  r0 = r0*r0;
636  r1 = r1*r1;
637 
638  const cs_lnum_t t_0 = r0*n;
639  const cs_lnum_t t_1 = r1*n;
640 
641  *s_id = t_0 * n;
642  *e_id = t_1 * n;
643  *s_id = cs_align(*s_id, cl_m);
644  *e_id = cs_align(*e_id, cl_m);
645  if (*e_id > n) *e_id = n;
646 #else
647  CS_UNUSED(type_size); /* avoid compiler warning */
648  *s_id = 0;
649  *e_id = n;
650 #endif
651 }
652 
653 /*----------------------------------------------------------------------------*/
654 
656 
657 #endif /* __CS_PARALL_H__ */
int cs_glob_n_ranks
Definition: cs_defs.c:175
MPI_Datatype cs_datatype_to_mpi[]
Definition: cs_defs.c:157
MPI_Comm cs_glob_mpi_comm
Definition: cs_defs.c:183
cs_datatype_t
Definition: cs_defs.h:272
#define BEGIN_C_DECLS
Definition: cs_defs.h:514
double cs_real_t
Floating-point value.
Definition: cs_defs.h:319
#define CS_MPI_LNUM
Definition: cs_defs.h:410
#define CS_MPI_GNUM
Definition: cs_defs.h:390
unsigned long cs_gnum_t
global mesh entity number
Definition: cs_defs.h:298
static cs_lnum_t cs_align(cs_lnum_t i, cs_lnum_t m)
Given a base index i, return the next index aligned with a size m.
Definition: cs_defs.h:613
#define CS_UNUSED(x)
Definition: cs_defs.h:500
#define END_C_DECLS
Definition: cs_defs.h:515
int cs_lnum_t
local mesh entity id
Definition: cs_defs.h:313
#define CS_CL_SIZE
Definition: cs_defs.h:470
void cs_parall_gather_r(int root_rank, int n_elts, int n_g_elts, const cs_real_t array[], cs_real_t g_array[])
Build a global array on the given root rank from all local arrays.
Definition: cs_parall.c:1016
static void cs_parall_bcast(int root_rank, int n, cs_datatype_t datatype, void *val)
Broadcast values of a given datatype to all default communicator processes.
Definition: cs_parall.h:267
void cs_parall_set_min_coll_buf_size(size_t buffer_size)
Define minimum recommended scatter or gather buffer size.
Definition: cs_parall.c:1339
void cs_parall_gather_ordered_r(int root_rank, int n_elts, int n_g_elts, int stride, cs_real_t o_key[], cs_real_t array[], cs_real_t g_array[])
Build an ordered global array on the given root rank from all local arrays.
Definition: cs_parall.c:1083
void cs_parall_min_id_rank_r(cs_lnum_t *elt_id, int *rank_id, cs_real_t val)
Given an (id, rank, value) tuple, return the local id and rank corresponding to the global minimum va...
Definition: cs_parall.c:841
static void cs_parall_thread_range_upper(cs_lnum_t n, size_t type_size, cs_lnum_t *s_id, cs_lnum_t *e_id)
Compute array index bounds for a local thread for upper triangular matrix elements.
Definition: cs_parall.h:622
static void cs_parall_max(int n, cs_datatype_t datatype, void *val)
Maximum values of a given datatype on all default communicator processes.
Definition: cs_parall.h:191
static void cs_parall_counter_max(cs_lnum_t cpt[], const int n)
Maximum values of a counter on all default communicator processes.
Definition: cs_parall.h:120
void cs_parall_allgather_r(int n_elts, int n_g_elts, cs_real_t array[], cs_real_t g_array[])
Build a global array from each local array in each domain.
Definition: cs_parall.c:895
static void cs_parall_counter(cs_gnum_t cpt[], const int n)
Sum values of a counter on all default communicator processes.
Definition: cs_parall.h:87
void cs_parall_scatter_r(int root_rank, int n_elts, int n_g_elts, const cs_real_t g_array[], cs_real_t array[])
Distribute a global array from a given root rank over all ranks. Each rank receive the part related t...
Definition: cs_parall.c:1132
static void cs_parall_sum(int n, cs_datatype_t datatype, void *val)
Sum values of a given datatype on all default communicator processes.
Definition: cs_parall.h:154
void cs_parall_allgather_ordered_r(int n_elts, int n_g_elts, int stride, cs_real_t o_key[], cs_real_t array[], cs_real_t g_array[])
Build an ordered global array from each local array in each domain.
Definition: cs_parall.c:972
void cs_parall_scatter_f(int root_rank, int n_elts, int n_g_elts, const float g_array[], float array[])
Distribute a global array from a given root rank over all ranks. Each rank receive the part related t...
Definition: cs_parall.c:1262
static void cs_parall_thread_range(cs_lnum_t n, size_t type_size, cs_lnum_t *s_id, cs_lnum_t *e_id)
Compute array index bounds for a local thread. When called inside an OpenMP parallel section,...
Definition: cs_parall.h:577
void cs_parall_min_loc_vals(int n, cs_real_t *min, cs_real_t min_loc_vals[])
Minimum value of a real and the value of related array on all default communicator processes.
Definition: cs_parall.c:802
static void cs_parall_min(int n, cs_datatype_t datatype, void *val)
Minimum values of a given datatype on all default communicator processes.
Definition: cs_parall.h:228
void cs_parall_gather_f(int root_rank, int n_elts, int n_g_elts, const float array[], float g_array[])
Build a global array on the given root rank from all local arrays. Function dealing with single-preci...
Definition: cs_parall.c:1197
void cs_parall_max_loc_vals(int n, cs_real_t *max, cs_real_t max_loc_vals[])
Maximum value of a real and the value of related array on all default communicator processes.
Definition: cs_parall.c:764
size_t cs_parall_get_min_coll_buf_size(void)
Return minimum recommended scatter or gather buffer size.
Definition: cs_parall.c:1317
cs_e2n_sum_t
Definition: cs_parall.h:51
@ CS_E2N_SUM_SCATTER_ATOMIC
Definition: cs_parall.h:56
@ CS_E2N_SUM_SCATTER
Definition: cs_parall.h:53
@ CS_E2N_SUM_GATHER
Definition: cs_parall.h:58
cs_e2n_sum_t cs_glob_e2n_sum_type