7.1
general documentation
cs_parall.h
Go to the documentation of this file.
1 #ifndef __CS_PARALL_H__
2 #define __CS_PARALL_H__
3 
4 /*============================================================================
5  * Functions dealing with parallelism
6  *============================================================================*/
7 
8 /*
9  This file is part of Code_Saturne, a general-purpose CFD tool.
10 
11  Copyright (C) 1998-2021 EDF S.A.
12 
13  This program is free software; you can redistribute it and/or modify it under
14  the terms of the GNU General Public License as published by the Free Software
15  Foundation; either version 2 of the License, or (at your option) any later
16  version.
17 
18  This program is distributed in the hope that it will be useful, but WITHOUT
19  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
20  FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
21  details.
22 
23  You should have received a copy of the GNU General Public License along with
24  this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
25  Street, Fifth Floor, Boston, MA 02110-1301, USA.
26 */
27 
28 /*----------------------------------------------------------------------------*/
29 
30 /*----------------------------------------------------------------------------
31  * Local headers
32  *----------------------------------------------------------------------------*/
33 
34 #include "cs_defs.h"
35 
36 /*----------------------------------------------------------------------------*/
37 
39 
40 /*=============================================================================
41  * Public function prototypes
42  *============================================================================*/
43 
/*----------------------------------------------------------------------------
 * Sum values of a counter on all default communicator processes.
 *
 * Three variants are provided, selected at compile time:
 *   - with MPI_IN_PLACE support, an inline in-place reduction;
 *   - with MPI but without MPI_IN_PLACE, a regular function declared here
 *     and defined elsewhere;
 *   - without MPI, a macro expanding to a no-op expression (its arguments
 *     are not evaluated).
 *
 * parameters:
 *   cpt <-> local counter in, global counter out (size: n)
 *   n   <-- number of values
 *----------------------------------------------------------------------------*/

#if defined(HAVE_MPI_IN_PLACE)

inline static void
cs_parall_counter(cs_gnum_t   cpt[],
                  const int   n)
{
  /* Nothing to reduce when running on a single rank */
  if (cs_glob_n_ranks > 1) {
    MPI_Allreduce(MPI_IN_PLACE, cpt, n, CS_MPI_GNUM, MPI_SUM,
                  cs_glob_mpi_comm);
  }
}

#elif defined(HAVE_MPI)

void
cs_parall_counter(cs_gnum_t   cpt[],
                  const int   n);

#else

/* Serial build: values are already global, so this is a no-op statement */
#define cs_parall_counter(_cpt, _n) ((void)0)

#endif
75 
/*----------------------------------------------------------------------------
 * Maximum values of a counter on all default communicator processes.
 *
 * Three variants are provided, selected at compile time:
 *   - with MPI_IN_PLACE support, an inline in-place reduction;
 *   - with MPI but without MPI_IN_PLACE, a regular function declared here
 *     and defined elsewhere;
 *   - without MPI, a macro expanding to a no-op expression (its arguments
 *     are not evaluated).
 *
 * parameters:
 *   cpt <-> local counter in, global counter out (size: n)
 *   n   <-- number of values
 *----------------------------------------------------------------------------*/

#if defined(HAVE_MPI_IN_PLACE)

inline static void
cs_parall_counter_max(cs_lnum_t   cpt[],
                      const int   n)
{
  /* Nothing to reduce when running on a single rank */
  if (cs_glob_n_ranks > 1) {
    MPI_Allreduce(MPI_IN_PLACE, cpt, n, CS_MPI_LNUM, MPI_MAX,
                  cs_glob_mpi_comm);
  }
}

#elif defined(HAVE_MPI)

void
cs_parall_counter_max(cs_lnum_t   cpt[],
                      const int   n);

#else

/* Serial build: values are already global, so this is a no-op statement */
#define cs_parall_counter_max(_cpt, _n) ((void)0)

#endif
107 
/*----------------------------------------------------------------------------
 * Sum values of a given datatype on all default communicator processes.
 *
 * Three variants are provided, selected at compile time:
 *   - with MPI_IN_PLACE support, an inline in-place reduction;
 *   - with MPI but without MPI_IN_PLACE, a regular function declared here
 *     and defined elsewhere;
 *   - without MPI, a macro expanding to a no-op expression (its arguments
 *     are not evaluated).
 *
 * parameters:
 *   n        <-- number of values
 *   datatype <-- matching Code_Saturne datatype
 *   val      <-> local sum in, global sum out (array)
 *----------------------------------------------------------------------------*/

#if defined(HAVE_MPI_IN_PLACE)

inline static void
cs_parall_sum(int             n,
              cs_datatype_t   datatype,
              void           *val)
{
  /* Nothing to reduce when running on a single rank */
  if (cs_glob_n_ranks > 1) {
    MPI_Allreduce(MPI_IN_PLACE, val, n, cs_datatype_to_mpi[datatype], MPI_SUM,
                  cs_glob_mpi_comm);
  }
}

#elif defined(HAVE_MPI)

void
cs_parall_sum(int             n,
              cs_datatype_t   datatype,
              void           *val);

#else

/* Serial build: expands to a single expression (no braces, no trailing
   semicolon) so that "if (c) cs_parall_sum(...); else ..." stays valid */
#define cs_parall_sum(_n, _datatype, _val) ((void)0)

#endif
142 
/*----------------------------------------------------------------------------
 * Maximum values of a given datatype on all default communicator processes.
 *
 * Three variants are provided, selected at compile time:
 *   - with MPI_IN_PLACE support, an inline in-place reduction;
 *   - with MPI but without MPI_IN_PLACE, a regular function declared here
 *     and defined elsewhere;
 *   - without MPI, a macro expanding to a no-op expression (its arguments
 *     are not evaluated).
 *
 * parameters:
 *   n        <-- number of values
 *   datatype <-- matching Code_Saturne datatype
 *   val      <-> local value input, global value output (array)
 *----------------------------------------------------------------------------*/

#if defined(HAVE_MPI_IN_PLACE)

inline static void
cs_parall_max(int             n,
              cs_datatype_t   datatype,
              void           *val)
{
  /* Nothing to reduce when running on a single rank */
  if (cs_glob_n_ranks > 1) {
    MPI_Allreduce(MPI_IN_PLACE, val, n, cs_datatype_to_mpi[datatype], MPI_MAX,
                  cs_glob_mpi_comm);
  }
}

#elif defined(HAVE_MPI)

void
cs_parall_max(int             n,
              cs_datatype_t   datatype,
              void           *val);

#else

/* Serial build: expands to a single expression (no trailing semicolon)
   so that "if (c) cs_parall_max(...); else ..." stays valid */
#define cs_parall_max(_n, _datatype, _val) ((void)0)

#endif
177 
/*----------------------------------------------------------------------------
 * Minimum values of a given datatype on all default communicator processes.
 *
 * Three variants are provided, selected at compile time:
 *   - with MPI_IN_PLACE support, an inline in-place reduction;
 *   - with MPI but without MPI_IN_PLACE, a regular function declared here
 *     and defined elsewhere;
 *   - without MPI, a macro expanding to a no-op expression (its arguments
 *     are not evaluated).
 *
 * parameters:
 *   n        <-- number of values
 *   datatype <-- matching Code_Saturne datatype
 *   val      <-> local value input, global value output (array)
 *----------------------------------------------------------------------------*/

#if defined(HAVE_MPI_IN_PLACE)

inline static void
cs_parall_min(int             n,
              cs_datatype_t   datatype,
              void           *val)
{
  /* Nothing to reduce when running on a single rank */
  if (cs_glob_n_ranks > 1) {
    MPI_Allreduce(MPI_IN_PLACE, val, n, cs_datatype_to_mpi[datatype], MPI_MIN,
                  cs_glob_mpi_comm);
  }
}

#elif defined(HAVE_MPI)

void
cs_parall_min(int             n,
              cs_datatype_t   datatype,
              void           *val);

#else

/* Serial build: expands to a single expression (no trailing semicolon)
   so that "if (c) cs_parall_min(...); else ..." stays valid */
#define cs_parall_min(_n, _datatype, _val) ((void)0)

#endif
212 
/*----------------------------------------------------------------------------
 * Broadcast values of a given datatype to all
 * default communicator processes.
 *
 * With MPI, this is an inline wrapper around MPI_Bcast; without MPI, it is
 * a macro expanding to a no-op expression (its arguments are not evaluated).
 *
 * parameters:
 *   root_rank <-- rank from which to broadcast
 *   n         <-- number of values
 *   datatype  <-- matching Code_Saturne datatype
 *   val       <-> values to broadcast; input on root_rank,
 *                 output on others (size: n)
 *----------------------------------------------------------------------------*/

#if defined(HAVE_MPI)

inline static void
cs_parall_bcast(int             root_rank,
                int             n,
                cs_datatype_t   datatype,
                void           *val)
{
  /* Nothing to broadcast when running on a single rank */
  if (cs_glob_n_ranks > 1) {
    MPI_Bcast(val, n, cs_datatype_to_mpi[datatype], root_rank,
              cs_glob_mpi_comm);
  }
}

#else

/* Serial build: expands to a single expression (no trailing semicolon)
   so that "if (c) cs_parall_bcast(...); else ..." stays valid */
#define cs_parall_bcast(_root_rank, _n, _datatype, _val) ((void)0)

#endif
243 
244 /*----------------------------------------------------------------------------
245  * Build a global array from each local array in each domain.
246  *
247  * Local arrays are appended in order of owning MPI rank.
248  * The size of each local array may be different.
249  *
250  * Use of this function may be quite practical, but should be limited
251  * to user functions, as it may limit scalability (especially as regards
252  * memory usage).
253  *
254  * parameters:
255  * n_elts <-- size of the local array
256  * n_g_elts <-- size of the global array
257  * array <-- local array (size: n_elts)
258  * g_array --> global array (size: n_g_elts)
259  *----------------------------------------------------------------------------*/
260 
261 void
262 cs_parall_allgather_r(int n_elts,
263  int n_g_elts,
264  cs_real_t array[],
265  cs_real_t g_array[]);
266 
267 /*----------------------------------------------------------------------------
268  * Build an ordered global array from each local array in each domain.
269  *
270  * Local array elements are ordered based on a given key value (usually
271  * some form of coordinate, so the result should be independent of
272  * partitioning as long as the definition of the o_key array's defintion
273  * is itself independent of the partitioning.
274  *
275  * Use of this function may be quite practical, but should be limited
276  * to user functions, as it may limit scalability (especially as regards
277  * memory usage).
278  *
279  * parameters:
280  * n_elts <-- number of local elements
281  * n_g_elts <-- number of global elements
282  * stride <-- number of values per element
283  * o_key <-- ordering key (coordinate) value per element
284  * array <-- local array (size: n_elts*stride)
285  * g_array --> global array (size: n_g_elts*stride)
286  *----------------------------------------------------------------------------*/
287 
288 void
290  int n_g_elts,
291  int stride,
292  cs_real_t o_key[],
293  cs_real_t array[],
294  cs_real_t g_array[]);
295 
296 /*----------------------------------------------------------------------------*/
314 /*----------------------------------------------------------------------------*/
315 
316 void
317 cs_parall_gather_r(int root_rank,
318  int n_elts,
319  int n_g_elts,
320  const cs_real_t array[],
321  cs_real_t g_array[]);
322 
323 /*----------------------------------------------------------------------------*/
345 /*----------------------------------------------------------------------------*/
346 
347 void
348 cs_parall_gather_ordered_r(int root_rank,
349  int n_elts,
350  int n_g_elts,
351  int stride,
352  cs_real_t o_key[],
353  cs_real_t array[],
354  cs_real_t g_array[]);
355 
356 /*----------------------------------------------------------------------------*/
374 /*----------------------------------------------------------------------------*/
375 
376 void
377 cs_parall_scatter_r(int root_rank,
378  int n_elts,
379  int n_g_elts,
380  const cs_real_t g_array[],
381  cs_real_t array[]);
382 
/*----------------------------------------------------------------------------
 * Build a global array on the given root rank from all local arrays.
 *
 * Single-precision (float) counterpart of cs_parall_gather_r.
 *
 * parameters:
 *   root_rank <-- rank on which the global array is built
 *   n_elts    <-- size of the local array (presumably; by analogy with
 *                 cs_parall_allgather_r)
 *   n_g_elts  <-- size of the global array (presumably; same analogy)
 *   array     <-- local array (read-only)
 *   g_array   --> global array; NOTE(review): presumably only meaningful
 *                 on root_rank - confirm against the definition
 *----------------------------------------------------------------------------*/

void
cs_parall_gather_f(int             root_rank,
                   int             n_elts,
                   int             n_g_elts,
                   const float     array[],
                   float           g_array[]);
410 
/*----------------------------------------------------------------------------
 * Distribute a global array from a given root rank over all ranks.
 *
 * Single-precision (float) counterpart of cs_parall_scatter_r.
 *
 * parameters:
 *   root_rank <-- rank from which the global array is distributed
 *   n_elts    <-- size of the local array (presumably; by analogy with
 *                 cs_parall_allgather_r)
 *   n_g_elts  <-- size of the global array (presumably; same analogy)
 *   g_array   <-- global array (read-only); NOTE(review): presumably only
 *                 read on root_rank - confirm against the definition
 *   array     --> local array receiving this rank's part
 *----------------------------------------------------------------------------*/

void
cs_parall_scatter_f(int           root_rank,
                    int           n_elts,
                    int           n_g_elts,
                    const float   g_array[],
                    float         array[]);
438 
439 /*----------------------------------------------------------------------------
440  * Maximum value of a real and the value of related array on all
441  * default communicator processes.
442  *
443  * parameters:
444  * n <-- size of the related array
445  * max <-> local max in, global max out
446  * max_loc_vals <-> array values at location of local max in,
447  * and at location of global max out
448  *----------------------------------------------------------------------------*/
449 
450 void
452  cs_real_t *max,
453  cs_real_t max_loc_vals[]);
454 
455 /*----------------------------------------------------------------------------
456  * Minimum value of a real and the value of related array on all
457  * default communicator processes.
458  *
459  * parameters:
460  * n <-- size of the related array
461  * min <-> local min in, global min out
462  * min_loc_vals <-> array values at location of local min in,
463  * and at location of global min out
464  *----------------------------------------------------------------------------*/
465 
466 void
468  cs_real_t *min,
469  cs_real_t min_loc_vals[]);
470 
471 /*----------------------------------------------------------------------------
472  * Given an (id, rank, value) tuple, return the local id and rank
473  * corresponding to the global minimum value.
474  *
475  * parameters:
476  * elt_id <-> element id for which the value is the smallest
477  * (local in, global out)
478  * rank_id <-> rank id for which the value is the smallest
479  * (local in, global out)
480  * val <-- associated local minimum value
481  *----------------------------------------------------------------------------*/
482 
483 void
485  int *rank_id,
486  cs_real_t dis2mn);
487 
/*----------------------------------------------------------------------------
 * Return minimum recommended scatter or gather buffer size.
 *
 * This is used by some internal part to block or scatter/gather algorithms,
 * so as to allow I/O buffer size tuning.
 *
 * returns:
 *   minimum recommended part to block or gather buffer size (in bytes)
 *----------------------------------------------------------------------------*/

size_t
cs_parall_get_min_coll_buf_size(void);
500 
/*----------------------------------------------------------------------------
 * Define minimum recommended scatter or gather buffer size.
 *
 * This is used by some internal part to block or scatter/gather algorithms,
 * so as to allow I/O buffer size tuning.
 *
 * parameters:
 *   buffer_size <-- minimum recommended part to block or gather
 *                   buffer size (in bytes)
 *----------------------------------------------------------------------------*/

void
cs_parall_set_min_coll_buf_size(size_t  buffer_size);
513 
514 /*----------------------------------------------------------------------------*/
527 /*----------------------------------------------------------------------------*/
528 
529 inline static void
530 cs_parall_thread_range(cs_lnum_t n,
531  size_t type_size,
532  cs_lnum_t *s_id,
533  cs_lnum_t *e_id)
534 {
535 #if defined(HAVE_OPENMP)
536  const int t_id = omp_get_thread_num();
537  const int n_t = omp_get_num_threads();
538  const cs_lnum_t t_n = (n + n_t - 1) / n_t;
539  const cs_lnum_t cl_m = CS_CL_SIZE / type_size; /* Cache line multiple */
540 
541  *s_id = t_id * t_n;
542  *e_id = (t_id+1) * t_n;
543  *s_id = cs_align(*s_id, cl_m);
544  *e_id = cs_align(*e_id, cl_m);
545  if (*e_id > n) *e_id = n;
546 #else
547  CS_UNUSED(type_size); /* avoid compiler warning */
548  *s_id = 0;
549  *e_id = n;
550 #endif
551 }
552 
553 /*----------------------------------------------------------------------------*/
554 
556 
557 #endif /* __CS_PARALL_H__ */
cs_datatype_t
Definition: cs_defs.h:275
unsigned long cs_gnum_t
global mesh entity number
Definition: cs_defs.h:301
void cs_parall_gather_r(int root_rank, int n_elts, int n_g_elts, const cs_real_t array[], cs_real_t g_array[])
Build a global array on the given root rank from all local arrays.
Definition: cs_parall.c:1008
#define CS_CL_SIZE
Definition: cs_defs.h:466
void cs_parall_allgather_ordered_r(int n_elts, int n_g_elts, int stride, cs_real_t o_key[], cs_real_t array[], cs_real_t g_array[])
Build an ordered global array from each local array in each domain.
Definition: cs_parall.c:964
#define BEGIN_C_DECLS
Definition: cs_defs.h:510
void cs_parall_scatter_r(int root_rank, int n_elts, int n_g_elts, const cs_real_t g_array[], cs_real_t array[])
Distribute a global array from a given root rank over all ranks. Each rank receive the part related t...
Definition: cs_parall.c:1124
#define CS_UNUSED(x)
Definition: cs_defs.h:496
void cs_parall_gather_f(int root_rank, int n_elts, int n_g_elts, const float array[], float g_array[])
Build a global array on the given root rank from all local arrays. Function dealing with single-preci...
Definition: cs_parall.c:1189
int cs_glob_n_ranks
Definition: cs_defs.c:175
double cs_real_t
Floating-point value.
Definition: cs_defs.h:322
void cs_parall_min_loc_vals(int n, cs_real_t *min, cs_real_t min_loc_vals[])
Minimum value of a real and the value of related array on all default communicator processes...
Definition: cs_parall.c:794
MPI_Datatype cs_datatype_to_mpi[]
Definition: cs_defs.c:157
size_t cs_parall_get_min_coll_buf_size(void)
Return minimum recommended scatter or gather buffer size.
Definition: cs_parall.c:1309
void cs_parall_min_id_rank_r(cs_lnum_t *elt_id, int *rank_id, cs_real_t dis2mn)
Given an (id, rank, value) tuple, return the local id and rank corresponding to the global minimum va...
Definition: cs_parall.c:833
void cs_parall_scatter_f(int root_rank, int n_elts, int n_g_elts, const float g_array[], float array[])
Distribute a global array from a given root rank over all ranks. Each rank receive the part related t...
Definition: cs_parall.c:1254
#define CS_MPI_GNUM
Definition: cs_defs.h:386
void cs_parall_gather_ordered_r(int root_rank, int n_elts, int n_g_elts, int stride, cs_real_t o_key[], cs_real_t array[], cs_real_t g_array[])
Build an ordered global array on the given root rank from all local arrays.
Definition: cs_parall.c:1075
void cs_parall_set_min_coll_buf_size(size_t buffer_size)
Define minimum recommended scatter or gather buffer size.
Definition: cs_parall.c:1331
MPI_Comm cs_glob_mpi_comm
Definition: cs_defs.c:183
int cs_lnum_t
local mesh entity id
Definition: cs_defs.h:316
void cs_parall_max_loc_vals(int n, cs_real_t *max, cs_real_t max_loc_vals[])
Maximum value of a real and the value of related array on all default communicator processes...
Definition: cs_parall.c:756
#define CS_MPI_LNUM
Definition: cs_defs.h:406
#define END_C_DECLS
Definition: cs_defs.h:511
void cs_parall_allgather_r(int n_elts, int n_g_elts, cs_real_t array[], cs_real_t g_array[])
Build a global array from each local array in each domain.
Definition: cs_parall.c:887