8.2
general documentation
Loading...
Searching...
No Matches
cs_parall.h
Go to the documentation of this file.
1#ifndef __CS_PARALL_H__
2#define __CS_PARALL_H__
3
4/*============================================================================
5 * Functions dealing with parallelism
6 *============================================================================*/
7
8/*
9 This file is part of code_saturne, a general-purpose CFD tool.
10
11 Copyright (C) 1998-2024 EDF S.A.
12
13 This program is free software; you can redistribute it and/or modify it under
14 the terms of the GNU General Public License as published by the Free Software
15 Foundation; either version 2 of the License, or (at your option) any later
16 version.
17
18 This program is distributed in the hope that it will be useful, but WITHOUT
19 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
20 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
21 details.
22
23 You should have received a copy of the GNU General Public License along with
24 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
25 Street, Fifth Floor, Boston, MA 02110-1301, USA.
26*/
27
28/*----------------------------------------------------------------------------*/
29
30/*----------------------------------------------------------------------------
31 * Local headers
32 *----------------------------------------------------------------------------*/
33
34#include "cs_defs.h"
35
36/*----------------------------------------------------------------------------*/
37
39
40/*============================================================================
41 * General types and macros used throughout code_saturne
42 *============================================================================*/
43
44/*----------------------------------------------------------------------------
45 * Variable value type.
46 *----------------------------------------------------------------------------*/
47
50
62
63/*============================================================================
64 * Global variables
65 *============================================================================*/
66
67/* Preferred indexed sum option, adapted to shared-memory parallelism */
68
70
71/*=============================================================================
72 * Public function prototypes
73 *============================================================================*/
74
75/*----------------------------------------------------------------------------*/
82/*----------------------------------------------------------------------------*/
83
84#if defined(HAVE_MPI_IN_PLACE)
85
86inline static void
87cs_parall_counter(cs_gnum_t cpt[],
88 const int n)
89{
90 if (cs_glob_n_ranks > 1) {
91 MPI_Allreduce(MPI_IN_PLACE, cpt, n, CS_MPI_GNUM, MPI_SUM,
93 }
94}
95
96#elif defined(HAVE_MPI)
97
98void
99cs_parall_counter(cs_gnum_t cpt[],
100 const int n);
101
102#else
103
104#define cs_parall_counter(_cpt, _n)
105
106#endif
107
108/*----------------------------------------------------------------------------*/
115/*----------------------------------------------------------------------------*/
116
117#if defined(HAVE_MPI_IN_PLACE)
118
119inline static void
121 const int n)
122{
123 if (cs_glob_n_ranks > 1) {
124 MPI_Allreduce(MPI_IN_PLACE, cpt, n, CS_MPI_LNUM, MPI_MAX,
126 }
127}
128
129#elif defined(HAVE_MPI)
130
131void
133 const int n);
134
135#else
136
137#define cs_parall_counter_max(_cpt, _n)
138
139#endif
140
141/*----------------------------------------------------------------------------*/
149/*----------------------------------------------------------------------------*/
150
151#if defined(HAVE_MPI_IN_PLACE)
152
153inline static void
155 cs_datatype_t datatype,
156 void *val)
157{
158 if (cs_glob_n_ranks > 1) {
159 MPI_Allreduce(MPI_IN_PLACE, val, n, cs_datatype_to_mpi[datatype], MPI_SUM,
161 }
162}
163
164#elif defined(HAVE_MPI)
165
166void
167cs_parall_sum(int n,
168 cs_datatype_t datatype,
169 void *val);
170
171#else
172
173#define cs_parall_sum(_n, _datatype, _val) { };
174
175#endif
176
177/*----------------------------------------------------------------------------*/
186/*----------------------------------------------------------------------------*/
187
188#if defined(HAVE_MPI_IN_PLACE)
189
190inline static void
192 cs_datatype_t datatype,
193 void *val)
194{
195 if (cs_glob_n_ranks > 1) {
196 MPI_Allreduce(MPI_IN_PLACE, val, n, cs_datatype_to_mpi[datatype], MPI_MAX,
198 }
199}
200
201#elif defined(HAVE_MPI)
202
203void
204cs_parall_max(int n,
205 cs_datatype_t datatype,
206 void *val);
207
208#else
209
210#define cs_parall_max(_n, _datatype, _val);
211
212#endif
213
214/*----------------------------------------------------------------------------*/
223/*----------------------------------------------------------------------------*/
224
225#if defined(HAVE_MPI_IN_PLACE)
226
227inline static void
229 cs_datatype_t datatype,
230 void *val)
231{
232 if (cs_glob_n_ranks > 1) {
233 MPI_Allreduce(MPI_IN_PLACE, val, n, cs_datatype_to_mpi[datatype], MPI_MIN,
235 }
236}
237
238#elif defined(HAVE_MPI)
239
240void
241cs_parall_min(int n,
242 cs_datatype_t datatype,
243 void *val);
244
245#else
246
247#define cs_parall_min(_n, _datatype, _val);
248
249#endif
250
251/*----------------------------------------------------------------------------*/
262/*----------------------------------------------------------------------------*/
263
264#if defined(HAVE_MPI)
265
266inline static void
267cs_parall_bcast(int root_rank,
268 int n,
269 cs_datatype_t datatype,
270 void *val)
271{
272 if (cs_glob_n_ranks > 1)
273 MPI_Bcast(val, n, cs_datatype_to_mpi[datatype], root_rank,
275}
276
277#else
278
279#define cs_parall_bcast(_root_rank, _n, _datatype, _val);
280
281#endif
282
283/*----------------------------------------------------------------------------*/
299/*----------------------------------------------------------------------------*/
300
301void
302cs_parall_allgather_r(int n_elts,
303 int n_g_elts,
304 cs_real_t array[],
305 cs_real_t g_array[]);
306
307/*----------------------------------------------------------------------------*/
327/*----------------------------------------------------------------------------*/
328
329void
331 int n_g_elts,
332 int stride,
333 cs_real_t o_key[],
334 cs_real_t array[],
335 cs_real_t g_array[]);
336
337/*----------------------------------------------------------------------------*/
355/*----------------------------------------------------------------------------*/
356
357void
358cs_parall_gather_r(int root_rank,
359 int n_elts,
360 int n_g_elts,
361 const cs_real_t array[],
362 cs_real_t g_array[]);
363
364/*----------------------------------------------------------------------------*/
386/*----------------------------------------------------------------------------*/
387
388void
389cs_parall_gather_ordered_r(int root_rank,
390 int n_elts,
391 int n_g_elts,
392 int stride,
393 cs_real_t o_key[],
394 cs_real_t array[],
395 cs_real_t g_array[]);
396
397/*----------------------------------------------------------------------------*/
415/*----------------------------------------------------------------------------*/
416
417void
418cs_parall_scatter_r(int root_rank,
419 int n_elts,
420 int n_g_elts,
421 const cs_real_t g_array[],
422 cs_real_t array[]);
423
424/*----------------------------------------------------------------------------*/
443/*----------------------------------------------------------------------------*/
444
445void
446cs_parall_gather_f(int root_rank,
447 int n_elts,
448 int n_g_elts,
449 const float array[],
450 float g_array[]);
451
452/*----------------------------------------------------------------------------*/
471/*----------------------------------------------------------------------------*/
472
473void
474cs_parall_scatter_f(int root_rank,
475 int n_elts,
476 int n_g_elts,
477 const float g_array[],
478 float array[]);
479
480/*----------------------------------------------------------------------------*/
490/*----------------------------------------------------------------------------*/
491
492void
494 cs_real_t *max,
495 cs_real_t max_loc_vals[]);
496
497/*----------------------------------------------------------------------------*/
507/*----------------------------------------------------------------------------*/
508
509void
511 cs_real_t *min,
512 cs_real_t min_loc_vals[]);
513
514/*----------------------------------------------------------------------------*/
525/*----------------------------------------------------------------------------*/
526
527void
529 int *rank_id,
530 cs_real_t val);
531
532/*----------------------------------------------------------------------------*/
541/*----------------------------------------------------------------------------*/
542
543size_t
545
546/*----------------------------------------------------------------------------*/
556/*----------------------------------------------------------------------------*/
557
558void
559cs_parall_set_min_coll_buf_size(size_t buffer_size);
560
561/*----------------------------------------------------------------------------*/
574/*----------------------------------------------------------------------------*/
575
576inline static void
578 size_t type_size,
579 cs_lnum_t *s_id,
580 cs_lnum_t *e_id)
581{
582#if defined(HAVE_OPENMP)
583 const int t_id = omp_get_thread_num();
584 const int n_t = omp_get_num_threads();
585 const cs_lnum_t t_n = (n + n_t - 1) / n_t;
586 const cs_lnum_t cl_m = CS_CL_SIZE / type_size; /* Cache line multiple */
587
588 *s_id = t_id * t_n;
589 *e_id = (t_id+1) * t_n;
590 *s_id = cs_align(*s_id, cl_m);
591 *e_id = cs_align(*e_id, cl_m);
592 if (*e_id > n) *e_id = n;
593#else
594 CS_UNUSED(type_size); /* avoid compiler warning */
595 *s_id = 0;
596 *e_id = n;
597#endif
598}
599
600/*----------------------------------------------------------------------------*/
619/*----------------------------------------------------------------------------*/
620
621inline static void
623 size_t type_size,
624 cs_lnum_t *s_id,
625 cs_lnum_t *e_id)
626{
627#if defined(HAVE_OPENMP)
628 const int t_id = omp_get_thread_num();
629 const double n_t = omp_get_num_threads();
630 const cs_lnum_t cl_m = CS_CL_SIZE / type_size; /* Cache line multiple */
631
632 double r0 = (double)t_id / (double)n_t;
633 double r1 = (double)(t_id+1) / (double)n_t;
634
635 r0 = r0*r0;
636 r1 = r1*r1;
637
638 const cs_lnum_t t_0 = r0*n;
639 const cs_lnum_t t_1 = r1*n;
640
641 *s_id = t_0 * n;
642 *e_id = t_1 * n;
643 *s_id = cs_align(*s_id, cl_m);
644 *e_id = cs_align(*e_id, cl_m);
645 if (*e_id > n) *e_id = n;
646#else
647 CS_UNUSED(type_size); /* avoid compiler warning */
648 *s_id = 0;
649 *e_id = n;
650#endif
651}
652
653/*----------------------------------------------------------------------------*/
662/*----------------------------------------------------------------------------*/
663
664static inline size_t
666 size_t block_size)
667{
668 return (n % block_size) ? n/block_size + 1 : n/block_size;
669}
670
671/*----------------------------------------------------------------------------*/
672
674
675#endif /* __CS_PARALL_H__ */
int cs_glob_n_ranks
Definition cs_defs.c:175
MPI_Datatype cs_datatype_to_mpi[]
Definition cs_defs.c:157
MPI_Comm cs_glob_mpi_comm
Definition cs_defs.c:183
cs_datatype_t
Definition cs_defs.h:284
#define BEGIN_C_DECLS
Definition cs_defs.h:528
double cs_real_t
Floating-point value.
Definition cs_defs.h:332
#define CS_MPI_LNUM
Definition cs_defs.h:424
static cs_lnum_t cs_align(cs_lnum_t i, cs_lnum_t m)
Given a base index i, return the next index aligned with a size m.
Definition cs_defs.h:645
#define CS_UNUSED(x)
Definition cs_defs.h:514
#define END_C_DECLS
Definition cs_defs.h:529
int cs_lnum_t
local mesh entity id
Definition cs_defs.h:325
#define CS_CL_SIZE
Definition cs_defs.h:484
void cs_parall_gather_r(int root_rank, int n_elts, int n_g_elts, const cs_real_t array[], cs_real_t g_array[])
Build a global array on the given root rank from all local arrays.
Definition cs_parall.c:1016
static void cs_parall_bcast(int root_rank, int n, cs_datatype_t datatype, void *val)
Broadcast values of a given datatype to all default communicator processes.
Definition cs_parall.h:267
void cs_parall_set_min_coll_buf_size(size_t buffer_size)
Define minimum recommended scatter or gather buffer size.
Definition cs_parall.c:1339
void cs_parall_gather_ordered_r(int root_rank, int n_elts, int n_g_elts, int stride, cs_real_t o_key[], cs_real_t array[], cs_real_t g_array[])
Build an ordered global array on the given root rank from all local arrays.
Definition cs_parall.c:1083
void cs_parall_min_id_rank_r(cs_lnum_t *elt_id, int *rank_id, cs_real_t val)
Given an (id, rank, value) tuple, return the local id and rank corresponding to the global minimum va...
Definition cs_parall.c:841
static void cs_parall_thread_range_upper(cs_lnum_t n, size_t type_size, cs_lnum_t *s_id, cs_lnum_t *e_id)
Compute array index bounds for a local thread for upper triangular matrix elements.
Definition cs_parall.h:622
static void cs_parall_max(int n, cs_datatype_t datatype, void *val)
Maximum values of a given datatype on all default communicator processes.
Definition cs_parall.h:191
static void cs_parall_counter_max(cs_lnum_t cpt[], const int n)
Maximum values of a counter on all default communicator processes.
Definition cs_parall.h:120
void cs_parall_allgather_r(int n_elts, int n_g_elts, cs_real_t array[], cs_real_t g_array[])
Build a global array from each local array in each domain.
Definition cs_parall.c:895
static void cs_parall_counter(cs_gnum_t cpt[], const int n)
Sum values of a counter on all default communicator processes.
Definition cs_parall.h:87
void cs_parall_scatter_r(int root_rank, int n_elts, int n_g_elts, const cs_real_t g_array[], cs_real_t array[])
Distribute a global array from a given root rank over all ranks. Each rank receive the part related t...
Definition cs_parall.c:1132
static void cs_parall_sum(int n, cs_datatype_t datatype, void *val)
Sum values of a given datatype on all default communicator processes.
Definition cs_parall.h:154
void cs_parall_allgather_ordered_r(int n_elts, int n_g_elts, int stride, cs_real_t o_key[], cs_real_t array[], cs_real_t g_array[])
Build an ordered global array from each local array in each domain.
Definition cs_parall.c:972
void cs_parall_scatter_f(int root_rank, int n_elts, int n_g_elts, const float g_array[], float array[])
Distribute a global array from a given root rank over all ranks. Each rank receive the part related t...
Definition cs_parall.c:1262
static void cs_parall_thread_range(cs_lnum_t n, size_t type_size, cs_lnum_t *s_id, cs_lnum_t *e_id)
Compute array index bounds for a local thread. When called inside an OpenMP parallel section,...
Definition cs_parall.h:577
void cs_parall_min_loc_vals(int n, cs_real_t *min, cs_real_t min_loc_vals[])
Minimum value of a real and the value of related array on all default communicator processes.
Definition cs_parall.c:802
static void cs_parall_min(int n, cs_datatype_t datatype, void *val)
Minimum values of a given datatype on all default communicator processes.
Definition cs_parall.h:228
void cs_parall_gather_f(int root_rank, int n_elts, int n_g_elts, const float array[], float g_array[])
Build a global array on the given root rank from all local arrays. Function dealing with single-preci...
Definition cs_parall.c:1197
void cs_parall_max_loc_vals(int n, cs_real_t *max, cs_real_t max_loc_vals[])
Maximum value of a real and the value of related array on all default communicator processes.
Definition cs_parall.c:764
size_t cs_parall_get_min_coll_buf_size(void)
Return minimum recommended scatter or gather buffer size.
Definition cs_parall.c:1317
static size_t cs_parall_block_count(size_t n, size_t block_size)
Compute number of blocks needed for a given array and block sizes.
Definition cs_parall.h:665
cs_e2n_sum_t
Definition cs_parall.h:51
@ CS_E2N_SUM_SCATTER_ATOMIC
Definition cs_parall.h:56
@ CS_E2N_SUM_SCATTER
Definition cs_parall.h:53
@ CS_E2N_SUM_GATHER
Definition cs_parall.h:58
cs_e2n_sum_t cs_glob_e2n_sum_type