|
#define | cs_parall_counter(_cpt, _n) |
| Sum values of a counter on all default communicator processes. More...
|
|
#define | cs_parall_counter_max(_cpt, _n) |
| Maximum values of a counter on all default communicator processes. More...
|
|
#define | cs_parall_sum(_n, _datatype, _val) { }; |
| Sum values of a given datatype on all default communicator processes. More...
|
|
#define | cs_parall_max(_n, _datatype, _val) ; |
| Maximum values of a given datatype on all default communicator processes. More...
|
|
#define | cs_parall_min(_n, _datatype, _val) ; |
| Minimum values of a given datatype on all default communicator processes. More...
|
|
#define | cs_parall_bcast(_root_rank, _n, _datatype, _val) ; |
| Broadcast values of a given datatype to all default communicator processes. More...
|
|
|
void | cs_parall_allgather_r (int n_elts, int n_g_elts, cs_real_t array[], cs_real_t g_array[]) |
| Build a global array from each local array in each domain. More...
|
|
void | cs_parall_allgather_ordered_r (int n_elts, int n_g_elts, int stride, cs_real_t o_key[], cs_real_t array[], cs_real_t g_array[]) |
| Build an ordered global array from each local array in each domain. More...
|
|
void | cs_parall_gather_r (int root_rank, int n_elts, int n_g_elts, const cs_real_t array[], cs_real_t g_array[]) |
| Build a global array on the given root rank from all local arrays. More...
|
|
void | cs_parall_gather_ordered_r (int root_rank, int n_elts, int n_g_elts, int stride, cs_real_t o_key[], cs_real_t array[], cs_real_t g_array[]) |
| Build an ordered global array on the given root rank from all local arrays. More...
|
|
void | cs_parall_scatter_r (int root_rank, int n_elts, int n_g_elts, const cs_real_t g_array[], cs_real_t array[]) |
| Distribute a global array from a given root rank over all ranks. Each rank receives the part related to its local elements. More...
|
|
void | cs_parall_gather_f (int root_rank, int n_elts, int n_g_elts, const float array[], float g_array[]) |
| Build a global array on the given root rank from all local arrays. Function dealing with single-precision arrays. More...
|
|
void | cs_parall_scatter_f (int root_rank, int n_elts, int n_g_elts, const float g_array[], float array[]) |
| Distribute a global array from a given root rank over all ranks. Each rank receives the part related to its local elements. Function dealing with single-precision arrays. More...
|
|
void | cs_parall_max_loc_vals (int n, cs_real_t *max, cs_real_t max_loc_vals[]) |
| Maximum value of a real and the value of related array on all default communicator processes. More...
|
|
void | cs_parall_min_loc_vals (int n, cs_real_t *min, cs_real_t min_loc_vals[]) |
| Minimum value of a real and the value of related array on all default communicator processes. More...
|
|
void | cs_parall_min_id_rank_r (cs_lnum_t *elt_id, int *rank_id, cs_real_t val) |
| Given an (id, rank, value) tuple, return the local id and rank corresponding to the global minimum value. More...
|
|
size_t | cs_parall_get_min_coll_buf_size (void) |
| Return minimum recommended scatter or gather buffer size. More...
|
|
void | cs_parall_set_min_coll_buf_size (size_t buffer_size) |
| Define minimum recommended scatter or gather buffer size. More...
|
|
static void | cs_parall_thread_range (cs_lnum_t n, size_t type_size, cs_lnum_t *s_id, cs_lnum_t *e_id) |
| Compute array index bounds for a local thread. When called inside an OpenMP parallel section, this will return the start and past-the-end indexes for the array range assigned to that thread. In other cases, the start index is 1, and the past-the-end index is n. More...
|
|
static void | cs_parall_thread_range_upper (cs_lnum_t n, size_t type_size, cs_lnum_t *s_id, cs_lnum_t *e_id) |
| Compute array index bounds for a local thread for upper triangular matrix elements. More...
|
|
static size_t | cs_parall_block_count (size_t n, size_t block_size) |
| Compute number of blocks needed for a given array and block sizes. More...
|
|
Build an ordered global array from each local array in each domain.
Local array elements are ordered based on a given key value (usually some form of coordinate), so the result should be independent of partitioning as long as the definition of the o_key array is itself independent of the partitioning.
Use of this function may be quite practical, but should be limited to user functions, as it may limit scalability (especially as regards memory usage).
- Parameters
-
[in] | n_elts | number of local elements |
[in] | n_g_elts | number of global elements |
[in] | stride | number of values per element |
[in] | o_key | ordering key (coordinate) value per element |
[in] | array | local array (size: n_elts*stride) |
[out] | g_array | global array (size: n_g_elts*stride) |
void cs_parall_gather_f |
( |
int |
root_rank, |
|
|
int |
n_elts, |
|
|
int |
n_g_elts, |
|
|
const float |
array[], |
|
|
float |
g_array[] |
|
) |
| |
Build a global array on the given root rank from all local arrays. Function dealing with single-precision arrays.
Local arrays are appended in order of owning MPI rank. The size of each local array may be different.
Use of this function may be quite practical, but should be limited to user functions, as it may limit scalability (especially as regards memory usage).
- Parameters
-
[in] | root_rank | rank which stores the global array |
[in] | n_elts | size of the local array |
[in] | n_g_elts | size of the global array |
[in] | array | local array (size: n_elts) |
[out] | g_array | global array (size: n_g_elts) only usable by the root rank |
void cs_parall_gather_ordered_r |
( |
int |
root_rank, |
|
|
int |
n_elts, |
|
|
int |
n_g_elts, |
|
|
int |
stride, |
|
|
cs_real_t |
o_key[], |
|
|
cs_real_t |
array[], |
|
|
cs_real_t |
g_array[] |
|
) |
| |
Build an ordered global array on the given root rank from all local arrays.
Local array elements are ordered based on a given key value (usually some form of coordinate), so the result should be independent of partitioning as long as the definition of the o_key array is itself independent of the partitioning.
Use of this function may be quite practical, but should be limited to user functions, as it may limit scalability (especially as regards memory usage).
- Parameters
-
[in] | root_rank | rank which stores the global array |
[in] | n_elts | number of local elements |
[in] | n_g_elts | number of global elements |
[in] | stride | number of values per element |
[in] | o_key | ordering key (coordinate) value per element |
[in] | array | local array (size: n_elts*stride) |
[out] | g_array | global array (size: n_g_elts*stride) |
void cs_parall_scatter_f |
( |
int |
root_rank, |
|
|
int |
n_elts, |
|
|
int |
n_g_elts, |
|
|
const float |
g_array[], |
|
|
float |
array[] |
|
) |
| |
Distribute a global array from a given root rank over all ranks. Each rank receives the part related to its local elements. Function dealing with single-precision arrays.
The size of each local array may be different.
Use of this function may be quite practical, but should be limited to specific usage, as it may limit scalability (especially as regards memory usage).
- Parameters
-
[in] | root_rank | rank which stores the global array |
[in] | n_elts | size of the local array |
[in] | n_g_elts | size of the global array |
[in] | g_array | global array (size: n_g_elts) only usable by the root rank |
[out] | array | local array (size: n_elts) |
Compute array index bounds for a local thread for upper triangular matrix elements.
When called inside an OpenMP parallel section, this will return the start and past-the-end indexes for the array range assigned to that thread. In other cases, the start index is 1, and the past-the-end index is n.
Compared to cs_parall_thread_range, this variant assumes work on the upper triangular part of a matrix, where the lower part is ignored.
- Parameters
-
[in] | n | size of array |
[in] | type_size | element type size (or multiple) |
[in,out] | s_id | start index for the current thread |
[in,out] | e_id | past-the-end index for the current thread |