8.3
general documentation
Performance tuning

Introduction

C user functions for performance tuning.

Several functions are present in the file, each specific to different performance tuning parameters.

Advanced mesh numbering

/* Force the target number of threads for mesh renumbering
(by default, OMP_NUM_THREADS if OpenMP is enabled, 1 otherwise) */
/* Set the minimum subset sizes when renumbering for threads. */
cs_renumber_set_min_subset_size(64, /* min. interior_subset_size */
64); /* min. boundary subset_size */
/* Select renumbering algorithms */
cs_renumber_set_algorithm(false, /* halo_adjacent_cells_last */
false, /* halo_adjacent_i_faces_last */
CS_RENUMBER_ADJACENT_LOW, /* interior face base ordering */
CS_RENUMBER_CELLS_NONE, /* cells_pre_numbering */
CS_RENUMBER_CELLS_NONE, /* cells_numbering */
CS_RENUMBER_I_FACES_MULTIPASS, /* interior faces numbering */
CS_RENUMBER_B_FACES_THREAD, /* boundary faces numbering */
CS_RENUMBER_VERTICES_NONE); /* vertices numbering */
void cs_renumber_set_n_threads(int n_threads)
Set the target number of threads for mesh renumbering.
Definition: cs_renumber.cpp:5713
void cs_renumber_set_algorithm(bool halo_adjacent_cells_last, bool halo_adjacent_faces_last, cs_renumber_ordering_t i_faces_base_ordering, cs_renumber_cells_type_t cells_pre_numbering, cs_renumber_cells_type_t cells_numbering, cs_renumber_i_faces_type_t i_faces_numbering, cs_renumber_b_faces_type_t b_faces_numbering, cs_renumber_vertices_type_t vertices_numbering)
Select the algorithm for mesh renumbering.
Definition: cs_renumber.cpp:5803
void cs_renumber_set_min_subset_size(cs_lnum_t min_i_subset_size, cs_lnum_t min_b_subset_size)
Set the minimum subset sizes when renumbering for threads.
Definition: cs_renumber.cpp:5754
@ CS_RENUMBER_I_FACES_MULTIPASS
Definition: cs_renumber.h:67
@ CS_RENUMBER_ADJACENT_LOW
Definition: cs_renumber.h:93
@ CS_RENUMBER_CELLS_NONE
Definition: cs_renumber.h:60
@ CS_RENUMBER_VERTICES_NONE
Definition: cs_renumber.h:85
@ CS_RENUMBER_B_FACES_THREAD
Definition: cs_renumber.h:75

Advanced partitioning

Example 1

{
/* Example:
Force PT-SCOTCH or SCOTCH for preprocessing partitioning,
and Hilbert SFC for main partitioning;
Available algorithms (subject to build with external libraries for
SCOTCH and METIS) are:
CS_PARTITION_DEFAULT Default partitioning, based on stage
CS_PARTITION_SFC_MORTON_BOX Morton (Z) curve in bounding box
CS_PARTITION_SFC_MORTON_CUBE Morton (Z) curve in bounding cube
CS_PARTITION_SFC_HILBERT_BOX Peano-Hilbert curve in bounding box
CS_PARTITION_SFC_HILBERT_CUBE Peano-Hilbert curve in bounding cube
CS_PARTITION_SCOTCH PT-SCOTCH or SCOTCH
CS_PARTITION_METIS ParMETIS or METIS
CS_PARTITION_BLOCK Unoptimized (naive) block partitioning */
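cs_partition_set_algorithm(CS_PARTITION_FOR_PREPROCESS,
CS_PARTITION_SCOTCH,
1, /* rank_step */
false); /* ignore periodicity in graph */
cs_partition_set_algorithm(CS_PARTITION_MAIN,
CS_PARTITION_SFC_HILBERT_BOX,
1, /* rank_step */
false); /* ignore periodicity in graph */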
}
void cs_partition_set_algorithm(cs_partition_stage_t stage, cs_partition_algorithm_t algorithm, int rank_step, bool ignore_perio)
Set algorithm for domain partitioning.
Definition: cs_partition.cpp:2914
@ CS_PARTITION_SCOTCH
Definition: cs_partition.h:113
@ CS_PARTITION_SFC_HILBERT_BOX
Definition: cs_partition.h:111
@ CS_PARTITION_FOR_PREPROCESS
Definition: cs_partition.h:86
@ CS_PARTITION_MAIN
Definition: cs_partition.h:87

Example 2

{
/* Example: set partitioning write to file option.
*
* value of write flag: 0: never
* 1: for graph-based partitioning only (default)
* 2: always */
}
void cs_partition_set_write_level(int write_flag)
Set partitioning write to file option.
Definition: cs_partition.cpp:2989

Example 3

{
/* Example: force activation/deactivation of initial partitioning
* for preprocessing. */
}
void cs_partition_set_preprocess(bool active)
Activate or deactivate initial partitioning for preprocessing.
Definition: cs_partition.cpp:3023

Example 4

{
/* Example: define list of extra partitionings to build.
*
* Partitionings in this list will be output to file, and may be used for
* subsequent calculations.
*
* When partitioning for both preprocessing and calculation stages, output to
* file of partitioning data or generation of additional partitionings
* (see \ref cs_partition_add_partitions) will only be done for the
* second stage. */
int n_extra_partitions = 3;
int extra_partitions_list[] = {12, 24, 48};
cs_partition_add_partitions(n_extra_partitions, extra_partitions_list);
}
void cs_partition_add_partitions(int n_extra_partitions, int extra_partitions_list[])
Define list of extra partitionings to build.
Definition: cs_partition.cpp:3086

Parallel IO

#if defined(HAVE_MPI_IO) && MPI_VERSION > 1
/* Example: fine-tune parallel IO settings.
Available distributed block access methods
(subject to build with MPI IO) are:
CS_FILE_STDIO_SERIAL Serial standard C IO
(funnelled through rank 0 in parallel)
CS_FILE_STDIO_PARALLEL Per-process standard C IO
CS_FILE_MPI_INDEPENDENT Non-collective MPI-IO
with independent file open and close
CS_FILE_MPI_NON_COLLECTIVE Non-collective MPI-IO
with collective file open and close
CS_FILE_MPI_COLLECTIVE Collective MPI-IO
*/
int block_rank_step = 8;
int block_min_size = 1024*1024*8;
/* Set MPI IO hints
(see MPI-IO or your filesystem documentation;
examples here may have no effect, improve, or degrade performance)
For LUSTRE filesystems, many articles in the literature seem
to recommend adjusting striping to improve performance.
If using ROMIO, useful hints for collective buffering and data-sieving
may take values: "enable", "disable", "automatic".
*/
MPI_Info_create(&hints);
MPI_Info_set(hints, "striping_factor", "8");
MPI_Info_set(hints, "striping_unit", "8388608");
MPI_Info_set(hints, "romio_cb_read", "automatic");
MPI_Info_set(hints, "romio_cb_write", "automatic");
MPI_Info_set(hints, "romio_ds_read", "automatic");
MPI_Info_set(hints, "romio_ds_write", "automatic");
/* Set default file access methods and communicator stride */
MPI_Info_set(hints, "collective_buffering", "true");
MPI_Info_set(hints, "access_style", "read_once");
MPI_Info_free(&hints);
#endif /* defined(HAVE_MPI_IO) && MPI_VERSION > 1 */
MPI_Comm cs_glob_mpi_comm
Definition: cs_defs.cpp:183
#define MPI_INFO_NULL
Definition: cs_defs.h:95
#define MPI_Info
Definition: cs_defs.h:94
void cs_file_set_default_comm(int block_rank_step, MPI_Comm comm)
Set default MPI communicator values for file access.
Definition: cs_file.cpp:4095
void cs_file_set_mpi_io_positioning(cs_file_mpi_positioning_t positioning)
Set the positioning method for MPI-IO.
Definition: cs_file.cpp:4239
void cs_file_set_default_access(cs_file_mode_t mode, cs_file_access_t method, MPI_Info hints)
Set the default options for file access.
Definition: cs_file.cpp:3954
cs_file_access_t
Definition: cs_file.h:84
@ CS_FILE_MPI_COLLECTIVE
Definition: cs_file.h:91
@ CS_FILE_MODE_WRITE
Definition: cs_file.h:67
@ CS_FILE_MODE_READ
Definition: cs_file.h:66
@ CS_FILE_MPI_INDIVIDUAL_POINTERS
Definition: cs_file.h:101
void cs_parall_set_min_coll_buf_size(size_t buffer_size)
Define minimum recommended scatter or gather buffer size.
Definition: cs_parall.cpp:1353

Matrix tuning

/* Activate tuning of matrix-vector operations */
/* Set tuning runs (defaults) */
cs_matrix_set_tuning_runs(10); /* n_products */
/* Force default for selected types */
/* Also allow tuning for multigrid for all expected levels
* (we rarely have more than 10 or 11 levels except for huge meshes). */
void cs_grid_set_matrix_tuning(cs_matrix_fill_type_t fill_type, int max_level)
Set matrix tuning behavior for multigrid coarse meshes.
Definition: cs_grid.cpp:8841
@ CS_MATRIX_BLOCK_D
Definition: cs_matrix.h:76
@ CS_MATRIX_SCALAR_SYM
Definition: cs_matrix.h:75
@ CS_MATRIX_MSR
Definition: cs_matrix.h:58
void cs_matrix_default_set_type(cs_matrix_fill_type_t fill_type, cs_matrix_type_t type)
Set default matrix type for a given fill type.
Definition: cs_matrix_default.cpp:850
void cs_matrix_set_tuning_runs(int n_min_products)
Definition: cs_matrix_default.cpp:816