7.2
general documentation
cs_base_cuda.h File Reference
#include "cs_defs.h"
#include <stdio.h>
#include "cs_log.h"
+ Include dependency graph for cs_base_cuda.h:

Go to the source code of this file.

Macros

#define CS_CUDA_CHECK(a)
 
#define CS_CUDA_CHECK_CALL(a, file_name, line_num)
 
#define CS_CUDA_WARP_SIZE   32
 

Functions

static unsigned int cs_cuda_grid_size (cs_lnum_t n, unsigned int block_size)
 Compute grid size for given array and block sizes. More...
 

Variables

int cs_glob_cuda_device_id
 
int cs_glob_cuda_max_threads_per_block
 
int cs_glob_cuda_max_block_size
 
int cs_glob_cuda_max_blocks
 
int cs_glob_cuda_n_mp
 

Macro Definition Documentation

◆ CS_CUDA_CHECK

#define CS_CUDA_CHECK(a)
Value:
{ \
cudaError_t ret_code = a; \
if (cudaSuccess != ret_code) { \
bft_error(__FILE__, __LINE__, 0, "[CUDA error] %d: %s\n running: %s", \
ret_code, ::cudaGetErrorString(ret_code), #a); \
} \
}
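Note: the entry below is an automatic Doxygen cross-reference for the identifier "a"; it points to an unrelated Fortran variable, not to this macro's argument.
double precision, save a (Definition: cs_fuel_incl.f90:146)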

◆ CS_CUDA_CHECK_CALL

#define CS_CUDA_CHECK_CALL(a, file_name, line_num)
Value:
{ \
cudaError_t ret_code = a; \
if (cudaSuccess != ret_code) { \
bft_error(file_name, line_num, 0, "[CUDA error] %d: %s\n running: %s", \
ret_code, ::cudaGetErrorString(ret_code), #a); \
} \
}
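Note: the entry below is an automatic Doxygen cross-reference for the identifier "a"; it points to an unrelated Fortran variable, not to this macro's argument.
double precision, save a (Definition: cs_fuel_incl.f90:146)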

◆ CS_CUDA_WARP_SIZE

#define CS_CUDA_WARP_SIZE   32

Function Documentation

◆ cs_cuda_grid_size()

static unsigned int cs_cuda_grid_size (cs_lnum_t n, unsigned int block_size)
inline static

Compute grid size for given array and block sizes.

This assumes each thread on a given block handles a single array element. For kernels in which each thread handles multiple elements, a grid size divided by that multiple is sufficient.

Parameters
[in] n: size of arrays
[in] block_size: block size for kernels
Returns
grid size for kernels

Variable Documentation

◆ cs_glob_cuda_device_id

int cs_glob_cuda_device_id

◆ cs_glob_cuda_max_block_size

int cs_glob_cuda_max_block_size

◆ cs_glob_cuda_max_blocks

int cs_glob_cuda_max_blocks

◆ cs_glob_cuda_max_threads_per_block

int cs_glob_cuda_max_threads_per_block

◆ cs_glob_cuda_n_mp

int cs_glob_cuda_n_mp