doxygen/src/cs__base__cuda_8h_source.html

 #ifndef __CS_BASE_CUDA_H__

 #define __CS_BASE_CUDA_H__


 /*============================================================================

  * Definitions, global variables, and base functions for CUDA

  *============================================================================*/


 /*

   This file is part of code_saturne, a general-purpose CFD tool.


   Copyright (C) 1998-2023 EDF S.A.


   This program is free software; you can redistribute it and/or modify it under

   the terms of the GNU General Public License as published by the Free Software

   Foundation; either version 2 of the License, or (at your option) any later

   version.


   This program is distributed in the hope that it will be useful, but WITHOUT

   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS

   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more

   details.


   You should have received a copy of the GNU General Public License along with

   this program; if not, write to the Free Software Foundation, Inc., 51 Franklin

   Street, Fifth Floor, Boston, MA 02110-1301, USA.

 */


 /*----------------------------------------------------------------------------*/


 #include "cs_defs.h"


 /*----------------------------------------------------------------------------

  * Standard C library headers

  *----------------------------------------------------------------------------*/


 #include <stdio.h>


 /*----------------------------------------------------------------------------

  *  Local headers

  *----------------------------------------------------------------------------*/


 #include "cs_log.h"


 /*=============================================================================

  * Macro definitions

  *============================================================================*/


 /* Check the status returned by a CUDA runtime call.

    parameters:
      a <-- expression yielding a cudaError_t; it is evaluated exactly once

    When the value differs from cudaSuccess, bft_error is called with the
    current __FILE__ and __LINE__, the numeric error code, the message
    returned by cudaGetErrorString, and the stringified expression.

    The status is stored in a local with a macro-specific name, so that an
    argument which itself refers to a variable named "ret_code" is not
    shadowed by (and self-assigned to) the macro's own local. The argument
    is parenthesized before use, and the code is cast to int to match the
    "%d" conversion.

    The brace-block form is kept (rather than do/while(0)) so that existing
    call sites compile whether or not they add a trailing semicolon; avoid
    using it as the unbraced body of an if that has an else branch. */

 #define CS_CUDA_CHECK(a) { \
     cudaError_t cs_cuda_check_ret_ = (a); \
     if (cudaSuccess != cs_cuda_check_ret_) { \
       bft_error(__FILE__, __LINE__, 0, "[CUDA error] %d: %s\n  running: %s", \
                 (int)cs_cuda_check_ret_, \
                 ::cudaGetErrorString(cs_cuda_check_ret_), #a); \
     } \
   }


 /* Check the status returned by a CUDA runtime call, reporting a
    caller-supplied source location instead of the macro expansion site.

    parameters:
      a         <-- expression yielding a cudaError_t; evaluated exactly once
      file_name <-- file name forwarded as first argument of bft_error
      line_num  <-- line number forwarded as second argument of bft_error

    When the value differs from cudaSuccess, bft_error is called with the
    given location, the numeric error code, the message returned by
    cudaGetErrorString, and the stringified expression.

    The status is stored in a local with a macro-specific name, so that an
    argument which itself refers to a variable named "ret_code" is not
    shadowed by (and self-assigned to) the macro's own local. All arguments
    are parenthesized before use, and the code is cast to int to match the
    "%d" conversion.

    The brace-block form is kept (rather than do/while(0)) so that existing
    call sites compile whether or not they add a trailing semicolon; avoid
    using it as the unbraced body of an if that has an else branch. */

 #define CS_CUDA_CHECK_CALL(a, file_name, line_num) { \
     cudaError_t cs_cuda_check_ret_ = (a); \
     if (cudaSuccess != cs_cuda_check_ret_) { \
       bft_error((file_name), (line_num), 0, \
                 "[CUDA error] %d: %s\n  running: %s", \
                 (int)cs_cuda_check_ret_, \
                 ::cudaGetErrorString(cs_cuda_check_ret_), #a); \
     } \
   }


 /* For all current compute capabilities, the warp size is 32; if it ever
    changes, it can be obtained through cudaDeviceProp, so we could then
    replace this macro with a global variable. */


 #define CS_CUDA_WARP_SIZE 32


 /*----------------------------------------------------------------------------*/


 BEGIN_C_DECLS


 /*============================================================================

  * Type definitions

  *============================================================================*/


 /*=============================================================================

  * Global variable definitions

  *============================================================================*/


 /* Declarations only: the variables below are defined in another
    translation unit, not in this header. */

 /* Id of the CUDA device (per the variable name; presumably the device
    currently selected for this process -- TODO confirm in the defining
    source file) */

 extern int  cs_glob_cuda_device_id;


 /* Other device parameters */

 /* NOTE(review): the first three descriptions below are inferred from the
    variable names only and should be confirmed where the values are set. */


 extern int  cs_glob_cuda_max_threads_per_block;  /* Presumably the device
                                                     limit on threads per
                                                     block */

 extern int  cs_glob_cuda_max_block_size;  /* Presumably the largest block
                                              size to use for launches */

 extern int  cs_glob_cuda_max_blocks;  /* Presumably the largest number of
                                          blocks to use for launches */

 extern int  cs_glob_cuda_n_mp;  /* Number of multiprocessors */


 /*============================================================================

  * Semi-private function prototypes

  *

  * The following functions are intended to be used by the common

  * host-device memory management functions from cs_base_accel.c, and

  * not directly by the user.

  *============================================================================*/


 #if defined(HAVE_CUDA)


 /*----------------------------------------------------------------------------*/
 /*
  * Device memory allocation routine (per its name; the implementation is
  * not part of this header).
  *
  * NOTE(review): argument descriptions are inferred from the parameter names
  * and should be confirmed against the definition.
  *
  * parameters:
  *   n         <-- requested size (presumably in bytes -- TODO confirm)
  *   var_name  <-- name of the allocated variable (presumably for messages)
  *   file_name <-- name of the calling source file
  *   line_num  <-- line number in the calling source file
  *
  * returns:
  *   untyped pointer (presumably to the allocated device memory)
  */
 /*----------------------------------------------------------------------------*/


 void *

 cs_cuda_mem_malloc_device(size_t        n,

                           const char   *var_name,

                           const char   *file_name,

                           int           line_num);


 /*----------------------------------------------------------------------------*/
 /*
  * Host memory allocation routine of the CUDA layer (per its name; the
  * implementation is not part of this header).
  *
  * NOTE(review): presumably returns host memory obtained through the CUDA
  * runtime rather than plain malloc -- confirm against the definition.
  * Argument descriptions are inferred from the parameter names.
  *
  * parameters:
  *   n         <-- requested size (presumably in bytes -- TODO confirm)
  *   var_name  <-- name of the allocated variable (presumably for messages)
  *   file_name <-- name of the calling source file
  *   line_num  <-- line number in the calling source file
  *
  * returns:
  *   untyped pointer (presumably to the allocated host memory)
  */
 /*----------------------------------------------------------------------------*/


 void *

 cs_cuda_mem_malloc_host(size_t        n,

                         const char   *var_name,

                         const char   *file_name,

                         int           line_num);


 /*----------------------------------------------------------------------------*/
 /*
  * Managed memory allocation routine (per its name; the implementation is
  * not part of this header).
  *
  * NOTE(review): "managed" presumably refers to CUDA managed (unified)
  * memory -- confirm against the definition. Argument descriptions are
  * inferred from the parameter names.
  *
  * parameters:
  *   n         <-- requested size (presumably in bytes -- TODO confirm)
  *   var_name  <-- name of the allocated variable (presumably for messages)
  *   file_name <-- name of the calling source file
  *   line_num  <-- line number in the calling source file
  *
  * returns:
  *   untyped pointer (presumably to the allocated memory)
  */
 /*----------------------------------------------------------------------------*/


 void *

 cs_cuda_mem_malloc_managed(size_t        n,

                            const char   *var_name,

                            const char   *file_name,

                            int           line_num);


 /*----------------------------------------------------------------------------*/
 /*
  * Memory release routine of the CUDA layer (per its name; the
  * implementation is not part of this header).
  *
  * NOTE(review): which of the allocation routines declared in this header
  * it pairs with is not visible here -- confirm against the definition.
  * Argument descriptions are inferred from the parameter names.
  *
  * parameters:
  *   p         <-> pointer to the memory to release
  *   var_name  <-- name of the released variable (presumably for messages)
  *   file_name <-- name of the calling source file
  *   line_num  <-- line number in the calling source file
  */
 /*----------------------------------------------------------------------------*/


 void

 cs_cuda_mem_free(void         *p,

                  const char   *var_name,

                  const char   *file_name,

                  int           line_num);


 /*----------------------------------------------------------------------------*/
 /*
  * Host memory release routine of the CUDA layer (per its name; the
  * implementation is not part of this header).
  *
  * NOTE(review): presumably the counterpart of cs_cuda_mem_malloc_host --
  * confirm against the definition. Argument descriptions are inferred from
  * the parameter names.
  *
  * parameters:
  *   p         <-> pointer to the memory to release
  *   var_name  <-- name of the released variable (presumably for messages)
  *   file_name <-- name of the calling source file
  *   line_num  <-- line number in the calling source file
  */
 /*----------------------------------------------------------------------------*/


 void

 cs_cuda_mem_free_host(void         *p,

                       const char   *var_name,

                       const char   *file_name,

                       int           line_num);


 /*----------------------------------------------------------------------------*/
 /*
  * Copy routine; "h2d" presumably stands for host-to-device (the
  * implementation is not part of this header -- TODO confirm).
  *
  * parameters:
  *   dst  --> destination (presumably a device pointer)
  *   src  <-- source (presumably a host pointer)
  *   size <-- size of the data to copy (presumably in bytes)
  */
 /*----------------------------------------------------------------------------*/


 void

 cs_cuda_copy_h2d(void         *dst,

                  const void   *src,

                  size_t        size);


 /*----------------------------------------------------------------------------*/
 /*
  * Copy routine; "h2d_async" presumably stands for an asynchronous
  * host-to-device copy (the implementation is not part of this header).
  *
  * NOTE(review): which stream is used and when the copy is guaranteed to be
  * complete cannot be determined from this declaration -- confirm against
  * the definition before relying on the contents of dst.
  *
  * parameters:
  *   dst  --> destination (presumably a device pointer)
  *   src  <-- source (presumably a host pointer)
  *   size <-- size of the data to copy (presumably in bytes)
  */
 /*----------------------------------------------------------------------------*/


 void

 cs_cuda_copy_h2d_async(void        *dst,

                        const void  *src,

                        size_t       size);


 /*----------------------------------------------------------------------------*/
 /*
  * Copy routine; "d2h" presumably stands for device-to-host (the
  * implementation is not part of this header -- TODO confirm).
  *
  * parameters:
  *   dst  --> destination (presumably a host pointer)
  *   src  <-- source (presumably a device pointer)
  *   size <-- size of the data to copy (presumably in bytes)
  */
 /*----------------------------------------------------------------------------*/


 void

 cs_cuda_copy_d2h(void        *dst,

                  const void  *src,

                  size_t       size);


 /*----------------------------------------------------------------------------*/
 /*
  * Copy routine; "d2h_async" presumably stands for an asynchronous
  * device-to-host copy (the implementation is not part of this header).
  *
  * NOTE(review): which stream is used and when the copy is guaranteed to be
  * complete cannot be determined from this declaration -- confirm against
  * the definition before reading dst on the host.
  *
  * parameters:
  *   dst  --> destination (presumably a host pointer)
  *   src  <-- source (presumably a device pointer)
  *   size <-- size of the data to copy (presumably in bytes)
  */
 /*----------------------------------------------------------------------------*/


 void

 cs_cuda_copy_d2h_async(void        *dst,

                        const void  *src,

                        size_t       size);


 /*----------------------------------------------------------------------------*/
 /*
  * Prefetch routine; "h2d" presumably stands for host-to-device (the
  * implementation is not part of this header).
  *
  * NOTE(review): since only one pointer is passed, this presumably applies
  * to memory addressable from both host and device (e.g. managed memory) --
  * confirm against the definition.
  *
  * parameters:
  *   dst  <-> pointer to the memory range to prefetch
  *   size <-- size of the range (presumably in bytes)
  */
 /*----------------------------------------------------------------------------*/


 void

 cs_cuda_prefetch_h2d(void    *dst,

                      size_t   size);


 /*----------------------------------------------------------------------------*/
 /*
  * Prefetch routine; "d2h" presumably stands for device-to-host (the
  * implementation is not part of this header).
  *
  * NOTE(review): since only one pointer is passed, this presumably applies
  * to memory addressable from both host and device (e.g. managed memory) --
  * confirm against the definition.
  *
  * parameters:
  *   dst  <-> pointer to the memory range to prefetch
  *   size <-- size of the range (presumably in bytes)
  */
 /*----------------------------------------------------------------------------*/


 void

 cs_cuda_prefetch_d2h(void    *dst,

                      size_t   size);


 /*----------------------------------------------------------------------------*/
 /*
  * Copy routine; "d2d" presumably stands for device-to-device (the
  * implementation is not part of this header -- TODO confirm).
  *
  * parameters:
  *   dst  --> destination (presumably a device pointer)
  *   src  <-- source (presumably a device pointer)
  *   size <-- size of the data to copy (presumably in bytes)
  */
 /*----------------------------------------------------------------------------*/


 void

 cs_cuda_copy_d2d(void        *dst,

                  const void  *src,

                  size_t       size);


 /*----------------------------------------------------------------------------*/
 /*
  * Host pointer query (per its name; the implementation is not part of this
  * header).
  *
  * NOTE(review): presumably returns the host-side pointer associated with
  * ptr; the result for a pointer with no host counterpart is not visible
  * here -- confirm against the definition.
  *
  * parameters:
  *   ptr <-- pointer to query
  *
  * returns:
  *   untyped pointer (presumably the matching host pointer)
  */
 /*----------------------------------------------------------------------------*/


 void *

 cs_cuda_get_host_ptr(const void  *ptr);


 #endif


 /*=============================================================================

  * Inline static function prototypes

  *============================================================================*/


 /*----------------------------------------------------------------------------*/
 /*
  * Compute grid size for given array and block sizes.
  *
  * For a non-negative n, the result is n divided by block_size, rounded up:
  * the number of blocks of block_size entries needed to cover n entries.
  *
  * parameters:
  *   n          <-- array size
  *   block_size <-- block size; used as a divisor, so it must be nonzero
  *
  * returns:
  *   number of blocks
  */
 /*----------------------------------------------------------------------------*/

 static inline unsigned int
 cs_cuda_grid_size(cs_lnum_t     n,
                   unsigned int  block_size)
 {
   unsigned int n_blocks = n / block_size;

   /* A partially filled trailing block counts as one more block. */
   if (n % block_size != 0)
     n_blocks += 1;

   return n_blocks;
 }


 /*=============================================================================

  * Public function prototypes

  *============================================================================*/


 #if defined(HAVE_CUDA)


 /*----------------------------------------------------------------------------*/
 /*
  * CUDA device information logging routine (per its name; the
  * implementation is not part of this header).
  *
  * NOTE(review): presumably writes a description of the available or
  * selected device(s) to the given log -- confirm against the definition.
  *
  * parameters:
  *   log_id <-- log identifier (cs_log_t), presumably selecting the output
  */
 /*----------------------------------------------------------------------------*/


 void

 cs_base_cuda_device_info(cs_log_t  log_id);


 /*----------------------------------------------------------------------------*/
 /*
  * CUDA version information logging routine (per its name; the
  * implementation is not part of this header).
  *
  * NOTE(review): presumably writes CUDA driver and/or runtime version
  * information to the given log -- confirm against the definition.
  *
  * parameters:
  *   log_id <-- log identifier (cs_log_t), presumably selecting the output
  */
 /*----------------------------------------------------------------------------*/


 void

 cs_base_cuda_version_info(cs_log_t  log_id);


 /*----------------------------------------------------------------------------*/
 /*
  * CUDA compiler information logging routine (per its name; the
  * implementation is not part of this header).
  *
  * NOTE(review): presumably writes information on the CUDA compiler used
  * for the build to the given log -- confirm against the definition.
  *
  * parameters:
  *   log_id <-- log identifier (cs_log_t), presumably selecting the output
  */
 /*----------------------------------------------------------------------------*/


 void

 cs_base_cuda_compiler_info(cs_log_t  log_id);


 /*----------------------------------------------------------------------------*/
 /*
  * Default CUDA device selection routine (per its name; the implementation
  * is not part of this header).
  *
  * NOTE(review): the selection policy and the meaning of the return value
  * (presumably the id of the selected device, with some convention for
  * "no device") are not visible here -- confirm against the definition.
  *
  * returns:
  *   integer value (presumably the selected device id)
  */
 /*----------------------------------------------------------------------------*/


 int

 cs_base_cuda_select_default_device(void);


 /*----------------------------------------------------------------------------*/
 /*
  * CUDA device query routine (per its name; the implementation is not part
  * of this header).
  *
  * NOTE(review): presumably returns the id of the device currently in use;
  * the value returned when no device is available is not visible here --
  * confirm against the definition.
  *
  * returns:
  *   integer value (presumably the current device id)
  */
 /*----------------------------------------------------------------------------*/


 int

 cs_base_cuda_get_device(void);


 #endif


 /*----------------------------------------------------------------------------*/


 END_C_DECLS


 #endif /* __CS_BASE_CUDA_H__ */

cs_cuda_grid_size
static unsigned int cs_cuda_grid_size(cs_lnum_t n, unsigned int block_size)
Compute grid size for given array and block sizes.
Definition: cs_base_cuda.h:416

cs_glob_cuda_n_mp
int cs_glob_cuda_n_mp

cs_glob_cuda_max_threads_per_block
int cs_glob_cuda_max_threads_per_block

cs_glob_cuda_max_blocks
int cs_glob_cuda_max_blocks

cs_glob_cuda_device_id
int cs_glob_cuda_device_id

cs_glob_cuda_max_block_size
int cs_glob_cuda_max_block_size

cs_defs.h

BEGIN_C_DECLS
#define BEGIN_C_DECLS
Definition: cs_defs.h:514

END_C_DECLS
#define END_C_DECLS
Definition: cs_defs.h:515

cs_lnum_t
int cs_lnum_t
local mesh entity id
Definition: cs_defs.h:313

p
@ p
Definition: cs_field_pointer.h:67

cs_log.h

cs_log_t
cs_log_t
Definition: cs_log.h:48