8.3
general documentation
cs_array_cuda.h
Go to the documentation of this file.
1/*============================================================================
2 * Array handling utilities.
3 *============================================================================*/
4
5/*
6 This file is part of code_saturne, a general-purpose CFD tool.
7
8 Copyright (C) 1998-2024 EDF S.A.
9
10 This program is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free Software
12 Foundation; either version 2 of the License, or (at your option) any later
13 version.
14
15 This program is distributed in the hope that it will be useful, but WITHOUT
16 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
18 details.
19
20 You should have received a copy of the GNU General Public License along with
21 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
22 Street, Fifth Floor, Boston, MA 02110-1301, USA.
23*/
24
25/*----------------------------------------------------------------------------*/
26
27/*----------------------------------------------------------------------------
28 * Standard C and C++ library headers
29 *----------------------------------------------------------------------------*/
30
31#include <string.h>
32
33/*----------------------------------------------------------------------------
34 * Local headers
35 *----------------------------------------------------------------------------*/
36
37#include "cs_defs.h"
38#include "cs_base_cuda.h"
39
40/*----------------------------------------------------------------------------*/
41
42/*=============================================================================
43 * Macro definitions
44 *============================================================================*/
45
46/*============================================================================
47 * Type definitions
48 *============================================================================*/
49
50/*============================================================================
51 * Global variables
52 *============================================================================*/
53
54/*=============================================================================
55 * Public inline function prototypes
56 *============================================================================*/
57
58/*=============================================================================
59 * Public function prototypes
60 *============================================================================*/
61
62/* Default kernel that loops over an integer range and calls a device functor.
63 This kernel uses a grid_size-stride loop and thus guarantees that all
64 integers are processed, even if the grid is smaller.
65 All arguments *must* be passed by value to avoid passing CPU references
66 to the GPU. */
67template <typename T, size_t stride>
68__global__ void
70 const T ref_val,
71 const int size_arrs,
72 T **array_ptrs) {
73
74 // grid_size-stride loop
75 for (cs_lnum_t id = blockIdx.x * blockDim.x + threadIdx.x; id < n;
76 id += blockDim.x * gridDim.x) {
77 for (int j = 0; j < size_arrs; j++)
78 for (size_t k = 0; k < stride; k++) {
79 array_ptrs[j][id*stride + k] = ref_val;
80 } // end loop stride
81 } // end loop arrays
82}
83
84template <typename T, size_t stride>
85__global__ void
87 const T *ref_val,
88 const int size_arrs,
89 T **array_ptrs) {
90 // grid_size-stride loop
91 for (cs_lnum_t id = blockIdx.x * blockDim.x + threadIdx.x; id < n;
92 id += blockDim.x * gridDim.x) {
93 for (int j = 0; j < size_arrs; j++)
94 for (size_t k = 0; k < stride; k++) {
95 array_ptrs[j][id*stride + k] = ref_val[k];
96 } // end loop stride
97 } // end loop arrays
98}
99
100/*----------------------------------------------------------------------------*/
116/*----------------------------------------------------------------------------*/
117
118template <typename T, size_t stride, typename... Arrays>
119void
120cs_arrays_set_value(cudaStream_t stream,
121 const cs_lnum_t n_elts,
122 const T *ref_val,
123 Arrays&&... arrays)
124{
125
126 const long block_size_ = 256;
127 const long grid_size_ = (n_elts % block_size_) ?
128 n_elts/block_size_ + 1 : n_elts/block_size_;
129
130 const int size_arrs = sizeof...(arrays);
131 T** array_ptrs = NULL;
132 CS_MALLOC_HD(array_ptrs, size_arrs, T*, cs_alloc_mode);
133
134 /* Explicit expansion of arrays */
135 T* _array_ptrs[] = {arrays ...};
136
137 for (int j = 0; j < size_arrs; j++)
138 array_ptrs[j] = _array_ptrs[j];
139
140 cuda_kernel_set_value<T, stride><<<grid_size_, block_size_, 0, stream>>>
141 (n_elts, ref_val, size_arrs, array_ptrs);
142
143 CS_CUDA_CHECK(cudaStreamSynchronize(stream));
144 CS_CUDA_CHECK(cudaGetLastError());
145
146 CS_FREE_HD(array_ptrs);
147}
148
149/*----------------------------------------------------------------------------*/
165/*----------------------------------------------------------------------------*/
166
167template <typename T, size_t stride, typename... Arrays>
168void
169cs_arrays_set_value(cudaStream_t stream,
170 const cs_lnum_t n_elts,
171 const T ref_val,
172 Arrays&&... arrays)
173{
174 const long block_size_ = 256;
175 const long grid_size_ = (n_elts % block_size_) ?
176 n_elts/block_size_ + 1 : n_elts/block_size_;
177
178 const int size_arrs = sizeof...(arrays);
179 T** array_ptrs = NULL;
180 CS_MALLOC_HD(array_ptrs, size_arrs, T*, cs_alloc_mode);
181
182 T* _array_ptrs[] = {arrays ...};
183
184 for (int j = 0; j < size_arrs; j++)
185 array_ptrs[j] = _array_ptrs[j];
186
187 cuda_kernel_set_value<T, stride><<<grid_size_, block_size_, 0, stream>>>
188 (n_elts, ref_val, size_arrs, array_ptrs);
189
190 CS_CUDA_CHECK(cudaStreamSynchronize(stream));
191 CS_CUDA_CHECK(cudaGetLastError());
192
193 CS_FREE_HD(array_ptrs);
194}
195
196/*----------------------------------------------------------------------------*/
208/*----------------------------------------------------------------------------*/
209
210template <typename T>
211void
212cs_array_copy(cudaStream_t stream,
213 const cs_lnum_t size,
214 const T* src,
215 T* dest)
216{
217 cudaMemcpyAsync(dest, src, size*sizeof(T),
218 cudaMemcpyDeviceToDevice, stream);
219}
220
void cs_array_copy(cudaStream_t stream, const cs_lnum_t size, const T *src, T *dest)
Copy values from an array to another of the same dimensions.
Definition: cs_array_cuda.h:212
void cs_arrays_set_value(cudaStream_t stream, const cs_lnum_t n_elts, const T *ref_val, Arrays &&... arrays)
Assign values to all elements of multiple arrays. ref_val is input as a pointer or an array.
Definition: cs_array_cuda.h:120
__global__ void cuda_kernel_set_value(cs_lnum_t n, const T ref_val, const int size_arrs, T **array_ptrs)
Definition: cs_array_cuda.h:69
#define CS_FREE_HD(_ptr)
Definition: cs_base_accel.h:52
int cs_lnum_t
local mesh entity id
Definition: cs_defs.h:335
@ k
Definition: cs_field_pointer.h:72
#define CS_MALLOC_HD(_ptr, _ni, _type, _mode)
Definition: cs_mem.h:99
#define cs_alloc_mode
Definition: cs_mem.h:186