COMPASS  5.4.4
End-to-end AO simulation tool using GPU acceleration
carma_obj.h
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------
2 // This file is part of COMPASS <https://anr-compass.github.io/compass/>
3 //
4 // Copyright (C) 2011-2023 COMPASS Team <https://github.com/ANR-COMPASS>
5 // All rights reserved.
6 
7 // -----------------------------------------------------------------------------
8 
16 
17 #ifndef _CARMA_OBJ_H_
18 #define _CARMA_OBJ_H_
19 
20 #include <carma_context.h>
21 #include <carma_streams.h>
22 #include <carma_utils.h>
23 #include <curand.h>
24 #include <curand_kernel.h>
25 #include <iostream>
26 #include <type_traits>
27 #include <typeinfo> // operator typeid
28 
29 /*
30  create a memory object
31  void *memory
32  int nb of reference
33 
34  create a class which contains :
35  - d_data
36  - ndims
37  - dims
38  - strides
39  - type
40 
41  new()
42 
43  new(existing)
44 
45 
46  and then
47  modify CarmaObj so that it is :
48  an object of the previous class
49  all the methods of a CarmaObj
50 
51  */
52 
53 #define BLOCK_SZ 16
54 
55 enum MemType {
64 };
65 // should add texture ?
66 
67 template <class T_data>
68 class CarmaData {
69  protected:
70  T_data *d_data;
71  int ndims;
72  int nb_elem;
73  long *dims_data;
74  int *strides;
76 
77  public:
78  T_data *get_data() { return d_data; }
79  int get_ndims() { return ndims; }
80  int get_nb_elem() { return nb_elem; }
81  const long *get_dims_data() { return dims_data; }
82  long get_dims_data(int i) { return dims_data[i]; }
83  int *get_strides() { return strides; }
84  int get_strides(int i) { return strides[i]; }
86 };
87 
88 template <class T_data>
89 class CarmaHostObj;
90 
91 template <class T_data>
92 class CarmaObj {
93  protected:
94  T_data *d_data = nullptr;
95  std::vector<T_data> h_data;
96  T_data *o_data = nullptr;
97  T_data *cub_data = nullptr;
98  size_t cub_data_size = 0; // optionnal for reduction
99  int ndim = 0;
100  long *dims_data = nullptr;
101  int nb_elem = 0;
102  int device = -1;
104 
105  curandGenerator_t gen;
106  curandState *d_states;
107 
110 
111  bool keys_only; //< optional flag (used for sort)
112  bool owner = true; // Flag if d_data is created inside the CarmaObj
113 
114  unsigned int *values;
115  size_t *d_num_valid;
116 
117  cufftHandle plan;
118  cufftType type_plan;
119 
121 
123  const T_data *data, bool fromHost, int nb_streams);
124 
125  public:
129  CarmaObj(CarmaContext *current_context, const std::vector<long> &dims);
132  const T_data *data);
134  int nb_streams);
136  int nb_streams);
138  const T_data *data, int nb_streams);
139  CarmaObj(const CarmaObj &)=delete;
141 
142  void sync_h_data() {
143  if (h_data.empty()) h_data = std::vector<T_data>(nb_elem);
144  device2host(h_data.data());
145  }
146 
147  T_data *get_h_data() { return h_data.data(); }
148 
149  int get_nb_streams() const {
152  return streams->get_nb_streams();
153  }
154  int add_stream() {
155  this->streams->add_stream();
156  return this->streams->get_nb_streams();
157  }
158  int add_stream(int nb) {
159  this->streams->add_stream(nb);
160  return this->streams->get_nb_streams();
161  }
162  int del_stream() {
163  this->streams->del_stream();
164  return this->streams->get_nb_streams();
165  }
166  int del_stream(int nb) {
167  this->streams->del_stream(nb);
168  return this->streams->get_nb_streams();
169  }
170  cudaStream_t get_cuda_stream(int stream) {
171  return this->streams->get_stream(stream);
172  }
173  int wait_stream(int stream) {
174  this->streams->wait_stream(stream);
175  return EXIT_SUCCESS;
176  }
178  this->streams->wait_all_streams();
179  return EXIT_SUCCESS;
180  }
181  void swap_ptr(T_data *ptr) {
182  dealloc();
183  d_data = ptr;
184  owner = false;
185  }
186 
187  void dealloc() {
188  if (owner && d_data) cudaFree(d_data);
189  }
190 
192  operator T_data *() { return d_data; }
193 
194  std::string to_string() {
195  std::ostringstream stream;
196  stream << *this;
197  return stream.str();
198  }
199 
200  operator std::string() { return this->to_string(); }
201  // inline char const *c_str() { return this->to_string().c_str(); }
202  const T_data operator[](int index) const {
203  T_data tmp_float;
204  carma_safe_call(cudaMemcpy(&tmp_float, &d_data[index], sizeof(T_data),
205  cudaMemcpyDeviceToHost));
206  return tmp_float;
207  }
208  T_data *get_data() { return d_data; }
209  T_data *get_data_at(int index) { return &d_data[index]; }
210  T_data *get_o_data() { return o_data; }
211  const T_data get_o_data_value() const {
212  T_data tmp_float;
214  cudaMemcpy(&tmp_float, o_data, sizeof(T_data), cudaMemcpyDeviceToHost));
215  return tmp_float;
216  }
217  const long *get_dims() { return dims_data; }
218  long get_dims(int i) { return dims_data[i]; }
219  int get_nb_elements() { return nb_elem; }
221 
222  int get_device() { return device; }
223 
224  bool is_rng_init() { return (gen != NULL); }
225 
227  template <typename T_dest>
228  int host2device(const T_dest *data);
229  template <typename T_dest>
230  int device2host(T_dest *data);
231 
232  int host2device_async(const T_data *data, cudaStream_t stream);
233  int device2host_async(T_data *data, cudaStream_t stream);
234  int device2host_opt(T_data *data);
235  int host2device_vect(const T_data *data, int incx, int incy);
236  int device2host_vect(T_data *data, int incx, int incy);
237  int host2device_mat(const T_data *data, int lda, int ldb);
238  int device2host_mat(T_data *data, int lda, int ldb);
239 
240  int copy_into(T_data *data, int nb_elem);
241  int copy_from(const T_data *data, int nb_elem);
242  int copy_from_async(const T_data *data, int nb_elem, cudaStream_t stream);
243 
244 #ifdef USE_OCTOPUS
245  int copy_into(ipc::Cacao<T_data> *cacaoInterface);
246  int copy_from(ipc::Cacao<T_data> *cacaoInterface);
247 #endif
248 
249  inline int reset() {
250  return cudaMemset(this->d_data, 0, this->nb_elem * sizeof(T_data));
251  }
252 
253  inline int reset(cudaStream_t stream) {
254  return cudaMemsetAsync(this->d_data, 0, this->nb_elem * sizeof(T_data), stream);
255  }
256 
257  inline int memset(T_data value) {
258  return fill_array_with_value(
259  this->d_data, value, this->nb_elem,
260  this->current_context->get_device(this->device));
261  }
262  cufftHandle *get_plan() { return &plan; }
264  cufftType get_type_plan() { return type_plan; }
266 
267  unsigned int *get_values() { return values; }
269 
271  T_data sum();
273  void reduceCub(cudaStream_t stream);
274  void reduceCub() {reduceCub(0);};
275 
276  void clip(T_data min, T_data max, cudaStream_t stream);
277  void clip(T_data min, T_data max) {clip(min, max, 0);};
278 
280  int transpose(CarmaObj<T_data> *source);
281  // CarmaObj<T_data>& operator= (const CarmaObj<T_data>& obj);
282 
283  /*
284  * ____ _ _ ____ _
285  * | __ )| | / \ / ___|/ |
286  * | _ \| | / _ \ \___ \| |
287  * | |_) | |___ / ___ \ ___) | |
288  * |____/|_____/_/ \_\____/|_|
289  *
290  */
291 
292  int aimax(int incx);
293  int aimin(int incx);
294  T_data asum(int incx);
295  T_data nrm2(int incx);
296  T_data dot(CarmaObj<T_data> *source, int incx, int incy);
297  void scale(T_data alpha, int incx);
298  void swap(CarmaObj<T_data> *source, int incx, int incy);
299  void copy(CarmaObj<T_data> *source, int incx, int incy);
300  void axpy(T_data alpha, CarmaObj<T_data> *source, int incx, int incy,
301  int offset = 0);
302  void rot(CarmaObj<T_data> *source, int incx, int incy, T_data sc, T_data ss);
303 
304  /*
305  * ____ _ _ ____ ____
306  * | __ )| | / \ / ___|___ \
307  * | _ \| | / _ \ \___ \ __) |
308  * | |_) | |___ / ___ \ ___) / __/
309  * |____/|_____/_/ \_\____/_____|
310  *
311  */
312 
313  void gemv(char trans, T_data alpha, CarmaObj<T_data> *matA, int lda,
314  CarmaObj<T_data> *vectx, int incx, T_data beta, int incy);
315  void ger(T_data alpha, CarmaObj<T_data> *vectx, int incx,
316  CarmaObj<T_data> *vecty, int incy, int lda);
317  void symv(char uplo, T_data alpha, CarmaObj<T_data> *matA, int lda,
318  CarmaObj<T_data> *vectx, int incx, T_data beta, int incy);
319 
320  /*
321  * ____ _ _ ____ _____
322  * | __ )| | / \ / ___|___ /
323  * | _ \| | / _ \ \___ \ |_ \
324  * | |_) | |___ / ___ \ ___) |__) |
325  * |____/|_____/_/ \_\____/____/
326  *
327  */
328 
329  void gemm(char transa, char transb, T_data alpha, CarmaObj<T_data> *matA,
330  int lda, CarmaObj<T_data> *matB, int ldb, T_data beta, int ldc);
331  void symm(char side, char uplo, T_data alpha, CarmaObj<T_data> *matA,
332  int lda, CarmaObj<T_data> *matB, int ldb, T_data beta, int ldc);
333  void syrk(char uplo, char transa, T_data alpha, CarmaObj<T_data> *matA,
334  int lda, T_data beta, int ldc);
335  void syrkx(char uplo, char transa, T_data alpha, CarmaObj<T_data> *matA,
336  int lda, CarmaObj<T_data> *matB, int ldb, T_data beta, int ldc);
337  void geam(char transa, char transb, T_data alpha, CarmaObj<T_data> *matA,
338  int lda, T_data beta, CarmaObj<T_data> *matB, int ldb, int ldc);
339  void dgmm(char side, CarmaObj<T_data> *matA, int lda,
340  CarmaObj<T_data> *vectx, int incx, int ldc);
341 
343  int init_prng();
344  int init_prng(long seed);
346  int prng(T_data *output, char gtype, float alpha, float beta);
347  int prng(T_data *output, char gtype, float alpha);
348  int prng(char gtype, float alpha, float beta);
349  int prng(char gtype, float alpha);
350  int prng(char gtype);
351 
352  int prng_montagn(float init_montagn);
353 
354  int init_prng_host(int seed);
355  int prng_host(char gtype);
356  int prng_host(char gtype, T_data stddev);
357  int prng_host(char gtype, T_data stddev, T_data alpha);
358  int destroy_prng_host();
359 };
369 // typedef CarmaObj<tuple_t<float>> CarmaObjTF;
370 
371 #ifdef CAN_DO_HALF
372 typedef CarmaObj<half> CarmaObjH;
373 #endif
374 
375 template <class T_data>
376 std::ostream &operator<<(std::ostream &os, CarmaObj<T_data> &obj) {
377  os << "-----------------------" << std::endl;
378  os << "CarmaObj<" << typeid(T_data).name() << "> object on GPU"
379  << obj.get_device() << std::endl;
380  long ndims = obj.get_dims(0);
381  os << "ndims = " << ndims << std::endl;
382  for (long dim = 0; dim < ndims; dim++) {
383  os << "dim[" << dim << "] = " << obj.get_dims(dim + 1) << std::endl;
384  }
385  os << "nbElem = " << obj.get_nb_elements() << std::endl;
386  os << "sizeof(" << typeid(T_data).name() << ") = " << sizeof(T_data)
387  << std::endl;
388  os << "-----------------------" << std::endl;
389  return os;
390 }
391 
392 // CU functions clip
393 template <class T_data>
394 void clip_array(T_data *d_data, T_data min, T_data max, int N,
395  CarmaDevice *device, cudaStream_t stream);
396 
397 // CU functions sum
398 template <class T_data>
399 void reduce(int size, int threads, int blocks, T_data *d_idata,
400  T_data *d_odata);
401 template <class T_data>
402 T_data reduce(T_data *data, int N);
403 
404 template <class T_data>
405 void init_reduceCubCU(T_data *&cub_data, size_t &cub_data_size, T_data *data,
406  T_data *&o_data, int N);
407 template <class T_data>
408 void reduceCubCU(T_data *cub_data, size_t cub_data_size, T_data *data,
409  T_data *o_data, int N, cudaStream_t stream=0);
410 
411 // CU functions transpose
412 template <class T_data>
413 int transposeCU(T_data *d_idata, T_data *d_odata, long N1, long N2);
414 
415 // CU functions generic
416 template <class T_data>
417 int launch_generic1d(T_data *d_idata, T_data *d_odata, int N,
418  CarmaDevice *device);
419 template <class T_data>
420 int launch_generic2d(T_data *d_odata, T_data *d_idata, int N1, int N2);
421 
422 // CU functions curand
423 int carma_prng_init(int *seed, const int nb_threads, const int nb_blocks,
424  curandState *state);
425 template <class T>
426 int carma_prng_cu(T *results, const int nb_threads, const int nb_blocks,
427  curandState *state, char gtype, int n, float alpha,
428  float beta);
429 template <class T>
430 int carma_curand_montagn(curandState *state, T *d_odata, int N,
431  CarmaDevice *device);
432 
433 // CU functions fft
434 template <class T_in, class T_out>
435 cufftType carma_select_plan();
436 template <class T_in, class T_out>
437 void carma_initfft(const long *dims_data, cufftHandle *plan, cufftType type_plan);
438 template <class T_in, class T_out>
439 int CarmaFFT(T_in *input, T_out *output, int dir, cufftHandle plan);
440 
441 // CU functions generic
442 template <class T_data>
443 int fillindex(T_data *d_odata, T_data *d_idata, int *indx, int N,
444  CarmaDevice *device);
445 template <class T_data>
446 int fillvalues(T_data *d_odata, T_data *val, int N, CarmaDevice *device);
447 template <class T>
448 int getarray2d(T *d_odata, T *d_idata, int x0, int Ncol, int NC, int N,
449  CarmaDevice *device);
450 template <class T>
451 int fillarray2d(T *d_odata, T *d_idata, int x0, int Ncol, int NC, int N,
452  CarmaDevice *device);
453 template <class T>
454 int fillarray2d2(T *d_odata, T *d_idata, int x0, int Ncol, int NC, int N,
455  CarmaDevice *device);
456 template <class T>
457 int fill_sym_matrix(char src_uplo, T *d_data, int Ncol, int N,
458  CarmaDevice *device);
459 template <class T>
460 int carma_plus(T *d_odata, T elpha, int N, CarmaDevice *device);
461 template <class T>
462 int carma_plusai(T *d_odata, T *i_data, int i, int sgn, int N,
463  CarmaDevice *device);
464 
465 // CU functions fftconv
466 // int fftconv_unpad(float *d_odata, float *d_idata, int fftW, int dataH,
467 // int dataW, int N, int n, int nim);
468 // int carma_initfftconv(CarmaObjS *data_in, CarmaObjS *kernel_in, CarmaObjS *padded_data,
469 // CarmaObjC *padded_spectrum, int kernelY, int kernelX);
470 // // CPP functions fftconv
471 // int carma_fftconv(CarmaObjS *data_out, CarmaObjS *padded_data,
472 // CarmaObjC *padded_spectrum, int kernelY, int kernelX);
473 
474 #ifdef CAN_DO_HALF
475 int custom_half_axpy(half alpha, half *source, int incx, int incy, int N,
476  half *dest, CarmaDevice *device);
477 #endif
478 
490 template <class T>
491 int extract(T *d_smallimg, const T *d_fullimg, int fullimg_size, int center_pos,
492  int extract_size, bool roll);
493 
494 #endif // _CARMA_OBJ_H_
int fill_sym_matrix(char src_uplo, T *d_data, int Ncol, int N, CarmaDevice *device)
cufftType carma_select_plan()
CarmaObj< uint16_t > CarmaObjUSI
Definition: carma_obj.h:362
int launch_generic1d(T_data *d_idata, T_data *d_odata, int N, CarmaDevice *device)
CarmaObj< double2 > CarmaObjD2
Definition: carma_obj.h:366
void carma_initfft(const long *dims_data, cufftHandle *plan, cufftType type_plan)
std::ostream & operator<<(std::ostream &os, CarmaObj< T_data > &obj)
Definition: carma_obj.h:376
int transposeCU(T_data *d_idata, T_data *d_odata, long N1, long N2)
CarmaObj< float2 > CarmaObjS2
Definition: carma_obj.h:365
CarmaObj< unsigned int > CarmaObjUI
Definition: carma_obj.h:361
CarmaObj< int > CarmaObjI
Definition: carma_obj.h:360
void reduceCubCU(T_data *cub_data, size_t cub_data_size, T_data *data, T_data *o_data, int N, cudaStream_t stream=0)
int getarray2d(T *d_odata, T *d_idata, int x0, int Ncol, int NC, int N, CarmaDevice *device)
CarmaObj< cuFloatComplex > CarmaObjC
Definition: carma_obj.h:367
int fillindex(T_data *d_odata, T_data *d_idata, int *indx, int N, CarmaDevice *device)
void init_reduceCubCU(T_data *&cub_data, size_t &cub_data_size, T_data *data, T_data *&o_data, int N)
int carma_plus(T *d_odata, T elpha, int N, CarmaDevice *device)
int fillarray2d2(T *d_odata, T *d_idata, int x0, int Ncol, int NC, int N, CarmaDevice *device)
void reduce(int size, int threads, int blocks, T_data *d_idata, T_data *d_odata)
int extract(T *d_smallimg, const T *d_fullimg, int fullimg_size, int center_pos, int extract_size, bool roll)
Kernel to extract a part of the image centred on center_pos.
int carma_prng_cu(T *results, const int nb_threads, const int nb_blocks, curandState *state, char gtype, int n, float alpha, float beta)
int fillarray2d(T *d_odata, T *d_idata, int x0, int Ncol, int NC, int N, CarmaDevice *device)
CarmaObj< double > CarmaObjD
Definition: carma_obj.h:364
int carma_plusai(T *d_odata, T *i_data, int i, int sgn, int N, CarmaDevice *device)
MemType
Definition: carma_obj.h:55
@ MT_PORTABLE
Definition: carma_obj.h:61
@ MT_DARRAY
Definition: carma_obj.h:57
@ MT_GENEPIN
Definition: carma_obj.h:63
@ MT_DEVICE
Definition: carma_obj.h:56
@ MT_PAGELOCK
Definition: carma_obj.h:59
@ MT_HOST
Definition: carma_obj.h:58
@ MT_WRICOMB
Definition: carma_obj.h:62
@ MT_ZEROCPY
Definition: carma_obj.h:60
void clip_array(T_data *d_data, T_data min, T_data max, int N, CarmaDevice *device, cudaStream_t stream)
CarmaObj< cuDoubleComplex > CarmaObjZ
Definition: carma_obj.h:368
int fillvalues(T_data *d_odata, T_data *val, int N, CarmaDevice *device)
int CarmaFFT(T_in *input, T_out *output, int dir, cufftHandle plan)
int carma_curand_montagn(curandState *state, T *d_odata, int N, CarmaDevice *device)
CarmaObj< float > CarmaObjS
Definition: carma_obj.h:363
int launch_generic2d(T_data *d_odata, T_data *d_idata, int N1, int N2)
int carma_prng_init(int *seed, const int nb_threads, const int nb_blocks, curandState *state)
this file provides tools to CarmaObj
int fill_array_with_value(T_data *d_data, T_data value, int N, CarmaDevice *device)
#define carma_safe_call(err)
Definition: carma_utils.h:108
this class provides the context in which CarmaObj are created
Definition: carma_context.h:79
CarmaDevice * get_device(int dev)
MemType get_malloc_type()
Definition: carma_obj.h:85
T_data * get_data()
Definition: carma_obj.h:78
int * strides
Strides for each dimension.
Definition: carma_obj.h:74
T_data * d_data
Pointer to data.
Definition: carma_obj.h:70
const long * get_dims_data()
Definition: carma_obj.h:81
int get_strides(int i)
Definition: carma_obj.h:84
int get_ndims()
Definition: carma_obj.h:79
int ndims
Number of dimensions.
Definition: carma_obj.h:71
long get_dims_data(int i)
Definition: carma_obj.h:82
MemType malloc_type
type of alloc
Definition: carma_obj.h:75
int * get_strides()
Definition: carma_obj.h:83
int get_nb_elem()
Definition: carma_obj.h:80
long * dims_data
Dimensions.
Definition: carma_obj.h:73
int nb_elem
Number of elements.
Definition: carma_obj.h:72
this class provides wrappers to the generic carma host object
this class provides wrappers to the generic carma object
Definition: carma_obj.h:92
cudaStream_t get_cuda_stream(int stream)
Definition: carma_obj.h:170
void dealloc()
Definition: carma_obj.h:187
CarmaObj(CarmaContext *current_context, const long *dims_data, int nb_streams)
int nb_elem
number of elements in the array
Definition: carma_obj.h:101
T_data * cub_data
optional data (used for scan / reduction)
Definition: carma_obj.h:97
T_data * get_data_at(int index)
Definition: carma_obj.h:209
int device2host_opt(T_data *data)
int prng_host(char gtype, T_data stddev, T_data alpha)
CarmaContext * get_context()
Definition: carma_obj.h:220
void copy(CarmaObj< T_data > *source, int incx, int incy)
CarmaObj(const CarmaObj< T_data > *obj)
CarmaStreams * streams
Definition: carma_obj.h:120
T_data * get_h_data()
Definition: carma_obj.h:147
cufftType get_type_plan()
FFT plan type.
Definition: carma_obj.h:264
long get_dims(int i)
Definition: carma_obj.h:218
CarmaObj(CarmaContext *current_context, const long *dims_data)
void syrk(char uplo, char transa, T_data alpha, CarmaObj< T_data > *matA, int lda, T_data beta, int ldc)
int wait_stream(int stream)
Definition: carma_obj.h:173
CarmaObj(const CarmaObj &)=delete
int prng(char gtype)
const T_data operator[](int index) const
Definition: carma_obj.h:202
int aimax(int incx)
CarmaObj(CarmaContext *current_context, const std::vector< long > &dims)
CarmaContext * current_context
Definition: carma_obj.h:103
void init_reduceCub()
void reduceCub()
Definition: carma_obj.h:272
int nb_blocks
Definition: carma_obj.h:109
void axpy(T_data alpha, CarmaObj< T_data > *source, int incx, int incy, int offset=0)
int host2device_mat(const T_data *data, int lda, int ldb)
void swap_ptr(T_data *ptr)
Definition: carma_obj.h:181
CarmaObj(CarmaContext *current_context, const long *dims_data, const T_data *data, int nb_streams)
void gemm(char transa, char transb, T_data alpha, CarmaObj< T_data > *matA, int lda, CarmaObj< T_data > *matB, int ldb, T_data beta, int ldc)
void dgmm(char side, CarmaObj< T_data > *matA, int lda, CarmaObj< T_data > *vectx, int incx, int ldc)
int device2host(T_dest *data)
T_data sum()
int init_prng_host(int seed)
int device2host_mat(T_data *data, int lda, int ldb)
void symm(char side, char uplo, T_data alpha, CarmaObj< T_data > *matA, int lda, CarmaObj< T_data > *matB, int ldb, T_data beta, int ldc)
unsigned int * values
optional data (used for sort)
Definition: carma_obj.h:114
size_t * d_num_valid
used for compact
Definition: carma_obj.h:115
int copy_from(const T_data *data, int nb_elem)
int aimin(int incx)
int prng_host(char gtype)
T_data nrm2(int incx)
int copy_from_async(const T_data *data, int nb_elem, cudaStream_t stream)
void clip(T_data min, T_data max, cudaStream_t stream)
CarmaObj(CarmaContext *current_context, const long *dims_data, const T_data *data)
T_data dot(CarmaObj< T_data > *source, int incx, int incy)
void geam(char transa, char transb, T_data alpha, CarmaObj< T_data > *matA, int lda, T_data beta, CarmaObj< T_data > *matB, int ldb, int ldc)
bool owner
Definition: carma_obj.h:112
int destroy_prng()
CarmaObj(CarmaContext *current_context, const CarmaObj< T_data > *obj, int nb_streams)
void symv(char uplo, T_data alpha, CarmaObj< T_data > *matA, int lda, CarmaObj< T_data > *vectx, int incx, T_data beta, int incy)
int memset(T_data value)
Definition: carma_obj.h:257
int del_stream(int nb)
Definition: carma_obj.h:166
cufftHandle * get_plan()
FFT plan.
Definition: carma_obj.h:262
void swap(CarmaObj< T_data > *source, int incx, int incy)
int prng(char gtype, float alpha)
int del_stream()
Definition: carma_obj.h:162
bool is_rng_init()
Definition: carma_obj.h:224
void gemv(char trans, T_data alpha, CarmaObj< T_data > *matA, int lda, CarmaObj< T_data > *vectx, int incx, T_data beta, int incy)
T_data asum(int incx)
void syrkx(char uplo, char transa, T_data alpha, CarmaObj< T_data > *matA, int lda, CarmaObj< T_data > *matB, int ldb, T_data beta, int ldc)
int device2host_async(T_data *data, cudaStream_t stream)
int copy_into(T_data *data, int nb_elem)
int init_prng()
int host2device(const T_dest *data)
std::string to_string()
Definition: carma_obj.h:194
int prng(char gtype, float alpha, float beta)
T_data * d_data
Input data => change to vector.
Definition: carma_obj.h:94
bool keys_only
Definition: carma_obj.h:111
int prng_montagn(float init_montagn)
int get_nb_streams() const
Definition: carma_obj.h:149
int transpose(CarmaObj< T_data > *source)
int device2host_vect(T_data *data, int incx, int incy)
cufftType type_plan
FFT plan type.
Definition: carma_obj.h:118
T_data * get_data()
Definition: carma_obj.h:208
int add_stream()
Definition: carma_obj.h:154
void scale(T_data alpha, int incx)
int host2device_async(const T_data *data, cudaStream_t stream)
int destroy_prng_host()
int nb_threads
Definition: carma_obj.h:108
void sync_h_data()
Definition: carma_obj.h:142
void ger(T_data alpha, CarmaObj< T_data > *vectx, int incx, CarmaObj< T_data > *vecty, int incy, int lda)
int ndim
Definition: carma_obj.h:99
int reset(cudaStream_t stream)
Definition: carma_obj.h:253
int add_stream(int nb)
Definition: carma_obj.h:158
int wait_all_streams()
Definition: carma_obj.h:177
int device
device where the CarmaObj is allocate
Definition: carma_obj.h:102
long * dims_data
dimensions of the array
Definition: carma_obj.h:100
int reset()
Definition: carma_obj.h:249
CarmaObj(CarmaContext *current_context, const CarmaObj< T_data > *obj)
int prng(T_data *output, char gtype, float alpha, float beta)
std::vector< T_data > h_data
Definition: carma_obj.h:95
void init(CarmaContext *current_context, const long *dims_data, const T_data *data, bool fromHost, int nb_streams)
size_t cub_data_size
Definition: carma_obj.h:98
void rot(CarmaObj< T_data > *source, int incx, int incy, T_data sc, T_data ss)
T_data * o_data
optional data (used for scan / reduction)
Definition: carma_obj.h:96
unsigned int * get_values()
optional data (used for sort)
Definition: carma_obj.h:267
curandGenerator_t gen
Definition: carma_obj.h:105
T_data * get_o_data()
Definition: carma_obj.h:210
cufftHandle plan
FFT plan.
Definition: carma_obj.h:117
const T_data get_o_data_value() const
Definition: carma_obj.h:211
int host2device_vect(const T_data *data, int incx, int incy)
const long * get_dims()
Definition: carma_obj.h:217
int get_nb_elements()
Definition: carma_obj.h:219
int get_device()
Definition: carma_obj.h:222
curandState * d_states
Definition: carma_obj.h:106
this class provides the stream features to CarmaObj
Definition: carma_streams.h:24
int wait_stream(int stream)
int get_nb_streams()
int wait_all_streams()
int add_stream()
int del_stream()
cudaStream_t get_stream(int stream)
int roll(T *idata, int N, int M, int nim, CarmaDevice *device)