COMPASS  5.0.0
End-to-end AO simulation tool using GPU acceleration
carma_obj.h
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------
2 // This file is part of COMPASS <https://anr-compass.github.io/compass/>
3 //
4 // Copyright (C) 2011-2019 COMPASS Team <https://github.com/ANR-COMPASS>
5 // All rights reserved.
6 // Distributed under GNU - LGPL
7 //
8 // COMPASS is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser
9 // General Public License as published by the Free Software Foundation, either version 3 of the License,
10 // or any later version.
11 //
12 // COMPASS: End-to-end AO simulation tool using GPU acceleration
13 // The COMPASS platform was designed to meet the need of high-performance for the simulation of AO systems.
14 //
15 // The final product includes a software package for simulating all the critical subcomponents of AO,
16 // particularly in the context of the ELT and a real-time core based on several control approaches,
17 // with performances consistent with its integration into an instrument. Taking advantage of the specific
18 // hardware architecture of the GPU, the COMPASS tool allows to achieve adequate execution speeds to
19 // conduct large simulation campaigns called to the ELT.
20 //
21 // The COMPASS platform can be used to carry a wide variety of simulations to both test specific components
22 // of AO of the E-ELT (such as wavefront analysis device with a pyramid or elongated Laser star), and
23 // various systems configurations such as multi-conjugate AO.
24 //
25 // COMPASS is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
26 // implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
27 // See the GNU Lesser General Public License for more details.
28 //
29 // You should have received a copy of the GNU Lesser General Public License along with COMPASS.
30 // If not, see <https://www.gnu.org/licenses/lgpl-3.0.txt>.
31 // -----------------------------------------------------------------------------
32 
41 
42 #ifndef _CARMA_OBJ_H_
43 #define _CARMA_OBJ_H_
44 
45 #include <carma_context.h>
46 #include <carma_streams.h>
47 #include <carma_utils.h>
48 #include <curand.h>
49 #include <curand_kernel.h>
50 #include <iostream>
51 #include <type_traits>
52 #include <typeinfo> // operator typeid
53 
54 /*
55  create a memory object
56  void *memory
57  int nb of reference
58 
59  create a class which contains :
60  - d_data
61  - ndims
62  - dims
63  - strides
64  - type
65 
66  new()
67 
68  new(existing)
69 
70 
71  and then
72  modify CarmaObj so that it is :
73  an object of the previous class
74  all the methods of a CarmaObj
75 
76  */
77 
78 #define BLOCK_SZ 16
79 
/**
 * @brief Type of allocation backing a Carma container
 *        (stored in CarmaData::malloc_type).
 *
 * The enumerators are listed in their original declaration order; no explicit
 * values are assigned, so they are numbered consecutively from 0.
 * NOTE(review): the per-enumerator meaning is not documented in this header —
 * confirm against the allocation code before relying on the names alone.
 */
enum MemType {
  MT_DEVICE,
  MT_DARRAY,
  MT_HOST,
  MT_PAGELOCK,
  MT_ZEROCPY,
  MT_PORTABLE,
  MT_WRICOMB,
  MT_GENEPIN
};
90 // should add texture ?
91 
92 template <class T_data>
93 class CarmaData {
94  protected:
95  T_data *d_data;
96  int ndims;
97  int nb_elem;
98  long *dims_data;
99  int *strides;
101 
102  public:
103  T_data *get_data() { return d_data; }
104  int get_ndims() { return ndims; }
105  int get_nb_elem() { return nb_elem; }
106  const long *get_dims_data() { return dims_data; }
107  long get_dims_data(int i) { return dims_data[i]; }
108  int *get_strides() { return strides; }
109  int get_strides(int i) { return strides[i]; }
111 };
112 
113 template <class T_data>
114 class CarmaHostObj;
115 
116 template <class T_data>
117 class CarmaObj {
118  protected:
119  T_data *d_data;
120  std::vector<T_data> h_data;
121  T_data *o_data;
122  T_data *cub_data;
123  size_t cub_data_size; // optionnal for reduction
124  int ndim;
125  long *dims_data;
126  int nb_elem;
127  int device;
129 
130  curandGenerator_t gen;
131  curandState *d_states;
132 
135 
136  bool keys_only; //< optional flag (used for sort)
137  bool owner = true; // Flag if d_data is created inside the CarmaObj
138 
139  unsigned int *values;
140  size_t *d_num_valid;
141 
142  cufftHandle plan;
143  cufftType type_plan;
144 
146 
148  const T_data *data, bool fromHost, int nb_streams);
149 
150  public:
155  const T_data *data);
157  int nb_streams);
159  int nb_streams);
161  const T_data *data, int nb_streams);
163 
164  void sync_h_data() {
165  if (h_data.empty()) h_data = std::vector<T_data>(nb_elem);
166  device2host(h_data.data());
167  }
168 
169  T_data *get_h_data() { return h_data.data(); }
170 
171  int get_nb_streams() const {
174  return streams->get_nb_streams();
175  }
176  int add_stream() {
177  this->streams->add_stream();
178  return this->streams->get_nb_streams();
179  }
180  int add_stream(int nb) {
181  this->streams->add_stream(nb);
182  return this->streams->get_nb_streams();
183  }
184  int del_stream() {
185  this->streams->del_stream();
186  return this->streams->get_nb_streams();
187  }
188  int del_stream(int nb) {
189  this->streams->del_stream(nb);
190  return this->streams->get_nb_streams();
191  }
192  cudaStream_t get_cuda_stream(int stream) {
193  return this->streams->get_stream(stream);
194  }
195  int wait_stream(int stream) {
196  this->streams->wait_stream(stream);
197  return EXIT_SUCCESS;
198  }
200  this->streams->wait_all_streams();
201  return EXIT_SUCCESS;
202  }
203  void swap_ptr(T_data *ptr) {
204  dealloc();
205  d_data = ptr;
206  owner = false;
207  }
208 
209  void dealloc() {
210  if (owner) cudaFree(d_data);
211  }
212 
214  operator T_data *() { return d_data; }
215 
216  std::string to_string() {
217  std::ostringstream stream;
218  stream << *this;
219  return stream.str();
220  }
221 
222  operator std::string() { return this->to_string(); }
223  inline char const *c_str() { return this->to_string().c_str(); }
224  const T_data operator[](int index) const {
225  T_data tmp_float;
226  carma_safe_call(cudaMemcpy(&tmp_float, &d_data[index], sizeof(T_data),
227  cudaMemcpyDeviceToHost));
228  return tmp_float;
229  }
230  T_data *get_data() { return d_data; }
231  T_data *get_data_at(int index) { return &d_data[index]; }
232  T_data *get_o_data() { return o_data; }
233  const T_data get_o_data_value() const {
234  T_data tmp_float;
236  cudaMemcpy(&tmp_float, o_data, sizeof(T_data), cudaMemcpyDeviceToHost));
237  return tmp_float;
238  }
239  const long *get_dims() { return dims_data; }
240  long get_dims(int i) { return dims_data[i]; }
241  int get_nb_elements() { return nb_elem; }
243 
244  int get_device() { return device; }
245 
246  bool is_rng_init() { return (gen != NULL); }
247 
249  template <typename T_dest>
250  int host2device(const T_dest *data);
251  template <typename T_dest>
252  int device2host(T_dest *data);
253 
254  int host2device_async(const T_data *data, cudaStream_t stream);
255  int device2host_async(T_data *data, cudaStream_t stream);
256  int device2host_opt(T_data *data);
257  int host2device_vect(const T_data *data, int incx, int incy);
258  int device2host_vect(T_data *data, int incx, int incy);
259  int host2device_mat(const T_data *data, int lda, int ldb);
260  int device2host_mat(T_data *data, int lda, int ldb);
261 
262  int copy_into(T_data *data, int nb_elem);
263  int copy_from(const T_data *data, int nb_elem);
264 
265 #ifdef USE_OCTOPUS
266  int copy_into(ipc::Cacao<T_data> *cacaoInterface);
267  int copy_from(ipc::Cacao<T_data> *cacaoInterface);
268 #endif
269 
270  inline int reset() {
271  return cudaMemset(this->d_data, 0, this->nb_elem * sizeof(T_data));
272  }
273  inline int memset(T_data value) {
274  return fill_array_with_value(
275  this->d_data, value, this->nb_elem,
276  this->current_context->get_device(this->device));
277  }
278  cufftHandle *get_plan() { return &plan; }
280  cufftType get_type_plan() { return type_plan; }
282 
283  unsigned int *get_values() { return values; }
285 
287  T_data sum();
288  void init_reduceCub();
289  void reduceCub();
290 
291  void clip(T_data min, T_data max);
292 
294  int transpose(CarmaObj<T_data> *source);
295  // CarmaObj<T_data>& operator= (const CarmaObj<T_data>& obj);
296 
297  /*
298  * ____ _ _ ____ _
299  * | __ )| | / \ / ___|/ |
300  * | _ \| | / _ \ \___ \| |
301  * | |_) | |___ / ___ \ ___) | |
302  * |____/|_____/_/ \_\____/|_|
303  *
304  */
305 
306  int aimax(int incx);
307  int aimin(int incx);
308  T_data asum(int incx);
309  T_data nrm2(int incx);
310  T_data dot(CarmaObj<T_data> *source, int incx, int incy);
311  void scale(T_data alpha, int incx);
312  void swap(CarmaObj<T_data> *source, int incx, int incy);
313  void copy(CarmaObj<T_data> *source, int incx, int incy);
314  void axpy(T_data alpha, CarmaObj<T_data> *source, int incx, int incy,
315  int offset = 0);
316  void rot(CarmaObj<T_data> *source, int incx, int incy, T_data sc, T_data ss);
317 
318  /*
319  * ____ _ _ ____ ____
320  * | __ )| | / \ / ___|___ \
321  * | _ \| | / _ \ \___ \ __) |
322  * | |_) | |___ / ___ \ ___) / __/
323  * |____/|_____/_/ \_\____/_____|
324  *
325  */
326 
327  void gemv(char trans, T_data alpha, CarmaObj<T_data> *matA, int lda,
328  CarmaObj<T_data> *vectx, int incx, T_data beta, int incy);
329  void ger(T_data alpha, CarmaObj<T_data> *vectx, int incx,
330  CarmaObj<T_data> *vecty, int incy, int lda);
331  void symv(char uplo, T_data alpha, CarmaObj<T_data> *matA, int lda,
332  CarmaObj<T_data> *vectx, int incx, T_data beta, int incy);
333 
334  /*
335  * ____ _ _ ____ _____
336  * | __ )| | / \ / ___|___ /
337  * | _ \| | / _ \ \___ \ |_ \
338  * | |_) | |___ / ___ \ ___) |__) |
339  * |____/|_____/_/ \_\____/____/
340  *
341  */
342 
343  void gemm(char transa, char transb, T_data alpha, CarmaObj<T_data> *matA,
344  int lda, CarmaObj<T_data> *matB, int ldb, T_data beta, int ldc);
345  void symm(char side, char uplo, T_data alpha, CarmaObj<T_data> *matA,
346  int lda, CarmaObj<T_data> *matB, int ldb, T_data beta, int ldc);
347  void syrk(char uplo, char transa, T_data alpha, CarmaObj<T_data> *matA,
348  int lda, T_data beta, int ldc);
349  void syrkx(char uplo, char transa, T_data alpha, CarmaObj<T_data> *matA,
350  int lda, CarmaObj<T_data> *matB, int ldb, T_data beta, int ldc);
351  void geam(char transa, char transb, T_data alpha, CarmaObj<T_data> *matA,
352  int lda, T_data beta, CarmaObj<T_data> *matB, int ldb, int ldc);
353  void dgmm(char side, CarmaObj<T_data> *matA, int lda,
354  CarmaObj<T_data> *vectx, int incx, int ldc);
355 
357  int init_prng();
358  int init_prng(long seed);
360  int prng(T_data *output, char gtype, float alpha, float beta);
361  int prng(T_data *output, char gtype, float alpha);
362  int prng(char gtype, float alpha, float beta);
363  int prng(char gtype, float alpha);
364  int prng(char gtype);
365 
366  int prng_montagn(float init_montagn);
367 
368  int init_prng_host(int seed);
369  int prng_host(char gtype);
370  int prng_host(char gtype, T_data stddev);
371  int prng_host(char gtype, T_data stddev, T_data alpha);
372  int destroy_prng_host();
373 };
383 // typedef CarmaObj<tuple_t<float>> CarmaObjTF;
384 
385 #ifdef CAN_DO_HALF
386 typedef CarmaObj<half> CarmaObjH;
387 #endif
388 
389 template <class T_data>
390 std::ostream &operator<<(std::ostream &os, CarmaObj<T_data> &obj) {
391  os << "-----------------------" << std::endl;
392  os << "CarmaObj<" << typeid(T_data).name() << "> object on GPU"
393  << obj.get_device() << std::endl;
394  long ndims = obj.get_dims(0);
395  os << "ndims = " << ndims << std::endl;
396  for (long dim = 0; dim < ndims; dim++) {
397  os << "dim[" << dim << "] = " << obj.get_dims(dim + 1) << std::endl;
398  }
399  os << "nbElem = " << obj.get_nb_elements() << std::endl;
400  os << "sizeof(" << typeid(T_data).name() << ") = " << sizeof(T_data)
401  << std::endl;
402  os << "-----------------------" << std::endl;
403  return os;
404 }
405 
406 // CU functions clip
407 template <class T_data>
408 void clip_array(T_data *d_data, T_data min, T_data max, int N,
409  CarmaDevice *device);
410 
411 // CU functions sum
412 template <class T_data>
413 void reduce(int size, int threads, int blocks, T_data *d_idata,
414  T_data *d_odata);
415 template <class T_data>
416 T_data reduce(T_data *data, int N);
417 
418 template <class T_data>
419 void init_reduceCubCU(T_data *&cub_data, size_t &cub_data_size, T_data *data,
420  T_data *&o_data, int N);
421 template <class T_data>
422 void reduceCubCU(T_data *cub_data, size_t cub_data_size, T_data *data,
423  T_data *o_data, int N);
424 
425 // CU functions transpose
426 template <class T_data>
427 int transposeCU(T_data *d_idata, T_data *d_odata, long N1, long N2);
428 
429 // CU functions generic
430 template <class T_data>
431 int launch_generic1d(T_data *d_idata, T_data *d_odata, int N,
432  CarmaDevice *device);
433 template <class T_data>
434 int launch_generic2d(T_data *d_odata, T_data *d_idata, int N1, int N2);
435 
436 // CU functions curand
437 int carma_prng_init(int *seed, const int nb_threads, const int nb_blocks,
438  curandState *state);
439 template <class T>
440 int carma_prng_cu(T *results, const int nb_threads, const int nb_blocks,
441  curandState *state, char gtype, int n, float alpha,
442  float beta);
443 template <class T>
444 int carma_curand_montagn(curandState *state, T *d_odata, int N,
445  CarmaDevice *device);
446 
447 // CU functions fft
448 template <class T_in, class T_out>
449 cufftType carma_select_plan();
450 template <class T_in, class T_out>
451 void carma_initfft(const long *dims_data, cufftHandle *plan, cufftType type_plan);
452 template <class T_in, class T_out>
453 int CarmaFFT(T_in *input, T_out *output, int dir, cufftHandle plan);
454 
455 // CU functions generic
456 template <class T_data>
457 int fillindex(T_data *d_odata, T_data *d_idata, int *indx, int N,
458  CarmaDevice *device);
459 template <class T_data>
460 int fillvalues(T_data *d_odata, T_data *val, int N, CarmaDevice *device);
461 template <class T>
462 int getarray2d(T *d_odata, T *d_idata, int x0, int Ncol, int NC, int N,
463  CarmaDevice *device);
464 template <class T>
465 int fillarray2d(T *d_odata, T *d_idata, int x0, int Ncol, int NC, int N,
466  CarmaDevice *device);
467 template <class T>
468 int fillarray2d2(T *d_odata, T *d_idata, int x0, int Ncol, int NC, int N,
469  CarmaDevice *device);
470 template <class T>
471 int fill_sym_matrix(char src_uplo, T *d_data, int Ncol, int N,
472  CarmaDevice *device);
473 template <class T>
474 int carma_plus(T *d_odata, T elpha, int N, CarmaDevice *device);
475 template <class T>
476 int carma_plusai(T *d_odata, T *i_data, int i, int sgn, int N,
477  CarmaDevice *device);
478 
479 // CU functions fftconv
480 int fftconv_unpad(float *d_odata, float *d_idata, int fftW, int dataH,
481  int dataW, int N, int n, int nim);
482 int carma_initfftconv(CarmaObjS *data_in, CarmaObjS *kernel_in, CarmaObjS *padded_data,
483  CarmaObjC *padded_spectrum, int kernelY, int kernelX);
484 // CPP functions fftconv
485 int carma_fftconv(CarmaObjS *data_out, CarmaObjS *padded_data,
486  CarmaObjC *padded_spectrum, int kernelY, int kernelX);
487 
488 #ifdef CAN_DO_HALF
489 int custom_half_axpy(half alpha, half *source, int incx, int incy, int N,
490  half *dest, CarmaDevice *device);
491 #endif
492 
504 template <class T>
505 int extract(T *d_smallimg, const T *d_fullimg, int fullimg_size, int center_pos,
506  int extract_size, bool roll);
507 
508 #endif // _CARMA_OBJ_H_
carma_plusai
int carma_plusai(T *d_odata, T *i_data, int i, int sgn, int N, CarmaDevice *device)
CarmaObj::reduceCub
void reduceCub()
CarmaObj::get_data
T_data * get_data()
Definition: carma_obj.h:230
CarmaObj::device2host_mat
int device2host_mat(T_data *data, int lda, int ldb)
carma_prng_cu
int carma_prng_cu(T *results, const int nb_threads, const int nb_blocks, curandState *state, char gtype, int n, float alpha, float beta)
CarmaData::get_nb_elem
int get_nb_elem()
Definition: carma_obj.h:105
CarmaObj::destroy_prng
int destroy_prng()
fillarray2d2
int fillarray2d2(T *d_odata, T *d_idata, int x0, int Ncol, int NC, int N, CarmaDevice *device)
CarmaData::ndims
int ndims
Number of dimensions.
Definition: carma_obj.h:96
CarmaObj::get_dims
long get_dims(int i)
Definition: carma_obj.h:240
CarmaObj::geam
void geam(char transa, char transb, T_data alpha, CarmaObj< T_data > *matA, int lda, T_data beta, CarmaObj< T_data > *matB, int ldb, int ldc)
CarmaObjUSI
CarmaObj< uint16_t > CarmaObjUSI
Definition: carma_obj.h:376
CarmaObj::get_cuda_stream
cudaStream_t get_cuda_stream(int stream)
Definition: carma_obj.h:192
CarmaData::d_data
T_data * d_data
Pointer to data.
Definition: carma_obj.h:95
CarmaObj::destroy_prng_host
int destroy_prng_host()
CarmaObj::init_reduceCub
void init_reduceCub()
CarmaObj::aimin
int aimin(int incx)
CarmaObj::type_plan
cufftType type_plan
FFT plan type.
Definition: carma_obj.h:143
CarmaObjC
CarmaObj< cuFloatComplex > CarmaObjC
Definition: carma_obj.h:381
CarmaObj::asum
T_data asum(int incx)
CarmaObj::get_dims
const long * get_dims()
Definition: carma_obj.h:239
CarmaDevice
Definition: carma_context.h:57
CarmaObj::symm
void symm(char side, char uplo, T_data alpha, CarmaObj< T_data > *matA, int lda, CarmaObj< T_data > *matB, int ldb, T_data beta, int ldc)
carma_context.h
CarmaObj::ger
void ger(T_data alpha, CarmaObj< T_data > *vectx, int incx, CarmaObj< T_data > *vecty, int incy, int lda)
CarmaData::malloc_type
MemType malloc_type
type of alloc
Definition: carma_obj.h:100
CarmaObj::del_stream
int del_stream()
Definition: carma_obj.h:184
carma_plus
int carma_plus(T *d_odata, T elpha, int N, CarmaDevice *device)
CarmaObj::host2device
int host2device(const T_dest *data)
CarmaObj::device2host
int device2host(T_dest *data)
CarmaObj::syrkx
void syrkx(char uplo, char transa, T_data alpha, CarmaObj< T_data > *matA, int lda, CarmaObj< T_data > *matB, int ldb, T_data beta, int ldc)
CarmaObj::cub_data
T_data * cub_data
optional data (used for scan / reduction)
Definition: carma_obj.h:122
carma_utils.h
this file provides tools to CarmaObj
CarmaObj::prng
int prng(T_data *output, char gtype, float alpha, float beta)
CarmaObj::get_values
unsigned int * get_values()
optional data (used for sort)
Definition: carma_obj.h:283
CarmaData
Definition: carma_obj.h:93
CarmaData::dims_data
long * dims_data
Dimensions.
Definition: carma_obj.h:98
carma_safe_call
#define carma_safe_call(err)
Definition: carma_utils.h:145
carma_curand_montagn
int carma_curand_montagn(curandState *state, T *d_odata, int N, CarmaDevice *device)
CarmaObj::get_h_data
T_data * get_h_data()
Definition: carma_obj.h:169
carma_fftconv
int carma_fftconv(CarmaObjS *data_out, CarmaObjS *padded_data, CarmaObjC *padded_spectrum, int kernelY, int kernelX)
CarmaObj::dealloc
void dealloc()
Definition: carma_obj.h:209
CarmaObj::nb_elem
int nb_elem
number of elements in the array
Definition: carma_obj.h:126
CarmaObj::prng_host
int prng_host(char gtype, T_data stddev, T_data alpha)
CarmaObj::gemv
void gemv(char trans, T_data alpha, CarmaObj< T_data > *matA, int lda, CarmaObj< T_data > *vectx, int incx, T_data beta, int incy)
CarmaObj::prng
int prng(char gtype)
reduce
void reduce(int size, int threads, int blocks, T_data *d_idata, T_data *d_odata)
CarmaObj::cub_data_size
size_t cub_data_size
Definition: carma_obj.h:123
CarmaObj::prng
int prng(char gtype, float alpha)
MT_WRICOMB
@ MT_WRICOMB
Definition: carma_obj.h:87
CarmaObj::get_context
CarmaContext * get_context()
Definition: carma_obj.h:242
fftconv_unpad
int fftconv_unpad(float *d_odata, float *d_idata, int fftW, int dataH, int dataW, int N, int n, int nim)
carma_streams.h
CarmaData::get_ndims
int get_ndims()
Definition: carma_obj.h:104
CarmaObj::device2host_async
int device2host_async(T_data *data, cudaStream_t stream)
CarmaData::get_dims_data
long get_dims_data(int i)
Definition: carma_obj.h:107
CarmaObj::wait_all_streams
int wait_all_streams()
Definition: carma_obj.h:199
CarmaObj::del_stream
int del_stream(int nb)
Definition: carma_obj.h:188
carma_initfft
void carma_initfft(const long *dims_data, cufftHandle *plan, cufftType type_plan)
CarmaStreams::del_stream
int del_stream()
MT_GENEPIN
@ MT_GENEPIN
Definition: carma_obj.h:88
CarmaData::get_data
T_data * get_data()
Definition: carma_obj.h:103
CarmaObj::memset
int memset(T_data value)
Definition: carma_obj.h:273
CarmaObj::d_states
curandState * d_states
Definition: carma_obj.h:131
CarmaObj::rot
void rot(CarmaObj< T_data > *source, int incx, int incy, T_data sc, T_data ss)
CarmaStreams::add_stream
int add_stream()
CarmaObj::copy_into
int copy_into(T_data *data, int nb_elem)
CarmaData::get_dims_data
const long * get_dims_data()
Definition: carma_obj.h:106
CarmaObj::d_data
T_data * d_data
Input data => change to vector.
Definition: carma_obj.h:119
CarmaObj::swap
void swap(CarmaObj< T_data > *source, int incx, int incy)
CarmaObj::CarmaObj
CarmaObj(CarmaContext *current_context, const long *dims_data, int nb_streams)
operator<<
std::ostream & operator<<(std::ostream &os, CarmaObj< T_data > &obj)
Definition: carma_obj.h:390
CarmaStreams::wait_all_streams
int wait_all_streams()
launch_generic1d
int launch_generic1d(T_data *d_idata, T_data *d_odata, int N, CarmaDevice *device)
CarmaObjD
CarmaObj< double > CarmaObjD
Definition: carma_obj.h:378
CarmaObj::init_prng
int init_prng()
CarmaObj::plan
cufftHandle plan
FFT plan.
Definition: carma_obj.h:142
CarmaObj::~CarmaObj
~CarmaObj()
CarmaObj::device2host_opt
int device2host_opt(T_data *data)
MT_DEVICE
@ MT_DEVICE
Definition: carma_obj.h:81
CarmaObj::get_type_plan
cufftType get_type_plan()
FFT plan type.
Definition: carma_obj.h:280
carma_prng_init
int carma_prng_init(int *seed, const int nb_threads, const int nb_blocks, curandState *state)
CarmaObj::copy
void copy(CarmaObj< T_data > *source, int incx, int incy)
getarray2d
int getarray2d(T *d_odata, T *d_idata, int x0, int Ncol, int NC, int N, CarmaDevice *device)
CarmaObj::prng_host
int prng_host(char gtype)
carma_select_plan
cufftType carma_select_plan()
CarmaObj::ndim
int ndim
Definition: carma_obj.h:124
CarmaObj::h_data
std::vector< T_data > h_data
Definition: carma_obj.h:120
CarmaObj::nrm2
T_data nrm2(int incx)
CarmaStreams
this class provides the stream features to CarmaObj
Definition: carma_streams.h:49
CarmaObj::host2device_mat
int host2device_mat(const T_data *data, int lda, int ldb)
CarmaStreams::get_nb_streams
int get_nb_streams()
CarmaObj::axpy
void axpy(T_data alpha, CarmaObj< T_data > *source, int incx, int incy, int offset=0)
CarmaObj::syrk
void syrk(char uplo, char transa, T_data alpha, CarmaObj< T_data > *matA, int lda, T_data beta, int ldc)
CarmaObjI
CarmaObj< int > CarmaObjI
Definition: carma_obj.h:374
CarmaObj::keys_only
bool keys_only
Definition: carma_obj.h:136
CarmaObj::swap_ptr
void swap_ptr(T_data *ptr)
Definition: carma_obj.h:203
CarmaData::get_strides
int * get_strides()
Definition: carma_obj.h:108
CarmaData::strides
int * strides
Strides for each dimension.
Definition: carma_obj.h:99
CarmaObj::get_nb_streams
int get_nb_streams() const
Definition: carma_obj.h:171
init_reduceCubCU
void init_reduceCubCU(T_data *&cub_data, size_t &cub_data_size, T_data *data, T_data *&o_data, int N)
CarmaObj::host2device_vect
int host2device_vect(const T_data *data, int incx, int incy)
carma_initfftconv
int carma_initfftconv(CarmaObjS *data_in, CarmaObjS *kernel_in, CarmaObjS *padded_data, CarmaObjC *padded_spectrum, int kernelY, int kernelX)
CarmaObj::sync_h_data
void sync_h_data()
Definition: carma_obj.h:164
fillvalues
int fillvalues(T_data *d_odata, T_data *val, int N, CarmaDevice *device)
CarmaObj::init_prng_host
int init_prng_host(int seed)
MemType
MemType
Definition: carma_obj.h:80
CarmaObj
this class provides wrappers to the generic carma object
Definition: carma_host_obj.h:65
launch_generic2d
int launch_generic2d(T_data *d_odata, T_data *d_idata, int N1, int N2)
CarmaObj::prng
int prng(char gtype, float alpha, float beta)
CarmaData::get_malloc_type
MemType get_malloc_type()
Definition: carma_obj.h:110
CarmaContext
this class provides the context in which CarmaObj are created
Definition: carma_context.h:104
CarmaObj::get_nb_elements
int get_nb_elements()
Definition: carma_obj.h:241
CarmaObj::o_data
T_data * o_data
optional data (used for scan / reduction)
Definition: carma_obj.h:121
CarmaObj::prng_montagn
int prng_montagn(float init_montagn)
CarmaObj::CarmaObj
CarmaObj(CarmaContext *current_context, const CarmaObj< T_data > *obj)
fillindex
int fillindex(T_data *d_odata, T_data *d_idata, int *indx, int N, CarmaDevice *device)
CarmaObjS2
CarmaObj< float2 > CarmaObjS2
Definition: carma_obj.h:379
CarmaObj::CarmaObj
CarmaObj(CarmaContext *current_context, const long *dims_data, const T_data *data)
CarmaObj::add_stream
int add_stream(int nb)
Definition: carma_obj.h:180
CarmaObj::streams
CarmaStreams * streams
Definition: carma_obj.h:145
CarmaObj::values
unsigned int * values
optional data (used for sort)
Definition: carma_obj.h:139
CarmaObj::clip
void clip(T_data min, T_data max)
CarmaObj::is_rng_init
bool is_rng_init()
Definition: carma_obj.h:246
CarmaObj::operator[]
const T_data operator[](int index) const
Definition: carma_obj.h:224
MT_HOST
@ MT_HOST
Definition: carma_obj.h:83
CarmaObj::gen
curandGenerator_t gen
Definition: carma_obj.h:130
MT_DARRAY
@ MT_DARRAY
Definition: carma_obj.h:82
CarmaData::get_strides
int get_strides(int i)
Definition: carma_obj.h:109
CarmaObj::get_plan
cufftHandle * get_plan()
FFT plan.
Definition: carma_obj.h:278
CarmaHostObj
this class provides wrappers to the generic carma host object
Definition: carma_host_obj.h:68
CarmaObj::get_o_data_value
const T_data get_o_data_value() const
Definition: carma_obj.h:233
CarmaObj::to_string
std::string to_string()
Definition: carma_obj.h:216
CarmaObj::nb_blocks
int nb_blocks
Definition: carma_obj.h:134
CarmaObj::host2device_async
int host2device_async(const T_data *data, cudaStream_t stream)
CarmaObj::device
int device
device where the CarmaObj is allocated
Definition: carma_obj.h:127
CarmaObj::get_data_at
T_data * get_data_at(int index)
Definition: carma_obj.h:231
CarmaObj::scale
void scale(T_data alpha, int incx)
CarmaObj::wait_stream
int wait_stream(int stream)
Definition: carma_obj.h:195
CarmaObj::init
void init(CarmaContext *current_context, const long *dims_data, const T_data *data, bool fromHost, int nb_streams)
CarmaObj::dgmm
void dgmm(char side, CarmaObj< T_data > *matA, int lda, CarmaObj< T_data > *vectx, int incx, int ldc)
CarmaObj::CarmaObj
CarmaObj(CarmaContext *current_context, const CarmaObj< T_data > *obj, int nb_streams)
CarmaObj::c_str
char const * c_str()
Definition: carma_obj.h:223
CarmaObj::sum
T_data sum()
CarmaObj::device2host_vect
int device2host_vect(T_data *data, int incx, int incy)
CarmaObjZ
CarmaObj< cuDoubleComplex > CarmaObjZ
Definition: carma_obj.h:382
CarmaObj::reset
int reset()
Definition: carma_obj.h:270
CarmaObj::dims_data
long * dims_data
dimensions of the array
Definition: carma_obj.h:125
clip_array
void clip_array(T_data *d_data, T_data min, T_data max, int N, CarmaDevice *device)
CarmaObjS
CarmaObj< float > CarmaObjS
Definition: carma_obj.h:377
CarmaObj::add_stream
int add_stream()
Definition: carma_obj.h:176
extract
int extract(T *d_smallimg, const T *d_fullimg, int fullimg_size, int center_pos, int extract_size, bool roll)
Kernel to extract a part of the image centred on center_pos.
roll
int roll(T *idata, int N, int M, int nim, CarmaDevice *device)
MT_PORTABLE
@ MT_PORTABLE
Definition: carma_obj.h:86
CarmaStreams::get_stream
cudaStream_t get_stream(int stream)
reduceCubCU
void reduceCubCU(T_data *cub_data, size_t cub_data_size, T_data *data, T_data *o_data, int N)
CarmaObj::owner
bool owner
Definition: carma_obj.h:137
CarmaObj::CarmaObj
CarmaObj(const CarmaObj< T_data > *obj)
CarmaObj::d_num_valid
size_t * d_num_valid
used for compact
Definition: carma_obj.h:140
CarmaObj::symv
void symv(char uplo, T_data alpha, CarmaObj< T_data > *matA, int lda, CarmaObj< T_data > *vectx, int incx, T_data beta, int incy)
CarmaFFT
int CarmaFFT(T_in *input, T_out *output, int dir, cufftHandle plan)
MT_ZEROCPY
@ MT_ZEROCPY
Definition: carma_obj.h:85
CarmaObj::current_context
CarmaContext * current_context
Definition: carma_obj.h:128
CarmaObjD2
CarmaObj< double2 > CarmaObjD2
Definition: carma_obj.h:380
fill_sym_matrix
int fill_sym_matrix(char src_uplo, T *d_data, int Ncol, int N, CarmaDevice *device)
CarmaContext::get_device
CarmaDevice * get_device(int dev)
Definition: carma_context.h:132
CarmaObjUI
CarmaObj< unsigned int > CarmaObjUI
Definition: carma_obj.h:375
CarmaObj::aimax
int aimax(int incx)
CarmaObj::CarmaObj
CarmaObj(CarmaContext *current_context, const long *dims_data)
CarmaStreams::wait_stream
int wait_stream(int stream)
CarmaObj::gemm
void gemm(char transa, char transb, T_data alpha, CarmaObj< T_data > *matA, int lda, CarmaObj< T_data > *matB, int ldb, T_data beta, int ldc)
fill_array_with_value
int fill_array_with_value(T_data *d_data, T_data value, int N, CarmaDevice *device)
CarmaData::nb_elem
int nb_elem
Number of elements.
Definition: carma_obj.h:97
CarmaObj::nb_threads
int nb_threads
Definition: carma_obj.h:133
CarmaObj::CarmaObj
CarmaObj(CarmaContext *current_context, const long *dims_data, const T_data *data, int nb_streams)
CarmaObj::transpose
int transpose(CarmaObj< T_data > *source)
CarmaObj::get_device
int get_device()
Definition: carma_obj.h:244
CarmaObj::get_o_data
T_data * get_o_data()
Definition: carma_obj.h:232
transposeCU
int transposeCU(T_data *d_idata, T_data *d_odata, long N1, long N2)
MT_PAGELOCK
@ MT_PAGELOCK
Definition: carma_obj.h:84
CarmaObj::copy_from
int copy_from(const T_data *data, int nb_elem)
fillarray2d
int fillarray2d(T *d_odata, T *d_idata, int x0, int Ncol, int NC, int N, CarmaDevice *device)
CarmaObj::dot
T_data dot(CarmaObj< T_data > *source, int incx, int incy)