 |
COMPASS
5.0.0
End-to-end AO simulation tool using GPU acceleration
|
Go to the documentation of this file.
49 #include <curand_kernel.h>
51 #include <type_traits>
92 template <
class T_data>
113 template <
class T_data>
116 template <
class T_data>
148 const T_data *data,
bool fromHost,
int nb_streams);
161 const T_data *data,
int nb_streams);
217 std::ostringstream stream;
227 cudaMemcpyDeviceToHost));
236 cudaMemcpy(&tmp_float,
o_data,
sizeof(T_data), cudaMemcpyDeviceToHost));
249 template <
typename T_dest>
251 template <
typename T_dest>
266 int copy_into(ipc::Cacao<T_data> *cacaoInterface);
267 int copy_from(ipc::Cacao<T_data> *cacaoInterface);
271 return cudaMemset(this->d_data, 0, this->nb_elem *
sizeof(T_data));
275 this->d_data, value, this->nb_elem,
276 this->current_context->
get_device(this->device));
291 void clip(T_data min, T_data max);
348 int lda, T_data beta,
int ldc);
360 int prng(T_data *output,
char gtype,
float alpha,
float beta);
361 int prng(T_data *output,
char gtype,
float alpha);
362 int prng(
char gtype,
float alpha,
float beta);
363 int prng(
char gtype,
float alpha);
371 int prng_host(
char gtype, T_data stddev, T_data alpha);
389 template <
class T_data>
391 os <<
"-----------------------" << std::endl;
392 os <<
"CarmaObj<" <<
typeid(T_data).name() <<
"> object on GPU"
395 os <<
"ndims = " << ndims << std::endl;
396 for (
long dim = 0; dim < ndims; dim++) {
397 os <<
"dim[" << dim <<
"] = " << obj.
get_dims(dim + 1) << std::endl;
400 os <<
"sizeof(" <<
typeid(T_data).name() <<
") = " <<
sizeof(T_data)
402 os <<
"-----------------------" << std::endl;
407 template <
class T_data>
408 void clip_array(T_data *d_data, T_data min, T_data max,
int N,
412 template <
class T_data>
413 void reduce(
int size,
int threads,
int blocks, T_data *d_idata,
415 template <
class T_data>
418 template <
class T_data>
420 T_data *&o_data,
int N);
421 template <
class T_data>
422 void reduceCubCU(T_data *cub_data,
size_t cub_data_size, T_data *data,
423 T_data *o_data,
int N);
426 template <
class T_data>
427 int transposeCU(T_data *d_idata, T_data *d_odata,
long N1,
long N2);
430 template <
class T_data>
433 template <
class T_data>
441 curandState *state,
char gtype,
int n,
float alpha,
448 template <
class T_in,
class T_out>
450 template <
class T_in,
class T_out>
451 void carma_initfft(
const long *dims_data, cufftHandle *plan, cufftType type_plan);
452 template <
class T_in,
class T_out>
453 int CarmaFFT(T_in *input, T_out *output,
int dir, cufftHandle plan);
456 template <
class T_data>
457 int fillindex(T_data *d_odata, T_data *d_idata,
int *indx,
int N,
459 template <
class T_data>
462 int getarray2d(T *d_odata, T *d_idata,
int x0,
int Ncol,
int NC,
int N,
465 int fillarray2d(T *d_odata, T *d_idata,
int x0,
int Ncol,
int NC,
int N,
468 int fillarray2d2(T *d_odata, T *d_idata,
int x0,
int Ncol,
int NC,
int N,
481 int dataW,
int N,
int n,
int nim);
483 CarmaObjC *padded_spectrum,
int kernelY,
int kernelX);
486 CarmaObjC *padded_spectrum,
int kernelY,
int kernelX);
489 int custom_half_axpy(half alpha, half *source,
int incx,
int incy,
int N,
505 int extract(T *d_smallimg,
const T *d_fullimg,
int fullimg_size,
int center_pos,
506 int extract_size,
bool roll);
508 #endif // _CARMA_OBJ_H_
int carma_plusai(T *d_odata, T *i_data, int i, int sgn, int N, CarmaDevice *device)
int device2host_mat(T_data *data, int lda, int ldb)
int carma_prng_cu(T *results, const int nb_threads, const int nb_blocks, curandState *state, char gtype, int n, float alpha, float beta)
int fillarray2d2(T *d_odata, T *d_idata, int x0, int Ncol, int NC, int N, CarmaDevice *device)
int ndims
Number of dimensions.
void geam(char transa, char transb, T_data alpha, CarmaObj< T_data > *matA, int lda, T_data beta, CarmaObj< T_data > *matB, int ldb, int ldc)
CarmaObj< uint16_t > CarmaObjUSI
cudaStream_t get_cuda_stream(int stream)
T_data * d_data
Pointer to data.
cufftType type_plan
FFT plan type.
CarmaObj< cuFloatComplex > CarmaObjC
void symm(char side, char uplo, T_data alpha, CarmaObj< T_data > *matA, int lda, CarmaObj< T_data > *matB, int ldb, T_data beta, int ldc)
void ger(T_data alpha, CarmaObj< T_data > *vectx, int incx, CarmaObj< T_data > *vecty, int incy, int lda)
MemType malloc_type
type of memory allocation
int carma_plus(T *d_odata, T elpha, int N, CarmaDevice *device)
int host2device(const T_dest *data)
int device2host(T_dest *data)
void syrkx(char uplo, char transa, T_data alpha, CarmaObj< T_data > *matA, int lda, CarmaObj< T_data > *matB, int ldb, T_data beta, int ldc)
T_data * cub_data
optional data (used for scan / reduction)
this file provides tools for CarmaObj
int prng(T_data *output, char gtype, float alpha, float beta)
unsigned int * get_values()
optional data (used for sort)
long * dims_data
Dimensions.
#define carma_safe_call(err)
int carma_curand_montagn(curandState *state, T *d_odata, int N, CarmaDevice *device)
int carma_fftconv(CarmaObjS *data_out, CarmaObjS *padded_data, CarmaObjC *padded_spectrum, int kernelY, int kernelX)
int nb_elem
number of elements in the array
int prng_host(char gtype, T_data stddev, T_data alpha)
void gemv(char trans, T_data alpha, CarmaObj< T_data > *matA, int lda, CarmaObj< T_data > *vectx, int incx, T_data beta, int incy)
void reduce(int size, int threads, int blocks, T_data *d_idata, T_data *d_odata)
int prng(char gtype, float alpha)
CarmaContext * get_context()
int fftconv_unpad(float *d_odata, float *d_idata, int fftW, int dataH, int dataW, int N, int n, int nim)
int device2host_async(T_data *data, cudaStream_t stream)
long get_dims_data(int i)
void carma_initfft(const long *dims_data, cufftHandle *plan, cufftType type_plan)
void rot(CarmaObj< T_data > *source, int incx, int incy, T_data sc, T_data ss)
int copy_into(T_data *data, int nb_elem)
const long * get_dims_data()
T_data * d_data
Input data => change to vector.
void swap(CarmaObj< T_data > *source, int incx, int incy)
CarmaObj(CarmaContext *current_context, const long *dims_data, int nb_streams)
std::ostream & operator<<(std::ostream &os, CarmaObj< T_data > &obj)
int launch_generic1d(T_data *d_idata, T_data *d_odata, int N, CarmaDevice *device)
CarmaObj< double > CarmaObjD
cufftHandle plan
FFT plan.
int device2host_opt(T_data *data)
cufftType get_type_plan()
FFT plan type.
int carma_prng_init(int *seed, const int nb_threads, const int nb_blocks, curandState *state)
void copy(CarmaObj< T_data > *source, int incx, int incy)
int getarray2d(T *d_odata, T *d_idata, int x0, int Ncol, int NC, int N, CarmaDevice *device)
int prng_host(char gtype)
cufftType carma_select_plan()
std::vector< T_data > h_data
this class provides the stream features to CarmaObj
int host2device_mat(const T_data *data, int lda, int ldb)
void axpy(T_data alpha, CarmaObj< T_data > *source, int incx, int incy, int offset=0)
void syrk(char uplo, char transa, T_data alpha, CarmaObj< T_data > *matA, int lda, T_data beta, int ldc)
CarmaObj< int > CarmaObjI
void swap_ptr(T_data *ptr)
int * strides
Strides for each dimension.
int get_nb_streams() const
void init_reduceCubCU(T_data *&cub_data, size_t &cub_data_size, T_data *data, T_data *&o_data, int N)
int host2device_vect(const T_data *data, int incx, int incy)
int carma_initfftconv(CarmaObjS *data_in, CarmaObjS *kernel_in, CarmaObjS *padded_data, CarmaObjC *padded_spectrum, int kernelY, int kernelX)
int fillvalues(T_data *d_odata, T_data *val, int N, CarmaDevice *device)
int init_prng_host(int seed)
this class provides wrappers to the generic carma object
int launch_generic2d(T_data *d_odata, T_data *d_idata, int N1, int N2)
int prng(char gtype, float alpha, float beta)
MemType get_malloc_type()
this class provides the context in which CarmaObj are created
T_data * o_data
optional data (used for scan / reduction)
int prng_montagn(float init_montagn)
CarmaObj(CarmaContext *current_context, const CarmaObj< T_data > *obj)
int fillindex(T_data *d_odata, T_data *d_idata, int *indx, int N, CarmaDevice *device)
CarmaObj< float2 > CarmaObjS2
CarmaObj(CarmaContext *current_context, const long *dims_data, const T_data *data)
unsigned int * values
optional data (used for sort)
void clip(T_data min, T_data max)
const T_data operator[](int index) const
cufftHandle * get_plan()
FFT plan.
this class provides wrappers to the generic carma host object
const T_data get_o_data_value() const
int host2device_async(const T_data *data, cudaStream_t stream)
int device
device where the CarmaObj is allocated
T_data * get_data_at(int index)
void scale(T_data alpha, int incx)
int wait_stream(int stream)
void init(CarmaContext *current_context, const long *dims_data, const T_data *data, bool fromHost, int nb_streams)
void dgmm(char side, CarmaObj< T_data > *matA, int lda, CarmaObj< T_data > *vectx, int incx, int ldc)
CarmaObj(CarmaContext *current_context, const CarmaObj< T_data > *obj, int nb_streams)
int device2host_vect(T_data *data, int incx, int incy)
CarmaObj< cuDoubleComplex > CarmaObjZ
long * dims_data
dimensions of the array
void clip_array(T_data *d_data, T_data min, T_data max, int N, CarmaDevice *device)
CarmaObj< float > CarmaObjS
int extract(T *d_smallimg, const T *d_fullimg, int fullimg_size, int center_pos, int extract_size, bool roll)
Kernel to extract a part of the image centred on center_pos.
int roll(T *idata, int N, int M, int nim, CarmaDevice *device)
cudaStream_t get_stream(int stream)
void reduceCubCU(T_data *cub_data, size_t cub_data_size, T_data *data, T_data *o_data, int N)
CarmaObj(const CarmaObj< T_data > *obj)
size_t * d_num_valid
used for compact
void symv(char uplo, T_data alpha, CarmaObj< T_data > *matA, int lda, CarmaObj< T_data > *vectx, int incx, T_data beta, int incy)
int CarmaFFT(T_in *input, T_out *output, int dir, cufftHandle plan)
CarmaContext * current_context
CarmaObj< double2 > CarmaObjD2
int fill_sym_matrix(char src_uplo, T *d_data, int Ncol, int N, CarmaDevice *device)
CarmaDevice * get_device(int dev)
CarmaObj< unsigned int > CarmaObjUI
CarmaObj(CarmaContext *current_context, const long *dims_data)
int wait_stream(int stream)
void gemm(char transa, char transb, T_data alpha, CarmaObj< T_data > *matA, int lda, CarmaObj< T_data > *matB, int ldb, T_data beta, int ldc)
int fill_array_with_value(T_data *d_data, T_data value, int N, CarmaDevice *device)
int nb_elem
Number of elements.
CarmaObj(CarmaContext *current_context, const long *dims_data, const T_data *data, int nb_streams)
int transpose(CarmaObj< T_data > *source)
int transposeCU(T_data *d_idata, T_data *d_odata, long N1, long N2)
int copy_from(const T_data *data, int nb_elem)
int fillarray2d(T *d_odata, T *d_idata, int x0, int Ncol, int NC, int N, CarmaDevice *device)
T_data dot(CarmaObj< T_data > *source, int incx, int incy)