// compass/rc/carma__obj_8h_source.html

// -----------------------------------------------------------------------------

//  This file is part of COMPASS <https://anr-compass.github.io/compass/>

//

//  Copyright (C) 2011-2019 COMPASS Team <https://github.com/ANR-COMPASS>

//  All rights reserved.

//  Distributed under GNU - LGPL

//

//  COMPASS is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser

//  General Public License as published by the Free Software Foundation, either version 3 of the License,

//  or any later version.

//

//  COMPASS: End-to-end AO simulation tool using GPU acceleration

//  The COMPASS platform was designed to meet the need of high-performance for the simulation of AO systems.

//

//  The final product includes a software package for simulating all the critical subcomponents of AO,

//  particularly in the context of the ELT and a real-time core based on several control approaches,

//  with performances consistent with its integration into an instrument. Taking advantage of the specific

//  hardware architecture of the GPU, the COMPASS tool allows to achieve adequate execution speeds to

//  conduct large simulation campaigns called to the ELT.

//

//  The COMPASS platform can be used to carry a wide variety of simulations to both test specific components

//  of AO of the E-ELT (such as wavefront analysis device with a pyramid or elongated Laser star), and

//  various systems configurations such as multi-conjugate AO.

//

//  COMPASS is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the

//  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

//  See the GNU Lesser General Public License for more details.

//

//  You should have received a copy of the GNU Lesser General Public License along with COMPASS.

//  If not, see <https://www.gnu.org/licenses/lgpl-3.0.txt>.

// -----------------------------------------------------------------------------


#ifndef _CARMA_OBJ_H_

#define _CARMA_OBJ_H_


#include <carma_context.h>
#include <carma_streams.h>
#include <carma_utils.h>
#include <curand.h>
#include <curand_kernel.h>

#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <sstream>
#include <string>
#include <type_traits>
#include <typeinfo>  // operator typeid
#include <vector>


/*
 Design note (TODO):

 Create a memory object holding:
   void *memory : the allocation
   int          : the number of references to it

 Create a class which contains:
 - d_data
 - ndims
 - dims
 - strides
 - type

 with the constructors:
   new()
   new(existing)

 and then modify CarmaObj so that it is:
 - an object of the previous class
 - all the methods of a CarmaObj
 */


// Block size constant. Its users are not visible in this header; presumably
// the edge length of the CUDA thread blocks used by the kernels — TODO confirm.
#define BLOCK_SZ 16


/**
 * @brief Tag describing how a buffer was allocated (stored by CarmaData as
 *        `malloc_type`).
 *
 * The numeric values are the ones the compiler assigned implicitly before;
 * they are only spelled out here. The per-enumerator remarks are inferred from
 * the names alone — TODO confirm against the allocation code.
 */
enum MemType {
  MT_DEVICE = 0,    // presumably plain device memory
  MT_DARRAY = 1,    // presumably a device array
  MT_HOST = 2,      // presumably pageable host memory
  MT_PAGELOCK = 3,  // presumably page-locked host memory
  MT_ZEROCPY = 4,   // presumably zero-copy (mapped) host memory
  MT_PORTABLE = 5,  // presumably portable pinned host memory
  MT_WRICOMB = 6,   // presumably write-combined host memory
  MT_GENEPIN = 7    // presumably generic pinned host memory
};

// TODO: should a texture memory type be added?


/**
 * @brief Plain descriptor of an array: data pointer, rank, element count,
 *        dimensions, strides and allocation type.
 *
 * The class only exposes read accessors; nothing in this header fills the
 * fields, so they are expected to be set by a derived class. Every member now
 * has a default value so that a default-constructed descriptor can be queried
 * without reading indeterminate memory.
 *
 * @tparam T_data element type pointed to by d_data.
 */
template <class T_data>
class CarmaData {
 protected:
  T_data *d_data = nullptr;         //!< pointer to the elements
  int ndims = 0;                    //!< number of dimensions
  int nb_elem = 0;                  //!< number of elements
  long *dims_data = nullptr;        //!< dimension array (indexed by get_dims_data(i))
  int *strides = nullptr;           //!< stride array (indexed by get_strides(i))
  MemType malloc_type = MT_DEVICE;  //!< allocation tag; defaults to the zero-valued enumerator

 public:
  /// @return the stored data pointer (may be null).
  T_data *get_data() const { return d_data; }

  /// @return the stored number of dimensions.
  int get_ndims() const { return ndims; }

  /// @return the stored number of elements.
  int get_nb_elem() const { return nb_elem; }

  /// @return the stored dimension array (may be null).
  const long *get_dims_data() const { return dims_data; }

  /// @return entry @p i of the dimension array; no bounds or null check is done.
  long get_dims_data(int i) const { return dims_data[i]; }

  /// @return the stored stride array (may be null).
  int *get_strides() const { return strides; }

  /// @return entry @p i of the stride array; no bounds or null check is done.
  int get_strides(int i) const { return strides[i]; }

  /// @return the stored allocation tag.
  MemType get_malloc_type() const { return malloc_type; }
};


// Forward declaration only; the class is defined outside this header.
template <typename T_data> class CarmaHostObj;


/**
 * @brief Array object backed by a device buffer (d_data), together with its
 *        dimensions and the stream / cuRAND / cuFFT handles used to work on it.
 *
 * Only the small accessors are defined inline in this header; all other
 * members are declared here and implemented elsewhere.
 *
 * NOTE(review): dealloc() frees d_data when `owner` is set, but no copy
 * constructor or copy assignment is declared (the assignment operator is
 * commented out below), so implicitly generated copies would share d_data.
 * Confirm that instances are never copied by value.
 *
 * @tparam T_data element type stored in the buffer.
 */
template <class T_data>
class CarmaObj {
 protected:
  T_data *d_data;              //!< device buffer; released by dealloc() when `owner` is set
  std::vector<T_data> h_data;  //!< host copy, allocated lazily by sync_h_data()
  T_data *o_data;              //!< device location read back by get_o_data_value()
  T_data *cub_data;            //!< presumably temporary storage for the CUB reduction — confirm
  size_t cub_data_size;        //!< optional, for reduction
  int ndim;
  long *dims_data;  //!< dimension array; operator<< reads [0] as the rank and [1..rank] as extents
  int nb_elem;      //!< element count used by reset(), memset() and sync_h_data()
  int device;       //!< device index handed to CarmaContext::get_device()
  CarmaContext *current_context;

  curandGenerator_t gen;  //!< cuRAND generator handle; is_rng_init() tests it against null
  curandState *d_states;

  int nb_threads;
  int nb_blocks;

  bool keys_only;     //!< optional flag (used for sort)
  bool owner = true;  //!< true when d_data was created inside the CarmaObj

  unsigned int *values;
  size_t *d_num_valid;

  cufftHandle plan;
  cufftType type_plan;

  CarmaStreams *streams;

  std::string c_str_cache;  //!< keeps the text returned by c_str() alive

  /// Shared initialisation routine used by the constructors (implemented elsewhere).
  void init(CarmaContext *current_context, const long *dims_data,
            const T_data *data, bool fromHost, int nb_streams);

 public:
  // Constructors and destructor are implemented elsewhere.
  CarmaObj(const CarmaObj<T_data> *obj);
  CarmaObj(CarmaContext *current_context, const long *dims_data);
  CarmaObj(CarmaContext *current_context, const CarmaObj<T_data> *obj);
  CarmaObj(CarmaContext *current_context, const long *dims_data,
           const T_data *data);
  CarmaObj(CarmaContext *current_context, const long *dims_data,
           int nb_streams);
  CarmaObj(CarmaContext *current_context, const CarmaObj<T_data> *obj,
           int nb_streams);
  CarmaObj(CarmaContext *current_context, const long *dims_data,
           const T_data *data, int nb_streams);
  ~CarmaObj();

  /// Allocates h_data on first use (nb_elem elements) and fills it through device2host().
  void sync_h_data() {
    if (h_data.empty()) h_data.resize(nb_elem);
    device2host(h_data.data());
  }

  /// @return pointer to the host copy; only meaningful after sync_h_data().
  T_data *get_h_data() { return h_data.data(); }

  // ---- stream management: thin forwards to the CarmaStreams object ----

  /// @return the number of streams reported by `streams`.
  int get_nb_streams() const { return streams->get_nb_streams(); }

  /// Adds one stream. @return the resulting number of streams.
  int add_stream() {
    this->streams->add_stream();
    return this->streams->get_nb_streams();
  }

  /// Forwards @p nb to CarmaStreams::add_stream. @return the resulting number of streams.
  int add_stream(int nb) {
    this->streams->add_stream(nb);
    return this->streams->get_nb_streams();
  }

  /// Removes one stream. @return the resulting number of streams.
  int del_stream() {
    this->streams->del_stream();
    return this->streams->get_nb_streams();
  }

  /// Forwards @p nb to CarmaStreams::del_stream. @return the resulting number of streams.
  int del_stream(int nb) {
    this->streams->del_stream(nb);
    return this->streams->get_nb_streams();
  }

  /// @return the CUDA stream that `streams` associates with index @p stream.
  cudaStream_t get_cuda_stream(int stream) {
    return this->streams->get_stream(stream);
  }

  /// Waits on stream @p stream. @return always EXIT_SUCCESS.
  int wait_stream(int stream) {
    this->streams->wait_stream(stream);
    return EXIT_SUCCESS;
  }

  /// Waits on all streams. @return always EXIT_SUCCESS.
  int wait_all_streams() {
    this->streams->wait_all_streams();
    return EXIT_SUCCESS;
  }

  /**
   * @brief Replaces the device buffer with an externally provided pointer.
   *
   * The current buffer is released first if it is owned; afterwards the object
   * is marked as non-owner, so @p ptr is never freed by this object.
   */
  void swap_ptr(T_data *ptr) {
    dealloc();
    d_data = ptr;
    owner = false;
  }

  /**
   * @brief Frees the device buffer when it is owned by this object.
   *
   * The pointer is reset afterwards so that a second call (for instance a
   * manual dealloc() followed by another one) does not free the same
   * allocation twice; cudaFree on a null pointer performs no operation.
   */
  void dealloc() {
    if (owner) {
      cudaFree(d_data);
      d_data = nullptr;
    }
  }

  /// Implicit conversion to the raw device pointer.
  operator T_data *() { return d_data; }

  /// @return the text produced by streaming this object through operator<<.
  std::string to_string() {
    std::ostringstream stream;
    stream << *this;
    return stream.str();
  }

  operator std::string() { return this->to_string(); }

  /**
   * @brief C-string view of to_string().
   *
   * The text is stored in c_str_cache: returning the buffer of the temporary
   * produced by to_string() would hand out a pointer that is already dangling.
   * The returned pointer stays valid until the next c_str() call on this
   * object or until the object is destroyed.
   */
  inline char const *c_str() {
    c_str_cache = this->to_string();
    return c_str_cache.c_str();
  }

  /// Copies element @p index from the device to the host (one cudaMemcpy per call; no bounds check).
  const T_data operator[](int index) const {
    T_data tmp_float;
    carma_safe_call(cudaMemcpy(&tmp_float, &d_data[index], sizeof(T_data),
                               cudaMemcpyDeviceToHost));
    return tmp_float;
  }

  T_data *get_data() { return d_data; }
  /// @return address of element @p index inside the device buffer (no bounds check).
  T_data *get_data_at(int index) { return &d_data[index]; }
  T_data *get_o_data() { return o_data; }

  /// Copies the single value stored at o_data from the device to the host.
  const T_data get_o_data_value() const {
    T_data tmp_float;
    carma_safe_call(
        cudaMemcpy(&tmp_float, o_data, sizeof(T_data), cudaMemcpyDeviceToHost));
    return tmp_float;
  }

  const long *get_dims() { return dims_data; }
  /// @return entry @p i of the dimension array (no bounds check).
  long get_dims(int i) { return dims_data[i]; }
  int get_nb_elements() { return nb_elem; }
  CarmaContext *get_context() { return current_context; }

  int get_device() { return device; }

  /// @return true when the cuRAND generator handle is non-null.
  bool is_rng_init() { return (gen != nullptr); }

  // ---- host <-> device transfers (implemented elsewhere) ----
  template <typename T_dest>
  int host2device(const T_dest *data);
  template <typename T_dest>
  int device2host(T_dest *data);

  int host2device_async(const T_data *data, cudaStream_t stream);
  int device2host_async(T_data *data, cudaStream_t stream);
  int device2host_opt(T_data *data);
  int host2device_vect(const T_data *data, int incx, int incy);
  int device2host_vect(T_data *data, int incx, int incy);
  int host2device_mat(const T_data *data, int lda, int ldb);
  int device2host_mat(T_data *data, int lda, int ldb);

  int copy_into(T_data *data, int nb_elem);
  int copy_from(const T_data *data, int nb_elem);

#ifdef USE_OCTOPUS
  int copy_into(ipc::Cacao<T_data> *cacaoInterface);
  int copy_from(ipc::Cacao<T_data> *cacaoInterface);
#endif

  /// Zeroes the nb_elem elements of the device buffer. @return the cudaMemset status as int.
  inline int reset() {
    return cudaMemset(this->d_data, 0, this->nb_elem * sizeof(T_data));
  }

  /// Fills the device buffer with @p value through fill_array_with_value().
  inline int memset(T_data value) {
    return fill_array_with_value(
        this->d_data, value, this->nb_elem,
        this->current_context->get_device(this->device));
  }

  cufftHandle *get_plan() { return &plan; }
  cufftType get_type_plan() { return type_plan; }

  unsigned int *get_values() { return values; }

  // ---- reductions and element-wise helpers (implemented elsewhere) ----
  T_data sum();
  void init_reduceCub();
  void reduceCub();

  void clip(T_data min, T_data max);

  int transpose(CarmaObj<T_data> *source);
  // CarmaObj<T_data>& operator= (const CarmaObj<T_data>& obj);

  // ---- BLAS 1 ----
  // Names follow the level-1 BLAS routines; implementations are not visible here.
  int aimax(int incx);
  int aimin(int incx);
  T_data asum(int incx);
  T_data nrm2(int incx);
  T_data dot(CarmaObj<T_data> *source, int incx, int incy);
  void scale(T_data alpha, int incx);
  void swap(CarmaObj<T_data> *source, int incx, int incy);
  void copy(CarmaObj<T_data> *source, int incx, int incy);
  void axpy(T_data alpha, CarmaObj<T_data> *source, int incx, int incy,
            int offset = 0);
  void rot(CarmaObj<T_data> *source, int incx, int incy, T_data sc, T_data ss);

  // ---- BLAS 2 ----
  // Names follow the level-2 BLAS routines; implementations are not visible here.
  void gemv(char trans, T_data alpha, CarmaObj<T_data> *matA, int lda,
            CarmaObj<T_data> *vectx, int incx, T_data beta, int incy);
  void ger(T_data alpha, CarmaObj<T_data> *vectx, int incx,
           CarmaObj<T_data> *vecty, int incy, int lda);
  void symv(char uplo, T_data alpha, CarmaObj<T_data> *matA, int lda,
            CarmaObj<T_data> *vectx, int incx, T_data beta, int incy);

  // ---- BLAS 3 ----
  // Names follow the level-3 BLAS routines; implementations are not visible here.
  void gemm(char transa, char transb, T_data alpha, CarmaObj<T_data> *matA,
            int lda, CarmaObj<T_data> *matB, int ldb, T_data beta, int ldc);
  void symm(char side, char uplo, T_data alpha, CarmaObj<T_data> *matA,
            int lda, CarmaObj<T_data> *matB, int ldb, T_data beta, int ldc);
  void syrk(char uplo, char transa, T_data alpha, CarmaObj<T_data> *matA,
            int lda, T_data beta, int ldc);
  void syrkx(char uplo, char transa, T_data alpha, CarmaObj<T_data> *matA,
             int lda, CarmaObj<T_data> *matB, int ldb, T_data beta, int ldc);
  void geam(char transa, char transb, T_data alpha, CarmaObj<T_data> *matA,
            int lda, T_data beta, CarmaObj<T_data> *matB, int ldb, int ldc);
  void dgmm(char side, CarmaObj<T_data> *matA, int lda,
            CarmaObj<T_data> *vectx, int incx, int ldc);

  // ---- random number generation (implemented elsewhere) ----
  int init_prng();
  int init_prng(long seed);
  int destroy_prng();
  int prng(T_data *output, char gtype, float alpha, float beta);
  int prng(T_data *output, char gtype, float alpha);
  int prng(char gtype, float alpha, float beta);
  int prng(char gtype, float alpha);
  int prng(char gtype);

  int prng_montagn(float init_montagn);

  int init_prng_host(int seed);
  int prng_host(char gtype);
  int prng_host(char gtype, T_data stddev);
  int prng_host(char gtype, T_data stddev, T_data alpha);
  int destroy_prng_host();
};

// Short aliases for the CarmaObj instantiations, one per element type.
using CarmaObjI = CarmaObj<int>;
using CarmaObjUI = CarmaObj<unsigned int>;
using CarmaObjUSI = CarmaObj<uint16_t>;
using CarmaObjS = CarmaObj<float>;
using CarmaObjD = CarmaObj<double>;
using CarmaObjS2 = CarmaObj<float2>;
using CarmaObjD2 = CarmaObj<double2>;
using CarmaObjC = CarmaObj<cuFloatComplex>;
using CarmaObjZ = CarmaObj<cuDoubleComplex>;
// using CarmaObjTF = CarmaObj<tuple_t<float>>;

// Half-precision alias, only available when the build defines CAN_DO_HALF.
#ifdef CAN_DO_HALF
using CarmaObjH = CarmaObj<half>;
#endif


/**
 * @brief Writes a multi-line summary of a CarmaObj to a stream.
 *
 * The summary contains the element type name, the device index, the number of
 * dimensions (taken from entry 0 of the dimension array), each extent (entries
 * 1..ndims), the element count and sizeof(T_data), framed by two separator
 * lines. Every line ends with std::endl, so the stream is flushed each time.
 *
 * @param os  destination stream.
 * @param obj object to describe (non-const because its getters are non-const).
 * @return @p os, to allow chaining.
 */
template <class T_data>
std::ostream &operator<<(std::ostream &os, CarmaObj<T_data> &obj) {
  const char *const separator = "-----------------------";
  const char *const type_name = typeid(T_data).name();

  os << separator << std::endl;
  os << "CarmaObj<" << type_name << "> object on GPU" << obj.get_device()
     << std::endl;

  const long rank = obj.get_dims(0);
  os << "ndims = " << rank << std::endl;
  // Extents live at indices 1..rank; they are printed with a zero-based label.
  for (long slot = 1; slot <= rank; ++slot) {
    os << "dim[" << (slot - 1) << "] = " << obj.get_dims(slot) << std::endl;
  }

  os << "nbElem = " << obj.get_nb_elements() << std::endl;
  os << "sizeof(" << type_name << ") = " << sizeof(T_data) << std::endl;
  os << separator << std::endl;
  return os;
}


// CU functions clip

/**
 * @brief Declaration of the array clipping routine; the definition is not part
 *        of this header.
 *
 * Judging by the names, limits the N elements of d_data to [min, max] on the
 * given device — TODO confirm against the implementation.
 */
template <typename T_data>
void clip_array(T_data *d_data, T_data min, T_data max, int N,
                CarmaDevice *device);


// CU functions sum

/**
 * @brief Reduction over d_idata with an explicit launch configuration.
 *        Declaration only; presumably writes partial results to d_odata —
 *        TODO confirm against the implementation.
 */
template <typename T_data>
void reduce(int size, int threads, int blocks, T_data *d_idata, T_data *d_odata);

/**
 * @brief Reduction of N elements returning a single value. Declaration only.
 */
template <typename T_data>
T_data reduce(T_data *data, int N);

/**
 * @brief Set-up step of the CUB-based reduction. Declaration only.
 *
 * cub_data, cub_data_size and o_data are taken by reference, so the callee can
 * assign them; presumably it allocates the temporary and output buffers —
 * TODO confirm.
 */
template <typename T_data>
void init_reduceCubCU(T_data *&cub_data, size_t &cub_data_size, T_data *data,
                      T_data *&o_data, int N);

/**
 * @brief Execution step of the CUB-based reduction, taking the buffers
 *        prepared by init_reduceCubCU by value. Declaration only.
 */
template <typename T_data>
void reduceCubCU(T_data *cub_data, size_t cub_data_size, T_data *data,
                 T_data *o_data, int N);


// CU functions transpose

/**
 * @brief Transpose routine from d_idata into d_odata. Declaration only.
 *
 * N1 and N2 are presumably the two matrix extents — TODO confirm which one is
 * the row count.
 */
template <typename T_data>
int transposeCU(T_data *d_idata, T_data *d_odata, long N1, long N2);


// CU functions generic

/**
 * @brief Launcher of a generic 1-D kernel over N elements. Declaration only.
 * @note Input comes first here (d_idata, d_odata), unlike launch_generic2d.
 */
template <typename T_data>
int launch_generic1d(T_data *d_idata, T_data *d_odata, int N,
                     CarmaDevice *device);

/**
 * @brief Launcher of a generic 2-D kernel over N1 x N2 elements. Declaration only.
 * @note Output comes first here (d_odata, d_idata), unlike launch_generic1d.
 */
template <typename T_data>
int launch_generic2d(T_data *d_odata, T_data *d_idata, int N1, int N2);


// CU functions curand

/**
 * @brief Initialises the curandState array with the given seeds and launch
 *        configuration. Declaration only; details are in the implementation.
 */
int carma_prng_init(int *seed, const int nb_threads, const int nb_blocks,
                    curandState *state);

/**
 * @brief Generates n random values into results using the given states.
 *        Declaration only.
 *
 * gtype presumably selects the distribution and alpha/beta its parameters —
 * TODO confirm against the implementation.
 */
template <typename T>
int carma_prng_cu(T *results, const int nb_threads, const int nb_blocks,
                  curandState *state, char gtype, int n, float alpha,
                  float beta);

/**
 * @brief cuRAND-based routine writing N values to d_odata on the given device.
 *        Declaration only; the generated quantity is defined by the
 *        implementation.
 */
template <typename T>
int carma_curand_montagn(curandState *state, T *d_odata, int N,
                         CarmaDevice *device);


// CU functions fft

/**
 * @brief Returns the cufftType matching the <T_in, T_out> pair. Declaration
 *        only; both template arguments must be given explicitly because the
 *        function takes no parameters.
 */
template <typename T_in, typename T_out>
cufftType carma_select_plan();

/**
 * @brief Sets up the cuFFT plan pointed to by plan for the given dimensions
 *        and plan type. Declaration only; T_in and T_out cannot be deduced
 *        from the arguments and must be given explicitly.
 */
template <typename T_in, typename T_out>
void carma_initfft(const long *dims_data, cufftHandle *plan,
                   cufftType type_plan);

/**
 * @brief Runs the FFT described by plan from input to output. Declaration only.
 *
 * dir presumably selects forward or inverse — TODO confirm the encoding.
 */
template <typename T_in, typename T_out>
int CarmaFFT(T_in *input, T_out *output, int dir, cufftHandle plan);


// CU functions generic
//
// Declarations only: the definitions are not part of this header, so the
// remarks below are inferred from the names and need to be confirmed against
// the implementations.

/// Presumably gathers N elements of d_idata selected by indx into d_odata.
template <typename T_data>
int fillindex(T_data *d_odata, T_data *d_idata, int *indx, int N,
              CarmaDevice *device);

/// Presumably fills N elements of d_odata from the value pointed to by val.
template <typename T_data>
int fillvalues(T_data *d_odata, T_data *val, int N, CarmaDevice *device);

/// Presumably extracts a 2-D sub-array starting at x0 from d_idata.
template <typename T>
int getarray2d(T *d_odata, T *d_idata, int x0, int Ncol, int NC, int N,
               CarmaDevice *device);

/// Presumably the counterpart of getarray2d: writes a 2-D sub-array into d_odata.
template <typename T>
int fillarray2d(T *d_odata, T *d_idata, int x0, int Ncol, int NC, int N,
                CarmaDevice *device);

/// Variant of fillarray2d with the same parameter list.
template <typename T>
int fillarray2d2(T *d_odata, T *d_idata, int x0, int Ncol, int NC, int N,
                 CarmaDevice *device);

/// Presumably completes a symmetric matrix from the triangle named by src_uplo.
template <typename T>
int fill_sym_matrix(char src_uplo, T *d_data, int Ncol, int N,
                    CarmaDevice *device);

/// Presumably adds the scalar elpha to the N elements of d_odata.
template <typename T>
int carma_plus(T *d_odata, T elpha, int N, CarmaDevice *device);

/// Presumably adds or subtracts (per sgn) element i of i_data to d_odata.
template <typename T>
int carma_plusai(T *d_odata, T *i_data, int i, int sgn, int N,
                 CarmaDevice *device);


// CU functions fftconv

// Declaration only; the definition is not part of this header.
// Presumably copies the valid region of a padded FFT-convolution result
// (d_idata) into d_odata — TODO confirm the meaning of fftW/dataH/dataW/N/n/nim
// against the implementation.
int fftconv_unpad(float *d_odata, float *d_idata, int fftW, int dataH,

                  int dataW, int N, int n, int nim);

// Declaration only. Presumably prepares the padded data and spectrum buffers
// for an FFT-based convolution of data_in with kernel_in — TODO confirm.
int carma_initfftconv(CarmaObjS *data_in, CarmaObjS *kernel_in, CarmaObjS *padded_data,

                      CarmaObjC *padded_spectrum, int kernelY, int kernelX);

// CPP functions fftconv

// Declaration only. Presumably performs the convolution using the buffers set
// up by carma_initfftconv and stores the result in data_out — TODO confirm.
int carma_fftconv(CarmaObjS *data_out, CarmaObjS *padded_data,

                  CarmaObjC *padded_spectrum, int kernelY, int kernelX);


// Half-precision axpy helper, only declared when the build defines CAN_DO_HALF.
// Declaration only; presumably computes dest += alpha * source over N elements
// with strides incx/incy — TODO confirm against the implementation.
#ifdef CAN_DO_HALF

int custom_half_axpy(half alpha, half *source, int incx, int incy, int N,

                     half *dest, CarmaDevice *device);

#endif


/**
 * @brief Extraction of a small image from a full image. Declaration only; the
 *        definition is not part of this header.
 *
 * Judging by the names, copies an extract_size window located at center_pos of
 * d_fullimg (of size fullimg_size) into d_smallimg, optionally rolled — TODO
 * confirm the exact indexing and the effect of roll.
 */
template <typename T>
int extract(T *d_smallimg, const T *d_fullimg, int fullimg_size,
            int center_pos, int extract_size, bool roll);


#endif  // _CARMA_OBJ_H_