compass/rc/carma__utils_8cuh_source.html

// -----------------------------------------------------------------------------

//  This file is part of COMPASS <https://anr-compass.github.io/compass/>

//

//  Copyright (C) 2012-2019 COMPASS Team <https://github.com/ANR-COMPASS>

//  All rights reserved.

//  Distributed under GNU - LGPL

//

//  COMPASS is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser

//  General Public License as published by the Free Software Foundation, either version 3 of the License,

//  or any later version.

//

//  COMPASS: End-to-end AO simulation tool using GPU acceleration

//  The COMPASS platform was designed to meet the need of high-performance for the simulation of AO systems.

//

//  The final product includes a software package for simulating all the critical subcomponents of AO,

//  particularly in the context of the ELT and a real-time core based on several control approaches,

//  with performances consistent with its integration into an instrument. Taking advantage of the specific

//  hardware architecture of the GPU, the COMPASS tool allows to achieve adequate execution speeds to

//  conduct large simulation campaigns called to the ELT.

//

//  The COMPASS platform can be used to carry a wide variety of simulations to both testspecific components

//  of AO of the E-ELT (such as wavefront analysis device with a pyramid or elongated Laser star), and

//  various systems configurations such as multi-conjugate AO.

//

//  COMPASS is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the

//  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

//  See the GNU Lesser General Public License for more details.

//

//  You should have received a copy of the GNU Lesser General Public License along with COMPASS.

//  If not, see <https://www.gnu.org/licenses/lgpl-3.0.txt>.

// -----------------------------------------------------------------------------


//! \file      carma_utils.cuh

//! \ingroup   libcarma

//! \brief     this file provides tools to CarmaObj

//! \author    COMPASS Team <https://github.com/ANR-COMPASS>

//! \version   5.0.0

//! \date      2011/01/28

//! \copyright GNU Lesser General Public License


#ifndef _CARMA_UTILS_CUH_

#define _CARMA_UTILS_CUH_


#include <cufft.h>

#include <stdio.h>

#include <stdlib.h>

#include <string.h>


#include <driver_types.h>

#include <vector_types.h>


#include <cub/cub.cuh>

#include <cuda.h>

#include <cuda_runtime_api.h>


template <class T> struct SharedMemory {

  __device__ inline operator T *() {

    extern __shared__ int __smem[];

    return (T *)__smem;

  }


  __device__ inline operator const T *() const {

    extern __shared__ int __smem[];

    return (T *)__smem;

  }

};


// specialize for double to avoid unaligned memory

// access compile errors

template <> struct SharedMemory<double> {

  __device__ inline operator double *() {

    extern __shared__ double __smem_d[];

    return (double *)__smem_d;

  }


  __device__ inline operator const double *() const {

    extern __shared__ double __smem_d[];

    return (double *)__smem_d;

  }

};

template <> struct SharedMemory<float> {

  __device__ inline operator float *() {

    extern __shared__ float __smem_f[];

    return (float *)__smem_f;

  }


  __device__ inline operator const float *() const {

    extern __shared__ float __smem_f[];

    return (float *)__smem_f;

  }

};


template <class T> __device__ inline void mswap(T &a, T &b) {

  T tmp = a;

  a = b;

  b = tmp;

}


template <class T>

__inline__ __device__ void reduce_krnl(T *sdata, int size, int n) {

  if (size & (size - 1)) { // test if size is not a power of 2

    unsigned int s;

    if ((size & 1) != 0)

      s = size / 2 + 1; //(size&1)==size%2

    else

      s = size / 2;

    unsigned int s_old = size;

    while (s > 0) {

      if ((n < s) && (n + s < s_old)) {

        sdata[n] += sdata[n + s];

      }

      //__threadfence_block();

      //__threadfence();

      __syncthreads();

      s_old = s;

      s /= 2;

      if ((2 * s < s_old) && (s != 0))

        s += 1;

    }

  } else {

    // do reduction in shared mem

    for (unsigned int s = size / 2; s > 0; s >>= 1) {

      if (n < s) {

        sdata[n] += sdata[n + s];

      }

      //__threadfence_block();

      //__threadfence();

      __syncthreads();

    }

  }

}


template <class T_data>

__inline__ __device__ T_data carma_clip(T_data n, T_data min, T_data max) {

  return n > max ? max : (n < min ? min : n);

}


#endif //_CARMA_UTILS_CUH_