COMPASS  5.0.0
End-to-end AO simulation tool using GPU acceleration
carma_context.h File Reference
#include <cuda_runtime_api.h>
#include <stdio.h>
#include <stdlib.h>
#include <vector_types.h>
#include <memory>
#include <string>
#include <vector>
#include "carma_cublas.h"
#include "carma_cusparse.h"
#include "carma_utils.h"
Include dependency graph for carma_context.h:
This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Classes

class  CarmaDevice
 
class  CarmaContext
 This class provides the context in which CarmaObj instances are created. More...
 

Macros

#define set_active_device(new_device, silent)   _set_active_device(new_device, silent, __FILE__, __LINE__)
 
#define set_active_device_force(new_device, silent)   _set_active_device_force(new_device, silent, __FILE__, __LINE__)
 
#define set_active_deviceForCpy(new_device, silent)   _set_active_device_for_copy(new_device, silent, __FILE__, __LINE__)
 

Functions

int convert_sm_version2cores (int major, int minor)
 Returns the number of cores per SM for a given SM version; taken from /usr/local/cuda/samples/common/inc/helper_cuda.h. More...
 

Macro Definition Documentation

◆ set_active_device

#define set_active_device (   new_device,
  silent 
)    _set_active_device(new_device, silent, __FILE__, __LINE__)

Definition at line 98 of file carma_context.h.

◆ set_active_device_force

#define set_active_device_force (   new_device,
  silent 
)    _set_active_device_force(new_device, silent, __FILE__, __LINE__)

Definition at line 100 of file carma_context.h.

◆ set_active_deviceForCpy

#define set_active_deviceForCpy (   new_device,
  silent 
)    _set_active_device_for_copy(new_device, silent, __FILE__, __LINE__)

Definition at line 102 of file carma_context.h.

Function Documentation

◆ convert_sm_version2cores()

int convert_sm_version2cores ( int  major,
int  minor 
)

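Returns the number of cores per SM for the SM version major.minor, looked up in a built-in table. If the version is not in the table, a message is printed and the core count of the last table entry is returned. Taken from /usr/local/cuda/samples/common/inc/helper_cuda.h.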

Definition at line 185 of file carma_context.h.

185  {
186  // Defines for GPU Architecture types (using the SM version to determine the #
187  // of cores per SM)
188  typedef struct {
189  int SM; // 0xMm (hexadecimal notation), M = SM Major version, and m = SM
190  // minor version
191  int Cores;
192  } sSMtoCores;
193 
194  sSMtoCores nGpuArchCoresPerSM[] = {
195  {0x20, 32}, // Fermi Generation (SM 2.0) GF100 class
196  {0x21, 48}, // Fermi Generation (SM 2.1) GF10x class
197  {0x30, 192}, // Kepler Generation (SM 3.0) GK10x class
198  {0x32, 192}, // Kepler Generation (SM 3.2) GK10x class
199  {0x35, 192}, // Kepler Generation (SM 3.5) GK11x class
200  {0x37, 192}, // Kepler Generation (SM 3.7) GK21x class
201  {0x50, 128}, // Maxwell Generation (SM 5.0) GM10x class
202  {0x52, 128}, // Maxwell Generation (SM 5.2) GM20x class
203  {0x53, 128}, // Maxwell Generation (SM 5.3) GM20x class
204  {0x60, 64}, // Pascal Generation (SM 6.0) GP100 class
205  {0x61, 128}, // Pascal Generation (SM 6.1) GP10x class
206  {0x62, 128}, // Pascal Generation (SM 6.2) GP10x class
207  {0x70, 64}, // Volta Generation (SM 7.0) GV100 class
208  {0x72, 64}, // Volta Generation (SM 7.2) GV10B class
209  {0x75, 64}, // Turing Generation (SM 7.5) TU100 class
210  {-1, -1}};
211 
212  int index = 0;
213 
214  while (nGpuArchCoresPerSM[index].SM != -1) {
215  if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) {
216  return nGpuArchCoresPerSM[index].Cores;
217  }
218 
219  index++;
220  }
221 
222  // If the SM version is not found in the table, default to the last
223  // table entry so that the code still runs properly
224  printf(
225  "MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM\n",
226  major, minor, nGpuArchCoresPerSM[index - 1].Cores);
227  return nGpuArchCoresPerSM[index - 1].Cores;
228 }
debug_pyr.index
int index
Definition: debug_pyr.py:78