#include <cuda_runtime_api.h>
#include <stdio.h>
#include <stdlib.h>
#include <vector_types.h>
#include <memory>
#include <string>
#include <vector>
#include "carma_cublas.h"
#include "carma_cusparse.h"
#include "carma_utils.h"

Include dependency graph for carma_context.h:

This graph shows which files directly or indirectly include this file:

Classes
class	CarmaDevice

class	CarmaContext
	This class provides the context in which CarmaObj instances are created. More...

Macros
#define	set_active_device(new_device, silent) _set_active_device(new_device, silent, __FILE__, __LINE__)

#define	set_active_device_force(new_device, silent) _set_active_device_force(new_device, silent, __FILE__, __LINE__)

#define	set_active_deviceForCpy(new_device, silent) _set_active_device_for_copy(new_device, silent, __FILE__, __LINE__)

Functions
int	convert_sm_version2cores (int major, int minor)
	Returns the number of cores per SM for a given SM version; adapted from /usr/local/cuda/samples/common/inc/helper_cuda.h More...

Macro Definition Documentation

◆ set_active_device

#define set_active_device	(	new_device,
		silent
	)	_set_active_device(new_device, silent, __FILE__, __LINE__)

Definition at line 98 of file carma_context.h.

◆ set_active_device_force

#define set_active_device_force	(	new_device,
		silent
	)	_set_active_device_force(new_device, silent, __FILE__, __LINE__)

Definition at line 100 of file carma_context.h.

◆ set_active_deviceForCpy

#define set_active_deviceForCpy	(	new_device,
		silent
	)	_set_active_device_for_copy(new_device, silent, __FILE__, __LINE__)

Definition at line 102 of file carma_context.h.

Function Documentation

◆ convert_sm_version2cores()

int convert_sm_version2cores	(	int	major,
		int	minor
	)

Returns the number of cores per SM for the given SM version (major.minor). Adapted from /usr/local/cuda/samples/common/inc/helper_cuda.h.

Definition at line 185 of file carma_context.h.

                                                           {
   // Defines for GPU Architecture types (using the SM version to determine the #
   // of cores per SM
   typedef struct {
     int SM;  // 0xMm (hexidecimal notation), M = SM Major version, and m = SM
              // minor version
     int Cores;
   } sSMtoCores;
  
   sSMtoCores nGpuArchCoresPerSM[] = {
       {0x20, 32},   // Fermi Generation (SM 2.0) GF100 class
       {0x21, 48},   // Fermi Generation (SM 2.1) GF10x class
       {0x30, 192},  // Kepler Generation (SM 3.0) GK10x class
       {0x32, 192},  // Kepler Generation (SM 3.2) GK10x class
       {0x35, 192},  // Kepler Generation (SM 3.5) GK11x class
       {0x37, 192},  // Kepler Generation (SM 3.7) GK21x class
       {0x50, 128},  // Maxwell Generation (SM 5.0) GM10x class
       {0x52, 128},  // Maxwell Generation (SM 5.2) GM20x class
       {0x53, 128},  // Maxwell Generation (SM 5.3) GM20x class
       {0x60, 64},   // Pascal Generation (SM 6.0) GP100 class
       {0x61, 128},  // Pascal Generation (SM 6.1) GP10x class
       {0x62, 128},  // Pascal Generation (SM 6.2) GP10x class
       {0x70, 64},   // Volta Generation (SM 7.0) GV100 class
       {0x72, 64},   // Volta Generation (SM 7.2) GV10B class
       {0x75, 64},   // Turing Generation (SM 7.5) TU100 class
       {-1, -1}};
  
   int index = 0;
  
   while (nGpuArchCoresPerSM[index].SM != -1) {
     if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) {
       return nGpuArchCoresPerSM[index].Cores;
     }
  
     index++;
   }
  
   // If we don't find the values, we default use the previous one to run
   // properly
   printf(
       "MapSMtoCores for SM %d.%d is undefined.  Default to use %d Cores/SM\n",
       major, minor, nGpuArchCoresPerSM[index - 1].Cores);
   return nGpuArchCoresPerSM[index - 1].Cores;
 }

Classes