compass/latest/carma__context_8h_source.html

 // -----------------------------------------------------------------------------

 //  This file is part of COMPASS <https://anr-compass.github.io/compass/>

 //

 //  Copyright (C) 2011-2023 COMPASS Team <https://github.com/ANR-COMPASS>

 //  All rights reserved.


 // -----------------------------------------------------------------------------


 #ifndef _CARMA_CONTEXT_H_

 #define _CARMA_CONTEXT_H_


 #include <cuda_runtime_api.h>

 #include <stdio.h>

 #include <stdlib.h>

 #include <vector_types.h>

 #include <memory>

 #include <string>

 #include <vector>


 #include <cublas_v2.h>

 #include <cusparse_v2.h>

 #include <cusolverDn.h>

 #include "carma_utils.h"


 /**
  * Holds the per-GPU state used by CarmaContext: the device properties, some
  * derived performance figures, cached memory sizes, and the library handles
  * (cuBLAS, cuSPARSE, cuSOLVER) plus a CUDA stream associated with the device.
  *
  * Only the accessors are defined in this header; the constructor, destructor
  * and set_cublas_math_mode() are defined elsewhere.
  */
 class CarmaDevice {
  protected:
   int id;                     // value reported by get_id()
   cudaDeviceProp properties;  // value reported by get_properties()
   float compute_perf;         // value reported by get_compute_perf()
   float cores_per_sm;         // value reported by get_cores_per_sm()
   size_t free_mem;            // last free byte count read by get_free_mem()
   size_t total_mem;           // cached total byte count

   cublasHandle_t cublas_handle;
   cusparseHandle_t cusparse_handle;
   cusolverDnHandle_t cusolver_handle;
   cudaStream_t main_stream;

  public:
   // NOTE(review): presumably devid is the CUDA device ordinal stored in `id`;
   // the definition is not visible in this header - confirm.
   CarmaDevice(int devid);
   int set_cublas_math_mode(bool tensor);
   // CarmaDevice(const CarmaDevice& device);
   ~CarmaDevice();

   /** @return the stream stored in main_stream. */
   cudaStream_t get_stream() { return main_stream; }
   /** @return the stored device id. */
   int get_id() { return id; }
   /** @return a copy of the stored cudaDeviceProp structure. */
   cudaDeviceProp get_properties() { return properties; }
   /** @return the stored compute performance figure. */
   float get_compute_perf() { return compute_perf; }
   /** @return the stored number of cores per SM. */
   float get_cores_per_sm() { return cores_per_sm; }
   /** @return true when the device's major compute capability is >= 2. */
   bool is_gpu_capable_p2p() { return properties.major >= 2; }

   /** @return the device name taken from the stored properties. */
   std::string get_name() { return properties.name; }

   /** @return the cached total memory size (refreshed by get_free_mem()). */
   size_t get_total_mem() { return total_mem; }

   /**
    * Refreshes the cached free/total memory sizes and returns the free amount.
    *
    * The free size is written to free_mem and the total size to total_mem, so
    * the cached total is no longer overwritten by the free byte count.
    *
    * NOTE(review): cudaMemGetInfo reports on the calling thread's current
    * device; callers presumably have to make this device active first -
    * confirm against the call sites.
    *
    * @return free device memory in bytes.
    */
   size_t get_free_mem() {
     carma_safe_call(cudaMemGetInfo(&free_mem, &total_mem));
     return free_mem;
   }

   /** @return the stored cuBLAS handle. */
   cublasHandle_t get_cublas_handle() { return cublas_handle; }
   /** @return the stored cuSPARSE handle. */
   cusparseHandle_t get_cusparse_handle() { return cusparse_handle; }
   /** @return the stored cuSOLVER (dense) handle. */
   cusolverDnHandle_t get_cusolver_handle() { return cusolver_handle; }
 };


 // Call-site helpers: each macro forwards to the identically named
 // `_`-prefixed function and appends __FILE__ and __LINE__ of the call site as
 // the trailing arguments. The replacement text must sit directly on the line
 // after the trailing backslash; an empty line there ends the macro early and
 // leaves it expanding to nothing.
 #define set_active_device(new_device, silent) \
   _set_active_device(new_device, silent, __FILE__, __LINE__)
 #define set_active_device_force(new_device, silent) \
   _set_active_device_force(new_device, silent, __FILE__, __LINE__)
 #define set_active_deviceForCpy(new_device, silent) \
   _set_active_device_for_copy(new_device, silent, __FILE__, __LINE__)


 class CarmaContext {

  private:

   int ndevice;

   std::vector<CarmaDevice*> devices;

   int active_device;

   int** can_access_peer;


   static std::shared_ptr<CarmaContext> s_instance;


   CarmaContext();

   CarmaContext(int num_device);

   CarmaContext(int nb_devices, int32_t* devices);


   CarmaContext& operator=(const CarmaContext&) { return *s_instance; }

   CarmaContext(const CarmaContext&)

       : ndevice(0), active_device(0), can_access_peer(nullptr) {}


   void init_context(const int nb_devices, int32_t* devices_id);


  public:

   ~CarmaContext();


   static CarmaContext& instance_1gpu(int num_device);

   static CarmaContext& instance_ngpu(int nb_devices, int32_t* devices_id);

   static CarmaContext& instance();


   int get_ndevice() { return ndevice; }

   CarmaDevice* get_device(int dev) { return devices[dev]; }

   int get_active_device() { return active_device; }

   int get_active_real_device() { return devices[active_device]->get_id(); }


   int get_cuda_runtime_get_version() {

     int runtime_version;

     carma_safe_call(cudaRuntimeGetVersion(&runtime_version));

     return runtime_version;

   }


   int get_cuda_driver_get_version() {

     int driver_version;

     carma_safe_call(cudaRuntimeGetVersion(&driver_version));

     return driver_version;

   }


   std::string get_device_name(int device);

   std::string get_device_info(int device);

   std::string get_device_mem_info(int device);


   inline int _set_active_device_for_copy(int new_device, int silent,

                                      std::string file, int line) {

     if (new_device > ndevice) return -1;

     return (can_access_peer[active_device][new_device] != 1)

                ? _set_active_device(new_device, silent, file, line)

                : active_device;

   }

   inline int _set_active_device(int new_device, int silent, std::string file,

                                int line) {

     return (this->active_device != new_device)

                ? _set_active_device_force(new_device, silent, file, line)

                : active_device;

   }

   int _set_active_device_force(int new_device, int silent, std::string file,

                              int line);

   int get_max_gflops_device_id();

   cublasHandle_t get_cublas_handle() { return get_cublas_handle(active_device); }

   cusparseHandle_t get_cusparse_handle() {

     return get_cusparse_handle(active_device);

   }


   cublasHandle_t get_cublas_handle(int device) {

     return devices[device]->get_cublas_handle();

   }

   cusparseHandle_t get_cusparse_handle(int device) {

     return devices[device]->get_cusparse_handle();

   }

   bool can_p2p(int dev1, int dev2) { return can_access_peer[dev1][dev2]; }


   std::string magma_info();

 };


 /**
  * Maps a compute capability (SM major.minor) to the number of cores per SM.
  *
  * @param major SM major version
  * @param minor SM minor version
  * @return the cores-per-SM figure for the given SM version. When the version
  *         is not in the table, a message is printed and the value of the last
  *         table entry is returned.
  */
 inline int convert_sm_version2cores(int major, int minor) {
   // One table row: the SM version encoded as 0xMm (hexadecimal notation,
   // M = SM major version, m = SM minor version) and its cores per SM.
   struct SmCores {
     int sm;
     int cores;
   };

   // Built once instead of on every call; rows must stay ordered so that the
   // last one is the most recent architecture (it is the fallback).
   static const SmCores sm_table[] = {
       {0x20, 32},   // Fermi Generation (SM 2.0) GF100 class
       {0x21, 48},   // Fermi Generation (SM 2.1) GF10x class
       {0x30, 192},  // Kepler Generation (SM 3.0) GK10x class
       {0x32, 192},  // Kepler Generation (SM 3.2) GK10x class
       {0x35, 192},  // Kepler Generation (SM 3.5) GK11x class
       {0x37, 192},  // Kepler Generation (SM 3.7) GK21x class
       {0x50, 128},  // Maxwell Generation (SM 5.0) GM10x class
       {0x52, 128},  // Maxwell Generation (SM 5.2) GM20x class
       {0x53, 128},  // Maxwell Generation (SM 5.3) GM20x class
       {0x60, 64},   // Pascal Generation (SM 6.0) GP100 class
       {0x61, 128},  // Pascal Generation (SM 6.1) GP10x class
       {0x62, 128},  // Pascal Generation (SM 6.2) GP10x class
       {0x70, 64},   // Volta Generation (SM 7.0) GV100 class
       {0x72, 64},   // Volta Generation (SM 7.2) GV10B class
       {0x75, 64},   // Turing Generation (SM 7.5) TU10x class
       {0x80, 64},   // Ampere Generation (SM 8.0) GA100 class
       {0x86, 128},  // Ampere Generation (SM 8.6) GA10x class
       {0x87, 128},  // Ampere Generation (SM 8.7)
       {0x89, 128},  // Ada Lovelace Generation (SM 8.9)
       {0x90, 128},  // Hopper Generation (SM 9.0)
   };
   const int table_size =
       static_cast<int>(sizeof(sm_table) / sizeof(sm_table[0]));

   const int wanted = (major << 4) + minor;
   for (int i = 0; i < table_size; ++i) {
     if (sm_table[i].sm == wanted) {
       return sm_table[i].cores;
     }
   }

   // If we don't find the values, we default to the last known entry so the
   // caller can keep running.
   const int fallback_cores = sm_table[table_size - 1].cores;
   printf(
       "MapSMtoCores for SM %d.%d is undefined.  Default to use %d Cores/SM\n",
       major, minor, fallback_cores);
   return fallback_cores;
 }

 #endif  // _CARMA_CONTEXT_H_

convert_sm_version2cores
int convert_sm_version2cores(int major, int minor)
from /usr/local/cuda/samples/common/inc/helper_cuda.h
Definition: carma_context.h:160

carma_utils.h
This file provides utility tools for CarmaObj.

carma_safe_call
#define carma_safe_call(err)
Definition: carma_utils.h:108

CarmaContext
This class provides the context in which CarmaObj instances are created.
Definition: carma_context.h:79

CarmaContext::get_device_info
std::string get_device_info(int device)

CarmaContext::get_cuda_driver_get_version
int get_cuda_driver_get_version()
Definition: carma_context.h:117

CarmaContext::get_max_gflops_device_id
int get_max_gflops_device_id()

CarmaContext::get_cuda_runtime_get_version
int get_cuda_runtime_get_version()
Definition: carma_context.h:111

CarmaContext::get_active_device
int get_active_device()
Definition: carma_context.h:108

CarmaContext::_set_active_device_for_copy
int _set_active_device_for_copy(int new_device, int silent, std::string file, int line)
Definition: carma_context.h:127

CarmaContext::get_active_real_device
int get_active_real_device()
Definition: carma_context.h:109

CarmaContext::get_ndevice
int get_ndevice()
Definition: carma_context.h:106

CarmaContext::get_device
CarmaDevice * get_device(int dev)
Definition: carma_context.h:107

CarmaContext::~CarmaContext
~CarmaContext()

CarmaContext::get_cublas_handle
cublasHandle_t get_cublas_handle(int device)
Definition: carma_context.h:148

CarmaContext::can_p2p
bool can_p2p(int dev1, int dev2)
Definition: carma_context.h:154

CarmaContext::instance
static CarmaContext & instance()

CarmaContext::_set_active_device_force
int _set_active_device_force(int new_device, int silent, std::string file, int line)

CarmaContext::instance_ngpu
static CarmaContext & instance_ngpu(int nb_devices, int32_t *devices_id)

CarmaContext::get_cusparse_handle
cusparseHandle_t get_cusparse_handle(int device)
Definition: carma_context.h:151

CarmaContext::get_cublas_handle
cublasHandle_t get_cublas_handle()
Definition: carma_context.h:143

CarmaContext::get_device_name
std::string get_device_name(int device)

CarmaContext::get_device_mem_info
std::string get_device_mem_info(int device)

CarmaContext::get_cusparse_handle
cusparseHandle_t get_cusparse_handle()
Definition: carma_context.h:144

CarmaContext::_set_active_device
int _set_active_device(int new_device, int silent, std::string file, int line)
Definition: carma_context.h:134

CarmaContext::instance_1gpu
static CarmaContext & instance_1gpu(int num_device)

CarmaContext::magma_info
std::string magma_info()

CarmaDevice
Definition: carma_context.h:33

CarmaDevice::get_name
std::string get_name()
Definition: carma_context.h:59

CarmaDevice::cusparse_handle
cusparseHandle_t cusparse_handle
Definition: carma_context.h:43

CarmaDevice::cusolver_handle
cusolverDnHandle_t cusolver_handle
Definition: carma_context.h:44

CarmaDevice::total_mem
size_t total_mem
Definition: carma_context.h:40

CarmaDevice::is_gpu_capable_p2p
bool is_gpu_capable_p2p()
Definition: carma_context.h:57

CarmaDevice::get_cores_per_sm
float get_cores_per_sm()
Definition: carma_context.h:56

CarmaDevice::get_id
int get_id()
Definition: carma_context.h:53

CarmaDevice::get_properties
cudaDeviceProp get_properties()
Definition: carma_context.h:54

CarmaDevice::cublas_handle
cublasHandle_t cublas_handle
Definition: carma_context.h:42

CarmaDevice::properties
cudaDeviceProp properties
Definition: carma_context.h:36

CarmaDevice::id
int id
Definition: carma_context.h:35

CarmaDevice::get_compute_perf
float get_compute_perf()
Definition: carma_context.h:55

CarmaDevice::~CarmaDevice
~CarmaDevice()

CarmaDevice::get_free_mem
size_t get_free_mem()
Definition: carma_context.h:62

CarmaDevice::cores_per_sm
float cores_per_sm
Definition: carma_context.h:38

CarmaDevice::main_stream
cudaStream_t main_stream
Definition: carma_context.h:45

CarmaDevice::get_cusolver_handle
cusolverDnHandle_t get_cusolver_handle()
Definition: carma_context.h:69

CarmaDevice::get_cusparse_handle
cusparseHandle_t get_cusparse_handle()
Definition: carma_context.h:68

CarmaDevice::get_stream
cudaStream_t get_stream()
Definition: carma_context.h:52

CarmaDevice::CarmaDevice
CarmaDevice(int devid)

CarmaDevice::get_total_mem
size_t get_total_mem()
Definition: carma_context.h:61

CarmaDevice::set_cublas_math_mode
int set_cublas_math_mode(bool tensor)

CarmaDevice::get_cublas_handle
cublasHandle_t get_cublas_handle()
Definition: carma_context.h:67

CarmaDevice::free_mem
size_t free_mem
Definition: carma_context.h:39

CarmaDevice::compute_perf
float compute_perf
Definition: carma_context.h:37