COMPASS  5.4.4
End-to-end AO simulation tool using GPU acceleration
carma_context.h
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------
2 // This file is part of COMPASS <https://anr-compass.github.io/compass/>
3 //
4 // Copyright (C) 2011-2023 COMPASS Team <https://github.com/ANR-COMPASS>
5 // All rights reserved.
6 
7 // -----------------------------------------------------------------------------
8 
16 
17 #ifndef _CARMA_CONTEXT_H_
18 #define _CARMA_CONTEXT_H_
19 
20 #include <cuda_runtime_api.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <vector_types.h>
24 #include <memory>
25 #include <string>
26 #include <vector>
27 
28 #include <cublas_v2.h>
29 #include <cusparse_v2.h>
30 #include <cusolverDn.h>
31 #include "carma_utils.h"
32 
33 class CarmaDevice {
34  protected:
35  int id;
36  cudaDeviceProp properties;
37  float compute_perf;
38  float cores_per_sm;
39  size_t free_mem;
40  size_t total_mem;
41 
42  cublasHandle_t cublas_handle;
43  cusparseHandle_t cusparse_handle;
44  cusolverDnHandle_t cusolver_handle;
45  cudaStream_t main_stream;
46 
47  public:
48  CarmaDevice(int devid);
49  int set_cublas_math_mode(bool tensor);
50  // CarmaDevice(const CarmaDevice& device);
52  cudaStream_t get_stream() { return main_stream; }
53  int get_id() { return id; }
54  cudaDeviceProp get_properties() { return properties; }
55  float get_compute_perf() { return compute_perf; }
56  float get_cores_per_sm() { return cores_per_sm; }
57  bool is_gpu_capable_p2p() { return (bool)(properties.major >= 2); }
58 
59  std::string get_name() { return properties.name; }
60 
61  size_t get_total_mem() { return total_mem; }
62  size_t get_free_mem() {
63  carma_safe_call(cudaMemGetInfo(&total_mem, &total_mem));
64  return total_mem;
65  }
66 
67  cublasHandle_t get_cublas_handle() { return cublas_handle; }
68  cusparseHandle_t get_cusparse_handle() { return cusparse_handle; }
69  cusolverDnHandle_t get_cusolver_handle() { return cusolver_handle; }
70 };
71 
72 #define set_active_device(new_device, silent) \
73  _set_active_device(new_device, silent, __FILE__, __LINE__)
74 #define set_active_device_force(new_device, silent) \
75  _set_active_device_force(new_device, silent, __FILE__, __LINE__)
76 #define set_active_deviceForCpy(new_device, silent) \
77  _set_active_device_for_copy(new_device, silent, __FILE__, __LINE__)
78 
79 class CarmaContext {
80  private:
81  int ndevice;
82  std::vector<CarmaDevice*> devices;
83  int active_device;
84  int** can_access_peer;
85 
87  static std::shared_ptr<CarmaContext> s_instance;
88 
89  CarmaContext();
90  CarmaContext(int num_device);
91  CarmaContext(int nb_devices, int32_t* devices);
92 
93  CarmaContext& operator=(const CarmaContext&) { return *s_instance; }
95  : ndevice(0), active_device(0), can_access_peer(nullptr) {}
96 
97  void init_context(const int nb_devices, int32_t* devices_id);
98 
99  public:
101 
102  static CarmaContext& instance_1gpu(int num_device);
103  static CarmaContext& instance_ngpu(int nb_devices, int32_t* devices_id);
105 
106  int get_ndevice() { return ndevice; }
107  CarmaDevice* get_device(int dev) { return devices[dev]; }
108  int get_active_device() { return active_device; }
109  int get_active_real_device() { return devices[active_device]->get_id(); }
110 
112  int runtime_version;
113  carma_safe_call(cudaRuntimeGetVersion(&runtime_version));
114  return runtime_version;
115  }
116 
118  int driver_version;
119  carma_safe_call(cudaRuntimeGetVersion(&driver_version));
120  return driver_version;
121  }
122 
123  std::string get_device_name(int device);
124  std::string get_device_info(int device);
125  std::string get_device_mem_info(int device);
126 
127  inline int _set_active_device_for_copy(int new_device, int silent,
128  std::string file, int line) {
129  if (new_device > ndevice) return -1;
130  return (can_access_peer[active_device][new_device] != 1)
131  ? _set_active_device(new_device, silent, file, line)
132  : active_device;
133  }
134  inline int _set_active_device(int new_device, int silent, std::string file,
135  int line) {
136  return (this->active_device != new_device)
137  ? _set_active_device_force(new_device, silent, file, line)
138  : active_device;
139  }
140  int _set_active_device_force(int new_device, int silent, std::string file,
141  int line);
143  cublasHandle_t get_cublas_handle() { return get_cublas_handle(active_device); }
144  cusparseHandle_t get_cusparse_handle() {
145  return get_cusparse_handle(active_device);
146  }
147 
148  cublasHandle_t get_cublas_handle(int device) {
149  return devices[device]->get_cublas_handle();
150  }
151  cusparseHandle_t get_cusparse_handle(int device) {
152  return devices[device]->get_cusparse_handle();
153  }
154  bool can_p2p(int dev1, int dev2) { return can_access_peer[dev1][dev2]; }
155 
156  std::string magma_info();
157 };
158 
/**
 * Map a compute capability (major.minor) to the number of CUDA cores per
 * streaming multiprocessor. Adapted from helper_cuda.h in the CUDA samples.
 *
 * @param major compute-capability major version
 * @param minor compute-capability minor version
 * @return cores per SM for a known architecture; for an unknown one, a
 *         message is printed and the value of the last table entry is used.
 */
inline int convert_sm_version2cores(int major, int minor) {
  // One table row per architecture. `sm` is 0xMm in hexadecimal notation:
  // M = SM major version, m = SM minor version.
  struct SmCores {
    int sm;
    int cores;
  };

  static const SmCores kCoresPerSm[] = {
      {0x20, 32},   // Fermi Generation (SM 2.0) GF100 class
      {0x21, 48},   // Fermi Generation (SM 2.1) GF10x class
      {0x30, 192},  // Kepler Generation (SM 3.0) GK10x class
      {0x32, 192},  // Kepler Generation (SM 3.2) GK10x class
      {0x35, 192},  // Kepler Generation (SM 3.5) GK11x class
      {0x37, 192},  // Kepler Generation (SM 3.7) GK21x class
      {0x50, 128},  // Maxwell Generation (SM 5.0) GM10x class
      {0x52, 128},  // Maxwell Generation (SM 5.2) GM20x class
      {0x53, 128},  // Maxwell Generation (SM 5.3) GM20x class
      {0x60, 64},   // Pascal Generation (SM 6.0) GP100 class
      {0x61, 128},  // Pascal Generation (SM 6.1) GP10x class
      {0x62, 128},  // Pascal Generation (SM 6.2) GP10x class
      {0x70, 64},   // Volta Generation (SM 7.0) GV100 class
      {0x72, 64},   // Volta Generation (SM 7.2) GV10B class
      {0x75, 64},   // Turing Generation (SM 7.5) TU10x class
      {0x80, 64},   // Ampere Generation (SM 8.0) GA100 class
      {0x86, 128},  // Ampere Generation (SM 8.6) GA10x class
      {0x87, 128},  // Ampere Generation (SM 8.7) GA10B class
      {0x89, 128},  // Ada Lovelace Generation (SM 8.9) AD10x class
      {0x90, 128},  // Hopper Generation (SM 9.0) GH100 class
  };
  const int n_entries =
      static_cast<int>(sizeof(kCoresPerSm) / sizeof(kCoresPerSm[0]));

  const int sm_code = (major << 4) + minor;
  for (int i = 0; i < n_entries; ++i) {
    if (kCoresPerSm[i].sm == sm_code) {
      return kCoresPerSm[i].cores;
    }
  }

  // Unknown architecture: fall back to the most recent entry so that callers
  // still get a usable value.
  const int fallback_cores = kCoresPerSm[n_entries - 1].cores;
  printf(
      "MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM\n",
      major, minor, fallback_cores);
  return fallback_cores;
}
206 #endif // _CARMA_CONTEXT_H_
int convert_sm_version2cores(int major, int minor)
from /usr/local/cuda/samples/common/inc/helper_cuda.h
this file provides tools to CarmaObj
#define carma_safe_call(err)
Definition: carma_utils.h:108
this class provides the context in which CarmaObj are created
Definition: carma_context.h:79
std::string get_device_info(int device)
int get_cuda_driver_get_version()
int get_max_gflops_device_id()
int get_cuda_runtime_get_version()
int get_active_device()
int _set_active_device_for_copy(int new_device, int silent, std::string file, int line)
int get_active_real_device()
CarmaDevice * get_device(int dev)
cublasHandle_t get_cublas_handle(int device)
bool can_p2p(int dev1, int dev2)
static CarmaContext & instance()
int _set_active_device_force(int new_device, int silent, std::string file, int line)
static CarmaContext & instance_ngpu(int nb_devices, int32_t *devices_id)
cusparseHandle_t get_cusparse_handle(int device)
cublasHandle_t get_cublas_handle()
std::string get_device_name(int device)
std::string get_device_mem_info(int device)
cusparseHandle_t get_cusparse_handle()
int _set_active_device(int new_device, int silent, std::string file, int line)
static CarmaContext & instance_1gpu(int num_device)
std::string magma_info()
std::string get_name()
Definition: carma_context.h:59
cusparseHandle_t cusparse_handle
Definition: carma_context.h:43
cusolverDnHandle_t cusolver_handle
Definition: carma_context.h:44
size_t total_mem
Definition: carma_context.h:40
bool is_gpu_capable_p2p()
Definition: carma_context.h:57
float get_cores_per_sm()
Definition: carma_context.h:56
cudaDeviceProp get_properties()
Definition: carma_context.h:54
cublasHandle_t cublas_handle
Definition: carma_context.h:42
cudaDeviceProp properties
Definition: carma_context.h:36
float get_compute_perf()
Definition: carma_context.h:55
size_t get_free_mem()
Definition: carma_context.h:62
float cores_per_sm
Definition: carma_context.h:38
cudaStream_t main_stream
Definition: carma_context.h:45
cusolverDnHandle_t get_cusolver_handle()
Definition: carma_context.h:69
cusparseHandle_t get_cusparse_handle()
Definition: carma_context.h:68
cudaStream_t get_stream()
Definition: carma_context.h:52
CarmaDevice(int devid)
size_t get_total_mem()
Definition: carma_context.h:61
int set_cublas_math_mode(bool tensor)
cublasHandle_t get_cublas_handle()
Definition: carma_context.h:67
size_t free_mem
Definition: carma_context.h:39
float compute_perf
Definition: carma_context.h:37