17 #ifndef _CARMA_CONTEXT_H_
18 #define _CARMA_CONTEXT_H_
20 #include <cuda_runtime_api.h>
23 #include <vector_types.h>
28 #include <cublas_v2.h>
29 #include <cusparse_v2.h>
30 #include <cusolverDn.h>
/**
 * @brief Call-site wrapper around _set_active_device().
 *
 * Forwards @p new_device and @p silent unchanged and appends the caller's
 * __FILE__ and __LINE__, so the callee receives the location of the call.
 * The expansion is a single call expression; its value is whatever
 * _set_active_device() returns.
 */
#define set_active_device(new_device, silent) \
  _set_active_device(new_device, silent, __FILE__, __LINE__)
/**
 * @brief Call-site wrapper around _set_active_device_force().
 *
 * Forwards @p new_device and @p silent unchanged and appends the caller's
 * __FILE__ and __LINE__, so the callee receives the location of the call.
 * The expansion is a single call expression; its value is whatever
 * _set_active_device_force() returns.
 */
#define set_active_device_force(new_device, silent) \
  _set_active_device_force(new_device, silent, __FILE__, __LINE__)
/**
 * @brief Call-site wrapper around _set_active_device_for_copy().
 *
 * Forwards @p new_device and @p silent unchanged and appends the caller's
 * __FILE__ and __LINE__, so the callee receives the location of the call.
 * The expansion is a single call expression; its value is whatever
 * _set_active_device_for_copy() returns.
 *
 * The mixed-case macro name is kept as-is because existing callers use it.
 */
#define set_active_deviceForCpy(new_device, silent) \
  _set_active_device_for_copy(new_device, silent, __FILE__, __LINE__)
82 std::vector<CarmaDevice*> devices;
84 int** can_access_peer;
87 static std::shared_ptr<CarmaContext> s_instance;
95 : ndevice(0), active_device(0), can_access_peer(
nullptr) {}
97 void init_context(
const int nb_devices, int32_t* devices_id);
114 return runtime_version;
120 return driver_version;
128 std::string file,
int line) {
129 if (new_device > ndevice)
return -1;
130 return (can_access_peer[active_device][new_device] != 1)
136 return (this->active_device != new_device)
149 return devices[device]->get_cublas_handle();
152 return devices[device]->get_cusparse_handle();
154 bool can_p2p(
int dev1,
int dev2) {
return can_access_peer[dev1][dev2]; }
169 sSMtoCores nGpuArchCoresPerSM[] = {
191 while (nGpuArchCoresPerSM[index].SM != -1) {
192 if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) {
193 return nGpuArchCoresPerSM[index].Cores;
202 "MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM\n",
203 major, minor, nGpuArchCoresPerSM[index - 1].Cores);
204 return nGpuArchCoresPerSM[index - 1].Cores;
int convert_sm_version2cores(int major, int minor)
from /usr/local/cuda/samples/common/inc/helper_cuda.h
This file provides tools for CarmaObj.
#define carma_safe_call(err)
This class provides the context in which CarmaObj instances are created.
std::string get_device_info(int device)
int get_cuda_driver_get_version()
int get_max_gflops_device_id()
int get_cuda_runtime_get_version()
int _set_active_device_for_copy(int new_device, int silent, std::string file, int line)
int get_active_real_device()
CarmaDevice * get_device(int dev)
cublasHandle_t get_cublas_handle(int device)
bool can_p2p(int dev1, int dev2)
static CarmaContext & instance()
int _set_active_device_force(int new_device, int silent, std::string file, int line)
static CarmaContext & instance_ngpu(int nb_devices, int32_t *devices_id)
cusparseHandle_t get_cusparse_handle(int device)
cublasHandle_t get_cublas_handle()
std::string get_device_name(int device)
std::string get_device_mem_info(int device)
cusparseHandle_t get_cusparse_handle()
int _set_active_device(int new_device, int silent, std::string file, int line)
static CarmaContext & instance_1gpu(int num_device)
cusparseHandle_t cusparse_handle
cusolverDnHandle_t cusolver_handle
bool is_gpu_capable_p2p()
cudaDeviceProp get_properties()
cublasHandle_t cublas_handle
cudaDeviceProp properties
cusolverDnHandle_t get_cusolver_handle()
cusparseHandle_t get_cusparse_handle()
cudaStream_t get_stream()
int set_cublas_math_mode(bool tensor)
cublasHandle_t get_cublas_handle()