File size: 5,032 Bytes
9375c9a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 |
// Copyright (C) 2017 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_DNN_CuDA_DATA_PTR_CPP_
#define DLIB_DNN_CuDA_DATA_PTR_CPP_
#ifdef DLIB_USE_CUDA
#include "cuda_data_ptr.h"
#include "cuda_utils.h"
namespace dlib
{
namespace cuda
{
// ----------------------------------------------------------------------------------------
// Creates a weak handle that observes the allocation held by `ptr`.
//   ptr - the cuda_data_void_ptr to observe; its `num` (size) and `pdata`
//         (allocation handle) members are copied into this object.
// The resulting object can later be turned back into a cuda_data_void_ptr
// via lock().
weak_cuda_data_void_ptr::weak_cuda_data_void_ptr(const cuda_data_void_ptr& ptr)
    : num(ptr.num),
      pdata(ptr.pdata)
{
    // Nothing else to do: both members are fully set up by the initializers.
}
// ----------------------------------------------------------------------------------------
// Attempts to obtain a usable cuda_data_void_ptr from this weak handle.
// Returns:
//   - a cuda_data_void_ptr referring to the observed allocation (with the
//     recorded size `num`) when pdata.lock() yields a non-null pointer;
//   - a default-constructed cuda_data_void_ptr otherwise.
cuda_data_void_ptr weak_cuda_data_void_ptr::lock() const
{
    // Starts out default-constructed, which is exactly the failure result.
    cuda_data_void_ptr result;

    if (auto strong = pdata.lock())
    {
        result.pdata = strong;
        result.num = num;
    }

    return result;
}
// -----------------------------------------------------------------------------------
// -----------------------------------------------------------------------------------
// Allocates a block of n bytes with cudaMalloc() and takes ownership of it.
//   n - number of bytes to allocate.  When n == 0 nothing is allocated and
//       pdata is left untouched; only the size is recorded.
// A failing cudaMalloc() is handled by CHECK_CUDA (defined outside this file).
// The allocation is released with cudaFree() by the deleter installed on
// pdata; a failing cudaFree() is reported on std::cerr rather than propagated.
cuda_data_void_ptr::cuda_data_void_ptr(size_t n)
    : num(n)
{
    if (n != 0)
    {
        void* data = nullptr;
        CHECK_CUDA(cudaMalloc(&data, n));

        // Deleter: free the block and log (but do not throw) on failure.
        const auto release = [](void* block)
        {
            const auto status = cudaFree(block);
            if (status != cudaSuccess)
            {
                std::cerr << "cudaFree() failed. Reason: " << cudaGetErrorString(status) << std::endl;
            }
        };

        pdata.reset(data, release);
    }
}
// ------------------------------------------------------------------------------------
// Copies the first `num` bytes of the CUDA buffer `src` into `dest`.
//   dest - destination address; must have room for at least num bytes.
//   src  - source buffer.
//   num  - number of bytes to copy; DLIB_ASSERT checks num <= src.size().
// Nothing is copied when src is empty.  The transfer uses cudaMemcpyDefault,
// i.e. the CUDA runtime infers the copy direction from the pointer values.
void memcpy(
    void* dest,
    const cuda_data_void_ptr& src,
    const size_t num
)
{
    DLIB_ASSERT(num <= src.size());

    // An empty source has nothing to read from, so skip the CUDA call entirely.
    if (src.size() == 0)
        return;

    CHECK_CUDA(cudaMemcpy(dest, src.data(), num, cudaMemcpyDefault));
}
// ------------------------------------------------------------------------------------
void memcpy(
void* dest,
const cuda_data_void_ptr& src
)
{
memcpy(dest, src, src.size());
}
// ------------------------------------------------------------------------------------
// Copies `num` bytes starting at `src` into the CUDA buffer `dest`.
//   dest - destination buffer (passed by value; the copy refers to the same
//          underlying allocation).
//   src  - source address; must provide at least num readable bytes.
//   num  - number of bytes to copy; DLIB_ASSERT checks num <= dest.size().
// Nothing is copied when dest is empty.  The transfer uses cudaMemcpyDefault,
// i.e. the CUDA runtime infers the copy direction from the pointer values.
void memcpy(
    cuda_data_void_ptr dest,
    const void* src,
    const size_t num
)
{
    DLIB_ASSERT(num <= dest.size());

    // An empty destination cannot receive anything, so skip the CUDA call.
    if (dest.size() == 0)
        return;

    CHECK_CUDA(cudaMemcpy(dest.data(), src, num, cudaMemcpyDefault));
}
// ------------------------------------------------------------------------------------
// Fills the whole CUDA buffer `dest` (dest.size() bytes) from `src` by
// forwarding to the three-argument overload.
//   dest - destination buffer.
//   src  - source address; must provide at least dest.size() readable bytes.
void memcpy(
    cuda_data_void_ptr dest,
    const void* src
)
{
    const size_t whole_buffer = dest.size();
    memcpy(dest, src, whole_buffer);
}
// ------------------------------------------------------------------------------------
// Keeps one weakly-referenced scratch buffer per CUDA device id.  The table
// holds weak handles only, so a buffer stays alive just as long as some
// caller still holds the cuda_data_void_ptr returned by get().
class cudnn_device_buffer
{
public:
    // Start with room for 16 device slots; get() grows the table on demand.
    cudnn_device_buffer() : buffers(16)
    {
    }

    ~cudnn_device_buffer()
    {
    }

    // not copyable
    cudnn_device_buffer(const cudnn_device_buffer&) = delete;
    cudnn_device_buffer& operator=(const cudnn_device_buffer&) = delete;

    // Returns a buffer of at least `size` bytes for the device reported by
    // cudaGetDevice().  The previously handed-out buffer for that device is
    // reused when it is still alive and large enough; otherwise a new one of
    // exactly `size` bytes is allocated and remembered.
    cuda_data_void_ptr get (
        size_t size
    )
    {
        int new_device_id;
        CHECK_CUDA(cudaGetDevice(&new_device_id));

        // Grow the slot table if this device id has not been seen before.
        if (new_device_id >= (long)buffers.size())
            buffers.resize(new_device_id+16);

        // Reference taken after the possible resize above, so it stays valid.
        weak_cuda_data_void_ptr& slot = buffers[new_device_id];

        cuda_data_void_ptr buff = slot.lock();
        const bool needs_new_buffer = !buff || buff.size() < size;
        if (needs_new_buffer)
        {
            buff = cuda_data_void_ptr(size);
            slot = buff;
        }

        return buff;
    }

private:
    // Indexed by CUDA device id.
    std::vector<weak_cuda_data_void_ptr> buffers;
};
// ----------------------------------------------------------------------------------------
// Returns a buffer of at least `size` bytes for the current CUDA device,
// obtained from a cudnn_device_buffer that is thread_local (each calling
// thread has its own instance).
cuda_data_void_ptr device_global_buffer(size_t size)
{
    thread_local cudnn_device_buffer per_thread_buffers;

    cuda_data_void_ptr result = per_thread_buffers.get(size);
    return result;
}
// ------------------------------------------------------------------------------------
}
}
#endif // DLIB_USE_CUDA
#endif // DLIB_DNN_CuDA_DATA_PTR_CPP_
|