|
<html><head><title>dlib C++ Library - gpu_data_abstract.h</title></head><body bgcolor='white'><pre> |
|
<font color='#009900'>// Copyright (C) 2015 Davis E. King (davis@dlib.net) |
|
</font><font color='#009900'>// License: Boost Software License See LICENSE.txt for the full license. |
|
</font><font color='#0000FF'>#undef</font> DLIB_GPU_DaTA_ABSTRACT_H_ |
|
<font color='#0000FF'>#ifdef</font> DLIB_GPU_DaTA_ABSTRACT_H_ |
|
|
|
<font color='#0000FF'>#include</font> "<a style='text-decoration:none' href='cuda_errors.h.html'>cuda_errors.h</a>" |
|
<font color='#0000FF'>#include</font> "<a style='text-decoration:none' href='../serialize.h.html'>../serialize.h</a>" |
|
|
|
<font color='#0000FF'>namespace</font> dlib |
|
<b>{</b> |
|
|
|
<font color='#009900'>// ---------------------------------------------------------------------------------------- |
|
</font> |
|
<font color='#0000FF'>class</font> <b><a name='gpu_data'></a>gpu_data</b> |
|
<b>{</b> |
|
<font color='#009900'>/*! |
|
WHAT THIS OBJECT REPRESENTS |
|
This object is a block of size() floats, all stored contiguously in memory. |
|
Importantly, it keeps two copies of the floats, one on the host CPU side |
|
and another on the GPU device side. It automatically performs the necessary |
|
host/device transfers to keep these two copies of the data in sync. |
|
|
|
All transfers to the device happen asynchronously with respect to the |
|
default CUDA stream so that CUDA kernel computations can overlap with data |
|
transfers. However, any transfers from the device to the host happen |
|
synchronously in the default CUDA stream. Therefore, you should perform |
|
all your CUDA kernel launches on the default stream so that transfers back |
|
to the host do not happen before the relevant computations have completed. |
|
|
|
If DLIB_USE_CUDA is not #defined then this object will not use CUDA at all. |
|
Instead, it will simply store one host side memory block of floats. |
|
|
|
THREAD SAFETY |
|
Instances of this object are not thread-safe. So don't touch one from |
|
multiple threads at the same time. |
|
!*/</font> |
|
<font color='#0000FF'>public</font>: |
|
|
|
<b><a name='gpu_data'></a>gpu_data</b><font face='Lucida Console'>(</font> |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- #size() == 0 |
|
- #host() == nullptr |
|
- #device() == nullptr |
|
- #host_ready() == true |
|
- #device_ready() == true |
|
- #device_id() == 0 |
|
!*/</font> |
|
|
|
<font color='#009900'>// This object is not copyable, however, it is movable. |
|
</font> <b><a name='gpu_data'></a>gpu_data</b><font face='Lucida Console'>(</font><font color='#0000FF'>const</font> gpu_data<font color='#5555FF'>&</font><font face='Lucida Console'>)</font> <font color='#5555FF'>=</font> <font color='#0000FF'>delete</font>; |
|
gpu_data<font color='#5555FF'>&</font> <b><a name='operator'></a>operator</b><font color='#5555FF'>=</font><font face='Lucida Console'>(</font><font color='#0000FF'>const</font> gpu_data<font color='#5555FF'>&</font><font face='Lucida Console'>)</font> <font color='#5555FF'>=</font> <font color='#0000FF'>delete</font>; |
|
<b><a name='gpu_data'></a>gpu_data</b><font face='Lucida Console'>(</font>gpu_data<font color='#5555FF'>&</font><font color='#5555FF'>&</font> item<font face='Lucida Console'>)</font>; |
|
gpu_data<font color='#5555FF'>&</font> <b><a name='operator'></a>operator</b><font color='#5555FF'>=</font><font face='Lucida Console'>(</font>gpu_data<font color='#5555FF'>&</font><font color='#5555FF'>&</font> item<font face='Lucida Console'>)</font>; |
|
|
|
<font color='#0000FF'><u>int</u></font> <b><a name='device_id'></a>device_id</b><font face='Lucida Console'>(</font> |
|
<font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- returns the ID of the CUDA device that allocated this memory. I.e. the |
|
number returned by cudaGetDevice() when the memory was allocated. |
|
- If CUDA is not being used then this function always returns 0. |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>void</u></font> <b><a name='async_copy_to_device'></a>async_copy_to_device</b><font face='Lucida Console'>(</font> |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- if (!device_ready()) then |
|
- Begins asynchronously copying host data to the device once it is safe |
|
to do so. I.e. this function waits until any previously |
|
scheduled CUDA kernels, which are using the device() memory block, |
|
have completed before transferring the new data to the device. |
|
- A call to device() that happens before the transfer completes will |
|
block until the transfer is complete. That is, it is safe to call |
|
async_copy_to_device() and then immediately call device(). |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>void</u></font> <b><a name='set_size'></a>set_size</b><font face='Lucida Console'>(</font> |
|
<font color='#0000FF'><u>size_t</u></font> new_size |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- #size() == new_size |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>bool</u></font> <b><a name='host_ready'></a>host_ready</b> <font face='Lucida Console'>(</font> |
|
<font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- returns true if and only if the host's copy of the data is current. The |
|
host's data is current if there aren't any modifications to the data |
|
which were made on the device side that have yet to be copied to the |
|
host. |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>bool</u></font> <b><a name='device_ready'></a>device_ready</b> <font face='Lucida Console'>(</font> |
|
<font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- returns true if and only if the device's copy of the data is current. |
|
The device's data is current if there aren't any modifications to the |
|
data which were made on the host side that have yet to be copied to the |
|
device. |
|
!*/</font> |
|
|
|
<font color='#0000FF'>const</font> <font color='#0000FF'><u>float</u></font><font color='#5555FF'>*</font> <b><a name='host'></a>host</b><font face='Lucida Console'>(</font> |
|
<font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- returns a pointer to the host memory block of size() contiguous float |
|
values or nullptr if size()==0. |
|
- if (!host_ready()) then |
|
- copies the data from the device to the host. The call to host() blocks |

until the copy has completed. |
|
- #host_ready() == true |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>float</u></font><font color='#5555FF'>*</font> <b><a name='host'></a>host</b><font face='Lucida Console'>(</font> |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- returns a pointer to the host memory block of size() contiguous float |
|
values or nullptr if size()==0. |
|
- if (!host_ready()) then |
|
- copies the data from the device to the host. The call to host() blocks |

until the copy has completed. |
|
- #host_ready() == true |
|
- #device_ready() == false |
|
I.e. it marks the device side data as out of date so that the next call to |
|
device() will perform a host to device transfer. If you want to begin |
|
the transfer immediately then you can call async_copy_to_device() after |
|
calling host(). |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>float</u></font><font color='#5555FF'>*</font> <b><a name='host_write_only'></a>host_write_only</b><font face='Lucida Console'>(</font> |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- This function returns the same pointer as host(), except that it never |
|
performs a device to host memory copy. Instead, it immediately marks the |
|
device side data as out of date, effectively discarding it. Therefore, |
|
the values in the data pointed to by host_write_only() are undefined and |
|
you should only call host_write_only() if you are going to assign to |
|
every memory location in the returned memory block. |
|
- #host_ready() == true |
|
- #device_ready() == false |
|
!*/</font> |
|
|
|
<font color='#0000FF'>const</font> <font color='#0000FF'><u>float</u></font><font color='#5555FF'>*</font> <b><a name='device'></a>device</b><font face='Lucida Console'>(</font> |
|
<font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>; |
|
<font color='#009900'>/*! |
|
requires |
|
- DLIB_USE_CUDA is #defined |
|
ensures |
|
- returns a pointer to the device memory block of size() contiguous float |
|
values or nullptr if size()==0. |
|
- if (!device_ready()) then |
|
- copies the data from the host to the device. The call to device() blocks |

until the copy has completed. |
|
- #device_ready() == true |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>float</u></font><font color='#5555FF'>*</font> <b><a name='device'></a>device</b><font face='Lucida Console'>(</font> |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
requires |
|
- DLIB_USE_CUDA is #defined |
|
ensures |
|
- returns a pointer to the device memory block of size() contiguous float |
|
values or nullptr if size()==0. |
|
- if (!device_ready()) then |
|
- copies the data from the host to the device. The call to device() blocks |

until the copy has completed. |
|
- #host_ready() == false |
|
- #device_ready() == true |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>float</u></font><font color='#5555FF'>*</font> <b><a name='device_write_only'></a>device_write_only</b><font face='Lucida Console'>(</font> |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
requires |
|
- DLIB_USE_CUDA is #defined |
|
ensures |
|
- This function returns the same pointer as device(), except that it never |
|
performs a host to device memory copy. Instead, it immediately marks the |
|
host side data as out of date, effectively discarding it. Therefore, the |
|
values in the data pointed to by device_write_only() are undefined and |
|
you should only call device_write_only() if you are going to assign to |
|
every memory location in the returned memory block. |
|
- #host_ready() == false |
|
- #device_ready() == true |
|
!*/</font> |
|
|
|
|
|
<font color='#0000FF'><u>size_t</u></font> <b><a name='size'></a>size</b><font face='Lucida Console'>(</font> |
|
<font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- returns the number of floats contained in this object. |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>void</u></font> <b><a name='swap'></a>swap</b> <font face='Lucida Console'>(</font> |
|
gpu_data<font color='#5555FF'>&</font> item |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
ensures |
|
- swaps the state of *this and item |
|
!*/</font> |
|
|
|
<b>}</b>; |
|
|
|
<font color='#0000FF'><u>void</u></font> <b><a name='serialize'></a>serialize</b><font face='Lucida Console'>(</font><font color='#0000FF'>const</font> gpu_data<font color='#5555FF'>&</font> item, std::ostream<font color='#5555FF'>&</font> out<font face='Lucida Console'>)</font>; |
|
<font color='#0000FF'><u>void</u></font> <b><a name='deserialize'></a>deserialize</b><font face='Lucida Console'>(</font>gpu_data<font color='#5555FF'>&</font> item, std::istream<font color='#5555FF'>&</font> in<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
provides serialization support |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>void</u></font> <b><a name='memcpy'></a>memcpy</b> <font face='Lucida Console'>(</font> |
|
gpu_data<font color='#5555FF'>&</font> dest, |
|
<font color='#0000FF'>const</font> gpu_data<font color='#5555FF'>&</font> src |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
requires |
|
- dest.size() == src.size() |
|
ensures |
|
- Copies the data in src to dest. If the device data is current (i.e. |
|
device_ready()==true) on both src and dest then the copy will happen entirely |
|
on the device side. |
|
- It doesn't matter what GPU device is selected by cudaSetDevice(). You can |
|
always copy gpu_data objects to and from each other regardless. |
|
- This function blocks until the copy has completed. |
|
!*/</font> |
|
|
|
<font color='#0000FF'><u>void</u></font> <b><a name='memcpy'></a>memcpy</b> <font face='Lucida Console'>(</font> |
|
gpu_data<font color='#5555FF'>&</font> dest, |
|
<font color='#0000FF'><u>size_t</u></font> dest_offset, |
|
<font color='#0000FF'>const</font> gpu_data<font color='#5555FF'>&</font> src, |
|
<font color='#0000FF'><u>size_t</u></font> src_offset, |
|
<font color='#0000FF'><u>size_t</u></font> num |
|
<font face='Lucida Console'>)</font>; |
|
<font color='#009900'>/*! |
|
requires |
|
- dest_offset + num &lt;= dest.size() |

- src_offset + num &lt;= src.size() |
|
ensures |
|
- Copies the data in src to dest, but only copies data in the range |
|
[src.host()+src_offset, src.host()+src_offset+num) to |
|
[dest.host()+dest_offset, dest.host()+dest_offset+num). Therefore, it is |
|
just like the above memcpy() except that you can specify some subset of data |
|
in a gpu_data object to be copied. |
|
- Like the above version of memcpy(), the copy will happen in the most |
|
efficient way, automatically using the appropriate type of host/device |
|
transfers based on where data is currently resident. |
|
- It doesn't matter what GPU device is selected by cudaSetDevice(). You can |
|
always copy gpu_data objects to and from each other regardless. |
|
- This function blocks until the copy has completed. |
|
!*/</font> |
|
|
|
<font color='#009900'>// ---------------------------------------------------------------------------------------- |
|
</font> |
|
<b>}</b> |
|
|
|
<font color='#0000FF'>#endif</font> <font color='#009900'>// DLIB_GPU_DaTA_ABSTRACT_H_ |
|
</font> |
|
|
|
</pre></body></html> |