File size: 33,297 Bytes
9375c9a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
<html><!-- Created using the cpp_pretty_printer from the dlib C++ library.  See http://dlib.net for updates. --><head><title>dlib C++ Library - gpu_data.cpp</title></head><body bgcolor='white'><pre>
<font color='#009900'>// Copyright (C) 2015  Davis E. King ([email protected])
</font><font color='#009900'>// License: Boost Software License   See LICENSE.txt for the full license.
</font><font color='#0000FF'>#ifndef</font> DLIB_GPU_DaTA_CPP_
<font color='#0000FF'>#define</font> DLIB_GPU_DaTA_CPP_

<font color='#009900'>// Only things that require CUDA are defined in this cpp file.  Everything else is in the
</font><font color='#009900'>// gpu_data.h header so that it can operate as "header-only" code when using just the CPU.
</font><font color='#0000FF'>#ifdef</font> DLIB_USE_CUDA

<font color='#0000FF'>#include</font> "<a style='text-decoration:none' href='gpu_data.h.html'>gpu_data.h</a>"
<font color='#0000FF'>#include</font> <font color='#5555FF'>&lt;</font>iostream<font color='#5555FF'>&gt;</font>
<font color='#0000FF'>#include</font> "<a style='text-decoration:none' href='cuda_utils.h.html'>cuda_utils.h</a>"
<font color='#0000FF'>#include</font> <font color='#5555FF'>&lt;</font>cstring<font color='#5555FF'>&gt;</font>
<font color='#0000FF'>#include</font> <font color='#5555FF'>&lt;</font>cuda.h<font color='#5555FF'>&gt;</font>

<font color='#0000FF'>namespace</font> dlib
<b>{</b>

<font color='#009900'>// ----------------------------------------------------------------------------------------
</font>
    <font color='#009900'>// Copies the entire contents of src into dest.  Both objects must report the same size().
</font>    <font color='#0000FF'><u>void</u></font> <b><a name='memcpy'></a>memcpy</b> <font face='Lucida Console'>(</font>
        gpu_data<font color='#5555FF'>&amp;</font> dest, 
        <font color='#0000FF'>const</font> gpu_data<font color='#5555FF'>&amp;</font> src
    <font face='Lucida Console'>)</font>
    <b>{</b>
        <font color='#BB00BB'>DLIB_CASSERT</font><font face='Lucida Console'>(</font>dest.<font color='#BB00BB'>size</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font> <font color='#5555FF'>=</font><font color='#5555FF'>=</font> src.<font color='#BB00BB'>size</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font face='Lucida Console'>)</font>;

        <font color='#009900'>// An empty source or a copy of an object onto itself leaves nothing to do, so
</font>        <font color='#009900'>// only forward to the offset based overload when there is real work.
</font>        <font color='#0000FF'>if</font> <font face='Lucida Console'>(</font>src.<font color='#BB00BB'>size</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font> <font color='#5555FF'>!</font><font color='#5555FF'>=</font> <font color='#979000'>0</font> <font color='#5555FF'>&amp;</font><font color='#5555FF'>&amp;</font> <font color='#5555FF'>&amp;</font>dest <font color='#5555FF'>!</font><font color='#5555FF'>=</font> <font color='#5555FF'>&amp;</font>src<font face='Lucida Console'>)</font>
            <font color='#BB00BB'>memcpy</font><font face='Lucida Console'>(</font>dest, <font color='#979000'>0</font>, src, <font color='#979000'>0</font>, src.<font color='#BB00BB'>size</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font face='Lucida Console'>)</font>;
    <b>}</b>

    <font color='#009900'>// Copies num floats from src, starting at element src_offset, into dest, starting at
</font>    <font color='#009900'>// element dest_offset.  Offsets and num count elements, not bytes.  Both ranges must
</font>    <font color='#009900'>// lie inside their objects.  Overlapping ranges within one object are supported.
</font>    <font color='#0000FF'><u>void</u></font> <b><a name='memcpy'></a>memcpy</b> <font face='Lucida Console'>(</font>
        gpu_data<font color='#5555FF'>&amp;</font> dest, 
        <font color='#0000FF'><u>size_t</u></font> dest_offset,
        <font color='#0000FF'>const</font> gpu_data<font color='#5555FF'>&amp;</font> src,
        <font color='#0000FF'><u>size_t</u></font> src_offset,
        <font color='#0000FF'><u>size_t</u></font> num
    <font face='Lucida Console'>)</font>
    <b>{</b>
        <font color='#009900'>// The range checks use subtraction rather than offset + num so that a very large
</font>        <font color='#009900'>// offset or num cannot wrap around size_t and slip past the assertion.
</font>        <font color='#BB00BB'>DLIB_CASSERT</font><font face='Lucida Console'>(</font>dest_offset <font color='#5555FF'>&lt;</font><font color='#5555FF'>=</font> dest.<font color='#BB00BB'>size</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font> <font color='#5555FF'>&amp;</font><font color='#5555FF'>&amp;</font> num <font color='#5555FF'>&lt;</font><font color='#5555FF'>=</font> dest.<font color='#BB00BB'>size</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font> <font color='#5555FF'>-</font> dest_offset<font face='Lucida Console'>)</font>;
        <font color='#BB00BB'>DLIB_CASSERT</font><font face='Lucida Console'>(</font>src_offset <font color='#5555FF'>&lt;</font><font color='#5555FF'>=</font> src.<font color='#BB00BB'>size</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font> <font color='#5555FF'>&amp;</font><font color='#5555FF'>&amp;</font> num <font color='#5555FF'>&lt;</font><font color='#5555FF'>=</font> src.<font color='#BB00BB'>size</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font> <font color='#5555FF'>-</font> src_offset<font face='Lucida Console'>)</font>;
        <font color='#0000FF'>if</font> <font face='Lucida Console'>(</font>num <font color='#5555FF'>=</font><font color='#5555FF'>=</font> <font color='#979000'>0</font><font face='Lucida Console'>)</font>
            <font color='#0000FF'>return</font>;

        <font color='#009900'>// Number of bytes moved by every copy below.
</font>        <font color='#0000FF'>const</font> <font color='#0000FF'><u>size_t</u></font> num_bytes <font color='#5555FF'>=</font> num<font color='#5555FF'>*</font><font color='#0000FF'>sizeof</font><font face='Lucida Console'>(</font><font color='#0000FF'><u>float</u></font><font face='Lucida Console'>)</font>;

        <font color='#009900'>// if there is aliasing, i.e. the two ranges belong to the same object and overlap
</font>        <font color='#0000FF'>if</font> <font face='Lucida Console'>(</font><font color='#5555FF'>&amp;</font>dest <font color='#5555FF'>=</font><font color='#5555FF'>=</font> <font color='#5555FF'>&amp;</font>src <font color='#5555FF'>&amp;</font><font color='#5555FF'>&amp;</font> std::<font color='#BB00BB'>max</font><font face='Lucida Console'>(</font>dest_offset, src_offset<font face='Lucida Console'>)</font> <font color='#5555FF'>&lt;</font> std::<font color='#BB00BB'>min</font><font face='Lucida Console'>(</font>dest_offset,src_offset<font face='Lucida Console'>)</font><font color='#5555FF'>+</font>num<font face='Lucida Console'>)</font>
        <b>{</b>
            <font color='#009900'>// if they perfectly alias each other then there is nothing to do
</font>            <font color='#0000FF'>if</font> <font face='Lucida Console'>(</font>dest_offset <font color='#5555FF'>=</font><font color='#5555FF'>=</font> src_offset<font face='Lucida Console'>)</font>
                <font color='#0000FF'>return</font>;

            <font color='#009900'>// The ranges only partly overlap, so shift the data within the host copy using
</font>            <font color='#009900'>// memmove(), which tolerates overlapping source and destination.
</font>            std::<font color='#BB00BB'>memmove</font><font face='Lucida Console'>(</font>dest.<font color='#BB00BB'>host</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font color='#5555FF'>+</font>dest_offset, src.<font color='#BB00BB'>host</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font color='#5555FF'>+</font>src_offset, num_bytes<font face='Lucida Console'>)</font>;
        <b>}</b>
        <font color='#0000FF'>else</font> <font color='#0000FF'>if</font> <font face='Lucida Console'>(</font>dest_offset <font color='#5555FF'>=</font><font color='#5555FF'>=</font> <font color='#979000'>0</font> <font color='#5555FF'>&amp;</font><font color='#5555FF'>&amp;</font> num <font color='#5555FF'>=</font><font color='#5555FF'>=</font> dest.<font color='#BB00BB'>size</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font face='Lucida Console'>)</font>
        <b>{</b>
            <font color='#009900'>// We write to the entire destination, so we can use device_write_only().  Pick
</font>            <font color='#009900'>// the source pointer based on which copy of src is current.
</font>            <font color='#0000FF'>if</font> <font face='Lucida Console'>(</font>src.<font color='#BB00BB'>device_ready</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font face='Lucida Console'>)</font>
                <font color='#BB00BB'>CHECK_CUDA</font><font face='Lucida Console'>(</font><font color='#BB00BB'>cudaMemcpy</font><font face='Lucida Console'>(</font>dest.<font color='#BB00BB'>device_write_only</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font>, src.<font color='#BB00BB'>device</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font color='#5555FF'>+</font>src_offset, num_bytes, cudaMemcpyDeviceToDevice<font face='Lucida Console'>)</font><font face='Lucida Console'>)</font>;
            <font color='#0000FF'>else</font>
                <font color='#BB00BB'>CHECK_CUDA</font><font face='Lucida Console'>(</font><font color='#BB00BB'>cudaMemcpy</font><font face='Lucida Console'>(</font>dest.<font color='#BB00BB'>device_write_only</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font>, src.<font color='#BB00BB'>host</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font color='#5555FF'>+</font>src_offset, num_bytes, cudaMemcpyHostToDevice<font face='Lucida Console'>)</font><font face='Lucida Console'>)</font>;
        <b>}</b>
        <font color='#0000FF'>else</font>
        <b>{</b>
            <font color='#009900'>// Only part of dest is written.  Copy the memory efficiently based on which copy
</font>            <font color='#009900'>// is current in each object.
</font>            <font color='#0000FF'>if</font> <font face='Lucida Console'>(</font>dest.<font color='#BB00BB'>device_ready</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font> <font color='#5555FF'>&amp;</font><font color='#5555FF'>&amp;</font> src.<font color='#BB00BB'>device_ready</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font face='Lucida Console'>)</font>
                <font color='#BB00BB'>CHECK_CUDA</font><font face='Lucida Console'>(</font><font color='#BB00BB'>cudaMemcpy</font><font face='Lucida Console'>(</font>dest.<font color='#BB00BB'>device</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font color='#5555FF'>+</font>dest_offset, src.<font color='#BB00BB'>device</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font color='#5555FF'>+</font>src_offset, num_bytes, cudaMemcpyDeviceToDevice<font face='Lucida Console'>)</font><font face='Lucida Console'>)</font>;
            <font color='#0000FF'>else</font> <font color='#0000FF'>if</font> <font face='Lucida Console'>(</font><font color='#5555FF'>!</font>dest.<font color='#BB00BB'>device_ready</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font> <font color='#5555FF'>&amp;</font><font color='#5555FF'>&amp;</font> src.<font color='#BB00BB'>device_ready</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font face='Lucida Console'>)</font>
                <font color='#BB00BB'>CHECK_CUDA</font><font face='Lucida Console'>(</font><font color='#BB00BB'>cudaMemcpy</font><font face='Lucida Console'>(</font>dest.<font color='#BB00BB'>host</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font color='#5555FF'>+</font>dest_offset, src.<font color='#BB00BB'>device</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font color='#5555FF'>+</font>src_offset, num_bytes, cudaMemcpyDeviceToHost<font face='Lucida Console'>)</font><font face='Lucida Console'>)</font>;
            <font color='#0000FF'>else</font> <font color='#0000FF'>if</font> <font face='Lucida Console'>(</font>dest.<font color='#BB00BB'>device_ready</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font> <font color='#5555FF'>&amp;</font><font color='#5555FF'>&amp;</font> <font color='#5555FF'>!</font>src.<font color='#BB00BB'>device_ready</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font face='Lucida Console'>)</font>
                <font color='#BB00BB'>CHECK_CUDA</font><font face='Lucida Console'>(</font><font color='#BB00BB'>cudaMemcpy</font><font face='Lucida Console'>(</font>dest.<font color='#BB00BB'>device</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font color='#5555FF'>+</font>dest_offset, src.<font color='#BB00BB'>host</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font color='#5555FF'>+</font>src_offset, num_bytes, cudaMemcpyHostToDevice<font face='Lucida Console'>)</font><font face='Lucida Console'>)</font>;
            <font color='#0000FF'>else</font>
                <font color='#BB00BB'>CHECK_CUDA</font><font face='Lucida Console'>(</font><font color='#BB00BB'>cudaMemcpy</font><font face='Lucida Console'>(</font>dest.<font color='#BB00BB'>host</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font color='#5555FF'>+</font>dest_offset, src.<font color='#BB00BB'>host</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font color='#5555FF'>+</font>src_offset, num_bytes, cudaMemcpyHostToHost<font face='Lucida Console'>)</font><font face='Lucida Console'>)</font>;
        <b>}</b>
    <b>}</b>
<font color='#009900'>// ----------------------------------------------------------------------------------------
</font>
    <font color='#009900'>// Blocks the calling thread until every operation queued on stream has finished.
</font>    <font color='#009900'>// Any error reported by the CUDA runtime is handed to CHECK_CUDA.
</font>    <font color='#0000FF'><u>void</u></font> <b><a name='synchronize_stream'></a>synchronize_stream</b><font face='Lucida Console'>(</font>cudaStream_t stream<font face='Lucida Console'>)</font>
    <b>{</b>
<font color='#0000FF'>#if</font> <font color='#5555FF'>!</font>defined CUDA_VERSION
<font color='#0000FF'>#error</font> CUDA_VERSION not defined
<font color='#0000FF'>#elif</font> CUDA_VERSION <font color='#5555FF'>&gt;</font><font color='#5555FF'>=</font> <font color='#979000'>9020</font> <font color='#5555FF'>&amp;</font><font color='#5555FF'>&amp;</font> CUDA_VERSION <font color='#5555FF'>&lt;</font> <font color='#979000'>11000</font>
        <font color='#009900'>// For these CUDA versions we poll the stream ourselves instead of calling
</font>        <font color='#009900'>// cudaStreamSynchronize(), which for some reason makes training freeze in some
</font>        <font color='#009900'>// cases (see https://github.com/davisking/dlib/issues/1513).  Polling should be
</font>        <font color='#009900'>// pretty much equivalent.  The workaround stops at cuda V11 in the hope that the
</font>        <font color='#009900'>// bug in cudaStreamSynchronize() is fixed by then.
</font>        <font color='#0000FF'>while</font> <font face='Lucida Console'>(</font><font color='#979000'>true</font><font face='Lucida Console'>)</font>
        <b>{</b>
            <font color='#0000FF'>const</font> cudaError_t status <font color='#5555FF'>=</font> <font color='#BB00BB'>cudaStreamQuery</font><font face='Lucida Console'>(</font>stream<font face='Lucida Console'>)</font>;
            <font color='#0000FF'>if</font> <font face='Lucida Console'>(</font>status <font color='#5555FF'>=</font><font color='#5555FF'>=</font> cudaSuccess<font face='Lucida Console'>)</font>
                <font color='#0000FF'>return</font>;                <font color='#009900'>// now we are synchronized
</font>            <font color='#0000FF'>if</font> <font face='Lucida Console'>(</font>status <font color='#5555FF'>!</font><font color='#5555FF'>=</font> cudaErrorNotReady<font face='Lucida Console'>)</font>
                <font color='#BB00BB'>CHECK_CUDA</font><font face='Lucida Console'>(</font>status<font face='Lucida Console'>)</font>;    <font color='#009900'>// unexpected error: throw
</font>            <font color='#009900'>// otherwise the stream is still busy, so query it again
</font>        <b>}</b>
<font color='#0000FF'>#else</font> <font color='#009900'>// CUDA_VERSION
</font>        <font color='#BB00BB'>CHECK_CUDA</font><font face='Lucida Console'>(</font><font color='#BB00BB'>cudaStreamSynchronize</font><font face='Lucida Console'>(</font>stream<font face='Lucida Console'>)</font><font face='Lucida Console'>)</font>;
<font color='#0000FF'>#endif</font> <font color='#009900'>// CUDA_VERSION
</font>    <b>}</b>

    <font color='#009900'>// Waits for a pending transfer on this object's stream, if there is one, and then
</font>    <font color='#009900'>// reports any CUDA error recorded so far.  Does nothing when no transfer is active.
</font>    <font color='#0000FF'><u>void</u></font> gpu_data::
    <b><a name='wait_for_transfer_to_finish'></a>wait_for_transfer_to_finish</b><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>
    <b>{</b>
        <font color='#0000FF'>if</font> <font face='Lucida Console'>(</font><font color='#5555FF'>!</font>have_active_transfer<font face='Lucida Console'>)</font>
            <font color='#0000FF'>return</font>;

        <font color='#BB00BB'>synchronize_stream</font><font face='Lucida Console'>(</font><font face='Lucida Console'>(</font>cudaStream_t<font face='Lucida Console'>)</font>cuda_stream.<font color='#BB00BB'>get</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font face='Lucida Console'>)</font>;
        have_active_transfer <font color='#5555FF'>=</font> <font color='#979000'>false</font>;

        <font color='#009900'>// Calling cudaGetLastError() here is one of the places where we find out whether
</font>        <font color='#009900'>// our kernel launches have been failing.
</font>        <font color='#BB00BB'>CHECK_CUDA</font><font face='Lucida Console'>(</font><font color='#BB00BB'>cudaGetLastError</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font face='Lucida Console'>)</font>;
    <b>}</b>

    <font color='#009900'>// Blocking host to device copy: starts the asynchronous transfer and then waits
</font>    <font color='#009900'>// for it to complete.
</font>    <font color='#0000FF'><u>void</u></font> gpu_data::
    <b><a name='copy_to_device'></a>copy_to_device</b><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>
    <b>{</b>
        <font color='#009900'>// The transfer goes through our non-default stream so that copies to the device
</font>        <font color='#009900'>// can always run concurrently with any device computation.
</font>        <font color='#BB00BB'>async_copy_to_device</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font>;
        <font color='#BB00BB'>wait_for_transfer_to_finish</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font>;
    <b>}</b>

    <font color='#009900'>// Brings the host copy up to date by copying the device block into it.  Does nothing
</font>    <font color='#009900'>// when the host copy is already current.
</font>    <font color='#0000FF'><u>void</u></font> gpu_data::
    <b><a name='copy_to_host'></a>copy_to_host</b><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>
    <b>{</b>
        <font color='#0000FF'>if</font> <font face='Lucida Console'>(</font>host_current<font face='Lucida Console'>)</font>
            <font color='#0000FF'>return</font>;

        <font color='#009900'>// Let any pending transfer finish before reading the device block.
</font>        <font color='#BB00BB'>wait_for_transfer_to_finish</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font>;
        <font color='#BB00BB'>CHECK_CUDA</font><font face='Lucida Console'>(</font><font color='#BB00BB'>cudaMemcpy</font><font face='Lucida Console'>(</font>data_host.<font color='#BB00BB'>get</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font>, data_device.<font color='#BB00BB'>get</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font>, data_size<font color='#5555FF'>*</font><font color='#0000FF'>sizeof</font><font face='Lucida Console'>(</font><font color='#0000FF'><u>float</u></font><font face='Lucida Console'>)</font>, cudaMemcpyDeviceToHost<font face='Lucida Console'>)</font><font face='Lucida Console'>)</font>;
        host_current <font color='#5555FF'>=</font> <font color='#979000'>true</font>;

        <font color='#009900'>// cudaMemcpy() implicitly syncs with the device, so at this point we know our
</font>        <font color='#009900'>// memory block is no longer in use.
</font>        device_in_use <font color='#5555FF'>=</font> <font color='#979000'>false</font>;

        <font color='#009900'>// Calling cudaGetLastError() here is one of the places where we find out whether
</font>        <font color='#009900'>// our kernel launches have been failing.
</font>        <font color='#BB00BB'>CHECK_CUDA</font><font face='Lucida Console'>(</font><font color='#BB00BB'>cudaGetLastError</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font face='Lucida Console'>)</font>;
    <b>}</b>

    <font color='#009900'>// Starts a host to device copy on this object's stream when the device copy is out
</font>    <font color='#009900'>// of date, and returns without waiting for it.  The transfer is recorded in
</font>    <font color='#009900'>// have_active_transfer so that wait_for_transfer_to_finish() can wait on it later.
</font>    <font color='#0000FF'><u>void</u></font> gpu_data::
    <b><a name='async_copy_to_device'></a>async_copy_to_device</b><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font> <font color='#0000FF'>const</font>
    <b>{</b>
        <font color='#0000FF'>if</font> <font face='Lucida Console'>(</font>device_current<font face='Lucida Console'>)</font>
            <font color='#0000FF'>return</font>;

        <font color='#0000FF'>if</font> <font face='Lucida Console'>(</font>device_in_use<font face='Lucida Console'>)</font>
        <b>{</b>
            <font color='#009900'>// CUDA kernels might still be using our memory block, so wait on stream 0 for
</font>            <font color='#009900'>// them to complete before we overwrite the memory.
</font>            <font color='#BB00BB'>synchronize_stream</font><font face='Lucida Console'>(</font><font color='#979000'>0</font><font face='Lucida Console'>)</font>;
            device_in_use <font color='#5555FF'>=</font> <font color='#979000'>false</font>;
        <b>}</b>

        <font color='#BB00BB'>CHECK_CUDA</font><font face='Lucida Console'>(</font><font color='#BB00BB'>cudaMemcpyAsync</font><font face='Lucida Console'>(</font>data_device.<font color='#BB00BB'>get</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font>, data_host.<font color='#BB00BB'>get</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font>, data_size<font color='#5555FF'>*</font><font color='#0000FF'>sizeof</font><font face='Lucida Console'>(</font><font color='#0000FF'><u>float</u></font><font face='Lucida Console'>)</font>, cudaMemcpyHostToDevice, <font face='Lucida Console'>(</font>cudaStream_t<font face='Lucida Console'>)</font>cuda_stream.<font color='#BB00BB'>get</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font><font face='Lucida Console'>)</font><font face='Lucida Console'>)</font>;
        have_active_transfer <font color='#5555FF'>=</font> <font color='#979000'>true</font>;
        device_current <font color='#5555FF'>=</font> <font color='#979000'>true</font>;
    <b>}</b>

    <font color='#009900'>// Resizes this object to hold new_size floats.  A size of 0 releases both memory
</font>    <font color='#009900'>// blocks.  Asking for the current non-zero size does nothing.  Any other size frees
</font>    <font color='#009900'>// the old blocks and allocates new host and device blocks.  If an allocation step
</font>    <font color='#009900'>// throws, the object is reset to size 0 and the exception is rethrown.
</font>    <font color='#0000FF'><u>void</u></font> gpu_data::
    <b><a name='set_size'></a>set_size</b><font face='Lucida Console'>(</font>
        <font color='#0000FF'><u>size_t</u></font> new_size
    <font face='Lucida Console'>)</font>
    <b>{</b>
        <font color='#009900'>// Requesting the size we already have (other than 0) requires no work.
</font>        <font color='#0000FF'>if</font> <font face='Lucida Console'>(</font>new_size <font color='#5555FF'>!</font><font color='#5555FF'>=</font> <font color='#979000'>0</font> <font color='#5555FF'>&amp;</font><font color='#5555FF'>&amp;</font> new_size <font color='#5555FF'>=</font><font color='#5555FF'>=</font> data_size<font face='Lucida Console'>)</font>
            <font color='#0000FF'>return</font>;

        <font color='#0000FF'>if</font> <font face='Lucida Console'>(</font>device_in_use<font face='Lucida Console'>)</font>
        <b>{</b>
            <font color='#009900'>// CUDA kernels might still be using our memory block, so wait on stream 0 for
</font>            <font color='#009900'>// them to complete before we free the memory.
</font>            <font color='#BB00BB'>synchronize_stream</font><font face='Lucida Console'>(</font><font color='#979000'>0</font><font face='Lucida Console'>)</font>;
            device_in_use <font color='#5555FF'>=</font> <font color='#979000'>false</font>;
        <b>}</b>
        <font color='#BB00BB'>wait_for_transfer_to_finish</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font>;

        <font color='#009900'>// Record the new size and mark both copies as current and the device as idle.
</font>        data_size <font color='#5555FF'>=</font> new_size;
        host_current <font color='#5555FF'>=</font> <font color='#979000'>true</font>;
        device_current <font color='#5555FF'>=</font> <font color='#979000'>true</font>;
        device_in_use <font color='#5555FF'>=</font> <font color='#979000'>false</font>;

        <font color='#0000FF'>if</font> <font face='Lucida Console'>(</font>new_size <font color='#5555FF'>=</font><font color='#5555FF'>=</font> <font color='#979000'>0</font><font face='Lucida Console'>)</font>
        <b>{</b>
            <font color='#009900'>// An empty object owns no memory at all.
</font>            data_host.<font color='#BB00BB'>reset</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font>;
            data_device.<font color='#BB00BB'>reset</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font>;
            <font color='#0000FF'>return</font>;
        <b>}</b>

        <font color='#0000FF'>try</font>
        <b>{</b>
            <font color='#BB00BB'>CHECK_CUDA</font><font face='Lucida Console'>(</font><font color='#BB00BB'>cudaGetDevice</font><font face='Lucida Console'>(</font><font color='#5555FF'>&amp;</font>the_device_id<font face='Lucida Console'>)</font><font face='Lucida Console'>)</font>;

            <font color='#009900'>// free memory blocks before we allocate new ones.
</font>            data_host.<font color='#BB00BB'>reset</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font>;
            data_device.<font color='#BB00BB'>reset</font><font face='Lucida Console'>(</font><font face='Lucida Console'>)</font>;

            <font color='#0000FF'>const</font> <font color='#0000FF'><u>size_t</u></font> num_bytes <font color='#5555FF'>=</font> new_size<font color='#5555FF'>*</font><font color='#0000FF'>sizeof</font><font face='Lucida Console'>(</font><font color='#0000FF'><u>float</u></font><font face='Lucida Console'>)</font>;

            <font color='#009900'>// The deleters below only print a message when a free call fails.  They don't
</font>            <font color='#009900'>// throw because the free calls are invariably made from destructors, and
</font>            <font color='#009900'>// they shouldn't fail anyway unless someone is resetting the GPU card in the
</font>            <font color='#009900'>// middle of their program.
</font>            <font color='#0000FF'><u>void</u></font><font color='#5555FF'>*</font> host_block;
            <font color='#BB00BB'>CHECK_CUDA</font><font face='Lucida Console'>(</font><font color='#BB00BB'>cudaMallocHost</font><font face='Lucida Console'>(</font><font color='#5555FF'>&amp;</font>host_block, num_bytes<font face='Lucida Console'>)</font><font face='Lucida Console'>)</font>;
            data_host.<font color='#BB00BB'>reset</font><font face='Lucida Console'>(</font><font face='Lucida Console'>(</font><font color='#0000FF'><u>float</u></font><font color='#5555FF'>*</font><font face='Lucida Console'>)</font>host_block, []<font face='Lucida Console'>(</font><font color='#0000FF'><u>float</u></font><font color='#5555FF'>*</font> block<font face='Lucida Console'>)</font><b>{</b>
                <font color='#0000FF'>const</font> <font color='#0000FF'>auto</font> status <font color='#5555FF'>=</font> <font color='#BB00BB'>cudaFreeHost</font><font face='Lucida Console'>(</font>block<font face='Lucida Console'>)</font>;
                <font color='#0000FF'>if</font> <font face='Lucida Console'>(</font>status <font color='#5555FF'>!</font><font color='#5555FF'>=</font> cudaSuccess<font face='Lucida Console'>)</font>
                    std::cerr <font color='#5555FF'>&lt;</font><font color='#5555FF'>&lt;</font> "<font color='#CC0000'>cudaFreeHost() failed. Reason: </font>" <font color='#5555FF'>&lt;</font><font color='#5555FF'>&lt;</font> <font color='#BB00BB'>cudaGetErrorString</font><font face='Lucida Console'>(</font>status<font face='Lucida Console'>)</font> <font color='#5555FF'>&lt;</font><font color='#5555FF'>&lt;</font> std::endl;
            <b>}</b><font face='Lucida Console'>)</font>;

            <font color='#0000FF'><u>void</u></font><font color='#5555FF'>*</font> device_block;
            <font color='#BB00BB'>CHECK_CUDA</font><font face='Lucida Console'>(</font><font color='#BB00BB'>cudaMalloc</font><font face='Lucida Console'>(</font><font color='#5555FF'>&amp;</font>device_block, num_bytes<font face='Lucida Console'>)</font><font face='Lucida Console'>)</font>;
            data_device.<font color='#BB00BB'>reset</font><font face='Lucida Console'>(</font><font face='Lucida Console'>(</font><font color='#0000FF'><u>float</u></font><font color='#5555FF'>*</font><font face='Lucida Console'>)</font>device_block, []<font face='Lucida Console'>(</font><font color='#0000FF'><u>float</u></font><font color='#5555FF'>*</font> block<font face='Lucida Console'>)</font><b>{</b>
                <font color='#0000FF'>const</font> <font color='#0000FF'>auto</font> status <font color='#5555FF'>=</font> <font color='#BB00BB'>cudaFree</font><font face='Lucida Console'>(</font>block<font face='Lucida Console'>)</font>;
                <font color='#0000FF'>if</font> <font face='Lucida Console'>(</font>status <font color='#5555FF'>!</font><font color='#5555FF'>=</font> cudaSuccess<font face='Lucida Console'>)</font>
                    std::cerr <font color='#5555FF'>&lt;</font><font color='#5555FF'>&lt;</font> "<font color='#CC0000'>cudaFree() failed. Reason: </font>" <font color='#5555FF'>&lt;</font><font color='#5555FF'>&lt;</font> <font color='#BB00BB'>cudaGetErrorString</font><font face='Lucida Console'>(</font>status<font face='Lucida Console'>)</font> <font color='#5555FF'>&lt;</font><font color='#5555FF'>&lt;</font> std::endl;
            <b>}</b><font face='Lucida Console'>)</font>;

            <font color='#009900'>// Create the transfer stream the first time memory is allocated.
</font>            <font color='#0000FF'>if</font> <font face='Lucida Console'>(</font><font color='#5555FF'>!</font>cuda_stream<font face='Lucida Console'>)</font>
            <b>{</b>
                cudaStream_t new_stream;
                <font color='#BB00BB'>CHECK_CUDA</font><font face='Lucida Console'>(</font><font color='#BB00BB'>cudaStreamCreateWithFlags</font><font face='Lucida Console'>(</font><font color='#5555FF'>&amp;</font>new_stream, cudaStreamNonBlocking<font face='Lucida Console'>)</font><font face='Lucida Console'>)</font>;
                cuda_stream.<font color='#BB00BB'>reset</font><font face='Lucida Console'>(</font>new_stream, []<font face='Lucida Console'>(</font><font color='#0000FF'><u>void</u></font><font color='#5555FF'>*</font> handle<font face='Lucida Console'>)</font><b>{</b>
                    <font color='#0000FF'>const</font> <font color='#0000FF'>auto</font> status <font color='#5555FF'>=</font> <font color='#BB00BB'>cudaStreamDestroy</font><font face='Lucida Console'>(</font><font face='Lucida Console'>(</font>cudaStream_t<font face='Lucida Console'>)</font>handle<font face='Lucida Console'>)</font>;
                    <font color='#0000FF'>if</font> <font face='Lucida Console'>(</font>status <font color='#5555FF'>!</font><font color='#5555FF'>=</font> cudaSuccess<font face='Lucida Console'>)</font>
                        std::cerr <font color='#5555FF'>&lt;</font><font color='#5555FF'>&lt;</font> "<font color='#CC0000'>cudaStreamDestroy() failed. Reason: </font>" <font color='#5555FF'>&lt;</font><font color='#5555FF'>&lt;</font> <font color='#BB00BB'>cudaGetErrorString</font><font face='Lucida Console'>(</font>status<font face='Lucida Console'>)</font> <font color='#5555FF'>&lt;</font><font color='#5555FF'>&lt;</font> std::endl;
                <b>}</b><font face='Lucida Console'>)</font>;
            <b>}</b>
        <b>}</b>
        <font color='#0000FF'>catch</font><font face='Lucida Console'>(</font>...<font face='Lucida Console'>)</font>
        <b>{</b>
            <font color='#009900'>// Leave the object empty rather than half allocated, then let the caller see
</font>            <font color='#009900'>// the original error.
</font>            <font color='#BB00BB'>set_size</font><font face='Lucida Console'>(</font><font color='#979000'>0</font><font face='Lucida Console'>)</font>;
            <font color='#0000FF'>throw</font>;
        <b>}</b>
    <b>}</b>

<font color='#009900'>// ----------------------------------------------------------------------------------------
</font><b>}</b>

<font color='#0000FF'>#endif</font> <font color='#009900'>// DLIB_USE_CUDA
</font>
<font color='#0000FF'>#endif</font> <font color='#009900'>// DLIB_GPU_DaTA_CPP_
</font>

</pre></body></html>