// Copyright (C) 2015 Davis E. King ([email protected])
// License: Boost Software License. See LICENSE.txt for the full license.
#ifndef DLIB_DNn_TENSOR_H_
#define DLIB_DNn_TENSOR_H_
#include "tensor_abstract.h"
#include <cstring>
#include "../matrix.h"
#include "cudnn_dlibapi.h"
#include "gpu_data.h"
#include "../byte_orderer.h"
#include <memory>
#include "../any.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
class tensor;
namespace cuda
{
void set_tensor (
tensor& t,
float value
);
void scale_tensor (
tensor& t,
float value
);
}
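// The cuda::set_tensor() and cuda::scale_tensor() routines above are implemented in
// dlib's CUDA code. They are declared here so that tensor::operator=(float) and
// tensor::operator*=(float) can run on the device when DLIB_USE_CUDA is defined.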
// ----------------------------------------------------------------------------------------
class tensor
{
public:
tensor (
) :
m_n(0), m_k(0), m_nr(0), m_nc(0), m_size(0)
{
}
virtual ~tensor() {}
long long num_samples() const { return m_n; }
long long k() const { return m_k; }
long long nr() const { return m_nr; }
long long nc() const { return m_nc; }
size_t size() const { return m_size; }
typedef float* iterator;
typedef const float* const_iterator;
iterator begin() { return host(); }
const_iterator begin() const { return host(); }
iterator end() { return host()+size(); }
const_iterator end() const { return host()+size(); }
void async_copy_to_device() const
{
data().async_copy_to_device();
}
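// Data accessors. host() synchronizes the host copy with the device (if necessary)
// before returning a pointer, while host_write_only() skips that device-to-host
// transfer because the caller promises to overwrite every element. The device()
// variants behave symmetrically for the GPU side of the data.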
virtual const float* host() const = 0;
virtual float* host() = 0;
virtual float* host_write_only() = 0;
virtual const float* device() const = 0;
virtual float* device() = 0;
virtual float* device_write_only() = 0;
virtual const any& annotation() const = 0;
virtual any& annotation() = 0;
int device_id() const { return data().device_id(); }
tensor& operator= (float val)
{
#ifdef DLIB_USE_CUDA
// If you are using CUDA then presumably you will be mostly using tensors on
// the GPU. So unless it looks like you are actively working with the host
// side's data, we do this initialization on the device side, since this
// avoids a host-to-device transfer that would likely immediately follow.
if (data().device_ready())
{
cuda::set_tensor(*this, val);
return *this;
}
#endif
auto d = host_write_only();
for (size_t i = 0; i < size(); ++i)
d[i] = val;
return *this;
}
tensor& operator*= (float val)
{
#ifdef DLIB_USE_CUDA
cuda::scale_tensor(*this, val);
return *this;
#else
for (auto& d : *this)
d *= val;
return *this;
#endif
}
tensor& operator/= (float val)
{
*this *= 1.0/val;
return *this;
}
template <typename EXP>
tensor& operator= (const matrix_exp<EXP>& item)
{
DLIB_CASSERT(num_samples() == item.nr() &&
nr()*nc()*k() == item.nc());
static_assert((is_same_type<float, typename EXP::type>::value == true),
"To assign a matrix to a tensor the matrix must contain float values");
set_ptrm(host_write_only(), m_n, m_nr*m_nc*m_k) = item;
return *this;
}
template <typename EXP>
tensor& operator+= (const matrix_exp<EXP>& item)
{
DLIB_CASSERT(num_samples() == item.nr() &&
nr()*nc()*k() == item.nc());
static_assert((is_same_type<float, typename EXP::type>::value == true),
"To assign a matrix to a tensor the matrix must contain float values");
set_ptrm(host(), m_n, m_nr*m_nc*m_k) += item;
return *this;
}
template <typename EXP>
tensor& operator-= (const matrix_exp<EXP>& item)
{
DLIB_CASSERT(num_samples() == item.nr() &&
nr()*nc()*k() == item.nc());
static_assert((is_same_type<float, typename EXP::type>::value == true),
"To assign a matrix to a tensor the matrix must contain float values");
set_ptrm(host(), m_n, m_nr*m_nc*m_k) -= item;
return *this;
}
template <typename EXP>
void set_sample (
unsigned long long idx,
const matrix_exp<EXP>& item
)
{
DLIB_CASSERT(idx < (unsigned long long)num_samples());
DLIB_CASSERT(item.size() == nr()*nc()*k());
static_assert((is_same_type<float, typename EXP::type>::value == true),
"To assign a matrix to a tensor the matrix must contain float values");
set_ptrm(host()+idx*item.size(), item.nr(), item.nc()) = item;
}
template <typename EXP>
void add_to_sample (
unsigned long long idx,
const matrix_exp<EXP>& item
)
{
DLIB_CASSERT(idx < (unsigned long long)num_samples());
DLIB_CASSERT(item.size() == nr()*nc()*k());
static_assert((is_same_type<float, typename EXP::type>::value == true),
"To assign a matrix to a tensor the matrix must contain float values");
set_ptrm(host()+idx*item.size(), item.nr(), item.nc()) += item;
}
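// Illustrative sketch of set_sample()/add_to_sample() above: they copy or accumulate
// a matrix into a single sample's slot, e.g.
//
//     resizable_tensor t(10, 1, 28, 28);
//     matrix<float> img(28, 28);
//     img = 0;
//     t.set_sample(3, img);   // overwrites sample 3 of t with the contents of img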
#ifdef DLIB_USE_CUDA
virtual const cuda::tensor_descriptor& get_cudnn_tensor_descriptor (
) const = 0;
#endif
friend void memcpy (
tensor& dest,
const tensor& src
)
{
DLIB_CASSERT(dest.size() == src.size());
memcpy(dest.data(), dest.get_alias_offset(),
src.data(), src.get_alias_offset(),
src.size());
}
protected:
friend class alias_tensor;
virtual gpu_data& data() = 0;
virtual const gpu_data& data() const = 0;
virtual size_t get_alias_offset() const { return 0; } // needed by alias_tensor.
long long m_n;
long long m_k;
long long m_nr;
long long m_nc;
long long m_size; // always equal to m_n*m_k*m_nr*m_nc
};
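// tensor is an abstract interface. The concrete types defined below are
// resizable_tensor, which owns its memory, and alias_tensor_instance, which is a
// non-owning view into another tensor's memory.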
// ----------------------------------------------------------------------------------------
inline bool is_vector (
const tensor& t
)
{
return t.size() == (size_t)t.num_samples() ||
t.size() == (size_t)t.k() ||
t.size() == (size_t)t.nr() ||
t.size() == (size_t)t.nc();
}
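// Note: for a non-empty tensor the test above is equivalent to saying that all but
// one of the four dimensions are 1, i.e. the elements form a single row or column.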
// ----------------------------------------------------------------------------------------
inline const matrix_op<op_pointer_to_mat<float> > mat (
const tensor& t,
long long nr,
long long nc
)
{
DLIB_ASSERT(nr >= 0 && nc >= 0 ,
"\tconst matrix_exp mat(tensor, nr, nc)"
<< "\n\t nr and nc must be >= 0"
<< "\n\t nr: " << nr
<< "\n\t nc: " << nc
);
DLIB_ASSERT(nr*nc == (long long)t.size() ,
"\tconst matrix_exp mat(tensor, nr, nc)"
<< "\n\t The sizes don't match up."
<< "\n\t nr*nc: " << nr*nc
<< "\n\t t.size(): " << t.size()
);
typedef op_pointer_to_mat<float> op;
return matrix_op<op>(op(t.host(),nr,nc));
}
inline const matrix_op<op_pointer_to_mat<float> > mat (
const tensor& t
)
{
if (t.size() != 0)
return mat(t, t.num_samples(), t.size()/t.num_samples());
else
return mat((float*)0,0,0);
}
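// Illustrative sketch: mat() lets a tensor participate in dlib matrix expressions
// without copying, e.g.
//
//     resizable_tensor t(2, 3);     // 2 samples with k=3, so 6 floats in total
//     t = 1;
//     float s = sum(mat(t));        // mat(t) is a 2x3 view of t.host(); s == 6
//
// The view aliases t's host memory, so it is only valid while t is alive and unresized.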
inline const matrix_op<op_pointer_to_mat<float> > image_plane (
const tensor& t,
long long sample = 0,
long long k = 0
)
{
DLIB_ASSERT(0 <= sample && sample < t.num_samples() &&
0 <= k && k < t.k() &&
t.size() != 0,
"\tconst matrix_exp image_plane(tensor,sample,k)"
<< "\n\t Invalid arguments were given to this function."
<< "\n\t sample: " << sample
<< "\n\t k: " << k
<< "\n\t t.num_samples(): " << t.num_samples()
<< "\n\t t.k(): " << t.k()
<< "\n\t t.size(): " << t.size()
);
typedef op_pointer_to_mat<float> op;
return matrix_op<op>(op(t.host() + ((sample*t.k() + k)*t.nr())*t.nc(),
t.nr(),
t.nc()));
}
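// Illustrative sketch: image_plane() above gives a nr() x nc() view of one channel of
// one sample. For a tensor t with shape (n=4, k=3, nr=5, nc=5), image_plane(t,1,2)
// views the 25 floats starting at t.host() + (1*3 + 2)*5*5.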
// ----------------------------------------------------------------------------------------
inline bool have_same_dimensions (
const tensor& a,
const tensor& b
)
{
return a.num_samples() == b.num_samples() &&
a.k() == b.k() &&
a.nr() == b.nr() &&
a.nc() == b.nc();
}
// ----------------------------------------------------------------------------------------
class resizable_tensor : public tensor
{
public:
resizable_tensor(
)
{}
template <typename EXP>
resizable_tensor(
const matrix_exp<EXP>& item
)
{
set_size(item.nr(), item.nc());
*this = item;
}
explicit resizable_tensor(
long long n_, long long k_ = 1, long long nr_ = 1, long long nc_ = 1
)
{
DLIB_ASSERT( n_ >= 0 && k_ >= 0 && nr_ >= 0 && nc_ >= 0);
set_size(n_,k_,nr_,nc_);
}
resizable_tensor(const resizable_tensor& item) : _annotation(item.annotation())
{
copy_size(item);
memcpy(*this, item);
}
resizable_tensor(const tensor& item) : _annotation(item.annotation())
{
copy_size(item);
memcpy(*this, item);
}
resizable_tensor(resizable_tensor&& item) { swap(item); }
resizable_tensor& operator=(resizable_tensor&& item) { swap(item); return *this; }
virtual const float* host() const { return data_instance.host(); }
virtual float* host() { return data_instance.host(); }
virtual float* host_write_only() { return data_instance.host_write_only(); }
virtual const float* device() const { return data_instance.device(); }
virtual float* device() { return data_instance.device(); }
virtual float* device_write_only() { return data_instance.device_write_only(); }
virtual const any& annotation() const { return _annotation; }
virtual any& annotation() { return _annotation; }
void clear(
)
{
set_size(0,0,0,0);
_annotation.clear();
// free underlying memory
data_instance.set_size(0);
}
void copy_size (
const tensor& item
)
{
set_size(item.num_samples(), item.k(), item.nr(), item.nc());
}
resizable_tensor& operator= (float val)
{
tensor::operator=(val);
return *this;
}
template <typename EXP>
resizable_tensor& operator= (
const matrix_exp<EXP>& item
)
{
if (!(num_samples() == item.nr() && k()*nr()*nc() == item.nc()))
set_size(item.nr(), item.nc());
tensor::operator=(item);
return *this;
}
void set_size(
long long n_, long long k_ = 1, long long nr_ = 1, long long nc_ = 1
)
{
DLIB_ASSERT( n_ >= 0 && k_ >= 0 && nr_ >= 0 && nc_ >= 0);
m_n = n_;
m_k = k_;
m_nr = nr_;
m_nc = nc_;
m_size = n_*k_*nr_*nc_;
if ((long long)data_instance.size() < m_size)
data_instance.set_size(m_size);
#ifdef DLIB_USE_CUDA
cudnn_descriptor.set_size(m_n,m_k,m_nr,m_nc);
#endif
}
resizable_tensor& operator= (const resizable_tensor& item)
{
resizable_tensor temp(item);
temp.swap(*this);
return *this;
}
resizable_tensor& operator= (const tensor& item)
{
resizable_tensor temp(item);
temp.swap(*this);
return *this;
}
void swap(resizable_tensor& item)
{
std::swap(m_n, item.m_n);
std::swap(m_k, item.m_k);
std::swap(m_nr, item.m_nr);
std::swap(m_nc, item.m_nc);
std::swap(m_size, item.m_size);
std::swap(data_instance, item.data_instance);
std::swap(_annotation, item._annotation);
#ifdef DLIB_USE_CUDA
std::swap(cudnn_descriptor, item.cudnn_descriptor);
#endif
}
#ifdef DLIB_USE_CUDA
virtual const cuda::tensor_descriptor& get_cudnn_tensor_descriptor (
) const { return cudnn_descriptor; }
#endif
private:
#ifdef DLIB_USE_CUDA
cuda::tensor_descriptor cudnn_descriptor;
#endif
gpu_data data_instance;
any _annotation;
virtual gpu_data& data() { return data_instance; }
virtual const gpu_data& data() const { return data_instance; }
};
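// Illustrative usage sketch for resizable_tensor (the variable names are hypothetical):
//
//     resizable_tensor t(10, 3, 28, 28);  // 10 samples of a 3x28x28 volume
//     t = 0;                              // fill every element with 0
//     t.set_size(5, 3, 28, 28);           // shrink; the existing buffer is reused
//     matrix<float> m = mat(t);           // copy into a 5 x 2352 dlib matrix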
inline void serialize(const tensor& item, std::ostream& out)
{
int version = 2;
serialize(version, out);
serialize(item.num_samples(), out);
serialize(item.k(), out);
serialize(item.nr(), out);
serialize(item.nc(), out);
byte_orderer bo;
auto sbuf = out.rdbuf();
for (auto d : item)
{
// Write out our data as 4 byte little endian IEEE floats rather than using
// dlib's default float serialization. We do this because it will result in
// more compact outputs. It's slightly less portable but it seems doubtful
// that any CUDA enabled platform isn't going to use IEEE floats. But if one
// does we can just update the serialization code here to handle it if such a
// platform is encountered.
bo.host_to_little(d);
static_assert(sizeof(d)==4, "This serialization code assumes we are writing 4 byte floats");
sbuf->sputn((char*)&d, sizeof(d));
}
}
inline void deserialize(resizable_tensor& item, std::istream& in)
{
int version;
deserialize(version, in);
if (version != 2)
throw serialization_error("Unexpected version found while deserializing dlib::resizable_tensor.");
long long num_samples=0, k=0, nr=0, nc=0;
deserialize(num_samples, in);
deserialize(k, in);
deserialize(nr, in);
deserialize(nc, in);
item.set_size(num_samples, k, nr, nc);
byte_orderer bo;
auto sbuf = in.rdbuf();
for (auto& d : item)
{
static_assert(sizeof(d)==4, "This serialization code assumes we are reading 4 byte floats");
if (sbuf->sgetn((char*)&d,sizeof(d)) != sizeof(d))
{
in.setstate(std::ios::badbit);
throw serialization_error("Error reading data while deserializing dlib::resizable_tensor.");
}
bo.little_to_host(d);
}
}
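// Illustrative sketch: a tensor written by serialize() above can be read back into a
// resizable_tensor, e.g. (the file name is hypothetical and <fstream> is assumed):
//
//     std::ofstream fout("tensor.dat", std::ios::binary);
//     serialize(t, fout);
//     ...
//     resizable_tensor t2;
//     std::ifstream fin("tensor.dat", std::ios::binary);
//     deserialize(t2, fin);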
// ----------------------------------------------------------------------------------------
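// Returns the dot product of a and b, computed over their host() data and accumulated
// in double precision.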
inline double dot(
const tensor& a,
const tensor& b
)
{
DLIB_CASSERT(a.size() == b.size());
const float* da = a.host();
const float* db = b.host();
double sum = 0;
for (size_t i = 0; i < a.size(); ++i)
sum += da[i]*db[i];
return sum;
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
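// alias_tensor_instance is a tensor that does not own any memory. It simply points at
// a window into some other tensor's gpu_data, offset by data_offset floats.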
class alias_tensor_instance : public tensor
{
alias_tensor_instance(
) : data_instance(0), _annotation(0), data_offset(0) {}
public:
friend class alias_tensor;
friend class alias_tensor_const_instance;
alias_tensor_instance& operator= (float val)
{
tensor::operator=(val);
return *this;
}
template <typename EXP>
alias_tensor_instance& operator= (const matrix_exp<EXP>& item)
{
tensor::operator=(item);
return *this;
}
virtual const float* host() const { return data_instance->host()+data_offset; }
virtual float* host() { return data_instance->host()+data_offset; }
virtual float* host_write_only() { return data_instance->host()+data_offset; }
virtual const float* device() const { return data_instance->device()+data_offset; }
virtual float* device() { return data_instance->device()+data_offset; }
virtual float* device_write_only() { return data_instance->device()+data_offset; }
virtual const any& annotation() const { return *_annotation; }
virtual any& annotation() { return *_annotation; }
#ifdef DLIB_USE_CUDA
virtual const cuda::tensor_descriptor& get_cudnn_tensor_descriptor (
) const { return *cudnn_descriptor; }
#endif
private:
virtual size_t get_alias_offset() const { return data_offset; }
#ifdef DLIB_USE_CUDA
std::shared_ptr<cuda::tensor_descriptor> cudnn_descriptor;
#endif
gpu_data* data_instance;
any* _annotation;
size_t data_offset;
virtual gpu_data& data() { return *data_instance; }
virtual const gpu_data& data() const { return *data_instance; }
};
// ----------------------------------------------------------------------------------------
class alias_tensor_const_instance
{
public:
const tensor& get() const { return inst; }
operator const tensor& () { return inst; }
alias_tensor_const_instance(const alias_tensor_instance& item) : inst(item) {}
private:
alias_tensor_instance inst;
friend class alias_tensor;
alias_tensor_const_instance() {}
};
// ----------------------------------------------------------------------------------------
class alias_tensor
{
public:
alias_tensor (
) {}
alias_tensor (
long long n_, long long k_ = 1, long long nr_ = 1, long long nc_ = 1
)
{
DLIB_ASSERT( n_ >= 0 && k_ >= 0 && nr_ >= 0 && nc_ >= 0);
inst.m_n = n_;
inst.m_k = k_;
inst.m_nr = nr_;
inst.m_nc = nc_;
inst.m_size = n_*k_*nr_*nc_;
}
long long num_samples(
) const { return inst.m_n; }
long long k(
) const { return inst.m_k; }
long long nr(
) const { return inst.m_nr; }
long long nc(
) const { return inst.m_nc; }
size_t size(
) const { return inst.m_size; }
alias_tensor_instance operator() (
tensor& t,
size_t offset = 0
) const
{
DLIB_CASSERT(offset+size() <= t.size(),
"offset: "<<offset <<"\n"<<
"size(): "<<size() <<"\n"<<
"t.size(): "<<t.size() <<"\n");
#ifdef DLIB_USE_CUDA
if (!inst.cudnn_descriptor)
{
inst.cudnn_descriptor = std::make_shared<cuda::tensor_descriptor>();
inst.cudnn_descriptor->set_size(inst.m_n, inst.m_k, inst.m_nr, inst.m_nc);
}
#endif
inst.data_instance = &t.data();
inst._annotation = &t.annotation();
// Note that t might already be an aliasing tensor so we need to take that into
// account.
inst.data_offset = t.get_alias_offset()+offset;
return inst;
}
alias_tensor_const_instance operator() (
const tensor& t,
size_t offset = 0
) const
{
alias_tensor_const_instance temp;
temp.inst = (*this)(const_cast<tensor&>(t),offset);
return temp;
}
private:
mutable alias_tensor_instance inst;
};
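// Illustrative usage sketch: alias_tensor carves non-owning views out of an existing
// tensor's memory, e.g. splitting one buffer into two halves:
//
//     resizable_tensor t(4, 8);       // 32 floats in total
//     alias_tensor a(2, 8);           // a view covering 16 floats
//     auto first  = a(t, 0);          // aliases samples 0-1 of t
//     auto second = a(t, a.size());   // aliases samples 2-3 of t
//     first = 1;                      // writes through to t's memory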
inline void serialize(const alias_tensor& item, std::ostream& out)
{
int version = 1;
serialize(version, out);
serialize(item.num_samples(), out);
serialize(item.k(), out);
serialize(item.nr(), out);
serialize(item.nc(), out);
}
inline void deserialize(alias_tensor& item, std::istream& in)
{
int version = 0;
deserialize(version, in);
if (version != 1)
throw serialization_error("Unexpected version found while deserializing dlib::alias_tensor.");
long long num_samples, k, nr, nc;
deserialize(num_samples, in);
deserialize(k, in);
deserialize(nr, in);
deserialize(nc, in);
item = alias_tensor(num_samples, k, nr, nc);
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_DNn_TENSOR_H_