|
|
|
|
|
#ifndef DLIB_MNIST_CPp_ |
|
#define DLIB_MNIST_CPp_ |
|
|
|
#include "mnist.h" |
|
#include <fstream> |
|
#include "../byte_orderer.h" |
|
#include "../uintn.h" |
|
|
|
|
|
|
|
namespace dlib |
|
{ |
|
void load_mnist_dataset ( |
|
const std::string& folder_name, |
|
std::vector<matrix<unsigned char> >& training_images, |
|
std::vector<unsigned long>& training_labels, |
|
std::vector<matrix<unsigned char> >& testing_images, |
|
std::vector<unsigned long>& testing_labels |
|
) |
|
{ |
|
using namespace std; |
|
ifstream fin1((folder_name+"/train-images-idx3-ubyte").c_str(), ios::binary); |
|
if (!fin1) |
|
{ |
|
fin1.open((folder_name + "/train-images.idx3-ubyte").c_str(), ios::binary); |
|
} |
|
|
|
ifstream fin2((folder_name+"/train-labels-idx1-ubyte").c_str(), ios::binary); |
|
if (!fin2) |
|
{ |
|
fin2.open((folder_name + "/train-labels.idx1-ubyte").c_str(), ios::binary); |
|
} |
|
|
|
ifstream fin3((folder_name+"/t10k-images-idx3-ubyte").c_str(), ios::binary); |
|
if (!fin3) |
|
{ |
|
fin3.open((folder_name + "/t10k-images.idx3-ubyte").c_str(), ios::binary); |
|
} |
|
|
|
ifstream fin4((folder_name+"/t10k-labels-idx1-ubyte").c_str(), ios::binary); |
|
if (!fin4) |
|
{ |
|
fin4.open((folder_name + "/t10k-labels.idx1-ubyte").c_str(), ios::binary); |
|
} |
|
|
|
if (!fin1) throw error("Unable to open file train-images-idx3-ubyte or train-images.idx3-ubyte"); |
|
if (!fin2) throw error("Unable to open file train-labels-idx1-ubyte or train-labels.idx1-ubyte"); |
|
if (!fin3) throw error("Unable to open file t10k-images-idx3-ubyte or t10k-images.idx3-ubyte"); |
|
if (!fin4) throw error("Unable to open file t10k-labels-idx1-ubyte or t10k-labels.idx1-ubyte"); |
|
|
|
byte_orderer bo; |
|
|
|
|
|
uint32 magic, num, nr, nc, num2, num3, num4; |
|
fin1.read((char*)&magic, sizeof(magic)); bo.big_to_host(magic); |
|
fin1.read((char*)&num, sizeof(num)); bo.big_to_host(num); |
|
fin1.read((char*)&nr, sizeof(nr)); bo.big_to_host(nr); |
|
fin1.read((char*)&nc, sizeof(nc)); bo.big_to_host(nc); |
|
if (magic != 2051 || num != 60000 || nr != 28 || nc != 28) |
|
throw error("mnist dat files are corrupted."); |
|
|
|
fin2.read((char*)&magic, sizeof(magic)); bo.big_to_host(magic); |
|
fin2.read((char*)&num2, sizeof(num2)); bo.big_to_host(num2); |
|
if (magic != 2049 || num2 != 60000) |
|
throw error("mnist dat files are corrupted."); |
|
|
|
fin3.read((char*)&magic, sizeof(magic)); bo.big_to_host(magic); |
|
fin3.read((char*)&num3, sizeof(num3)); bo.big_to_host(num3); |
|
fin3.read((char*)&nr, sizeof(nr)); bo.big_to_host(nr); |
|
fin3.read((char*)&nc, sizeof(nc)); bo.big_to_host(nc); |
|
if (magic != 2051 || num3 != 10000 || nr != 28 || nc != 28) |
|
throw error("mnist dat files are corrupted."); |
|
|
|
fin4.read((char*)&magic, sizeof(magic)); bo.big_to_host(magic); |
|
fin4.read((char*)&num4, sizeof(num4)); bo.big_to_host(num4); |
|
if (magic != 2049 || num4 != 10000) |
|
throw error("mnist dat files are corrupted."); |
|
|
|
if (!fin1) throw error("Unable to read train-images-idx3-ubyte"); |
|
if (!fin2) throw error("Unable to read train-labels-idx1-ubyte"); |
|
if (!fin3) throw error("Unable to read t10k-images-idx3-ubyte"); |
|
if (!fin4) throw error("Unable to read t10k-labels-idx1-ubyte"); |
|
|
|
|
|
training_images.resize(60000); |
|
training_labels.resize(60000); |
|
testing_images.resize(10000); |
|
testing_labels.resize(10000); |
|
|
|
for (size_t i = 0; i < training_images.size(); ++i) |
|
{ |
|
training_images[i].set_size(nr,nc); |
|
fin1.read((char*)&training_images[i](0,0), nr*nc); |
|
} |
|
for (size_t i = 0; i < training_labels.size(); ++i) |
|
{ |
|
char l; |
|
fin2.read(&l, 1); |
|
training_labels[i] = l; |
|
} |
|
|
|
for (size_t i = 0; i < testing_images.size(); ++i) |
|
{ |
|
testing_images[i].set_size(nr,nc); |
|
fin3.read((char*)&testing_images[i](0,0), nr*nc); |
|
} |
|
for (size_t i = 0; i < testing_labels.size(); ++i) |
|
{ |
|
char l; |
|
fin4.read(&l, 1); |
|
testing_labels[i] = l; |
|
} |
|
|
|
if (!fin1) throw error("Unable to read train-images-idx3-ubyte"); |
|
if (!fin2) throw error("Unable to read train-labels-idx1-ubyte"); |
|
if (!fin3) throw error("Unable to read t10k-images-idx3-ubyte"); |
|
if (!fin4) throw error("Unable to read t10k-labels-idx1-ubyte"); |
|
|
|
if (fin1.get() != EOF) throw error("Unexpected bytes at end of train-images-idx3-ubyte"); |
|
if (fin2.get() != EOF) throw error("Unexpected bytes at end of train-labels-idx1-ubyte"); |
|
if (fin3.get() != EOF) throw error("Unexpected bytes at end of t10k-images-idx3-ubyte"); |
|
if (fin4.get() != EOF) throw error("Unexpected bytes at end of t10k-labels-idx1-ubyte"); |
|
} |
|
} |
|
|
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|