|
|
|
|
|
|
|
|
|
#ifndef LIBCNPY_H_ |
|
#define LIBCNPY_H_ |
|
|
|
#if 0
#include <zlib.h>
#endif

#include <stdint.h>

#include <cassert>
#include <cstddef>
#include <cstdio>
#include <fstream>
#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <numeric>
#include <sstream>
#include <stdexcept>
#include <string>
#include <typeinfo>
#include <utility>
#include <vector>
|
|
|
namespace cnpy { |
|
|
|
/// One array's raw bytes plus the metadata needed to interpret them.
///
/// The bytes live in a reference-counted buffer, so copying an NpyArray shares
/// the underlying storage instead of duplicating it.
struct NpyArray
{
  /// Allocates a zero-filled buffer of prod(_shape) * _word_size bytes.
  ///
  /// @param _shape          Extent of each dimension; an empty shape yields one value.
  /// @param _word_size      Size in bytes of a single element.
  /// @param _fortran_order  Stored as-is in `fortran_order`.
  /// @param _typeName       Stored as-is in `typeName`.
  NpyArray(const std::vector<size_t>& _shape, size_t _word_size, bool _fortran_order, std::string _typeName)
    : shape(_shape)
    , word_size(_word_size)
    , fortran_order(_fortran_order)
    , num_vals(1)
    , typeName(std::move(_typeName))
  {
    for (size_t extent : shape)
      num_vals *= extent;
    data_holder = std::make_shared<std::vector<char>>(num_vals * word_size);
  }

  /// Creates an empty array that owns no buffer (`data_holder` stays null).
  NpyArray()
    : shape()
    , word_size(0)
    , fortran_order(false)
    , num_vals(0)
  {}

  /// Returns the buffer viewed as T*, or nullptr when no buffer is attached.
  template <typename T>
  T* data()
  {
    // vector::data() is well defined on an empty buffer, unlike &v[0].
    return data_holder ? reinterpret_cast<T*>(data_holder->data()) : nullptr;
  }

  /// Read-only counterpart of data().
  template <typename T>
  const T* data() const
  {
    return data_holder ? reinterpret_cast<const T*>(data_holder->data()) : nullptr;
  }

  /// Copies the first `num_vals` elements of type T into a new vector.
  /// Returns an empty vector when no buffer is attached.
  template <typename T>
  std::vector<T> as_vec() const
  {
    const T* first = data<T>();
    if (first == nullptr)
      return std::vector<T>();
    return std::vector<T>(first, first + num_vals);
  }

  /// Size of the attached buffer in bytes; 0 when no buffer is attached.
  size_t num_bytes() const { return data_holder ? data_holder->size() : 0; }

  std::shared_ptr<std::vector<char>> data_holder;  // shared raw storage; null after default construction
  std::vector<size_t> shape;                       // extent of each dimension
  size_t word_size;                                // bytes per element
  bool fortran_order;                              // value passed to the constructor
  size_t num_vals;                                 // product of all extents in `shape`
  std::string typeName;                            // value passed to the constructor
};
|
|
|
using npz_t = std::map<std::string, NpyArray>; |
|
|
|
char BigEndianTest(int size); |
|
char map_type(const std::type_info& t); |
|
template <typename T> |
|
std::vector<char> create_npy_header(const std::vector<size_t>& shape); |
|
void parse_npy_header(FILE* fp, size_t& word_size, std::vector<size_t>& shape, bool& fortran_order, |
|
std::string& typeName); |
|
void parse_npy_header(unsigned char* buffer, size_t& word_size, std::vector<size_t>& shape, bool& fortran_order, |
|
std::string& typeName); |
|
void parse_zip_footer(FILE* fp, uint16_t& nrecs, size_t& global_header_size, size_t& global_header_offset); |
|
npz_t npz_load(std::string fname); |
|
NpyArray npz_load(std::string fname, std::string varname); |
|
NpyArray npy_load(std::string fname); |
|
|
|
/// Appends the in-memory bytes of `rhs` (all sizeof(T) of them, in native byte
/// order) to the end of `lhs` and returns `lhs`.
///
/// Only meaningful for types whose value is fully described by their bytes
/// (integers, chars, ...); std::string and C strings use the specializations
/// declared below.
template <typename T>
std::vector<char>& operator+=(std::vector<char>& lhs, const T rhs)
{
  // View the by-value copy as raw bytes without casting away const.
  const char* first_byte = reinterpret_cast<const char*>(&rhs);
  lhs.insert(lhs.end(), first_byte, first_byte + sizeof(T));
  return lhs;
}

/// Specialization for std::string; declared here, defined outside this header.
template <>
std::vector<char>& operator+=(std::vector<char>& lhs, const std::string rhs);

/// Specialization for C strings; declared here, defined outside this header.
template <>
std::vector<char>& operator+=(std::vector<char>& lhs, const char* rhs);
|
|
|
template <typename T> |
|
int npy_save(std::string fname, const T* data, const std::vector<size_t> shape, std::string mode = "w") |
|
{ |
|
std::ofstream ofs(fname, std::ios::out); |
|
if (!ofs.is_open()) { |
|
return -1; |
|
} |
|
ofs.close(); |
|
FILE* fp = NULL; |
|
std::vector<size_t> true_data_shape; |
|
|
|
if (mode == "a") |
|
fp = fopen(fname.c_str(), "r+b"); |
|
|
|
if (fp) { |
|
|
|
size_t word_size; |
|
bool fortran_order; |
|
std::string typeName; |
|
parse_npy_header(fp, word_size, true_data_shape, fortran_order, typeName); |
|
assert(!fortran_order); |
|
|
|
if (word_size != sizeof(T)) { |
|
std::cout << "libnpy error: " << fname << " has word size " << word_size << " but npy_save appending data sized " |
|
<< sizeof(T) << "\n"; |
|
assert(word_size == sizeof(T)); |
|
} |
|
if (true_data_shape.size() != shape.size()) { |
|
std::cout << "libnpy error: npy_save attempting to append misdimensioned data to " << fname << "\n"; |
|
assert(true_data_shape.size() != shape.size()); |
|
} |
|
|
|
for (size_t i = 1; i < shape.size(); i++) { |
|
if (shape[i] != true_data_shape[i]) { |
|
std::cout << "libnpy error: npy_save attempting to append misshaped data to " << fname << "\n"; |
|
assert(shape[i] == true_data_shape[i]); |
|
} |
|
} |
|
true_data_shape[0] += shape[0]; |
|
} else { |
|
fp = fopen(fname.c_str(), "wb"); |
|
true_data_shape = shape; |
|
} |
|
|
|
std::vector<char> header = create_npy_header<T>(true_data_shape); |
|
size_t nels = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<size_t>()); |
|
|
|
fseek(fp, 0, SEEK_SET); |
|
fwrite(&header[0], sizeof(char), header.size(), fp); |
|
fseek(fp, 0, SEEK_END); |
|
fwrite(data, sizeof(T), nels, fp); |
|
fclose(fp); |
|
return 0; |
|
} |
|
|
|
template <typename T> |
|
void npz_save(std::string zipname, std::string fname, const T* data, const std::vector<size_t>& shape, |
|
std::string mode = "w") |
|
{ |
|
|
|
fname += ".npy"; |
|
|
|
|
|
FILE* fp = NULL; |
|
uint16_t nrecs = 0; |
|
size_t global_header_offset = 0; |
|
std::vector<char> global_header; |
|
|
|
if (mode == "a") |
|
fp = fopen(zipname.c_str(), "r+b"); |
|
|
|
if (fp) { |
|
|
|
|
|
|
|
|
|
|
|
size_t global_header_size; |
|
parse_zip_footer(fp, nrecs, global_header_size, global_header_offset); |
|
fseek(fp, global_header_offset, SEEK_SET); |
|
global_header.resize(global_header_size); |
|
size_t res = fread(&global_header[0], sizeof(char), global_header_size, fp); |
|
if (res != global_header_size) { |
|
throw std::runtime_error("npz_save: header read error while adding to existing zip"); |
|
} |
|
fseek(fp, global_header_offset, SEEK_SET); |
|
} else { |
|
fp = fopen(zipname.c_str(), "wb"); |
|
} |
|
|
|
std::vector<char> npy_header = create_npy_header<T>(shape); |
|
|
|
size_t nels = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<size_t>()); |
|
size_t nbytes = nels * sizeof(T) + npy_header.size(); |
|
|
|
#if 0 |
|
|
|
uint32_t crc = crc32(0L, (uint8_t*)&npy_header[0], npy_header.size()); |
|
crc = crc32(crc, (uint8_t*)data, nels * sizeof(T)); |
|
#else |
|
uint32_t crc = 0; |
|
#endif |
|
|
|
|
|
std::vector<char> local_header; |
|
local_header += "PK"; |
|
local_header += (uint16_t)0x0403; |
|
local_header += (uint16_t)20; |
|
local_header += (uint16_t)0; |
|
local_header += (uint16_t)0; |
|
local_header += (uint16_t)0; |
|
local_header += (uint16_t)0; |
|
local_header += (uint32_t)crc; |
|
local_header += (uint32_t)nbytes; |
|
local_header += (uint32_t)nbytes; |
|
local_header += (uint16_t)fname.size(); |
|
local_header += (uint16_t)0; |
|
local_header += fname; |
|
|
|
|
|
global_header += "PK"; |
|
global_header += (uint16_t)0x0201; |
|
global_header += (uint16_t)20; |
|
global_header.insert(global_header.end(), local_header.begin() + 4, local_header.begin() + 30); |
|
global_header += (uint16_t)0; |
|
global_header += (uint16_t)0; |
|
global_header += (uint16_t)0; |
|
global_header += (uint32_t)0; |
|
global_header += (uint32_t) |
|
global_header_offset; |
|
global_header += fname; |
|
|
|
|
|
std::vector<char> footer; |
|
footer += "PK"; |
|
footer += (uint16_t)0x0605; |
|
footer += (uint16_t)0; |
|
footer += (uint16_t)0; |
|
footer += (uint16_t)(nrecs + 1); |
|
footer += (uint16_t)(nrecs + 1); |
|
footer += (uint32_t)global_header.size(); |
|
footer += |
|
(uint32_t)(global_header_offset + nbytes + local_header.size()); |
|
|
|
footer += (uint16_t)0; |
|
|
|
|
|
fwrite(&local_header[0], sizeof(char), local_header.size(), fp); |
|
fwrite(&npy_header[0], sizeof(char), npy_header.size(), fp); |
|
fwrite(data, sizeof(T), nels, fp); |
|
fwrite(&global_header[0], sizeof(char), global_header.size(), fp); |
|
fwrite(&footer[0], sizeof(char), footer.size(), fp); |
|
fclose(fp); |
|
} |
|
|
|
/// Convenience overload: saves `data` as a one-dimensional array of data.size()
/// elements by forwarding to the pointer-based npy_save().
///
/// @param fname  Path of the .npy file.
/// @param data   Elements to write; taken by reference so the payload is not copied.
/// @param mode   Forwarded unchanged to the pointer-based overload.
template <typename T>
void npy_save(std::string fname, const std::vector<T>& data, std::string mode = "w")
{
  const std::vector<size_t> shape(1, data.size());
  // data() stays valid for an empty vector, where &data[0] would not.
  npy_save(fname, data.data(), shape, mode);
}
|
|
|
/// Convenience overload: stores `data` in the archive as a one-dimensional array
/// of data.size() elements by forwarding to the pointer-based npz_save().
///
/// @param zipname  Path of the .npz archive.
/// @param fname    Entry name, forwarded unchanged.
/// @param data     Elements to write; taken by reference so the payload is not copied.
/// @param mode     Forwarded unchanged to the pointer-based overload.
template <typename T>
void npz_save(std::string zipname, std::string fname, const std::vector<T>& data, std::string mode = "w")
{
  const std::vector<size_t> shape(1, data.size());
  // data() stays valid for an empty vector, where &data[0] would not.
  npz_save(zipname, fname, data.data(), shape, mode);
}
|
|
|
template <typename T> |
|
std::vector<char> create_npy_header(const std::vector<size_t>& shape) |
|
{ |
|
const char* tpye_name = typeid(T).name(); |
|
std::vector<char> dict; |
|
dict += "{'descr': '"; |
|
dict += BigEndianTest(sizeof(T)); |
|
if (std::string(tpye_name) == "N4rknn7float16E") { |
|
dict += "f"; |
|
} else { |
|
dict += map_type(typeid(T)); |
|
} |
|
dict += std::to_string(sizeof(T)); |
|
dict += "', 'fortran_order': False, 'shape': ("; |
|
dict += std::to_string(shape[0]); |
|
for (size_t i = 1; i < shape.size(); i++) { |
|
dict += ", "; |
|
dict += std::to_string(shape[i]); |
|
} |
|
if (shape.size() == 1) |
|
dict += ","; |
|
dict += "), }"; |
|
|
|
int remainder = 16 - (10 + dict.size()) % 16; |
|
dict.insert(dict.end(), remainder, ' '); |
|
dict.back() = '\n'; |
|
|
|
std::vector<char> header; |
|
header += (char)0x93; |
|
header += "NUMPY"; |
|
header += (char)0x01; |
|
header += (char)0x00; |
|
header += (uint16_t)dict.size(); |
|
header.insert(header.end(), dict.begin(), dict.end()); |
|
|
|
return header; |
|
} |
|
|
|
} |
|
|
|
#endif |
|
|