|
|
|
|
|
|
|
|
|
#include "cnpy.h"

#include <stdint.h>

#include <algorithm>
#include <complex>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <memory>
#include <regex>
#include <stdexcept>
|
|
|
// Returns the NumPy byte-order character for an element that is `size` bytes wide.
//   '|'  when size == 1 (byte order is irrelevant for single-byte elements)
//   '<'  when the first byte in memory of the int value 1 is non-zero (little-endian host)
//   '>'  otherwise
char cnpy::BigEndianTest(int size)
{
    if (size != 1) {
        const int probe = 1;
        const char lowest_address_byte = *reinterpret_cast<const char*>(&probe);
        return lowest_address_byte != 0 ? '<' : '>';
    }
    return '|';
}
|
|
|
// Maps a C++ element type to a single NumPy dtype "kind" character:
//   'f' floating point, 'i' signed integer (plain char is reported as 'i' too),
//   'u' unsigned integer, 'b' bool, 'c' complex, '?' for every other type.
char cnpy::map_type(const std::type_info& t)
{
    struct KindEntry {
        const std::type_info* type;
        char kind;
    };

    // Checked top to bottom; the first matching entry wins.
    static const KindEntry known_kinds[] = {
        {&typeid(float), 'f'},
        {&typeid(double), 'f'},
        {&typeid(long double), 'f'},

        {&typeid(int), 'i'},
        {&typeid(char), 'i'},
        {&typeid(signed char), 'i'},
        {&typeid(short), 'i'},
        {&typeid(long), 'i'},
        {&typeid(long long), 'i'},

        {&typeid(unsigned char), 'u'},
        {&typeid(unsigned short), 'u'},
        {&typeid(unsigned long), 'u'},
        {&typeid(unsigned long long), 'u'},
        {&typeid(unsigned int), 'u'},

        {&typeid(bool), 'b'},

        {&typeid(std::complex<float>), 'c'},
        {&typeid(std::complex<double>), 'c'},
        {&typeid(std::complex<long double>), 'c'},
    };

    for (const KindEntry& entry : known_kinds) {
        if (t == *entry.type)
            return entry.kind;
    }
    return '?';
}
|
|
|
template <> |
|
std::vector<char>& cnpy::operator+=(std::vector<char>& lhs, const std::string rhs) |
|
{ |
|
lhs.insert(lhs.end(), rhs.begin(), rhs.end()); |
|
return lhs; |
|
} |
|
|
|
template <> |
|
std::vector<char>& cnpy::operator+=(std::vector<char>& lhs, const char* rhs) |
|
{ |
|
|
|
size_t len = strlen(rhs); |
|
lhs.reserve(len); |
|
for (size_t byte = 0; byte < len; byte++) { |
|
lhs.push_back(rhs[byte]); |
|
} |
|
return lhs; |
|
} |
|
|
|
namespace {

// Decodes the `n_bytes`-byte little-endian unsigned integer stored at `p`.
// Assembling the value byte by byte avoids the unaligned, host-endian-dependent
// load that a reinterpret_cast of the raw pointer would perform.
size_t npy_header_read_le(const unsigned char* p, int n_bytes)
{
    size_t value = 0;
    for (int i = n_bytes - 1; i >= 0; --i)
        value = (value << 8) | p[i];
    return value;
}

// Throws std::runtime_error unless `preamble` starts with the 6-byte npy magic
// string "\x93NUMPY".
void npy_header_check_magic(const unsigned char* preamble)
{
    static const unsigned char magic[6] = {0x93, 'N', 'U', 'M', 'P', 'Y'};
    if (std::memcmp(preamble, magic, sizeof(magic)) != 0)
        throw std::runtime_error("parse_npy_header: missing npy magic string");
}

// Extracts fortran_order, shape and dtype information from the textual header
// dictionary, e.g. "{'descr': '<f8', 'fortran_order': False, 'shape': (3, 4), }".
// Shared by both parse_npy_header overloads so that they validate identically.
//
// Outputs:
//   word_size     - element size in bytes (the number following the dtype kind)
//   shape         - cleared, then filled with every number found between the first '(' and ')'
//   fortran_order - always false on return (true is rejected with an exception)
//   typeName      - "int", "uint" or "float" followed by the element size in bits;
//                   only the bit count for any other dtype kind
// Throws std::runtime_error when a keyword is missing, the entry is truncated,
// the data is Fortran ordered, or the byte order is neither '<' nor '|'.
void parse_npy_header_dict(const std::string& header, size_t& word_size, std::vector<size_t>& shape,
                           bool& fortran_order, std::string& typeName)
{
    // --- fortran_order -------------------------------------------------------
    size_t loc1 = header.find("fortran_order");
    if (loc1 == std::string::npos)
        throw std::runtime_error("parse_npy_header: failed to find header keyword: 'fortran_order'");
    loc1 += 16;  // skip "fortran_order': " to reach the first character of the value
    fortran_order = loc1 <= header.size() && header.compare(loc1, 4, "True") == 0;
    if (fortran_order)
        throw std::runtime_error("npy input file: 'fortran_order' must be false, use: arr2 = np.ascontiguousarray(arr1)");

    // --- shape ---------------------------------------------------------------
    const size_t open_paren = header.find("(");
    const size_t close_paren = header.find(")");
    if (open_paren == std::string::npos || close_paren == std::string::npos || close_paren < open_paren)
        throw std::runtime_error("parse_npy_header: failed to find header keyword: '(' or ')'");

    // Compiled once; constructing a std::regex on every call is comparatively expensive.
    static const std::regex num_regex("[0-9][0-9]*");
    const std::string str_shape = header.substr(open_paren + 1, close_paren - open_paren - 1);
    shape.clear();
    for (std::sregex_iterator it(str_shape.begin(), str_shape.end(), num_regex), last; it != last; ++it) {
        // stoull rather than stoi: dimensions are size_t and may exceed INT_MAX.
        shape.push_back(static_cast<size_t>(std::stoull(it->str())));
    }

    // --- descr ---------------------------------------------------------------
    // The value looks like '<f8': byte-order character, dtype kind, size in bytes.
    loc1 = header.find("descr");
    if (loc1 == std::string::npos)
        throw std::runtime_error("parse_npy_header: failed to find header keyword: 'descr'");
    loc1 += 9;  // skip "descr': '" to reach the byte-order character
    if (loc1 + 2 > header.size())
        throw std::runtime_error("parse_npy_header: truncated 'descr' entry");

    // Checked at run time (not with assert) because the value comes from the
    // file and must still be rejected in builds that define NDEBUG.
    const char byte_order = header[loc1];
    if (byte_order != '<' && byte_order != '|')
        throw std::runtime_error("parse_npy_header: only little-endian ('<') or byte-order-free ('|') data is supported");

    const char kind = header[loc1 + 1];
    const std::string str_ws = header.substr(loc1 + 2);
    const size_t quote = str_ws.find("'");
    word_size = static_cast<size_t>(std::atoi(str_ws.substr(0, quote).c_str()));

    // Built from scratch so the result never depends on what the caller's
    // string happened to contain on entry.
    std::string base_name;
    if (kind == 'i') {
        base_name = "int";
    } else if (kind == 'u') {
        base_name = "uint";
    } else if (kind == 'f') {
        base_name = "float";
    }
    typeName = base_name + std::to_string(word_size * 8);
}

}  // namespace

// Parses an npy header held in memory. `buffer` must point at the first byte of
// the npy data (the magic string).
//
// Layout read here: bytes 0-5 magic, byte 6 major version, byte 7 minor version,
// then the little-endian header length (2 bytes for major version 1, 4 bytes for
// major version 2 and above), then the header text itself.
// The caller is responsible for `buffer` being large enough; no size is passed in.
// See parse_npy_header_dict for the outputs and the exceptions thrown.
void cnpy::parse_npy_header(unsigned char* buffer, size_t& word_size, std::vector<size_t>& shape, bool& fortran_order,
                            std::string& typeName)
{
    npy_header_check_magic(buffer);

    const bool wide_length = buffer[6] >= 2;
    const size_t header_len = npy_header_read_le(buffer + 8, wide_length ? 4 : 2);
    const size_t header_start = wide_length ? 12 : 10;
    const std::string header(reinterpret_cast<const char*>(buffer + header_start), header_len);

    parse_npy_header_dict(header, word_size, shape, fortran_order, typeName);
}

// Parses an npy header from `fp`, which must be positioned at the first byte of
// the npy data. On success the stream is left at the first byte of array data.
//
// The header text is read using the length stored in the preamble, so headers
// of any size are handled and the stream position is exact.
// Throws std::runtime_error on a short read; see parse_npy_header_dict for the
// outputs and the remaining exceptions.
void cnpy::parse_npy_header(FILE* fp, size_t& word_size, std::vector<size_t>& shape, bool& fortran_order,
                            std::string& typeName)
{
    unsigned char preamble[12];
    if (fread(preamble, 1, 10, fp) != 10)
        throw std::runtime_error("parse_npy_header: failed fread");
    npy_header_check_magic(preamble);

    size_t header_len;
    if (preamble[6] >= 2) {
        // Major version 2 and above store the header length in 4 bytes instead of 2.
        if (fread(preamble + 10, 1, 2, fp) != 2)
            throw std::runtime_error("parse_npy_header: failed fread");
        header_len = npy_header_read_le(preamble + 8, 4);
    } else {
        header_len = npy_header_read_le(preamble + 8, 2);
    }

    std::string header(header_len, '\0');
    if (header_len > 0 && fread(&header[0], 1, header_len, fp) != header_len)
        throw std::runtime_error("parse_npy_header: failed fread");

    parse_npy_header_dict(header, word_size, shape, fortran_order, typeName);
}
|
|
|
// Reads the 22-byte end-of-central-directory record from the tail of a zip
// archive and reports
//   nrecs                - total number of central directory entries
//   global_header_size   - size of the central directory in bytes
//   global_header_offset - file offset at which the central directory starts
//
// Only single-disk archives without a trailing archive comment are supported.
// Throws std::runtime_error if the record cannot be read or violates that.
// The checks are done at run time (not with assert) because the values come
// from the file and must still be rejected in builds that define NDEBUG.
void cnpy::parse_zip_footer(FILE* fp, uint16_t& nrecs, size_t& global_header_size, size_t& global_header_offset)
{
    unsigned char footer[22];
    if (fseek(fp, -22, SEEK_END) != 0)
        throw std::runtime_error("parse_zip_footer: failed fseek");
    if (fread(footer, sizeof(unsigned char), 22, fp) != 22)
        throw std::runtime_error("parse_zip_footer: failed fread");

    // Fields are little-endian and not naturally aligned inside the record, so
    // they are assembled from individual bytes instead of being type-punned.
    const auto le16 = [&footer](size_t off) {
        return static_cast<uint16_t>(footer[off] | (footer[off + 1] << 8));
    };
    const auto le32 = [&footer](size_t off) {
        return static_cast<uint32_t>(footer[off]) | (static_cast<uint32_t>(footer[off + 1]) << 8) |
               (static_cast<uint32_t>(footer[off + 2]) << 16) | (static_cast<uint32_t>(footer[off + 3]) << 24);
    };

    // "PK\x05\x06": if it is absent the last 22 bytes are not the record at all
    // (for instance because the archive ends with a comment).
    if (le32(0) != 0x06054b50u)
        throw std::runtime_error("parse_zip_footer: end of central directory record not found");

    const uint16_t disk_no = le16(4);
    const uint16_t disk_start = le16(6);
    const uint16_t nrecs_on_disk = le16(8);
    nrecs = le16(10);
    global_header_size = le32(12);
    global_header_offset = le32(16);
    const uint16_t comment_len = le16(20);

    if (disk_no != 0 || disk_start != 0 || nrecs_on_disk != nrecs)
        throw std::runtime_error("parse_zip_footer: multi-disk archives are not supported");
    if (comment_len != 0)
        throw std::runtime_error("parse_zip_footer: archives with a comment are not supported");
}
|
|
|
// Reads one complete npy array from `fp`: the header is parsed first, then
// exactly num_bytes() bytes of element data are read into a new NpyArray.
// Throws std::runtime_error when fewer data bytes than expected can be read;
// exceptions from cnpy::parse_npy_header propagate unchanged.
cnpy::NpyArray load_the_npy_file(FILE* fp)
{
    // Header fields, all filled in by parse_npy_header.
    size_t elem_size = 0;
    bool is_fortran = false;
    std::vector<size_t> dims;
    std::string dtype_name;
    cnpy::parse_npy_header(fp, elem_size, dims, is_fortran, dtype_name);

    cnpy::NpyArray result(dims, elem_size, is_fortran, dtype_name);

    const size_t bytes_read = fread(result.data<char>(), 1, result.num_bytes(), fp);
    if (bytes_read == result.num_bytes())
        return result;

    throw std::runtime_error("load_the_npy_file: failed fread");
}
|
|
|
// Loads one compressed entry of an npz archive. `fp` must be positioned at the
// first byte of the entry's compressed data; `compr_bytes` and `uncompr_bytes`
// are the sizes taken from the entry's local zip header.
//
// The zlib inflate step is compiled out (#if 0) in this file, so a compressed
// entry cannot be decoded here. Without inflate the uncompressed buffer would
// stay zero-filled and header parsing would fail on it with an unrelated
// error, so the function reports the real cause explicitly instead. The code
// after the throw is kept so that re-enabling the inflate block (which needs
// <zlib.h>) restores full behaviour.
//
// Throws std::runtime_error on a short read, when inflate is disabled, or when
// the parsed array is larger than the uncompressed payload.
cnpy::NpyArray load_the_npz_array(FILE* fp, uint32_t compr_bytes, uint32_t uncompr_bytes)
{
    std::vector<unsigned char> buffer_compr(compr_bytes);
    std::vector<unsigned char> buffer_uncompr(uncompr_bytes);

    // data() is valid for an empty vector, unlike &v[0].
    const size_t nread = fread(buffer_compr.data(), 1, compr_bytes, fp);
    if (nread != compr_bytes)
        throw std::runtime_error("load_the_npz_array: failed fread");

#if 0
    int err;
    z_stream d_stream;

    d_stream.zalloc = Z_NULL;
    d_stream.zfree = Z_NULL;
    d_stream.opaque = Z_NULL;
    d_stream.avail_in = 0;
    d_stream.next_in = Z_NULL;
    err = inflateInit2(&d_stream, -MAX_WBITS);

    d_stream.avail_in = compr_bytes;
    d_stream.next_in = &buffer_compr[0];
    d_stream.avail_out = uncompr_bytes;
    d_stream.next_out = &buffer_uncompr[0];

    err = inflate(&d_stream, Z_FINISH);
    err = inflateEnd(&d_stream);
#else
    throw std::runtime_error("load_the_npz_array: compressed npz entries are not supported (zlib inflate is disabled)");
#endif

    std::vector<size_t> shape;
    size_t word_size = 0;
    bool fortran_order = false;
    std::string typeName;
    cnpy::parse_npy_header(buffer_uncompr.data(), word_size, shape, fortran_order, typeName);

    cnpy::NpyArray array(shape, word_size, fortran_order, typeName);

    // The element data occupies the tail of the uncompressed payload. Guard the
    // subtraction so that an inconsistent header cannot make memcpy read
    // outside the buffer.
    if (array.num_bytes() > uncompr_bytes)
        throw std::runtime_error("load_the_npz_array: array data is larger than the uncompressed entry");
    const size_t offset = uncompr_bytes - array.num_bytes();
    memcpy(array.data<unsigned char>(), buffer_uncompr.data() + offset, array.num_bytes());

    return array;
}
|
|
|
// Loads every array stored in the npz archive `fname` and returns them keyed by
// entry name with the final four characters (the ".npy" suffix) removed.
//
// Entries are read sequentially from their local zip headers until a record
// that is not a local file header is reached. Entries with compression method 0
// go through load_the_npy_file, all others through load_the_npz_array.
// Throws std::runtime_error if the file cannot be opened or a read comes up
// short; exceptions from the two loaders propagate unchanged.
cnpy::npz_t cnpy::npz_load(std::string fname)
{
    // Owning handle: the file is closed on every exit path, including the
    // exceptions thrown below and by the loaders.
    std::unique_ptr<FILE, int (*)(FILE*)> fp(fopen(fname.c_str(), "rb"), &fclose);

    if (!fp) {
        throw std::runtime_error("npz_load: Error! Unable to open file " + fname + "!");
    }

    unsigned char local_header[30];

    // Header fields are little-endian and not naturally aligned (the 32-bit
    // sizes sit at offsets 18 and 22), so they are assembled from bytes
    // instead of being type-punned.
    const auto le16 = [&local_header](size_t off) {
        return static_cast<uint16_t>(local_header[off] | (local_header[off + 1] << 8));
    };
    const auto le32 = [&local_header](size_t off) {
        return static_cast<uint32_t>(local_header[off]) | (static_cast<uint32_t>(local_header[off + 1]) << 8) |
               (static_cast<uint32_t>(local_header[off + 2]) << 16) |
               (static_cast<uint32_t>(local_header[off + 3]) << 24);
    };

    cnpy::npz_t arrays;

    while (true) {
        if (fread(local_header, sizeof(unsigned char), 30, fp.get()) != 30)
            throw std::runtime_error("npz_load: failed fread");

        // Anything other than a local file header ("PK\x03\x04") ends the entry list.
        if (local_header[2] != 0x03 || local_header[3] != 0x04)
            break;

        // Entry name.
        const uint16_t name_len = le16(26);
        std::string varname(name_len, ' ');
        if (fread(&varname[0], sizeof(char), name_len, fp.get()) != name_len)
            throw std::runtime_error("npz_load: failed fread");

        // Drop the trailing ".npy"; names shorter than four characters are kept
        // as they are rather than erasing before begin().
        if (varname.size() >= 4)
            varname.erase(varname.end() - 4, varname.end());

        // The extra field is read and discarded.
        const uint16_t extra_field_len = le16(28);
        if (extra_field_len > 0) {
            std::vector<char> discarded(extra_field_len);
            if (fread(discarded.data(), sizeof(char), extra_field_len, fp.get()) != extra_field_len)
                throw std::runtime_error("npz_load: failed fread");
        }

        const uint16_t compr_method = le16(8);
        const uint32_t compr_bytes = le32(18);
        const uint32_t uncompr_bytes = le32(22);

        if (compr_method == 0) {
            arrays[varname] = load_the_npy_file(fp.get());
        } else {
            arrays[varname] = load_the_npz_array(fp.get(), compr_bytes, uncompr_bytes);
        }
    }

    return arrays;
}
|
|
|
// Loads the single array named `varname` from the npz archive `fname`.
//
// Entries are scanned sequentially from their local zip headers. An entry's
// name is compared after its final four characters (the ".npy" suffix) have
// been removed; entries that do not match are skipped.
// Throws std::runtime_error if the file cannot be opened, a read or seek
// fails, or no entry called `varname` exists; exceptions from the loaders
// propagate unchanged.
cnpy::NpyArray cnpy::npz_load(std::string fname, std::string varname)
{
    // Owning handle: the file is closed on every exit path, including the
    // exceptions thrown below and by the loaders.
    std::unique_ptr<FILE, int (*)(FILE*)> fp(fopen(fname.c_str(), "rb"), &fclose);

    if (!fp)
        throw std::runtime_error("npz_load: Unable to open file " + fname);

    unsigned char local_header[30];

    // Header fields are little-endian and not naturally aligned (the 32-bit
    // sizes sit at offsets 18 and 22), so they are assembled from bytes
    // instead of being type-punned.
    const auto le16 = [&local_header](size_t off) {
        return static_cast<uint16_t>(local_header[off] | (local_header[off + 1] << 8));
    };
    const auto le32 = [&local_header](size_t off) {
        return static_cast<uint32_t>(local_header[off]) | (static_cast<uint32_t>(local_header[off + 1]) << 8) |
               (static_cast<uint32_t>(local_header[off + 2]) << 16) |
               (static_cast<uint32_t>(local_header[off + 3]) << 24);
    };

    while (true) {
        if (fread(local_header, sizeof(unsigned char), 30, fp.get()) != 30)
            throw std::runtime_error("npz_load: failed fread");

        // Anything other than a local file header ("PK\x03\x04") ends the entry list.
        if (local_header[2] != 0x03 || local_header[3] != 0x04)
            break;

        // Entry name.
        const uint16_t name_len = le16(26);
        std::string vname(name_len, ' ');
        if (fread(&vname[0], sizeof(char), name_len, fp.get()) != name_len)
            throw std::runtime_error("npz_load: failed fread");

        // Drop the trailing ".npy"; names shorter than four characters are kept
        // as they are rather than erasing before begin().
        if (vname.size() >= 4)
            vname.erase(vname.end() - 4, vname.end());

        // Skip the extra field.
        const uint16_t extra_field_len = le16(28);
        if (fseek(fp.get(), extra_field_len, SEEK_CUR) != 0)
            throw std::runtime_error("npz_load: failed fseek");

        const uint16_t compr_method = le16(8);
        const uint32_t compr_bytes = le32(18);
        const uint32_t uncompr_bytes = le32(22);

        if (vname == varname) {
            return (compr_method == 0) ? load_the_npy_file(fp.get())
                                       : load_the_npz_array(fp.get(), compr_bytes, uncompr_bytes);
        }

        // Skip this entry's data. The number of bytes stored in the archive is
        // the COMPRESSED size (offset 18); skipping by the uncompressed size
        // (offset 22) lands in the wrong place for every compressed entry.
        if (compr_bytes == 0xFFFFFFFFu) {
            // 0xFFFFFFFF is the ZIP64 marker: the real size lives in the extra
            // field, which is not parsed here.
            throw std::runtime_error("npz_load: ZIP64 entries cannot be skipped");
        }
        if (fseek(fp.get(), static_cast<long>(compr_bytes), SEEK_CUR) != 0)
            throw std::runtime_error("npz_load: failed fseek");
    }

    // The whole entry list was scanned without finding the requested name.
    throw std::runtime_error("npz_load: Variable name " + varname + " not found in " + fname);
}
|
|
|
// Loads the single array stored in the npy file `fname`.
// Throws std::runtime_error if the file cannot be opened; exceptions from
// load_the_npy_file propagate unchanged.
cnpy::NpyArray cnpy::npy_load(std::string fname)
{
    // Owning handle: the file is closed on every exit path, including when
    // load_the_npy_file throws (the raw FILE* used to leak in that case).
    std::unique_ptr<FILE, int (*)(FILE*)> fp(fopen(fname.c_str(), "rb"), &fclose);

    if (!fp)
        throw std::runtime_error("npy_load: Unable to open file " + fname);

    return load_the_npy_file(fp.get());
}
|
|