402 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
		
		
			
		
	
	
			402 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
|  | // Copyright (C) 2011  Carl Rogers
 | ||
|  | // Released under MIT License
 | ||
|  | // license available in LICENSE file, or at http://www.opensource.org/licenses/mit-license.php
 | ||
|  | 
 | ||
|  | #include "cnpy.h"
 | ||
|  | 
 | ||
|  | #include <stdint.h>
 | ||
|  | 
 | ||
|  | #include <algorithm>
 | ||
|  | #include <complex>
 | ||
|  | #include <cstdlib>
 | ||
|  | #include <cstring>
 | ||
|  | #include <iomanip>
 | ||
|  | #include <regex>
 | ||
|  | #include <stdexcept>
 | ||
|  | 
 | ||
|  | char cnpy::BigEndianTest(int size) | ||
|  | { | ||
|  |   if (size == 1) | ||
|  |     return '|'; | ||
|  |   int x = 1; | ||
|  |   return (((char*)&x)[0]) ? '<' : '>'; | ||
|  | } | ||
|  | 
 | ||
|  | char cnpy::map_type(const std::type_info& t) | ||
|  | { | ||
|  |   if (t == typeid(float)) | ||
|  |     return 'f'; | ||
|  |   if (t == typeid(double)) | ||
|  |     return 'f'; | ||
|  |   if (t == typeid(long double)) | ||
|  |     return 'f'; | ||
|  | 
 | ||
|  |   if (t == typeid(int)) | ||
|  |     return 'i'; | ||
|  |   if (t == typeid(char)) | ||
|  |     return 'i'; | ||
|  |   if (t == typeid(signed char)) | ||
|  |     return 'i'; | ||
|  |   if (t == typeid(short)) | ||
|  |     return 'i'; | ||
|  |   if (t == typeid(long)) | ||
|  |     return 'i'; | ||
|  |   if (t == typeid(long long)) | ||
|  |     return 'i'; | ||
|  | 
 | ||
|  |   if (t == typeid(unsigned char)) | ||
|  |     return 'u'; | ||
|  |   if (t == typeid(unsigned short)) | ||
|  |     return 'u'; | ||
|  |   if (t == typeid(unsigned long)) | ||
|  |     return 'u'; | ||
|  |   if (t == typeid(unsigned long long)) | ||
|  |     return 'u'; | ||
|  |   if (t == typeid(unsigned int)) | ||
|  |     return 'u'; | ||
|  | 
 | ||
|  |   if (t == typeid(bool)) | ||
|  |     return 'b'; | ||
|  | 
 | ||
|  |   if (t == typeid(std::complex<float>)) | ||
|  |     return 'c'; | ||
|  |   if (t == typeid(std::complex<double>)) | ||
|  |     return 'c'; | ||
|  |   if (t == typeid(std::complex<long double>)) | ||
|  |     return 'c'; | ||
|  | 
 | ||
|  |   else | ||
|  |     return '?'; | ||
|  | } | ||
|  | 
 | ||
|  | template <> | ||
|  | std::vector<char>& cnpy::operator+=(std::vector<char>& lhs, const std::string rhs) | ||
|  | { | ||
|  |   lhs.insert(lhs.end(), rhs.begin(), rhs.end()); | ||
|  |   return lhs; | ||
|  | } | ||
|  | 
 | ||
|  | template <> | ||
|  | std::vector<char>& cnpy::operator+=(std::vector<char>& lhs, const char* rhs) | ||
|  | { | ||
|  |   // write in little endian
 | ||
|  |   size_t len = strlen(rhs); | ||
|  |   lhs.reserve(len); | ||
|  |   for (size_t byte = 0; byte < len; byte++) { | ||
|  |     lhs.push_back(rhs[byte]); | ||
|  |   } | ||
|  |   return lhs; | ||
|  | } | ||
|  | 
 | ||
|  | void cnpy::parse_npy_header(unsigned char* buffer, size_t& word_size, std::vector<size_t>& shape, bool& fortran_order, | ||
|  |                             std::string& typeName) | ||
|  | { | ||
|  |   // std::string magic_string(buffer,6);
 | ||
|  |   uint8_t     major_version = *reinterpret_cast<uint8_t*>(buffer + 6); | ||
|  |   uint8_t     minor_version = *reinterpret_cast<uint8_t*>(buffer + 7); | ||
|  |   uint16_t    header_len    = *reinterpret_cast<uint16_t*>(buffer + 8); | ||
|  |   std::string header(reinterpret_cast<char*>(buffer + 9), header_len); | ||
|  | 
 | ||
|  |   size_t loc1, loc2; | ||
|  | 
 | ||
|  |   // fortran order
 | ||
|  |   loc1          = header.find("fortran_order") + 16; | ||
|  |   fortran_order = (header.substr(loc1, 4) == "True" ? true : false); | ||
|  |   if (fortran_order) | ||
|  |     throw std::runtime_error("npy input file: 'fortran_order' must be false, use: arr2 = np.ascontiguousarray(arr1)"); | ||
|  | 
 | ||
|  |   // shape
 | ||
|  |   loc1 = header.find("("); | ||
|  |   loc2 = header.find(")"); | ||
|  | 
 | ||
|  |   std::regex  num_regex("[0-9][0-9]*"); | ||
|  |   std::smatch sm; | ||
|  |   shape.clear(); | ||
|  | 
 | ||
|  |   std::string str_shape = header.substr(loc1 + 1, loc2 - loc1 - 1); | ||
|  |   while (std::regex_search(str_shape, sm, num_regex)) { | ||
|  |     shape.push_back(std::stoi(sm[0].str())); | ||
|  |     str_shape = sm.suffix().str(); | ||
|  |   } | ||
|  | 
 | ||
|  |   // endian, word size, data type
 | ||
|  |   // byte order code | stands for not applicable.
 | ||
|  |   // not sure when this applies except for byte array
 | ||
|  |   loc1              = header.find("descr") + 9; | ||
|  |   bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false); | ||
|  |   assert(littleEndian); | ||
|  | 
 | ||
|  |   // char type = header[loc1+1];
 | ||
|  |   // assert(type == map_type(T));
 | ||
|  | 
 | ||
|  |   std::string str_ws = header.substr(loc1 + 2); | ||
|  |   loc2               = str_ws.find("'"); | ||
|  |   word_size          = atoi(str_ws.substr(0, loc2).c_str()); | ||
|  |   if (header.substr(loc1 + 1, 1) == "i") { | ||
|  |     typeName = "int"; | ||
|  |   } else if (header.substr(loc1 + 1, 1) == "u") { | ||
|  |     typeName = "uint"; | ||
|  |   } else if (header.substr(loc1 + 1, 1) == "f") { | ||
|  |     typeName = "float"; | ||
|  |   } | ||
|  |   typeName = typeName + std::to_string(word_size * 8); | ||
|  | } | ||
|  | 
 | ||
|  | void cnpy::parse_npy_header(FILE* fp, size_t& word_size, std::vector<size_t>& shape, bool& fortran_order, | ||
|  |                             std::string& typeName) | ||
|  | { | ||
|  |   char   buffer[256]; | ||
|  |   size_t res = fread(buffer, sizeof(char), 11, fp); | ||
|  |   if (res != 11) | ||
|  |     throw std::runtime_error("parse_npy_header: failed fread"); | ||
|  |   std::string header = fgets(buffer, 256, fp); | ||
|  |   assert(header[header.size() - 1] == '\n'); | ||
|  | 
 | ||
|  |   size_t loc1, loc2; | ||
|  | 
 | ||
|  |   // fortran order
 | ||
|  |   loc1 = header.find("fortran_order"); | ||
|  |   if (loc1 == std::string::npos) | ||
|  |     throw std::runtime_error("parse_npy_header: failed to find header keyword: 'fortran_order'"); | ||
|  |   loc1 += 16; | ||
|  |   fortran_order = (header.substr(loc1, 4) == "True" ? true : false); | ||
|  |   if (fortran_order) | ||
|  |     throw std::runtime_error("npy input file: 'fortran_order' must be false, use: arr2 = np.ascontiguousarray(arr1)"); | ||
|  | 
 | ||
|  |   // shape
 | ||
|  |   loc1 = header.find("("); | ||
|  |   loc2 = header.find(")"); | ||
|  |   if (loc1 == std::string::npos || loc2 == std::string::npos) | ||
|  |     throw std::runtime_error("parse_npy_header: failed to find header keyword: '(' or ')'"); | ||
|  | 
 | ||
|  |   std::regex  num_regex("[0-9][0-9]*"); | ||
|  |   std::smatch sm; | ||
|  |   shape.clear(); | ||
|  | 
 | ||
|  |   std::string str_shape = header.substr(loc1 + 1, loc2 - loc1 - 1); | ||
|  |   while (std::regex_search(str_shape, sm, num_regex)) { | ||
|  |     shape.push_back(std::stoi(sm[0].str())); | ||
|  |     str_shape = sm.suffix().str(); | ||
|  |   } | ||
|  | 
 | ||
|  |   // endian, word size, data type
 | ||
|  |   // byte order code | stands for not applicable.
 | ||
|  |   // not sure when this applies except for byte array
 | ||
|  |   loc1 = header.find("descr"); | ||
|  |   if (loc1 == std::string::npos) | ||
|  |     throw std::runtime_error("parse_npy_header: failed to find header keyword: 'descr'"); | ||
|  |   loc1 += 9; | ||
|  |   bool littleEndian = (header[loc1] == '<' || header[loc1] == '|' ? true : false); | ||
|  |   assert(littleEndian); | ||
|  | 
 | ||
|  |   // char type = header[loc1+1];
 | ||
|  |   // assert(type == map_type(T));
 | ||
|  | 
 | ||
|  |   std::string str_ws = header.substr(loc1 + 2); | ||
|  |   loc2               = str_ws.find("'"); | ||
|  |   word_size          = atoi(str_ws.substr(0, loc2).c_str()); | ||
|  |   if (header.substr(loc1 + 1, 1) == "i") { | ||
|  |     typeName = "int"; | ||
|  |   } else if (header.substr(loc1 + 1, 1) == "u") { | ||
|  |     typeName = "uint"; | ||
|  |   } else if (header.substr(loc1 + 1, 1) == "f") { | ||
|  |     typeName = "float"; | ||
|  |   } | ||
|  |   typeName = typeName + std::to_string(word_size * 8); | ||
|  | } | ||
|  | 
 | ||
|  | void cnpy::parse_zip_footer(FILE* fp, uint16_t& nrecs, size_t& global_header_size, size_t& global_header_offset) | ||
|  | { | ||
|  |   std::vector<char> footer(22); | ||
|  |   fseek(fp, -22, SEEK_END); | ||
|  |   size_t res = fread(&footer[0], sizeof(char), 22, fp); | ||
|  |   if (res != 22) | ||
|  |     throw std::runtime_error("parse_zip_footer: failed fread"); | ||
|  | 
 | ||
|  |   uint16_t disk_no, disk_start, nrecs_on_disk, comment_len; | ||
|  |   disk_no              = *(uint16_t*)&footer[4]; | ||
|  |   disk_start           = *(uint16_t*)&footer[6]; | ||
|  |   nrecs_on_disk        = *(uint16_t*)&footer[8]; | ||
|  |   nrecs                = *(uint16_t*)&footer[10]; | ||
|  |   global_header_size   = *(uint32_t*)&footer[12]; | ||
|  |   global_header_offset = *(uint32_t*)&footer[16]; | ||
|  |   comment_len          = *(uint16_t*)&footer[20]; | ||
|  | 
 | ||
|  |   assert(disk_no == 0); | ||
|  |   assert(disk_start == 0); | ||
|  |   assert(nrecs_on_disk == nrecs); | ||
|  |   assert(comment_len == 0); | ||
|  | } | ||
|  | 
 | ||
|  | cnpy::NpyArray load_the_npy_file(FILE* fp) | ||
|  | { | ||
|  |   std::vector<size_t> shape; | ||
|  |   size_t              word_size; | ||
|  |   std::string         typeName; | ||
|  |   bool                fortran_order; | ||
|  |   cnpy::parse_npy_header(fp, word_size, shape, fortran_order, typeName); | ||
|  | 
 | ||
|  |   cnpy::NpyArray arr(shape, word_size, fortran_order, typeName); | ||
|  |   size_t         nread = fread(arr.data<char>(), 1, arr.num_bytes(), fp); | ||
|  |   if (nread != arr.num_bytes()) | ||
|  |     throw std::runtime_error("load_the_npy_file: failed fread"); | ||
|  |   return arr; | ||
|  | } | ||
|  | 
 | ||
|  | cnpy::NpyArray load_the_npz_array(FILE* fp, uint32_t compr_bytes, uint32_t uncompr_bytes) | ||
|  | { | ||
|  |   std::vector<unsigned char> buffer_compr(compr_bytes); | ||
|  |   std::vector<unsigned char> buffer_uncompr(uncompr_bytes); | ||
|  |   size_t                     nread = fread(&buffer_compr[0], 1, compr_bytes, fp); | ||
|  |   if (nread != compr_bytes) | ||
|  |     throw std::runtime_error("load_the_npy_file: failed fread"); | ||
|  | 
 | ||
|  |   int      err; | ||
|  |   z_stream d_stream; | ||
|  | 
 | ||
|  |   d_stream.zalloc   = Z_NULL; | ||
|  |   d_stream.zfree    = Z_NULL; | ||
|  |   d_stream.opaque   = Z_NULL; | ||
|  |   d_stream.avail_in = 0; | ||
|  |   d_stream.next_in  = Z_NULL; | ||
|  |   err               = inflateInit2(&d_stream, -MAX_WBITS); | ||
|  | 
 | ||
|  |   d_stream.avail_in  = compr_bytes; | ||
|  |   d_stream.next_in   = &buffer_compr[0]; | ||
|  |   d_stream.avail_out = uncompr_bytes; | ||
|  |   d_stream.next_out  = &buffer_uncompr[0]; | ||
|  | 
 | ||
|  |   err = inflate(&d_stream, Z_FINISH); | ||
|  |   err = inflateEnd(&d_stream); | ||
|  | 
 | ||
|  |   std::vector<size_t> shape; | ||
|  |   size_t              word_size; | ||
|  |   bool                fortran_order; | ||
|  |   std::string         typeName; | ||
|  |   cnpy::parse_npy_header(&buffer_uncompr[0], word_size, shape, fortran_order, typeName); | ||
|  | 
 | ||
|  |   cnpy::NpyArray array(shape, word_size, fortran_order, typeName); | ||
|  | 
 | ||
|  |   size_t offset = uncompr_bytes - array.num_bytes(); | ||
|  |   memcpy(array.data<unsigned char>(), &buffer_uncompr[0] + offset, array.num_bytes()); | ||
|  | 
 | ||
|  |   return array; | ||
|  | } | ||
|  | 
 | ||
|  | cnpy::npz_t cnpy::npz_load(std::string fname) | ||
|  | { | ||
|  |   FILE* fp = fopen(fname.c_str(), "rb"); | ||
|  | 
 | ||
|  |   if (!fp) { | ||
|  |     throw std::runtime_error("npz_load: Error! Unable to open file " + fname + "!"); | ||
|  |   } | ||
|  | 
 | ||
|  |   cnpy::npz_t arrays; | ||
|  | 
 | ||
|  |   while (1) { | ||
|  |     std::vector<char> local_header(30); | ||
|  |     size_t            headerres = fread(&local_header[0], sizeof(char), 30, fp); | ||
|  |     if (headerres != 30) | ||
|  |       throw std::runtime_error("npz_load: failed fread"); | ||
|  | 
 | ||
|  |     // if we've reached the global header, stop reading
 | ||
|  |     if (local_header[2] != 0x03 || local_header[3] != 0x04) | ||
|  |       break; | ||
|  | 
 | ||
|  |     // read in the variable name
 | ||
|  |     uint16_t    name_len = *(uint16_t*)&local_header[26]; | ||
|  |     std::string varname(name_len, ' '); | ||
|  |     size_t      vname_res = fread(&varname[0], sizeof(char), name_len, fp); | ||
|  |     if (vname_res != name_len) | ||
|  |       throw std::runtime_error("npz_load: failed fread"); | ||
|  | 
 | ||
|  |     // erase the lagging .npy
 | ||
|  |     varname.erase(varname.end() - 4, varname.end()); | ||
|  | 
 | ||
|  |     // read in the extra field
 | ||
|  |     uint16_t extra_field_len = *(uint16_t*)&local_header[28]; | ||
|  |     if (extra_field_len > 0) { | ||
|  |       std::vector<char> buff(extra_field_len); | ||
|  |       size_t            efield_res = fread(&buff[0], sizeof(char), extra_field_len, fp); | ||
|  |       if (efield_res != extra_field_len) | ||
|  |         throw std::runtime_error("npz_load: failed fread"); | ||
|  |     } | ||
|  | 
 | ||
|  |     uint16_t compr_method  = *reinterpret_cast<uint16_t*>(&local_header[0] + 8); | ||
|  |     uint32_t compr_bytes   = *reinterpret_cast<uint32_t*>(&local_header[0] + 18); | ||
|  |     uint32_t uncompr_bytes = *reinterpret_cast<uint32_t*>(&local_header[0] + 22); | ||
|  | 
 | ||
|  |     if (compr_method == 0) { | ||
|  |       arrays[varname] = load_the_npy_file(fp); | ||
|  |     } else { | ||
|  |       arrays[varname] = load_the_npz_array(fp, compr_bytes, uncompr_bytes); | ||
|  |     } | ||
|  |   } | ||
|  | 
 | ||
|  |   fclose(fp); | ||
|  |   return arrays; | ||
|  | } | ||
|  | 
 | ||
|  | cnpy::NpyArray cnpy::npz_load(std::string fname, std::string varname) | ||
|  | { | ||
|  |   FILE* fp = fopen(fname.c_str(), "rb"); | ||
|  | 
 | ||
|  |   if (!fp) | ||
|  |     throw std::runtime_error("npz_load: Unable to open file " + fname); | ||
|  | 
 | ||
|  |   while (1) { | ||
|  |     std::vector<char> local_header(30); | ||
|  |     size_t            header_res = fread(&local_header[0], sizeof(char), 30, fp); | ||
|  |     if (header_res != 30) | ||
|  |       throw std::runtime_error("npz_load: failed fread"); | ||
|  | 
 | ||
|  |     // if we've reached the global header, stop reading
 | ||
|  |     if (local_header[2] != 0x03 || local_header[3] != 0x04) | ||
|  |       break; | ||
|  | 
 | ||
|  |     // read in the variable name
 | ||
|  |     uint16_t    name_len = *(uint16_t*)&local_header[26]; | ||
|  |     std::string vname(name_len, ' '); | ||
|  |     size_t      vname_res = fread(&vname[0], sizeof(char), name_len, fp); | ||
|  |     if (vname_res != name_len) | ||
|  |       throw std::runtime_error("npz_load: failed fread"); | ||
|  |     vname.erase(vname.end() - 4, vname.end()); // erase the lagging .npy
 | ||
|  | 
 | ||
|  |     // read in the extra field
 | ||
|  |     uint16_t extra_field_len = *(uint16_t*)&local_header[28]; | ||
|  |     fseek(fp, extra_field_len, SEEK_CUR); // skip past the extra field
 | ||
|  | 
 | ||
|  |     uint16_t compr_method  = *reinterpret_cast<uint16_t*>(&local_header[0] + 8); | ||
|  |     uint32_t compr_bytes   = *reinterpret_cast<uint32_t*>(&local_header[0] + 18); | ||
|  |     uint32_t uncompr_bytes = *reinterpret_cast<uint32_t*>(&local_header[0] + 22); | ||
|  | 
 | ||
|  |     if (vname == varname) { | ||
|  |       NpyArray array = (compr_method == 0) ? load_the_npy_file(fp) : load_the_npz_array(fp, compr_bytes, uncompr_bytes); | ||
|  |       fclose(fp); | ||
|  |       return array; | ||
|  |     } else { | ||
|  |       // skip past the data
 | ||
|  |       uint32_t size = *(uint32_t*)&local_header[22]; | ||
|  |       fseek(fp, size, SEEK_CUR); | ||
|  |     } | ||
|  |   } | ||
|  | 
 | ||
|  |   fclose(fp); | ||
|  | 
 | ||
|  |   // if we get here, we haven't found the variable in the file
 | ||
|  |   throw std::runtime_error("npz_load: Variable name " + varname + " not found in " + fname); | ||
|  | } | ||
|  | 
 | ||
|  | cnpy::NpyArray cnpy::npy_load(std::string fname) | ||
|  | { | ||
|  |   FILE* fp = fopen(fname.c_str(), "rb"); | ||
|  | 
 | ||
|  |   if (!fp) | ||
|  |     throw std::runtime_error("npy_load: Unable to open file " + fname); | ||
|  | 
 | ||
|  |   NpyArray arr = load_the_npy_file(fp); | ||
|  | 
 | ||
|  |   fclose(fp); | ||
|  |   return arr; | ||
|  | } |