#! python
import h5py
import bigfile
import logging
import argparse

# Command-line interface.  The parser lives at module level so that the
# ``__main__`` guard at the bottom of the file can use it.
ap = argparse.ArgumentParser('hdf2bigfile', description="""
    Converting a HDF5 file to a bigfile. DataGroup is converted as sub-file,
    Composite Data Types are converted as sub-file, Simple Columns are
    converted as BigBlock. Shape are converted to ndarray.shape attribute.
    Vector columns in a DataSet is converted to 2d tables (nmemb >= 1)
    Multidimensional arrays are flattened, and the orginal shape is saved as
    ndarray.shape attribute. Strides are lost: arrays are always written as
    C-contiguous.

    Currently, attributes are not carried over.
    Currently, include and exclude filters do not work.
    """)

ap.add_argument("hdf5")
ap.add_argument("bigfile")
ap.add_argument("--verify", action='store_true', default=True)
# ``--verify`` defaults to True and is a store_true flag, so on its own it can
# never be switched off; ``--no-verify`` writes False to the same destination.
ap.add_argument("--no-verify", dest="verify", action='store_false',
                help="skip the verification pass after copying")
ap.add_argument("--include", action="append")
ap.add_argument("--exclude", action="append")


def main(ns):
    """Run one conversion described by the parsed command-line namespace.

    Parameters
    ----------
    ns : argparse.Namespace
        Must provide ``hdf5`` (input path), ``bigfile`` (output path) and
        ``verify`` (whether to run the verification pass).
    """
    hin = h5py.File(ns.hdf5, mode='r')
    try:
        bout = bigfile.BigFile(ns.bigfile, create=True)
        try:
            convert(hin, bout, ns.verify, bigfile_path=ns.bigfile)
        finally:
            # Close the output first, then the input, even if the
            # conversion or the verification raised.
            bout.close()
    finally:
        hin.close()


def convert(hin, bout, verify=False, bigfile_path=None):
    """Copy every dataset reachable from ``hin`` into ``bout``.

    Parameters
    ----------
    hin : h5py.File or group-like object
        Source object; anything exposing ``keys()`` is walked recursively.
    bout : bigfile.BigFile
        Destination file, already opened for writing.
    verify : bool
        When true, walk the source a second time and compare every column
        that was written against the source data.
    bigfile_path : str or None
        Path of the destination on disk.  When given, verification re-opens
        that path and reads back from the re-opened file; when ``None``
        verification reads through ``bout`` itself.

    Raises
    ------
    ValueError
        If a column dtype has more than one sub-array dimension.
    AssertionError
        If verification finds a dtype, shape or data mismatch.
    """

    def traverse(hobj, operation, prefix="", dtype=None):
        """Apply ``operation(hobj, path, dtype)`` to every leaf column."""
        if hasattr(hobj, 'keys'):
            # this is a datagroup
            for k in hobj.keys():
                traverse(hobj[k], operation, prefix + '/' + k)
        if hasattr(hobj, 'dtype'):
            # this is a dataset
            if hobj.dtype.names:
                # Composite type: every field becomes its own column.
                # NOTE(review): the field path ends with '/', exactly as in
                # the original code -- confirm how bigfile treats a trailing
                # slash in block names.
                for field in hobj.dtype.names:
                    path = prefix + '/' + field + '/'
                    traverse(hobj[field], operation, path, hobj.dtype[field])
            else:
                if dtype is None:
                    dtype = hobj.dtype
                operation(hobj, prefix, dtype)

    def create(hobj, path, dtype):
        """Create the column ``path`` in ``bout`` and write ``hobj`` to it."""
        print("Copying %s as %s %s" % (str(hobj), path, dtype))
        if len(dtype.shape) > 1:
            raise ValueError("vector types unsupported")
        isvector = len(dtype.shape) != 0
        if isvector:
            # The unpacking doubles as a check that the data is 2-d.
            size, _ = hobj.shape
        else:
            size = hobj.size
        bb = bout.create(path, dtype, size=size)
        bb.write(0, hobj[:].ravel())
        if not isvector:
            # Flattened arrays remember their original shape.
            bb.attrs['ndarray.shape'] = hobj.shape
        print(bb)
        print(bb.attrs)

    traverse(hin, create)

    if not verify:
        return

    # Read back through a freshly opened handle when the on-disk path is
    # known, so that the check exercises what was actually stored.
    reopened = None
    if bigfile_path is not None:
        reopened = bigfile.BigFile(bigfile_path, create=False)
    target = bout if reopened is None else reopened

    def check(hobj, path, dtype):
        """Compare the stored column ``path`` against the source ``hobj``.

        Failures are raised explicitly (not via ``assert``) so that the
        check still runs under ``python -O``.
        """
        print("Verifying %s against %s" % (str(hobj), path))
        bb = target[path]
        # Compare against the dtype the column was created with.
        if dtype != bb.dtype:
            raise AssertionError(
                "dtype mismatch at %s: %s != %s" % (path, dtype, bb.dtype))
        if len(dtype.shape) == 0:
            # 'ndarray.shape' is only written for non-vector columns.
            stored_shape = tuple(bb.attrs['ndarray.shape'])
            if stored_shape != tuple(hobj.shape):
                raise AssertionError(
                    "shape mismatch at %s: %s != %s"
                    % (path, hobj.shape, stored_shape))
        # Flatten both sides: the data was written flattened.
        if not (hobj[:].ravel() == bb[:].ravel()).all():
            raise AssertionError("data mismatch at %s" % path)

    try:
        traverse(hin, check)
    finally:
        if reopened is not None:
            reopened.close()


if __name__ == "__main__":
    ns = ap.parse_args()
    main(ns)