Import tensorflow

This commit is contained in:
2026-02-15 21:45:42 -08:00
parent f3e8b90764
commit c530630153
20524 changed files with 9017694 additions and 25 deletions
@@ -0,0 +1,115 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
"""
This is the h5py package, a Python interface to the HDF5
scientific data format.
"""
from warnings import warn as _warn
import atexit
# --- Library setup -----------------------------------------------------------
# When importing from the root of the unpacked tarball or git checkout,
# Python sees the "h5py" source directory and tries to load it, which fails.
# We tried working around this by using "package_dir" but that breaks Cython.
try:
    from . import _errors
except ImportError:
    import os.path as _op
    # A sibling setup.py means we are being imported from the source tree;
    # anything else is a genuine import failure and is re-raised untouched.
    if not _op.exists(_op.join(_op.dirname(__file__), '..', 'setup.py')):
        raise
    raise ImportError("You cannot import h5py from inside the install directory.\nChange to another directory first.")

from . import version

# Warn when the HDF5 version in use differs from the one h5py was built
# against.
if version.hdf5_version_tuple != version.hdf5_built_version_tuple:
    _warn(
        ("h5py is running against HDF5 {0} when it was built against {1}, "
         "this may cause problems").format(
            '{0}.{1}.{2}'.format(*version.hdf5_version_tuple),
            '{0}.{1}.{2}'.format(*version.hdf5_built_version_tuple)
        )
    )

_errors.silence_errors()

from ._conv import (
    register_converters as _register_converters,
    unregister_converters as _unregister_converters,
)

# Converters are registered now and unregistered again at interpreter exit.
_register_converters()
atexit.register(_unregister_converters)

from .h5z import _register_lzf

_register_lzf()
# --- Public API --------------------------------------------------------------
from . import h5a, h5d, h5ds, h5f, h5fd, h5g, h5r, h5s, h5t, h5p, h5z, h5pl
from ._hl import filters
from ._hl.base import is_hdf5, HLObject, Empty
from ._hl.files import (
File,
register_driver,
unregister_driver,
registered_drivers,
)
from ._hl.group import Group, SoftLink, ExternalLink, HardLink
from ._hl.dataset import Dataset
from ._hl.datatype import Datatype
from ._hl.attrs import AttributeManager
from ._hl.vds import VirtualSource, VirtualLayout
from ._selector import MultiBlockSlice
from .h5 import get_config
from .h5r import Reference, RegionReference
from .h5t import (special_dtype, check_dtype,
vlen_dtype, string_dtype, enum_dtype, ref_dtype, regionref_dtype,
opaque_dtype,
check_vlen_dtype, check_string_dtype, check_enum_dtype, check_ref_dtype,
check_opaque_dtype,
)
from .h5s import UNLIMITED
from .version import version as __version__
def run_tests(args=''):
    """Run the test suite with pytest.

    args
        Passed through unchanged to ``h5py.tests.run_tests``.

    Returns the exit status as an int.
    """
    # The tests package is imported lazily so that merely importing h5py
    # does not require the test dependencies (e.g. pytest).
    from . import tests as _tests
    return _tests.run_tests(args)
def enable_ipython_completer():
    """ Call this from an interactive IPython session to enable tab-completion
    of group and attribute names.

    Returns whatever ``ipy_completer.load_ipython_extension()`` returns.

    Raises RuntimeError if IPython has not been imported or no interactive
    shell could be detected.
    """
    import sys
    if 'IPython' in sys.modules:
        ip_running = False
        try:
            from IPython.core.interactiveshell import InteractiveShell
            ip_running = InteractiveShell.initialized()
        except ImportError:
            # support <ipython-0.11
            # The legacy module may be missing as well; in that case there is
            # simply no running shell, so fall through to the RuntimeError
            # below instead of leaking an ImportError to the caller.
            try:
                from IPython import ipapi as _ipapi
                ip_running = _ipapi.get() is not None
            except Exception:
                ip_running = False
        except Exception:
            # Best effort: any other failure while probing IPython is treated
            # as "no active session".
            pass
        if ip_running:
            from . import ipy_completer
            return ipy_completer.load_ipython_extension()

    raise RuntimeError('Completer must be enabled in active ipython session')
@@ -0,0 +1,15 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
"""
This subpackage implements the high-level interface for h5py.
Don't manually import things from here; the public API lives directly
in the top-level package namespace.
"""
@@ -0,0 +1,277 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
"""
Implements high-level operations for attributes.
Provides the AttributeManager class, available on high-level objects
as <obj>.attrs.
"""
import numpy
from .. import h5, h5s, h5t, h5a, h5p
from . import base
from .base import phil, with_phil, Empty, is_empty_dataspace, product
from .datatype import Datatype
class AttributeManager(base.MutableMappingHDF5, base.CommonStateObject):

    """
        Dictionary-style access to the attributes of an HDF5 object.

        Instances are created by the library and exposed as the Python
        attribute <object>.attrs.

        Like Group objects, attributes offer a minimal dictionary-style
        interface.  Anything which can reasonably be converted to a NumPy
        array or NumPy scalar can be stored.

        Assigning with <obj>.attrs[name] = value creates the attribute,
        deducing the HDF5 type from the value; an existing attribute is
        overwritten.

        Use modify() to change an existing attribute while keeping its type,
        and create() to request a particular type and shape.
    """

    def __init__(self, parent):
        """ Private constructor; stores the parent's low-level id.
        """
        self._id = parent.id

    @with_phil
    def __getitem__(self, name):
        """ Read the value of an attribute.

        Returns an Empty instance for an empty dataspace, a scalar for a
        zero-dimensional attribute, and a NumPy array otherwise.
        """
        attr = h5a.open(self._id, self._e(name))
        shape = attr.shape

        # A shape of None signals an empty dataspace.
        if shape is None:
            return Empty(attr.dtype)

        dtype = attr.dtype

        # Build the HDF5 type before the dtype is rewritten for top-level
        # array types below.
        htype = h5t.py_create(dtype)

        # NumPy has no top-level array types, so the type and shape are
        # "faked".  E.g. for attr.shape == (5,) and attr.dtype == '(3,)f':
        if dtype.subdtype is not None:
            subdtype, subshape = dtype.subdtype
            shape = attr.shape + subshape   # (5, 3)
            dtype = subdtype                # 'f'

        arr = numpy.zeros(shape, dtype=dtype, order='C')
        attr.read(arr, mtype=htype)

        string_info = h5t.check_string_dtype(dtype)
        if string_info and (string_info.length is None):
            # Variable-length strings arrive as bytes: decode them to str.
            decoded = [
                b.decode('utf-8', 'surrogateescape') for b in arr.flat
            ]
            arr = numpy.array(decoded, dtype=dtype).reshape(arr.shape)

        return arr[()] if arr.ndim == 0 else arr

    def get_id(self, name):
        """Get a low-level AttrID object for the named attribute.
        """
        return h5a.open(self._id, self._e(name))

    @with_phil
    def __setitem__(self, name, value):
        """ Set a new attribute, overwriting any existing attribute.

        Type and shape are taken from the data.  To pick a specific type or
        shape, or to keep the type of an existing attribute, use create()
        and modify() instead.
        """
        self.create(name, data=value)

    @with_phil
    def __delitem__(self, name):
        """ Delete an attribute (which must already exist). """
        h5a.delete(self._id, self._e(name))

    def create(self, name, data, shape=None, dtype=None):
        """ Create a new attribute, overwriting any existing attribute.

        name
            Name of the new attribute (required)
        data
            An array to initialize the attribute (required)
        shape
            Shape of the attribute.  Overrides data.shape if both are
            given, in which case the total number of points must be unchanged.
        dtype
            Data type of the attribute.  Overrides data.dtype if both
            are given.

        Raises ValueError when the requested shape is incompatible with the
        data or with an array dtype's sub-shape.
        """
        name = self._e(name)

        with phil:
            # Whether data is an Empty placeholder does not change below:
            # non-Empty data is turned into a NumPy array and stays one.
            is_empty = isinstance(data, Empty)

            # Make sure we have a NumPy array; the actual data type
            # conversion is left to HDF5.
            if not is_empty:
                data = base.array_for_new_object(data, specified_dtype=dtype)

            if shape is None:
                shape = data.shape
            elif isinstance(shape, int):
                shape = (shape,)

            # A committed type, when given, must be the one handed to
            # h5a.create.
            use_htype = None
            if isinstance(dtype, Datatype):
                use_htype = dtype.id
                dtype = dtype.dtype
            elif dtype is None:
                dtype = data.dtype
            else:
                dtype = numpy.dtype(dtype)  # e.g. a string such as 'i8'

            # Remembered for top-level array types, where dtype is replaced.
            original_dtype = dtype

            if dtype.subdtype is None:
                # Not an array type: the element count must be compatible;
                # reshape if needed.
                if shape is not None and product(shape) != product(data.shape):
                    raise ValueError("Shape of new attribute conflicts with shape of data")

                if shape != data.shape:
                    data = data.reshape(shape)
            else:
                # Top-level array type: present the data as a smaller array
                # of subarrays.
                subdtype, subshape = dtype.subdtype

                # The sub-shape must match the sizes of the last N axes.
                if shape[-len(subshape):] != subshape:
                    raise ValueError("Array dtype shape %s is incompatible with data shape %s" % (subshape, shape))

                # New "advertised" shape and dtype
                shape = shape[:len(shape) - len(subshape)]
                dtype = subdtype

            # Needed to handle special string types.
            if not is_empty:
                data = numpy.asarray(data, dtype=dtype)

            # HDF5 datatype and dataspace for the H5A calls
            if use_htype is None:
                htype = h5t.py_create(original_dtype, logical=True)
                # Bit-for-bit representation rather than logical
                htype2 = h5t.py_create(original_dtype)
            else:
                htype = use_htype
                htype2 = None

            space = h5s.create(h5s.NULL) if is_empty else h5s.create_simple(shape)

            # For a long time, h5py would create attributes with a random name
            # and then rename them, imitating how you can atomically replace
            # a file in a filesystem. But HDF5 does not offer atomic replacement
            # (you have to delete the existing attribute first), and renaming
            # exposes some bugs - see https://github.com/h5py/h5py/issues/1385
            # So we've gone back to the simpler delete & recreate model.
            if h5a.exists(self._id, name):
                h5a.delete(self._id, name)

            attr = h5a.create(self._id, name, htype, space)
            try:
                if not is_empty:
                    attr.write(data, mtype=htype2)
            except BaseException:
                # Remove the freshly created attribute if the write failed,
                # then let the original error propagate.
                attr.close()
                h5a.delete(self._id, name)
                raise

            attr.close()

    def modify(self, name, value):
        """ Change the value of an attribute while preserving its type.

        Unlike __setitem__, the type of an already existing attribute is
        kept.  This can be very useful when working with externally
        generated files.

        An attribute that doesn't exist yet is created automatically.

        Raises OSError for an empty attribute and TypeError when the shape of
        the data does not fit the existing attribute.
        """
        with phil:
            if name not in self:
                self[name] = value
                return

            attr = h5a.open(self._id, self._e(name))

            if is_empty_dataspace(attr):
                raise OSError("Empty attributes can't be modified")

            # An existing array is handed to HDF5 for conversion; for a list
            # or similar, numpy is told the dtype rather than guessing one.
            dt = None if isinstance(value, numpy.ndarray) else attr.dtype
            value = numpy.asarray(value, order='C', dtype=dt)

            # Allow the case of () <-> (1,)
            if not (value.shape == attr.shape or
                    (value.size == 1 and product(attr.shape) == 1)):
                raise TypeError("Shape of data is incompatible with existing attribute")
            attr.write(value)

    @with_phil
    def __len__(self):
        """ Number of attributes attached to the object. """
        # I expect we will not have more than 2**32 attributes
        return h5a.get_num_attrs(self._id)

    def __iter__(self):
        """ Iterate over the names of attributes. """
        with phil:
            names = []

            def collect(name, *args):
                """ Callback to gather attribute names """
                names.append(self._d(name))

            cpl = self._id.get_create_plist()
            crt_order = cpl.get_attr_creation_order()
            cpl.close()

            # Use creation order when the object tracks it, name order
            # otherwise.
            tracked = crt_order & h5p.CRT_ORDER_TRACKED
            idx_type = h5.INDEX_CRT_ORDER if tracked else h5.INDEX_NAME

            h5a.iterate(self._id, collect, index_type=idx_type)

        # Names are handed out only after the lock has been released.
        yield from names

    @with_phil
    def __contains__(self, name):
        """ Determine if an attribute exists, by name. """
        return h5a.exists(self._id, self._e(name))

    @with_phil
    def __repr__(self):
        if self._id:
            return "<Attributes of HDF5 object at %s>" % id(self._id)
        return "<Attributes of closed HDF5 object>"
@@ -0,0 +1,535 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
"""
Implements operations common to all high-level objects (File, etc.).
"""
from collections.abc import (
Mapping, MutableMapping, KeysView, ValuesView, ItemsView
)
import os
import posixpath
import numpy as np
# The high-level interface is serialized; every public API function & method
# is wrapped in a lock. We reuse the low-level lock because (1) it's fast,
# and (2) it eliminates the possibility of deadlocks due to out-of-order
# lock acquisition.
from .._objects import phil, with_phil
from .. import h5d, h5i, h5r, h5p, h5f, h5t, h5s
from .compat import filename_encode
def is_hdf5(fname):
    """ Determine if a file is valid HDF5 (False if it doesn't exist).

    fname
        Path to check; it is made absolute before being tested.
    """
    with phil:
        path = os.path.abspath(os.fspath(fname))
        if not os.path.isfile(path):
            return False
        return h5f.is_hdf5(filename_encode(path))
def find_item_type(data):
    """Find the item type of a simple object or collection of objects.

    E.g. [[['a']]] -> str

    Only collections whose items all share one type are of interest; None is
    returned when that is not the case.

    NumPy arrays of Python objects are treated like ordinary Python
    collections, whereas arrays with a specific dtype yield None.

    Only array-like collections (lists and tuples, possibly nested) are
    descended into; anything else, such as a set or dict, is reported as its
    own type.
    """
    if isinstance(data, np.ndarray):
        is_plain_object_array = (
            data.dtype.kind == 'O'
            and not h5t.check_string_dtype(data.dtype)
            and not h5t.check_vlen_dtype(data.dtype)
        )
        if not is_plain_object_array:
            return None
        candidates = {type(item) for item in data.flat}
    elif isinstance(data, (list, tuple)):
        candidates = {find_item_type(item) for item in data}
    else:
        return type(data)

    # Exactly one distinct type means the collection is homogeneous.
    return candidates.pop() if len(candidates) == 1 else None
def guess_dtype(data):
    """ Attempt to guess an appropriate dtype for the object, returning None
    if nothing is appropriate (or if it should be left up to the array
    constructor to figure out)
    """
    with phil:
        # RegionReference is tested before Reference.
        if isinstance(data, h5r.RegionReference):
            return h5t.regionref_dtype
        if isinstance(data, h5r.Reference):
            return h5t.ref_dtype

        item_type = find_item_type(data)

        if item_type is bytes:
            guessed = h5t.string_dtype(encoding='ascii')
        elif item_type is str:
            guessed = h5t.string_dtype()
        else:
            guessed = None
        return guessed
def is_float16_dtype(dt):
    """Return True if *dt* describes a 2-byte floating point type.

    *dt* may be None (which gives False) or anything ``np.dtype`` accepts.
    """
    if dt is None:
        return False

    normalized = np.dtype(dt)  # normalize strings -> np.dtype objects
    return (normalized.kind, normalized.itemsize) == ('f', 2)
def array_for_new_object(data, specified_dtype=None):
    """Prepare an array from data used to create a new dataset or attribute

    data
        The object to convert.
    specified_dtype
        Optional dtype requested by the caller; anything ``np.dtype`` accepts.

    Returns a C-ordered NumPy array.
    """
    if specified_dtype is not None and not isinstance(specified_dtype, np.dtype):
        specified_dtype = np.dtype(specified_dtype)

    # HDF5 mostly converts data as necessary when it is written, with two
    # exceptions where the requested dtype is applied up front:
    #  * float16 targets, to work around a conversion bug
    #    (https://github.com/h5py/h5py/issues/819);
    #  * non-array input such as a list, so numpy does not have to guess a
    #    dtype that is already known.
    convert_up_front = is_float16_dtype(specified_dtype) or (
        not isinstance(data, np.ndarray) and (specified_dtype is not None)
    )
    as_dtype = specified_dtype if convert_up_front else guess_dtype(data)

    data = np.asarray(data, order="C", dtype=as_dtype)

    # Usually a no-op.  But when data was already an array and as_dtype is a
    # tagged h5py dtype (e.g. for an object array of strings), asarray() keeps
    # the old dtype object; the view attaches the tagged one.
    if as_dtype is not None:
        data = data.view(dtype=as_dtype)

    return data
def default_lapl():
    """ Default link access property list.

    Always None.
    """
    return None
def default_lcpl():
    """ Default link creation property list.

    Returns a new LINK_CREATE property list with creation of intermediate
    groups switched on.
    """
    plist = h5p.create(h5p.LINK_CREATE)
    plist.set_create_intermediate_group(True)
    return plist
# Module-level defaults, built once at import time; these are the objects
# handed out by the CommonStateObject._lapl and ._lcpl properties.
dlapl = default_lapl()
dlcpl = default_lcpl()
def is_empty_dataspace(obj):
    """ Check if an object's dataspace is empty.

    obj
        Any object offering ``get_space()``.

    Returns True when the extent type of that dataspace is h5s.NULL.
    """
    extent_type = obj.get_space().get_simple_extent_type()
    return bool(extent_type == h5s.NULL)
class CommonStateObject:

    """
        Mixin class that allows sharing information between objects which
        reside in the same HDF5 file.  Requires that the host class have
        a ".id" attribute which returns a low-level ObjectID subclass.

        Also implements Unicode operations.
    """

    @property
    def _lapl(self):
        """ Link access property list appropriate for this object
        (the module-level default).
        """
        return dlapl

    @property
    def _lcpl(self):
        """ Link creation property list appropriate for this object
        (the module-level default).
        """
        return dlcpl

    def _e(self, name, lcpl=None):
        """ Encode a name according to the current file settings.

        Returns name, or 2-tuple (name, lcpl) if lcpl is True

        - Binary strings are always passed as-is, h5t.CSET_ASCII
        - Unicode strings are encoded as ASCII (h5t.CSET_ASCII) when that is
          possible, otherwise as utf8 (h5t.CSET_UTF8)

        If name is None, returns either None or (None, None) appropriately.

        Raises TypeError when name is neither str nor bytes.
        """
        if name is None:
            return (None, None) if lcpl else None

        if isinstance(name, bytes):
            coding = h5t.CSET_ASCII
        elif isinstance(name, str):
            try:
                encoded = name.encode('ascii')
            except UnicodeEncodeError:
                encoded = name.encode('utf8')
                coding = h5t.CSET_UTF8
            else:
                coding = h5t.CSET_ASCII
            name = encoded
        else:
            raise TypeError(f"A name should be string or bytes, not {type(name)}")

        if not lcpl:
            return name

        # Work on a copy of the shared default list so that setting the
        # character encoding does not affect other users of it.
        plist = self._lcpl.copy()
        plist.set_char_encoding(coding)
        return name, plist

    def _d(self, name):
        """ Decode a name according to the current file settings.

        - Try to decode utf8
        - Failing that, return the byte string

        If name is None, returns None.
        """
        if name is None:
            return None

        try:
            return name.decode('utf8')
        except UnicodeDecodeError:
            return name
class _RegionProxy:
"""
Proxy object which handles region references.
To create a new region reference (datasets only), use slicing syntax:
>>> newref = obj.regionref[0:10:2]
To determine the target dataset shape from an existing reference:
>>> shape = obj.regionref.shape(existingref)
where <obj> may be any object in the file. To determine the shape of
the selection in use on the target dataset:
>>> selection_shape = obj.regionref.selection(existingref)
"""
def __init__(self, obj):
self.obj = obj
self.id = obj.id
def __getitem__(self, args):
if not isinstance(self.id, h5d.DatasetID):
raise TypeError("Region references can only be made to datasets")
from . import selections
with phil:
selection = selections.select(self.id.shape, args, dataset=self.obj)
return h5r.create(self.id, b'.', h5r.DATASET_REGION, selection.id)
def shape(self, ref):
""" Get the shape of the target dataspace referred to by *ref*. """
with phil:
sid = h5r.get_region(ref, self.id)
return sid.shape
def selection(self, ref):
""" Get the shape of the target dataspace selection referred to by *ref*
"""
from . import selections
with phil:
sid = h5r.get_region(ref, self.id)
return selections.guess_shape(sid)
class HLObject(CommonStateObject):

    """
        Base class for high-level interface objects.
    """

    @with_phil
    def __init__(self, oid):
        """ Setup this object, given its low-level identifier """
        self._id = oid

    @property
    def file(self):
        """ Return a File instance associated with this object """
        from . import files

        with phil:
            return files.File(self.id)

    @property
    @with_phil
    def name(self):
        """ Return the full name of this object.  None if anonymous. """
        raw_name = h5i.get_name(self.id)
        return self._d(raw_name)

    @property
    @with_phil
    def parent(self):
        """Return the parent group of this object.

        This is always equivalent to obj.file[posixpath.dirname(obj.name)].
        ValueError if this object is anonymous.
        """
        if self.name is None:
            raise ValueError("Parent of an anonymous object is undefined")
        parent_path = posixpath.dirname(self.name)
        return self.file[parent_path]

    @property
    @with_phil
    def id(self):
        """ Low-level identifier appropriate for this object """
        return self._id

    @property
    @with_phil
    def ref(self):
        """ An (opaque) HDF5 reference to this object """
        return h5r.create(self.id, b'.', h5r.OBJECT)

    @property
    @with_phil
    def regionref(self):
        """Create a region reference (Datasets only).

        The syntax is regionref[<slices>]. For example, dset.regionref[...]
        creates a region reference in which the whole dataset is selected.

        Can also be used to determine the shape of the referenced dataset
        (via .shape property), or the shape of the selection (via the
        .selection property).
        """
        return _RegionProxy(self)

    @property
    def attrs(self):
        """ Attributes attached to this object """
        from . import attrs

        with phil:
            return attrs.AttributeManager(self)

    @with_phil
    def __hash__(self):
        return hash(self.id)

    @with_phil
    def __eq__(self, other):
        # Anything without an ``id`` attribute is left to the other operand.
        if not hasattr(other, 'id'):
            return NotImplemented
        return self.id == other.id

    def __bool__(self):
        with phil:
            return bool(self.id)

    def __getnewargs__(self):
        """Disable pickle.

        Handles for HDF5 objects can't be reliably deserialised, because the
        recipient may not have access to the same files. So we do this to
        fail early.

        If you really want to pickle h5py objects and can live with some
        limitations, look at the h5pickle project on PyPI.
        """
        raise TypeError("h5py objects cannot be pickled")

    def __getstate__(self):
        # Pickle protocols 0 and 1 use this instead of __getnewargs__
        raise TypeError("h5py objects cannot be pickled")
# --- Dictionary-style interface ----------------------------------------------
# To implement the dictionary-style interface from groups and attributes,
# we inherit from the appropriate abstract base classes in collections.
#
# All locking is taken care of by the subclasses.
# We have to override ValuesView and ItemsView here because Group and
# AttributeManager can only test for key names.
class KeysViewHDF5(KeysView):
    """Keys view whose text form lists the keys it contains."""

    def __str__(self):
        keys = list(self)
        return "<KeysViewHDF5 {}>".format(keys)

    def __reversed__(self):
        # Reverse iteration is delegated to the wrapped mapping.
        for key in reversed(self._mapping):
            yield key

    __repr__ = __str__
class ValuesViewHDF5(ValuesView):

    """
        Wraps e.g. a Group or AttributeManager to provide a value view.

        Note that __contains__ will have poor performance as it has
        to scan all the links or attributes.
    """

    def __contains__(self, value):
        with phil:
            mapping = self._mapping
            return any(value == mapping.get(key) for key in mapping)

    def __iter__(self):
        with phil:
            mapping = self._mapping
            for key in mapping:
                yield mapping.get(key)

    def __reversed__(self):
        with phil:
            mapping = self._mapping
            for key in reversed(mapping):
                yield mapping.get(key)
class ItemsViewHDF5(ItemsView):

    """
        Wraps e.g. a Group or AttributeManager to provide an items view.
    """

    def __contains__(self, item):
        with phil:
            key, expected = item
            if key not in self._mapping:
                return False
            # The result of the comparison is returned as-is.
            return expected == self._mapping.get(key)

    def __iter__(self):
        with phil:
            mapping = self._mapping
            for key in mapping:
                yield (key, mapping.get(key))

    def __reversed__(self):
        with phil:
            mapping = self._mapping
            for key in reversed(mapping):
                yield (key, mapping.get(key))
class MappingHDF5(Mapping):

    """
        Wraps a Group, AttributeManager or DimensionManager object to provide
        an immutable mapping interface.

        We don't inherit directly from MutableMapping because certain
        subclasses, for example DimensionManager, are read-only.
    """

    def keys(self):
        """ Get a view object on member names """
        return KeysViewHDF5(self)

    def values(self):
        """ Get a view object on member objects """
        return ValuesViewHDF5(self)

    def items(self):
        """ Get a view object on member items """
        return ItemsViewHDF5(self)

    def _ipython_key_completions_(self):
        """ Custom tab completions for __getitem__ in IPython >=5.0.

        Returns the member names as a sorted list.
        """
        names = self.keys()
        return sorted(names)
class MutableMappingHDF5(MappingHDF5, MutableMapping):

    """
        Wraps a Group or AttributeManager object to provide a mutable
        mapping interface, in contrast to the read-only mapping of
        MappingHDF5.

        Adds nothing of its own: it only combines the two base classes.
    """
class Empty:

    """
        Proxy object to represent empty/null dataspaces (a.k.a H5S_NULL).

        This can have an associated dtype, but has no shape or data. This is
        not the same as an array with shape (0,).
    """

    # An empty dataspace has neither a shape nor a size.
    shape = None
    size = None

    def __init__(self, dtype):
        # Anything np.dtype() accepts is normalised to a dtype object.
        self.dtype = np.dtype(dtype)

    def __eq__(self, other):
        # Equal only to another Empty carrying the same dtype.
        return bool(isinstance(other, Empty) and self.dtype == other.dtype)

    def __repr__(self):
        return "Empty(dtype={0!r})".format(self.dtype)
def product(nums):
    """Calculate a numeric product

    For small amounts of data (e.g. shape tuples), this simple code is much
    faster than calling numpy.prod().

    An empty iterable gives 1.
    """
    total = 1
    for factor in nums:
        total *= factor
    return total
# Simple variant of cached_property:
# Unlike functools, this has no locking, so we don't have to worry about
# deadlocks with phil (see issue gh-2064). Unlike cached-property on PyPI, it
# doesn't try to import asyncio (which can be ~100 extra modules).
# Many projects seem to have similar variants of this, often without attribution,
# but to be cautious, this code comes from cached-property (Copyright (c) 2015,
# Daniel Greenfeld, BSD license), where it is attributed to bottle (Copyright
# (c) 2009-2022, Marcel Hellkamp, MIT license).
class cached_property:
    """Descriptor that computes a value once and stores it on the instance."""

    def __init__(self, func):
        self.func = func
        self.__doc__ = func.__doc__

    def __get__(self, obj, cls):
        # Access through the class hands back the descriptor itself.
        if obj is None:
            return self

        # The result is stored in the instance __dict__ under the function's
        # name.
        computed = self.func(obj)
        obj.__dict__[self.func.__name__] = computed
        return computed
@@ -0,0 +1,46 @@
"""
Compatibility module for high-level h5py
"""
import os
import sys
from ..version import hdf5_built_version_tuple
# HDF5 supported passing paths as UTF-8 for Windows from 1.10.6, but this
# was broken again in 1.14.4 - https://github.com/HDFGroup/hdf5/issues/5037 .
# The change was reverted in 1.14.6.
# "mbcs" for HDF5 builds from 1.14.4 up to (but excluding) 1.14.6,
# "utf-8" for every other version.
WINDOWS_ENCODING = (
    "mbcs" if (1, 14, 4) <= hdf5_built_version_tuple < (1, 14, 6) else "utf-8"
)
def filename_encode(filename):
    """
    Encode filename for use in the HDF5 library.

    Due to how HDF5 handles filenames on different systems, this should be
    called on any filenames passed to the HDF5 library. See the documentation on
    filenames in h5py for more information.

    On Windows a str is encoded with WINDOWS_ENCODING; everything else goes
    through os.fsencode().
    """
    path = os.fspath(filename)
    needs_windows_encoding = sys.platform == "win32" and isinstance(path, str)
    if not needs_windows_encoding:
        return os.fsencode(path)
    return path.encode(WINDOWS_ENCODING, "strict")
def filename_decode(filename):
    """
    Decode filename used by HDF5 library.

    Due to how HDF5 handles filenames on different systems, this should be
    called on any filenames passed from the HDF5 library. See the documentation
    on filenames in h5py for more information.

    Raises TypeError when filename is neither str nor bytes.
    """
    if not isinstance(filename, (str, bytes)):
        raise TypeError(f"expect bytes or str, not {type(filename).__name__}")

    needs_windows_decoding = sys.platform == "win32" and isinstance(filename, bytes)
    if not needs_windows_decoding:
        return os.fsdecode(filename)
    return filename.decode(WINDOWS_ENCODING, "strict")
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,55 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
"""
Implements high-level access to committed datatypes in the file.
"""
import posixpath as pp
from ..h5t import TypeID
from .base import HLObject, with_phil
class Datatype(HLObject):

    """
        Represents an HDF5 named datatype stored in a file.

        To store a datatype, simply assign it to a name in a group:

        >>> MyGroup["name"] = numpy.dtype("f")
        >>> named_type = MyGroup["name"]
        >>> assert named_type.dtype == numpy.dtype("f")
    """

    @property
    @with_phil
    def dtype(self):
        """Numpy dtype equivalent for this datatype"""
        return self.id.dtype

    @with_phil
    def __init__(self, bind):
        """ Create a new Datatype object by binding to a low-level TypeID.

        Raises ValueError if *bind* is not a TypeID.
        """
        if not isinstance(bind, TypeID):
            # Wrap the operand in a 1-tuple: a bare tuple argument would be
            # unpacked by the % operator and fail with TypeError instead of
            # producing the intended ValueError.
            raise ValueError("%s is not a TypeID" % (bind,))
        super().__init__(bind)

    @with_phil
    def __repr__(self):
        if not self.id:
            return "<Closed HDF5 named type>"
        if self.name is None:
            namestr = '("anonymous")'
        else:
            # Show only the last path component; the root is shown as "/".
            name = pp.basename(pp.normpath(self.name))
            namestr = '"%s"' % (name if name != '' else '/')
        return '<HDF5 named type %s (dtype %s)>' % \
            (namestr, self.dtype.str)
@@ -0,0 +1,181 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
"""
Implements support for HDF5 dimension scales.
"""
import warnings
from .. import h5ds
from ..h5py_warnings import H5pyDeprecationWarning
from . import base
from .base import phil, with_phil
from .dataset import Dataset
class DimensionProxy(base.CommonStateObject):

    """
        Represents an HDF5 "dimension".
    """

    @property
    @with_phil
    def label(self):
        """ Get or set the dimension scale label """
        return self._d(h5ds.get_label(self._id, self._dimension))

    @label.setter
    @with_phil
    def label(self, val):
        # pylint: disable=missing-docstring
        h5ds.set_label(self._id, self._dimension, self._e(val))

    @with_phil
    def __init__(self, id_, dimension):
        self._id = id_
        self._dimension = dimension

    @with_phil
    def __hash__(self):
        return hash((type(self), self._id, self._dimension))

    @with_phil
    def __eq__(self, other):
        # Only compare against other dimension proxies.  Comparing hashes
        # with arbitrary operands raised TypeError for unhashable objects
        # (e.g. lists) and reported equality with any unrelated object that
        # happened to hash to the same value.
        if not isinstance(other, DimensionProxy):
            return NotImplemented
        return hash(self) == hash(other)

    @with_phil
    def __iter__(self):
        yield from self.keys()

    @with_phil
    def __len__(self):
        return h5ds.get_num_scales(self._id, self._dimension)

    @with_phil
    def __getitem__(self, item):
        """ Look up an attached scale by position (int) or by name.

        Raises KeyError when no scale carries the requested name.
        """
        if isinstance(item, int):
            scales = []
            h5ds.iterate(self._id, self._dimension, scales.append, 0)
            return Dataset(scales[item])
        else:
            def f(dsid):
                """ Iterate over scales to find a matching name """
                if h5ds.get_scale_name(dsid) == self._e(item):
                    return dsid

            res = h5ds.iterate(self._id, self._dimension, f, 0)
            if res is None:
                raise KeyError(item)
            return Dataset(res)

    def attach_scale(self, dset):
        """ Attach a scale to this dimension.

        Provide the Dataset of the scale you would like to attach.
        """
        with phil:
            h5ds.attach_scale(self._id, dset.id, self._dimension)

    def detach_scale(self, dset):
        """ Remove a scale from this dimension.

        Provide the Dataset of the scale you would like to remove.
        """
        with phil:
            h5ds.detach_scale(self._id, dset.id, self._dimension)

    def items(self):
        """ Get a list of (name, Dataset) pairs with all scales on this
        dimension.
        """
        with phil:
            scales = []

            # H5DSiterate raises an error if there are no dimension scales,
            # rather than iterating 0 times.  See #483.
            if len(self) > 0:
                h5ds.iterate(self._id, self._dimension, scales.append, 0)

            return [
                (self._d(h5ds.get_scale_name(x)), Dataset(x))
                for x in scales
            ]

    def keys(self):
        """ Get a list of names for the scales on this dimension. """
        with phil:
            return [key for (key, _) in self.items()]

    def values(self):
        """ Get a list of Dataset for scales on this dimension. """
        with phil:
            return [val for (_, val) in self.items()]

    @with_phil
    def __repr__(self):
        if not self._id:
            return "<Dimension of closed HDF5 dataset>"
        return ('<"%s" dimension %d of HDF5 dataset at %s>'
                % (self.label, self._dimension, id(self._id)))
class DimensionManager(base.CommonStateObject):

    """
        Represents a collection of dimension associated with a dataset.

        Like AttributeManager, an instance of this class is returned when
        accessing the ".dims" property on a Dataset.
    """

    @with_phil
    def __init__(self, parent):
        """ Private constructor.
        """
        self._id = parent.id

    @with_phil
    def __getitem__(self, index):
        """ Return a Dimension object.

        Negative indices count from the last dimension, as for Python
        sequences.  Raises IndexError when the index is out of range.
        """
        ndim = len(self)
        # Previously only the upper bound was checked, so a negative index
        # produced a proxy for a dimension that does not exist.
        if index < 0:
            index += ndim
        if not 0 <= index < ndim:
            raise IndexError('Index out of range')
        return DimensionProxy(self._id, index)

    @with_phil
    def __len__(self):
        """ Number of dimensions associated with the dataset. """
        return self._id.rank

    @with_phil
    def __iter__(self):
        """ Iterate over the dimensions. """
        for i in range(len(self)):
            yield self[i]

    @with_phil
    def __repr__(self):
        if not self._id:
            return "<Dimensions of closed HDF5 dataset>"
        return "<Dimensions of HDF5 object at %s>" % id(self._id)

    def create_scale(self, dset, name=''):
        """ Create a new dimension, from an initial scale.

        Provide the dataset and a name for the scale.

        Deprecated: emits H5pyDeprecationWarning and forwards to
        ``dset.make_scale(name)``.
        """
        warnings.warn("other_ds.dims.create_scale(ds, name) is deprecated. "
                      "Use ds.make_scale(name) instead.",
                      H5pyDeprecationWarning, stacklevel=2,
                      )
        dset.make_scale(name)
@@ -0,0 +1,664 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
"""
Implements high-level support for HDF5 file objects.
"""
import inspect
import os
import sys
from warnings import warn
from .compat import filename_decode, filename_encode
from .base import phil, with_phil
from .group import Group
from .. import h5, h5f, h5p, h5i, h5fd, _objects
from .. import version
# Optional features reported by the h5py build configuration.
mpi = h5.get_config().mpi
ros3 = h5.get_config().ros3
direct_vfd = h5.get_config().direct_vfd

# (major, minor, release) of the HDF5 library in use.
hdf5_version = version.hdf5_version_tuple[0:3]

swmr_support = True

# Forward (name -> constant) and reverse (constant -> name) tables for the
# `libver` option.  Entries for newer file format versions are added only
# when the HDF5 version is recent enough.
libver_dict = {
    'earliest': h5f.LIBVER_EARLIEST,
    'latest': h5f.LIBVER_LATEST,
    'v108': h5f.LIBVER_V18,
    'v110': h5f.LIBVER_V110,
}
libver_dict_r = {const: label for label, const in libver_dict.items()}

if hdf5_version >= (1, 11, 4):
    libver_dict['v112'] = h5f.LIBVER_V112
    libver_dict_r[h5f.LIBVER_V112] = 'v112'

if hdf5_version >= (1, 13, 0):
    libver_dict['v114'] = h5f.LIBVER_V114
    libver_dict_r[h5f.LIBVER_V114] = 'v114'

if hdf5_version >= (2, 0, 0):
    libver_dict['v200'] = h5f.LIBVER_V200
    libver_dict_r[h5f.LIBVER_V200] = 'v200'
def _set_fapl_mpio(plist, **kwargs):
    """Configure `plist` (a file access property list) for the mpio driver.

    An ``info`` entry holding a fresh ``mpi4py.MPI.Info()`` is supplied when
    the caller did not pass one.  Raises ValueError if h5py was built without
    MPI support.
    """
    if mpi:
        # Imported lazily so that mpi4py is only needed for MPI-enabled builds.
        import mpi4py.MPI
        default_info = mpi4py.MPI.Info()
        kwargs.setdefault('info', default_info)
        plist.set_fapl_mpio(**kwargs)
        return
    raise ValueError("h5py was built without MPI support, can't use mpio driver")
def _set_fapl_fileobj(plist, **kwargs):
    """Install the Python file-object driver on a file access property list.

    The file-like object is taken from the ``fileobj`` keyword (None when
    absent); any other keyword is ignored.
    """
    fileobj = kwargs.get('fileobj')
    plist.set_fileobj_driver(h5fd.fileobj_driver, fileobj)
# Registry mapping a driver name to the callable that configures a file
# access property list for that driver.  Each callable is invoked as
# ``set_fapl(plist, **driver_keywords)``.
_drivers = {
    'sec2': lambda plist, **opts: plist.set_fapl_sec2(**opts),
    'stdio': lambda plist, **opts: plist.set_fapl_stdio(**opts),
    'core': lambda plist, **opts: plist.set_fapl_core(**opts),
    # The family driver receives a copy of the list being configured as
    # its `memb_fapl` argument.
    'family': lambda plist, **opts: plist.set_fapl_family(
        memb_fapl=plist.copy(),
        **opts
    ),
    'mpio': _set_fapl_mpio,
    'fileobj': _set_fapl_fileobj,
    'split': lambda plist, **opts: plist.set_fapl_split(**opts),
}

# Drivers that exist only when the build configuration enables them.
if ros3:
    _drivers['ros3'] = lambda plist, **opts: plist.set_fapl_ros3(**opts)

if direct_vfd:
    _drivers['direct'] = lambda plist, **opts: plist.set_fapl_direct(**opts)  # noqa
def register_driver(name, set_fapl):
    """Register a custom driver.

    An existing registration under the same name is replaced.

    Parameters
    ----------
    name : str
        The name of the driver.
    set_fapl : callable[PropFAID, **kwargs] -> NoneType
        The function to set the fapl to use your custom driver.
    """
    _drivers.update({name: set_fapl})
def unregister_driver(name):
    """Unregister a custom driver.

    Raises KeyError if no driver is registered under `name`.

    Parameters
    ----------
    name : str
        The name of the driver.
    """
    _drivers.pop(name)
def registered_drivers():
    """Return a frozenset holding the name of every registered driver.
    """
    return frozenset(_drivers.keys())
def make_fapl(
    driver, libver=None, rdcc_nslots=None, rdcc_nbytes=None, rdcc_w0=None,
    locking=None, page_buf_size=None, min_meta_keep=0, min_raw_keep=0,
    alignment_threshold=1, alignment_interval=1, meta_block_size=None,
    **kwds
):
    """ Set up a file access property list.

    driver
        None, or the name of an entry in the driver registry.  'windows' is
        handled like None when running on win32.
    libver
        None (bounds 'earliest'..'latest'), a single key of libver_dict
        (used as the low bound, with 'latest' as the high bound), or an
        iterable of two such keys (low, high).
    rdcc_nslots, rdcc_nbytes, rdcc_w0
        When not None, replace entries 1, 2 and 3 of the cache settings
        read back from the property list.
    locking
        None, False/"false", True/"true" or "best-effort"; anything else
        raises ValueError.
    **kwds
        Passed on to the driver's setup function.  With no driver they are
        rejected with TypeError.

    Returns the configured property list.
    """
    plist = h5p.create(h5p.FILE_ACCESS)

    # Library version bounds
    if libver is None:
        # we default to earliest
        low, high = h5f.LIBVER_EARLIEST, h5f.LIBVER_LATEST
    elif libver in libver_dict:
        low, high = libver_dict[libver], h5f.LIBVER_LATEST
    else:
        low, high = (libver_dict[x] for x in libver)
    plist.set_libver_bounds(low, high)

    plist.set_alignment(alignment_threshold, alignment_interval)

    # Chunk cache: start from the current settings, override what was given.
    cache_settings = list(plist.get_cache())
    overrides = ((1, rdcc_nslots), (2, rdcc_nbytes), (3, rdcc_w0))
    for slot, value in overrides:
        if value is not None:
            cache_settings[slot] = value
    plist.set_cache(*cache_settings)

    if page_buf_size:
        plist.set_page_buffer_size(
            int(page_buf_size), int(min_meta_keep), int(min_raw_keep)
        )

    if meta_block_size is not None:
        plist.set_meta_block_size(int(meta_block_size))

    if locking is not None:
        # Translate the option into (use locking, ignore_when_disabled).
        if locking in ("false", False):
            use_locking, ignore_disabled = False, False
        elif locking in ("true", True):
            use_locking, ignore_disabled = True, False
        elif locking == "best-effort":
            use_locking, ignore_disabled = True, True
        else:
            raise ValueError(f"Unsupported locking value: {locking}")
        plist.set_file_locking(use_locking, ignore_when_disabled=ignore_disabled)

    if driver is None or (driver == 'windows' and sys.platform == 'win32'):
        # Prevent swallowing unused key arguments
        if kwds:
            msg = "'{key}' is an invalid keyword argument for this function" \
                  .format(key=next(iter(kwds)))
            raise TypeError(msg)
        return plist

    try:
        set_fapl = _drivers[driver]
    except KeyError as exc:
        raise ValueError(f'Unknown driver type {driver!r}') from exc

    # For ros3 the session token is not handed to the driver setup function;
    # it is applied separately once the driver has been configured.
    token = kwds.pop('session_token', None) if driver == 'ros3' else None
    set_fapl(plist, **kwds)
    if token:
        if hdf5_version < (1, 14, 2):
            raise ValueError('HDF5 >= 1.14.2 required for AWS session token')
        plist.set_fapl_ros3_token(token)

    return plist
def make_fcpl(track_order=False, track_times=False, fs_strategy=None, fs_persist=False,
              fs_threshold=1, fs_page_size=None):
    """ Set up a file creation property list.

    track_order
        If true, link and attribute creation order are tracked and indexed.
    track_times
        True or False; None is accepted and means False.  Any other value
        raises TypeError.
    fs_strategy
        None/empty, or one of 'fsm', 'page', 'aggregate', 'none'; another
        value raises ValueError.  fs_persist and fs_threshold are passed
        along with it.
    fs_page_size
        Applied only when fs_strategy is 'page'.

    Returns the configured property list.
    """
    plist = h5p.create(h5p.FILE_CREATE)

    if track_order:
        order_flags = h5p.CRT_ORDER_TRACKED | h5p.CRT_ORDER_INDEXED
        plist.set_link_creation_order(order_flags)
        plist.set_attr_creation_order(order_flags)

    if track_times is None:
        track_times = False  # Allow explicit None to mean h5py's default
    if track_times not in (True, False):
        raise TypeError("track_times must be either True or False")
    plist.set_obj_track_times(track_times)

    if fs_strategy:
        strategies = {
            'fsm': h5f.FSPACE_STRATEGY_FSM_AGGR,
            'page': h5f.FSPACE_STRATEGY_PAGE,
            'aggregate': h5f.FSPACE_STRATEGY_AGGR,
            'none': h5f.FSPACE_STRATEGY_NONE,
        }
        if fs_strategy not in strategies:
            raise ValueError("Invalid file space strategy type")
        plist.set_file_space_strategy(
            strategies[fs_strategy], fs_persist, fs_threshold
        )
        if fs_page_size and fs_strategy == 'page':
            plist.set_file_space_page_size(int(fs_page_size))

    return plist
def make_fid(name, mode, userblock_size, fapl, fcpl=None, swmr=False):
    """ Get a new FileID by opening or creating a file.

    Also validates the mode argument, which must be one of r, r+, w, w-,
    x or a (ValueError otherwise).  When `userblock_size` is given it is
    converted to int and set on `fcpl` (created here if None); it is
    rejected for modes r and r+, and must match the user block of a file
    that already exists.
    """
    if userblock_size is not None:
        if mode in ('r', 'r+'):
            raise ValueError("User block may only be specified "
                             "when creating a file")
        try:
            userblock_size = int(userblock_size)
        except (TypeError, ValueError):
            raise ValueError("User block size must be an integer") from None
        if fcpl is None:
            fcpl = h5p.create(h5p.FILE_CREATE)
        fcpl.set_userblock(userblock_size)

    def _missing_file_error():
        # Exception type taken to mean "could not open" in append mode.
        # Not all drivers raise FileNotFoundError (commented those that do not)
        fnf_drivers = (
            h5fd.SEC2,
            h5fd.DIRECT if direct_vfd else -1,
            # h5fd.STDIO,
            # h5fd.CORE,
            h5fd.FAMILY,
            h5fd.WINDOWS,
            # h5fd.MPIO,
            # h5fd.MPIPOSIX,
            h5fd.fileobj_driver,
            h5fd.ROS3D if ros3 else -1,
        )
        if fapl.get_driver() in fnf_drivers:
            return FileNotFoundError
        return OSError

    if mode == 'r':
        flags = h5f.ACC_RDONLY
        if swmr and swmr_support:
            flags |= h5f.ACC_SWMR_READ
        fid = h5f.open(name, flags, fapl=fapl)
    elif mode == 'r+':
        fid = h5f.open(name, h5f.ACC_RDWR, fapl=fapl)
    elif mode in ('w-', 'x'):
        fid = h5f.create(name, h5f.ACC_EXCL, fapl=fapl, fcpl=fcpl)
    elif mode == 'w':
        fid = h5f.create(name, h5f.ACC_TRUNC, fapl=fapl, fcpl=fcpl)
    elif mode == 'a':
        # Open in append mode (read/write).
        # If that fails, create a new file only if it won't clobber an
        # existing one (ACC_EXCL)
        try:
            fid = h5f.open(name, h5f.ACC_RDWR, fapl=fapl)
        except _missing_file_error():
            # The exception type is only worked out once the open has failed.
            fid = h5f.create(name, h5f.ACC_EXCL, fapl=fapl, fcpl=fcpl)
    else:
        raise ValueError("Invalid mode; must be one of r, r+, w, w-, x, a")

    # Do not hand back (or leak) an open file whose user block differs from
    # the requested one.
    try:
        if userblock_size is not None:
            existing_fcpl = fid.get_create_plist()
            if existing_fcpl.get_userblock() != userblock_size:
                raise ValueError("Requested userblock size (%d) does not match that of existing file (%d)" % (userblock_size, existing_fcpl.get_userblock()))
    except Exception:
        fid.close()
        raise

    return fid
class File(Group):

    """
        Represents an HDF5 file.
    """

    @property
    def attrs(self):
        """ Attributes attached to this object """
        # hdf5 complains that a file identifier is an invalid location for an
        # attribute. Instead of self, pass the root group to AttributeManager:
        from . import attrs
        with phil:
            root = self['/']
            return attrs.AttributeManager(root)

    @property
    @with_phil
    def filename(self):
        """File name on disk"""
        raw_name = h5f.get_name(self.id)
        return filename_decode(raw_name)

    @property
    @with_phil
    def driver(self):
        """Low-level HDF5 file driver used to open file"""
        current = self.id.get_access_plist().get_driver()
        names = {
            h5fd.SEC2: 'sec2',
            h5fd.STDIO: 'stdio',
            h5fd.CORE: 'core',
            h5fd.FAMILY: 'family',
            h5fd.WINDOWS: 'windows',
            h5fd.MPIO: 'mpio',
            h5fd.MPIPOSIX: 'mpiposix',
            h5fd.fileobj_driver: 'fileobj',
        }
        # Optional drivers are only known when the build provides them.
        if ros3:
            names[h5fd.ROS3D] = 'ros3'
        if direct_vfd:
            names[h5fd.DIRECT] = 'direct'
        return names.get(current, 'unknown')

    @property
    @with_phil
    def mode(self):
        """ Python mode used to open file """
        write_intent = h5f.ACC_RDWR
        if swmr_support:
            write_intent |= h5f.ACC_SWMR_WRITE
        if self.id.get_intent() & write_intent:
            return 'r+'
        return 'r'

    @property
    @with_phil
    def libver(self):
        """File format version bounds (2-tuple: low, high)"""
        bounds = self.id.get_access_plist().get_libver_bounds()
        return tuple(map(libver_dict_r.__getitem__, bounds))

    @property
    @with_phil
    def userblock_size(self):
        """ User block size (in bytes) """
        return self.id.get_create_plist().get_userblock()

    @property
    @with_phil
    def meta_block_size(self):
        """ Meta block size (in bytes) """
        return self.id.get_access_plist().get_meta_block_size()

    if mpi:

        @property
        @with_phil
        def atomic(self):
            """ Set/get MPI-IO atomic mode
            """
            return self.id.get_mpi_atomicity()

        @atomic.setter
        @with_phil
        def atomic(self, value):
            # pylint: disable=missing-docstring
            self.id.set_mpi_atomicity(value)

    @property
    @with_phil
    def swmr_mode(self):
        """ Controls single-writer multiple-reader mode """
        swmr_flags = h5f.ACC_SWMR_READ | h5f.ACC_SWMR_WRITE
        return swmr_support and bool(self.id.get_intent() & swmr_flags)

    @swmr_mode.setter
    @with_phil
    def swmr_mode(self, value):
        # pylint: disable=missing-docstring
        if not value:
            raise ValueError("It is not possible to forcibly switch SWMR mode off.")
        self.id.start_swmr_write()

    def __init__(self, name, mode='r', driver=None, libver=None, userblock_size=None, swmr=False,
                 rdcc_nslots=None, rdcc_nbytes=None, rdcc_w0=None, track_order=None,
                 fs_strategy=None, fs_persist=False, fs_threshold=1, fs_page_size=None,
                 page_buf_size=None, min_meta_keep=0, min_raw_keep=0, locking=None,
                 alignment_threshold=1, alignment_interval=1, meta_block_size=None,
                 *, track_times=False, **kwds):
        """Create a new file object.

        See the h5py user guide for a detailed explanation of the options.

        name
            Name of the file on disk, or file-like object.  Note: for files
            created with the 'core' driver, HDF5 still requires this be
            non-empty.

        mode
            r        Readonly, file must exist (default)
            r+       Read/write, file must exist
            w        Create file, truncate if exists
            w- or x  Create file, fail if exists
            a        Read/write if exists, create otherwise

        driver
            Name of the driver to use.  Legal values are None (default,
            recommended), 'core', 'sec2', 'direct', 'stdio', 'mpio', 'ros3'.

        libver
            Library version bounds.  Supported values: 'earliest', 'v108',
            'v110', 'v112', 'v114', 'v200' and 'latest' depending on the
            version of libhdf5 h5py is built against.

        userblock_size
            Desired size of user block.  Only allowed when creating a new
            file (mode w, w- or x).

        swmr
            Open the file in SWMR read mode.  Only used when mode = 'r'.

        rdcc_nslots
            The number of chunk slots in the raw data chunk cache for this
            file.  Increasing this value reduces the number of cache
            collisions, but slightly increases the memory used.  Due to the
            hashing strategy, this value should ideally be a prime number.
            As a rule of thumb, this value should be at least 10 times the
            number of chunks that can fit in rdcc_nbytes bytes.  For maximum
            performance, this value should be set approximately 100 times
            that number of chunks.  The default value is 521.  Applies to
            all datasets unless individually changed.

        rdcc_nbytes
            Total size of the dataset chunk cache in bytes.  The default
            size per dataset is 1024**2 (1 MiB) for HDF5 before 2.0 and
            8 MiB for HDF5 2.0 and later.  Applies to all datasets unless
            individually changed.

        rdcc_w0
            The chunk preemption policy for all datasets.  This must be
            between 0 and 1 inclusive and indicates the weighting according
            to which chunks which have been fully read or written are
            penalized when determining which chunks to flush from cache.  A
            value of 0 means fully read or written chunks are treated no
            differently than other chunks (the preemption is strictly LRU)
            while a value of 1 means fully read or written chunks are always
            preempted before other chunks.  If your application only reads
            or writes data once, this can be safely set to 1.  Otherwise,
            this should be set lower depending on how often you re-read or
            re-write the same data.  The default value is 0.75.  Applies to
            all datasets unless individually changed.

        track_order
            Track dataset/group/attribute creation order under root group
            if True.  If None use global default
            h5.get_config().track_order.

        track_times: bool or None, default: False
            If True, store timestamps for this group in the file.
            If None, fall back to the default value.

        fs_strategy
            The file space handling strategy to be used.  Only allowed when
            creating a new file (mode w, w- or x).  Defined as:
            "fsm"        FSM, Aggregators, VFD
            "page"       Paged FSM, VFD
            "aggregate"  Aggregators, VFD
            "none"       VFD
            If None use HDF5 defaults.

        fs_page_size
            File space page size in bytes.  Only used when
            fs_strategy="page".  If None use the HDF5 default (4096 bytes).

        fs_persist
            A boolean value to indicate whether free space should be
            persistent or not.  Only allowed when creating a new file.  The
            default value is False.

        fs_threshold
            The smallest free-space section size that the free space manager
            will track.  Only allowed when creating a new file.  The default
            value is 1.

        page_buf_size
            Page buffer size in bytes.  Only allowed for HDF5 files created
            with fs_strategy="page".  Must be a power of two value and
            greater or equal than the file space page size when creating the
            file.  It is not used by default.

        min_meta_keep
            Minimum percentage of metadata to keep in the page buffer before
            allowing pages containing metadata to be evicted.  Applicable
            only if page_buf_size is set.  Default value is zero.

        min_raw_keep
            Minimum percentage of raw data to keep in the page buffer before
            allowing pages containing raw data to be evicted.  Applicable
            only if page_buf_size is set.  Default value is zero.

        locking
            The file locking behavior.  Defined as:

            - False (or "false") --  Disable file locking
            - True (or "true")   --  Enable file locking
            - "best-effort"      --  Enable file locking but ignore some errors
            - None               --  Use HDF5 defaults

            .. warning::

                The HDF5_USE_FILE_LOCKING environment variable can override
                this parameter.

        alignment_threshold
            Together with ``alignment_interval``, this property ensures that
            any file object greater than or equal in size to the alignment
            threshold (in bytes) will be aligned on an address which is a
            multiple of alignment interval.

        alignment_interval
            This property should be used in conjunction with
            ``alignment_threshold``.  See the description above.  For more
            details, see
            https://support.hdfgroup.org/documentation/hdf5/latest/group___f_a_p_l.html#gab99d5af749aeb3896fd9e3ceb273677a

        meta_block_size
            Set the current minimum size, in bytes, of new metadata block
            allocations.  See
            https://support.hdfgroup.org/documentation/hdf5/latest/group___f_a_p_l.html#ga8822e3dedc8e1414f20871a87d533cb1

        Additional keywords
            Passed on to the selected file driver.
        """
        if driver == 'ros3':
            if not ros3:
                raise ValueError("h5py was built without ROS3 support, can't use ros3 driver")

            if hdf5_version < (2, 0, 0):
                # For these HDF5 versions an s3:// location is rewritten
                # here into the equivalent https:// URL.
                from urllib.parse import urlparse
                url = urlparse(name)
                if url.scheme == 's3':
                    aws_region = kwds.get('aws_region', b'').decode('ascii')
                    if not aws_region:
                        raise ValueError('AWS region required for s3:// location')
                    name = f'https://s3.{aws_region}.amazonaws.com/{url.netloc}{url.path}'
                elif url.scheme not in ('https', 'http'):
                    raise ValueError(f'{name}: S3 location must begin with '
                                     'either "https://", "http://", or "s3://"')

        if isinstance(name, _objects.ObjectID):
            # Bind to the file that an already-open low-level object lives in.
            if fs_strategy:
                raise ValueError("Unable to set file space strategy of an existing file")

            with phil:
                fid = h5i.get_file_id(name)
        else:
            if hasattr(name, 'read') and hasattr(name, 'seek'):
                # A Python file-like object: route it through 'fileobj'.
                if driver not in (None, 'fileobj'):
                    raise ValueError("Driver must be 'fileobj' for file-like object if specified.")
                driver = 'fileobj'
                if kwds.get('fileobj', name) != name:
                    raise ValueError("Invalid value of 'fileobj' argument; "
                                     "must equal to file-like object if specified.")
                kwds['fileobj'] = name
                name = repr(name).encode('ASCII', 'replace')
            else:
                name = filename_encode(name)

            if track_order is None:
                track_order = h5.get_config().track_order

            if fs_strategy and mode not in ('w', 'w-', 'x'):
                raise ValueError("Unable to set file space strategy of an existing file")

            if swmr and mode != 'r':
                warn(
                    "swmr=True only affects read ('r') mode. For swmr write "
                    "mode, set f.swmr_mode = True after opening the file.",
                    stacklevel=2,
                )

            with phil:
                fapl = make_fapl(driver, libver, rdcc_nslots, rdcc_nbytes, rdcc_w0,
                                 locking, page_buf_size, min_meta_keep, min_raw_keep,
                                 alignment_threshold=alignment_threshold,
                                 alignment_interval=alignment_interval,
                                 meta_block_size=meta_block_size,
                                 **kwds)
                fcpl = make_fcpl(track_order=track_order, track_times=track_times,
                                 fs_strategy=fs_strategy, fs_persist=fs_persist,
                                 fs_threshold=fs_threshold, fs_page_size=fs_page_size)
                fid = make_fid(name, mode, userblock_size, fapl, fcpl, swmr=swmr)

            self._libver = libver if isinstance(libver, tuple) else (libver, 'latest')

        super().__init__(fid)

    # Used to give every in-memory file a distinct placeholder name.
    _in_memory_file_counter = 0

    @classmethod
    @with_phil
    def in_memory(cls, file_image=None, **kwargs):
        """Create an HDF5 file in memory, without an underlying file

        file_image
            The initial file contents as bytes (or anything that supports the
            Python buffer interface). HDF5 takes a copy of this data.
        block_size
            Chunk size for new memory allocations (default 64 KiB).

        Other keyword arguments are like File(), although name, mode,
        driver and locking can't be passed.
        """
        for banned in ('driver', 'locking', 'backing_store'):
            if banned in kwargs:
                raise TypeError(
                    f"File.in_memory() got an unexpected keyword argument {banned!r}"
                )

        # Keywords named like make_fcpl() parameters go to the file creation
        # property list; whatever is left goes to make_fapl().
        fcpl_kwargs = {
            key: kwargs.pop(key)
            for key in inspect.signature(make_fcpl).parameters
            if key in kwargs
        }
        fcpl = make_fcpl(**fcpl_kwargs)
        fapl = make_fapl(driver="core", backing_store=False, **kwargs)
        if file_image:
            if fcpl_kwargs:
                kw = ', '.join(fcpl_kwargs)
                raise TypeError(f"{kw} parameters cannot be used with file_image")
            fapl.set_file_image(file_image)

        # We have to give HDF5 a filename, but it should never use it.
        # This is a hint both in memory, and in case a bug ever creates a file.
        # The name also needs to be different from any other open file;
        # we use a simple counter (protected by the 'phil' lock) for this.
        name = b"h5py_in_memory_nonfile_%d" % cls._in_memory_file_counter
        cls._in_memory_file_counter += 1

        if file_image:
            fid = h5f.open(name, h5f.ACC_RDWR, fapl=fapl)
        else:
            fid = h5f.create(name, h5f.ACC_EXCL, fapl=fapl, fcpl=fcpl)
        return cls(fid)

    def close(self):
        """ Close the file.  All open objects become invalid """
        with phil:
            # Check that the file is still open, otherwise skip
            if not self.id.valid:
                return
            # We have to explicitly murder all open objects related to the file

            # Close file-resident objects first, then the files.
            # Otherwise we get errors in MPI mode.
            for types in (h5f.OBJ_LOCAL | ~h5f.OBJ_FILE,
                          h5f.OBJ_LOCAL | h5f.OBJ_FILE):
                self.id._close_open_objects(types)

            self.id.close()
            _objects.nonlocal_close()

    def flush(self):
        """ Tell the HDF5 library to flush its buffers.
        """
        with phil:
            h5f.flush(self.id)

    @with_phil
    def __enter__(self):
        return self

    @with_phil
    def __exit__(self, *args):
        if self.id:
            self.close()

    @with_phil
    def __repr__(self):
        if not self.id:
            return '<Closed HDF5 file>'
        # Filename has to be forced to Unicode if it comes back bytes
        # Mode is always a "native" string
        filename = self.filename
        if isinstance(filename, bytes):  # Can't decode fname
            filename = filename.decode('utf8', 'replace')
        return f'<HDF5 file "{os.path.basename(filename)}" (mode {self.mode})>'
@@ -0,0 +1,412 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
"""
Implements support for HDF5 compression filters via the high-level
interface. The following types of filter are available:
"gzip"
Standard DEFLATE-based compression, at integer levels from 0 to 9.
Built-in to all public versions of HDF5. Use this if you want a
decent-to-good ratio, good portability, and don't mind waiting.
"lzf"
Custom compression filter for h5py. This filter is much, much faster
than gzip (roughly 10x in compression vs. gzip level 4, and 3x faster
in decompressing), but at the cost of a worse compression ratio. Use
this if you want cheap compression and portability is not a concern.
"szip"
Access to the HDF5 SZIP encoder. SZIP is a non-mainstream compression
format used in space science on integer and float datasets. SZIP is
subject to license requirements, which means the encoder is not
guaranteed to be always available. However, it is also much faster
than gzip.
The following constants in this module are also useful:
decode
Tuple of available filter names for decoding
encode
Tuple of available filter names for encoding
"""
from collections.abc import Mapping
import operator
import numpy as np
from .base import product
from .compat import filename_encode
from .. import h5z, h5p, h5d, h5f
# Filter name -> HDF5 filter identifier, for the filters this module knows.
_COMP_FILTERS = dict(
    gzip=h5z.FILTER_DEFLATE,
    szip=h5z.FILTER_SZIP,
    lzf=h5z.FILTER_LZF,
    shuffle=h5z.FILTER_SHUFFLE,
    fletcher32=h5z.FILTER_FLETCHER32,
    scaleoffset=h5z.FILTER_SCALEOFFSET,
)

# Accepted `fill_time` names -> HDF5 fill time constant.
_FILL_TIME_ENUM = dict(
    alloc=h5d.FILL_TIME_ALLOC,
    never=h5d.FILL_TIME_NEVER,
    ifset=h5d.FILL_TIME_IFSET,
)

# Option values used when a compression filter is requested without options.
DEFAULT_GZIP = 4
DEFAULT_SZIP = ('nn', 8)
def _gen_filter_tuples():
    """ Bootstrap function to figure out what filters are available.

    Returns a pair of tuples of filter names: (usable for decoding,
    usable for encoding).
    """
    decoders = []
    encoders = []
    for name, code in _COMP_FILTERS.items():
        if not h5z.filter_avail(code):
            continue
        # The info word carries separate flags for encoding and decoding.
        info = h5z.get_filter_info(code)
        if info & h5z.FILTER_CONFIG_ENCODE_ENABLED:
            encoders.append(name)
        if info & h5z.FILTER_CONFIG_DECODE_ENABLED:
            decoders.append(name)
    return tuple(decoders), tuple(encoders)


decode, encode = _gen_filter_tuples()
def _external_entry(entry):
""" Check for and return a well-formed entry tuple for
a call to h5p.set_external. """
# We require only an iterable entry but also want to guard against
# raising a confusing exception from unpacking below a str or bytes that
# was mistakenly passed as an entry. We go further than that and accept
# only a tuple, which allows simpler documentation and exception
# messages.
if not isinstance(entry, tuple):
raise TypeError(
"Each external entry must be a tuple of (name, offset, size)")
name, offset, size = entry # raise ValueError without three elements
name = filename_encode(name)
offset = operator.index(offset)
size = operator.index(size)
return (name, offset, size)
def _normalize_external(external):
""" Normalize external into a well-formed list of tuples and return. """
if external is None:
return []
try:
# Accept a solitary name---a str, bytes, or os.PathLike acceptable to
# filename_encode.
return [_external_entry((external, 0, h5f.UNLIMITED))]
except TypeError:
pass
# Check and rebuild each entry to be well-formed.
return [_external_entry(entry) for entry in external]
class FilterRefBase(Mapping):
    """Base class for referring to an HDF5 filter and describing its options

    Your subclass must define filter_id, and may define a filter_options tuple.
    """
    filter_id = None
    filter_options = ()

    # Mapping interface supports using instances as **kwargs for compatibility
    # with older versions of h5py
    @property
    def _kwargs(self):
        # Rebuilt on every access from the current attribute values.
        return dict(
            compression=self.filter_id,
            compression_opts=self.filter_options,
        )

    def __hash__(self):
        key = (self.filter_id, self.filter_options)
        return hash(key)

    def __eq__(self, other):
        # Only another filter reference can compare equal.
        if not isinstance(other, FilterRefBase):
            return False
        return (
            self.filter_id == other.filter_id
            and self.filter_options == other.filter_options
        )

    def __len__(self):
        return len(self._kwargs)

    def __iter__(self):
        return iter(self._kwargs)

    def __getitem__(self, item):
        return self._kwargs[item]
class Gzip(FilterRefBase):
    """Filter reference for the DEFLATE (gzip) filter at a given level."""
    filter_id = h5z.FILTER_DEFLATE

    def __init__(self, level=DEFAULT_GZIP):
        """Store `level` as the sole filter option."""
        options = (level,)
        self.filter_options = options
def fill_dcpl(plist, shape, dtype, chunks, compression, compression_opts,
              shuffle, fletcher32, maxshape, scaleoffset, external,
              allow_unknown_filter=False, *, fill_time=None):
    """ Generate a dataset creation property list.

    Validates the chunk/filter arguments and then applies them to `plist`,
    which is returned.  For empty (shape None) and scalar (shape ())
    datasets no chunk/filter option is allowed and a freshly created
    property list is returned instead of `plist`.

    Raises TypeError or ValueError when an argument is invalid.

    Undocumented and subject to change without warning.
    """

    if shape is None or shape == ():
        shapetype = 'Empty' if shape is None else 'Scalar'
        if any((chunks, compression, compression_opts, shuffle, fletcher32,
                scaleoffset is not None)):
            raise TypeError(
                f"{shapetype} datasets don't support chunk/filter options"
            )
        if maxshape and maxshape != ():
            raise TypeError(f"{shapetype} datasets cannot be extended")
        return h5p.create(h5p.DATASET_CREATE)

    def rq_tuple(tpl, name):
        """ Check if chunks/maxshape match dataset rank """
        # None and True are accepted as-is; they are resolved further down.
        if tpl in (None, True):
            return
        try:
            tpl = tuple(tpl)
        except TypeError as exc:
            raise TypeError(f'{name!r} argument must be None or a sequence object') from exc
        if len(tpl) != len(shape):
            raise ValueError(f'{name!r} must have same rank as dataset shape')

    rq_tuple(chunks, 'chunks')
    rq_tuple(maxshape, 'maxshape')

    if compression is not None:
        # A filter reference object carries both the filter id and options.
        if isinstance(compression, FilterRefBase):
            compression_opts = compression.filter_options
            compression = compression.filter_id

        # Integers are taken as raw filter numbers; names must be encodable.
        if compression not in encode and not isinstance(compression, int):
            raise ValueError('Compression filter "%s" is unavailable' % compression)

        if compression == 'gzip':
            if compression_opts is None:
                gzip_level = DEFAULT_GZIP
            elif compression_opts in range(10):
                gzip_level = compression_opts
            else:
                raise ValueError("GZIP setting must be an integer from 0-9, not %r" % compression_opts)

        elif compression == 'lzf':
            if compression_opts is not None:
                raise ValueError("LZF compression filter accepts no options")

        elif compression == 'szip':
            if compression_opts is None:
                compression_opts = DEFAULT_SZIP

            err = "SZIP options must be a 2-tuple ('ec'|'nn', even integer 0-32)"
            try:
                szmethod, szpix = compression_opts
            except TypeError as exc:
                raise TypeError(err) from exc
            if szmethod not in ('ec', 'nn'):
                raise ValueError(err)
            if not (0<szpix<=32 and szpix%2 == 0):
                raise ValueError(err)

    elif compression_opts is not None:
        # Can't specify just compression_opts by itself.
        raise TypeError("Compression method must be specified")

    if scaleoffset is not None:
        # scaleoffset must be an integer when it is not None or False,
        # except for integral data, for which scaleoffset == True is
        # permissible (will use SO_INT_MINBITS_DEFAULT)

        if scaleoffset < 0:
            raise ValueError('scale factor must be >= 0')

        if dtype.kind == 'f':
            if scaleoffset is True:
                raise ValueError('integer scaleoffset must be provided for '
                                 'floating point types')
        elif dtype.kind in ('u', 'i'):
            if scaleoffset is True:
                scaleoffset = h5z.SO_INT_MINBITS_DEFAULT
        else:
            raise TypeError('scale/offset filter only supported for integer '
                            'and floating-point types')

        # Scale/offset following fletcher32 in the filter chain will (almost?)
        # always trigger a read error, as most scale/offset settings are
        # lossy. Since fletcher32 must come first (see comment below) we
        # simply prohibit the combination of fletcher32 and scale/offset.
        if fletcher32:
            raise ValueError('fletcher32 cannot be used with potentially lossy'
                             ' scale/offset filter')

    external = _normalize_external(external)
    # End argument validation

    # Guess a chunk shape when chunking was requested with True, or when it
    # was left as None but one of the listed options is in use.
    if (chunks is True) or (chunks is None and any((
            shuffle,
            fletcher32,
            compression,
            (maxshape and not len(external)),
            scaleoffset is not None,
    ))):
        chunks = guess_chunk(shape, maxshape, dtype.itemsize)

    # maxshape=True means every axis gets None as its maximum.
    if maxshape is True:
        maxshape = (None,)*len(shape)

    if chunks is not None:
        plist.set_chunk(chunks)

    if fill_time is not None:
        if (ft := _FILL_TIME_ENUM.get(fill_time)) is not None:
            plist.set_fill_time(ft)
        else:
            msg = ("fill_time must be one of the following choices: 'alloc', "
                   f"'never' or 'ifset', but it is {fill_time}.")
            raise ValueError(msg)

    # scale-offset must come before shuffle and compression
    if scaleoffset is not None:
        if dtype.kind in ('u', 'i'):
            plist.set_scaleoffset(h5z.SO_INT, scaleoffset)
        else:  # dtype.kind == 'f'
            plist.set_scaleoffset(h5z.SO_FLOAT_DSCALE, scaleoffset)

    for item in external:
        plist.set_external(*item)

    if shuffle:
        plist.set_shuffle()

    if compression == 'gzip':
        plist.set_deflate(gzip_level)
    elif compression == 'lzf':
        plist.set_filter(h5z.FILTER_LZF, h5z.FLAG_OPTIONAL)
    elif compression == 'szip':
        opts = {'ec': h5z.SZIP_EC_OPTION_MASK, 'nn': h5z.SZIP_NN_OPTION_MASK}
        plist.set_szip(opts[szmethod], szpix)
    elif isinstance(compression, int):
        # A raw filter number must be available unless the caller opted out
        # of that check with allow_unknown_filter.
        if not allow_unknown_filter and not h5z.filter_avail(compression):
            raise ValueError("Unknown compression filter number: %s" % compression)

        plist.set_filter(compression, h5z.FLAG_OPTIONAL, compression_opts)

    # `fletcher32` must come after `compression`, otherwise, if `compression`
    # is "szip" and the data is 64bit, the fletcher32 checksum will be wrong
    # (see GitHub issue #953).
    if fletcher32:
        plist.set_fletcher32()

    return plist
def get_filter_name(code):
    """
    Return the name of the compression filter for a given filter identifier.

    Identifiers without a known name are returned as ``str(code)``.

    Undocumented and subject to change without warning.
    """
    known_names = {
        h5z.FILTER_DEFLATE: 'gzip',
        h5z.FILTER_SZIP: 'szip',
        h5z.FILTER_SHUFFLE: 'shuffle',
        h5z.FILTER_FLETCHER32: 'fletcher32',
        h5z.FILTER_LZF: 'lzf',
        h5z.FILTER_SCALEOFFSET: 'scaleoffset',
    }
    fallback = str(code)
    return known_names.get(code, fallback)
def get_filters(plist):
    """ Extract a dictionary of active filters from a DCPL, along with
    their settings.

    Keys are the names given by get_filter_name.  Values are the gzip
    level, an ('ec'|'nn', pixels) tuple for szip, None for lzf and for
    filters without client values, and the raw client values otherwise.

    Undocumented and subject to change without warning.
    """
    pipeline = {}

    for position in range(plist.get_nfilters()):
        code, _, settings, _ = plist.get_filter(position)

        if code == h5z.FILTER_DEFLATE:
            settings = settings[0]  # gzip level

        elif code == h5z.FILTER_SZIP:
            mask, pixels = settings[0:2]
            # Reduce the option mask to the coding method it selects.
            if mask & h5z.SZIP_EC_OPTION_MASK:
                method = 'ec'
            elif mask & h5z.SZIP_NN_OPTION_MASK:
                method = 'nn'
            else:
                raise TypeError("Unknown SZIP configuration")
            settings = (method, pixels)

        elif code == h5z.FILTER_LZF:
            settings = None

        elif len(settings) == 0:
            settings = None

        pipeline[get_filter_name(code)] = settings

    return pipeline
CHUNK_BASE = 16*1024    # Multiplier by which chunks are adjusted
CHUNK_MIN = 8*1024      # Soft lower limit (8k)
CHUNK_MAX = 1024*1024   # Hard upper limit (1M)


def guess_chunk(shape, maxshape, typesize):
    """ Guess an appropriate chunk layout for a dataset, given its shape and
    the size of each element in bytes.  Will allocate chunks only as large
    as CHUNK_MAX.  Chunks are generally close to some power-of-2 fraction of
    each axis, slightly favoring bigger values for the last index.

    `maxshape` is accepted but not used.  Raises ValueError for a scalar
    shape or a shape holding a non-finite value.  Returns a tuple of ints.

    Undocumented and subject to change without warning.
    """
    # pylint: disable=unused-argument

    # Axes of length 0 (typically resizable dimensions that start out
    # empty) are guessed as 1024.
    shape = tuple(1024 if extent == 0 else extent for extent in shape)

    ndims = len(shape)
    if ndims == 0:
        raise ValueError("Chunks not allowed for scalar datasets.")

    chunks = np.array(shape, dtype='=f8')
    if not np.all(np.isfinite(chunks)):
        raise ValueError("Illegal value in chunk tuple")

    # Determine the optimal chunk size in bytes using a PyTables expression.
    # This is kept as a float.
    dset_size = product(chunks)*typesize
    target_size = CHUNK_BASE * (2**np.log10(dset_size/(1024.*1024)))

    # Clamp the target into [CHUNK_MIN, CHUNK_MAX].
    if target_size > CHUNK_MAX:
        target_size = CHUNK_MAX
    elif target_size < CHUNK_MIN:
        target_size = CHUNK_MIN

    step = 0
    while True:
        # Repeatedly loop over the axes, dividing them by 2.  Stop when:
        # 1a. We're smaller than the target chunk size, OR
        # 1b. We're within 50% of the target chunk size, AND
        #  2. The chunk is smaller than the maximum chunk size

        chunk_bytes = product(chunks)*typesize

        near_target = (chunk_bytes < target_size or
                       abs(chunk_bytes-target_size)/target_size < 0.5)
        if near_target and chunk_bytes < CHUNK_MAX:
            break

        if product(chunks) == 1:
            break  # Element size larger than CHUNK_MAX

        axis = step % ndims
        chunks[axis] = np.ceil(chunks[axis] / 2.0)
        step += 1

    return tuple(int(extent) for extent in chunks)
@@ -0,0 +1,811 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
"""
Implements support for high-level access to HDF5 groups.
"""
from contextlib import contextmanager
import posixpath as pp
import numpy
from .compat import filename_decode, filename_encode
from .. import h5, h5g, h5i, h5o, h5r, h5t, h5l, h5p
from . import base
from .base import HLObject, MutableMappingHDF5, phil, with_phil
from . import dataset
from . import datatype
from .vds import vds_support
class Group(HLObject, MutableMappingHDF5):
""" Represents an HDF5 group.
"""
def __init__(self, bind):
    """ Create a new Group object by binding to a low-level GroupID.

    bind
        The h5g.GroupID instance to wrap.

    Raises ValueError if "bind" is not a GroupID.
    """
    with phil:
        if not isinstance(bind, h5g.GroupID):
            # Wrap the value in a 1-tuple: a bare tuple on the right of "%"
            # would be unpacked as the argument list, producing a formatting
            # TypeError (or a misleading message) instead of this ValueError.
            raise ValueError("%s is not a GroupID" % (bind,))
        super().__init__(bind)
def create_group(self, name, track_order=None, *, track_times=False):
    """ Create a subgroup and return it as a Group object.

    name
        Absolute or relative name of the new group.  Creation fails if
        the target name already exists.

    track_order
        If true, track dataset/group/attribute creation order under the
        new group.  None selects the global default,
        h5.get_config().track_order.

    track_times: bool or None, default: False
        If True, store timestamps for this group in the file.
        None falls back to the default value (False).

    Raises TypeError if track_times is not True, False or None.
    """
    if track_order is None:
        track_order = h5.get_config().track_order

    with phil:
        name, lcpl = self._e(name, lcpl=True)
        gcpl = h5p.create(h5p.GROUP_CREATE)

        if track_order:
            creation_order = h5p.CRT_ORDER_TRACKED | h5p.CRT_ORDER_INDEXED
            gcpl.set_link_creation_order(creation_order)
            gcpl.set_attr_creation_order(creation_order)

        # An explicit None means "use h5py's default", which is False.
        store_times = False if track_times is None else track_times
        if store_times not in (True, False):
            raise TypeError("track_times must be either True, False, or None")
        gcpl.set_obj_track_times(store_times)

        return Group(h5g.create(self.id, name, lcpl=lcpl, gcpl=gcpl))
def create_dataset(self, name, shape=None, dtype=None, data=None, **kwds):
    """ Create a new HDF5 dataset and return it as a Dataset object.

    If the name contains intermediate path components, the parent groups
    are obtained with require_group() before the dataset is made.

    name
        Name of the dataset (absolute or relative).  Provide None to make
        an anonymous dataset.
    shape
        Dataset shape.  Use "()" for scalar datasets.  Required if "data"
        isn't provided.
    dtype
        Numpy dtype or string.  If omitted, dtype('f') will be used.
        Required if "data" isn't provided; otherwise, overrides data
        array's dtype.
    data
        Provide data to initialize the dataset.  If used, you can omit
        shape and dtype arguments.

    Keyword-only arguments:

    chunks
        (Tuple or int) Chunk shape, or True to enable auto-chunking.
        Integers can be used for 1D shape.

    maxshape
        (Tuple or int) Make the dataset resizable up to this shape.  Use
        None for axes within the tuple you want to be unlimited.  Integers
        can be used for 1D shape.

        For 1D datasets with unlimited maxshape, a shape tuple of length 1
        must be provided, ``(None,)``.  Passing ``None`` sets ``maxshape``
        to ``shape``, making the dataset un-resizable, which is the default.

    compression
        (String or int) Compression strategy.  Legal values are 'gzip',
        'szip', 'lzf'.  If an integer in range(10), this indicates gzip
        compression level.  Otherwise, an integer indicates the number of a
        dynamically loaded compression filter.
    compression_opts
        Compression settings.  This is an integer for gzip, 2-tuple for
        szip, etc.  If specifying a dynamically loaded compression filter
        number, this must be a tuple of values.
    scaleoffset
        (Integer) Enable scale/offset filter for (usually) lossy
        compression of integer or floating-point data.  For integer
        data, the value of scaleoffset is the number of bits to
        retain (pass 0 to let HDF5 determine the minimum number of
        bits necessary for lossless compression).  For floating point
        data, scaleoffset is the number of digits after the decimal
        place to retain; stored values thus have absolute error
        less than 0.5*10**(-scaleoffset).
    shuffle
        (T/F) Enable shuffle filter.
    fletcher32
        (T/F) Enable fletcher32 error detection.  Not permitted in
        conjunction with the scale/offset filter.
    fillvalue
        (Scalar) Use this value for uninitialized parts of the dataset.
    track_times
        (T/F) Enable dataset creation timestamps.
    track_order
        (T/F) Track attribute creation order if True.  If omitted use
        global default h5.get_config().track_order.
    external
        (Iterable of tuples) Sets the external storage property, thus
        designating that the dataset will be stored in one or more
        non-HDF5 files external to the HDF5 file.  Adds each tuple
        of (name, offset, size) to the dataset's list of external files.
        Each name must be a str, bytes, or os.PathLike; each offset and
        size, an integer.  If only a name is given instead of an iterable
        of tuples, it is equivalent to [(name, 0, h5py.h5f.UNLIMITED)].
    efile_prefix
        (String) External dataset file prefix for dataset access property
        list.  Does not persist in the file.
    virtual_prefix
        (String) Virtual dataset file prefix for dataset access property
        list.  Does not persist in the file.
    allow_unknown_filter
        (T/F) Do not check that the requested filter is available for use.
        This should only be used with ``write_direct_chunk``, where the
        caller compresses the data before handing it to h5py.
    rdcc_nbytes
        Total size of the dataset's chunk cache in bytes.  The default size
        is 1024**2 (1 MiB) for HDF5 before 2.0 and 8 MiB for HDF5 2.0 or
        later.
    rdcc_w0
        The chunk preemption policy for this dataset.  This must be
        between 0 and 1 inclusive and indicates the weighting according to
        which chunks which have been fully read or written are penalized
        when determining which chunks to flush from cache.  A value of 0
        means fully read or written chunks are treated no differently than
        other chunks (the preemption is strictly LRU) while a value of 1
        means fully read or written chunks are always preempted before
        other chunks.  If your application only reads or writes data once,
        this can be safely set to 1.  Otherwise, this should be set lower
        depending on how often you re-read or re-write the same data.  The
        default value is 0.75.
    rdcc_nslots
        The number of chunk slots in the dataset's chunk cache.  Increasing
        this value reduces the number of cache collisions, but slightly
        increases the memory used.  Due to the hashing strategy, this value
        should ideally be a prime number.  As a rule of thumb, this value
        should be at least 10 times the number of chunks that can fit in
        rdcc_nbytes bytes.  For maximum performance, this value should be
        set approximately 100 times that number of chunks.  The default
        value is 521.
    """
    if 'track_order' not in kwds:
        kwds['track_order'] = h5.get_config().track_order

    # Both prefixes are passed through the same name encoding as object names.
    for prefix_key in ('efile_prefix', 'virtual_prefix'):
        if prefix_key in kwds:
            kwds[prefix_key] = self._e(kwds[prefix_key])

    with phil:
        parent = self
        if name:
            name = self._e(name)
            # A separator after any leading slashes means the dataset
            # lives in a subgroup, which is created on demand.
            if b'/' in name.lstrip(b'/'):
                parent_path, _, name = name.rpartition(b'/')
                parent = self.require_group(parent_path)

        dsid = dataset.make_new_dset(parent, shape, dtype, data, name, **kwds)
        return dataset.Dataset(dsid)
if vds_support:
    def create_virtual_dataset(self, name, layout, fillvalue=None):
        """Create a new virtual dataset in this group.

        See virtual datasets in the docs for more information.

        name
            (str) Name of the new dataset.  Intermediate groups in the
            name are obtained with require_group().
        layout
            (VirtualLayout) Defines the sources for the virtual dataset
        fillvalue
            The value to use where there is no data.

        Returns the new dataset as a Dataset object.
        """
        with phil:
            parent = self
            if name:
                name = self._e(name)
                if b'/' in name.lstrip(b'/'):
                    parent_path, _, name = name.rpartition(b'/')
                    parent = self.require_group(parent_path)

            dsid = layout.make_dataset(parent, name=name, fillvalue=fillvalue)
            return dataset.Dataset(dsid)

    @contextmanager
    def build_virtual_dataset(
            self, name, shape, dtype, maxshape=None, fillvalue=None
    ):
        """Assemble a virtual dataset in this group.

        This is used as a context manager::

            with f.build_virtual_dataset('virt', (10, 1000), np.uint32) as layout:
                layout[0] = h5py.VirtualSource('foo.h5', 'data', (1000,))

        The dataset is created from the layout when the with-block finishes.

        name
            (str) Name of the new dataset
        shape
            (tuple) Shape of the dataset
        dtype
            A numpy dtype for data read from the virtual dataset
        maxshape
            (tuple, optional) Maximum dimensions if the dataset can grow.
            Use None for unlimited dimensions.
        fillvalue
            The value used where no data is available.
        """
        from .vds import VirtualLayout

        pending = VirtualLayout(shape, dtype, maxshape, self.file.filename)
        yield pending

        self.create_virtual_dataset(name, pending, fillvalue)
def require_dataset(self, name, shape, dtype, exact=False, **kwds):
    """ Open a dataset, creating it if it doesn't exist.

    With "exact" False (the default), an existing dataset is returned only
    if it has the same shape and a dtype the requested one can be cast to.
    With "exact" True, shape and dtype must match exactly.

    If keyword "maxshape" is given, the maxshape and dtype must match
    instead.

    If any of the keywords "rdcc_nslots", "rdcc_nbytes", or "rdcc_w0" are
    given, they will be used to configure the dataset's chunk cache.

    Other dataset keywords (see create_dataset) may be provided, but are
    only used if a new dataset is to be created.

    Raises TypeError if an incompatible object already exists, or if the
    shape, maxshape or dtype don't match according to the above rules.
    """
    for prefix_key in ('efile_prefix', 'virtual_prefix'):
        if prefix_key in kwds:
            kwds[prefix_key] = self._e(kwds[prefix_key])

    with phil:
        if name not in self:
            return self.create_dataset(name, shape, dtype, **kwds)

        # A bare int is shorthand for a 1D shape.
        if isinstance(shape, int):
            shape = (shape,)

        try:
            dsid = dataset.open_dset(self, self._e(name), **kwds)
            dset = dataset.Dataset(dsid)
        except KeyError as exc:
            # The name exists but could not be opened as a dataset; look
            # the object up to report its class in the error.
            dset = self[name]
            raise TypeError(f"Incompatible object ({dset.__class__.__name__}) already exists") from exc

        if shape != dset.shape:
            # A shape mismatch is tolerated only when the caller supplied a
            # maxshape, which must then agree with the existing one.
            if "maxshape" not in kwds:
                raise TypeError("Shapes do not match (existing %s vs new %s)" % (dset.shape, shape))
            if kwds["maxshape"] != dset.maxshape:
                raise TypeError("Max shapes do not match (existing %s vs new %s)" % (dset.maxshape, kwds["maxshape"]))

        if exact:
            if dtype != dset.dtype:
                raise TypeError("Datatypes do not exactly match (existing %s vs new %s)" % (dset.dtype, dtype))
        elif not numpy.can_cast(dtype, dset.dtype):
            raise TypeError("Datatypes cannot be safely cast (existing %s vs new %s)" % (dset.dtype, dtype))

        return dset
def create_dataset_like(self, name, other, **kwupdate):
    """ Create a dataset similar to `other`.

    name
        Name of the dataset (absolute or relative).  Provide None to make
        an anonymous dataset.
    other
        The dataset which the new dataset should mimic.  All properties,
        such as shape, dtype, chunking, ... will be taken from it, but no
        data or attributes are being copied.

    Any dataset keywords (see create_dataset) may be provided, including
    shape and dtype, in which case the provided values take precedence over
    those from `other`.
    """
    inherited = (
        'shape', 'dtype', 'chunks', 'compression', 'compression_opts',
        'scaleoffset', 'shuffle', 'fletcher32', 'fillvalue',
    )
    for attr in inherited:
        kwupdate.setdefault(attr, getattr(other, attr))

    # TODO: more elegant way to pass these (dcpl to create_dataset?)
    creation_plist = other.id.get_create_plist()
    kwupdate.setdefault('track_times', creation_plist.get_obj_track_times())
    kwupdate.setdefault('track_order', creation_plist.get_attr_creation_order() > 0)

    # Special case: the maxshape property always exists, but if we pass it
    # to create_dataset, the new dataset will automatically get chunked
    # layout. So we copy it only if it is different from shape.
    if other.maxshape != other.shape:
        kwupdate.setdefault('maxshape', other.maxshape)

    return self.create_dataset(name, **kwupdate)
def require_group(self, name):
    # TODO: support kwargs like require_dataset
    """Return the group called "name", creating it if it doesn't exist.

    Raises TypeError if an object with that name already exists and is
    not a group.
    """
    with phil:
        if name in self:
            existing = self[name]
            if isinstance(existing, Group):
                return existing
            raise TypeError("Incompatible object (%s) already exists" % existing.__class__.__name__)
        return self.create_group(name)
@with_phil
def __getitem__(self, name):
    """ Open an object in the file.

    "name" is either an object reference (h5r.Reference) or a path given
    as bytes or str.  Returns a Group, Dataset or Datatype depending on
    the kind of object opened.

    Raises ValueError for an invalid object reference, and TypeError for
    an unsupported key type or an unknown object type.
    """
    if isinstance(name, h5r.Reference):
        oid = h5r.dereference(name, self.id)
        if oid is None:
            raise ValueError("Invalid HDF5 object reference")
    elif isinstance(name, (bytes, str)):
        oid = h5o.open(self.id, self._e(name), lapl=self._lapl)
    else:
        raise TypeError("Accessing a group is done with bytes or str, "
                        "not {}".format(type(name)))

    kind = h5i.get_type(oid)
    if kind == h5i.GROUP:
        return Group(oid)
    if kind == h5i.DATASET:
        # Datasets opened through a read-only file are flagged read-only.
        return dataset.Dataset(oid, readonly=(self.file.mode == 'r'))
    if kind == h5i.DATATYPE:
        return datatype.Datatype(oid)
    raise TypeError("Unknown object type")
def get(self, name, default=None, getclass=False, getlink=False):
    """ Retrieve an item or other information.

    "name" given only:
        Return the item, or "default" if it doesn't exist

    "getclass" is True:
        Return the class of object (Group, Dataset, etc.), or "default"
        if nothing with that name exists

    "getlink" is True:
        Return HardLink, SoftLink or ExternalLink instances.  Return
        "default" if nothing with that name exists.

    "getlink" and "getclass" are True:
        Return HardLink, SoftLink and ExternalLink classes.  Return
        "default" if nothing with that name exists.

    Raises TypeError if the object or link type is not recognised.

    Example:

    >>> cls = group.get('foo', getclass=True)
    >>> if cls == SoftLink:
    ...     pass  # 'foo' is a soft link
    """
    # pylint: disable=arguments-differ
    with phil:
        # Plain lookup: behave like dict.get().
        if not (getclass or getlink):
            try:
                return self[name]
            except KeyError:
                return default

        if name not in self:
            return default

        if not getlink:
            # Only the class of the target object was requested.
            typecode = h5o.get_info(self.id, self._e(name), lapl=self._lapl).type
            object_classes = {
                h5o.TYPE_GROUP: Group,
                h5o.TYPE_DATASET: dataset.Dataset,
                h5o.TYPE_NAMED_DATATYPE: datatype.Datatype,
            }
            try:
                return object_classes[typecode]
            except KeyError as exc:
                raise TypeError("Unknown object type") from exc

        # Link information was requested (class or instance).
        typecode = self.id.links.get_info(self._e(name), lapl=self._lapl).type

        if typecode == h5l.TYPE_SOFT:
            if getclass:
                return SoftLink
            linkbytes = self.id.links.get_val(self._e(name), lapl=self._lapl)
            return SoftLink(self._d(linkbytes))

        if typecode == h5l.TYPE_EXTERNAL:
            if getclass:
                return ExternalLink
            filebytes, linkbytes = self.id.links.get_val(self._e(name), lapl=self._lapl)
            return ExternalLink(
                filename_decode(filebytes), self._d(linkbytes)
            )

        if typecode == h5l.TYPE_HARD:
            return HardLink if getclass else HardLink()

        raise TypeError("Unknown link type")
def __setitem__(self, name, obj):
    """ Add an object to the group.  The name must not already be in use.

    What happens depends on the type of "obj":

    Named HDF5 object (Dataset, Group, Datatype)
        A hard link is created at "name" which points to the
        given object.

    SoftLink or ExternalLink
        Create the corresponding link.

    Numpy ndarray
        The array is converted to a dataset object, with default
        settings (contiguous storage, etc.).

    Numpy dtype
        Commit a copy of the datatype as a named datatype in the file.

    Anything else
        Attempt to convert it to an ndarray and store it.  Scalar
        values are stored as scalar datasets.  Raise ValueError if we
        can't understand the resulting array dtype.
    """
    with phil:
        name, lcpl = self._e(name, lcpl=True)

        if isinstance(obj, HLObject):
            h5o.link(obj.id, self.id, name, lcpl=lcpl, lapl=self._lapl)
            return

        if isinstance(obj, SoftLink):
            self.id.links.create_soft(name, self._e(obj.path), lcpl=lcpl, lapl=self._lapl)
            return

        if isinstance(obj, ExternalLink):
            encoded_filename = filename_encode(obj.filename)
            self.id.links.create_external(name, encoded_filename, self._e(obj.path),
                                          lcpl=lcpl, lapl=self._lapl)
            return

        if isinstance(obj, numpy.dtype):
            committed = h5t.py_create(obj, logical=True)
            committed.commit(self.id, name, lcpl=lcpl)
            return

        # Everything else is stored as data: make an anonymous dataset,
        # then link it into the group under the requested name.
        anonymous = self.create_dataset(None, data=obj)
        h5o.link(anonymous.id, self.id, name, lcpl=lcpl)
@with_phil
def __delitem__(self, name):
    """ Unlink the item called "name" from this group. """
    encoded_name = self._e(name)
    self.id.unlink(encoded_name)
@with_phil
def __len__(self):
    """ Return the number of members attached to this group. """
    member_count = self.id.get_num_objs()
    return member_count
@with_phil
def __iter__(self):
    """ Yield the decoded name of each member of this group. """
    for raw_name in self.id.__iter__():
        member_name = self._d(raw_name)
        yield member_name
@with_phil
def __reversed__(self):
    """ Yield the decoded member names in reverse order. """
    for raw_name in self.id.__reversed__():
        member_name = self._d(raw_name)
        yield member_name
@with_phil
def __contains__(self, name):
    """ Test if a member name exists.

    Uses h5g._path_valid when the low-level module provides it; otherwise
    falls back to a membership test on the GroupID.
    """
    if not hasattr(h5g, "_path_valid"):
        return self._e(name) in self.id

    # A falsy id can't contain anything.
    if not self.id:
        return False
    return h5g._path_valid(self.id, self._e(name), self._lapl)
def copy(self, source, dest, name=None,
         shallow=False, expand_soft=False, expand_external=False,
         expand_refs=False, without_attrs=False):
    """Copy an object or group.

    The source can be a path, Group, Dataset, or Datatype object.  The
    destination can be either a path or a Group object.  The source and
    destinations need not be in the same file.

    If the source is a Group object, all objects contained in that group
    will be copied recursively.

    When the destination is a Group object, by default the target will
    be created in that group with its current name (basename of obj.name).
    You can override that by setting "name" to a string.

    There are various options which all default to "False":

     - shallow: copy only immediate members of a group.

     - expand_soft: expand soft links into new objects.

     - expand_external: expand external links into new objects.

     - expand_refs: copy objects that are pointed to by references.

     - without_attrs: copy object without copying attributes.

    Raises TypeError if the destination is an HDF5 object other than a
    Group.

    Example:

    >>> f = File('myfile.hdf5', 'w')
    >>> f.create_group("MyGroup")
    >>> list(f.keys())
    ['MyGroup']
    >>> f.copy('MyGroup', 'MyCopy')
    >>> list(f.keys())
    ['MyGroup', 'MyCopy']

    """
    with phil:
        if isinstance(source, HLObject):
            source_path = '.'
        else:
            # Interpret source as a path relative to this group
            source_path, source = source, self

        if isinstance(dest, Group):
            if name is not None:
                dest_path = name
            else:
                # Copy into the dest group under the source object's own
                # basename: dest_name/source_name
                if source_path == '.':
                    origin = source
                else:
                    origin = source[source_path]
                dest_path = pp.basename(h5i.get_name(origin.id))
        elif isinstance(dest, HLObject):
            raise TypeError("Destination must be path or Group object")
        else:
            # Interpret destination as a path relative to this group
            dest_path, dest = dest, self

        # Translate the boolean options into the h5o copy flag bitmask.
        requested = (
            (shallow, "COPY_SHALLOW_HIERARCHY_FLAG"),
            (expand_soft, "COPY_EXPAND_SOFT_LINK_FLAG"),
            (expand_external, "COPY_EXPAND_EXT_LINK_FLAG"),
            (expand_refs, "COPY_EXPAND_REFERENCE_FLAG"),
            (without_attrs, "COPY_WITHOUT_ATTR_FLAG"),
        )
        flags = 0
        for enabled, constant_name in requested:
            if enabled:
                flags |= getattr(h5o, constant_name)

        # A copy property list is only needed when some option is set.
        copypl = None
        if flags:
            copypl = h5p.create(h5p.OBJECT_COPY)
            copypl.set_copy_object(flags)

        h5o.copy(source.id, self._e(source_path), dest.id, self._e(dest_path),
                 copypl, base.dlcpl)
def move(self, source, dest):
    """ Move a link to a new location in the file.

    If "source" is a hard link, this effectively renames the object.  If
    "source" is a soft or external link, the link itself is moved, with its
    value unmodified.

    Nothing is done when "source" and "dest" are equal.
    """
    with phil:
        if source != dest:
            self.id.links.move(self._e(source), self.id, self._e(dest),
                               lapl=self._lapl, lcpl=self._lcpl)
def visit(self, func):
    """ Recursively visit all names in this group and subgroups.

    Note: visit ignores soft and external links. To visit those, use
    visit_links.

    You supply a callable (function, method or callable object); it
    will be called exactly once for each link in this group and every
    group below it. Your callable must conform to the signature:

        func(<member name>) => <None or return value>

    Returning None continues iteration, returning anything else stops
    and immediately returns that value from the visit method.  The
    iteration order is lexicographic.

    Example:

    >>> # List the entire contents of the file
    >>> f = File("foo.hdf5")
    >>> list_of_names = []
    >>> f.visit(list_of_names.append)
    """
    with phil:
        def on_name(raw_name):
            """ Hand the decoded (text) name to the user's callable """
            return func(self._d(raw_name))

        return h5o.visit(self.id, on_name)
def visititems(self, func):
    """ Recursively visit names and objects in this group.

    Note: visititems ignores soft and external links. To visit those, use
    visititems_links.

    You supply a callable (function, method or callable object); it
    will be called exactly once for each link in this group and every
    group below it. Your callable must conform to the signature:

        func(<member name>, <object>) => <None or return value>

    Returning None continues iteration, returning anything else stops
    and immediately returns that value from the visit method.  The
    iteration order is lexicographic.

    Example:

    # Get a list of all datasets in the file
    >>> mylist = []
    >>> def func(name, obj):
    ...     if isinstance(obj, Dataset):
    ...         mylist.append(name)
    ...
    >>> f = File('foo.hdf5')
    >>> f.visititems(func)
    """
    with phil:
        def on_name(raw_name):
            """ Decode the name and open the object it refers to """
            member_name = self._d(raw_name)
            return func(member_name, self[member_name])

        return h5o.visit(self.id, on_name)
def visit_links(self, func):
    """ Recursively visit all names in this group and subgroups.
    Each link will be visited exactly once, regardless of its target.

    You supply a callable (function, method or callable object); it
    will be called exactly once for each link in this group and every
    group below it. Your callable must conform to the signature:

        func(<member name>) => <None or return value>

    Returning None continues iteration, returning anything else stops
    and immediately returns that value from the visit method.  The
    iteration order is lexicographic.

    Example:

    >>> # List the entire contents of the file
    >>> f = File("foo.hdf5")
    >>> list_of_names = []
    >>> f.visit_links(list_of_names.append)
    """
    with phil:
        def on_name(raw_name):
            """ Hand the decoded (text) name to the user's callable """
            return func(self._d(raw_name))

        return self.id.links.visit(on_name)
def visititems_links(self, func):
    """ Recursively visit links in this group.
    Each link will be visited exactly once, regardless of its target.

    You supply a callable (function, method or callable object); it
    will be called exactly once for each link in this group and every
    group below it. Your callable must conform to the signature:

        func(<member name>, <link>) => <None or return value>

    Returning None continues iteration, returning anything else stops
    and immediately returns that value from the visit method.  The
    iteration order is lexicographic.

    Example:

    # Get a list of all softlinks in the file
    >>> mylist = []
    >>> def func(name, link):
    ...     if isinstance(link, SoftLink):
    ...         mylist.append(name)
    ...
    >>> f = File('foo.hdf5')
    >>> f.visititems_links(func)
    """
    with phil:
        def on_name(raw_name):
            """ Decode the name and look up the link object behind it """
            member_name = self._d(raw_name)
            return func(member_name, self.get(member_name, getlink=True))

        return self.id.links.visit(on_name)
@with_phil
def __repr__(self):
    """ Describe the group by name and member count, or as closed. """
    if not self:
        return "<Closed HDF5 group>"

    if self.name is None:
        label = "(anonymous)"
    else:
        label = '"%s"' % self.name
    return '<HDF5 group %s (%d members)>' % (label, len(self))
class HardLink:

    """
        Marker type for a hard link in an HDF5 file.

        It exists only so that Group.get has something sensible to return
        for hard links; it carries no state and has no other function.
    """
class SoftLink:

    """
        A symbolic ("soft") link in an HDF5 file.

        The stored path may be absolute or relative, and is never checked
        against the file: the target need not exist.
    """

    def __init__(self, path):
        # The path is always kept as text, whatever type was passed in.
        self._path = str(path)

    @property
    def path(self):
        """ Soft link value.  Not guaranteed to be a valid path. """
        return self._path

    def __repr__(self):
        target = self.path
        return '<SoftLink to "%s">' % target
class ExternalLink:

    """
        Represents an HDF5 external link.  Paths may be absolute or relative.
        No checking is performed to ensure either the target or file exists.
    """

    @property
    def path(self):
        """ External link path, i.e. the part inside the HDF5 file. """
        return self._path

    @property
    def filename(self):
        """ Path to the external HDF5 file in the filesystem. """
        return self._filename

    def __init__(self, filename, path):
        """ Create an external link description.

        filename
            Path to the external HDF5 file.  It is passed through
            filename_encode() and then filename_decode() before being stored.
        path
            Path of the target object inside that file; stored as given.
        """
        self._filename = filename_decode(filename_encode(filename))
        self._path = path

    def __repr__(self):
        # The closing ">" matches the format used by SoftLink.__repr__.
        return '<ExternalLink to "%s" in file "%s">' % (self.path,
                                                        self.filename)
@@ -0,0 +1,439 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
"""
High-level access to HDF5 dataspace selections
"""
import numpy as np
from .base import product
from .. import h5s, h5r, _selector
def select(shape, args, dataset=None):
    """ High-level routine to generate a selection from arbitrary arguments
    to __getitem__.  The arguments should be the following:

    shape
        Shape of the "source" dataspace.

    args
        Either a single argument or a tuple of arguments.  See below for
        supported classes of argument.

    dataset
        A h5py.Dataset instance representing the source dataset.

    Argument classes:

    Single Selection instance
        Returns the argument.

    numpy.ndarray
        Must be a boolean mask.  Returns a PointSelection instance.

    RegionReference
        Returns a Selection instance.

    Indices, slices, ellipses, MultiBlockSlices only
        Returns a SimpleSelection instance

    Indices, slices, ellipses, lists or boolean index arrays
        Returns a FancySelection instance.

    Raises TypeError if a lone Selection, boolean mask or region reference
    does not match "shape", or if a region reference is given without a
    dataset.
    """
    selection_args = args if isinstance(args, tuple) else (args,)

    # "Special" indexing objects are only recognised as a lone argument.
    if len(selection_args) == 1:
        (sole,) = selection_args

        if isinstance(sole, Selection):
            if sole.shape != shape:
                raise TypeError("Mismatched selection shape")
            return sole

        if isinstance(sole, np.ndarray) and sole.dtype.kind == 'b':
            if sole.shape != shape:
                raise TypeError("Boolean indexing array has incompatible shape")
            return PointSelection.from_mask(sole)

        if isinstance(sole, h5r.RegionReference):
            if dataset is None:
                raise TypeError("Cannot apply a region reference without a dataset")
            region_space = h5r.get_region(sole, dataset.id)
            if shape != region_space.shape:
                raise TypeError("Reference shape does not match dataset shape")
            return Selection(shape, spaceid=region_space)

    # Everything else goes through a selector: the dataset's own one when
    # available, otherwise one built on a fresh dataspace of this shape.
    if dataset is None:
        selector = _selector.Selector(h5s.create_simple(shape))
    else:
        selector = dataset._selector

    return selector.make_selection(selection_args)
class Selection:

    """
        Base class for HDF5 dataspace selections.  Subclasses support the
        "selection protocol", which means they have at least the following
        members:

        __init__(shape)   => Create a new selection on "shape"-tuple
        __getitem__(args) => Perform a selection with the range specified.
                             What args are allowed depends on the
                             particular subclass in use.

        id (read-only) =>      h5py.h5s.SpaceID instance
        shape (read-only) =>   The shape of the dataspace.
        mshape  (read-only) => The shape of the selection region.
                               Not guaranteed to fit within "shape", although
                               the total number of points is less than
                               product(shape).
        nselect (read-only) => Number of selected points.  Always equal to
                               product(mshape).

        broadcast(target_shape) => Return an iterable which yields dataspaces
                                   for read, based on target_shape.

        The base class represents "unshaped" selections (1-D).
    """

    def __init__(self, shape, spaceid=None):
        """ Create a selection.  Shape may be None if spaceid is given.

        Without a spaceid, a new dataspace of the given shape is created
        and fully selected.
        """
        if spaceid is None:
            self._shape = tuple(shape)
            unlimited = (h5s.UNLIMITED,) * len(self._shape)
            self._id = h5s.create_simple(self._shape, unlimited)
            self._id.select_all()
        else:
            self._id = spaceid
            self._shape = spaceid.shape

    @property
    def id(self):
        """ SpaceID instance """
        return self._id

    @property
    def shape(self):
        """ Shape of whole dataspace """
        return self._shape

    @property
    def nselect(self):
        """ Number of elements currently selected """
        return self._id.get_select_npoints()

    @property
    def mshape(self):
        """ Shape of selection (always 1-D for this class) """
        npoints = self.nselect
        return (npoints,)

    @property
    def array_shape(self):
        """Shape of array to read/write (always 1-D for this class)"""
        return self.mshape

    # expand_shape and broadcast only really make sense for SimpleSelection
    def expand_shape(self, source_shape):
        """ Return source_shape unchanged if it has as many elements as are
        selected; otherwise raise TypeError. """
        if product(source_shape) == self.nselect:
            return source_shape
        raise TypeError("Broadcasting is not supported for point-wise selections")

    def broadcast(self, source_shape):
        """ Get an iterable for broadcasting """
        if product(source_shape) == self.nselect:
            yield self._id
        else:
            raise TypeError("Broadcasting is not supported for point-wise selections")

    def __getitem__(self, args):
        raise NotImplementedError("This class does not support indexing")
class PointSelection(Selection):

    """
        Represents a point-wise selection.  You can supply sequences of
        points to the three methods append(), prepend() and set(), or
        instantiate it with a single boolean array using from_mask().
    """

    def __init__(self, shape, spaceid=None, points=None):
        """ Create a point selection.

        shape, spaceid
            Passed on to Selection.__init__.
        points
            Optional sequence of points to select initially.
        """
        super().__init__(shape, spaceid)
        if points is not None:
            self._perform_selection(points, h5s.SELECT_SET)

    def _perform_selection(self, points, op):
        """ Internal method which actually performs the selection

        points
            Sequence of points; a 1-D sequence is taken as one point.
        op
            One of the h5s.SELECT_* operations.
        """
        points = np.asarray(points, order='C', dtype='u8')
        if points.ndim == 1:
            # Treat a flat sequence as a single point.  reshape() returns a
            # new view; assigning to points.shape would instead alter the
            # caller's array whenever asarray() returned it unchanged.
            points = points.reshape((1, points.shape[0]))

        # Appending/prepending only makes sense onto an existing point
        # selection; anything else is replaced outright.
        if self._id.get_select_type() != h5s.SEL_POINTS:
            op = h5s.SELECT_SET

        if len(points) == 0:
            self._id.select_none()
        else:
            self._id.select_elements(points, op)

    @classmethod
    def from_mask(cls, mask, spaceid=None):
        """Create a point-wise selection from a NumPy boolean array

        Raises TypeError if mask is not a boolean ndarray.
        """
        if not (isinstance(mask, np.ndarray) and mask.dtype.kind == 'b'):
            raise TypeError("PointSelection.from_mask only works with bool arrays")

        # One row of coordinates per True element of the mask.
        points = np.transpose(mask.nonzero())
        return cls(mask.shape, spaceid, points=points)

    def append(self, points):
        """ Add the sequence of points to the end of the current selection """
        self._perform_selection(points, h5s.SELECT_APPEND)

    def prepend(self, points):
        """ Add the sequence of points to the beginning of the current selection """
        self._perform_selection(points, h5s.SELECT_PREPEND)

    def set(self, points):
        """ Replace the current selection with the given sequence of points"""
        self._perform_selection(points, h5s.SELECT_SET)
class SimpleSelection(Selection):

    """ A single "rectangular" (regular) selection composed of only slices
        and integer arguments.  Can participate in broadcasting.

        The selection is kept in ``self._sel`` as a 4-tuple
        ``(start, count, step, scalar)`` with one entry per axis; ``scalar``
        flags the axes that are dropped from the array shape.
    """

    @property
    def mshape(self):
        """ Shape of current selection """
        return self._sel[1]

    @property
    def array_shape(self):
        """ Shape of the array to read/write: mshape without scalar axes """
        scalar = self._sel[3]
        return tuple(x for x, s in zip(self.mshape, scalar, strict=True) if not s)

    def __init__(self, shape, spaceid=None, hyperslab=None):
        """ Create a simple selection.

        shape, spaceid
            Passed on to Selection.__init__.
        hyperslab
            Optional (start, count, step, scalar) tuple.  If omitted, the
            whole dataspace is selected.
        """
        super().__init__(shape, spaceid)
        if hyperslab is not None:
            self._sel = hyperslab
        else:
            # No hyperslab specified - select all
            rank = len(self.shape)
            self._sel = ((0,)*rank, self.shape, (1,)*rank, (False,)*rank)

    def expand_shape(self, source_shape):
        """Match the dimensions of an array to be broadcast to the selection

        The returned shape describes an array of the same size as the input
        shape, but with one dimension per axis of the selection: length-1
        dimensions are inserted for scalar axes and for leading axes the
        source does not cover.

        E.g. with a dataset shape (10, 5, 4, 2), writing like this::

            ds[..., 0] = np.ones((5, 4))

        The source shape (5, 4) will expand to (1, 5, 4, 1).
        Then the broadcast method below repeats that chunk 10
        times to write to an effective shape of (10, 5, 4, 1).

        Raises TypeError if the source shape can't be broadcast to the
        selection.
        """
        _, count, _, scalar = self._sel

        rank = len(count)
        remaining_src_dims = list(source_shape)

        # Walk the selection axes from last to first, consuming source
        # dimensions from the end.
        eshape = []
        for idx in range(1, rank + 1):
            if len(remaining_src_dims) == 0 or scalar[-idx]:  # Skip scalar axes
                eshape.append(1)
            else:
                t = remaining_src_dims.pop()
                if t == 1 or count[-idx] == t:
                    eshape.append(t)
                else:
                    raise TypeError("Can't broadcast %s -> %s" % (source_shape, self.array_shape))  # array shape

        if any(n > 1 for n in remaining_src_dims):
            # All dimensions from target_shape should either have been popped
            # to match the selection shape, or be 1.
            raise TypeError("Can't broadcast %s -> %s" % (source_shape, self.array_shape))  # array shape

        # We have built eshape backwards, so now reverse it
        return tuple(eshape[::-1])

    def broadcast(self, source_shape):
        """ Return an iterator over target dataspaces for broadcasting.

        Follows the standard NumPy broadcasting rules against the current
        selection shape (self.mshape).

        Raises TypeError if the source shape can't be broadcast.
        """
        if self.shape == ():
            if product(source_shape) != 1:
                # source_shape is a tuple, so it must be wrapped to be
                # formatted as a single value by the % operator.
                raise TypeError("Can't broadcast %s to scalar" % (source_shape,))
            self._id.select_all()
            yield self._id
            return

        start, count, step, scalar = self._sel

        rank = len(count)
        tshape = self.expand_shape(source_shape)

        # Avoid ZeroDivisionError below (after the shape checks in expand_shape)
        if any(d == 0 for d in count):
            return

        # Number of times the source block repeats along each axis.
        chunks = tuple(x//y for x, y in zip(count, tshape, strict=True))
        nchunks = product(chunks)

        if nchunks == 1:
            yield self._id
        else:
            # Select one source-sized block, then slide it across the
            # selection by changing the dataspace offset.
            sid = self._id.copy()
            sid.select_hyperslab((0,)*rank, tshape, step)
            for idx in range(nchunks):
                offset = tuple(x*y*z + s for x, y, z, s in zip(np.unravel_index(idx, chunks), tshape, step, start, strict=True))
                sid.offset_simple(offset)
                yield sid
class FancySelection(Selection):

    """
        Selection class for advanced NumPy-style indexing, on top of the
        standard slice-and-int behavior.

        Indexing arguments may be ints, slices, lists of indices, or
        per-axis (1D) boolean arrays.

        Broadcasting is not supported for these selections.
    """

    def __init__(self, shape, spaceid=None, mshape=None, array_shape=None):
        super().__init__(shape, spaceid)
        # mshape falls back to the dataspace shape, and array_shape falls
        # back to whatever mshape ended up being.
        resolved_mshape = self.shape if mshape is None else mshape
        resolved_array_shape = (
            resolved_mshape if array_shape is None else array_shape
        )
        self._mshape = resolved_mshape
        self._array_shape = resolved_array_shape

    @property
    def mshape(self):
        return self._mshape

    @property
    def array_shape(self):
        return self._array_shape

    def expand_shape(self, source_shape):
        """ Return source_shape unchanged; it must equal array_shape. """
        shapes_match = source_shape == self.array_shape
        if not shapes_match:
            raise TypeError("Broadcasting is not supported for complex selections")
        return source_shape

    def broadcast(self, source_shape):
        """ Yield the single target dataspace; source_shape must equal
        array_shape. """
        shapes_match = source_shape == self.array_shape
        if not shapes_match:
            raise TypeError("Broadcasting is not supported for complex selections")
        yield self._id
def guess_shape(sid):
    """ Given a dataspace, try to deduce the shape of the selection.

    Returns one of:
        * A tuple with the selection shape, same length as the dataspace
        * A 1D selection shape for point-based and multiple-hyperslab selections
        * None, for unselected scalars and for NULL dataspaces

    Raises TypeError if the dataspace class or the selection type is not
    one of the kinds handled below.
    """

    sel_class = sid.get_simple_extent_type()    # Dataspace class
    sel_type = sid.get_select_type()            # Flavor of selection in use

    if sel_class == h5s.NULL:
        # NULL dataspaces don't support selections
        return None

    elif sel_class == h5s.SCALAR:
        # NumPy has no way of expressing empty 0-rank selections, so we use None
        if sel_type == h5s.SEL_NONE: return None
        if sel_type == h5s.SEL_ALL: return tuple()
        # NOTE(review): a scalar dataspace with any other selection type
        # falls through to the "simple" handling below -- confirm whether
        # that combination can actually occur.

    elif sel_class != h5s.SIMPLE:
        raise TypeError("Unrecognized dataspace class %s" % sel_class)

    # We have a "simple" (rank >= 1) dataspace

    N = sid.get_select_npoints()    # Total number of selected points
    rank = len(sid.shape)

    if sel_type == h5s.SEL_NONE:
        return (0,)*rank

    elif sel_type == h5s.SEL_ALL:
        return sid.shape

    elif sel_type == h5s.SEL_POINTS:
        # Like NumPy, point-based selections yield 1D arrays regardless of
        # the dataspace rank
        return (N,)

    elif sel_type != h5s.SEL_HYPERSLABS:
        raise TypeError("Unrecognized selection method %s" % sel_type)

    # We have a hyperslab-based selection

    if N == 0:
        return (0,)*rank

    bottomcorner, topcorner = (np.array(x) for x in sid.get_select_bounds())

    # Shape of full selection box
    boxshape = topcorner - bottomcorner + np.ones((rank,))

    def get_n_axis(sid, axis):
        """ Determine the number of elements selected along a particular axis.

        To do this, we "mask off" the axis by making a hyperslab selection
        which leaves only the first point along the axis.  For a 2D dataset
        with selection box shape (X, Y), for axis 1, this would leave a
        selection of shape (X, 1).  We count the number of points N_leftover
        remaining in the selection and compute the axis selection length by
        N_axis = N/N_leftover.
        """

        # A box only one element thick along this axis needs no masking
        if(boxshape[axis]) == 1:
            return 1

        # Region covering everything in the box except the first point
        # along the requested axis
        start = bottomcorner.copy()
        start[axis] += 1
        count = boxshape.copy()
        count[axis] -= 1

        # Throw away all points along this axis
        masked_sid = sid.copy()
        masked_sid.select_hyperslab(tuple(start), tuple(count), op=h5s.SELECT_NOTB)

        N_leftover = masked_sid.get_select_npoints()

        return N//N_leftover

    shape = tuple(get_n_axis(sid, x) for x in range(rank))

    if product(shape) != N:
        # This means multiple hyperslab selections are in effect,
        # so we fall back to a 1D shape
        return (N,)

    return shape
@@ -0,0 +1,103 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
"""
Implements a portion of the selection operations.
"""
import numpy as np
from .. import h5s
def read_dtypes(dataset_dtype, names):
    """ Work out the dtypes involved in reading (a subset of) fields.

    Returns a 2-tuple containing:

    1. Output dataset dtype
    2. Dtype containing HDF5-appropriate description of destination

    Raises ValueError if field names are given for a non-compound type,
    or if a requested field is not part of the type.
    """
    if len(names) == 0:
        # Not compound, or all fields needed: both dtypes are the input
        return dataset_dtype, dataset_dtype

    available = dataset_dtype.names
    if available is None:
        raise ValueError("Field names only allowed for compound types")

    for field in names:
        if field not in available:
            raise ValueError("Field does not appear in this type.")

    format_dtype = np.dtype(
        [(field, dataset_dtype.fields[field][0]) for field in names]
    )

    if len(names) == 1:
        # We don't preserve the field information if only one explicitly selected.
        return format_dtype.fields[names[0]][0], format_dtype

    return format_dtype, format_dtype
def read_selections_scalar(dsid, args):
    """ Build the read selection for a scalar dataset.

    Returns a 2-tuple containing:

    1. Output dataset shape (None requests an array scalar, () an ndarray)
    2. HDF5 dataspace containing source selection.

    Raises RuntimeError for a non-scalar dataset and ValueError for any
    indexing argument other than () or (Ellipsis,).
    """
    if dsid.shape != ():
        raise RuntimeError("Illegal selection function for non-scalar dataset")

    if args == ():
        # This is a signal that an array scalar should be returned instead
        # of an ndarray with shape ()
        out_shape = None
    else:
        if not args == (Ellipsis,):
            raise ValueError("Illegal slicing argument for scalar dataspace")
        out_shape = ()

    source_space = dsid.get_space()
    source_space.select_all()

    return out_shape, source_space
class ScalarReadSelection:

    """
        Slicing support for scalar datasets.

        Iterating yields a single (file space, memory space) pair.
    """

    def __init__(self, fspace, args):
        if args == ():
            # No index at all
            self.mshape = None
        else:
            if not args == (Ellipsis,):
                raise ValueError("Illegal slicing argument for scalar dataspace")
            self.mshape = ()

        self.mspace = h5s.create(h5s.SCALAR)
        self.fspace = fspace

    def __iter__(self):
        # Select the whole (scalar) memory space before handing it out
        self.mspace.select_all()
        yield self.fspace, self.mspace
def select_read(fspace, args):
    """ Top-level dispatch function for reading.

    Only scalar file spaces are handled at the moment; anything else
    raises NotImplementedError.
    """
    is_scalar = fspace.shape == ()
    if not is_scalar:
        raise NotImplementedError()
    return ScalarReadSelection(fspace, args)
@@ -0,0 +1,250 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
"""
High-level interface for creating HDF5 virtual datasets
"""
from copy import deepcopy as copy
from collections import namedtuple
import numpy as np
from .compat import filename_encode
from .datatype import Datatype
from .selections import SimpleSelection, select
from .. import h5d, h5p, h5s, h5t
class VDSmap(namedtuple('VDSmap', 'vspace file_name dset_name src_space')):
    '''Describes one region of a virtual dataset and the part of a source
    dataset (file name, dataset name and source space) it maps to.
    '''
# Module-level flag, unconditionally True in this module.
# NOTE(review): presumably kept so callers can feature-test virtual
# dataset support -- confirm against users of this name.
vds_support = True
def _convert_space_for_key(space, key):
    """
    Re-apply the selection on ``space`` so that slices in ``key`` whose
    stop is h5s.UNLIMITED get an unlimited count.  Mainly used to allow
    unlimited dimensions in virtual space selection.

    The dataspace is modified in place; nothing is returned.
    """
    keys = key if isinstance(key, tuple) else (key,)

    # Unlimited selections are only allowed when the selection is a
    # regular hyperslab, so there is nothing to do in any other case.
    is_hyperslab = space.get_select_type() == h5s.SEL_HYPERSLABS
    if not (is_hyperslab and space.is_regular_hyperslab()):
        return

    rank = space.get_simple_extent_ndims()
    nargs = len(keys)
    shift = 0

    start, stride, count, block = space.get_regular_hyperslab()
    new_count = list(count)

    # Integer indices are ignored.  A slice is checked for an
    # h5s.UNLIMITED stop.  After an Ellipsis, later entries of the key
    # refer to axes shifted by (rank - nargs).
    for position, item in enumerate(keys):
        if isinstance(item, slice):
            if item.stop == h5s.UNLIMITED:
                new_count[position + shift] = h5s.UNLIMITED
        elif item is Ellipsis:
            shift = rank - nargs

    space.select_hyperslab(start, tuple(new_count), stride, block)
class VirtualSource:
    """Source definition for virtual data sets.

    Create an instance to stand for a whole source dataset, then slice it
    to pick the region that should appear in the virtual dataset.

    path_or_dataset
        The path to a file, or an h5py dataset. If a dataset is given,
        no other parameters are allowed, as the relevant values are taken from
        the dataset instead.
    name
        The name of the source dataset within the file.
    shape
        A tuple giving the shape of the dataset.
    dtype
        Numpy dtype or string.
    maxshape
        The source dataset is resizable up to this shape. Use None for
        axes you want to be unlimited.
    """
    def __init__(self, path_or_dataset, name=None,
                 shape=None, dtype=None, maxshape=None):
        from .dataset import Dataset
        if isinstance(path_or_dataset, Dataset):
            # Everything is read off the dataset, so explicit values for
            # the other arguments are rejected.
            extras = {'name': name, 'shape': shape,
                      'dtype': dtype, 'maxshape': maxshape}
            failed = {k: v for k, v in extras.items() if v is not None}
            if failed:
                raise TypeError("If a Dataset is passed as the first argument "
                                "then no other arguments may be passed. You "
                                "passed {failed}".format(failed=failed))
            ds = path_or_dataset
            path, name = ds.file.filename, ds.name
            shape, dtype, maxshape = ds.shape, ds.dtype, ds.maxshape
        else:
            path = path_or_dataset
            if name is None:
                raise TypeError("The name parameter is required when "
                                "specifying a source by path")
            if shape is None:
                raise TypeError("The shape parameter is required when "
                                "specifying a source by path")
            if isinstance(shape, int):
                # Accept a bare int as a 1D shape
                shape = (shape,)

            if isinstance(maxshape, int):
                maxshape = (maxshape,)

        self.path = path
        self.name = name
        self.dtype = dtype

        if maxshape is None:
            self.maxshape = shape
        else:
            # None marks an unlimited axis
            self.maxshape = tuple(
                h5s.UNLIMITED if extent is None else extent
                for extent in maxshape
            )
        self.sel = SimpleSelection(shape)
        self._all_selected = True

    @property
    def shape(self):
        return self.sel.array_shape

    def __getitem__(self, key):
        """Return a copy of this source restricted to the given selection.

        Raises RuntimeError if this object is itself the result of slicing.
        """
        if not self._all_selected:
            raise RuntimeError("VirtualSource objects can only be sliced once.")
        sliced = copy(self)
        sliced.sel = select(self.shape, key, dataset=None)
        _convert_space_for_key(sliced.sel.id, key)
        sliced._all_selected = False
        return sliced
class VirtualLayout:
    """Object for building a virtual dataset.

    Instantiate this class to define a virtual dataset, assign to slices of it
    (using VirtualSource objects), and then pass it to
    group.create_virtual_dataset() to add the virtual dataset to a file.

    This class does not allow access to the data; the virtual dataset must
    be created in a file before it can be used.

    shape
        A tuple giving the shape of the dataset.
    dtype
        Numpy dtype or string.
    maxshape
        The virtual dataset is resizable up to this shape. Use None for
        axes you want to be unlimited.
    filename
        The name of the destination file, if known in advance. Mappings from
        data in the same file will be stored with filename '.', allowing the
        file to be renamed later.
    """
    def __init__(self, shape, dtype, maxshape=None, filename=None):
        # A bare int is accepted as a 1D shape / maxshape
        self.shape = (shape,) if isinstance(shape, int) else shape
        self.dtype = dtype
        self.maxshape = (maxshape,) if isinstance(maxshape, int) else maxshape
        self._filename = filename
        # Encoded source file names seen so far; only filled in while the
        # destination file name is unknown (see __setitem__).
        self._src_filenames = set()
        # Dataset creation property list that accumulates the mappings
        self.dcpl = h5p.create(h5p.DATASET_CREATE)
        self.dcpl.set_layout(h5d.VIRTUAL)

    def __setitem__(self, key, source):
        """Map the region of the layout selected by ``key`` to ``source``.

        ``source`` is used through its ``path``, ``name`` and ``sel``
        attributes (as provided by VirtualSource).
        """
        sel = select(self.shape, key, dataset=None)
        _convert_space_for_key(sel.id, key)
        src_filename = self._source_file_name(source.path, self._filename)

        self.dcpl.set_virtual(
            sel.id, src_filename, source.name.encode('utf-8'), source.sel.id
        )
        if self._filename is None:
            # Remember the source so that _get_dcpl() can detect later that
            # it is the same file as the destination.
            self._src_filenames.add(src_filename)

    @staticmethod
    def _source_file_name(src_filename, dst_filename) -> bytes:
        """Return the encoded source file name to store in a mapping.

        b'.' is returned when a destination file name is given and it
        encodes to the same value as the source file name.
        """
        src_filename = filename_encode(src_filename)
        if dst_filename and (src_filename == filename_encode(dst_filename)):
            # use relative path if the source dataset is in the same
            # file, in order to keep the virtual dataset valid in case
            # the file is renamed.
            return b'.'
        return filename_encode(src_filename)

    def _get_dcpl(self, dst_filename):
        """Get the property list containing virtual dataset mappings

        If the destination filename wasn't known when the VirtualLayout was
        created, it is handled here.

        Raises Exception if a filename was given at construction and it
        does not match ``dst_filename``.
        """
        dst_filename = filename_encode(dst_filename)
        if self._filename is not None:
            # filename was known in advance; check dst_filename matches
            if dst_filename != filename_encode(self._filename):
                raise Exception(f"{dst_filename!r} != {self._filename!r}")
            return self.dcpl

        # destination file not known in advance
        if dst_filename in self._src_filenames:
            # At least 1 source file is the same as the destination file,
            # but we didn't know this when making the mapping. Copy the mappings
            # to a new property list, replacing the dest filename with '.'
            new_dcpl = h5p.create(h5p.DATASET_CREATE)
            new_dcpl.set_layout(h5d.VIRTUAL)
            for i in range(self.dcpl.get_virtual_count()):
                src_filename = self.dcpl.get_virtual_filename(i)
                new_dcpl.set_virtual(
                    self.dcpl.get_virtual_vspace(i),
                    self._source_file_name(src_filename, dst_filename),
                    self.dcpl.get_virtual_dsetname(i).encode('utf-8'),
                    self.dcpl.get_virtual_srcspace(i),
                )
            return new_dcpl
        else:
            return self.dcpl  # Mappings are all from other files

    def make_dataset(self, parent, name, fillvalue=None):
        """ Return a new low-level dataset identifier for a virtual dataset

        parent
            Object providing ``file.filename`` and ``id``; the dataset is
            created under ``parent.id``.
        name
            Name passed through to h5d.create().
        fillvalue
            Optional fill value set on the property list before creation.
        """
        dcpl = self._get_dcpl(parent.file.filename)

        if fillvalue is not None:
            dcpl.set_fill_value(np.array([fillvalue]))

        maxshape = self.maxshape
        if maxshape is not None:
            # None marks an unlimited axis
            maxshape = tuple(m if m is not None else h5s.UNLIMITED for m in maxshape)

        virt_dspace = h5s.create_simple(self.shape, maxshape)

        if isinstance(self.dtype, Datatype):
            # Named types are used as-is
            tid = self.dtype.id
        else:
            dtype = np.dtype(self.dtype)
            tid = h5t.py_create(dtype, logical=1)

        return h5d.create(parent.id, name=name, tid=tid, space=virt_dspace,
                          dcpl=dcpl)
@@ -0,0 +1,21 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
"""
This module contains the warning classes for h5py. These classes are part of
the public API of h5py, and should be imported from this module.
"""
class H5pyWarning(UserWarning):
    """Base class for the warning categories defined in this module."""
class H5pyDeprecationWarning(H5pyWarning):
    """Warning category for deprecations; a subclass of H5pyWarning."""
@@ -0,0 +1,156 @@
#+
#
# This file is part of h5py, a low-level Python interface to the HDF5 library.
#
# Contributed by Darren Dale
#
# Copyright (C) 2009 Darren Dale
#
# http://h5py.org
# License: BSD (See LICENSE.txt for full license)
#
#-
# pylint: disable=eval-used,protected-access
"""
This is the h5py completer extension for ipython. It is loaded by
calling the function h5py.enable_ipython_completer() from within an
interactive IPython session.
It will let you do things like::
f=File('foo.h5')
f['<tab>
# or:
f['ite<tab>
which will do tab completion based on the subgroups of `f`. Also::
f['item1'].at<tab>
will perform tab completion for the attributes in the usual way. This should
also work::
a = b = f['item1'].attrs.<tab>
as should::
f['item1/item2/it<tab>
"""
import posixpath
import re
from ._hl.attrs import AttributeManager
from ._hl.base import HLObject
from IPython import get_ipython
from IPython.core.error import TryNext
from IPython.utils import generics
# Each pattern first skips an optional leading "... =" assignment prefix.

# <expression containing [...]> "." <partial attribute name> at end of line
re_attr_match = re.compile(r"(?:.*\=)?(.+\[.*\].*)\.(\w*)$")
# <base> "[" <opening quote> <partial item name>, where the same quote
# character does not occur again later on the line
re_item_match = re.compile(r"""(?:.*\=)?(.*)\[(?P<s>['|"])(?!.*(?P=s))(.*)$""")
# Shortest expression preceding the first "["
re_object_match = re.compile(r"(?:.*\=)?(.+?)(?:\[)")
def _retrieve_obj(name, context):
""" Filter function for completion. """
# we don't want to call any functions, but I couldn't find a robust regex
# that filtered them without unintended side effects. So keys containing
# "(" will not complete.
if '(' in name:
raise ValueError()
return eval(name, context.user_ns)
def h5py_item_completer(context, command):
    """Compute possible item matches for dict-like objects

    ``command`` is the line being completed.  It is split into the base
    expression and the partially typed item name; the keys of the base
    object (or of the sub-item named by the directory part of the item)
    that start with the typed text are returned.

    Returns an empty list when the base expression cannot be evaluated,
    when the object has no keys(), or when the directory part of the item
    does not exist.  Raises ValueError if ``command`` does not match the
    item pattern (the unpacking below fails); h5py_completer relies on
    that to move on.
    """
    base, item = re_item_match.split(command)[1:4:2]

    try:
        obj = _retrieve_obj(base, context)
    except Exception:
        return []

    path, _ = posixpath.split(item)

    try:
        if path:
            items = [posixpath.join(path, name) for name in obj[path].keys()]
        else:
            items = list(obj.keys())
    except (AttributeError, KeyError):
        # AttributeError: the object has no keys().
        # KeyError: the directory part typed so far does not name an
        # existing member.  Either way there is nothing to complete.
        return []

    return [i for i in items if i.startswith(item)]
def h5py_attr_completer(context, command):
    """Compute possible attr matches for nested dict-like objects

    Returns "<base>.<attribute>" strings for every attribute of the base
    object that starts with the typed text, or an empty list if the base
    expression cannot be evaluated.
    """
    base, attr = re_attr_match.split(command)[1:3]
    base = base.strip()

    try:
        obj = _retrieve_obj(base, context)
    except Exception:
        return []

    candidates = dir(obj)
    try:
        candidates = generics.complete_object(obj, candidates)
    except TryNext:
        pass

    try:
        # support >=ipython-0.12
        omit__names = get_ipython().Completer.omit__names
    except AttributeError:
        omit__names = 0

    # omit__names == 1 hides "__" names, == 2 hides every "_" name
    hidden_prefix = None
    if omit__names == 1:
        hidden_prefix = '__'
    elif omit__names == 2:
        hidden_prefix = '_'
    if hidden_prefix is not None:
        candidates = [a for a in candidates if not a.startswith(hidden_prefix)]

    typed = len(attr)
    return ["%s.%s" % (base, a) for a in candidates if a[:typed] == attr]
def h5py_completer(self, event):
    """ Completer function to be loaded into IPython

    Raises TryNext when the object in front of the first "[" is not an
    AttributeManager or HLObject.  Otherwise attribute completion is
    tried first, then item completion; an empty list is returned if both
    reject the line with ValueError.
    """
    base = re_object_match.split(event.line)[1]

    try:
        obj = self._ofind(base).obj
    except AttributeError:
        # The lookup result has no .obj attribute; use .get('obj') instead
        obj = self._ofind(base).get('obj')

    if not isinstance(obj, (AttributeManager, HLObject)):
        raise TryNext

    for completer in (h5py_attr_completer, h5py_item_completer):
        try:
            return completer(self, event.line)
        except ValueError:
            continue

    return []
def load_ipython_extension(ip=None):
    """ Load completer function into IPython

    ``ip`` is the IPython shell to register with; when omitted, the one
    returned by get_ipython() is used.
    """
    shell = get_ipython() if ip is None else ip
    shell.set_hook('complete_command', h5py_completer, re_key=r"(?:.*\=)?(.+?)\[")
@@ -0,0 +1,21 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
import sys
import shlex
from importlib.util import find_spec
from subprocess import call
def run_tests(args=''):
    """Run the h5py test suite with pytest in a child process.

    ``args`` is a string of extra command line arguments, split with
    shlex.  Returns the exit status of the child process, or 1 if pytest
    is not installed.
    """
    pytest_missing = find_spec("pytest") is None
    if pytest_missing:
        print("Tests require pytest, pytest not installed")
        return 1

    command = [sys.executable, "-m", "pytest", "--pyargs", "h5py",
               *shlex.split(args)]
    return call(command)
@@ -0,0 +1,238 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
import sys
import os
import shutil
import inspect
import tempfile
import subprocess
from contextlib import contextmanager
from functools import wraps
import numpy as np
from numpy.lib.recfunctions import repack_fields
import h5py
import unittest as ut
# Check if non-ascii filenames are supported
# Evidently this is the most reliable way to check
# See also h5py issue #263 and ipython #466
# To test for this, run the testsuite with LC_ALL=C
# Probe by actually creating a temporary file whose suffix is a non-ASCII
# character (U+03B7); UNICODE_FILENAMES records whether that worked.
try:
    testfile, fname = tempfile.mkstemp(chr(0x03b7))
except UnicodeError:
    UNICODE_FILENAMES = False
else:
    UNICODE_FILENAMES = True
    # Clean up the probe file and keep the helper names out of the
    # module namespace
    os.close(testfile)
    os.unlink(fname)
    del fname
    del testfile
class TestCase(ut.TestCase):

    """
        Base class for unit tests.

        Each test class gets its own temporary directory (removed again in
        tearDownClass), and each test gets a fresh writable file self.f
        inside it.
    """

    @classmethod
    def setUpClass(cls):
        cls.tempdir = tempfile.mkdtemp(prefix='h5py-test_')

    @classmethod
    def tearDownClass(cls):
        shutil.rmtree(cls.tempdir)

    def mktemp(self, suffix='.hdf5', prefix='', dir=None):
        """ Return a fresh file name (the file itself is not created),
        by default inside the class temporary directory. """
        if dir is None:
            dir = self.tempdir
        return tempfile.mktemp(suffix, prefix, dir=dir)

    def mktemp_mpi(self, comm=None, suffix='.hdf5', prefix='', dir=None):
        """ Like mktemp(), but the name is chosen on rank 0 and broadcast
        so that every rank of ``comm`` gets the same one. """
        if comm is None:
            from mpi4py import MPI
            comm = MPI.COMM_WORLD
        fname = None
        if comm.Get_rank() == 0:
            fname = self.mktemp(suffix, prefix, dir)
        fname = comm.bcast(fname, 0)
        return fname

    def setUp(self):
        self.f = h5py.File(self.mktemp(), 'w')

    def tearDown(self):
        # Closing is best-effort: a test may already have closed or
        # invalidated the file.  Only ordinary errors are ignored, so that
        # KeyboardInterrupt / SystemExit still propagate.
        try:
            if self.f:
                self.f.close()
        except Exception:
            pass

    def assertSameElements(self, a, b):
        """ Check that every item of a compares equal to some item of b and
        vice versa.  Uses == only, so unhashable items are fine. """
        for x in a:
            if not any(x == y for y in b):
                raise AssertionError("Item '%s' appears in a but not b" % x)

        for x in b:
            if not any(x == y for y in a):
                raise AssertionError("Item '%s' appears in b but not a" % x)

    def assertArrayEqual(self, dset, arr, message=None, precision=None, check_alignment=True):
        """ Make sure dset and arr have the same shape, dtype and contents, to
            within the given precision, optionally ignoring differences in dtype alignment.

            Note that dset may be a NumPy array or an HDF5 dataset.

            message is appended to the failure text; precision defaults to
            1e-5 and is only applied to dtypes of kind 'i' and 'f'.
        """
        if precision is None:
            precision = 1e-5
        if message is None:
            message = ''
        else:
            message = ' (%s)' % message

        if np.isscalar(dset) or np.isscalar(arr):
            assert np.isscalar(dset) and np.isscalar(arr), \
                'Scalar/array mismatch ("%r" vs "%r")%s' % (dset, arr, message)
            dset = np.asarray(dset)
            arr = np.asarray(arr)

        assert dset.shape == arr.shape, \
            "Shape mismatch (%s vs %s)%s" % (dset.shape, arr.shape, message)
        if dset.dtype != arr.dtype:
            if check_alignment:
                normalized_dset_dtype = dset.dtype
                normalized_arr_dtype = arr.dtype
            else:
                # Compare the packed layouts so padding is ignored
                normalized_dset_dtype = repack_fields(dset.dtype)
                normalized_arr_dtype = repack_fields(arr.dtype)

            assert normalized_dset_dtype == normalized_arr_dtype, \
                "Dtype mismatch (%s vs %s)%s" % (normalized_dset_dtype, normalized_arr_dtype, message)

            if not check_alignment:
                if normalized_dset_dtype != dset.dtype:
                    dset = repack_fields(np.asarray(dset))
                if normalized_arr_dtype != arr.dtype:
                    arr = repack_fields(np.asarray(arr))

        if arr.dtype.names is not None:
            for n in arr.dtype.names:
                # Build the per-field message from the unmodified base
                # message, so that prefixes of earlier fields do not pile
                # up in the text reported for later fields.
                field_message = '[FIELD %s] %s' % (n, message)
                self.assertArrayEqual(dset[n], arr[n], message=field_message, precision=precision, check_alignment=check_alignment)
        elif arr.dtype.kind in ('i', 'f'):
            # %g keeps small tolerances readable (1e-05 rather than 0.000)
            assert np.all(np.abs(dset[...] - arr[...]) < precision), \
                "Arrays differ by more than %g%s" % (precision, message)
        elif arr.dtype.kind == 'O':
            for v1, v2 in zip(dset.flat, arr.flat, strict=True):
                self.assertArrayEqual(v1, v2, message=message, precision=precision, check_alignment=check_alignment)
        else:
            assert np.all(dset[...] == arr[...]), \
                "Arrays are not equal (dtype %s) %s" % (arr.dtype.str, message)

    def assertNumpyBehavior(self, dset, arr, s, skip_fast_reader=False):
        """ Apply slicing arguments "s" to both dset and arr.

        Succeeds if the results of the slicing are identical, or the
        exception raised is of the same type for both.

        "arr" must be a Numpy array; "dset" may be a NumPy array or dataset.

        Unless skip_fast_reader is set, dset._fast_reader.read() is held
        to the same expectation.
        """
        exc = None
        try:
            arr_result = arr[s]
        except Exception as e:
            exc = type(e)

        # The fast reader is always called with a tuple
        s_fast = s if isinstance(s, tuple) else (s,)

        if exc is None:
            self.assertArrayEqual(dset[s], arr_result)

            if not skip_fast_reader:
                self.assertArrayEqual(
                    dset._fast_reader.read(s_fast),
                    arr_result,
                )
        else:
            with self.assertRaises(exc):
                dset[s]

            if not skip_fast_reader:
                with self.assertRaises(exc):
                    dset._fast_reader.read(s_fast)
# (major, minor) of the installed NumPy, as a tuple of ints
NUMPY_RELEASE_VERSION = tuple(int(part) for part in np.__version__.split(".")[:2])
@contextmanager
def closed_tempfile(suffix='', text=None):
    """
    Context manager which yields the path to a closed temporary file with the
    suffix `suffix`. The file will be deleted on exiting the context, also
    when the body of the ``with`` block raises. An additional argument `text`
    can be provided to have the file contain `text`.
    """
    with tempfile.NamedTemporaryFile(
        'w+t', suffix=suffix, delete=False
    ) as test_file:
        file_name = test_file.name
        if text is not None:
            test_file.write(text)
            test_file.flush()
    try:
        yield file_name
    finally:
        # The path is a regular file, so it has to be removed with
        # os.remove(); shutil.rmtree() only handles directories.  Removal
        # stays best-effort (e.g. the body may have deleted the file).
        try:
            os.remove(file_name)
        except OSError:
            pass
def insubprocess(f):
    """Runs a test in its own subprocess

    The wrapped test re-invokes pytest on itself in a child process and
    asserts that the child exits with status 0.  An environment variable
    derived from the test id tells the child that it is the subprocess,
    so that it runs the test body directly.
    """
    # Characters of the test id that are replaced by "_" in the variable name
    sanitize = str.maketrans("/\\,:.", "_____")

    @wraps(f)
    def wrapper(request, *args, **kwargs):
        curr_test = inspect.getsourcefile(f) + "::" + request.node.name
        # get block around test name
        insub = ("IN_SUBPROCESS_" + curr_test).translate(sanitize)

        if os.environ.get(insub, None):
            # Already inside the subprocess: run the test body directly
            return f(request, *args, **kwargs)

        # fork process
        # NOTE(review): the marker is also set in the current process'
        # environment, so it stays set after this call -- confirm that is
        # intended.
        os.environ[insub] = '1'
        env = os.environ.copy()
        env[insub] = '1'
        env.update(getattr(f, 'subproc_env', {}))

        command = [sys.executable, '-m', 'pytest', curr_test]
        with closed_tempfile() as stdout:
            with open(stdout, 'w+t') as fh:
                rtn = subprocess.call(command, stdout=fh, stderr=fh, env=env)
            with open(stdout, 'rt') as fh:
                out = fh.read()

        assert rtn == 0, "\n" + out
    return wrapper
def subproc_env(d):
    """Set environment variables for the @insubprocess decorator

    Returns a decorator which stores ``d`` on the function as its
    ``subproc_env`` attribute and returns the function itself.
    """
    def attach(func):
        setattr(func, 'subproc_env', d)
        return func
    return attach
@@ -0,0 +1,22 @@
import h5py
import pytest
@pytest.fixture()
def writable_file(tmp_path):
    """Yield an h5py File opened in 'w' mode at test.h5 under tmp_path."""
    target = tmp_path / 'test.h5'
    with h5py.File(target, 'w') as handle:
        yield handle
def pytest_addoption(parser):
    """Register the --no-network command line flag (off by default)."""
    option_settings = {
        'action': 'store_true',
        'default': False,
        'help': 'No network access',
    }
    parser.addoption('--no-network', **option_settings)
def pytest_collection_modifyitems(config, items):
    """When --no-network is given, add a skip marker to every collected
    item that has 'network' among its keywords."""
    if not config.getoption('--no-network'):
        return

    nonet = pytest.mark.skip(reason='No Internet')
    for item in items:
        if 'network' in item.keywords:
            item.add_marker(nonet)
@@ -0,0 +1,7 @@
from os.path import dirname, join
def get_data_file_path(basename):
    """
    Returns the path to the test data file given by `basename`, i.e.
    `basename` joined onto the directory containing this module.
    """
    data_dir = dirname(__file__)
    return join(data_dir, basename)
@@ -0,0 +1,95 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
"""
Tests the h5py.AttributeManager.create() method.
"""
import numpy as np
from .. import h5t, h5a
from .common import TestCase
class TestArray(TestCase):

    """
        Check that top-level array types can be created and read.
    """

    def test_int(self):
        # See issue 498

        array_type = np.dtype('(3,)i')
        values = np.arange(3, dtype='i')

        self.f.attrs.create('x', data=values, dtype=array_type)

        # The stored HDF5 type must be an array type
        stored_type = h5a.open(self.f.id, b'x').get_type()
        self.assertEqual(stored_type.get_class(), h5t.ARRAY)

        self.assertArrayEqual(self.f.attrs['x'], values)

    def test_string_dtype(self):
        # See issue 498 discussion

        self.f.attrs.create('x', data=42, dtype='i8')

    def test_str(self):
        # See issue 1057
        omega = chr(0x03A9)
        self.f.attrs.create('x', omega)
        stored = self.f.attrs['x']
        self.assertEqual(stored, omega)
        self.assertIsInstance(stored, str)

    def test_tuple_of_unicode(self):
        # Test that a tuple of unicode strings can be set as an attribute. It will
        # be converted to a numpy array of vlen unicode type:
        strings = ('a', 'b')
        self.f.attrs.create('x', data=strings)
        stored = self.f.attrs['x']
        self.assertTrue(all(stored == strings))
        self.assertEqual(stored.dtype, np.dtype('O'))

        # However, a numpy array of type U being passed in will not be
        # automatically converted, and should raise an error as it does
        # not map to a h5py dtype
        strings_as_U_array = np.array(strings)
        self.assertEqual(strings_as_U_array.dtype, np.dtype('U1'))
        with self.assertRaises(TypeError):
            self.f.attrs.create('y', data=strings_as_U_array)

    def test_shape(self):
        # An integer shape is accepted for both scalar and array data
        self.f.attrs.create('x', data=42, shape=1)
        self.assertEqual(self.f.attrs['x'].shape, (1,))

        self.f.attrs.create('y', data=np.arange(3), shape=3)
        self.assertEqual(self.f.attrs['y'].shape, (3,))

    def test_dtype(self):
        array_type = np.dtype('(3,)i')
        values = np.arange(3, dtype='i')
        self.f.attrs.create('x', data=values, dtype=array_type)

        # Array dtype shape is incompatible with data shape
        values = np.arange(4, dtype='i')
        with self.assertRaises(ValueError):
            self.f.attrs.create('x', data=values, dtype=array_type)

        # Shape of new attribute conflicts with shape of data
        array_type = np.dtype('()i')
        with self.assertRaises(ValueError):
            self.f.attrs.create('x', data=values, shape=(5,), dtype=array_type)

    def test_key_type(self):
        # Attribute names must not be integers
        with self.assertRaises(TypeError):
            self.f.attrs.create(1, data=('a', 'b'))
@@ -0,0 +1,299 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
"""
Attributes testing module
Covers all operations which access the .attrs property, with the
exception of data read/write and type conversion. Those operations
are tested by module test_attrs_data.
"""
import numpy as np
from collections.abc import MutableMapping
from .common import TestCase
import h5py
from h5py import File
from h5py import h5a, h5t
from h5py import AttributeManager
class BaseAttrs(TestCase):

    """ Common fixture: a fresh writable file per test, closed afterwards. """

    def setUp(self):
        path = self.mktemp()
        self.f = File(path, 'w')

    def tearDown(self):
        if not self.f:
            return
        self.f.close()
class TestRepr(TestCase):

    """ Feature: AttributeManager provides a helpful
        __repr__ string
    """

    def test_repr(self):
        group = self.f.create_group('grp')
        group.attrs.create('att', 1)
        self.assertIsInstance(repr(group.attrs), str)

        # repr() must keep working once the underlying id is closed
        group.id.close()
        self.assertIsInstance(repr(group.attrs), str)
class TestAccess(BaseAttrs):

    """
        Feature: Attribute creation/retrieval via special methods
    """

    def test_create(self):
        """ Attribute creation by direct assignment """
        attrs = self.f.attrs
        attrs['a'] = 4.0
        self.assertEqual(list(attrs.keys()), ['a'])
        self.assertEqual(attrs['a'], 4.0)

    def test_create_2(self):
        """ Attribute creation by create() method """
        attrs = self.f.attrs
        attrs.create('a', 4.0)
        self.assertEqual(list(attrs.keys()), ['a'])
        self.assertEqual(attrs['a'], 4.0)

    def test_modify(self):
        """ Attributes are modified by direct assignment"""
        attrs = self.f.attrs
        for value in (3, 4):
            attrs['a'] = value
            self.assertEqual(list(attrs.keys()), ['a'])
            self.assertEqual(attrs['a'], value)

    def test_modify_2(self):
        """ Attributes are modified by modify() method """
        attrs = self.f.attrs
        for value in (3, 4):
            attrs.modify('a', value)
            self.assertEqual(list(attrs.keys()), ['a'])
            self.assertEqual(attrs['a'], value)

        # If the attribute doesn't exist, create new
        attrs.modify('b', 5)
        self.assertEqual(list(attrs.keys()), ['a', 'b'])
        self.assertEqual(attrs['a'], 4)
        self.assertEqual(attrs['b'], 5)

        # Shape of new value is incompatible with the previous
        incompatible = np.arange(5)
        with self.assertRaises(TypeError):
            attrs.modify('b', incompatible)

    def test_overwrite(self):
        """ Attributes are silently overwritten """
        attrs = self.f.attrs
        attrs['a'] = 4.0
        attrs['a'] = 5.0
        self.assertEqual(attrs['a'], 5.0)

    def test_rank(self):
        """ Attribute rank is preserved """
        attrs = self.f.attrs
        attrs['a'] = (4.0, 5.0)
        self.assertEqual(attrs['a'].shape, (2,))
        self.assertArrayEqual(attrs['a'], np.array((4.0,5.0)))

    def test_single(self):
        """ Attributes of shape (1,) don't become scalars """
        self.f.attrs['a'] = np.ones((1,))
        stored = self.f.attrs['a']
        self.assertEqual(stored.shape, (1,))
        self.assertEqual(stored[()], 1)

    def test_access_exc(self):
        """ Attempt to access missing item raises KeyError """
        with self.assertRaises(KeyError):
            self.f.attrs['a']

    def test_get_id(self):
        """ get_id() returns an AttrID, or raises KeyError if missing """
        attrs = self.f.attrs
        attrs['a'] = 4.0
        attr_id = attrs.get_id('a')
        assert isinstance(attr_id, h5a.AttrID)

        with self.assertRaises(KeyError):
            attrs.get_id('b')
class TestDelete(BaseAttrs):

    """
        Feature: Deletion of attributes using __delitem__
    """

    def test_delete(self):
        """ Deletion via "del" """
        attrs = self.f.attrs
        attrs['a'] = 4.0
        self.assertIn('a', attrs)
        del attrs['a']
        self.assertNotIn('a', attrs)

    def test_delete_exc(self):
        """ Attempt to delete missing item raises KeyError """
        attrs = self.f.attrs
        with self.assertRaises(KeyError):
            del attrs['a']
class TestUnicode(BaseAttrs):

    """
        Feature: Attributes can be accessed via Unicode or byte strings
    """

    def test_ascii(self):
        """ Access via pure-ASCII byte string """
        key = b"ascii"
        self.f.attrs[key] = 42
        self.assertEqual(self.f.attrs[key], 42)

    def test_raw(self):
        """ Access via non-ASCII byte string """
        key = b"non-ascii\xfe"
        self.f.attrs[key] = 42
        self.assertEqual(self.f.attrs[key], 42)

    def test_unicode(self):
        """ Access via Unicode string with non-ascii characters """
        key = "Omega" + chr(0x03A9)
        self.f.attrs[key] = 42
        self.assertEqual(self.f.attrs[key], 42)
class TestCreate(BaseAttrs):

    """
        Options for explicit attribute creation
    """

    def test_named(self):
        """ Attributes created from named types link to the source type object
        """
        self.f['type'] = np.dtype('u8')
        self.f.attrs.create('x', 42, dtype=self.f['type'])
        self.assertEqual(self.f.attrs['x'], 42)

        # The attribute's type must be the committed named type itself
        attr_type = h5a.open(self.f.id, b'x').get_type()
        named_type = self.f['type'].id
        self.assertEqual(attr_type, named_type)
        self.assertTrue(attr_type.committed())

    def test_empty(self):
        # https://github.com/h5py/h5py/issues/1540
        """ Create attribute with h5py.Empty value
        """
        for dtype in ('f', None):
            self.f.attrs.create('empty', h5py.Empty(dtype))
            self.assertEqual(self.f.attrs['empty'], h5py.Empty(dtype))
class TestMutableMapping(BaseAttrs):
    '''Checks that AttributeManager is registered as a MutableMapping and
    provides the methods that interface relies on
    '''

    def test_resolution(self):
        '''AttributeManager, and an instance of it, resolve as MutableMapping.'''
        assert issubclass(AttributeManager, MutableMapping)
        assert isinstance(self.f.attrs, MutableMapping)

    def test_validity(self):
        '''
        The required special methods exist; a missing one raises
        AttributeError on lookup.
        '''
        required = ('__getitem__', '__setitem__', '__delitem__',
                    '__iter__', '__len__')
        for method_name in required:
            getattr(AttributeManager, method_name)
class TestVlen(BaseAttrs):

    """
        Variable-length array data stored in attributes
    """

    def test_vlen(self):
        """ A vlen integer array assigned to an attribute reads back equal """
        ragged = np.array([np.arange(3), np.arange(4)],
                          dtype=h5t.vlen_dtype(int))
        self.f.attrs['a'] = ragged
        self.assertArrayEqual(self.f.attrs['a'][0], ragged[0])

    def test_vlen_s1(self):
        """ A vlen array of 'S1' elements passed to create() reads back equal """
        vlen_s1 = h5py.vlen_dtype(np.dtype('S1'))
        ragged = np.empty((1,), dtype=vlen_s1)
        ragged[0] = np.array([b'a', b'b'], dtype='S1')

        self.f.attrs.create('test', ragged)
        self.assertArrayEqual(self.f.attrs['test'][0], ragged[0])
class TestTrackOrder(BaseAttrs):

    """
        Attribute iteration order with and without track_order
    """

    def fill_attrs(self, track_order):
        """ Create group 'test' and give it attributes named "0".."99";
        returns the group's attribute manager
        """
        attrs = self.f.create_group('test', track_order=track_order).attrs
        for number in range(100):
            attrs[str(number)] = number
        return attrs

    # https://forum.hdfgroup.org/t/bug-h5arename-fails-unexpectedly/4881
    def test_track_order(self):
        """ With track_order=True names come back in creation order """
        attrs = self.fill_attrs(track_order=True)  # creation order
        expected = [str(number) for number in range(100)]
        self.assertEqual(list(attrs), expected)

    def test_no_track_order(self):
        """ With track_order=False names come back sorted as strings """
        attrs = self.fill_attrs(track_order=False)  # name alphanumeric
        expected = sorted(str(number) for number in range(100))
        self.assertEqual(list(attrs), expected)

    def fill_attrs2(self, track_order):
        """ Create group 'test' with attributes named "0".."11"; returns the
        group itself
        """
        group = self.f.create_group('test', track_order=track_order)
        for number in range(12):
            group.attrs[str(number)] = number
        return group

    def test_track_order_overwrite_delete(self):
        """ Overwriting and deleting attributes works on a tracked group """
        # issue 1385
        group = self.fill_attrs2(track_order=True)  # creation order
        self.assertEqual(group.attrs["11"], 11)

        # overwrite attribute
        group.attrs['11'] = 42.0
        self.assertEqual(group.attrs["11"], 42.0)

        # delete attribute
        self.assertIn('10', group.attrs)
        del group.attrs['10']
        self.assertNotIn('10', group.attrs)
class TestDatatype(BaseAttrs):

    """
        Attributes attached to a named datatype
    """

    def test_datatype(self):
        """ A named type starts with no attributes and accepts a new one """
        self.f['foo'] = np.dtype('f')
        named_type = self.f['foo']
        self.assertEqual(list(named_type.attrs.keys()), [])

        named_type.attrs.create('a', 4.0)

        self.assertEqual(list(named_type.attrs.keys()), ['a'])
        self.assertEqual(list(named_type.attrs.values()), [4.0])
def test_python_int_uint64(writable_file):
    """Python ints at and just above the int64 maximum can be stored as uint64.

    Covers both creating a new attribute and modifying an existing one.
    """
    int64_max = np.iinfo(np.int64).max
    data = [int64_max, int64_max + 1]
    expected = np.array(data, dtype=np.uint64)
    attrs = writable_file.attrs

    # Check creating a new attribute
    attrs.create('a', data, dtype=np.uint64)
    assert attrs['a'].dtype == np.dtype(np.uint64)
    np.testing.assert_array_equal(attrs['a'], expected)

    # Check modifying an existing attribute
    attrs.modify('a', data)
    np.testing.assert_array_equal(attrs['a'], expected)
@@ -0,0 +1,311 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
"""
Attribute data transfer testing module
Covers all data read/write and type-conversion operations for attributes.
"""
import numpy as np
from .common import TestCase
import h5py
from h5py import h5a, h5s, h5t
from h5py import File
from h5py._hl.base import is_empty_dataspace
class BaseAttrs(TestCase):

    """
        Common fixture: a fresh writable file in self.f for every test
    """

    def setUp(self):
        """ Open a new file in 'w' mode at a path from self.mktemp() """
        self.f = File(self.mktemp(), 'w')

    def tearDown(self):
        """ Close the file unless it is falsy """
        if not self.f:
            return
        self.f.close()
class TestScalar(BaseAttrs):

    """
        Feature: Scalar attribute values are returned as NumPy array scalars
    """

    def test_int(self):
        """ An int8 scalar comes back as np.int8 """
        self.f.attrs['x'] = np.array(1, dtype=np.int8)
        self.assertIsInstance(self.f.attrs['x'], np.int8)

    def test_compound(self):
        """ A compound scalar comes back as numpy.void """
        compound = np.dtype([('a', 'i'), ('b', 'f')])
        expected = np.array((1, 4.2), dtype=compound)

        self.f.attrs['x'] = expected
        result = self.f.attrs['x']

        self.assertIsInstance(result, np.void)
        self.assertEqual(result, expected)
        self.assertEqual(result['b'], expected['b'])

    def test_compound_with_vlen_fields(self):
        """ A compound scalar whose fields are vlen arrays round-trips """
        dt = np.dtype([('a', h5py.vlen_dtype(np.int32)),
                       ('b', h5py.vlen_dtype(np.int32))])
        field_a = np.array(list(range(1, 5)), dtype=np.int32)
        field_b = np.array(list(range(8, 10)), dtype=np.int32)
        expected = np.array((field_a, field_b), dtype=dt)[()]

        self.f.attrs['x'] = expected
        result = self.f.attrs['x']

        # Specifying check_alignment=False because vlen fields have 8 bytes of padding
        # because the vlen datatype in hdf5 occupies 16 bytes
        self.assertArrayEqual(result, expected, check_alignment=False)

    def test_nesting_compound_with_vlen_fields(self):
        """ A compound scalar holding a vlen of compounds with vlen fields round-trips """
        dt_inner = np.dtype([('a', h5py.vlen_dtype(np.int32)),
                             ('b', h5py.vlen_dtype(np.int32))])
        dt = np.dtype([('f1', h5py.vlen_dtype(dt_inner)),
                       ('f2', np.int64)])

        first = (np.array(range(1, 3), dtype=np.int32),
                 np.array(range(6, 9), dtype=np.int32))
        second = (np.array(range(10, 14), dtype=np.int32),
                  np.array(range(16, 20), dtype=np.int32))
        nested = np.array([first, second], dtype=dt_inner)
        expected = np.array((nested, 2), dtype=dt)[()]

        self.f.attrs['x'] = expected
        result = self.f.attrs['x']
        self.assertArrayEqual(result, expected, check_alignment=False)

    def test_vlen_compound_with_vlen_string(self):
        """ A compound scalar holding a vlen of compounds with vlen strings round-trips """
        dt_inner = np.dtype([('a', h5py.string_dtype()),
                             ('b', h5py.string_dtype())])
        dt = np.dtype([('f', h5py.vlen_dtype(dt_inner))])

        pairs = np.array([(b"apples", b"bananas"), (b"peaches", b"oranges")],
                         dtype=dt_inner)
        expected = np.array((pairs,), dtype=dt)[()]

        self.f.attrs['x'] = expected
        result = self.f.attrs['x']
        self.assertArrayEqual(result, expected, check_alignment=False)
class TestArray(BaseAttrs):

    """
        Feature: Non-scalar attribute values are returned as ndarrays
    """

    def test_single(self):
        """ A one-element array keeps its (1,) shape """
        # Contents are uninitialised; only type and shape are checked
        self.f.attrs['x'] = np.ndarray((1,), dtype='f')
        result = self.f.attrs['x']
        self.assertIsInstance(result, np.ndarray)
        self.assertEqual(result.shape, (1,))

    def test_multi(self):
        """ A rank-1 array keeps its shape and contents """
        expected = np.ndarray((42,), dtype='f')
        expected[:] = 42.0
        expected[10:35] = -47.0

        self.f.attrs['x'] = expected
        result = self.f.attrs['x']

        self.assertIsInstance(result, np.ndarray)
        self.assertEqual(result.shape, (42,))
        self.assertArrayEqual(result, expected)
class TestTypes(BaseAttrs):

    """
        Feature: Every supported type can be stored in an attribute
    """

    def test_int(self):
        """ Signed and unsigned integers of 8 to 64 bits """
        int_types = (np.int8, np.int16, np.int32, np.int64,
                     np.uint8, np.uint16, np.uint32, np.uint64)
        for int_type in int_types:
            expected = np.ndarray((1,), dtype=int_type)
            expected[...] = 42
            self.f.attrs['x'] = expected
            result = self.f.attrs['x']
            self.assertEqual(result.dtype, int_type)
            self.assertArrayEqual(result, expected)

    def test_float(self):
        """ 4- and 8-byte floats in both byte orders """
        for code in ('<f4', '>f4', '>f8', '<f8'):
            dt = np.dtype(code)
            expected = np.ndarray((1,), dtype=dt)
            expected[...] = 42.3
            self.f.attrs['x'] = expected
            result = self.f.attrs['x']
            # TODO: Clean up after issue addressed !
            print("dtype: ", result.dtype, dt)
            print("value: ", result, expected)
            self.assertEqual(result.dtype, dt)
            self.assertArrayEqual(result, expected)

    def test_complex(self):
        """ 8- and 16-byte complex numbers in both byte orders """
        for code in ('<c8', '>c8', '<c16', '>c16'):
            dt = np.dtype(code)
            expected = np.ndarray((1,), dtype=dt)
            expected[...] = -4.2j + 35.9
            self.f.attrs['x'] = expected
            result = self.f.attrs['x']
            self.assertEqual(result.dtype, dt)
            self.assertArrayEqual(result, expected)

    def test_string(self):
        """ Fixed-length byte strings """
        for code in ('|S1', '|S10'):
            dt = np.dtype(code)
            expected = np.ndarray((1,), dtype=dt)
            expected[...] = 'h'
            self.f.attrs['x'] = expected
            result = self.f.attrs['x']
            self.assertEqual(result.dtype, dt)
            self.assertEqual(result[0], expected[0])

    def test_bool(self):
        """ NumPy booleans """
        expected = np.ndarray((2,), dtype=np.bool_)
        expected[...] = True, False
        self.f.attrs['x'] = expected
        result = self.f.attrs['x']
        self.assertEqual(result.dtype, expected.dtype)
        for index in (0, 1):
            self.assertEqual(result[index], expected[index])

    def test_vlen_string_array(self):
        """ Arrays of variable-length ASCII strings """
        dt = h5py.string_dtype(encoding='ascii')
        expected = np.ndarray((2,), dtype=dt)
        expected[...] = "Hello", "Hi there!  This is HDF5!"
        self.f.attrs['x'] = expected
        result = self.f.attrs['x']
        self.assertEqual(result.dtype, dt)
        for index in (0, 1):
            self.assertEqual(result[index], expected[index])

    def _open_x_type(self):
        """ Return the low-level type id of attribute 'x' on the file """
        attr_id = h5py.h5a.open(self.f.id, b"x")
        return attr_id.get_type()

    def test_string_scalar(self):
        """ A bytes scalar is stored as a variable-length ASCII string and
        read back as str
        """
        self.f.attrs['x'] = b'Hello'
        result = self.f.attrs['x']
        self.assertEqual(result, 'Hello')
        self.assertEqual(type(result), str)

        tid = self._open_x_type()
        self.assertEqual(type(tid), h5py.h5t.TypeStringID)
        self.assertEqual(tid.get_cset(), h5py.h5t.CSET_ASCII)
        self.assertTrue(tid.is_variable_str())

    def test_unicode_scalar(self):
        """ A str scalar is stored as a variable-length UTF-8 string and
        read back as str
        """
        text = u"Hello" + chr(0x2340) + u"!!"
        self.f.attrs['x'] = text
        result = self.f.attrs['x']
        self.assertEqual(result, u"Hello" + chr(0x2340) + u"!!")
        self.assertEqual(type(result), str)

        tid = self._open_x_type()
        self.assertEqual(type(tid), h5py.h5t.TypeStringID)
        self.assertEqual(tid.get_cset(), h5py.h5t.CSET_UTF8)
        self.assertTrue(tid.is_variable_str())
class TestEmpty(BaseAttrs):

    """
        Attributes with a NULL dataspace, represented by h5py.Empty
    """

    def setUp(self):
        """ Create attribute 'x' with a NULL dataspace and a 10-byte string
        type through the low-level API
        """
        super().setUp()
        null_space = h5s.create(h5s.NULL)
        str_type = h5t.C_S1.copy()
        str_type.set_size(10)
        aid = h5a.create(self.f.id, b'x', str_type, null_space)
        self.empty_obj = h5py.Empty(np.dtype("S10"))

    def test_read(self):
        """ Reading the attribute gives the matching Empty object """
        self.assertEqual(self.empty_obj, self.f.attrs['x'])

    def test_write(self):
        """ Assigning an Empty object creates an empty-dataspace attribute """
        self.f.attrs["y"] = self.empty_obj
        attr_id = h5a.open(self.f.id, b'y')
        self.assertTrue(is_empty_dataspace(attr_id))

    def test_modify(self):
        """ modify() on the empty attribute raises OSError """
        with self.assertRaises(OSError):
            self.f.attrs.modify('x', 1)

    def test_values(self):
        """ values() contains only the Empty object """
        # list() is for Py3 where these are iterators
        found = list(self.f.attrs.values())
        self.assertEqual([self.empty_obj], found)

    def test_items(self):
        """ items() contains only the ('x', Empty) pair """
        found = list(self.f.attrs.items())
        self.assertEqual([(u"x", self.empty_obj)], found)

    def test_itervalues(self):
        """ Same check as test_values """
        found = list(self.f.attrs.values())
        self.assertEqual([self.empty_obj], found)

    def test_iteritems(self):
        """ Same check as test_items """
        found = list(self.f.attrs.items())
        self.assertEqual([(u"x", self.empty_obj)], found)
class TestWriteException(BaseAttrs):

    """
        A failed attribute write must not leave a partial attribute behind.
    """

    def test_write(self):
        """ After writing a string with an embedded NUL, 'x' does not exist """
        payload = b"Hello\x00Hello"

        # A ValueError from the write is tolerated here; the check that
        # matters is the lookup below.
        try:
            self.f.attrs['x'] = payload
        except ValueError:
            pass

        with self.assertRaises(KeyError):
            self.f.attrs['x']
@@ -0,0 +1,146 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
"""
Common high-level operations test
Tests features common to all high-level objects, like the .name property.
"""
from h5py import File
from h5py._hl.base import is_hdf5, Empty
from .common import ut, TestCase, UNICODE_FILENAMES
import numpy as np
import os
import tempfile
class BaseTest(TestCase):

    """
        Common fixture: a fresh writable file in self.f for every test
    """

    def setUp(self):
        """ Open a new file in 'w' mode at a path from self.mktemp() """
        self.f = File(self.mktemp(), 'w')

    def tearDown(self):
        """ Close the file unless it is falsy """
        if not self.f:
            return
        self.f.close()
class TestName(BaseTest):

    """
        Feature: the .name attribute reports the object's name
    """

    def test_anonymous(self):
        """ A group created with name None reports .name as None """
        anonymous = self.f.create_group(None)
        self.assertIs(anonymous.name, None)
class TestParent(BaseTest):

    """
        The .parent property of high-level objects
    """

    def test_object_parent(self):
        """ .parent raises ValueError for anonymous objects and returns the
        enclosing group for named ones
        """
        # Anonymous objects
        anonymous = self.f.create_group(None)
        # Parent of an anonymous object is undefined
        with self.assertRaises(ValueError):
            anonymous.parent

        # Named objects
        outer = self.f.create_group("bar")
        inner = outer.create_group("foo")
        self.assertEqual(inner.parent.name, "/bar")
class TestMapping(BaseTest):

    """
        Test if the registration of Group as a
        Mapping behaves as expected
    """

    def setUp(self):
        """ Add group 'bar' and attribute 'x' to the file from BaseTest """
        super().setUp()
        data = ('a', 'b')
        self.grp = self.f.create_group('bar')
        self.attr = self.f.attrs.create('x', data)

    def test_keys(self):
        """ repr() of the keys view is a str for both file and group """
        key_1 = self.f.keys()
        self.assertIsInstance(repr(key_1), str)
        key_2 = self.grp.keys()
        self.assertIsInstance(repr(key_2), str)

    def test_values(self):
        """ repr() of the values view is a str for both file and group """
        value_1 = self.f.values()
        self.assertIsInstance(repr(value_1), str)
        value_2 = self.grp.values()
        self.assertIsInstance(repr(value_2), str)

    def test_items(self):
        """ repr() of the items view is a str for both file and group """
        item_1 = self.f.items()
        self.assertIsInstance(repr(item_1), str)
        item_2 = self.grp.items()
        # Previously this re-checked item_1, leaving the group's view untested
        self.assertIsInstance(repr(item_2), str)
class TestRepr(BaseTest):

    """
        repr() returns a str for objects with Unicode names
    """

    USTRING = chr(0xfc) + chr(0xdf)

    def _check_type(self, obj):
        """ Assert that repr(obj) is a str """
        text = repr(obj)
        self.assertIsInstance(text, str)

    def test_group(self):
        """ repr() of a group with a Unicode name """
        self._check_type(self.f.create_group(self.USTRING))

    def test_dataset(self):
        """ repr() of a dataset with a Unicode name """
        self._check_type(self.f.create_dataset(self.USTRING, (1,)))

    def test_namedtype(self):
        """ repr() of a named type """
        self.f['type'] = np.dtype('f')
        self._check_type(self.f['type'])

    def test_empty(self):
        """ Empty objects of different dtypes are unequal; repr() is a str """
        empty_float = Empty(dtype='f')
        self.assertNotEqual(Empty(dtype='i'), empty_float)
        self._check_type(empty_float)

    @ut.skipIf(not UNICODE_FILENAMES, "Filesystem unicode support required")
    def test_file(self):
        """ repr() of a File whose filename contains Unicode characters """
        fname = tempfile.mktemp(self.USTRING+'.hdf5')
        try:
            with File(fname,'w') as handle:
                self._check_type(handle)
        finally:
            # Best-effort cleanup: any failure to delete is ignored
            try:
                os.unlink(fname)
            except Exception:
                pass
def test_is_hdf5():
    """is_hdf5() is true for a freshly created file and false for a missing path.

    The file is closed explicitly (via the context manager) before it is
    probed, and removed afterwards, instead of relying on garbage collection
    and leaving the temporary file behind.
    """
    path = tempfile.mktemp()
    try:
        with File(path, "w") as f:
            filename = f.filename
        assert is_hdf5(filename)
    finally:
        # Creation may have failed before the file existed
        if os.path.exists(path):
            os.unlink(path)

    # non-existing HDF5 file
    filename = tempfile.mktemp()
    assert not is_hdf5(filename)
@@ -0,0 +1,49 @@
import numpy as np
from h5py import File
from .common import TestCase
from .data_files import get_data_file_path
def test_vlen_big_endian():
    """Check the contents of the "vlen_string_s390x.h5" data file.

    Verifies one attribute, a variable-length string dataset, and float and
    integer datasets in both byte orders.
    """
    with File(get_data_file_path("vlen_string_s390x.h5")) as f:
        assert f.attrs["created_on_s390x"] == 1

        strings = f["DSvariable"]
        expected_strings = (b"Parting", b"is such", b"sweet", b"sorrow...")
        for index, expected in enumerate(expected_strings):
            assert strings[index] == expected

        floats = f["DSLEfloat"]
        for index, expected in enumerate((3.14, 1.61, 2.71, 2.41, 1.2)):
            assert floats[index] == expected
        assert floats.dtype == "<f8"

        # Same float values with big endianness
        assert f["DSBEfloat"][0] == 3.14
        assert f["DSBEfloat"].dtype == ">f8"

        assert f["DSLEint"][0] == 1
        assert f["DSLEint"].dtype == "<u8"

        # Same int values with big endianness
        assert f["DSBEint"][0] == 1
        assert f["DSBEint"].dtype == ">i8"
class TestEndianess(TestCase):

    """
        Round-trip of explicitly big-endian data
    """

    def test_simple_int_be(self):
        """ A '>i4' value written to a dataset reads back with the same value """
        fname = self.mktemp()

        raw = bytearray([0, 1, 3, 2])
        arr = np.ndarray(shape=(1,), dtype=">i4", buffer=raw)
        # The four bytes read most-significant first
        be_number = int.from_bytes(raw, "big")

        with File(fname, mode="w") as f:
            f.create_dataset("int", data=arr)

        with File(fname, mode="r") as f:
            assert f["int"][()][0] == be_number
@@ -0,0 +1,52 @@
from .common import TestCase
class TestCompletions(TestCase):

    """
        _ipython_key_completions_() on groups and attribute managers
    """

    def test_group_completions(self):
        """ Completions list every member name, for the file and a subgroup """
        # Test completions on top-level file.
        g = self.f.create_group('g')
        self.f.create_group('h')
        self.f.create_dataset('data', [1, 2, 3])
        self.assertEqual(
            self.f._ipython_key_completions_(),
            ['data', 'g', 'h'],
        )

        # Was create_group('data2', [1, 2, 3]), which passed the shape list
        # as create_group's second positional argument (track_order). A
        # dataset matches the name and the argument.
        self.f.create_dataset('data2', [1, 2, 3])
        self.assertEqual(
            self.f._ipython_key_completions_(),
            ['data', 'data2', 'g', 'h'],
        )

        # Test on subgroup.
        g.create_dataset('g_data1', [1, 2, 3])
        g.create_dataset('g_data2', [4, 5, 6])
        self.assertEqual(
            g._ipython_key_completions_(),
            ['g_data1', 'g_data2'],
        )

        g.create_dataset('g_data3', [7, 8, 9])
        self.assertEqual(
            g._ipython_key_completions_(),
            ['g_data1', 'g_data2', 'g_data3'],
        )

    def test_attrs_completions(self):
        """ Attribute completions are listed alphabetically """
        attrs = self.f.attrs

        # Write out of alphabetical order to test that completions come back in
        # alphabetical order, as opposed to, say, insertion order.
        attrs['b'] = 1
        attrs['a'] = 2
        self.assertEqual(
            attrs._ipython_key_completions_(),
            ['a', 'b']
        )

        attrs['c'] = 3
        self.assertEqual(
            attrs._ipython_key_completions_(),
            ['a', 'b', 'c']
        )
@@ -0,0 +1,618 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
"""
Tests the h5py.Dataset.__getitem__ method.
This module does not specifically test type conversion. The "type" axis
therefore only tests objects which interact with the slicing system in
unreliable ways; for example, compound and array types.
See test_dataset_getitem_types for type-conversion tests.
Tests are organized into TestCases by dataset shape and type. Test
methods vary by slicing arg type.
1. Dataset shape:
Empty
Scalar
1D
3D
2. Type:
Float
Compound
Array
3. Slicing arg types:
Ellipsis
Empty tuple
Regular slice
MultiBlockSlice
Indexing
Index list
Boolean mask
Field names
"""
import sys
import numpy as np
import h5py
from .common import ut, TestCase
class TestEmpty(TestCase):

    """
        Indexing a dataset that has a NULL dataspace
    """

    def setUp(self):
        """ Create dataset 'x' with a NULL dataspace and a 10-byte string
        type through the low-level API
        """
        super().setUp()
        null_space = h5py.h5s.create(h5py.h5s.NULL)
        str_type = h5py.h5t.C_S1.copy()
        str_type.set_size(10)
        dsid = h5py.h5d.create(self.f.id, b'x', str_type, null_space)
        self.dset = h5py.Dataset(dsid)
        self.empty_obj = h5py.Empty(np.dtype("S10"))

    def test_ndim(self):
        """ ndim is 0 """
        self.assertEqual(self.dset.ndim, 0)

    def test_shape(self):
        """ shape is None """
        self.assertEqual(self.dset.shape, None)

    def test_size(self):
        """ size is None """
        self.assertEqual(self.dset.size, None)

    def test_nbytes(self):
        """ nbytes is 0 """
        self.assertEqual(self.dset.nbytes, 0)

    def test_ellipsis(self):
        """ Ellipsis -> Empty object """
        self.assertEqual(self.dset[...], self.empty_obj)

    def test_tuple(self):
        """ () -> Empty object """
        self.assertEqual(self.dset[()], self.empty_obj)

    def test_slice(self):
        """ slice -> ValueError """
        with self.assertRaises(ValueError):
            self.dset[0:4]

    def test_multi_block_slice(self):
        """ MultiBlockSlice -> ValueError """
        with self.assertRaises(ValueError):
            self.dset[h5py.MultiBlockSlice()]

    def test_index(self):
        """ index -> ValueError """
        with self.assertRaises(ValueError):
            self.dset[0]

    def test_indexlist(self):
        """ index list -> ValueError """
        with self.assertRaises(ValueError):
            self.dset[[1,2,5]]

    def test_mask(self):
        """ mask -> ValueError """
        selector = np.array(True, dtype='bool')
        with self.assertRaises(ValueError):
            self.dset[selector]

    def test_fieldnames(self):
        """ field name -> ValueError """
        with self.assertRaises(ValueError):
            self.dset['field']
class TestScalarFloat(TestCase):

    """
        Indexing a scalar dataset holding one double
    """

    def setUp(self):
        """ Create scalar dataset 'x' from a 0-d double array """
        super().setUp()
        self.data = np.array(42.5, dtype=np.double)
        self.dset = self.f.create_dataset('x', data=self.data)

    def test_ndim(self):
        """ ndim is 0 """
        self.assertEqual(self.dset.ndim, 0)

    def test_size(self):
        """ size is 1 """
        self.assertEqual(self.dset.size, 1)

    def test_nbytes(self):
        """ nbytes equals the item size of the source dtype """
        self.assertEqual(self.dset.nbytes, self.data.dtype.itemsize)  # not sure if 'f' is always alias for 'f4'

    def test_shape(self):
        """ shape is the empty tuple """
        self.assertEqual(self.dset.shape, ())

    def test_ellipsis(self):
        """ Ellipsis -> scalar ndarray """
        result = self.dset[...]
        self.assertArrayEqual(result, self.data)

    def test_tuple(self):
        """ () -> bare item """
        result = self.dset[()]
        self.assertArrayEqual(result, self.data.item())

    def test_slice(self):
        """ slice -> ValueError """
        with self.assertRaises(ValueError):
            self.dset[0:4]

    def test_multi_block_slice(self):
        """ MultiBlockSlice -> ValueError """
        with self.assertRaises(ValueError):
            self.dset[h5py.MultiBlockSlice()]

    def test_index(self):
        """ index -> ValueError """
        with self.assertRaises(ValueError):
            self.dset[0]

    # FIXME: NumPy has IndexError instead
    def test_indexlist(self):
        """ index list -> ValueError """
        with self.assertRaises(ValueError):
            self.dset[[1,2,5]]

    # FIXME: NumPy permits this
    def test_mask(self):
        """ mask -> ValueError """
        selector = np.array(True, dtype='bool')
        with self.assertRaises(ValueError):
            self.dset[selector]

    def test_fieldnames(self):
        """ field name -> ValueError (no fields) """
        with self.assertRaises(ValueError):
            self.dset['field']
class TestScalarCompound(TestCase):

    """
        Indexing a scalar dataset with a compound (float, int, string) type
    """

    def setUp(self):
        """ Create scalar dataset 'x' from a 0-d compound array """
        TestCase.setUp(self)
        self.data = np.array((42.5, -118, "Hello"), dtype=[('a', 'f'), ('b', 'i'), ('c', '|S10')])
        self.dset = self.f.create_dataset('x', data=self.data)

    def test_ndim(self):
        """ Verify number of dimensions """
        self.assertEqual(self.dset.ndim, 0)

    def test_shape(self):
        """ Verify shape """
        self.assertEqual(self.dset.shape, tuple())

    def test_size(self):
        """ Verify size """
        self.assertEqual(self.dset.size, 1)

    def test_nbytes(self):
        """ Verify nbytes """
        self.assertEqual(self.dset.nbytes, self.data.dtype.itemsize)

    def test_ellipsis(self):
        """ Ellipsis -> scalar ndarray """
        out = self.dset[...]
        # assertArrayEqual doesn't work with compounds; do manually
        self.assertIsInstance(out, np.ndarray)
        self.assertEqual(out.shape, self.data.shape)
        self.assertEqual(out.dtype, self.data.dtype)

    def test_tuple(self):
        """ () -> np.void instance """
        out = self.dset[()]
        self.assertIsInstance(out, np.void)
        self.assertEqual(out.dtype, self.data.dtype)

    def test_slice(self):
        """ slice -> ValueError """
        with self.assertRaises(ValueError):
            self.dset[0:4]

    def test_multi_block_slice(self):
        """ MultiBlockSlice -> ValueError """
        with self.assertRaises(ValueError):
            self.dset[h5py.MultiBlockSlice()]

    def test_index(self):
        """ index -> ValueError """
        with self.assertRaises(ValueError):
            self.dset[0]

    # FIXME: NumPy has IndexError instead
    def test_indexlist(self):
        """ index list -> ValueError """
        with self.assertRaises(ValueError):
            self.dset[[1,2,5]]

    # FIXME: NumPy permits this
    def test_mask(self):
        """ mask -> ValueError """
        mask = np.array(True, dtype='bool')
        with self.assertRaises(ValueError):
            self.dset[mask]

    # FIXME: NumPy returns a scalar ndarray
    def test_fieldnames(self):
        """ field name -> bare value """
        out = self.dset['a']
        self.assertIsInstance(out, np.float32)
        # Compare with the source data; the earlier check compared the
        # dataset read with a second read of the same field.
        self.assertEqual(out, self.data['a'])
class TestScalarArray(TestCase):

    """
        Indexing a scalar dataset whose element type is a (3,2) float array
    """

    def setUp(self):
        """ Create scalar dataset 'x' with an array dtype and fill it """
        TestCase.setUp(self)
        self.dt = np.dtype('(3,2)f')
        self.data = np.array([(3.2, -119), (42, 99.8), (3.14, 0)], dtype='f')
        self.dset = self.f.create_dataset('x', (), dtype=self.dt)
        self.dset[...] = self.data

    def test_ndim(self):
        """ Verify number of dimensions """
        self.assertEqual(self.data.ndim, 2)
        self.assertEqual(self.dset.ndim, 0)

    def test_size(self):
        """ Verify size """
        self.assertEqual(self.dset.size, 1)

    def test_nbytes(self):
        """ Verify nbytes """
        self.assertEqual(self.dset.nbytes, self.dset.dtype.itemsize)  # not sure if 'f' is always alias for 'f4'

    def test_shape(self):
        """ Verify shape """
        self.assertEqual(self.data.shape, (3, 2))
        self.assertEqual(self.dset.shape, tuple())

    def test_ellipsis(self):
        """ Ellipsis -> ndarray promoted to underlying shape """
        out = self.dset[...]
        self.assertArrayEqual(out, self.data)

    def test_tuple(self):
        """ () -> same as ellipsis """
        # Index with the empty tuple as documented; the earlier code used
        # Ellipsis here and so duplicated test_ellipsis.
        out = self.dset[()]
        self.assertArrayEqual(out, self.data)

    def test_slice(self):
        """ slice -> ValueError """
        with self.assertRaises(ValueError):
            self.dset[0:4]

    def test_multi_block_slice(self):
        """ MultiBlockSlice -> ValueError """
        with self.assertRaises(ValueError):
            self.dset[h5py.MultiBlockSlice()]

    def test_index(self):
        """ index -> ValueError """
        with self.assertRaises(ValueError):
            self.dset[0]

    def test_indexlist(self):
        """ index list -> ValueError """
        with self.assertRaises(ValueError):
            self.dset[[]]

    def test_mask(self):
        """ mask -> ValueError """
        mask = np.array(True, dtype='bool')
        with self.assertRaises(ValueError):
            self.dset[mask]

    def test_fieldnames(self):
        """ field name -> ValueError (no fields) """
        with self.assertRaises(ValueError):
            self.dset['field']
class Test1DZeroFloat(TestCase):

    """
        Indexing a 1D float dataset of length zero
    """

    def setUp(self):
        """ Create dataset 'x' from an empty (0,) float array """
        super().setUp()
        self.data = np.ones((0,), dtype='f')
        self.dset = self.f.create_dataset('x', data=self.data)

    def _check(self, selection, **kwargs):
        """ Compare the dataset with the source array under *selection* """
        self.assertNumpyBehavior(self.dset, self.data, selection, **kwargs)

    def test_ndim(self):
        """ ndim is 1 """
        self.assertEqual(self.dset.ndim, 1)

    def test_shape(self):
        """ shape is (0,) """
        self.assertEqual(self.dset.shape, (0,))

    def test_ellipsis(self):
        """ Ellipsis -> ndarray of matching shape """
        self._check(np.s_[...])

    def test_tuple(self):
        """ () -> same as ellipsis """
        self._check(np.s_[()])

    def test_slice(self):
        """ slice -> ndarray of shape (0,) """
        self._check(np.s_[0:4])

    def test_slice_stop_less_than_start(self):
        """ slice with stop < start behaves like NumPy """
        self._check(np.s_[7:5])

    def test_index(self):
        """ index -> out of range """
        with self.assertRaises(IndexError):
            self.dset[0]

    def test_indexlist(self):
        """ index list """
        self._check(np.s_[[]])

    def test_mask(self):
        """ mask -> ndarray of matching shape """
        selector = np.ones((0,), dtype='bool')
        self._check(
            np.s_[selector],
            # Fast reader doesn't work with boolean masks
            skip_fast_reader=True,
        )

    def test_fieldnames(self):
        """ field name -> ValueError (no fields) """
        with self.assertRaises(ValueError):
            self.dset['field']
class Test1DFloat(TestCase):

    """
        Indexing a 1D float dataset of 13 elements
    """

    def setUp(self):
        """ Create dataset 'x' holding the floats 0..12 """
        super().setUp()
        self.data = np.arange(13).astype('f')
        self.dset = self.f.create_dataset('x', data=self.data)

    def _check(self, selection, **kwargs):
        """ Compare the dataset with the source array under *selection* """
        self.assertNumpyBehavior(self.dset, self.data, selection, **kwargs)

    def test_ndim(self):
        """ ndim is 1 """
        self.assertEqual(self.dset.ndim, 1)

    def test_shape(self):
        """ shape is (13,) """
        self.assertEqual(self.dset.shape, (13,))

    def test_ellipsis(self):
        self._check(np.s_[...])

    def test_tuple(self):
        self._check(np.s_[()])

    def test_slice_simple(self):
        self._check(np.s_[0:4])

    def test_slice_zerosize(self):
        self._check(np.s_[4:4])

    def test_slice_strides(self):
        self._check(np.s_[1:7:3])

    def test_slice_negindexes(self):
        self._check(np.s_[-8:-2:3])

    def test_slice_stop_less_than_start(self):
        self._check(np.s_[7:5])

    def test_slice_outofrange(self):
        self._check(np.s_[100:400:3])

    def test_slice_backwards(self):
        """ we disallow negative steps """
        with self.assertRaises(ValueError):
            self.dset[::-1]

    def test_slice_zerostride(self):
        self._check(np.s_[::0])

    def test_index_simple(self):
        self._check(np.s_[3])

    def test_index_neg(self):
        self._check(np.s_[-4])

    # FIXME: NumPy permits this... it adds a new axis in front
    def test_index_none(self):
        with self.assertRaises(TypeError):
            self.dset[None]

    def test_index_illegal(self):
        """ Illegal slicing argument """
        with self.assertRaises(TypeError):
            self.dset[{}]

    def test_index_outofrange(self):
        with self.assertRaises(IndexError):
            self.dset[100]

    def test_indexlist_simple(self):
        self._check(np.s_[[1,2,5]])

    def test_indexlist_numpyarray(self):
        self._check(np.s_[np.array([1, 2, 5])])

    def test_indexlist_single_index_ellipsis(self):
        self._check(np.s_[[0], ...])

    def test_indexlist_numpyarray_single_index_ellipsis(self):
        self._check(np.s_[np.array([0]), ...])

    def test_indexlist_numpyarray_ellipsis(self):
        self._check(np.s_[np.array([1, 2, 5]), ...])

    def test_indexlist_empty(self):
        self._check(np.s_[[]])

    def test_indexlist_outofrange(self):
        with self.assertRaises(IndexError):
            self.dset[[100]]

    def test_indexlist_nonmonotonic(self):
        """ we require index list values to be strictly increasing """
        with self.assertRaises(TypeError):
            self.dset[[1,3,2]]

    def test_indexlist_monotonic_negative(self):
        # This should work: indices are logically increasing
        self._check(np.s_[[0, 2, -2]])

        with self.assertRaises(TypeError):
            self.dset[[-2, -3]]

    def test_indexlist_repeated(self):
        """ we forbid repeated index values """
        with self.assertRaises(TypeError):
            self.dset[[1,1,2]]

    def test_mask_true(self):
        """ mask that is True everywhere """
        self._check(
            np.s_[self.data > -100],
            # Fast reader doesn't work with boolean masks
            skip_fast_reader=True,
        )

    def test_mask_false(self):
        """ mask that is False everywhere """
        self._check(
            np.s_[self.data > 100],
            # Fast reader doesn't work with boolean masks
            skip_fast_reader=True,
        )

    def test_mask_partial(self):
        """ mask that selects only part of the data """
        self._check(
            np.s_[self.data > 5],
            # Fast reader doesn't work with boolean masks
            skip_fast_reader=True,
        )

    def test_mask_wrongsize(self):
        """ we require the boolean mask shape to match exactly """
        with self.assertRaises(TypeError):
            self.dset[np.ones((2,), dtype='bool')]

    def test_fieldnames(self):
        """ field name -> ValueError (no fields) """
        with self.assertRaises(ValueError):
            self.dset['field']
class Test2DZeroFloat(TestCase):

    """
        Indexing a 2D float dataset with a zero-length first axis
    """

    def setUp(self):
        """ Create dataset 'x' from an empty (0, 3) float array """
        super().setUp()
        self.data = np.ones((0,3), dtype='f')
        self.dset = self.f.create_dataset('x', data=self.data)

    def test_ndim(self):
        """ ndim is 2 """
        self.assertEqual(self.dset.ndim, 2)

    def test_shape(self):
        """ shape is (0, 3) """
        self.assertEqual(self.dset.shape, (0, 3))

    def test_indexlist(self):
        """ see issue #473 """
        selection = np.s_[:,[0,1,2]]
        self.assertNumpyBehavior(self.dset, self.data, selection)
class Test2DFloat(TestCase):

    """
        Indexing a 2D float dataset of shape (5, 3)
    """

    def setUp(self):
        """ Create dataset 'x' from a (5, 3) float array of ones """
        super().setUp()
        self.data = np.ones((5,3), dtype='f')
        self.dset = self.f.create_dataset('x', data=self.data)

    def test_ndim(self):
        """ ndim is 2 """
        self.assertEqual(self.dset.ndim, 2)

    def test_size(self):
        """ size is 15 """
        self.assertEqual(self.dset.size, 15)

    def test_nbytes(self):
        """ nbytes is 15 items of the source dtype """
        self.assertEqual(self.dset.nbytes, 15*self.data.dtype.itemsize)  # not sure if 'f' is always alias for 'f4'

    def test_shape(self):
        """ shape is (5, 3) """
        self.assertEqual(self.dset.shape, (5, 3))

    def test_indexlist(self):
        """ see issue #473 """
        selection = np.s_[:,[0,1,2]]
        self.assertNumpyBehavior(self.dset, self.data, selection)

    def test_index_emptylist(self):
        """ an empty index list, on either axis, behaves like NumPy """
        for selection in (np.s_[:, []], np.s_[[]]):
            self.assertNumpyBehavior(self.dset, self.data, selection)
class TestVeryLargeArray(TestCase):

    """
        Size reporting for a dataset with 2**31 elements
    """

    def setUp(self):
        """ Create dataset 'x' with shape (2**15, 2**16); no data is written """
        super().setUp()
        self.dset = self.f.create_dataset('x', shape=(2**15, 2**16))

    @ut.skipIf(sys.maxsize < 2**31, 'Maximum integer size >= 2**31 required')
    def test_size(self):
        """ size is the product of the two dimensions """
        self.assertEqual(self.dset.size, 2**31)
def test_read_no_fill_value(writable_file):
    """Reading unallocated chunks of a FILL_TIME_NEVER dataset gives zeros."""
    # With FILL_TIME_NEVER, HDF5 doesn't write zeros in the output array for
    # unallocated chunks. If we read into uninitialized memory, it can appear
    # to read random values. https://github.com/h5py/h5py/issues/2069
    dcpl = h5py.h5p.create(h5py.h5p.DATASET_CREATE)
    dcpl.set_chunk((1,))
    dcpl.set_fill_time(h5py.h5d.FILL_TIME_NEVER)

    space = h5py.h5s.create_simple((5,))
    dsid = h5py.h5d.create(
        writable_file.id, b'a', h5py.h5t.IEEE_F64LE, space, dcpl
    )
    ds = h5py.Dataset(dsid)

    np.testing.assert_array_equal(ds[:3], np.zeros(3, np.float64))
class TestBoolIndex(TestCase):

    """
        Indexing with Boolean sequences
    """

    def setUp(self):
        """ Create dataset 'x' holding 0..8 arranged as 3 rows """
        super().setUp()
        self.arr = np.arange(9).reshape(3,-1)
        self.dset = self.f.create_dataset('x', data=self.arr)

    def test_select_first_axis(self):
        """ A Boolean list of matching length on the first axis behaves like NumPy """
        selection = np.s_[[False, True, False],:]
        self.assertNumpyBehavior(self.dset, self.arr, selection)

    def test_wrong_size(self):
        """ A Boolean list longer than the axis raises TypeError """
        selection = np.s_[[False, True, False, False],:]
        with self.assertRaises(TypeError):
            self.dset[selection]
@@ -0,0 +1,118 @@
import numpy as np
import h5py
from .common import TestCase
class TestDatasetSwmrRead(TestCase):
    """ SWMR behaviour when a dataset is opened for reading.

    Skip this test if the HDF5 library does not have the SWMR features.
    """

    def setUp(self):
        """ Write a chunked, resizable dataset, then reopen the file
        read-only with swmr=True
        """
        super().setUp()
        self.data = np.arange(13).astype('f')
        self.dset = self.f.create_dataset('data', chunks=(13,), maxshape=(None,), data=self.data)
        path = self.f.filename
        self.f.close()

        self.f = h5py.File(path, 'r', swmr=True)
        self.dset = self.f['data']

    def test_initial_swmr_mode_on(self):
        """ The file reports SWMR mode straight after opening """
        self.assertTrue(self.f.swmr_mode)

    def test_read_data(self):
        """ The dataset contents equal what was written """
        self.assertArrayEqual(self.dset, self.data)

    def test_refresh(self):
        """ refresh() can be called on the dataset """
        self.dset.refresh()

    def test_force_swmr_mode_on_raises(self):
        """ Verify when reading a file cannot be forcibly switched to swmr mode.
        When reading with SWMR the file must be opened with swmr=True."""
        with self.assertRaises(Exception):
            self.f.swmr_mode = True
        self.assertTrue(self.f.swmr_mode)

    def test_force_swmr_mode_off_raises(self):
        """ Switching SWMR write mode off is only possible by closing the file.
        Attempts to forcibly switch off the SWMR mode should raise a ValueError.
        """
        with self.assertRaises(ValueError):
            self.f.swmr_mode = False
        self.assertTrue(self.f.swmr_mode)
class TestDatasetSwmrWrite(TestCase):
    """ SWMR functions when writing a dataset.

        These tests need an HDF5 library with the SWMR features; no skip
        condition is applied in this class.
    """

    def setUp(self):
        """ First setup a file with a small chunked and empty dataset.
        No data written yet.
        """
        # Note that when creating the file, the swmr=True is not required for
        # write, but libver='latest' is required.
        self.f = h5py.File(self.mktemp(), 'w', libver='latest')

        self.data = np.arange(4).astype('f')
        self.dset = self.f.create_dataset(
            'data',
            shape=(0,),
            dtype=self.data.dtype,
            chunks=(2,),
            maxshape=(None,),
        )

    def test_initial_swmr_mode_off(self):
        """ Verify that the file is not initially in SWMR mode"""
        self.assertFalse(self.f.swmr_mode)

    def test_switch_swmr_mode_on(self):
        """ Switch to SWMR mode and verify """
        self.f.swmr_mode = True
        self.assertTrue(self.f.swmr_mode)

    def test_switch_swmr_mode_off_raises(self):
        """ Switching SWMR write mode off is only possible by closing the file.
        Attempts to forcibly switch off the SWMR mode should raise a ValueError.
        """
        self.f.swmr_mode = True
        self.assertTrue(self.f.swmr_mode)

        with self.assertRaises(ValueError):
            self.f.swmr_mode = False
        # The rejected assignment must not have changed the mode.
        self.assertTrue(self.f.swmr_mode)

    def test_extend_dset(self):
        """ Extend and flush a SWMR dataset
        """
        self.f.swmr_mode = True
        self.assertTrue(self.f.swmr_mode)

        new_shape = self.data.shape
        self.dset.resize(new_shape)
        self.dset[:] = self.data
        self.dset.flush()

        # Refresh and read back data for assertion
        self.dset.refresh()
        self.assertArrayEqual(self.dset, self.data)

    def test_extend_dset_multiple(self):
        """ Extend, write and flush a SWMR dataset twice in a row """
        self.f.swmr_mode = True
        self.assertTrue(self.f.swmr_mode)

        # First block: grow to 4 elements and fill them.
        self.dset.resize((4,))
        self.dset[0:] = self.data
        self.dset.flush()

        # Refresh and read back 1st data block for assertion
        self.dset.refresh()
        self.assertArrayEqual(self.dset, self.data)

        # Second block: grow to 8 elements and fill the new tail.
        self.dset.resize((8,))
        self.dset[4:] = self.data
        self.dset.flush()

        # Refresh and read back both data blocks for assertion
        self.dset.refresh()
        self.assertArrayEqual(self.dset[0:4], self.data)
        self.assertArrayEqual(self.dset[4:8], self.data)
@@ -0,0 +1,40 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
"""
File-resident datatype tests.
Tests "committed" file-resident datatype objects.
"""
import numpy as np
from .common import TestCase
from h5py import Datatype
class TestCreation(TestCase):
    """
        Feature: repr() works sensibly on datatype objects
    """

    def test_repr(self):
        """ repr() on datatype objects """
        self.f['foo'] = np.dtype('S10')
        committed = self.f['foo']

        # repr() must yield a str while the file is open ...
        self.assertIsInstance(repr(committed), str)

        # ... and also once the file has been closed.
        self.f.close()
        self.assertIsInstance(repr(committed), str)

    def test_appropriate_low_level_id(self):
        " Binding a Datatype to a non-TypeID identifier fails with ValueError "
        root_group_id = self.f['/'].id
        with self.assertRaises(ValueError):
            Datatype(root_group_id)
@@ -0,0 +1,216 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
import numpy as np
from .common import TestCase
from h5py import File
import h5py
class BaseDataset(TestCase):
    """
    data is a 3-dimensional dataset with dimensions [z, y, x]

    The z dimension is labeled. It does not have any attached scales.
    The y dimension is not labeled. It has one attached scale.
    The x dimension is labeled. It has two attached scales.

    data2 is a 3-dimensional dataset with no associated dimension scales.
    """

    def setUp(self):
        """ Build the datasets, scales and labels described in the class docstring """
        f = self.f = File(self.mktemp(), 'w')
        h5ds = h5py.h5ds

        f['data'] = np.ones((4, 3, 2), 'f')
        f['data2'] = np.ones((4, 3, 2), 'f')

        # x axis (index 2): two scales, the first one unnamed.
        f['x1'] = np.ones(2, 'f')
        h5ds.set_scale(f['x1'].id)
        h5ds.attach_scale(f['data'].id, f['x1'].id, 2)
        f['x2'] = np.ones(2, 'f')
        h5ds.set_scale(f['x2'].id, b'x2 name')
        h5ds.attach_scale(f['data'].id, f['x2'].id, 2)

        # y axis (index 1): a single named scale.
        f['y1'] = np.ones(3, 'f')
        h5ds.set_scale(f['y1'].id, b'y1 name')
        h5ds.attach_scale(f['data'].id, f['y1'].id, 1)

        # z1 is a plain dataset: never made a scale, never attached.
        f['z1'] = np.ones(4, 'f')

        h5ds.set_label(f['data'].id, 0, b'z')
        h5ds.set_label(f['data'].id, 2, b'x')

    def tearDown(self):
        """ Close the file unless the test already did so """
        if not self.f:
            return
        self.f.close()
class TestH5DSBindings(BaseDataset):
    """
        Feature: the low-level h5py.h5ds functions work on the scales and
        labels prepared by BaseDataset.setUp
    """

    def test_create_dimensionscale(self):
        """ Create a dimension scale from existing dataset """
        x1_id = self.f['x1'].id
        self.assertTrue(h5py.h5ds.is_scale(x1_id))
        # x1 was made a scale without a name.
        self.assertEqual(h5py.h5ds.get_scale_name(x1_id), b'')
        self.assertEqual(self.f['x1'].attrs['CLASS'], b"DIMENSION_SCALE")
        self.assertEqual(
            h5py.h5ds.get_scale_name(self.f['x2'].id), b'x2 name'
        )

    def test_attach_dimensionscale(self):
        """ x1 is attached to axis 2 only; scale counts per axis are 0, 1, 2 """
        data_id = self.f['data'].id
        x1_id = self.f['x1'].id

        self.assertTrue(h5py.h5ds.is_attached(data_id, x1_id, 2))
        self.assertFalse(h5py.h5ds.is_attached(data_id, x1_id, 1))

        for axis, expected in enumerate((0, 1, 2)):
            self.assertEqual(h5py.h5ds.get_num_scales(data_id, axis), expected)

    def test_detach_dimensionscale(self):
        """ Detaching x1 from axis 2 leaves one scale on that axis """
        data_id = self.f['data'].id
        x1_id = self.f['x1'].id

        self.assertTrue(h5py.h5ds.is_attached(data_id, x1_id, 2))

        h5py.h5ds.detach_scale(data_id, x1_id, 2)

        self.assertFalse(h5py.h5ds.is_attached(data_id, x1_id, 2))
        self.assertEqual(h5py.h5ds.get_num_scales(data_id, 2), 1)

    def test_label_dimensionscale(self):
        """ Axes 0 and 2 carry labels, axis 1 has an empty label """
        data_id = self.f['data'].id
        for axis, label in enumerate((b'z', b'', b'x')):
            self.assertEqual(h5py.h5ds.get_label(data_id, axis), label)

    def test_iter_dimensionscales(self):
        """ iterate() hands back the value returned by the callback """
        def pick_x2(dsid):
            # Return the id of the scale named 'x2 name', otherwise None.
            if h5py.h5ds.get_scale_name(dsid) == b'x2 name':
                return dsid
            return None

        found = h5py.h5ds.iterate(self.f['data'].id, 2, pick_x2, 0)
        self.assertEqual(h5py.h5ds.get_scale_name(found), b'x2 name')
class TestDimensionManager(BaseDataset):
    """
        Feature: Dataset.dims gives access to the dimensions of a dataset
    """

    def test_make_scale(self):
        """ make_scale() renames an existing scale or creates a new one """
        f = self.f

        # test recreating or renaming an existing scale:
        f['x1'].make_scale(b'foobar')
        self.assertEqual(f['data'].dims[2]['foobar'], f['x1'])

        # test creating entirely new scale:
        f['data2'].make_scale(b'foobaz')
        f['data'].dims[2].attach_scale(f['data2'])
        self.assertEqual(f['data'].dims[2]['foobaz'], f['data2'])

    def test_get_dimension(self):
        """ Indexing past the last dimension raises IndexError """
        dims = self.f['data'].dims
        with self.assertRaises(IndexError):
            dims[3]

    def test_len(self):
        """ len(dims) is the dataset rank, with or without scales """
        for name in ('data', 'data2'):
            self.assertEqual(len(self.f[name].dims), 3)

    def test_iter(self):
        """ Iterating dims yields the dimensions in index order """
        dims = self.f['data'].dims
        expected = [dims[0], dims[1], dims[2]]
        self.assertEqual(list(dims), expected)

    def test_repr(self):
        """ repr(dims) is a str both before and after the file is closed """
        ds = self.f.create_dataset('x', (2, 3))
        self.assertIsInstance(repr(ds.dims), str)

        self.f.close()
        self.assertIsInstance(repr(ds.dims), str)
class TestDimensionsHighLevel(BaseDataset):
    """
        Feature: individual dimensions (Dataset.dims[i]) expose their label
        and attached scales through a high-level interface
    """

    def test_len(self):
        """ len() of a dimension is the number of attached scales """
        self.assertEqual(len(self.f['data'].dims[0]), 0)
        self.assertEqual(len(self.f['data'].dims[1]), 1)
        self.assertEqual(len(self.f['data'].dims[2]), 2)
        self.assertEqual(len(self.f['data2'].dims[0]), 0)
        self.assertEqual(len(self.f['data2'].dims[1]), 0)
        self.assertEqual(len(self.f['data2'].dims[2]), 0)

    def test_get_label(self):
        """ Labels set in setUp are returned as str; unset labels are '' """
        self.assertEqual(self.f['data'].dims[2].label, 'x')
        self.assertEqual(self.f['data'].dims[1].label, '')
        self.assertEqual(self.f['data'].dims[0].label, 'z')
        self.assertEqual(self.f['data2'].dims[2].label, '')
        self.assertEqual(self.f['data2'].dims[1].label, '')
        self.assertEqual(self.f['data2'].dims[0].label, '')

    def test_set_label(self):
        """ Assigning a label changes only that dimension """
        self.f['data'].dims[0].label = 'foo'
        self.assertEqual(self.f['data'].dims[2].label, 'x')
        self.assertEqual(self.f['data'].dims[1].label, '')
        self.assertEqual(self.f['data'].dims[0].label, 'foo')

    def test_detach_scale(self):
        """ Scales can be detached one at a time """
        self.f['data'].dims[2].detach_scale(self.f['x1'])
        self.assertEqual(len(self.f['data'].dims[2]), 1)
        self.assertEqual(self.f['data'].dims[2][0], self.f['x2'])
        self.f['data'].dims[2].detach_scale(self.f['x2'])
        self.assertEqual(len(self.f['data'].dims[2]), 0)

    def test_attach_scale(self):
        """ A newly attached scale is appended after the existing ones """
        self.f['x3'] = self.f['x2'][...]
        self.f['data'].dims[2].attach_scale(self.f['x3'])
        self.assertEqual(len(self.f['data'].dims[2]), 3)
        self.assertEqual(self.f['data'].dims[2][2], self.f['x3'])

    def test_get_dimension_scale(self):
        """ Scales are reachable by position and by name """
        self.assertEqual(self.f['data'].dims[2][0], self.f['x1'])

        # data2 has no scales, so positional lookup must fail.  Only the
        # lookup itself belongs in the assertRaises block.
        with self.assertRaises(RuntimeError):
            self.f['data2'].dims[2][0]

        self.assertEqual(self.f['data'].dims[2][''], self.f['x1'])
        self.assertEqual(self.f['data'].dims[2]['x2 name'], self.f['x2'])

    def test_get_items(self):
        """ items() pairs each scale name with its dataset """
        self.assertEqual(
            self.f['data'].dims[2].items(),
            [('', self.f['x1']), ('x2 name', self.f['x2'])]
        )

    def test_get_keys(self):
        """ keys() lists the scale names in attachment order """
        self.assertEqual(self.f['data'].dims[2].keys(), ['', 'x2 name'])

    def test_get_values(self):
        """ values() lists the scale datasets in attachment order """
        self.assertEqual(
            self.f['data'].dims[2].values(),
            [self.f['x1'], self.f['x2']]
        )

    def test_iter(self):
        """ Iterating a dimension yields the scale names """
        self.assertEqual(list(self.f['data'].dims[2]), ['', 'x2 name'])

    def test_repr(self):
        """ repr() mentions label and index; repr(dims) survives file close """
        ds = self.f["data"]
        self.assertEqual(repr(ds.dims[2])[1:16], '"x" dimension 2')
        self.f.close()
        self.assertIsInstance(repr(ds.dims), str)

    def test_attributes(self):
        """ Copying the DIMENSION_LIST attribute copies the scale attachments """
        self.f["data2"].attrs["DIMENSION_LIST"] = self.f["data"].attrs[
            "DIMENSION_LIST"]
        self.assertEqual(len(self.f['data2'].dims[0]), 0)
        self.assertEqual(len(self.f['data2'].dims[1]), 1)
        self.assertEqual(len(self.f['data2'].dims[2]), 2)

    def test_is_scale(self):
        """Test Dataset.is_scale property"""
        self.assertTrue(self.f['x1'].is_scale)
        self.assertTrue(self.f['x2'].is_scale)
        self.assertTrue(self.f['y1'].is_scale)
        self.assertFalse(self.f['z1'].is_scale)
        self.assertFalse(self.f['data'].is_scale)
        self.assertFalse(self.f['data2'].is_scale)
@@ -0,0 +1,22 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
"""
Tests the h5py.Dataset.dims.DimensionProxy class.
"""
from .common import TestCase
class TestItems(TestCase):
    """
        Feature: items() of a single dimension (Dataset.dims[i])
    """

    def test_empty(self):
        """ no dimension scales -> empty list """
        dset = self.f.create_dataset('x', (10,))
        first_dim = dset.dims[0]
        self.assertEqual(first_dim.items(), [])
@@ -0,0 +1,532 @@
"""
Tests for converting between numpy dtypes and h5py data types
"""
from itertools import count
import platform
import numpy as np
import h5py
try:
import tables
except ImportError:
tables = None
from .common import ut, TestCase
# Values of platform.machine() for which the long double types below are
# left out of TestOffsets.test_float_round_tripping.
UNSUPPORTED_LONG_DOUBLE = ('i386', 'i486', 'i586', 'i686', 'ppc64le')
# Names of the NumPy long double / long complex types excluded on those
# machines; the test only looks up the names NumPy actually defines.
UNSUPPORTED_LONG_DOUBLE_TYPES = ('float96', 'float128', 'complex192',
'complex256')
class TestVlen(TestCase):
    """
        Check that storage of vlen strings is carried out correctly.
    """

    def assertVlenArrayEqual(self, dset, arr, message=None, precision=None):
        """ Assert that two sequences of variable-length arrays are equal.

        The shapes must match, then every pair of entries is compared with
        assertArrayEqual, to which message and precision are passed through.
        """
        assert dset.shape == arr.shape, \
            "Shape mismatch (%s vs %s)%s" % (dset.shape, arr.shape, message)
        for d, a in zip(dset, arr):
            self.assertArrayEqual(d, a, message, precision)

    def test_compound(self):
        """ A vlen string member of a committed compound type stays UTF-8 """
        fields = [
            ('field_1', h5py.string_dtype()),
            ('field_2', np.int32),
        ]
        dt = np.dtype(fields)
        self.f['mytype'] = dt
        dt_out = self.f['mytype'].dtype.fields['field_1'][0]
        string_inf = h5py.check_string_dtype(dt_out)
        self.assertEqual(string_inf.encoding, 'utf-8')

    def test_compound_vlen_bool(self):
        """ Compound types mixing vlen and bool members round-trip """
        vidt = h5py.vlen_dtype(np.uint8)

        def a(items):
            return np.array(items, dtype=np.uint8)

        f = self.f

        # One vlen member followed by a bool.
        dt_vb = np.dtype([
            ('foo', vidt),
            ('logical', bool)])
        vb = f.create_dataset('dt_vb', shape=(4,), dtype=dt_vb)
        data = np.array([(a([1, 2, 3]), True),
                         (a([1]), False),
                         (a([1, 5]), True),
                         (a([]), False), ],
                        dtype=dt_vb)
        vb[:] = data
        actual = f['dt_vb'][:]
        self.assertVlenArrayEqual(data['foo'], actual['foo'])
        self.assertArrayEqual(data['logical'], actual['logical'])

        # These two layouts are only checked for successful creation.
        dt_vv = np.dtype([
            ('foo', vidt),
            ('bar', vidt)])
        f.create_dataset('dt_vv', shape=(4,), dtype=dt_vv)

        dt_vvb = np.dtype([
            ('foo', vidt),
            ('bar', vidt),
            ('logical', bool)])
        f.create_dataset('dt_vvb', shape=(2,), dtype=dt_vvb)

        # A bool followed by two vlen members.
        dt_bvv = np.dtype([
            ('logical', bool),
            ('foo', vidt),
            ('bar', vidt)])
        bvv = f.create_dataset('dt_bvv', shape=(2,), dtype=dt_bvv)
        # Build the array from the dtype itself, not from the Dataset object.
        data = np.array([(True, a([1, 2, 3]), a([1, 2])),
                         (False, a([]), a([2, 4, 6])), ],
                        dtype=dt_bvv)
        bvv[:] = data
        actual = bvv[:]
        self.assertVlenArrayEqual(data['foo'], actual['foo'])
        self.assertVlenArrayEqual(data['bar'], actual['bar'])
        self.assertArrayEqual(data['logical'], actual['logical'])

    def test_compound_vlen_enum(self):
        """ Compound types mixing vlen and enum members round-trip """
        eidt = h5py.enum_dtype({'OFF': 0, 'ON': 1}, basetype=np.uint8)
        vidt = h5py.vlen_dtype(np.uint8)

        def a(items):
            return np.array(items, dtype=np.uint8)

        f = self.f

        dt_vve = np.dtype([
            ('foo', vidt),
            ('bar', vidt),
            ('switch', eidt)])
        vve = f.create_dataset('dt_vve', shape=(2,), dtype=dt_vve)
        data = np.array([(a([1, 2, 3]), a([1, 2]), 1),
                         (a([]), a([2, 4, 6]), 0), ],
                        dtype=dt_vve)
        vve[:] = data
        actual = vve[:]
        self.assertVlenArrayEqual(data['foo'], actual['foo'])
        self.assertVlenArrayEqual(data['bar'], actual['bar'])
        self.assertArrayEqual(data['switch'], actual['switch'])

    def test_vlen_enum(self):
        """ A vlen-of-enum dataset keeps values and enum mapping across reopen """
        fname = self.mktemp()
        arr1 = [[1], [1, 2]]
        dt1 = h5py.vlen_dtype(h5py.enum_dtype(dict(foo=1, bar=2), 'i'))

        with h5py.File(fname, 'w') as f:
            df1 = f.create_dataset('test', (len(arr1),), dtype=dt1)
            df1[:] = np.array(arr1, dtype=object)

        with h5py.File(fname, 'r') as f:
            df2 = f['test']
            dt2 = df2.dtype
            arr2 = [e.tolist() for e in df2[:]]

        self.assertEqual(arr1, arr2)
        self.assertEqual(h5py.check_enum_dtype(h5py.check_vlen_dtype(dt1)),
                         h5py.check_enum_dtype(h5py.check_vlen_dtype(dt2)))
def test_write_empty_vlen(writable_file):
    """A zero-length record array with a vlen field can be written."""
    # vlen dtype with no entries
    empty_records = np.rec.fromarrays([[], []], names='a,b', formats='|V16,O')
    file_dtype = [
        ('a', '|V16'),
        ('b', h5py.special_dtype(vlen=np.float64)),
    ]

    dset = writable_file.create_dataset(
        'test', data=empty_records, dtype=file_dtype
    )

    assert dset.size == 0
def test_write_vlen_length0_compound(writable_file):
    """A vlen-of-compound dataset accepts an entry of length zero."""
    # one entry has variable length 0 (using the variable length)
    # https://github.com/h5py/h5py/issues/2693
    compound_dtype = np.dtype([('id', 'i4'), ('value', 'f8'), ('name', 'S10')])
    vlen_compound_dtype = h5py.special_dtype(vlen=compound_dtype)

    filled = np.array(
        [(1, 3.14, b'test1'), (2, 2.71, b'test2')], dtype=compound_dtype
    )
    empty = np.array([], dtype=compound_dtype)

    dset = writable_file.create_dataset(
        'vlen_compound_data', shape=(2,), dtype=vlen_compound_dtype
    )
    dset[0] = filled
    dset[1] = empty

    np.testing.assert_array_equal(dset[0], filled)  # With data
    np.testing.assert_array_equal(dset[1], empty)  # Without data
class TestExplicitCast(TestCase):
    """
        Feature: data is cast to the dtype requested at dataset creation
    """

    def test_f2_casting(self):
        """ float64 data stored as 'f2' reads back equal to astype('f2') """
        fname = self.mktemp()

        np.random.seed(1)
        original = np.random.rand(1500, 20)

        # Save to HDF5 file
        with h5py.File(fname, "w") as h5file:
            h5file.create_dataset("Data", data=original, dtype='f2')

        with h5py.File(fname, "r") as h5file:
            stored = h5file["Data"][:]

        # Compare
        expected = original.astype('f2')
        self.assertTrue(np.all(expected == stored))
class TestOffsets(TestCase):
    """
        Check that compound members with aligned or manual offsets are handled
        correctly.
    """

    def test_compound_vlen(self):
        """ Member offsets of a vlen/enum compound follow NumPy when aligned """
        vidt = h5py.vlen_dtype(np.uint8)
        eidt = h5py.enum_dtype({'OFF': 0, 'ON': 1}, basetype=np.uint8)
        members = [
            ('a', eidt),
            ('foo', vidt),
            ('bar', vidt),
            ('switch', eidt),
        ]

        for np_align in (False, True):
            dt = np.dtype(members, align=np_align)
            np_offsets = [dt.fields[name][1] for name in dt.names]

            for logical in (False, True):
                if logical and np_align:
                    # Vlen types have different size in the numpy struct
                    self.assertRaises(TypeError, h5py.h5t.py_create, dt,
                                      logical=logical)
                    continue

                ht = h5py.h5t.py_create(dt, logical=logical)
                offsets = [ht.get_member_offset(idx)
                           for idx in range(ht.get_nmembers())]
                if np_align:
                    self.assertEqual(np_offsets, offsets)

    def test_aligned_offsets(self):
        """ An aligned NumPy struct keeps its item size and member offsets """
        dt = np.dtype('i4,i8,i2', align=True)
        ht = h5py.h5t.py_create(dt)

        self.assertEqual(dt.itemsize, ht.get_size())

        numpy_offsets = [dt.fields[name][1] for name in dt.names]
        hdf5_offsets = [ht.get_member_offset(idx)
                        for idx in range(ht.get_nmembers())]
        self.assertEqual(numpy_offsets, hdf5_offsets)

    def test_aligned_data(self):
        """ Data in an aligned struct dtype round-trips through a file """
        dt = np.dtype('i4,f8,i2', align=True)
        data = np.zeros(10, dtype=dt)
        n = data.size

        data['f0'] = np.array(np.random.randint(-100, 100, size=n), dtype='i4')
        data['f1'] = np.random.rand(n)
        data['f2'] = np.array(np.random.randint(-100, 100, size=n), dtype='i2')

        fname = self.mktemp()

        with h5py.File(fname, 'w') as f:
            f['data'] = data

        with h5py.File(fname, 'r') as f:
            self.assertArrayEqual(f['data'], data)

    def test_compound_robustness(self):
        """ An out-of-order, gapped, padded compound dtype round-trips """
        # make an out of order compound type with gaps in it, and larger itemsize than minimum
        # Idea is to be robust to type descriptions we *could* get out of HDF5 files, from custom descriptions
        # of types in addition to numpy's flakey history on unaligned fields with non-standard or padded layouts.
        # Each entry is (name, format, byte offset).
        fields = [
            ('f0', np.float64, 25),
            ('f1', np.uint64, 9),
            ('f2', np.uint32, 0),
            ('f3', np.uint16, 5)
        ]
        # The field with the largest offset ends the struct; add 6 bytes of
        # trailing padding on top of it.
        lastfield = max(fields, key=lambda field: field[2])
        itemsize = lastfield[2] + np.dtype(lastfield[1]).itemsize + 6

        names, formats, offsets = (list(column) for column in zip(*fields))

        dt = np.dtype({
            'names': names,
            'formats': formats,
            'offsets': offsets,
            # 'aligned': False, - already defaults to False
            'itemsize': itemsize
        })

        self.assertTrue(dt.itemsize == itemsize)
        data = np.zeros(10, dtype=dt)
        n_rows = data.shape[0]

        # don't trust numpy struct handling, keep fields out of band in case content insertion is erroneous
        # yes... this has also been known to happen.
        f1 = np.array([4 * i + 1 for i in range(n_rows)], dtype=dt.fields['f1'][0])
        f2 = np.array([4 * i + 2 for i in range(n_rows)], dtype=dt.fields['f2'][0])
        f3 = np.array([4 * i + 3 for i in range(n_rows)], dtype=dt.fields['f3'][0])
        f0c = 3.14
        data['f0'] = f0c
        data['f3'] = f3
        data['f1'] = f1
        data['f2'] = f2

        # numpy consistency checks
        self.assertTrue(np.all(data['f0'] == f0c))
        self.assertArrayEqual(data['f3'], f3)
        self.assertArrayEqual(data['f1'], f1)
        self.assertArrayEqual(data['f2'], f2)

        fname = self.mktemp()

        with h5py.File(fname, 'w') as fd:
            fd.create_dataset('data', data=data)

        with h5py.File(fname, 'r') as fd:
            readback = fd['data']
            self.assertTrue(readback.dtype == dt)
            self.assertArrayEqual(readback, data)
            self.assertTrue(np.all(readback['f0'] == f0c))
            self.assertArrayEqual(readback['f1'], f1)
            self.assertArrayEqual(readback['f2'], f2)
            self.assertArrayEqual(readback['f3'], f3)

    def test_out_of_order_offsets(self):
        """ A struct whose offsets are not ascending round-trips """
        layout = {
            'names': ['f1', 'f2', 'f3'],
            'formats': ['<f4', '<i4', '<f8'],
            'offsets': [0, 16, 8]
        }
        dt = np.dtype(layout)
        data = np.zeros(10, dtype=dt)
        n = data.size

        data['f1'] = np.random.rand(n)
        data['f2'] = np.random.randint(-10, 11, n)
        data['f3'] = np.random.rand(n) * -1

        fname = self.mktemp()

        with h5py.File(fname, 'w') as fd:
            fd.create_dataset('data', data=data)

        with h5py.File(fname, 'r') as fd:
            self.assertArrayEqual(fd['data'], data)

    def test_float_round_tripping(self):
        """ Every float/complex type NumPy knows keeps its dtype in a file """
        dtypes = {
            sctype for sctype in np.sctypeDict.values()
            if (np.issubdtype(sctype, np.floating) or
                np.issubdtype(sctype, np.complexfloating))
        }

        # On the listed machines, leave out the long double types that NumPy
        # defines there.
        unsupported_types = []
        if platform.machine() in UNSUPPORTED_LONG_DOUBLE:
            unsupported_types = [
                getattr(np, type_name)
                for type_name in UNSUPPORTED_LONG_DOUBLE_TYPES
                if hasattr(np, type_name)
            ]

        # Dataset name (the enumeration index as a string) -> dtype.
        dtype_dset_map = {
            str(idx): sctype
            for idx, sctype in enumerate(dtypes)
            if sctype not in unsupported_types
        }

        fname = self.mktemp()

        with h5py.File(fname, 'w') as f:
            for name, sctype in dtype_dset_map.items():
                data = np.zeros(10, dtype=sctype)
                data[...] = np.arange(10)
                f.create_dataset(name, data=data)

        with h5py.File(fname, 'r') as f:
            for name, sctype in dtype_dset_map.items():
                ldata = f[name][:]
                self.assertEqual(ldata.dtype, sctype)
class TestStrings(TestCase):
    """
        Feature: h5py.string_dtype() and the matching check_* helpers agree
    """

    def test_vlen_utf8(self):
        """ Default string dtype: variable length, UTF-8, vlen base is str """
        dt = h5py.string_dtype()

        info = h5py.check_string_dtype(dt)
        assert info.encoding == 'utf-8'
        assert info.length is None
        assert h5py.check_vlen_dtype(dt) is str

    def test_vlen_ascii(self):
        """ ASCII variable-length strings: vlen base is bytes """
        dt = h5py.string_dtype(encoding='ascii')

        info = h5py.check_string_dtype(dt)
        assert info.encoding == 'ascii'
        assert info.length is None
        assert h5py.check_vlen_dtype(dt) is bytes

    def test_fixed_utf8(self):
        """ Fixed-length UTF-8 strings are not vlen dtypes """
        dt = h5py.string_dtype(length=10)

        info = h5py.check_string_dtype(dt)
        assert info.encoding == 'utf-8'
        assert info.length == 10
        assert h5py.check_vlen_dtype(dt) is None

    def test_fixed_ascii(self):
        """ Fixed-length ASCII strings are not vlen dtypes """
        dt = h5py.string_dtype(encoding='ascii', length=10)

        info = h5py.check_string_dtype(dt)
        assert info.encoding == 'ascii'
        assert info.length == 10
        assert h5py.check_vlen_dtype(dt) is None
class TestDateTime(TestCase):
    """
        Feature: datetime64/timedelta64 arrays can be stored via opaque_dtype
    """

    datetime_units = [
        # Dates
        'Y', 'M', 'D',
        # Times
        'h', 'm', 's', 'ms', 'us',
        'ns', 'ps', 'fs', 'as',
    ]

    # Byte orders every unit is tested with.
    _byte_orders = ['<', '>']

    def _check_roundtrip(self, fname, arr, dt):
        """ Write arr to a fresh file at fname and compare the dataset to it """
        with h5py.File(fname, 'w') as f:
            dset = f.create_dataset("default", data=arr, dtype=dt)
            self.assertArrayEqual(arr, dset)
            self.assertEqual(arr.dtype, dset.dtype)

    def test_datetime(self):
        """ datetime64 of every unit and byte order keeps values and dtype """
        fname = self.mktemp()

        for dt_unit in self.datetime_units:
            for dt_order in self._byte_orders:
                dt_descr = f'{dt_order}M8[{dt_unit}]'
                dt = h5py.opaque_dtype(np.dtype(dt_descr))
                # Reinterpret an int64 zero as the datetime dtype.
                arr = np.array([0], dtype=np.int64).view(dtype=dt)

                self._check_roundtrip(fname, arr, dt)

    def test_timedelta(self):
        """ timedelta64 of every unit and byte order keeps values and dtype """
        fname = self.mktemp()

        for dt_unit in self.datetime_units:
            for dt_order in self._byte_orders:
                dt_descr = f'{dt_order}m8[{dt_unit}]'
                dt = h5py.opaque_dtype(np.dtype(dt_descr))
                arr = np.array([np.timedelta64(500, dt_unit)], dtype=dt)

                self._check_roundtrip(fname, arr, dt)
@ut.skipUnless(tables is not None, 'tables is required')
class TestBitfield(TestCase):
    """
    Test H5T_NATIVE_B8 reading
    """

    def test_b8_bool(self):
        """ A plain bool array written by PyTables reads back as uint8 """
        arr1 = np.array([False, True], dtype=bool)
        self._test_b8(arr1, expected_default_cast_dtype=np.uint8)
        self._test_b8(
            arr1,
            expected_default_cast_dtype=np.uint8,
            cast_dtype=np.uint8
        )

    def test_b8_bool_compound(self):
        """ A bool member of a compound reads back as 'u1' """
        arr1 = np.array([(False,), (True,)], dtype=np.dtype([('x', '?')]))
        u1_dtype = np.dtype([('x', 'u1')])
        self._test_b8(arr1, expected_default_cast_dtype=u1_dtype)
        self._test_b8(
            arr1,
            expected_default_cast_dtype=u1_dtype,
            cast_dtype=np.dtype([('x', 'u1')])
        )

    def test_b8_bool_compound_nested(self):
        """ Bool members of a nested compound read back as 'u1' """
        bool_layout = [('x', '?'), ('y', [('a', '?'), ('b', '?')])]
        u1_layout = [('x', 'u1'), ('y', [('a', 'u1'), ('b', 'u1')])]
        arr1 = np.array(
            [(True, (True, False)), (True, (False, True))],
            dtype=np.dtype(bool_layout),
        )
        self._test_b8(arr1, expected_default_cast_dtype=np.dtype(u1_layout))
        self._test_b8(
            arr1,
            expected_default_cast_dtype=np.dtype(u1_layout),
            cast_dtype=np.dtype(u1_layout),
        )

    def test_b8_bool_compound_mixed_types(self):
        """ Only the bool member changes type; the float member is kept """
        arr1 = np.array(
            [(True, 0.5), (False, 0.2)],
            dtype=np.dtype([('x', '?'), ('y', '<f8')])
        )
        u1_layout = [('x', 'u1'), ('y', '<f8')]
        self._test_b8(arr1, expected_default_cast_dtype=np.dtype(u1_layout))
        self._test_b8(
            arr1,
            expected_default_cast_dtype=np.dtype(u1_layout),
            cast_dtype=np.dtype(u1_layout)
        )

    def test_b8_bool_array(self):
        """ A bool sub-array member reads back as a 'u1' sub-array """
        bool_dtype = np.dtype([('x', ('?', (3,)))])
        u1_dtype = np.dtype([('x', ('u1', (3,)))])
        arr1 = np.array(
            [((True, True, False),), ((True, False, True),)],
            dtype=bool_dtype,
        )
        self._test_b8(arr1, expected_default_cast_dtype=u1_dtype)
        # Here the explicit cast goes back to bool, not to 'u1'.
        self._test_b8(
            arr1,
            expected_default_cast_dtype=np.dtype([('x', ('u1', (3,)))]),
            cast_dtype=np.dtype([('x', ('?', (3,)))]),
        )

    def _test_b8(self, arr1, expected_default_cast_dtype, cast_dtype=None):
        """ Write arr1 with PyTables, then read it with h5py two ways.

        A plain read must equal arr1 cast to expected_default_cast_dtype.
        A read through astype() must equal arr1 cast to cast_dtype, which
        defaults to arr1's own dtype.
        """
        path = self.mktemp()
        with tables.open_file(path, 'w') as f:
            # Structured arrays become a table, everything else an array.
            writer = f.create_table if arr1.dtype.names else f.create_array
            writer('/', 'test', obj=arr1)

        with h5py.File(path, 'r') as f:
            dset = f['test']

            # This should do an implicit uint8 cast
            # Expect that the "No NumPy equivalent for TypeBitfieldID exists"
            # error is not thrown.
            arr2 = dset[:]

            self.assertArrayEqual(
                arr2,
                arr1.astype(expected_default_cast_dtype, copy=False)
            )

            # read cast dset and make sure it's equal
            target_dtype = arr1.dtype if cast_dtype is None else cast_dtype
            arr3 = dset.astype(target_dtype)[:]
            self.assertArrayEqual(arr3, arr1.astype(target_dtype, copy=False))

    def test_b16_uint16(self):
        """ uint16 data written to an STD_B16LE dataset reads back unchanged """
        arr1 = np.arange(10, dtype=np.uint16)
        path = self.mktemp()

        with h5py.File(path, 'w') as f:
            # Low-level creation, so the file type is the 16-bit bitfield.
            space = h5py.h5s.create_simple(arr1.shape)
            dset_id = h5py.h5d.create(f.id, b'test', h5py.h5t.STD_B16LE, space)
            dset = h5py.Dataset(dset_id)
            dset[:] = arr1

        with h5py.File(path, 'r') as f:
            dset = f['test']
            self.assertArrayEqual(dset[:], arr1)
def test_opaque(writable_file):
    """A NumPy void ('V2') array is stored as a 2-byte HDF5 opaque type."""
    # opaque without an h5py tag corresponds to numpy void dtypes
    void_arr = np.zeros(3, dtype='V2')
    ds = writable_file.create_dataset('v', data=void_arr)

    file_type = ds.id.get_type()
    assert isinstance(file_type, h5py.h5t.TypeOpaqueID)
    assert ds.id.get_type().get_size() == 2
    np.testing.assert_array_equal(ds[:], void_arr)
@@ -0,0 +1,84 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
"""
Tests the h5py.File object.
"""
import threading
import h5py
def _access_not_existing_object(filename):
    """Create a file and access not existing key"""
    with h5py.File(filename, 'w') as newfile:
        try:
            # The lookup itself raises KeyError on a freshly created file;
            # nothing needs to be read from the result.
            newfile['doesnt_exist']
        except KeyError:
            pass
def test_unsilence_errors(tmp_path, capfd):
    """Check that HDF5 errors can be muted/unmuted from h5py"""
    h5_path = tmp_path / 'test.h5'

    # Unmute HDF5 errors
    try:
        h5py._errors.unsilence_errors()

        _access_not_existing_object(h5_path)
        out, err = capfd.readouterr()
        assert err != ''
        assert out == ''

    # Mute HDF5 errors
    finally:
        h5py._errors.silence_errors()

    # With errors silenced again, the same failing access prints nothing.
    _access_not_existing_object(h5_path)
    out, err = capfd.readouterr()
    assert err == ''
    assert out == ''
def test_thread_hdf5_silence_error_membership(tmp_path, capfd):
    """Verify the error printing is squashed in all threads.

    No console messages should be shown from membership tests
    """
    h5_path = tmp_path / 'test.h5'
    worker = threading.Thread(
        target=_access_not_existing_object, args=(h5_path,)
    )
    worker.start()
    worker.join()

    out, err = capfd.readouterr()
    assert err == ''
    assert out == ''
def test_thread_hdf5_silence_error_attr(tmp_path, capfd):
    """Verify the error printing is squashed in all threads.

    No console messages should be shown for non-existing attributes
    """
    def read_missing_attr():
        # Runs in the worker thread: look up an attribute that was never set.
        with h5py.File(tmp_path/'test.h5', 'w') as newfile:
            newfile['newdata'] = [1, 2, 3]
            try:
                newfile['newdata'].attrs['nonexistent_attr']
            except KeyError:
                pass

    worker = threading.Thread(target=read_missing_attr)
    worker.start()
    worker.join()

    out, err = capfd.readouterr()
    assert err == ''
    assert out == ''
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,333 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
"""
Tests the h5py.File object.
"""
import h5py
from h5py._hl.files import _drivers
from h5py import File
from .common import ut, TestCase
import pytest
import io
import tempfile
import os
def nfiles():
    """Return h5f.get_obj_count(OBJ_ALL, OBJ_FILE), the count of file objects."""
    h5f = h5py.h5f
    return h5f.get_obj_count(h5f.OBJ_ALL, h5f.OBJ_FILE)
def ngroups():
    """Return h5f.get_obj_count(OBJ_ALL, OBJ_GROUP), the count of group objects."""
    h5f = h5py.h5f
    return h5f.get_obj_count(h5f.OBJ_ALL, h5f.OBJ_GROUP)
class TestDealloc(TestCase):
    """
        Behavior on object deallocation.  Note most of this behavior is
        delegated to FileID.
    """

    def test_autoclose(self):
        """ File objects close automatically when out of scope, but
        other objects remain open. """
        # Counts are compared against these baselines.  The test depends on
        # exactly which names hold references, so no extra ones are created.
        files_before = nfiles()
        groups_before = ngroups()

        fname = self.mktemp()
        f = h5py.File(fname, 'w')
        g = f['/']

        self.assertEqual(nfiles(), files_before + 1)
        self.assertEqual(ngroups(), groups_before + 1)

        # Dropping the File releases the file object; the group stays usable.
        del f

        self.assertTrue(g)
        self.assertEqual(nfiles(), files_before)
        self.assertEqual(ngroups(), groups_before + 1)

        # Asking the group for its file brings a file object back.
        f = g.file

        self.assertTrue(f)
        self.assertEqual(nfiles(), files_before + 1)
        self.assertEqual(ngroups(), groups_before + 1)

        del g

        self.assertEqual(nfiles(), files_before + 1)
        self.assertEqual(ngroups(), groups_before)

        del f

        self.assertEqual(nfiles(), files_before)
        self.assertEqual(ngroups(), groups_before)
class TestDriverRegistration(TestCase):
    """
        Feature: custom file drivers can be registered and unregistered
    """

    def test_register_driver(self):
        """ A registered driver gets the extra File() keyword arguments """
        called_with = [None]

        def set_fapl(plist, *args, **kwargs):
            # Record how the driver was called, then defer to 'sec2'.
            called_with[0] = args, kwargs
            return _drivers['sec2'](plist)

        h5py.register_driver('new-driver', set_fapl)
        try:
            self.assertIn('new-driver', h5py.registered_drivers())

            fname = self.mktemp()
            # Close the file explicitly rather than relying on the
            # unreferenced File object being collected.
            with h5py.File(fname, driver='new-driver', driver_arg_0=0,
                           driver_arg_1=1, mode='w'):
                pass

            self.assertEqual(
                called_with,
                [((), {'driver_arg_0': 0, 'driver_arg_1': 1})],
            )
        finally:
            # The driver registry is global; do not leave the test driver
            # behind for other tests.
            h5py.unregister_driver('new-driver')

    def test_unregister_driver(self):
        """ An unregistered driver is no longer listed nor usable """
        h5py.register_driver('new-driver', lambda plist: None)
        self.assertIn('new-driver', h5py.registered_drivers())

        h5py.unregister_driver('new-driver')
        self.assertNotIn('new-driver', h5py.registered_drivers())

        fname = self.mktemp()
        with self.assertRaises(ValueError) as e:
            h5py.File(fname, driver='new-driver', mode='w')

        self.assertEqual(str(e.exception), "Unknown driver type 'new-driver'")
class TestCache(TestCase):
    """
        Feature: the rdcc_* keywords of File() set the chunk cache parameters
    """

    def setUp(self):
        """ Pick the expected default chunk cache size for this HDF5 version """
        MiB = 1024 * 1024
        if h5py.version.hdf5_version_tuple < (2, 0, 0):
            self.dflt_chunk_cache = MiB
        else:
            self.dflt_chunk_cache = 8 * MiB

    def _cache_settings(self, **file_kwargs):
        """ Create a new file with file_kwargs and return its cache settings.

        The file is closed again before returning, so no handle is left open.
        """
        with h5py.File(self.mktemp(), 'w', **file_kwargs) as f:
            return list(f.id.get_access_plist().get_cache())

    def test_defaults(self):
        """ Without rdcc_* keywords the defaults are in effect """
        self.assertEqual(self._cache_settings(),
                         [0, 521, self.dflt_chunk_cache, 0.75])

    def test_nbytes(self):
        """ rdcc_nbytes changes only the third cache setting """
        self.assertEqual(self._cache_settings(rdcc_nbytes=1024),
                         [0, 521, 1024, 0.75])

    def test_nslots(self):
        """ rdcc_nslots changes only the second cache setting """
        self.assertEqual(self._cache_settings(rdcc_nslots=125),
                         [0, 125, self.dflt_chunk_cache, 0.75])

    def test_w0(self):
        """ rdcc_w0 changes only the fourth cache setting """
        self.assertEqual(self._cache_settings(rdcc_w0=0.25),
                         [0, 521, self.dflt_chunk_cache, 0.25])
class TestFileObj(TestCase):
    """
        Feature: h5py.File can operate on Python file-like objects
    """

    def check_write(self, fileobj):
        """ Create an HDF5 file in fileobj and write one dataset to it.

        The with-block closes the File even when an assertion fails.
        """
        with h5py.File(fileobj, 'w') as f:
            self.assertEqual(f.driver, 'fileobj')
            self.assertEqual(f.filename, repr(fileobj))
            f.create_dataset('test', data=list(range(12)))
            self.assertEqual(list(f), ['test'])
            self.assertEqual(list(f['test'][:]), list(range(12)))

    def check_read(self, fileobj):
        """ Read back what check_write stored and check writing is refused.

        The with-block closes the File even when an assertion fails.
        """
        with h5py.File(fileobj, 'r') as f:
            self.assertEqual(list(f), ['test'])
            self.assertEqual(list(f['test'][:]), list(range(12)))
            self.assertRaises(Exception, f.create_dataset, 'another.test',
                              data=list(range(3)))

    def test_BytesIO(self):
        """ Write to and read from an in-memory io.BytesIO """
        with io.BytesIO() as fileobj:
            self.assertEqual(len(fileobj.getvalue()), 0)
            self.check_write(fileobj)
            self.assertGreater(len(fileobj.getvalue()), 0)
            self.check_read(fileobj)

    def test_file(self):
        """ Write to and read from an ordinary file opened with open() """
        fname = self.mktemp()
        try:
            with open(fname, 'wb+') as fileobj:
                self.assertEqual(os.path.getsize(fname), 0)
                self.check_write(fileobj)
                self.assertGreater(os.path.getsize(fname), 0)
                self.check_read(fileobj)
            with open(fname, 'rb') as fileobj:
                self.check_read(fileobj)
        finally:
            os.remove(fname)

    @pytest.mark.filterwarnings(
        # at least on Windows and MacOS, a resource warning may be emitted
        # when this test returns
        "ignore::ResourceWarning"
    )
    def test_TemporaryFile(self):
        """ The File object keeps a temporary file alive until close() """
        # in this test, we check explicitly that temp file gets
        # automatically deleted upon h5py.File.close()...
        fileobj = tempfile.NamedTemporaryFile()
        fname = fileobj.name
        f = h5py.File(fileobj, 'w')
        del fileobj
        # ... but in your code feel free to simply
        # f = h5py.File(tempfile.TemporaryFile())

        f.create_dataset('test', data=list(range(12)))
        self.assertEqual(list(f), ['test'])
        self.assertEqual(list(f['test'][:]), list(range(12)))
        self.assertTrue(os.path.isfile(fname))
        f.close()
        self.assertFalse(os.path.isfile(fname))

    def test_exception_open(self):
        """ Objects that are not usable file objects are rejected on open """
        for not_a_file in (None, 'rogue', self):
            self.assertRaises(Exception, h5py.File, not_a_file,
                              driver='fileobj', mode='x')

    def test_exception_read(self):
        """ An exception raised by readinto() surfaces when reading data """
        class BrokenBytesIO(io.BytesIO):
            def readinto(self, b):
                raise Exception('I am broken')

        # The file is not closed explicitly here: reading stays broken for
        # the whole test.
        f = h5py.File(BrokenBytesIO(), 'w')
        f.create_dataset('test', data=list(range(12)))
        self.assertRaises(Exception, list, f['test'])

    def test_exception_write(self):
        """ An exception raised by write() surfaces when creating a dataset """
        class BrokenBytesIO(io.BytesIO):
            allow_write = False

            def write(self, b):
                if self.allow_write:
                    return super().write(b)
                else:
                    raise Exception('I am broken')

        bio = BrokenBytesIO()
        f = h5py.File(bio, 'w')
        try:
            self.assertRaises(Exception, f.create_dataset, 'test',
                              data=list(range(12)))
        finally:
            # Un-break writing so we can close: errors while closing get messy.
            bio.allow_write = True
            f.close()

    @ut.skip("Incompletely closed files can cause segfaults")
    def test_exception_close(self):
        """ Closing the File after its file object was closed raises """
        fileobj = io.BytesIO()
        f = h5py.File(fileobj, 'w')
        fileobj.close()
        self.assertRaises(Exception, f.close)

    def test_exception_writeonly(self):
        """ A write-only file object fails with io.UnsupportedOperation """
        # HDF5 expects read & write access to a file it's writing;
        # check that we get the correct exception on a write-only file object.
        path = os.path.join(self.tempdir, 'a.h5')
        # The with-block and try/finally keep the original close order
        # (h5py File first, then the Python file) and also run when an
        # assertion fails.
        with open(path, 'wb') as fileobj:
            f = h5py.File(fileobj, 'w')
            try:
                group = f.create_group("group")
                with self.assertRaises(io.UnsupportedOperation):
                    group.create_dataset("data", data='foo',
                                         dtype=h5py.string_dtype())
            finally:
                f.close()

    def test_method_vanish(self):
        """ Reading fails once the file object's readinto is removed """
        fileobj = io.BytesIO()
        f = h5py.File(fileobj, 'w')
        f.create_dataset('test', data=list(range(12)))
        self.assertEqual(list(f['test'][:]), list(range(12)))
        # Shadow the method with a non-callable on the instance.
        fileobj.readinto = None
        self.assertRaises(Exception, list, f['test'])
class TestTrackOrder(TestCase):
    """
        Feature: the ``track_order`` option controls the order in which the
        members of a file are iterated
    """

    def populate(self, f):
        """Add members named '0'..'99' to *f*; every tenth one is a group."""
        for i in range(100):
            # Mix group and dataset creation.
            if i % 10 == 0:
                f.create_group(str(i))
            else:
                f[str(i)] = [i]

    def test_track_order(self):
        fname = self.mktemp()
        expected = [str(i) for i in range(100)]
        # Context managers make sure both handles are closed, also when an
        # assertion fails.
        with h5py.File(fname, 'w', track_order=True) as f:  # creation order
            self.populate(f)
            self.assertEqual(list(f), expected)

        # Check order tracking after reopening the file
        with h5py.File(fname) as f2:
            self.assertEqual(list(f2), expected)

    def test_no_track_order(self):
        fname = self.mktemp()
        with h5py.File(fname, 'w', track_order=False) as f:  # name alphanumeric
            self.populate(f)
            self.assertEqual(list(f),
                             sorted([str(i) for i in range(100)]))
class TestFileMetaBlockSize(TestCase):
    """
        Feature: a file can be created with a chosen meta block size, which
        is reported back by the file and bounds the offset of the first
        dataset.
    """

    def test_file_create_with_meta_block_size_4096(self):
        # A large meta block: 4 kibibytes
        block_size = 4096
        path = self.mktemp()
        with File(path, 'w', meta_block_size=block_size, libver="latest") as h5file:
            h5file["test"] = 5
            self.assertEqual(h5file.meta_block_size, block_size)
            # Equality is expected for HDF5 1.10
            self.assertGreaterEqual(h5file["test"].id.get_offset(), block_size)

    def test_file_create_with_meta_block_size_512(self):
        # A small meta block: 512 bytes
        # (the smallest verifiable meta_block_size is 463)
        block_size = 512
        version_bound = "latest"
        path = self.mktemp()
        with File(path, 'w', meta_block_size=block_size, libver=version_bound) as h5file:
            h5file["test"] = 3
            self.assertEqual(h5file.meta_block_size, block_size)
            # Equality is expected for HDF5 1.10
            self.assertGreaterEqual(h5file["test"].id.get_offset(), block_size)
            # Default meta_block_size is 2048. This should fail if
            # meta_block_size is not set.
            upper_bound = block_size * 2
            self.assertLess(h5file["test"].id.get_offset(), upper_bound)
@@ -0,0 +1,103 @@
import h5py
from .common import TestCase
def is_aligned(dataset, offset=4096):
    """Return True when the dataset's file offset is a multiple of *offset*."""
    remainder = dataset.id.get_offset() % offset
    return remainder == 0
def dataset_name(i):
    """Return the dataset name for index *i*: 'data' plus *i* zero-padded to 3 digits."""
    return "data{:03}".format(i)
class TestFileAlignment(TestCase):
    """
        Ensure that setting the file alignment has the desired effect
        in the internal structure.
    """

    def test_no_alignment_set(self):
        fname = self.mktemp()
        # 881 is a prime number, which hopefully helps randomize the
        # placement enough. A nice even number might give a pathological
        # case where the data ends up aligned even though no alignment was
        # requested.
        shape = (881,)

        with h5py.File(fname, 'w') as h5file:
            # Create up to 1000 datasets; at least one of them should be
            # misaligned. This is not a perfect check, but the only case in
            # which all 1000 datasets get created is the one where the data
            # is aligned, so during correct operation the loop ends quickly.
            for i in range(1000):
                dataset = h5file.create_dataset(
                    dataset_name(i), shape, dtype='uint8')
                # Assign data so that the dataset is instantiated in
                # the file
                dataset[...] = (i % 256)  # Truncate to uint8
                if not is_aligned(dataset):
                    # Break early asserting that the file is not aligned
                    break
            else:
                raise RuntimeError("Data was all found to be aligned to 4096")

    def test_alignment_set_above_threshold(self):
        # 2022/01/19 hmaarrfk
        # UnitTest (TestCase) doesn't play well with pytest parametrization.
        alignment_threshold = 1000
        alignment_interval = 4096

        for shape in [
            (1033,),  # A prime number above the threshold
            (1000,),  # Exactly equal to the threshold
            (1001,),  # one above the threshold
        ]:
            fname = self.mktemp()
            with h5py.File(fname, 'w',
                           alignment_threshold=alignment_threshold,
                           alignment_interval=alignment_interval) as h5file:
                # Create up to 1000 datasets
                # They are all expected to be aligned
                for i in range(1000):
                    dataset = h5file.create_dataset(
                        dataset_name(i), shape, dtype='uint8')
                    # Assign data so that the dataset is instantiated in
                    # the file
                    dataset[...] = (i % 256)  # Truncate to uint8
                    assert is_aligned(dataset, offset=alignment_interval)

    def test_alignment_set_below_threshold(self):
        # 2022/01/19 hmaarrfk
        # UnitTest (TestCase) doesn't play well with pytest parametrization.
        alignment_threshold = 1000
        alignment_interval = 1024

        for shape in [
            (881,),  # A prime number below the threshold
            (999,),  # Exactly one below the threshold
        ]:
            fname = self.mktemp()
            with h5py.File(fname, 'w',
                           alignment_threshold=alignment_threshold,
                           alignment_interval=alignment_interval) as h5file:
                # Create up to 1000 datasets; at least one of them should
                # be misaligned. This is not a perfect check, but the only
                # case in which all 1000 datasets get created is the one
                # where the data is aligned, so during correct operation
                # the loop ends quickly.
                for i in range(1000):
                    dataset = h5file.create_dataset(
                        dataset_name(i), shape, dtype='uint8')
                    # Assign data so that the dataset is instantiated in
                    # the file
                    dataset[...] = (i % 256)  # Truncate to uint8
                    if not is_aligned(dataset, offset=alignment_interval):
                        # Break early asserting that the file is not aligned
                        break
                else:
                    raise RuntimeError(
                        "Data was all found to be aligned to "
                        f"{alignment_interval}. This is highly unlikely.")
@@ -0,0 +1,54 @@
import numpy as np
import h5py
from h5py import h5f, h5p
from .common import TestCase
class TestFileImage(TestCase):
    """
        Feature: a file can be opened from an in-memory HDF5 file image
    """

    def test_load_from_image(self):
        """Open a file image through a core-driver file access property list."""
        from binascii import a2b_base64
        from zlib import decompress

        # zlib-compressed, base64-encoded file image
        compressed_image = 'eJzr9HBx4+WS4mIAAQ4OBhYGAQZk8B8KKjhQ+TD5BCjNCKU7oPQKJpg4I1hOAiouCDUfXV1IkKsrSPV/NACzx4AFQnMwjIKRCDxcHQNAdASUD0ulJ5hQ1ZWkFpeAaFh69KDQXkYGNohZjDA+JCUzMkIEmKHqELQAWKkAByytOoBJViAPJM7ExATWyAE0B8RgZkyAJmlYDoEAIahukJoNU6+HMTA0UOgT6oBgP38XUI6G5UMFZrzKR8EoGAUjGMDKYVgxDSsuAHcfMK8='

        image = decompress(a2b_base64(compressed_image))

        fapl = h5p.create(h5py.h5p.FILE_ACCESS)
        fapl.set_fapl_core()
        fapl.set_file_image(image)

        fid = h5f.open(self.mktemp().encode(), h5py.h5f.ACC_RDONLY, fapl=fapl)
        # Close the file explicitly so the handle does not outlive the test.
        with h5py.File(fid) as f:
            self.assertIn('test', f)

    def test_open_from_image(self):
        """Open a file image directly with ``h5f.open_file_image``."""
        from binascii import a2b_base64
        from zlib import decompress

        # zlib-compressed, base64-encoded file image
        compressed_image = 'eJzr9HBx4+WS4mIAAQ4OBhYGAQZk8B8KKjhQ+TD5BCjNCKU7oPQKJpg4I1hOAiouCDUfXV1IkKsrSPV/NACzx4AFQnMwjIKRCDxcHQNAdASUD0ulJ5hQ1ZWkFpeAaFh69KDQXkYGNohZjDA+JCUzMkIEmKHqELQAWKkAByytOoBJViAPJM7ExATWyAE0B8RgZkyAJmlYDoEAIahukJoNU6+HMTA0UOgT6oBgP38XUI6G5UMFZrzKR8EoGAUjGMDKYVgxDSsuAHcfMK8='

        image = decompress(a2b_base64(compressed_image))

        fid = h5f.open_file_image(image)
        # Close the file explicitly so the handle does not outlive the test.
        with h5py.File(fid) as f:
            self.assertIn('test', f)
def test_in_memory():
    """Round-trip an array through in-memory files and a file image."""
    expected = np.arange(10)

    # Passing one fcpl & one fapl parameter to exercise the code splitting them:
    with h5py.File.in_memory(track_order=True, rdcc_nbytes=2_000_000) as first:
        first['a'] = expected
        first.flush()
        image = first.id.get_file_image()

        # A second file built from the image while the first is still open
        with h5py.File.in_memory(image) as second:
            np.testing.assert_array_equal(second['a'][:], expected)

    # The image is used again after both files above have been closed
    with h5py.File.in_memory(image) as third:
        np.testing.assert_array_equal(third['a'][:], expected)
@@ -0,0 +1,93 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
"""
Tests the h5py._hl.filters module.
"""
import os
import numpy as np
import h5py
from .common import ut, TestCase
class TestFilters(TestCase):
    """
        Tests for filter combinations passed to ``create_dataset``
    """

    def setUp(self):
        """ like TestCase.setUp but also store the file path """
        path = self.mktemp()
        self.path = path
        self.f = h5py.File(path, 'w')

    @ut.skipUnless(h5py.h5z.filter_avail(h5py.h5z.FILTER_SZIP), 'szip filter required')
    def test_wr_szip_fletcher32_64bit(self):
        """ test combination of szip, fletcher32, and 64bit arrays

        The fletcher32 checksum must be computed after the szip
        compression is applied.

        References:

        - GitHub issue #953
        - https://forum.hdfgroup.org/t/fletcher32-checksum-error-with-szip-compression-and-64bit-data/4141
        """
        payload = np.zeros(10000, dtype=np.float64)
        self.f.create_dataset(
            "test_data", data=payload, fletcher32=True, compression="szip")
        self.f.close()

        with h5py.File(self.path, "r") as reopened:
            # Access the data which will compute the fletcher32
            # checksum and raise an OSError if something is wrong.
            reopened["test_data"][0]

    def test_wr_scaleoffset_fletcher32(self):
        """ make sure that scaleoffset + fletcher32 is prevented
        """
        samples = np.linspace(0, 1, 100)
        options = dict(
            data=samples,
            fletcher32=True,
            # retain 3 digits after the decimal point
            scaleoffset=3,
        )
        with self.assertRaises(ValueError):
            self.f.create_dataset("test_data", **options)
@ut.skipIf('gzip' not in h5py.filters.encode, "DEFLATE is not installed")
def test_filter_ref_obj(writable_file):
    """A ``Gzip`` filter object works unpacked and as a ``compression`` value."""
    level8 = h5py.filters.Gzip(level=8)

    # **kwargs unpacking (compatible with earlier h5py versions)
    unpacked = dict(**level8)
    expected = {
        'compression': h5py.h5z.FILTER_DEFLATE,
        'compression_opts': (8,),
    }
    assert unpacked == expected

    # Pass object as compression argument (new in h5py 3.0)
    dset = writable_file.create_dataset(
        'x', shape=(100,), dtype=np.uint32, compression=level8)
    assert dset.compression == 'gzip'
    assert dset.compression_opts == 8
def test_filter_ref_obj_eq():
    """``Gzip`` objects with the same level are equal; different levels are not."""
    reference = h5py.filters.Gzip(level=8)
    same_level = h5py.filters.Gzip(level=8)
    assert reference == same_level
    other_level = h5py.filters.Gzip(level=7)
    assert reference != other_level
@ut.skipIf(not os.getenv('H5PY_TEST_CHECK_FILTERS'), "H5PY_TEST_CHECK_FILTERS not set")
def test_filters_available():
    """Both 'gzip' and 'lzf' must be listed for decoding and encoding."""
    for filter_name in ('gzip', 'lzf'):
        assert filter_name in h5py.filters.decode
        assert filter_name in h5py.filters.encode
File diff suppressed because it is too large Load Diff
@@ -0,0 +1,45 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
from h5py import h5
from .common import TestCase
def fixnames():
    """Set the configured complex field names back to ('r', 'i')."""
    h5.get_config().complex_names = ('r','i')
class TestH5(TestCase):
    """
        Tests for the configuration object returned by ``h5.get_config()``
    """

    def test_config(self):
        """Repeated calls hand back the same H5PYConfig instance."""
        first = h5.get_config()
        self.assertIsInstance(first, h5.H5PYConfig)
        second = h5.get_config()
        self.assertIs(first, second)

    def test_cnames_get(self):
        config = h5.get_config()
        self.assertEqual(config.complex_names, ('r','i'))

    def test_cnames_set(self):
        # Put the names back once the test is over
        self.addCleanup(fixnames)
        config = h5.get_config()
        config.complex_names = ('q','x')
        self.assertEqual(config.complex_names, ('q','x'))

    def test_cnames_set_exc(self):
        # Put the names back once the test is over
        self.addCleanup(fixnames)
        config = h5.get_config()
        with self.assertRaises(TypeError):
            config.complex_names = ('q','i','v')
        # The rejected assignment must leave the names untouched
        self.assertEqual(config.complex_names, ('r','i'))

    def test_repr(self):
        """``repr`` of the config object must not raise."""
        repr(h5.get_config())
@@ -0,0 +1,184 @@
import h5py
import numpy
import numpy.testing
import pytest
from .common import ut, TestCase
class TestWriteDirectChunk(TestCase):
    """
        Write chunks with ``write_direct_chunk`` and read them back normally
    """

    def test_write_direct_chunk(self):
        path = self.mktemp().encode()
        frame_count = 10

        with h5py.File(path, "w") as h5file:
            dset = h5file.create_dataset(
                "data",
                (100, 100, 100),
                maxshape=(None, 100, 100),
                chunks=(1, 100, 100),
                dtype='float32',
            )

            # writing: one random frame per chunk, remembered for the check
            expected = numpy.zeros((10, 100, 100))
            for frame_no in range(frame_count):
                frame = numpy.random.rand(100, 100).astype('float32')
                dset.id.write_direct_chunk(
                    (frame_no, 0, 0), frame.tobytes(), filter_mask=1)
                expected[frame_no] = frame

        # checking
        with h5py.File(path, "r") as h5file:
            for frame_no in range(frame_count):
                stored = h5file["data"][frame_no]
                numpy.testing.assert_array_equal(expected[frame_no], stored)
@ut.skipIf('gzip' not in h5py.filters.encode, "DEFLATE is not installed")
class TestReadDirectChunk(TestCase):
    """
        Read raw chunks with ``read_direct_chunk`` from gzip-filtered datasets
    """

    def test_read_compressed_offsets(self):
        path = self.mktemp().encode()
        with h5py.File(path, "w") as h5file:
            frame = numpy.arange(16).reshape(4, 4)
            single = h5file.create_dataset(
                "frame", data=frame, compression="gzip", compression_opts=9)
            stacked = h5file.create_dataset(
                "compressed_chunked",
                data=[frame, frame, frame],
                compression="gzip",
                compression_opts=9,
                chunks=(1, ) + frame.shape,
            )

            mask, reference_chunk = single.id.read_direct_chunk((0, 0))
            # No filter must be disabled
            self.assertEqual(mask, 0)

            for index in range(stacked.shape[0]):
                mask, chunk = stacked.id.read_direct_chunk((index, 0, 0))
                self.assertEqual(reference_chunk, chunk)
                # No filter must be disabled
                self.assertEqual(mask, 0)

    def test_read_uncompressed_offsets(self):
        path = self.mktemp().encode()
        frame = numpy.arange(16).reshape(4, 4)
        frame_shape = (1,) + frame.shape

        with h5py.File(path, "w") as h5file:
            dset = h5file.create_dataset(
                "frame",
                maxshape=frame_shape,
                shape=frame_shape,
                compression="gzip",
                compression_opts=9,
            )
            # Write uncompressed data
            DISABLE_ALL_FILTERS = 0xFFFFFFFF
            dset.id.write_direct_chunk(
                (0, 0, 0), frame.tobytes(), filter_mask=DISABLE_ALL_FILTERS)

        # FIXME: Here we have to close the file and load it back else
        #     a runtime error occurs:
        #     RuntimeError: Can't get storage size of chunk (chunk storage is not allocated)
        with h5py.File(path, "r") as h5file:
            dset = h5file["frame"]
            mask, raw_chunk = dset.id.read_direct_chunk((0, 0, 0))

        # At least 1 filter is supposed to be disabled
        self.assertNotEqual(mask, 0)
        self.assertEqual(raw_chunk, frame.tobytes())

    def test_read_write_chunk(self):
        path = self.mktemp().encode()
        with h5py.File(path, "w") as h5file:
            # create a reference
            frame = numpy.arange(16).reshape(4, 4)
            source = h5file.create_dataset(
                "source", data=frame, compression="gzip", compression_opts=9)

            # configure an empty dataset
            mask, raw_chunk = source.id.read_direct_chunk((0, 0))
            target = h5file.create_dataset(
                "created",
                shape=source.shape,
                maxshape=source.shape,
                chunks=source.chunks,
                dtype=source.dtype,
                compression="gzip",
                compression_opts=9,
            )

            # copy the data
            target.id.write_direct_chunk((0, 0), raw_chunk, filter_mask=mask)

        # checking
        with h5py.File(path, "r") as h5file:
            copied = h5file["created"][...]
            numpy.testing.assert_array_equal(copied, frame)
class TestReadDirectChunkToOut:
    """Tests for ``read_direct_chunk`` when called with an ``out`` buffer."""

    @staticmethod
    def _make_uncompressed(h5file):
        """Create the single-chunk 'uncompressed' dataset; return (data, dataset)."""
        ref_data = numpy.arange(16).reshape(4, 4)
        dataset = h5file.create_dataset(
            "uncompressed", data=ref_data, chunks=ref_data.shape)
        return ref_data, dataset

    def test_uncompressed_data(self, writable_file):
        ref_data, dataset = self._make_uncompressed(writable_file)

        out = bytearray(ref_data.nbytes)
        filter_mask, chunk = dataset.id.read_direct_chunk((0, 0), out=out)

        decoded = numpy.frombuffer(out, dtype=ref_data.dtype).reshape(ref_data.shape)
        assert numpy.array_equal(decoded, ref_data)
        assert filter_mask == 0
        assert len(chunk) == ref_data.nbytes

    @pytest.mark.skipif(
        'gzip' not in h5py.filters.encode,
        reason="DEFLATE is not installed",
    )
    def test_compressed_data(self, writable_file):
        ref_data = numpy.arange(16).reshape(4, 4)
        dataset = writable_file.create_dataset(
            "gzip",
            data=ref_data,
            chunks=ref_data.shape,
            compression="gzip",
            compression_opts=9,
        )
        info = dataset.id.get_chunk_info(0)

        # Size the buffer from the stored chunk size reported for the chunk
        out = bytearray(info.size)
        filter_mask, chunk = dataset.id.read_direct_chunk(
            info.chunk_offset, out=out)

        assert filter_mask == info.filter_mask
        assert len(chunk) == info.size
        plain_read = dataset.id.read_direct_chunk(info.chunk_offset)
        assert out == plain_read[1]

    def test_fail_buffer_too_small(self, writable_file):
        ref_data, dataset = self._make_uncompressed(writable_file)

        half_sized = bytearray(ref_data.nbytes // 2)
        with pytest.raises(ValueError):
            dataset.id.read_direct_chunk((0, 0), out=half_sized)

    def test_fail_buffer_readonly(self, writable_file):
        ref_data, dataset = self._make_uncompressed(writable_file)

        immutable = bytes(ref_data.nbytes)
        with pytest.raises(BufferError):
            dataset.id.read_direct_chunk((0, 0), out=immutable)

    def test_fail_buffer_not_contiguous(self, writable_file):
        ref_data, dataset = self._make_uncompressed(writable_file)

        backing = numpy.empty(ref_data.shape + (2,), dtype=ref_data.dtype)
        strided = backing[:, :, ::2]  # Array is not contiguous
        with pytest.raises(ValueError):
            dataset.id.read_direct_chunk((0, 0), out=strided)
@@ -0,0 +1,108 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
import tempfile
import shutil
import os
import numpy as np
from h5py import File, special_dtype
from h5py._hl.files import direct_vfd
from .common import ut, TestCase
class TestFileID(TestCase):
    """
        Tests for ``get_vfd_handle`` with different file drivers
    """

    def test_descriptor_core(self):
        options = dict(driver='core', backing_store=False, mode='x')
        with File('TestFileID.test_descriptor_core', **options) as f:
            handle = f.id.get_vfd_handle()
            assert isinstance(handle, int)

    def test_descriptor_sec2(self):
        workdir = tempfile.mkdtemp('h5py.lowtest.test_h5f.TestFileID.test_descriptor_sec2')
        h5_path = os.path.join(workdir, 'test.h5')
        try:
            with File(h5_path, driver='sec2', mode='x') as f:
                fd = f.id.get_vfd_handle()
                self.assertNotEqual(fd, 0)
                # fsync only works on a real OS-level descriptor
                os.fsync(fd)
        finally:
            shutil.rmtree(workdir)

    @ut.skipUnless(direct_vfd,
                   "DIRECT driver is supported on Linux if hdf5 is "
                   "built with the appriorate flags.")
    def test_descriptor_direct(self):
        workdir = tempfile.mkdtemp('h5py.lowtest.test_h5f.TestFileID.test_descriptor_direct')
        h5_path = os.path.join(workdir, 'test.h5')
        try:
            with File(h5_path, driver='direct', mode='x') as f:
                fd = f.id.get_vfd_handle()
                self.assertNotEqual(fd, 0)
                # fsync only works on a real OS-level descriptor
                os.fsync(fd)
        finally:
            shutil.rmtree(workdir)
class TestCacheConfig(TestCase):
    """
        Tests for the metadata cache (mdc) accessors of a file id
    """

    def test_simple_gets(self):
        workdir = tempfile.mkdtemp('h5py.lowtest.test_h5f.TestFileID.TestCacheConfig.test_simple_gets')
        h5_path = os.path.join(workdir, 'test.h5')
        try:
            with File(h5_path, mode='x') as f:
                # No assertion on the results: the test only checks that
                # both getters can be called.
                f._id.get_mdc_hit_rate()
                f._id.get_mdc_size()
        finally:
            shutil.rmtree(workdir)

    def test_hitrate_reset(self):
        workdir = tempfile.mkdtemp('h5py.lowtest.test_h5f.TestFileID.TestCacheConfig.test_hitrate_reset')
        h5_path = os.path.join(workdir, 'test.h5')
        try:
            with File(h5_path, mode='x') as f:
                f._id.get_mdc_hit_rate()
                f._id.reset_mdc_hit_rate_stats()
                rate_after_reset = f._id.get_mdc_hit_rate()
                assert rate_after_reset == 0
        finally:
            shutil.rmtree(workdir)

    def test_mdc_config_get(self):
        workdir = tempfile.mkdtemp('h5py.lowtest.test_h5f.TestFileID.TestCacheConfig.test_mdc_config_get')
        h5_path = os.path.join(workdir, 'test.h5')
        try:
            with File(h5_path, mode='x') as f:
                # Feed the configuration that was read straight back in
                current = f._id.get_mdc_config()
                f._id.set_mdc_config(current)
        finally:
            shutil.rmtree(workdir)
class TestVlenData(TestCase):
    """
        Round-trip variable-length data through a file
    """

    def test_vlen_strings(self):
        # Create file with dataset containing vlen arrays of vlen strings
        workdir = tempfile.mkdtemp('h5py.lowtest.test_h5f.TestVlenStrings.test_vlen_strings')
        h5_path = os.path.join(workdir, 'test.h5')
        try:
            with File(h5_path, mode='w') as writer:
                str_type = special_dtype(vlen=str)
                nested_type = special_dtype(vlen=str_type)

                dset = writer.create_dataset('/com', (2,), dtype=nested_type)
                dset[0] = (np.array(["a", "b", "c"], dtype=nested_type))
                dset[1] = (np.array(["d", "e", "f","g"], dtype=nested_type))

            with File(h5_path, "r") as reader:
                dset = reader["com"]
                # The stored strings are expected back as bytes
                assert dset[0].tolist() == [b'a', b'b', b'c']
                assert dset[1].tolist() == [b'd', b'e', b'f', b'g']
        finally:
            shutil.rmtree(workdir)
@@ -0,0 +1,21 @@
import pytest
from .common import TestCase
from h5py import File
class SampleException(Exception):
    """Exception type raised by the visitor callback used in these tests."""
def throwing(name, obj):
    """Visitor callback: print the visited item, then raise SampleException."""
    print(name, obj)
    error = SampleException("throwing exception")
    raise error
class TestVisit(TestCase):
    """
        An exception raised by a ``visititems`` callback reaches the caller
    """

    def test_visit(self):
        fname = self.mktemp()
        # The context manager closes the file even if the expectation
        # below is not met.
        with File(fname, 'w') as fid:
            fid.create_dataset('foo', (100,), dtype='uint8')
            with pytest.raises(SampleException, match='throwing exception'):
                fid.visititems(throwing)
@@ -0,0 +1,233 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
import unittest as ut
from h5py import h5p, h5f, version
from .common import TestCase
class TestLibver(TestCase):
    """
        Feature: library version bounds can be set on and read back from a
        file access property list
    """

    def _check_roundtrip(self, low, high):
        """Set (low, high) on a fresh file access plist and read them back."""
        fapl = h5p.create(h5p.FILE_ACCESS)
        fapl.set_libver_bounds(low, high)
        self.assertEqual((low, high), fapl.get_libver_bounds())

    def test_libver(self):
        """ Test libver bounds set/get """
        self._check_roundtrip(h5f.LIBVER_EARLIEST, h5f.LIBVER_LATEST)

    def test_libver_v18(self):
        """ Test libver bounds set/get for H5F_LIBVER_V18"""
        self._check_roundtrip(h5f.LIBVER_EARLIEST, h5f.LIBVER_V18)

    def test_libver_v110(self):
        """ Test libver bounds set/get for H5F_LIBVER_V110"""
        self._check_roundtrip(h5f.LIBVER_V18, h5f.LIBVER_V110)

    @ut.skipIf(version.hdf5_version_tuple < (1, 11, 4),
               'Requires HDF5 1.11.4 or later')
    def test_libver_v112(self):
        """ Test libver bounds set/get for H5F_LIBVER_V112"""
        self._check_roundtrip(h5f.LIBVER_V18, h5f.LIBVER_V112)

    @ut.skipIf(version.hdf5_version_tuple < (1, 14, 0),
               'Requires HDF5 1.14 or later')
    def test_libver_v114(self):
        """ Test libver bounds set/get for H5F_LIBVER_V114"""
        self._check_roundtrip(h5f.LIBVER_V18, h5f.LIBVER_V114)

    @ut.skipIf(version.hdf5_version_tuple < (2, 0, 0),
               'Requires HDF5 2.0 or later')
    def test_libver_v200(self):
        """ Test libver bounds set/get for H5F_LIBVER_V200"""
        self._check_roundtrip(h5f.LIBVER_V18, h5f.LIBVER_V200)
class TestDA(TestCase):
    '''
    Feature: options can be set on and read back from a dataset access
    property list (chunk cache, efile prefix, virtual prefix)
    '''

    def test_chunk_cache(self):
        '''test get/set chunk cache '''
        dapl = h5p.create(h5p.DATASET_ACCESS)
        settings = (
            10000,    # nslots: 40kb hash table
            1000000,  # nbytes: 1MB cache size
            .5,       # w0: even blend of eviction strategy
        )
        dapl.set_chunk_cache(*settings)
        self.assertEqual(settings, dapl.get_chunk_cache())

    def test_efile_prefix(self):
        '''test get/set efile prefix '''
        dapl = h5p.create(h5p.DATASET_ACCESS)
        # Nothing has been set yet: the prefix reads back empty
        self.assertEqual(dapl.get_efile_prefix().decode(), '')

        for prefix in ("path/to/external/dataset", "${ORIGIN}"):
            dapl.set_efile_prefix(prefix.encode('utf-8'))
            self.assertEqual(dapl.get_efile_prefix().decode(), prefix)

    def test_virtual_prefix(self):
        '''test get/set virtual prefix '''
        dapl = h5p.create(h5p.DATASET_ACCESS)
        # Nothing has been set yet: the prefix reads back empty
        self.assertEqual(dapl.get_virtual_prefix().decode(), '')

        prefix = "path/to/virtual/dataset"
        dapl.set_virtual_prefix(prefix.encode('utf-8'))
        self.assertEqual(dapl.get_virtual_prefix().decode(), prefix)
class TestFA(TestCase):
    '''
    Feature: options can be set on and read back from a file access
    property list (mdc config, alignment, file locking)
    '''

    def test_mdc_config(self):
        '''test get/set mdc config '''
        fapl = h5p.create(h5p.FILE_ACCESS)
        # Feed the configuration that was read straight back in
        current = fapl.get_mdc_config()
        fapl.set_mdc_config(current)

    def test_set_alignment(self):
        '''test get/set alignment '''
        fapl = h5p.create(h5p.FILE_ACCESS)
        expected = (
            10 * 1024,    # threshold: 10 KiB
            1024 * 1024,  # alignment: 1 MiB
        )
        fapl.set_alignment(*expected)
        self.assertEqual(expected, fapl.get_alignment())

    def test_set_file_locking(self):
        '''test get/set file locking'''
        fapl = h5p.create(h5p.FILE_ACCESS)
        # (use_file_locking, ignore_when_disabled)
        expected = (False, False)
        fapl.set_file_locking(*expected)
        self.assertEqual(expected, fapl.get_file_locking())
class TestPL(TestCase):
    """
        Tests for set/get pairs on creation property lists
    """

    def test_obj_track_times(self):
        """
        tests if the object track times  set/get
        """
        # groups, datasets and generic objects, in that order
        plist_classes = (h5p.GROUP_CREATE, h5p.DATASET_CREATE, h5p.OBJECT_CREATE)
        for plist_class in plist_classes:
            plist = h5p.create(plist_class)
            for flag in (False, True):
                plist.set_obj_track_times(flag)
                self.assertEqual(flag, plist.get_obj_track_times())

    def test_link_creation_tracking(self):
        """
        tests the link creation order set/get
        """
        group_plist = h5p.create(h5p.GROUP_CREATE)
        group_plist.set_link_creation_order(0)
        self.assertEqual(0, group_plist.get_link_creation_order())

        tracked_indexed = h5p.CRT_ORDER_TRACKED | h5p.CRT_ORDER_INDEXED
        group_plist.set_link_creation_order(tracked_indexed)
        self.assertEqual(tracked_indexed, group_plist.get_link_creation_order())

        # test for file creation
        file_plist = h5p.create(h5p.FILE_CREATE)
        file_plist.set_link_creation_order(tracked_indexed)
        self.assertEqual(tracked_indexed, file_plist.get_link_creation_order())

    def test_attr_phase_change(self):
        """
        test the attribute phase change
        """
        plist = h5p.create(h5p.OBJECT_CREATE)

        # test default value
        defaults = plist.get_attr_phase_change()
        self.assertEqual((8,6), defaults)

        # max_compact must < 65536 (64kb)
        with self.assertRaises(ValueError):
            plist.set_attr_phase_change(65536, 6)

        # Using dense attributes storage to avoid 64kb size limitation
        # for a single attribute in compact attribute storage.
        plist.set_attr_phase_change(0, 0)
        self.assertEqual((0,0), plist.get_attr_phase_change())
def test_proplaid():
    """Test Link Access Property List"""
    lapl = h5p.create(h5p.LINK_ACCESS)

    nlinks = 3
    lapl.set_nlinks(nlinks)
    assert lapl.get_nlinks() == nlinks

    prefix = b"/prefix"
    lapl.set_elink_prefix(prefix)
    assert lapl.get_elink_prefix() == prefix

    # Flags are combined with bitwise OR. The previous `&` masked the two
    # flags against each other instead of combining them, so the value
    # that was round-tripped did not carry both flags.
    flags = h5f.ACC_RDWR | h5f.ACC_SWMR_WRITE
    lapl.set_elink_acc_flags(flags)
    assert lapl.get_elink_acc_flags() == flags

    # The file access plist attached for external links must keep the
    # file locking settings it was given.
    fapl = h5p.create(h5p.FILE_ACCESS)
    fapl.set_file_locking(False, False)
    lapl.set_elink_fapl(fapl)
    assert lapl.get_elink_fapl().get_file_locking() == (False, False)

    fapl.close()
    lapl.close()
@@ -0,0 +1,68 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2019 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
import pytest
from h5py import h5pl
from h5py.tests.common import insubprocess, subproc_env
@pytest.mark.mpi_skip
@insubprocess
@subproc_env({'HDF5_PLUGIN_PATH': 'h5py_plugin_test'})
def test_default(request):
    """Expect a single search path entry, equal to b'h5py_plugin_test'."""
    entry_count = h5pl.size()
    assert entry_count == 1
    first_entry = h5pl.get(0)
    assert first_entry == b'h5py_plugin_test'
@pytest.mark.mpi_skip
@insubprocess
@subproc_env({'HDF5_PLUGIN_PATH': 'h5py_plugin_test'})
def test_append(request):
    """An appended path is expected after the existing entry."""
    extra_path = b'/opt/hdf5/vendor-plugin'
    h5pl.append(extra_path)
    assert h5pl.size() == 2
    assert h5pl.get(0) == b'h5py_plugin_test'
    assert h5pl.get(1) == extra_path
@pytest.mark.mpi_skip
@insubprocess
@subproc_env({'HDF5_PLUGIN_PATH': 'h5py_plugin_test'})
def test_prepend(request):
    """A prepended path is expected before the existing entry."""
    extra_path = b'/opt/hdf5/vendor-plugin'
    h5pl.prepend(extra_path)
    assert h5pl.size() == 2
    assert h5pl.get(0) == extra_path
    assert h5pl.get(1) == b'h5py_plugin_test'
@pytest.mark.mpi_skip
@insubprocess
@subproc_env({'HDF5_PLUGIN_PATH': 'h5py_plugin_test'})
def test_insert(request):
    """A path inserted at index 0 is expected before the existing entry."""
    extra_path = b'/opt/hdf5/vendor-plugin'
    h5pl.insert(extra_path, 0)
    assert h5pl.size() == 2
    assert h5pl.get(0) == extra_path
    assert h5pl.get(1) == b'h5py_plugin_test'
@pytest.mark.mpi_skip
@insubprocess
@subproc_env({'HDF5_PLUGIN_PATH': 'h5py_plugin_test'})
def test_replace(request):
    """Replacing index 0 keeps one entry, now holding the new path."""
    new_path = b'/opt/hdf5/vendor-plugin'
    h5pl.replace(new_path, 0)
    assert h5pl.size() == 1
    assert h5pl.get(0) == new_path
@pytest.mark.mpi_skip
@insubprocess
@subproc_env({'HDF5_PLUGIN_PATH': 'h5py_plugin_test'})
def test_remove(request):
    """Removing index 0 is expected to leave no search path entries."""
    h5pl.remove(0)
    remaining = h5pl.size()
    assert remaining == 0
@@ -0,0 +1,24 @@
from h5py import h5s
from h5py._selector import Selector
class Helper:
    """Indexable helper that turns an indexing expression into a dataspace.

    ``Helper(shape)[...]`` creates a simple dataspace of the given shape,
    applies the indexing arguments to it through ``Selector`` and returns
    the dataspace.
    """

    def __init__(self, shape: tuple):
        self.shape = shape

    def __getitem__(self, item) -> h5s.SpaceID:
        # A single index is wrapped so a tuple is always passed on
        args = item if isinstance(item, tuple) else (item,)
        dataspace = h5s.create_simple(self.shape)
        Selector(dataspace).make_selection(args)
        return dataspace
def test_same_shape():
    """Selections of equal extent compare as same-shaped; transposed ones do not."""
    make = Helper((5, 6))

    top_left = make[:3, :4]
    # Same 3x4 extent, taken from the opposite corner
    bottom_right = Helper((5, 6))[2:, 2:]
    assert top_left.select_shape_same(bottom_right)

    # 4x3 instead of 3x4
    transposed = Helper((5, 6))[:4, :3]
    assert not top_left.select_shape_same(transposed)
@@ -0,0 +1,188 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
import numpy as np
import h5py
from h5py import h5t
from .common import TestCase, ut
class TestCompound(ut.TestCase):
    """
        Feature: Compound types can be created from Python dtypes
    """

    def test_ref(self):
        """ Reference types are correctly stored in compound types (issue 144)
        """
        compound = np.dtype([('a', h5py.ref_dtype), ('b', '<f4')])
        tid = h5t.py_create(compound, logical=True)

        ref_member = tid.get_member_type(0)
        float_member = tid.get_member_type(1)
        self.assertEqual(ref_member, h5t.STD_REF_OBJ)
        self.assertEqual(float_member, h5t.IEEE_F32LE)

        # The float member is expected right after the reference member
        self.assertEqual(tid.get_member_offset(0), 0)
        self.assertEqual(tid.get_member_offset(1), h5t.STD_REF_OBJ.get_size())

    def test_out_of_order_offsets(self):
        """Members inserted with non-increasing offsets give the expected dtype."""
        itemsize = 20
        layout = {
            'names': ['f1', 'f2', 'f3'],
            'formats': ['<f4', '<i4', '<f8'],
            'offsets': [0, 16, 8],
        }
        expected = np.dtype(layout)

        tid = h5t.create(h5t.COMPOUND, itemsize)
        members = zip(
            layout["names"], layout["offsets"], layout["formats"], strict=True
        )
        for field, offset, fmt in members:
            # tid.insert is given the member name as bytes
            encoded = field.encode("utf8") if isinstance(field, str) else field
            tid.insert(encoded, offset, h5t.py_create(fmt))

        self.assertEqual(tid.dtype, expected)
        self.assertEqual(tid.dtype.itemsize, itemsize)
class TestTypeFloatID(TestCase):
    """Test TypeFloatID."""

    def test_custom_float_promotion(self):
        """Custom floats are correctly promoted to standard floats on read.

        Five datasets are written through the low-level API using custom
        16-bit float types (and native long double), then read back through
        the high-level API to check which NumPy dtype each one maps to.
        """

        # This test uses the low-level API, so we need names as byte strings
        test_filename = self.mktemp().encode()
        dataset = b'DS1'
        dataset2 = b'DS2'
        dataset3 = b'DS3'
        dataset4 = b'DS4'
        dataset5 = b'DS5'

        dims = (4, 7)

        wdata = np.array([[-1.50066626e-09, 1.40062184e-09, 1.81216819e-10,
                           4.01087163e-10, 4.27917257e-10, -7.04858394e-11,
                           5.74800652e-10],
                          [-1.50066626e-09, 4.86579665e-10, 3.42879503e-10,
                           5.12045517e-10, 5.10226528e-10, 2.24190444e-10,
                           3.93356459e-10],
                          [-1.50066626e-09, 5.24778443e-10, 8.19454726e-10,
                           1.28966349e-09, 1.68483894e-10, 5.71276360e-11,
                           -1.08684617e-10],
                          [-1.50066626e-09, -1.08343556e-10, -1.58934199e-10,
                           8.52196536e-10, 6.18456397e-10, 6.16637408e-10,
                           1.31694833e-09]], dtype=np.float32)

        wdata2 = np.array([[-1.50066626e-09, 5.63886715e-10, -8.74251782e-11,
                            1.32558853e-10, 1.59161573e-10, 2.29420039e-10,
                            -7.24185156e-11],
                           [-1.50066626e-09, 1.87810656e-10, 7.74889486e-10,
                            3.95630195e-10, 9.42236511e-10, 8.38554115e-10,
                            -8.71978045e-11],
                           [-1.50066626e-09, 6.20275387e-10, 7.34871719e-10,
                            6.64840627e-10, 2.64662958e-10, 1.05319486e-09,
                            1.68256520e-10],
                           [-1.50066626e-09, 1.67347025e-10, 5.12045517e-10,
                            3.36513040e-10, 1.02545528e-10, 1.28784450e-09,
                            4.06089384e-10]], dtype=np.float32)

        # Create a new file using the default properties.
        fid = h5py.h5f.create(test_filename)
        # Create the dataspace.  No maximum size parameter needed.
        space = h5py.h5s.create_simple(dims)

        # create a custom type with larger bias
        mytype = h5t.IEEE_F16LE.copy()
        mytype.set_fields(14, 9, 5, 0, 9)
        mytype.set_size(2)
        mytype.set_ebias(53)
        mytype.lock()

        dset = h5py.h5d.create(fid, dataset, mytype, space)
        dset.write(h5py.h5s.ALL, h5py.h5s.ALL, wdata)

        del dset

        # create a custom type with larger exponent
        mytype2 = h5t.IEEE_F16LE.copy()
        mytype2.set_fields(15, 9, 6, 0, 9)
        mytype2.set_size(2)
        mytype2.set_ebias(53)
        mytype2.lock()

        dset = h5py.h5d.create(fid, dataset2, mytype2, space)
        dset.write(h5py.h5s.ALL, h5py.h5s.ALL, wdata2)

        del dset

        # create a custom type which reimplements 16-bit floats
        mytype3 = h5t.IEEE_F16LE.copy()
        mytype3.set_fields(15, 10, 5, 0, 10)
        mytype3.set_size(2)
        mytype3.set_ebias(15)
        mytype3.lock()

        dset = h5py.h5d.create(fid, dataset3, mytype3, space)
        dset.write(h5py.h5s.ALL, h5py.h5s.ALL, wdata2)

        del dset

        # create a custom type with larger bias
        mytype4 = h5t.IEEE_F16LE.copy()
        mytype4.set_fields(15, 10, 5, 0, 10)
        mytype4.set_size(2)
        mytype4.set_ebias(258)
        mytype4.lock()

        dset = h5py.h5d.create(fid, dataset4, mytype4, space)
        dset.write(h5py.h5s.ALL, h5py.h5s.ALL, wdata2)

        del dset

        # create a dataset with long doubles
        dset = h5py.h5d.create(fid, dataset5, h5t.NATIVE_LDOUBLE, space)
        dset.write(h5py.h5s.ALL, h5py.h5s.ALL, wdata2)

        # Explicitly close and release resources.
        del space
        del dset
        del fid

        # Read back through the high-level API.  The context manager makes
        # sure the file is closed even when one of the assertions fails.
        with h5py.File(test_filename, 'r') as f:
            # ebias promotion to float32
            values = f[dataset][:]
            np.testing.assert_array_equal(values, wdata)
            self.assertEqual(values.dtype, np.dtype('<f4'))

            # esize promotion to float32
            values = f[dataset2][:]
            np.testing.assert_array_equal(values, wdata2)
            self.assertEqual(values.dtype, np.dtype('<f4'))

            # regular half floats
            dset = f[dataset3]
            try:
                self.assertEqual(dset.dtype, np.dtype('<f2'))
            except AttributeError:
                # NOTE(review): presumably a fallback for environments
                # without half-float support -- confirm it is still needed.
                self.assertEqual(dset.dtype, np.dtype('<f4'))

            # ebias promotion to float64
            dset = f[dataset4]
            self.assertEqual(dset.dtype, np.dtype('<f8'))

            # long double floats
            dset = f[dataset5]
            self.assertEqual(dset.dtype, np.longdouble)
@@ -0,0 +1,85 @@
from ctypes import (
    addressof,
    c_char_p,
    c_int,
    c_long,
    c_size_t,
    c_uint,
    c_void_p,
    CFUNCTYPE,
    POINTER,
    Structure,
)
import pytest
import h5py
from h5py import h5z
from .common import insubprocess
# Type of filter callback function of H5Z_class2_t.
# The HDF5 C prototype (H5Z_func_t) uses size_t for the return value, the
# element count, the byte count and *buf_size.  c_size_t matches that width
# on every platform, whereas c_long is only 32 bits on 64-bit Windows.
H5ZFuncT = CFUNCTYPE(
    c_size_t,  # restype
    # argtypes
    c_uint,  # flags
    c_size_t,  # cd_nelemts
    POINTER(c_uint),  # cd_values
    c_size_t,  # nbytes
    POINTER(c_size_t),  # buf_size
    POINTER(c_void_p),  # buf
)
class H5ZClass2T(Structure):
    """H5Z_class2_t structure defining a filter"""

    # Field order defines the memory layout handed to h5z.register_filter().
    # Names with a trailing underscore ("id_", "filter_") avoid clashing with
    # Python builtins.
    _fields_ = [
        ("version", c_int),
        ("id_", c_int),
        ("encoder_present", c_uint),
        ("decoder_present", c_uint),
        ("name", c_char_p),
        # The two optional callbacks are stored as untyped pointers;
        # test_register_filter below sets both to None (NULL).
        ("can_apply", c_void_p),
        ("set_local", c_void_p),
        ("filter_", H5ZFuncT),
    ]
def test_register_filter():
    """Register a ctypes-defined dummy filter, query it, then unregister it."""
    filter_id = 256  # Test ID

    @H5ZFuncT
    def failing_filter_callback(flags, cd_nelemts, cd_values, nbytes, buf_size, buf):
        return 0

    # Both the structure and the callback are kept in locals so they stay
    # alive for as long as the filter is registered.
    dummy_filter_class = H5ZClass2T(
        version=h5z.CLASS_T_VERS,
        id_=filter_id,
        encoder_present=1,
        decoder_present=1,
        name=b"dummy filter",
        can_apply=None,
        set_local=None,
        filter_=failing_filter_callback,
    )
    expected_flags = (
        h5z.FILTER_CONFIG_ENCODE_ENABLED | h5z.FILTER_CONFIG_DECODE_ENABLED
    )

    h5z.register_filter(addressof(dummy_filter_class))
    try:
        assert h5z.filter_avail(filter_id)
        assert h5z.get_filter_info(filter_id) == expected_flags
    finally:
        # Always unregister so the dummy filter cannot leak into other tests
        h5z.unregister_filter(filter_id)

    assert not h5z.filter_avail(filter_id)
@pytest.mark.mpi_skip
@insubprocess
def test_unregister_filter(request):
    """Unregistering the LZF filter (when it is available) reports success."""
    lzf = h5py.h5z.FILTER_LZF
    if not h5py.h5z.filter_avail(lzf):
        return
    unregistered = h5py.h5z.unregister_filter(lzf)
    assert unregistered
@@ -0,0 +1,178 @@
import numpy as np
import pytest
import h5py
# Only the major component of the NumPy version string is inspected.
_numpy_major = int(np.__version__.split(".")[0])
NUMPY_GE2 = _numpy_major >= 2

# Skip every test in this module on NumPy 1.x.
pytestmark = pytest.mark.skipif(not NUMPY_GE2, reason="requires numpy >=2.0")
def test_create_with_dtype_T(writable_file):
    """A dataset created with dtype="T" round-trips str data and reads as object."""
    dset = writable_file.create_dataset("x", shape=(2, 2), dtype="T")
    expected = [["foo", "bar"], ["hello world", ""]]
    dset[:] = expected

    as_str = dset.asstr()[:]
    np.testing.assert_array_equal(as_str, expected)

    # Re-opening the dataset by name exposes the object dtype
    dset = writable_file["x"]
    assert dset.dtype == object
    np.testing.assert_array_equal(dset.asstr()[:], expected)

    # Write a Python str, read back as NpyStrings
    dset[0, 0] = "baz"
    expected[0][0] = "baz"
    as_npystr = dset.astype("T")[:]
    assert as_npystr.dtype.kind == "T"
    np.testing.assert_array_equal(as_npystr, expected)

    # Write a 0-d object array
    dset[0, 0] = np.asarray("123", dtype="O")
    expected[0][0] = "123"
    np.testing.assert_array_equal(dset.asstr()[:], expected)
def test_fromdata(writable_file):
    """create_dataset(data=...) accepts "T", StringDType() and NpyString arrays."""
    data = [["foo", "bar"]]
    np_data = np.asarray(data, dtype="T")

    created = (
        writable_file.create_dataset("x", data=data, dtype="T"),
        writable_file.create_dataset("y", data=data, dtype=np.dtypes.StringDType()),
        writable_file.create_dataset("z", data=np_data),
    )
    for dset in created:
        assert dset.dtype.kind == "O"
        np.testing.assert_array_equal(dset.astype("T")[:], np_data)

    for name in ("x", "y", "z"):
        reopened = writable_file[name]
        assert reopened.dtype == object
        np.testing.assert_array_equal(reopened.asstr()[:], data)

        as_npystr = reopened.astype("T")
        assert as_npystr.dtype.kind == "T"
        values = as_npystr[:]
        assert values.dtype.kind == "T"
        np.testing.assert_array_equal(values, data)
def test_fixed_to_variable_width(writable_file):
    """Fixed-width (S20) datasets can be read as, and written from, NpyStrings."""
    expected = ["foo", "longer than 8 bytes"]
    fixed = writable_file.create_dataset(
        "x", data=expected, dtype=h5py.string_dtype(length=20)
    )
    assert fixed.dtype == "S20"

    # read T <- S
    view = fixed.astype("T")
    assert view.dtype.kind == "T"
    assert view[:].dtype.kind == "T"
    np.testing.assert_array_equal(view[:], expected)

    # write T -> S
    fixed[0] = np.asarray("1234", dtype="T")
    expected[0] = "1234"
    np.testing.assert_array_equal(view[:], expected)
def test_fixed_to_variable_width_too_short(writable_file):
    """Writing a too-long NpyString into an S3 dataset truncates it."""
    # Note: this test triggers calls to H5Tconvert which are otherwise skipped.
    fixed = writable_file.create_dataset(
        "x", data=["foo", "bar"], dtype=h5py.string_dtype(length=3)
    )
    assert fixed.dtype == "S3"

    # write T -> S
    fixed[0] = np.asarray("1234", dtype="T")
    np.testing.assert_array_equal(fixed[:], [b"123", b"bar"])
def test_variable_to_fixed_width(writable_file):
    """Variable-width datasets can be read as, and written from, fixed-width bytes."""
    text = ["foo", "longer than 8 bytes"]
    expected_bytes = [b"foo", b"longer than 8 bytes"]
    variable = writable_file.create_dataset("x", data=text, dtype="T")

    # read S <- T
    wide = variable.astype("S20")
    assert wide.dtype == "S20"
    assert wide[:].dtype == "S20"
    np.testing.assert_array_equal(wide[:], expected_bytes)

    # A narrower target width truncates on read
    narrow = variable.astype("S3")
    assert narrow.dtype == "S3"
    assert narrow[:].dtype == "S3"
    np.testing.assert_array_equal(narrow[:], [b"foo", b"lon"])

    # write S -> T
    variable[0] = np.asarray(b"1234", dtype="S5")
    expected_bytes[0] = b"1234"
    np.testing.assert_array_equal(variable[:], expected_bytes)
def test_write_object_into_npystrings(writable_file):
    """An object-dtype value can be written into a dataset created with dtype="T"."""
    dset = writable_file.create_dataset("x", data=["foo"], dtype="T")
    replacement = np.asarray("1234", dtype="O")
    dset[0] = replacement
    np.testing.assert_array_equal(dset[:], b"1234")
def test_write_npystrings_into_object(writable_file):
    """NpyString values can be written into object-dtype string datasets."""
    utf8_dset = writable_file.create_dataset(
        "x", data=["foo"], dtype=h5py.string_dtype()
    )
    assert utf8_dset.dtype == object
    utf8_dset[0] = np.asarray("1234", dtype="T")
    np.testing.assert_array_equal(utf8_dset[:], b"1234")

    # Test with HDF5 variable-length strings with ASCII character set
    ascii_dset = writable_file.create_dataset(
        "xa", shape=(1,), dtype=h5py.string_dtype('ascii')
    )
    ascii_dset[0] = np.asarray("2345", dtype="T")
    np.testing.assert_array_equal(ascii_dset[:], b"2345")
def test_fillvalue(writable_file):
    """Fill values are reported as bytes for both "T" and object string datasets."""
    # Create as NpyString dtype
    npy_dset = writable_file.create_dataset(
        "x", shape=(2,), dtype="T", fillvalue="foo"
    )
    assert isinstance(npy_dset.fillvalue, bytes)
    assert npy_dset.fillvalue == b"foo"
    assert npy_dset[0] == b"foo"

    # Create as object dtype
    obj_dset = writable_file.create_dataset(
        "y", shape=(2,), dtype=h5py.string_dtype(), fillvalue=b"foo"
    )
    assert isinstance(obj_dset.fillvalue, bytes)
    assert obj_dset.fillvalue == b"foo"
    assert obj_dset[0] == b"foo"

    # Convert object dtype to NpyString
    as_npystr = obj_dset.astype("T")
    assert as_npystr[0] == "foo"
def test_empty_string(writable_file):
    """Empty strings survive both the initial write and a partial overwrite."""
    strings = np.array(["", "a", "b"], dtype="T")
    dset = writable_file.create_dataset("x", data=strings)
    np.testing.assert_array_equal(dset[:], [b"", b"a", b"b"])
    np.testing.assert_array_equal(dset.astype("T")[:], strings)

    # Overwrite the first two entries, one of them with an empty string
    strings[:2] = ["c", ""]
    dset[:2] = strings[:2]
    np.testing.assert_array_equal(dset[:], [b"c", b"", b"b"])
    np.testing.assert_array_equal(dset.astype("T")[:], strings)
def test_astype_nonstring(writable_file):
    """astype("T") on a non-string dataset raises TypeError."""
    int_dset = writable_file.create_dataset("x", shape=(2, ), dtype="i8")
    with pytest.raises(TypeError, match="HDF5 string datatype"):
        int_dset.astype("T")
def test_resized_read(writable_file):
    """Read default values created by resize(). This triggers a special case
    where libhdf5 returns a char** containing NULL pointers.
    """
    strings = ["string1", "string2", "string3"]
    dset = writable_file.create_dataset(
        "dset", data=np.array(strings, dtype='T'), maxshape=(None,)
    )
    dset.resize((10,))

    # The seven new elements read back as empty strings
    expected_bytes = np.array(
        [s.encode() for s in strings] + [b''] * 7, dtype=object
    )
    np.testing.assert_array_equal(dset[:], expected_bytes)

    expected_str = np.array(strings + [''] * 7, dtype='T')
    np.testing.assert_array_equal(dset.astype('T')[:], expected_str)
@@ -0,0 +1,88 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
import os
import threading
from unittest import SkipTest
import time
from h5py import _objects as o
from .common import TestCase
class TestObjects(TestCase):

    """Tests for the low-level ObjectID wrapper and the global ``phil`` lock."""

    def test_invalid(self):
        # Check for segfault on close
        oid = o.ObjectID(0)
        del oid
        oid = o.ObjectID(1)
        del oid

    def test_equality(self):
        # Identifier-based equality
        oid1 = o.ObjectID(42)
        oid2 = o.ObjectID(42)
        oid3 = o.ObjectID(43)

        self.assertEqual(oid1, oid2)
        self.assertNotEqual(oid1, oid3)

    def test_hash(self):
        # Default objects are not hashable
        oid = o.ObjectID(42)
        with self.assertRaises(TypeError):
            hash(oid)

    def test_phil_fork_with_threads(self):
        # Test that handling of the phil Lock after fork is correct.
        # We simulate a deadlock in the forked process by explicitly
        # waiting for the phil Lock to be acquired in a different thread
        # before forking.

        # On Windows forking (and the register_at_fork handler)
        # are not available, skip this test.
        if not hasattr(os, "fork"):
            raise SkipTest("os.fork not available")

        thread_acquired_phil_event = threading.Event()

        def f():
            o.phil.acquire()
            try:
                thread_acquired_phil_event.set()
                time.sleep(1)
            finally:
                o.phil.release()

        thread = threading.Thread(target=f)
        thread.start()
        try:
            # wait for the thread running "f" to have acquired the phil lock
            thread_acquired_phil_event.wait()

            # now fork the current (main) thread while the other thread holds the lock
            pid = os.fork()
            if pid == 0:
                # child process
                # If we handle the phil lock correctly, this should not deadlock,
                # and we should be able to acquire the lock here.
                exit_code = 1
                try:
                    if o.phil.acquire(blocking=False):
                        o.phil.release()
                        exit_code = 0
                finally:
                    # The child must never return into the test runner, even
                    # if acquiring or releasing the lock raises: otherwise a
                    # second copy of the test session would keep running.
                    os._exit(exit_code)
            else:
                # parent process
                # wait for the child process to finish
                _, status = os.waitpid(pid, 0)
                assert os.WIFEXITED(status)
                assert os.WEXITSTATUS(status) == 0
        finally:
            thread.join()
@@ -0,0 +1,112 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
"""
Read-only S3 virtual file driver (VFD) test module.
"""
import h5py
from h5py._hl.files import make_fapl
import pytest
# Module-level marker: skip every test in this module when the HDF5 build
# reports no ros3 driver support.
pytestmark = [
    pytest.mark.skipif(
        not h5py.h5.get_config().ros3,
        reason="ros3 driver not available")
]
@pytest.mark.network
@pytest.mark.parametrize(
    "kwargs",
    [
        pytest.param(
            {},
            id="HDF5-v1",
            marks=pytest.mark.skipif(
                h5py.version.hdf5_version_tuple >= (2, 0, 0),
                reason="Requires HDF5 < 2.0",
            ),
        ),
        pytest.param(
            {"aws_region": b"us-east-2"},
            id="HDF5-v2",
            marks=pytest.mark.skipif(
                h5py.version.hdf5_version_tuple < (2, 0, 0),
                reason="Requires HDF5 >= 2.0",
            ),
        ),
    ],
)
def test_ros3(kwargs):
    """ ROS3 driver and options """
    url = "https://dandiarchive.s3.amazonaws.com/ros3test.hdf5"
    with h5py.File(url, 'r', driver='ros3', **kwargs) as remote:
        assert remote
        assert 'mydataset' in remote.keys()
        assert remote["mydataset"].shape == (100,)
@pytest.mark.parametrize(
    "exc,match_exc",
    [
        pytest.param(
            ValueError,
            [
                "AWS region required for s3:// location",
                r"^foo://wrong/scheme: S3 location must begin with",
            ],
            id="HDF5-v1",
            marks=pytest.mark.skipif(
                h5py.version.hdf5_version_tuple >= (2, 0, 0),
                reason="Requires HDF5 < 2.0",
            ),
        ),
        pytest.param(
            OSError,
            [None, "can't parse object key from path"],
            id="HDF5-v2",
            marks=pytest.mark.skipif(
                h5py.version.hdf5_version_tuple < (2, 0, 0),
                reason="Requires HDF5 >= 2.0",
            ),
        ),
    ],
)
def test_ros3_s3_fails(exc, match_exc):
    """ROS3 exceptions for s3:// location"""
    # match_exc holds one pattern per failing location, in this order
    missing_region_match = match_exc[0]
    wrong_scheme_match = match_exc[1]

    with pytest.raises(exc, match=missing_region_match):
        h5py.File('s3://fakebucket/fakekey', 'r', driver='ros3')

    with pytest.raises(exc, match=wrong_scheme_match):
        h5py.File('foo://wrong/scheme', 'r', driver='ros3')
@pytest.mark.network
def test_ros3_s3uri():
    """Use S3 URI with ROS3 driver"""
    open_kwargs = {'driver': 'ros3', 'aws_region': b'us-east-2'}
    with h5py.File('s3://dandiarchive/ros3test.hdf5', 'r', **open_kwargs) as remote:
        assert remote
        assert 'mydataset' in remote.keys()
        assert remote["mydataset"].shape == (100,)
@pytest.mark.skipif(h5py.version.hdf5_version_tuple < (1, 14, 2),
                    reason='AWS S3 access token support in HDF5 >= 1.14.2')
def test_ros3_temp_token():
    """Set and get S3 access token"""
    token = b'#0123FakeToken4567/8/9'
    fapl = make_fapl('ros3', libver=None, rdcc_nslots=None, rdcc_nbytes=None,
                     rdcc_w0=None, locking=None, page_buf_size=None, min_meta_keep=None,
                     min_raw_keep=None, alignment_threshold=1, alignment_interval=1,
                     meta_block_size=None, session_token=token)
    # Compare the values.  "assert token, <expr>" would only use <expr> as
    # the failure message and always pass, because the token is non-empty.
    assert fapl.get_fapl_ros3_token() == token
@@ -0,0 +1,142 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
"""
Tests for the (internal) selections module
"""
import numpy as np
import h5py
import h5py._hl.selections as sel
import h5py._hl.selections2 as sel2
from .common import TestCase
class BaseSelection(TestCase):

    """Shared fixture: a writable temp file and the id of a scalar dataset 'x'."""

    def setUp(self):
        self.f = h5py.File(self.mktemp(), 'w')
        scalar_dset = self.f.create_dataset('x', ())
        self.dsid = scalar_dset.id

    def tearDown(self):
        if not self.f:
            return
        self.f.close()
class TestTypeGeneration(BaseSelection):

    """
        Internal feature: Determine output types from dataset dtype and fields.
    """

    def test_simple(self):
        """ Non-compound types are handled appropriately """
        int_dtype = np.dtype('i')
        out, fmt = sel2.read_dtypes(int_dtype, ())
        self.assertEqual(out, fmt)
        self.assertEqual(out, np.dtype('i'))

    def test_simple_fieldexc(self):
        """ Field names for non-field types raises ValueError """
        int_dtype = np.dtype('i')
        with self.assertRaises(ValueError):
            sel2.read_dtypes(int_dtype, ('a',))

    def test_compound_simple(self):
        """ Compound types with elemental subtypes """
        compound = np.dtype([('a', 'i'), ('b', 'f'), ('c', '|S10')])

        # Implicit selection of all fields -> all fields
        out, fmt = sel2.read_dtypes(compound, ())
        self.assertEqual(out, fmt)
        self.assertEqual(out, compound)

        # Explicit selection of fields -> requested fields
        out, fmt = sel2.read_dtypes(compound, ('a', 'b'))
        self.assertEqual(out, fmt)
        self.assertEqual(out, np.dtype([('a', 'i'), ('b', 'f')]))

        # Explicit selection of exactly one field -> output dtype has no
        # fields, while the format keeps the single named field
        out, fmt = sel2.read_dtypes(compound, ('a',))
        self.assertEqual(out, np.dtype('i'))
        self.assertEqual(fmt, np.dtype([('a', 'i')]))

        # Requested fields do not appear in the compound type
        with self.assertRaises(ValueError):
            sel2.read_dtypes(compound, ('j', 'k'))
class TestScalarSliceRules(BaseSelection):

    """
        Internal feature: selections rules for scalar datasets
    """

    def test_args(self):
        """ Permissible arguments for scalar slicing """
        # Empty tuple: result shape is None
        shape, selection = sel2.read_selections_scalar(self.dsid, ())
        self.assertEqual(shape, None)
        self.assertEqual(selection.get_select_npoints(), 1)

        # Ellipsis: result shape is the empty tuple
        shape, selection = sel2.read_selections_scalar(self.dsid, (Ellipsis,))
        self.assertEqual(shape, ())
        self.assertEqual(selection.get_select_npoints(), 1)

        # Any other index is rejected on a scalar dataset
        with self.assertRaises(ValueError):
            sel2.read_selections_scalar(self.dsid, (1,))

        # A non-scalar dataset is rejected with a different error
        non_scalar_id = self.f.create_dataset('y', (1,)).id
        with self.assertRaises(RuntimeError):
            sel2.read_selections_scalar(non_scalar_id, (1,))
class TestSelection(BaseSelection):

    """ High-level routes to generate a selection
    """

    def test_selection(self):
        """ sel.select() dispatches on the type of its ``args`` argument """
        dset = self.f.create_dataset('dset', (100, 100))
        regref = dset.regionref[0:100, 0:100]

        # args is list, return a FancySelection
        from_list = sel.select((10,), [1, 2, 3], dset)
        self.assertIsInstance(from_list, sel.FancySelection)

        # args[0] is tuple, return a FancySelection
        from_tuple = sel.select((10,), ((1, 2, 3),), dset)
        self.assertIsInstance(from_tuple, sel.FancySelection)

        # args is a Boolean mask, return a PointSelection
        mask = np.array([True, False, False, False, True])
        from_mask = sel.select((5,), mask, dset)
        self.assertIsInstance(from_mask, sel.PointSelection)

        # args is int, return a SimpleSelection
        from_int = sel.select((10,), 1, dset)
        self.assertIsInstance(from_int, sel.SimpleSelection)

        # args is str, should be rejected
        with self.assertRaises(TypeError):
            sel.select((100,), "foo", dset)

        # args is RegionReference, return a Selection instance
        from_regref = sel.select((100, 100), regref, dset)
        self.assertIsInstance(from_regref, sel.Selection)

        # args is RegionReference, but dataset is None
        with self.assertRaises(TypeError):
            sel.select((100,), regref, None)

        # args is RegionReference, but its shape doesn't match dataset shape
        with self.assertRaises(TypeError):
            sel.select((100,), regref, dset)

        # args is a single Selection instance, return the arg
        passthrough = sel.select((100, 100), from_regref, dset)
        self.assertEqual(passthrough, from_regref)

        # args is a single Selection instance, but args shape doesn't match Shape
        with self.assertRaises(TypeError):
            sel.select((100,), from_regref, dset)
@@ -0,0 +1,415 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
"""
Dataset slicing test module.
Tests all supported slicing operations, including read/write and
broadcasting operations. Does not test type conversion except for
corner cases overlapping with slicing; for example, when selecting
specific fields of a compound type.
"""
import numpy as np
from .common import TestCase
import h5py
from h5py import File, MultiBlockSlice
class BaseSlicing(TestCase):

    """Shared fixture: a fresh writable temp file, closed again after each test."""

    def setUp(self):
        path = self.mktemp()
        self.f = File(path, 'w')

    def tearDown(self):
        if not self.f:
            return
        self.f.close()
class TestSingleElement(BaseSlicing):

    """
        Feature: Retrieving a single element works with NumPy semantics
    """

    def test_single_index(self):
        """ Single-element selection with [index] yields array scalar """
        dset = self.f.create_dataset('x', (1,), dtype='i1')
        value = dset[0]
        self.assertIsInstance(value, np.int8)

    def test_single_null(self):
        """ Single-element selection with [()] yields ndarray """
        dset = self.f.create_dataset('x', (1,), dtype='i1')
        value = dset[()]
        self.assertIsInstance(value, np.ndarray)
        self.assertEqual(value.shape, (1,))

    def test_scalar_index(self):
        """ Slicing with [...] yields scalar ndarray """
        dset = self.f.create_dataset('x', shape=(), dtype='f')
        value = dset[...]
        self.assertIsInstance(value, np.ndarray)
        self.assertEqual(value.shape, ())

    def test_scalar_null(self):
        """ Slicing with [()] yields array scalar """
        dset = self.f.create_dataset('x', shape=(), dtype='i1')
        value = dset[()]
        self.assertIsInstance(value, np.int8)

    def test_compound(self):
        """ Compound scalar is numpy.void, not tuple (issue 135) """
        record_dtype = np.dtype([('a', 'i4'), ('b', 'f8')])
        records = np.ones((4,), dtype=record_dtype)
        dset = self.f.create_dataset('foo', (4,), data=records)
        self.assertEqual(dset[0], records[0])
        self.assertIsInstance(dset[0], np.void)
class TestObjectIndex(BaseSlicing):

    """
        Feature: numpy.object_ subtypes map to real Python objects
    """

    def test_reference(self):
        """ Indexing a reference dataset returns a h5py.Reference instance """
        refs = self.f.create_dataset('x', (1,), dtype=h5py.ref_dtype)
        refs[0] = self.f.ref
        self.assertEqual(type(refs[0]), h5py.Reference)

    def test_regref(self):
        """ Indexing a region reference dataset returns a h5py.RegionReference
        """
        target = self.f.create_dataset('x', (10, 10))
        whole_region = target.regionref[...]
        regrefs = self.f.create_dataset('y', (1,), dtype=h5py.regionref_dtype)
        regrefs[0] = whole_region
        self.assertEqual(type(regrefs[0]), h5py.RegionReference)

    def test_reference_field(self):
        """ Compound types of which a reference is an element work right """
        record_dtype = np.dtype([('a', 'i'), ('b', h5py.ref_dtype)])
        dset = self.f.create_dataset('x', (1,), dtype=record_dtype)
        dset[0] = (42, self.f['/'].ref)

        record = dset[0]
        self.assertEqual(type(record[1]), h5py.Reference)  # isinstance does NOT work

    def test_scalar(self):
        """ Indexing returns a real Python object on scalar datasets """
        scalar_ref = self.f.create_dataset('x', (), dtype=h5py.ref_dtype)
        scalar_ref[()] = self.f.ref
        self.assertEqual(type(scalar_ref[()]), h5py.Reference)

    def test_bytestr(self):
        """ Indexing a byte string dataset returns a real python byte string
        """
        ascii_dtype = h5py.string_dtype(encoding='ascii')
        dset = self.f.create_dataset('x', (1,), dtype=ascii_dtype)
        dset[0] = b"Hello there!"
        self.assertEqual(type(dset[0]), bytes)
class TestSimpleSlicing(TestCase):

    """
        Feature: Simple NumPy-style slices (start:stop:step) are supported.
    """

    def setUp(self):
        self.f = File(self.mktemp(), 'w')
        self.arr = np.arange(10)
        self.dset = self.f.create_dataset('x', data=self.arr)

    def tearDown(self):
        if not self.f:
            return
        self.f.close()

    def test_negative_stop(self):
        """ Negative stop indexes work as they do in NumPy """
        window = slice(2, -2)
        self.assertArrayEqual(self.dset[window], self.arr[window])

    def test_write(self):
        """Assigning to a 1D slice of a 2D dataset
        """
        dset = self.f.create_dataset('x2', (10, 2))
        column = np.zeros((10, 1))

        # A 1D source matches the 1D target selection
        dset[:, 0] = column[:, 0]

        # A (10, 1) source does not
        with self.assertRaises(TypeError):
            dset[:, 1] = column
class TestArraySlicing(BaseSlicing):

    """
        Feature: Array types are handled appropriately
    """

    def test_read(self):
        """ Read arrays tack array dimensions onto end of shape tuple """
        array_dtype = np.dtype('(3,)f8')
        dset = self.f.create_dataset('x', (10,), dtype=array_dtype)
        self.assertEqual(dset.shape, (10,))
        self.assertEqual(dset.dtype, array_dtype)

        element_dtype = np.dtype('f8')

        # Full read
        full = dset[...]
        self.assertEqual(full.dtype, element_dtype)
        self.assertEqual(full.shape, (10, 3))

        # Single element
        single = dset[0]
        self.assertEqual(single.dtype, element_dtype)
        self.assertEqual(single.shape, (3,))

        # Range
        strided = dset[2:8:2]
        self.assertEqual(strided.dtype, element_dtype)
        self.assertEqual(strided.shape, (3, 3))

    def test_write_broadcast(self):
        """ Array fill from constant is not supported (issue 211).
        """
        array_dtype = np.dtype('(3,)i')
        dset = self.f.create_dataset('x', (10,), dtype=array_dtype)

        with self.assertRaises(TypeError):
            dset[...] = 42

    def test_write_element(self):
        """ Write a single element to the array

        Issue 211.
        """
        array_dtype = np.dtype('(3,)f8')
        dset = self.f.create_dataset('x', (10,), dtype=array_dtype)

        element = np.array([1, 2, 3.0])
        dset[4] = element

        readback = dset[4]
        self.assertTrue(np.all(readback == element))

    def test_write_slices(self):
        """ Write slices to array type """
        array_dtype = np.dtype('(3,)i')

        row = np.ones((2,), dtype=array_dtype)
        plane = np.ones((4, 5), dtype=array_dtype)

        dset = self.f.create_dataset('x', (10, 9, 11), dtype=array_dtype)

        dset[0, 0, 2:4] = row
        self.assertArrayEqual(dset[0, 0, 2:4], row)

        dset[3, 1:5, 6:11] = plane
        self.assertArrayEqual(dset[3, 1:5, 6:11], plane)

    def test_roundtrip(self):
        """ Read the contents of an array and write them back

        Issue 211.
        """
        array_dtype = np.dtype('(3,)f8')
        dset = self.f.create_dataset('x', (10,), dtype=array_dtype)

        contents = dset[...]
        dset[...] = contents

        self.assertTrue(np.all(dset[...] == contents))
class TestZeroLengthSlicing(BaseSlicing):

    """
        Slices resulting in empty arrays
    """

    def test_slice_zero_length_dimension(self):
        """ Slice a dataset with a zero in its shape vector
            along the zero-length dimension """
        shapes = [(0,), (0, 3), (0, 2, 1)]
        for idx, shape in enumerate(shapes):
            unlimited = (None,) * len(shape)
            dset = self.f.create_dataset('x%d' % idx, shape, dtype=int, maxshape=unlimited)
            self.assertEqual(dset.shape, shape)

            everything = dset[...]
            self.assertIsInstance(everything, np.ndarray)
            self.assertEqual(everything.shape, shape)

            everything = dset[:]
            self.assertIsInstance(everything, np.ndarray)
            self.assertEqual(everything.shape, shape)

            if len(shape) > 1:
                partial = dset[:, :1]
                self.assertIsInstance(partial, np.ndarray)
                self.assertEqual(partial.shape[:2], (0, 1))

    def test_slice_other_dimension(self):
        """ Slice a dataset with a zero in its shape vector
            along a non-zero-length dimension """
        shapes = [(3, 0), (1, 2, 0), (2, 0, 1)]
        for idx, shape in enumerate(shapes):
            unlimited = (None,) * len(shape)
            dset = self.f.create_dataset('x%d' % idx, shape, dtype=int, maxshape=unlimited)
            self.assertEqual(dset.shape, shape)

            first_row = dset[:1]
            self.assertIsInstance(first_row, np.ndarray)
            self.assertEqual(first_row.shape, (1,) + shape[1:])

    def test_slice_of_length_zero(self):
        """ Get a slice of length zero from a non-empty dataset """
        shapes = [(3,), (2, 2,), (2, 1, 5)]
        for idx, shape in enumerate(shapes):
            unlimited = (None,) * len(shape)
            dset = self.f.create_dataset(
                'x%d' % idx, data=np.zeros(shape, int), maxshape=unlimited
            )
            self.assertEqual(dset.shape, shape)

            empty = dset[1:1]
            self.assertIsInstance(empty, np.ndarray)
            self.assertEqual(empty.shape, (0,) + shape[1:])
class TestFieldNames(BaseSlicing):

    """
        Field names for read & write
    """

    # Compound dtype and reference data shared by all tests in this class
    dt = np.dtype([('a', 'f'), ('b', 'i'), ('c', 'f4')])
    data = np.ones((100,), dtype=dt)

    def setUp(self):
        super().setUp()
        self.dset = self.f.create_dataset('x', (100,), dtype=self.dt)
        self.dset[...] = self.data

    def test_read(self):
        """ Test read with field selections """
        self.assertArrayEqual(self.dset['a'], self.data['a'])

    def test_unicode_names(self):
        """ Unicode field names for read and write """
        self.assertArrayEqual(self.dset['a'], self.data['a'])

        self.dset['a'] = 42
        expected = self.data.copy()
        expected['a'] = 42
        self.assertArrayEqual(self.dset['a'], expected['a'])

    def test_write(self):
        """ Test write with field selections """
        expected = self.data.copy()

        # Single field 'a'
        expected['a'] *= 2
        self.dset['a'] = expected
        self.assertTrue(np.all(self.dset[...] == expected))

        # Single field 'b'
        expected['b'] *= 4
        self.dset['b'] = expected
        self.assertTrue(np.all(self.dset[...] == expected))

        # Two fields at once
        expected['a'] *= 3
        expected['c'] *= 3
        self.dset['a', 'c'] = expected
        self.assertTrue(np.all(self.dset[...] == expected))

    def test_write_noncompound(self):
        """ Test write with non-compound source (single-field) """
        expected = self.data.copy()
        expected['b'] = 1.0
        self.dset['b'] = 1.0
        self.assertTrue(np.all(self.dset[...] == expected))
class TestMultiBlockSlice(BaseSlicing):

    """
        MultiBlockSlice.indices() and dataset reads against a 10-element dataset
    """

    def setUp(self):
        super().setUp()
        self.arr = np.arange(10)
        self.dset = self.f.create_dataset('x', data=self.arr)

    def test_default(self):
        # Default selects entire dataset as one block
        mbs = MultiBlockSlice()

        self.assertEqual(mbs.indices(10), (0, 1, 10, 1))
        np.testing.assert_array_equal(self.dset[mbs], self.arr)

    def test_default_explicit(self):
        mbs = MultiBlockSlice(start=0, count=10, stride=1, block=1)

        self.assertEqual(mbs.indices(10), (0, 1, 10, 1))
        np.testing.assert_array_equal(self.dset[mbs], self.arr)

    def test_start(self):
        mbs = MultiBlockSlice(start=4)

        self.assertEqual(mbs.indices(10), (4, 1, 6, 1))
        expected = np.array([4, 5, 6, 7, 8, 9])
        np.testing.assert_array_equal(self.dset[mbs], expected)

    def test_count(self):
        mbs = MultiBlockSlice(count=7)

        self.assertEqual(mbs.indices(10), (0, 1, 7, 1))
        expected = np.array([0, 1, 2, 3, 4, 5, 6])
        np.testing.assert_array_equal(self.dset[mbs], expected)

    def test_count_more_than_length_error(self):
        mbs = MultiBlockSlice(count=11)
        with self.assertRaises(ValueError):
            mbs.indices(10)

    def test_stride(self):
        mbs = MultiBlockSlice(stride=2)

        self.assertEqual(mbs.indices(10), (0, 2, 5, 1))
        expected = np.array([0, 2, 4, 6, 8])
        np.testing.assert_array_equal(self.dset[mbs], expected)

    def test_stride_zero_error(self):
        with self.assertRaises(ValueError):
            # This would cause a ZeroDivisionError if not caught
            MultiBlockSlice(stride=0, block=0).indices(10)

    def test_stride_block_equal(self):
        mbs = MultiBlockSlice(stride=2, block=2)

        self.assertEqual(mbs.indices(10), (0, 2, 5, 2))
        np.testing.assert_array_equal(self.dset[mbs], self.arr)

    def test_block_more_than_stride_error(self):
        # The first case leaves stride at its default
        with self.assertRaises(ValueError):
            MultiBlockSlice(block=3)

        with self.assertRaises(ValueError):
            MultiBlockSlice(stride=2, block=3)

    def test_stride_more_than_block(self):
        mbs = MultiBlockSlice(stride=3, block=2)

        self.assertEqual(mbs.indices(10), (0, 3, 3, 2))
        expected = np.array([0, 1, 3, 4, 6, 7])
        np.testing.assert_array_equal(self.dset[mbs], expected)

    def test_block_overruns_extent_error(self):
        # If fully described then must fit within extent
        mbs = MultiBlockSlice(start=2, count=2, stride=5, block=4)
        with self.assertRaises(ValueError):
            mbs.indices(10)

    def test_fully_described(self):
        mbs = MultiBlockSlice(start=1, count=2, stride=5, block=4)

        self.assertEqual(mbs.indices(10), (1, 5, 2, 4))
        expected = np.array([1, 2, 3, 4, 6, 7, 8, 9])
        np.testing.assert_array_equal(self.dset[mbs], expected)

    def test_count_calculated(self):
        # If not given, count should be calculated to select as many full blocks as possible
        mbs = MultiBlockSlice(start=1, stride=3, block=2)

        self.assertEqual(mbs.indices(10), (1, 3, 3, 2))
        expected = np.array([1, 2, 4, 5, 7, 8])
        np.testing.assert_array_equal(self.dset[mbs], expected)

    def test_zero_count_calculated_error(self):
        # In this case, there is no possible count to select even one block, so error
        mbs = MultiBlockSlice(start=8, stride=4, block=3)
        with self.assertRaises(ValueError):
            mbs.indices(10)
@@ -0,0 +1,4 @@
from .test_virtual_source import *
from .test_highlevel_vds import *
from .test_lowlevel_vds import *
@@ -0,0 +1,476 @@
'''
Unit test for the high level vds interface for eiger
https://support.hdfgroup.org/HDF5/docNewFeatures/VDS/HDF5-VDS-requirements-use-cases-2014-12-10.pdf
'''
import numpy as np
from numpy.testing import assert_array_equal
import os
import os.path as osp
import shutil
import tempfile
import h5py as h5
from ..common import ut
from ..._hl.vds import vds_support
@ut.skipUnless(vds_support,
               'VDS requires HDF5 >= 1.9.233')
class TestEigerHighLevel(ut.TestCase):
    """Stack four raw source files into one virtual dataset and read it back."""

    def setUp(self):
        """Create the four source files inside a fresh temporary directory."""
        self.working_dir = tempfile.mkdtemp()
        self.fname = ['raw_file_1.h5', 'raw_file_2.h5', 'raw_file_3.h5']
        for k, outfile in enumerate(self.fname):
            filename = osp.join(self.working_dir, outfile)
            # Context managers close each file even if the write raises
            with h5.File(filename, 'w') as f:
                f['data'] = np.ones((20, 200, 200)) * k

        # The fourth file holds fewer frames (18 instead of 20)
        with h5.File(osp.join(self.working_dir, 'raw_file_4.h5'), 'w') as f:
            f['data'] = np.ones((18, 200, 200)) * 3
        self.fname.append('raw_file_4.h5')
        self.fname = [osp.join(self.working_dir, ix) for ix in self.fname]

    def test_eiger_high_level(self):
        """Frames of the virtual dataset map back to the matching source file."""
        outfile = osp.join(self.working_dir, 'eiger.h5')
        layout = h5.VirtualLayout(shape=(78, 200, 200), dtype=float)

        M_minus_1 = 0
        # Create the virtual dataset file
        with h5.File(outfile, 'w', libver='latest') as f:
            for foo in self.fname:
                # Only the shape is needed; close the source right away
                with h5.File(foo, 'r') as src:
                    src_shape = src['data'].shape
                M = M_minus_1 + src_shape[0]
                vsource = h5.VirtualSource(foo, 'data', shape=src_shape)
                layout[M_minus_1:M, :, :] = vsource
                M_minus_1 = M
            f.create_virtual_dataset('data', layout, fillvalue=45)

        # Read back; the file is closed even if an assertion fails
        with h5.File(outfile, 'r') as f:
            data = f['data']
            self.assertEqual(data[10, 100, 10], 0.0)
            self.assertEqual(data[30, 100, 100], 1.0)
            self.assertEqual(data[50, 100, 100], 2.0)
            self.assertEqual(data[70, 100, 100], 3.0)

    def tearDown(self):
        shutil.rmtree(self.working_dir)
'''
Unit test for the high level vds interface for excalibur
https://support.hdfgroup.org/HDF5/docNewFeatures/VDS/HDF5-VDS-requirements-use-cases-2014-12-10.pdf
'''
class ExcaliburData:
    """Detector geometry constants and constant-valued test image builders.

    Dimensions are derived from the ``FEM_*`` class constants and are
    always reported as ``(y_pixels, x_pixels)``.
    """

    FEM_PIXELS_PER_CHIP_X = 256
    FEM_PIXELS_PER_CHIP_Y = 256
    FEM_CHIPS_PER_STRIPE_X = 8
    FEM_CHIPS_PER_STRIPE_Y = 1
    FEM_STRIPES_PER_MODULE = 2

    @property
    def sensor_module_dimensions(self):
        """``(y, x)`` size of a module: stripe height times stripes per module."""
        width = self.FEM_PIXELS_PER_CHIP_X * self.FEM_CHIPS_PER_STRIPE_X
        height = (self.FEM_PIXELS_PER_CHIP_Y
                  * self.FEM_CHIPS_PER_STRIPE_Y
                  * self.FEM_STRIPES_PER_MODULE)
        return (height, width)

    @property
    def fem_stripe_dimensions(self):
        """``(y, x)`` size of a single stripe of chips."""
        width = self.FEM_PIXELS_PER_CHIP_X * self.FEM_CHIPS_PER_STRIPE_X
        height = self.FEM_PIXELS_PER_CHIP_Y * self.FEM_CHIPS_PER_STRIPE_Y
        return (height, width)

    @staticmethod
    def _constant_image(dimensions, value, dtype):
        """Allocate an array of ``dimensions`` and set every element to ``value``."""
        image = np.empty(shape=dimensions, dtype=dtype)
        image.fill(value)
        return image

    def generate_sensor_module_image(self, value, dtype='uint16'):
        """Return a module-sized array in which every pixel equals ``value``."""
        return self._constant_image(self.sensor_module_dimensions, value, dtype)

    def generate_fem_stripe_image(self, value, dtype='uint16'):
        """Return a stripe-sized array in which every pixel equals ``value``."""
        return self._constant_image(self.fem_stripe_dimensions, value, dtype)
@ut.skipUnless(vds_support,
               'VDS requires HDF5 >= 1.9.233')
class TestExcaliburHighLevel(ut.TestCase):
    """Excalibur use case: stack stripe files along axis 1 with gaps between.

    Six stripe files of five frames each are placed one below the other,
    separated by ``vertical_gap`` rows that are left unmapped and therefore
    read back as the fill value.
    """

    def create_excalibur_fem_stripe_datafile(self, fname, nframes, excalibur_data,scale):
        """Write ``nframes`` constant stripe images to dataset ``data`` in ``fname``.

        Frame ``i`` is filled with the value ``i * scale``.
        """
        shape = (nframes,) + excalibur_data.fem_stripe_dimensions
        max_shape = shape#(None,) + excalibur_data.fem_stripe_dimensions
        chunk = (1,) + excalibur_data.fem_stripe_dimensions
        with h5.File(fname, 'w', libver='latest') as f:
            dset = f.create_dataset('data', shape=shape, maxshape=max_shape, chunks=chunk, dtype='uint16')
            for data_value_index in np.arange(nframes):
                dset[data_value_index] = excalibur_data.generate_fem_stripe_image(data_value_index*scale)

    def setUp(self):
        """Create stripe_1.h5 .. stripe_6.h5, using the file index as scale."""
        self.working_dir = tempfile.mkdtemp()
        self.fname = ["stripe_%d.h5" % stripe for stripe in range(1,7)]
        self.fname = [osp.join(self.working_dir, f) for f in self.fname]
        nframes = 5
        self.edata = ExcaliburData()
        for k, raw_file in enumerate(self.fname):
            self.create_excalibur_fem_stripe_datafile(raw_file, nframes, self.edata,k)

    def test_excalibur_high_level(self):
        """Build the gapped layout and check stripe values and gap fill."""
        outfile = osp.join(self.working_dir, 'excalibur.h5')
        in_key = 'data'  # where is the data at the input?

        # Read shape and dtype from one handle that is reliably closed; the
        # previous code opened the file twice and never closed either handle.
        with h5.File(self.fname[0], 'r') as src:
            in_sh = src[in_key].shape  # get the input shape
            dtype = src[in_key].dtype  # get the datatype

        # now generate the output shape
        vertical_gap = 10  # pixels spacing in the vertical
        nfiles = len(self.fname)
        nframes = in_sh[0]
        width = in_sh[2]
        height = (in_sh[1]*nfiles) + (vertical_gap*(nfiles-1))
        out_sh = (nframes, height, width)

        # Virtual layout is a representation of the output dataset
        layout = h5.VirtualLayout(shape=out_sh, dtype=dtype)
        offset = 0  # initial offset
        for filename in self.fname:
            # A representation of the input dataset
            vsource = h5.VirtualSource(filename, in_key, shape=in_sh)
            layout[:, offset:(offset + in_sh[1]), :] = vsource  # map them with indexing
            offset += in_sh[1] + vertical_gap  # increment the offset

        # pass the fill value and list of maps
        with h5.File(outfile, 'w', libver='latest') as f:
            f.create_virtual_dataset('data', layout, fillvalue=0x1)

        with h5.File(outfile, 'r') as f:
            dset = f['data']
            self.assertEqual(dset[3,100,0], 0.0)
            # Row 260 lies in the gap after the first stripe (rows 256-265),
            # so it reads back the fill value.
            self.assertEqual(dset[3,260,0], 1.0)
            self.assertEqual(dset[3,350,0], 3.0)
            self.assertEqual(dset[3,650,0], 6.0)
            self.assertEqual(dset[3,900,0], 9.0)
            self.assertEqual(dset[3,1150,0], 12.0)
            self.assertEqual(dset[3,1450,0], 15.0)

    def tearDown(self):
        """Remove the temporary directory and everything in it."""
        shutil.rmtree(self.working_dir)
'''
Unit test for the high level vds interface for percival
https://support.hdfgroup.org/HDF5/docNewFeatures/VDS/HDF5-VDS-requirements-use-cases-2014-12-10.pdf
'''
@ut.skipUnless(vds_support,
               'VDS requires HDF5 >= 1.9.233')
class TestPercivalHighLevel(ut.TestCase):
    """Percival use case: interleave frames from four sources with stride 4.

    Three sources hold 20 frames and the fourth holds 19, so the virtual
    dataset has 79 frames; frame ``i`` comes from source ``i % 4``.
    """

    def setUp(self):
        """Create the four source files; source ``k`` is filled with ``k``."""
        self.working_dir = tempfile.mkdtemp()
        self.fname = ['raw_file_1.h5','raw_file_2.h5','raw_file_3.h5']
        for k, outfile in enumerate(self.fname):
            filename = osp.join(self.working_dir, outfile)
            # The context manager closes the file even if the write raises.
            with h5.File(filename, 'w') as f:
                f['data'] = np.ones((20,200,200))*k

        # The last source is one frame shorter than the others.
        with h5.File(osp.join(self.working_dir, 'raw_file_4.h5'), 'w') as f:
            f['data'] = np.ones((19,200,200))*3
        self.fname.append('raw_file_4.h5')
        self.fname = [osp.join(self.working_dir, ix) for ix in self.fname]

    def test_percival_high_level(self):
        """Map sources described by file name, dataset name and shape."""
        outfile = osp.join(self.working_dir, 'percival.h5')

        # Virtual layout is a representation of the output dataset
        layout = h5.VirtualLayout(shape=(79, 200, 200), dtype=np.float64)
        for k, filename in enumerate(self.fname):
            dim1 = 19 if k == 3 else 20
            vsource = h5.VirtualSource(filename, 'data',shape=(dim1, 200, 200))
            layout[k:79:4, :, :] = vsource[:, :, :]

        # Create the virtual dataset file
        with h5.File(outfile, 'w', libver='latest') as f:
            f.create_virtual_dataset('data', layout, fillvalue=-5)

        # The first eight frames cycle through the four sources twice.
        foo = np.array(2 * list(range(4)))
        with h5.File(outfile,'r') as f:
            ds = f['data']
            line = ds[:8,100,100]
            self.assertEqual(ds.shape, (79,200,200),)
            assert_array_equal(line, foo)

    def test_percival_source_from_dataset(self):
        """Same mapping, with each source described by an open dataset."""
        outfile = osp.join(self.working_dir, 'percival.h5')

        # Virtual layout is a representation of the output dataset
        layout = h5.VirtualLayout(shape=(79, 200, 200), dtype=np.float64)
        for k, filename in enumerate(self.fname):
            with h5.File(filename, 'r') as f:
                vsource = h5.VirtualSource(f['data'])
                layout[k:79:4, :, :] = vsource

        # Create the virtual dataset file
        with h5.File(outfile, 'w', libver='latest') as f:
            f.create_virtual_dataset('data', layout, fillvalue=-5)

        foo = np.array(2 * list(range(4)))
        with h5.File(outfile,'r') as f:
            ds = f['data']
            line = ds[:8,100,100]
            self.assertEqual(ds.shape, (79,200,200),)
            assert_array_equal(line, foo)

    def tearDown(self):
        """Remove the temporary directory and everything in it."""
        shutil.rmtree(self.working_dir)
@ut.skipUnless(vds_support,
               'VDS requires HDF5 >= 1.9.233')
class SlicingTestCase(ut.TestCase):
    """Map strided slices of 1-D sources into rows of a 2-D virtual dataset."""

    def _source_path(self, n):
        """Return the path of source file number ``n`` in the temp directory."""
        return osp.join(self.tmpdir, '{}.h5'.format(n))

    def setUp(self):
        """Write sources 1.h5 .. 4.h5; file ``n`` holds ``arange(100) + n``."""
        self.tmpdir = tempfile.mkdtemp()
        for n in range(1, 5):
            with h5.File(self._source_path(n), 'w') as f:
                dset = f.create_dataset('data', (100,), 'i4')
                dset[:] = np.arange(100) + n

    def make_virtual_ds(self):
        """Create VDS.h5 with '/group/data' and return the file's path.

        Row ``n - 1`` holds the even-indexed elements of source ``n`` in its
        first half and the odd-indexed elements in its second half.
        """
        layout = h5.VirtualLayout((4, 100), 'i4', maxshape=(4, None))

        for row, n in enumerate(range(1, 5)):
            vsource = h5.VirtualSource(self._source_path(n), 'data', shape=(100,))
            # Positions 0, 2, 4... of the source go to the first half
            layout[row, :50] = vsource[0:100:2]
            # Positions 1, 3, 5... of the source go to the second half
            layout[row, 50:] = vsource[1:100:2]

        outfile = osp.join(self.tmpdir, 'VDS.h5')
        with h5.File(outfile, 'w', libver='latest') as f:
            f.create_virtual_dataset('/group/data', layout, fillvalue=-5)

        return outfile

    def test_slice_source(self):
        """Both halves of the first and last rows carry the expected values."""
        outfile = self.make_virtual_ds()

        with h5.File(outfile, 'r') as f:
            vds = f['/group/data']
            first_row = vds[0]
            last_row = vds[3]
            assert_array_equal(first_row[:3], [1, 3, 5])
            assert_array_equal(first_row[50:53], [2, 4, 6])
            assert_array_equal(last_row[:3], [4, 6, 8])
            assert_array_equal(last_row[50:53], [5, 7, 9])

    def test_inspection(self):
        """``is_virtual`` and ``virtual_sources()`` describe the mapping."""
        with h5.File(self._source_path(1), 'r') as f:
            assert not f['data'].is_virtual

        outfile = self.make_virtual_ds()

        with h5.File(outfile, 'r') as f:
            ds = f['/group/data']
            assert ds.is_virtual

            expected = {self._source_path(n) for n in range(1, 5)}
            reported = {s.file_name for s in ds.virtual_sources()}
            assert reported == expected

    def test_mismatched_selections(self):
        """Assigning 50 source elements to a 49-element target is rejected."""
        layout = h5.VirtualLayout((4, 100), 'i4', maxshape=(4, None))

        vsource = h5.VirtualSource(self._source_path(1), 'data', shape=(100,))
        with self.assertRaisesRegex(ValueError, r'different number'):
            layout[0, :49] = vsource[0:100:2]

    def tearDown(self):
        """Remove the temporary directory and everything in it."""
        shutil.rmtree(self.tmpdir)
@ut.skipUnless(vds_support,
               'VDS requires HDF5 >= 1.9.233')
class IndexingTestCase(ut.TestCase):
    """Map with integer-list indexing on the layout and on the source."""

    def setUp(self):
        """Write 1.h5 whose 'data' dataset holds 0, 10, 20, ..., 90."""
        self.tmpdir = tempfile.mkdtemp()
        with h5.File(osp.join(self.tmpdir, '1.h5'), 'w') as f:
            dset = f.create_dataset('data', (10,), 'i4')
            dset[:] = np.arange(10)*10

    def test_index_layout(self):
        """Scatter the source into a layout, and gather from it into another."""
        source_values = np.arange(10)*10
        src_path = osp.join(self.tmpdir, "1.h5")
        vsource = h5.VirtualSource(src_path, 'data', shape=(10,))

        # Assemble virtual dataset (indexing target)
        inds = [3,6,20,25,33,47,70,75,96,98]
        layout = h5.VirtualLayout((100,), 'i4')
        layout[inds] = vsource

        # Assemble virtual dataset (indexing source)
        inds2 = [0,1,4,5,8]
        layout2 = h5.VirtualLayout((6,), 'i4')
        layout2[1:] = vsource[inds2]

        # Add virtual datasets to output file and close
        outfile = osp.join(self.tmpdir, 'VDS.h5')
        with h5.File(outfile, 'w', libver='latest') as f:
            f.create_virtual_dataset('/data', layout, fillvalue=-5)
            f.create_virtual_dataset(b'/data2', layout2, fillvalue=-3)

        # Read data from virtual datasets
        with h5.File(outfile, 'r') as f:
            data = f['/data'][()]
            data2 = f['/data2'][()]

        # Mapped positions carry the source values
        assert_array_equal(data[inds], source_values)
        assert_array_equal(data2[1:], [0,10,40,50,80])

        # Every position that was not mapped must hold the fill value
        unmapped = np.ones(100, dtype=bool)
        unmapped[inds] = False
        fill_region = data[unmapped]
        self.assertEqual(fill_region.min(), -5)
        self.assertEqual(fill_region.max(), -5)
        self.assertEqual(data2[0], -3)

    def tearDown(self):
        """Remove the temporary directory and everything in it."""
        shutil.rmtree(self.tmpdir)
@ut.skipUnless(vds_support,
               'VDS requires HDF5 >= 1.9.233')
class RelativeLinkTestCase(ut.TestCase):
    """Virtual dataset whose sources are another file and its own file.

    testfile2.h5 holds both a plain dataset and a virtual dataset mapping
    testfile1.h5 (row 0) and testfile2.h5 itself (row 1).
    """

    def setUp(self):
        """Write both source files and create the virtual dataset in the second."""
        self.tmpdir = tempfile.mkdtemp()
        self.f1 = osp.join(self.tmpdir, 'testfile1.h5')
        self.f2 = osp.join(self.tmpdir, 'testfile2.h5')

        self.data1 = np.arange(10)
        self.data2 = np.arange(10) * -1

        with h5.File(self.f1, 'w') as f:
            dset = f.create_dataset('data', (10,), 'f4')
            dset[:] = self.data1

        with h5.File(self.f2, 'w') as f:
            dset = f.create_dataset('data', (10,), 'f4')
            dset[:] = self.data2
            # The virtual dataset is created while the file is still open.
            self.make_vds(f)

    def make_vds(self, f):
        """Create the 2x10 dataset 'virtual' in the open file ``f``."""
        layout = h5.VirtualLayout((2, 10), 'f4')
        layout[0] = h5.VirtualSource(self.f1, 'data', shape=(10,))
        layout[1] = h5.VirtualSource(self.f2, 'data', shape=(10,))
        f.create_virtual_dataset('virtual', layout)

    def _read_virtual(self, path):
        """Open ``path`` with the default mode and return all of 'virtual'."""
        with h5.File(path) as f:
            return f['virtual'][:]

    def test_relative_vds(self):
        """The self-referencing row survives a rename; a moved source does not."""
        data = self._read_virtual(self.f2)
        np.testing.assert_array_equal(data[0], self.data1)
        np.testing.assert_array_equal(data[1], self.data2)

        # move f2 -> f3
        f3 = osp.join(self.tmpdir, 'testfile3.h5')
        os.rename(self.f2, f3)

        data = self._read_virtual(f3)
        assert data.dtype == 'f4'
        np.testing.assert_array_equal(data[0], self.data1)
        np.testing.assert_array_equal(data[1], self.data2)

        # moving other file
        f4 = osp.join(self.tmpdir, 'testfile4.h5')
        os.rename(self.f1, f4)

        data = self._read_virtual(f3)
        assert data.dtype == 'f4'
        # unavailable data is silently converted to default value
        np.testing.assert_array_equal(data[0], 0)
        np.testing.assert_array_equal(data[1], self.data2)

    def tearDown(self):
        """Remove the temporary directory and everything in it."""
        shutil.rmtree(self.tmpdir)
class RelativeLinkBuildVDSTestCase(RelativeLinkTestCase):
    """Re-run the relative-link tests with File.build_virtual_dataset().

    Only the way the virtual dataset is created differs from the parent
    class; the link to the same file is exercised by the inherited tests.
    """

    def make_vds(self, f):
        """Create 'virtual' in ``f`` through the build_virtual_dataset context."""
        first = h5.VirtualSource(self.f1, 'data', shape=(10,))
        second = h5.VirtualSource(self.f2, 'data', shape=(10,))
        with f.build_virtual_dataset('virtual', (2, 10), dtype='f4') as layout:
            layout[0] = first
            layout[1] = second
@ut.skipUnless(vds_support,
               'VDS requires HDF5 >= 1.9.233')
class VDSUnlimitedTestCase(ut.TestCase):
    """Virtual dataset mapped over an unlimited axis of a resizable source."""

    def setUp(self):
        """Create resize.h5 with a resizable 'source' and a 'virtual' view.

        'virtual' maps column 1 of 'source' using an unlimited selection
        along the first axis.
        """
        self.tmpdir = tempfile.mkdtemp()
        self.path = osp.join(self.tmpdir, "resize.h5")

        source_options = dict(
            data=np.arange(20),
            shape=(10, 2),
            maxshape=(None, 2),
            chunks=(10, 1),
            fillvalue=-1,
        )
        with h5.File(self.path, "w") as f:
            source_dset = f.create_dataset("source", **source_options)

            self.layout = h5.VirtualLayout((10, 1), int, maxshape=(None, 1))
            layout_source = h5.VirtualSource(source_dset)
            self.layout[:h5.UNLIMITED, 0] = layout_source[:h5.UNLIMITED, 1]

            f.create_virtual_dataset("virtual", self.layout)

    def test_unlimited_axis(self):
        """The virtual dataset follows the source as it is resized and written."""
        # Column 1 of the original 10 rows: 1, 3, ..., 19
        initial = np.arange(1, 20, 2).reshape(10, 1)
        # After growing to 20 rows, the new rows hold the source fill value
        after_resize = np.vstack((initial, np.full((10, 1), -1)))
        # After the new rows are overwritten with zeros
        after_write = np.vstack((initial, np.full((10, 1), 0)))

        with h5.File(self.path, "a") as f:
            source_dset = f['source']
            virtual_dset = f['virtual']
            np.testing.assert_array_equal(initial, virtual_dset)

            source_dset.resize(20, axis=0)
            np.testing.assert_array_equal(after_resize, virtual_dset)

            source_dset[10:, 1] = np.zeros((10,), dtype=int)
            np.testing.assert_array_equal(after_write, virtual_dset)

    def tearDown(self):
        """Remove the temporary directory and everything in it."""
        shutil.rmtree(self.tmpdir)
def test_no_mappings(writable_file):
    """A virtual dataset built without any mapping reads back as zeros."""
    shape = (10, 20)
    with writable_file.build_virtual_dataset("foo", shape, np.int32):
        # Intentionally add no sources to the layout.
        pass

    dset = writable_file['foo']
    assert dset.is_virtual
    assert dset.virtual_sources() == []

    expected = np.zeros(shape, np.int32)
    np.testing.assert_array_equal(dset[()], expected)
# Script entry point: hand control to ut.main() when executed directly.
if __name__ == "__main__":
    ut.main()
@@ -0,0 +1,298 @@
'''
Unit test for the low level vds interface for eiger
https://support.hdfgroup.org/HDF5/docNewFeatures/VDS/HDF5-VDS-requirements-use-cases-2014-12-10.pdf
'''
import os.path as osp
import tempfile
import numpy as np
import h5py as h5
from ..common import ut
class TestEigerLowLevel(ut.TestCase):
    """Eiger use case built with the low-level h5s/h5p/h5d interface.

    Four sources (20, 20, 20 and 18 frames) are concatenated along the
    first axis into a (78, 200, 200) virtual dataset.
    """

    def setUp(self):
        """Create the four source files; source ``k`` is filled with ``k``."""
        self.working_dir = tempfile.TemporaryDirectory()
        tmp = self.working_dir.name

        self.fname = ['raw_file_1.h5', 'raw_file_2.h5', 'raw_file_3.h5']
        for fill, basename in enumerate(self.fname):
            with h5.File(osp.join(tmp, basename), 'w') as f:
                f['data'] = np.ones((20, 200, 200))*fill

        # The last source is shorter: 18 frames instead of 20.
        with h5.File(osp.join(tmp, 'raw_file_4.h5'), 'w') as f:
            f['data'] = np.ones((18, 200, 200))*3
        self.fname.append('raw_file_4.h5')
        self.fname = [osp.join(tmp, ix) for ix in self.fname]

    def test_eiger_low_level(self):
        """Register one mapping per source, then read a sample from each."""
        outfile = osp.join(self.working_dir.name, 'eiger.h5')
        unit = (1, 1, 1)

        with h5.File(outfile, 'w', libver='latest') as f:
            vdset_shape = (78, 200, 200)
            virt_dspace = h5.h5s.create_simple(vdset_shape, vdset_shape)
            dcpl = h5.h5p.create(h5.h5p.DATASET_CREATE)
            dcpl.set_fill_value(np.array([-1]))

            # First frame of the virtual dataset taken by the next source
            frame_offset = 0
            for src_path in self.fname:
                with h5.File(src_path, 'r') as in_f:
                    src_shape = in_f['data'].shape

                # Source dataspace, selected in full as a single block
                src_dspace = h5.h5s.create_simple(src_shape, src_shape)
                src_dspace.select_hyperslab(start=(0, 0, 0),
                                            stride=unit,
                                            count=unit,
                                            block=src_shape)

                # Matching block of the virtual dataspace
                virt_dspace.select_hyperslab(start=(frame_offset, 0, 0),
                                             stride=unit,
                                             count=unit,
                                             block=src_shape)

                dcpl.set_virtual(virt_dspace, src_path.encode('utf-8'),
                                 b'data', src_dspace)
                frame_offset += src_shape[0]

            # Create the virtual dataset
            h5.h5d.create(f.id, name=b"data", tid=h5.h5t.NATIVE_INT16,
                          space=virt_dspace, dcpl=dcpl)

        with h5.File(outfile, 'r') as f:
            dset = f['data']
            self.assertEqual(dset[10, 100, 10], 0.0)
            self.assertEqual(dset[30, 100, 100], 1.0)
            self.assertEqual(dset[50, 100, 100], 2.0)
            self.assertEqual(dset[70, 100, 100], 3.0)

    def tearDown(self):
        """Delete the temporary directory."""
        self.working_dir.cleanup()
# Script entry point: hand control to ut.main() when executed directly.
# NOTE(review): this guard sits before the Excalibur and Percival test
# classes defined further down in this module. If ut.main() exits the
# process (as unittest.main() does by default), those later tests are never
# defined when the file is run as a script -- consider moving the guard to
# the end of the file. TODO confirm what ut.main() does.
if __name__ == "__main__":
    ut.main()
'''
Unit test for the low level vds interface for excalibur
https://support.hdfgroup.org/HDF5/docNewFeatures/VDS/HDF5-VDS-requirements-use-cases-2014-12-10.pdf
'''
class ExcaliburData:
    """Detector geometry constants and constant-valued test image builders.

    Dimensions are derived from the ``FEM_*`` class constants and are
    always reported as ``(y_pixels, x_pixels)``.
    """

    FEM_PIXELS_PER_CHIP_X = 256
    FEM_PIXELS_PER_CHIP_Y = 256
    FEM_CHIPS_PER_STRIPE_X = 8
    FEM_CHIPS_PER_STRIPE_Y = 1
    FEM_STRIPES_PER_MODULE = 2

    @property
    def sensor_module_dimensions(self):
        """``(y, x)`` size of a module: stripe height times stripes per module."""
        width = self.FEM_PIXELS_PER_CHIP_X * self.FEM_CHIPS_PER_STRIPE_X
        height = (self.FEM_PIXELS_PER_CHIP_Y
                  * self.FEM_CHIPS_PER_STRIPE_Y
                  * self.FEM_STRIPES_PER_MODULE)
        return (height, width)

    @property
    def fem_stripe_dimensions(self):
        """``(y, x)`` size of a single stripe of chips."""
        width = self.FEM_PIXELS_PER_CHIP_X * self.FEM_CHIPS_PER_STRIPE_X
        height = self.FEM_PIXELS_PER_CHIP_Y * self.FEM_CHIPS_PER_STRIPE_Y
        return (height, width)

    @staticmethod
    def _constant_image(dimensions, value, dtype):
        """Allocate an array of ``dimensions`` and set every element to ``value``."""
        image = np.empty(shape=dimensions, dtype=dtype)
        image.fill(value)
        return image

    def generate_sensor_module_image(self, value, dtype='uint16'):
        """Return a module-sized array in which every pixel equals ``value``."""
        return self._constant_image(self.sensor_module_dimensions, value, dtype)

    def generate_fem_stripe_image(self, value, dtype='uint16'):
        """Return a stripe-sized array in which every pixel equals ``value``."""
        return self._constant_image(self.fem_stripe_dimensions, value, dtype)
class TestExcaliburLowLevel(ut.TestCase):
    """Excalibur use case built with the low-level h5s/h5p/h5d interface.

    Six stripe files of five frames each are stacked along axis 1 with a
    10-row gap between consecutive stripes.
    """

    def create_excalibur_fem_stripe_datafile(self, fname, nframes, excalibur_data,scale):
        """Write ``nframes`` constant stripe images to dataset ``data`` in ``fname``.

        Frame ``i`` is filled with the value ``i * scale``.
        """
        stripe_dims = excalibur_data.fem_stripe_dimensions
        frames_shape = (nframes,) + stripe_dims
        with h5.File(fname, 'w', libver='latest') as f:
            dset = f.create_dataset('data', shape=frames_shape,
                                    maxshape=frames_shape,
                                    chunks=(1,) + stripe_dims,
                                    dtype='uint16')
            for frame in np.arange(nframes):
                dset[frame] = excalibur_data.generate_fem_stripe_image(frame*scale)

    def setUp(self):
        """Create stripe_1.h5 .. stripe_6.h5, using the file index as scale."""
        self.working_dir = tempfile.TemporaryDirectory()
        self.fname = ["stripe_%d.h5" % stripe for stripe in range(1,7)]
        self.fname = [osp.join(self.working_dir.name, ix) for ix in self.fname]
        nframes = 5
        self.edata = ExcaliburData()
        for scale, raw_file in enumerate(self.fname):
            self.create_excalibur_fem_stripe_datafile(raw_file, nframes, self.edata, scale)

    def test_excalibur_low_level(self):
        """Map every stripe at its vertical offset and check values and gaps."""
        excalibur_data = self.edata
        outfile = osp.join(self.working_dir.name, 'excalibur.h5')

        stripe_dims = excalibur_data.fem_stripe_dimensions
        nfiles = len(self.fname)
        vdset_stripe_shape = (1,) + stripe_dims
        vdset_stripe_max_shape = (5,) + stripe_dims
        # All stripes plus a 10-row gap between each pair of neighbours
        vdset_shape = (5,
                       stripe_dims[0] * nfiles + (10 * (nfiles-1)),
                       stripe_dims[1])
        vdset_max_shape = vdset_shape
        vdset_y_offset = 0

        # Create the virtual dataset file
        with h5.File(outfile, 'w', libver='latest') as f:
            # Source and virtual dataset dataspaces
            src_dspace = h5.h5s.create_simple(vdset_stripe_shape, vdset_stripe_max_shape)
            virt_dspace = h5.h5s.create_simple(vdset_shape, vdset_max_shape)

            # Virtual dataset creation property list
            dcpl = h5.h5p.create(h5.h5p.DATASET_CREATE)
            dcpl.set_fill_value(np.array([0x01]))

            # The same source selection is reused for every stripe file
            src_dspace.select_hyperslab(start=(0, 0, 0), count=(1, 1, 1), block=vdset_stripe_max_shape)

            for raw_file in self.fname:
                # Block of the virtual dataset occupied by this stripe
                virt_dspace.select_hyperslab(start=(0, vdset_y_offset, 0),
                                             count=(1, 1, 1),
                                             block=vdset_stripe_max_shape)
                # Point that block at the dataset in the stripe file
                dcpl.set_virtual(virt_dspace, raw_file.encode('utf-8'),
                                 b"/data", src_dspace)
                vdset_y_offset += vdset_stripe_shape[1] + 10

            # Create the virtual dataset
            h5.h5d.create(
                f.id,
                name=b"data",
                tid=h5.h5t.NATIVE_INT16,
                space=virt_dspace,
                dcpl=dcpl,
            )
            assert f['data'].fillvalue == 0x01

        with h5.File(outfile,'r') as f:
            dset = f['data']
            self.assertEqual(dset[3,100,0], 0.0)
            # Row 260 lies in the gap after the first stripe (rows 256-265)
            self.assertEqual(dset[3,260,0], 1.0)
            self.assertEqual(dset[3,350,0], 3.0)
            self.assertEqual(dset[3,650,0], 6.0)
            self.assertEqual(dset[3,900,0], 9.0)
            self.assertEqual(dset[3,1150,0], 12.0)
            self.assertEqual(dset[3,1450,0], 15.0)

    def tearDown(self):
        """Delete the temporary directory."""
        self.working_dir.cleanup()
'''
Unit test for the low level vds interface for percival
https://support.hdfgroup.org/HDF5/docNewFeatures/VDS/HDF5-VDS-requirements-use-cases-2014-12-10.pdf
'''
class TestPercivalLowLevel(ut.TestCase):
    """Percival use case built with the low-level h5s/h5p/h5d interface.

    Frames from four sources are interleaved with stride 4 using unlimited
    hyperslab counts; the fourth source has 19 frames, the others 20.
    """

    def setUp(self):
        """Create the four source files; source ``k`` is filled with ``k``."""
        self.working_dir = tempfile.TemporaryDirectory()
        tmp = self.working_dir.name

        self.fname = ['raw_file_1.h5','raw_file_2.h5','raw_file_3.h5']
        for fill, basename in enumerate(self.fname):
            with h5.File(osp.join(tmp, basename),'w') as f:
                f['data'] = np.ones((20,200,200))*fill

        # The last source is one frame shorter than the others.
        with h5.File(osp.join(tmp, 'raw_file_4.h5'),'w') as f:
            f['data'] = np.ones((19,200,200))*3
        self.fname.append('raw_file_4.h5')
        self.fname = [osp.join(tmp, ix) for ix in self.fname]

    def test_percival_low_level(self):
        """Interleave the sources and check the shape and first eight frames."""
        outfile = osp.join(self.working_dir.name, 'percival.h5')
        num = h5.h5s.UNLIMITED

        with h5.File(outfile, 'w', libver='latest') as f:
            vdset_shape = (1,200,200)
            vdset_max_shape = (num,)+vdset_shape[1:]
            virt_dspace = h5.h5s.create_simple(vdset_shape, vdset_max_shape)
            dcpl = h5.h5p.create(h5.h5p.DATASET_CREATE)
            dcpl.set_fill_value(np.array([-1]))

            # Source number ``position`` starts at that frame of the output
            for position, src_path in enumerate(self.fname):
                with h5.File(src_path, 'r') as in_f:
                    src_shape = in_f['data'].shape

                frame_block = (1,)+src_shape[1:]
                src_dspace = h5.h5s.create_simple(src_shape, (num,)+src_shape[1:])

                # Every frame of the source, one frame per block
                src_dspace.select_hyperslab(start=(0, 0, 0),
                                            stride=(1,1,1),
                                            count=(num, 1, 1),
                                            block=frame_block)

                # Every fourth frame of the virtual dataset
                virt_dspace.select_hyperslab(start=(position, 0, 0),
                                             stride=(4,1,1),
                                             count=(num, 1, 1),
                                             block=frame_block)

                dcpl.set_virtual(virt_dspace, src_path.encode('utf-8'), b'data', src_dspace)

            # Create the virtual dataset
            h5.h5d.create(
                f.id,
                name=b"data",
                tid=h5.h5t.NATIVE_INT16,
                space=virt_dspace,
                dcpl=dcpl,
            )

        with h5.File(outfile, 'r') as f:
            sh = f['data'].shape
            line = f['data'][:8,100,100]

        # The first eight frames cycle through the four sources twice
        expected = np.array(2*list(range(4)))
        self.assertEqual(sh,(79,200,200),)
        np.testing.assert_array_equal(line,expected)

    def tearDown(self):
        """Delete the temporary directory."""
        self.working_dir.cleanup()
def test_virtual_prefix(tmp_path):
    """A virtual prefix on the access property list locates the source file.

    The source lives in directory ``a`` and the virtual dataset in ``b``,
    and the mapping names the source only as 'src.h5'.  Without a prefix
    the source is not found and the fill value is read; with the prefix
    set to ``a`` the real data is read.
    """
    a = tmp_path / 'a'
    b = tmp_path / 'b'
    a.mkdir()
    b.mkdir()

    # Both files stay open for the whole test, as before, but the context
    # managers now close them even when an assertion fails.
    with h5.File(a / 'src.h5', 'w') as src_file:
        src_file['data'] = np.arange(10)

        with h5.File(b / 'vds.h5', 'w') as vds_file:
            layout = h5.VirtualLayout(shape=(10,), dtype=np.int64)
            layout[:] = h5.VirtualSource('src.h5', 'data', shape=(10,))
            vds_file.create_virtual_dataset('data', layout, fillvalue=-1)

            # Path doesn't resolve
            np.testing.assert_array_equal(vds_file['data'], np.full(10, fill_value=-1))

            a_bytes = bytes(a)
            dapl = h5.h5p.create(h5.h5p.DATASET_ACCESS)
            dapl.set_virtual_prefix(a_bytes)
            vds_id = h5.h5d.open(vds_file.id, b'data', dapl=dapl)
            vds = h5.Dataset(vds_id)

            # Now it should find the source file and read the data correctly
            np.testing.assert_array_equal(vds[:], np.arange(10))
            # Check that get_virtual_prefix gives back what we put in
            assert vds.id.get_access_plist().get_virtual_prefix() == a_bytes
@@ -0,0 +1,166 @@
from ..common import ut
import h5py as h5
import numpy as np
class TestVirtualSource(ut.TestCase):
    """Shape bookkeeping of VirtualSource under slicing; no file is read.

    NOTE(review): the commented-out tests below all use a negative step or
    a start greater than stop; presumably they are disabled because
    VirtualSource does not support such slices -- confirm before enabling.
    """

    # Shape shared by most tests
    SHAPE_3D = (20, 30, 30)

    def _source(self, shape=SHAPE_3D):
        """Return a VirtualSource named 'test'/'test' with the given shape."""
        return h5.VirtualSource('test', 'test', shape)

    def test_full_slice(self):
        source = self._source()
        self.assertEqual(source.shape, source[:, :, :].shape)

    # def test_full_slice_inverted(self):
    #     dataset = h5.VirtualSource('test','test',(20,30,30))
    #     sliced = dataset[:,:,::-1]
    #     self.assertEqual(dataset.shape,sliced.shape)
    #
    # def test_subsampled_slice_inverted(self):
    #     dataset = h5.VirtualSource('test','test',(20,30,30))
    #     sliced = dataset[:,:,::-2]
    #     self.assertEqual((20,30,15),sliced.shape)

    def test_integer_indexed(self):
        view = self._source()[5, :, :]
        self.assertEqual((30, 30), view.shape)

    def test_integer_single_indexed(self):
        view = self._source()[5]
        self.assertEqual((30, 30), view.shape)

    def test_two_integer_indexed(self):
        view = self._source()[5, :, 10]
        self.assertEqual((30,), view.shape)

    def test_single_range(self):
        source = self._source()
        view = source[5:10, :, :]
        self.assertEqual((5,) + source.shape[1:], view.shape)

    def test_shape_calculation_positive_step(self):
        """Sliced length matches NumPy for five different stop values."""
        source = self._source((20,))
        reference = np.arange(20)
        matches = [
            reference[2:12 + i:3].size == source[2:12 + i:3].shape[0]
            for i in range(5)
        ]
        self.assertEqual(5, sum(matches))

    # def test_shape_calculation_positive_step_switched_start_stop(self):
    #     dataset = h5.VirtualSource('test','test',(20,))
    #     cmp = []
    #     for i in range(5):
    #         d = dataset[12+i:2:3].shape[0]
    #         ref = np.arange(20)[12+i:2:3].size
    #         cmp.append(ref==d)
    #     self.assertEqual(5, sum(cmp))
    #
    #
    # def test_shape_calculation_negative_step(self):
    #     dataset = h5.VirtualSource('test','test',(20,))
    #     cmp = []
    #     for i in range(5):
    #         d = dataset[12+i:2:-3].shape[0]
    #         ref = np.arange(20)[12+i:2:-3].size
    #         cmp.append(ref==d)
    #     self.assertEqual(5, sum(cmp))
    #
    # def test_shape_calculation_negative_step_switched_start_stop(self):
    #     dataset = h5.VirtualSource('test','test',(20,))
    #     cmp = []
    #     for i in range(5):
    #         d = dataset[2:12+i:-3].shape[0]
    #         ref = np.arange(20)[2:12+i:-3].size
    #         cmp.append(ref==d)
    #     self.assertEqual(5, sum(cmp))

    def test_double_range(self):
        view = self._source()[5:10, :, 20:25]
        self.assertEqual((5, 30, 5), view.shape)

    def test_double_strided_range(self):
        view = self._source()[6:12:2, :, 20:26:3]
        self.assertEqual((3, 30, 2,), view.shape)

    # def test_double_strided_range_inverted(self):
    #     dataset = h5.VirtualSource('test','test',(20,30,30))
    #     sliced = dataset[12:6:-2,:,26:20:-3]
    #     self.assertEqual((3,30,2),sliced.shape)

    def test_negative_start_index(self):
        view = self._source()[-10:16]
        self.assertEqual((6, 30, 30), view.shape)

    def test_negative_stop_index(self):
        view = self._source()[10:-4]
        self.assertEqual((6, 30, 30), view.shape)

    def test_negative_start_and_stop_index(self):
        view = self._source()[-10:-4]
        self.assertEqual((6, 30, 30), view.shape)

    # def test_negative_start_and_stop_and_stride_index(self):
    #     dataset = h5.VirtualSource('test','test',(20,30,30))
    #     sliced = dataset[-4:-10:-2]
    #     self.assertEqual((3,30,30),sliced.shape)
    #

    def test_ellipsis(self):
        source = self._source()
        self.assertEqual(source.shape, source[...].shape)

    def test_ellipsis_end(self):
        source = self._source()
        view = source[0:1, ...]
        self.assertEqual((1,) + source.shape[1:], view.shape)

    def test_ellipsis_start(self):
        source = self._source()
        view = source[..., 0:1]
        self.assertEqual(source.shape[:-1] + (1,), view.shape)

    def test_ellipsis_sandwich(self):
        source = self._source((20, 30, 30, 40))
        view = source[0:1, ..., 5:6]
        self.assertEqual((1,) + source.shape[1:-1] + (1,), view.shape)

    def test_integer_shape(self):
        """A bare integer shape is reported as a 1-tuple."""
        source = h5.VirtualSource('test', 'test', 20)
        self.assertEqual(source.shape, (20,))

    def test_integer_maxshape(self):
        """A bare integer maxshape is reported as a 1-tuple."""
        source = h5.VirtualSource('test', 'test', 20, maxshape=30)
        self.assertEqual(source.maxshape, (30,))

    def test_extra_args(self):
        """Extra arguments are rejected when the source is given as a dataset."""
        with h5.File(name='f1', driver='core',
                     backing_store=False, mode='w') as ftest:
            ftest['a'] = [1, 2, 3]
            dset = ftest['a']

            with self.assertRaises(TypeError):
                h5.VirtualSource(dset, 'b')
            with self.assertRaises(TypeError):
                h5.VirtualSource(dset, shape=(1, ))
            with self.assertRaises(TypeError):
                h5.VirtualSource(dset, maxshape=(None,))
            with self.assertRaises(TypeError):
                h5.VirtualSource(dset, dtype=int)

    def test_repeated_slice(self):
        """Slicing an already-sliced source raises RuntimeError."""
        once_sliced = self._source((20, 30, 30))[5:10, :, :]
        with self.assertRaises(RuntimeError):
            once_sliced[:, :4]
# Script entry point: hand control to ut.main() when executed directly.
if __name__ == "__main__":
    ut.main()
@@ -0,0 +1,65 @@
# This file is part of h5py, a Python interface to the HDF5 library.
#
# http://www.h5py.org
#
# Copyright 2008-2013 Andrew Collette and contributors
#
# License: Standard 3-clause BSD; see "license.txt" for full license terms
# and contributor agreement.
"""
Versioning module for h5py.
"""
from collections import namedtuple
from . import h5 as _h5
import sys
import numpy
# Fields of the h5py version.  All are expected to be integers except ``pre``,
# which is appended to the version string verbatim; fuller validation of
# versions is more than this module needs.
_H5PY_VERSION_CLS = namedtuple(
    "_H5PY_VERSION_CLS",
    ["major", "minor", "bugfix", "pre", "post", "dev"],
)

# HDF5 version h5py was compiled against, as reported by the extension module.
hdf5_built_version_tuple = _h5.HDF5_VERSION_COMPILED_AGAINST

version_tuple = _H5PY_VERSION_CLS(3, 15, 1, None, None, None)

# Assemble the version string: "major.minor.bugfix", followed by the optional
# pre-release tag, ".postN" and ".devN" parts, in that order.
version = "{0.major:d}.{0.minor:d}.{0.bugfix:d}".format(version_tuple)
if version_tuple.pre is not None:
    version = version + version_tuple.pre
if version_tuple.post is not None:
    version = version + ".post{:d}".format(version_tuple.post)
if version_tuple.dev is not None:
    version = version + ".dev{:d}".format(version_tuple.dev)

# HDF5 version reported by the library at import time.
hdf5_version_tuple = _h5.get_libversion()
hdf5_version = "%d.%d.%d" % hdf5_version_tuple

api_version_tuple = (1, 8)
api_version = "%d.%d" % api_version_tuple
# Human-readable summary of the h5py, HDF5, Python and NumPy versions in use,
# filled in once at import time from the values computed above, the running
# interpreter and the build-time constants exposed by the extension module.
info = """\
Summary of the h5py configuration
---------------------------------
h5py %(h5py)s
HDF5 %(hdf5)s
Python %(python)s
sys.platform %(platform)s
sys.maxsize %(maxsize)s
numpy %(numpy)s
cython (built with) %(cython_version)s
numpy (built against) %(numpy_build_version)s
HDF5 (built against) %(hdf5_build_version)s
""" % {
'h5py': version,
'hdf5': hdf5_version,
'python': sys.version,
'platform': sys.platform,
'maxsize': sys.maxsize,
'numpy': numpy.__version__,
'cython_version': _h5.CYTHON_VERSION_COMPILED_WITH,
'numpy_build_version': _h5.NUMPY_VERSION_COMPILED_AGAINST,
'hdf5_build_version': "%d.%d.%d" % hdf5_built_version_tuple,
}