Import tensorflow

2026-02-15 21:45:42 -08:00
parent f3e8b90764
commit c530630153
20524 changed files with 9017694 additions and 25 deletions
@@ -0,0 +1,15 @@
+# This file is part of h5py, a Python interface to the HDF5 library.
+#
+# http://www.h5py.org
+#
+# Copyright 2008-2013 Andrew Collette and contributors
+#
+# License:  Standard 3-clause BSD; see "license.txt" for full license terms
+#           and contributor agreement.
+
+"""
+    This subpackage implements the high-level interface for h5py.
+
+    Don't manually import things from here; the public API lives directly
+    in the top-level package namespace.
+"""
@@ -0,0 +1,277 @@
+# This file is part of h5py, a Python interface to the HDF5 library.
+#
+# http://www.h5py.org
+#
+# Copyright 2008-2013 Andrew Collette and contributors
+#
+# License:  Standard 3-clause BSD; see "license.txt" for full license terms
+#           and contributor agreement.
+
+"""
+    Implements high-level operations for attributes.
+
+    Provides the AttributeManager class, available on high-level objects
+    as <obj>.attrs.
+"""
+
+import numpy
+
+from .. import h5, h5s, h5t, h5a, h5p
+from . import base
+from .base import phil, with_phil, Empty, is_empty_dataspace, product
+from .datatype import Datatype
+
+
+class AttributeManager(base.MutableMappingHDF5, base.CommonStateObject):
+
+    """
+        Allows dictionary-style access to an HDF5 object's attributes.
+
+        These are created exclusively by the library and are available as
+        a Python attribute at <object>.attrs
+
+        Like Group objects, attributes provide a minimal dictionary-
+        style interface.  Anything which can be reasonably converted to a
+        Numpy array or Numpy scalar can be stored.
+
+        Attributes are automatically created on assignment with the
+        syntax <obj>.attrs[name] = value, with the HDF5 type automatically
+        deduced from the value.  Existing attributes are overwritten.
+
+        To modify an existing attribute while preserving its type, use the
+        method modify().  To specify an attribute of a particular type and
+        shape, use create().
+    """
+
+    def __init__(self, parent):
+        """ Private constructor.
+
+        Only the low-level identifier of *parent* (``parent.id``) is kept;
+        every attribute operation below is issued against that identifier.
+        """
+        self._id = parent.id
+
+    @with_phil
+    def __getitem__(self, name):
+        """ Read the value of an attribute.
+
+        Returns an Empty instance for attributes with an empty dataspace,
+        a scalar for zero-dimensional attributes, and a NumPy array
+        otherwise.
+        """
+        attr = h5a.open(self._id, self._e(name))
+        shape = attr.shape
+
+        # shape is None for empty dataspaces
+        if shape is None:
+            return Empty(attr.dtype)
+
+        dtype = attr.dtype
+
+        # Do this first, as we'll be fiddling with the dtype for top-level
+        # array types
+        htype = h5t.py_create(dtype)
+
+        # NumPy doesn't support top-level array types, so we have to "fake"
+        # the correct type and shape for the array.  For example, consider
+        # attr.shape == (5,) and attr.dtype == '(3,)f'. Then:
+        if dtype.subdtype is not None:
+            subdtype, subshape = dtype.subdtype
+            shape = attr.shape + subshape   # (5, 3)
+            dtype = subdtype                # 'f'
+
+        # Read into a freshly allocated buffer, using the HDF5 type built
+        # from the original (un-faked) dtype as the memory type.
+        arr = numpy.zeros(shape, dtype=dtype, order='C')
+        attr.read(arr, mtype=htype)
+
+        string_info = h5t.check_string_dtype(dtype)
+        if string_info and (string_info.length is None):
+            # Vlen strings: convert bytes to Python str
+            arr = numpy.array([
+                b.decode('utf-8', 'surrogateescape') for b in arr.flat
+            ], dtype=dtype).reshape(arr.shape)
+
+        # Zero-dimensional results are unwrapped to a scalar.
+        if arr.ndim == 0:
+            return arr[()]
+        return arr
+
+    def get_id(self, name):
+        """Get a low-level AttrID object for the named attribute.
+        """
+        return h5a.open(self._id, self._e(name))
+
+    @with_phil
+    def __setitem__(self, name, value):
+        """ Set a new attribute, overwriting any existing attribute.
+
+        The type and shape of the attribute are determined from the data.  To
+        use a specific type or shape, or to preserve the type of an attribute,
+        use the methods create() and modify().
+        """
+        self.create(name, data=value)
+
+    @with_phil
+    def __delitem__(self, name):
+        """ Delete an attribute (which must already exist). """
+        h5a.delete(self._id, self._e(name))
+
+    def create(self, name, data, shape=None, dtype=None):
+        """ Create a new attribute, overwriting any existing attribute.
+
+        name
+            Name of the new attribute (required)
+        data
+            An array to initialize the attribute (required)
+        shape
+            Shape of the attribute.  Overrides data.shape if both are
+            given, in which case the total number of points must be unchanged.
+        dtype
+            Data type of the attribute.  Overrides data.dtype if both
+            are given.
+
+        Raises ValueError if an array dtype's sub-shape does not match the
+        trailing axes of the shape, or if the requested shape holds a
+        different number of elements than the data.
+        """
+        name = self._e(name)
+
+        with phil:
+            # First, make sure we have a NumPy array.  We leave the data type
+            # conversion for HDF5 to perform.
+            if not isinstance(data, Empty):
+                data = base.array_for_new_object(data, specified_dtype=dtype)
+
+            if shape is None:
+                shape = data.shape
+            elif isinstance(shape, int):
+                shape = (shape,)
+
+            use_htype = None    # If a committed type is given, we must use it
+                                # in the call to h5a.create.
+
+            if isinstance(dtype, Datatype):
+                use_htype = dtype.id
+                dtype = dtype.dtype
+            elif dtype is None:
+                dtype = data.dtype
+            else:
+                dtype = numpy.dtype(dtype) # In case a string, e.g. 'i8' is passed
+
+            original_dtype = dtype  # We'll need this for top-level array types
+
+            # Where a top-level array type is requested, we have to do some
+            # fiddling around to present the data as a smaller array of
+            # subarrays.
+            if dtype.subdtype is not None:
+
+                subdtype, subshape = dtype.subdtype
+
+                # Make sure the subshape matches the last N axes' sizes.
+                if shape[-len(subshape):] != subshape:
+                    raise ValueError("Array dtype shape %s is incompatible with data shape %s" % (subshape, shape))
+
+                # New "advertised" shape and dtype
+                shape = shape[0:len(shape)-len(subshape)]
+                dtype = subdtype
+
+            # Not an array type; make sure to check the number of elements
+            # is compatible, and reshape if needed.
+            else:
+
+                if shape is not None and product(shape) != product(data.shape):
+                    raise ValueError("Shape of new attribute conflicts with shape of data")
+
+                if shape != data.shape:
+                    data = data.reshape(shape)
+
+            # We need this to handle special string types.
+            if not isinstance(data, Empty):
+                data = numpy.asarray(data, dtype=dtype)
+
+            # Make HDF5 datatype and dataspace for the H5A calls
+            if use_htype is None:
+                htype = h5t.py_create(original_dtype, logical=True)
+                htype2 = h5t.py_create(original_dtype)  # Must be bit-for-bit representation rather than logical
+            else:
+                htype = use_htype
+                htype2 = None
+
+            # Empty data gets a NULL dataspace; everything else a simple one
+            # with the (possibly adjusted) shape computed above.
+            if isinstance(data, Empty):
+                space = h5s.create(h5s.NULL)
+            else:
+                space = h5s.create_simple(shape)
+
+            # For a long time, h5py would create attributes with a random name
+            # and then rename them, imitating how you can atomically replace
+            # a file in a filesystem. But HDF5 does not offer atomic replacement
+            # (you have to delete the existing attribute first), and renaming
+            # exposes some bugs - see https://github.com/h5py/h5py/issues/1385
+            # So we've gone back to the simpler delete & recreate model.
+            if h5a.exists(self._id, name):
+                h5a.delete(self._id, name)
+
+            attr = h5a.create(self._id, name, htype, space)
+            try:
+                if not isinstance(data, Empty):
+                    attr.write(data, mtype=htype2)
+            except:
+                # Bare except is used only for cleanup: whatever went wrong,
+                # close and remove the half-written attribute, then re-raise
+                # the original exception unchanged.
+                attr.close()
+                h5a.delete(self._id, name)
+                raise
+            attr.close()
+
+    def modify(self, name, value):
+        """ Change the value of an attribute while preserving its type.
+
+        Differs from __setitem__ in that if the attribute already exists, its
+        type is preserved.  This can be very useful for interacting with
+        externally generated files.
+
+        If the attribute doesn't exist, it will be automatically created.
+
+        Raises OSError if the existing attribute has an empty dataspace, and
+        TypeError if the shape of *value* is incompatible with the existing
+        attribute.
+        """
+        with phil:
+            if not name in self:
+                self[name] = value
+            else:
+                attr = h5a.open(self._id, self._e(name))
+
+                if is_empty_dataspace(attr):
+                    raise OSError("Empty attributes can't be modified")
+
+                # If the input data is already an array, let HDF5 do the conversion.
+                # If it's a list or similar, don't make numpy guess a dtype for it.
+                dt = None if isinstance(value, numpy.ndarray) else attr.dtype
+                value = numpy.asarray(value, order='C', dtype=dt)
+
+                # Allow the case of () <-> (1,)
+                if (value.shape != attr.shape) and not \
+                   (value.size == 1 and product(attr.shape) == 1):
+                    raise TypeError("Shape of data is incompatible with existing attribute")
+                attr.write(value)
+
+    @with_phil
+    def __len__(self):
+        """ Number of attributes attached to the object. """
+        # I expect we will not have more than 2**32 attributes
+        return h5a.get_num_attrs(self._id)
+
+    def __iter__(self):
+        """ Iterate over the names of attributes. """
+        with phil:
+
+            attrlist = []
+            def iter_cb(name, *args):
+                """ Callback to gather attribute names """
+                attrlist.append(self._d(name))
+
+            # Iterate in creation order when the object's creation property
+            # list says it is tracked; otherwise fall back to name order.
+            cpl = self._id.get_create_plist()
+            crt_order = cpl.get_attr_creation_order()
+            cpl.close()
+            if crt_order & h5p.CRT_ORDER_TRACKED:
+                idx_type = h5.INDEX_CRT_ORDER
+            else:
+                idx_type = h5.INDEX_NAME
+
+            h5a.iterate(self._id, iter_cb, index_type=idx_type)
+
+        # All names were collected above; they are yielded here, after the
+        # lock has been released.
+        for name in attrlist:
+            yield name
+
+    @with_phil
+    def __contains__(self, name):
+        """ Determine if an attribute exists, by name. """
+        return h5a.exists(self._id, self._e(name))
+
+    @with_phil
+    def __repr__(self):
+        """ Short description; a falsy identifier is reported as closed. """
+        if not self._id:
+            return "<Attributes of closed HDF5 object>"
+        return "<Attributes of HDF5 object at %s>" % id(self._id)
@@ -0,0 +1,535 @@
+# This file is part of h5py, a Python interface to the HDF5 library.
+#
+# http://www.h5py.org
+#
+# Copyright 2008-2013 Andrew Collette and contributors
+#
+# License:  Standard 3-clause BSD; see "license.txt" for full license terms
+#           and contributor agreement.
+
+"""
+    Implements operations common to all high-level objects (File, etc.).
+"""
+
+from collections.abc import (
+    Mapping, MutableMapping, KeysView, ValuesView, ItemsView
+)
+import os
+import posixpath
+
+import numpy as np
+
+# The high-level interface is serialized; every public API function & method
+# is wrapped in a lock.  We reuse the low-level lock because (1) it's fast,
+# and (2) it eliminates the possibility of deadlocks due to out-of-order
+# lock acquisition.
+from .._objects import phil, with_phil
+from .. import h5d, h5i, h5r, h5p, h5f, h5t, h5s
+from .compat import filename_encode
+
+
+def is_hdf5(fname):
+    """ Report whether *fname* names a valid HDF5 file.
+
+    The path is made absolute first.  Anything that is not an existing
+    regular file yields False without consulting the HDF5 library.
+    """
+    with phil:
+        abs_path = os.path.abspath(os.fspath(fname))
+
+        # Guard clause: missing paths and non-files are never HDF5.
+        if not os.path.isfile(abs_path):
+            return False
+        return h5f.is_hdf5(filename_encode(abs_path))
+
+
+def find_item_type(data):
+    """Return the single item type shared by a (possibly nested) collection.
+
+    E.g. [[['a']]] -> str
+
+    Only homogeneous collections are of interest: if the items do not all
+    have the same type (or there are no items), None is returned.
+
+    NumPy arrays of plain Python objects are treated like ordinary Python
+    collections, whereas arrays with a specific dtype yield None.  Only
+    array-like collections - lists and tuples, possibly nested - are
+    descended into; anything else (including sets and dicts) is treated as
+    a single item and its own type is returned.
+    """
+    # Anything that is not an array-like container is a leaf.
+    if not isinstance(data, (np.ndarray, list, tuple)):
+        return type(data)
+
+    if isinstance(data, np.ndarray):
+        is_plain_object_array = (
+            data.dtype.kind == 'O'
+            and not h5t.check_string_dtype(data.dtype)
+            and not h5t.check_vlen_dtype(data.dtype)
+        )
+        if not is_plain_object_array:
+            return None
+        candidates = {type(elem) for elem in data.flat}
+    else:
+        # list / tuple: recurse so that nesting depth does not matter
+        candidates = {find_item_type(elem) for elem in data}
+
+    return candidates.pop() if len(candidates) == 1 else None
+
+
+def guess_dtype(data):
+    """ Try to pick a suitable dtype for *data*.
+
+    Returns None when nothing special applies, i.e. when the choice should
+    be left up to the array constructor.
+    """
+    with phil:
+        # Reference objects map straight onto the matching reference dtype.
+        # The RegionReference test is kept ahead of the Reference test.
+        if isinstance(data, h5r.RegionReference):
+            return h5t.regionref_dtype
+        if isinstance(data, h5r.Reference):
+            return h5t.ref_dtype
+
+        # Homogeneous str / bytes collections get an h5py string dtype.
+        element_type = find_item_type(data)
+        if element_type is str:
+            return h5t.string_dtype()
+        if element_type is bytes:
+            return h5t.string_dtype(encoding='ascii')
+
+        return None
+
+
+def is_float16_dtype(dt):
+    """Return True if *dt* describes a 2-byte floating point type.
+
+    *dt* may be None (always False) or anything np.dtype() accepts.
+    """
+    if dt is not None:
+        # np.dtype() turns strings and scalar types into dtype objects
+        normalized = np.dtype(dt)
+        return normalized.kind == 'f' and normalized.itemsize == 2
+    return False
+
+
+def array_for_new_object(data, specified_dtype=None):
+    """Prepare an array from data used to create a new dataset or attribute
+
+    data
+        Object to convert; it is passed to np.asarray().
+    specified_dtype
+        Optional dtype requested by the caller.  Anything that is not
+        already an np.dtype (or None) is normalised with np.dtype().
+
+    Returns the result of np.asarray(data, order="C", ...), re-viewed with
+    the chosen dtype whenever a dtype was chosen.
+    """
+
+    if not isinstance(specified_dtype, (np.dtype, type(None))):
+        specified_dtype = np.dtype(specified_dtype)
+
+    # We mostly let HDF5 convert data as necessary when it's written.
+    # But if we are going to a float16 datatype, pre-convert in python
+    # to workaround a bug in the conversion.
+    # https://github.com/h5py/h5py/issues/819
+    if is_float16_dtype(specified_dtype):
+        as_dtype = specified_dtype
+    elif not isinstance(data, np.ndarray) and (specified_dtype is not None):
+        # If we need to convert e.g. a list to an array, don't leave numpy
+        # to guess a dtype we already know.
+        as_dtype = specified_dtype
+    else:
+        # Either data is already an array or no dtype was requested:
+        # fall back to guess_dtype(), which may return None.
+        as_dtype = guess_dtype(data)
+
+    data = np.asarray(data, order="C", dtype=as_dtype)
+
+    # In most cases, this does nothing. But if data was already an array,
+    # and as_dtype is a tagged h5py dtype (e.g. for an object array of strings),
+    # asarray() doesn't replace its dtype object. This gives it the tagged dtype:
+    if as_dtype is not None:
+        data = data.view(dtype=as_dtype)
+
+    return data
+
+
+def default_lapl():
+    """ Default link access property list
+
+    Currently always None, i.e. no explicit property list object is built.
+    """
+    return None
+
+
+def default_lcpl():
+    """ Default link creation property list
+
+    Returns a new LINK_CREATE property list on which creation of
+    intermediate groups has been switched on.
+    """
+    lcpl = h5p.create(h5p.LINK_CREATE)
+    lcpl.set_create_intermediate_group(True)
+    return lcpl
+
+# Module-level defaults, built once at import time.  They are what
+# CommonStateObject._lapl and CommonStateObject._lcpl return.
+dlapl = default_lapl()
+dlcpl = default_lcpl()
+
+
+def is_empty_dataspace(obj):
+    """ Tell whether the dataspace of *obj* is of the NULL (empty) kind.
+
+    *obj* must provide get_space(); the simple extent type of the returned
+    dataspace is compared against h5s.NULL.
+    """
+    extent_type = obj.get_space().get_simple_extent_type()
+    return bool(extent_type == h5s.NULL)
+
+
+class CommonStateObject:
+
+    """
+        Mixin giving high-level objects access to the shared default
+        property lists, plus helpers to encode and decode HDF5 names.
+
+        The host class is expected to expose a ".id" attribute returning a
+        low-level ObjectID subclass.
+    """
+
+    @property
+    def _lapl(self):
+        """ Link access property list to use for this object. """
+        return dlapl
+
+    @property
+    def _lcpl(self):
+        """ Link creation property list to use for this object. """
+        return dlcpl
+
+    def _e(self, name, lcpl=None):
+        """ Encode a name for use with the HDF5 library.
+
+        Returns the encoded name, or the 2-tuple (name, lcpl) when the
+        *lcpl* argument is true.
+
+        - bytes are passed through untouched and tagged h5t.CSET_ASCII
+        - str is encoded as ASCII when possible (h5t.CSET_ASCII), otherwise
+          as UTF-8 (h5t.CSET_UTF8)
+
+        A name of None gives None, or (None, None) when *lcpl* is true.
+        Any other type raises TypeError.
+        """
+        if name is None:
+            if lcpl:
+                return (None, None)
+            return None
+
+        if isinstance(name, bytes):
+            charset = h5t.CSET_ASCII
+        elif isinstance(name, str):
+            try:
+                name = name.encode('ascii')
+            except UnicodeEncodeError:
+                # Not pure ASCII: fall back to UTF-8
+                name = name.encode('utf8')
+                charset = h5t.CSET_UTF8
+            else:
+                charset = h5t.CSET_ASCII
+        else:
+            raise TypeError(f"A name should be string or bytes, not {type(name)}")
+
+        if not lcpl:
+            return name
+
+        # Hand back a copy of the default link creation property list,
+        # tagged with the character set chosen above.
+        plist = self._lcpl.copy()
+        plist.set_char_encoding(charset)
+        return name, plist
+
+    def _d(self, name):
+        """ Decode a name coming back from the HDF5 library.
+
+        The bytes are decoded as UTF-8; if that fails they are returned
+        unchanged.  None is passed through as None.
+        """
+        if name is None:
+            return None
+
+        try:
+            decoded = name.decode('utf8')
+        except UnicodeDecodeError:
+            return name
+        return decoded
+
+
+class _RegionProxy:
+
+    """
+        Proxy object which handles region references.
+
+        To create a new region reference (datasets only), use slicing syntax:
+
+            >>> newref = obj.regionref[0:10:2]
+
+        To determine the target dataset shape from an existing reference:
+
+            >>> shape = obj.regionref.shape(existingref)
+
+        where <obj> may be any object in the file. To determine the shape of
+        the selection in use on the target dataset:
+
+            >>> selection_shape = obj.regionref.selection(existingref)
+    """
+
+    def __init__(self, obj):
+        """ Remember the high-level object and its low-level identifier. """
+        self.obj = obj
+        self.id = obj.id
+
+    def __getitem__(self, args):
+        """ Create a region reference for the selection given by *args*.
+
+        Raises TypeError unless the wrapped identifier is a DatasetID.
+        """
+        if not isinstance(self.id, h5d.DatasetID):
+            raise TypeError("Region references can only be made to datasets")
+        # Imported here rather than at module level
+        # (presumably to avoid a circular import - TODO confirm).
+        from . import selections
+        with phil:
+            selection = selections.select(self.id.shape, args, dataset=self.obj)
+            return h5r.create(self.id, b'.', h5r.DATASET_REGION, selection.id)
+
+    def shape(self, ref):
+        """ Get the shape of the target dataspace referred to by *ref*. """
+        with phil:
+            sid = h5r.get_region(ref, self.id)
+            return sid.shape
+
+    def selection(self, ref):
+        """ Get the shape of the target dataspace selection referred to by *ref*
+        """
+        from . import selections
+        with phil:
+            sid = h5r.get_region(ref, self.id)
+            return selections.guess_shape(sid)
+
+
+class HLObject(CommonStateObject):
+
+    """
+        Base class for high-level interface objects.
+
+        Wraps a low-level identifier (stored as ``_id``, exposed as ``id``)
+        and derives name, parent, references and attributes from it.
+    """
+
+    @property
+    def file(self):
+        """ Return a File instance associated with this object """
+        # Imported here rather than at module level
+        # (presumably to avoid a circular import - TODO confirm).
+        from . import files
+        with phil:
+            return files.File(self.id)
+
+    @property
+    @with_phil
+    def name(self):
+        """ Return the full name of this object.  None if anonymous. """
+        return self._d(h5i.get_name(self.id))
+
+    @property
+    @with_phil
+    def parent(self):
+        """Return the parent group of this object.
+
+        This is always equivalent to obj.file[posixpath.dirname(obj.name)].
+        ValueError if this object is anonymous.
+        """
+        if self.name is None:
+            raise ValueError("Parent of an anonymous object is undefined")
+        return self.file[posixpath.dirname(self.name)]
+
+    @property
+    @with_phil
+    def id(self):
+        """ Low-level identifier appropriate for this object """
+        return self._id
+
+    @property
+    @with_phil
+    def ref(self):
+        """ An (opaque) HDF5 reference to this object """
+        return h5r.create(self.id, b'.', h5r.OBJECT)
+
+    @property
+    @with_phil
+    def regionref(self):
+        """Create a region reference (Datasets only).
+
+        The syntax is regionref[<slices>]. For example, dset.regionref[...]
+        creates a region reference in which the whole dataset is selected.
+
+        Can also be used to determine the shape of the referenced dataset
+        (via .shape property), or the shape of the selection (via the
+        .selection property).
+        """
+        return _RegionProxy(self)
+
+    @property
+    def attrs(self):
+        """ Attributes attached to this object """
+        # A new AttributeManager is built on every access.
+        from . import attrs
+        with phil:
+            return attrs.AttributeManager(self)
+
+    @with_phil
+    def __init__(self, oid):
+        """ Setup this object, given its low-level identifier """
+        self._id = oid
+
+    @with_phil
+    def __hash__(self):
+        """ Hash of the low-level identifier. """
+        return hash(self.id)
+
+    @with_phil
+    def __eq__(self, other):
+        """ Compare low-level identifiers.
+
+        Objects without an ``id`` attribute yield NotImplemented, so Python
+        falls back to its default comparison handling.
+        """
+        if hasattr(other, 'id'):
+            return self.id == other.id
+        return NotImplemented
+
+    def __bool__(self):
+        """ Truth value of the low-level identifier. """
+        with phil:
+            return bool(self.id)
+
+    def __getnewargs__(self):
+        """Disable pickle.
+
+        Handles for HDF5 objects can't be reliably deserialised, because the
+        recipient may not have access to the same files. So we do this to
+        fail early.
+
+        If you really want to pickle h5py objects and can live with some
+        limitations, look at the h5pickle project on PyPI.
+        """
+        raise TypeError("h5py objects cannot be pickled")
+
+    def __getstate__(self):
+        # Pickle protocols 0 and 1 use this instead of __getnewargs__
+        raise TypeError("h5py objects cannot be pickled")
+
+# --- Dictionary-style interface ----------------------------------------------
+
+# To implement the dictionary-style interface from groups and attributes,
+# we inherit from the appropriate abstract base classes in collections.
+#
+# All locking is taken care of by the subclasses.
+# We have to override ValuesView and ItemsView here because Group and
+# AttributeManager can only test for key names.
+
+
+class KeysViewHDF5(KeysView):
+
+    """
+        Key view whose str()/repr() lists the keys, and which can be
+        iterated in reverse when the underlying mapping supports it.
+    """
+
+    def __repr__(self):
+        return "<KeysViewHDF5 {}>".format(list(self))
+
+    # str() and repr() share one implementation
+    __str__ = __repr__
+
+    def __reversed__(self):
+        for key in reversed(self._mapping):
+            yield key
+
+class ValuesViewHDF5(ValuesView):
+
+    """
+        Value view over e.g. a Group or AttributeManager.
+
+        Membership tests are slow: every link or attribute has to be
+        fetched and compared in turn.
+    """
+
+    def __contains__(self, value):
+        with phil:
+            mapping = self._mapping
+            return any(value == mapping.get(key) for key in mapping)
+
+    def __iter__(self):
+        with phil:
+            mapping = self._mapping
+            for key in mapping:
+                yield mapping.get(key)
+
+    def __reversed__(self):
+        with phil:
+            mapping = self._mapping
+            for key in reversed(mapping):
+                yield mapping.get(key)
+
+
+class ItemsViewHDF5(ItemsView):
+
+    """
+        Items view over e.g. a Group or AttributeManager, producing
+        (key, value) pairs.
+    """
+
+    def __contains__(self, item):
+        with phil:
+            key, expected = item
+            mapping = self._mapping
+            if key not in mapping:
+                return False
+            return expected == mapping.get(key)
+
+    def __iter__(self):
+        with phil:
+            mapping = self._mapping
+            for key in mapping:
+                yield (key, mapping.get(key))
+
+    def __reversed__(self):
+        with phil:
+            mapping = self._mapping
+            for key in reversed(mapping):
+                yield (key, mapping.get(key))
+
+
+class MappingHDF5(Mapping):
+
+    """
+        Wraps a Group, AttributeManager or DimensionManager object to provide
+        an immutable mapping interface.
+
+        We don't inherit directly from MutableMapping because certain
+        subclasses, for example DimensionManager, are read-only.
+
+        keys(), values() and items() return the HDF5-specific view classes
+        defined in this module instead of the collections.abc defaults.
+    """
+    def keys(self):
+        """ Get a view object on member names """
+        return KeysViewHDF5(self)
+
+    def values(self):
+        """ Get a view object on member objects """
+        return ValuesViewHDF5(self)
+
+    def items(self):
+        """ Get a view object on member items """
+        return ItemsViewHDF5(self)
+
+    def _ipython_key_completions_(self):
+        """ Custom tab completions for __getitem__ in IPython >=5.0.
+
+        Returns the member names as a sorted list.
+        """
+        return sorted(self.keys())
+
+
+class MutableMappingHDF5(MappingHDF5, MutableMapping):
+
+    """
+        Wraps a Group or AttributeManager object to provide a mutable
+        mapping interface, in contrast to the read-only mapping of
+        MappingHDF5.
+
+        Adds nothing of its own: it only combines MappingHDF5 with
+        collections.abc.MutableMapping.
+    """
+
+    pass
+
+
+class Empty:
+
+    """
+        Stand-in for data with an empty/null dataspace (a.k.a H5S_NULL).
+
+        It carries a dtype, but has neither shape nor data.  Note that this
+        is different from an array with shape (0,).
+    """
+    # No shape and no size: there is no data at all.
+    shape = None
+    size = None
+
+    def __init__(self, dtype):
+        self.dtype = np.dtype(dtype)
+
+    def __eq__(self, other):
+        # Equal only to another Empty carrying the same dtype
+        return bool(isinstance(other, Empty) and self.dtype == other.dtype)
+
+    def __repr__(self):
+        return "Empty(dtype={0!r})".format(self.dtype)
+
+
+def product(nums):
+    """Multiply together all the numbers in *nums*.
+
+    An empty iterable gives 1.  For short inputs such as shape tuples this
+    plain loop is much quicker than going through numpy.prod().
+    """
+    total = 1
+    factors = iter(nums)
+    for factor in factors:
+        total *= factor
+    return total
+
+
+# Simple variant of cached_property:
+# Unlike functools, this has no locking, so we don't have to worry about
+# deadlocks with phil (see issue gh-2064). Unlike cached-property on PyPI, it
+# doesn't try to import asyncio (which can be ~100 extra modules).
+# Many projects seem to have similar variants of this, often without attribution,
+# but to be cautious, this code comes from cached-property (Copyright (c) 2015,
+# Daniel Greenfeld, BSD license), where it is attributed to bottle (Copyright
+# (c) 2009-2022, Marcel Hellkamp, MIT license).
+
+class cached_property:
+    """Descriptor that computes a value once and caches it on the instance.
+
+    The result is stored in the instance ``__dict__`` under the wrapped
+    function's name.  There is no locking.
+    """
+
+    def __init__(self, func):
+        self.func = func
+        self.__doc__ = func.__doc__
+
+    def __get__(self, obj, cls):
+        if obj is not None:
+            # Compute, then stash the result under the function's name
+            result = self.func(obj)
+            obj.__dict__[self.func.__name__] = result
+            return result
+        # Accessed on the class itself: hand back the descriptor
+        return self
@@ -0,0 +1,46 @@
+"""
+Compatibility module for high-level h5py
+"""
+import os
+import sys
+from ..version import hdf5_built_version_tuple
+
+# HDF5 supported passing paths as UTF-8 for Windows from 1.10.6, but this
+# was broken again in 1.14.4 - https://github.com/HDFGroup/hdf5/issues/5037 .
+# The change was reverted in 1.14.6.
+# WINDOWS_ENCODING is the codec filename_encode() / filename_decode() use
+# when sys.platform == "win32"; it is chosen from the HDF5 version h5py was
+# built against.
+if (1, 14, 4) <= hdf5_built_version_tuple < (1, 14, 6):
+    WINDOWS_ENCODING = "mbcs"
+else:
+    WINDOWS_ENCODING = "utf-8"
+
+
+def filename_encode(filename):
+    """
+    Turn *filename* into bytes suitable for the HDF5 library.
+
+    Because HDF5 treats filenames differently from one system to another,
+    every filename handed to the HDF5 library should go through this
+    function first. The h5py documentation on filenames has the details.
+    """
+    path = os.fspath(filename)
+    on_windows = sys.platform == "win32"
+    if on_windows and isinstance(path, str):
+        return path.encode(WINDOWS_ENCODING, "strict")
+    return os.fsencode(path)
+
+
+def filename_decode(filename):
+    """
+    Turn a filename coming from the HDF5 library into str.
+
+    Because HDF5 treats filenames differently from one system to another,
+    every filename received from the HDF5 library should go through this
+    function. The h5py documentation on filenames has the details.
+
+    Raises TypeError for anything that is neither str nor bytes.
+    """
+    if not isinstance(filename, (str, bytes)):
+        raise TypeError(f"expect bytes or str, not {type(filename).__name__}")
+
+    if isinstance(filename, bytes) and sys.platform == "win32":
+        return filename.decode(WINDOWS_ENCODING, "strict")
+    return os.fsdecode(filename)
@@ -0,0 +1,55 @@
+# This file is part of h5py, a Python interface to the HDF5 library.
+#
+# http://www.h5py.org
+#
+# Copyright 2008-2013 Andrew Collette and contributors
+#
+# License:  Standard 3-clause BSD; see "license.txt" for full license terms
+#           and contributor agreement.
+
+"""
+    Implements high-level access to committed datatypes in the file.
+"""
+
+import posixpath as pp
+
+from ..h5t import TypeID
+from .base import HLObject, with_phil
+
+class Datatype(HLObject):
+
+    """
+        Represents an HDF5 named datatype stored in a file.
+
+        To store a datatype, simply assign it to a name in a group:
+
+        >>> MyGroup["name"] = numpy.dtype("f")
+        >>> named_type = MyGroup["name"]
+        >>> assert named_type.dtype == numpy.dtype("f")
+    """
+
+    @property
+    @with_phil
+    def dtype(self):
+        """Numpy dtype equivalent for this datatype"""
+        return self.id.dtype
+
+    @with_phil
+    def __init__(self, bind):
+        """ Create a new Datatype object by binding to a low-level TypeID.
+
+        Raises ValueError if *bind* is not a TypeID instance.
+        """
+        if not isinstance(bind, TypeID):
+            # Wrap the argument in a 1-tuple: a bare tuple would be unpacked
+            # by the % operator and raise TypeError instead of the intended
+            # ValueError.
+            raise ValueError("%s is not a TypeID" % (bind,))
+        super().__init__(bind)
+
+    @with_phil
+    def __repr__(self):
+        """ Describe the type by its base name and dtype string.
+
+        A falsy identifier is reported as a closed type, and an object
+        without a name as anonymous.
+        """
+        if not self.id:
+            return "<Closed HDF5 named type>"
+        if self.name is None:
+            namestr = '("anonymous")'
+        else:
+            # Show only the last path component; an empty base name is
+            # displayed as "/".
+            name = pp.basename(pp.normpath(self.name))
+            namestr = '"%s"' % (name if name != '' else '/')
+        return '<HDF5 named type %s (dtype %s)>' % \
+            (namestr, self.dtype.str)
@@ -0,0 +1,181 @@
+# This file is part of h5py, a Python interface to the HDF5 library.
+#
+# http://www.h5py.org
+#
+# Copyright 2008-2013 Andrew Collette and contributors
+#
+# License:  Standard 3-clause BSD; see "license.txt" for full license terms
+#           and contributor agreement.
+
+"""
+    Implements support for HDF5 dimension scales.
+"""
+
+import warnings
+
+from .. import h5ds
+from ..h5py_warnings import H5pyDeprecationWarning
+from . import base
+from .base import phil, with_phil
+from .dataset import Dataset
+
+
+class DimensionProxy(base.CommonStateObject):
+
+    """
+        Represents an HDF5 "dimension".
+    """
+
+    @property
+    @with_phil
+    def label(self):
+        """ Get or set the dimension scale label """
+        return self._d(h5ds.get_label(self._id, self._dimension))
+
+    @label.setter
+    @with_phil
+    def label(self, val):
+        # pylint: disable=missing-docstring
+        h5ds.set_label(self._id, self._dimension, self._e(val))
+
+    @with_phil
+    def __init__(self, id_, dimension):
+        self._id = id_
+        self._dimension = dimension
+
+    @with_phil
+    def __hash__(self):
+        return hash((type(self), self._id, self._dimension))
+
+    @with_phil
+    def __eq__(self, other):
+        return hash(self) == hash(other)
+
+    @with_phil
+    def __iter__(self):
+        yield from self.keys()
+
+    @with_phil
+    def __len__(self):
+        return h5ds.get_num_scales(self._id, self._dimension)
+
+    @with_phil
+    def __getitem__(self, item):
+
+        if isinstance(item, int):
+            scales = []
+            h5ds.iterate(self._id, self._dimension, scales.append, 0)
+            return Dataset(scales[item])
+
+        else:
+            def f(dsid):
+                """ Iterate over scales to find a matching name """
+                if h5ds.get_scale_name(dsid) == self._e(item):
+                    return dsid
+
+            res = h5ds.iterate(self._id, self._dimension, f, 0)
+            if res is None:
+                raise KeyError(item)
+            return Dataset(res)
+
+    def attach_scale(self, dset):
+        """ Attach a scale to this dimension.
+
+        Provide the Dataset of the scale you would like to attach.
+        """
+        with phil:
+            h5ds.attach_scale(self._id, dset.id, self._dimension)
+
+    def detach_scale(self, dset):
+        """ Remove a scale from this dimension.
+
+        Provide the Dataset of the scale you would like to remove.
+        """
+        with phil:
+            h5ds.detach_scale(self._id, dset.id, self._dimension)
+
+    def items(self):
+        """ Get a list of (name, Dataset) pairs with all scales on this
+        dimension.
+        """
+        with phil:
+            scales = []
+
+            # H5DSiterate raises an error if there are no dimension scales,
+            # rather than iterating 0 times.  See #483.
+            if len(self) > 0:
+                h5ds.iterate(self._id, self._dimension, scales.append, 0)
+
+            return [
+                (self._d(h5ds.get_scale_name(x)), Dataset(x))
+                for x in scales
+                ]
+
+    def keys(self):
+        """ Get a list of names for the scales on this dimension. """
+        with phil:
+            return [key for (key, _) in self.items()]
+
+    def values(self):
+        """ Get a list of Dataset for scales on this dimension. """
+        with phil:
+            return [val for (_, val) in self.items()]
+
+    @with_phil
+    def __repr__(self):
+        if not self._id:
+            return "<Dimension of closed HDF5 dataset>"
+        return ('<"%s" dimension %d of HDF5 dataset at %s>'
+               % (self.label, self._dimension, id(self._id)))
+
+
+class DimensionManager(base.CommonStateObject):
+
+    """
+        Represents a collection of dimension associated with a dataset.
+
+        Like AttributeManager, an instance of this class is returned when
+        accessing the ".dims" property on a Dataset.
+    """
+
+    @with_phil
+    def __init__(self, parent):
+        """ Private constructor.
+        """
+        self._id = parent.id
+
+    @with_phil
+    def __getitem__(self, index):
+        """ Return a Dimension object
+        """
+        if index > len(self) - 1:
+            raise IndexError('Index out of range')
+        return DimensionProxy(self._id, index)
+
+    @with_phil
+    def __len__(self):
+        """ Number of dimensions associated with the dataset. """
+        return self._id.rank
+
+    @with_phil
+    def __iter__(self):
+        """ Iterate over the dimensions. """
+        for i in range(len(self)):
+            yield self[i]
+
+    @with_phil
+    def __repr__(self):
+        if not self._id:
+            return "<Dimensions of closed HDF5 dataset>"
+        return "<Dimensions of HDF5 object at %s>" % id(self._id)
+
+    def create_scale(self, dset, name=''):
+        """ Create a new dimension, from an initial scale.
+
+        Provide the dataset and a name for the scale.
+        """
+        warnings.warn("other_ds.dims.create_scale(ds, name) is deprecated. "
+                      "Use ds.make_scale(name) instead.",
+                      H5pyDeprecationWarning, stacklevel=2,
+                     )
+        dset.make_scale(name)
@@ -0,0 +1,664 @@
+# This file is part of h5py, a Python interface to the HDF5 library.
+#
+# http://www.h5py.org
+#
+# Copyright 2008-2013 Andrew Collette and contributors
+#
+# License:  Standard 3-clause BSD; see "license.txt" for full license terms
+#           and contributor agreement.
+
+"""
+    Implements high-level support for HDF5 file objects.
+"""
+
+import inspect
+import os
+import sys
+from warnings import warn
+
+from .compat import filename_decode, filename_encode
+
+from .base import phil, with_phil
+from .group import Group
+from .. import h5, h5f, h5p, h5i, h5fd, _objects
+from .. import version
+
+mpi = h5.get_config().mpi
+ros3 = h5.get_config().ros3
+direct_vfd = h5.get_config().direct_vfd
+hdf5_version = version.hdf5_version_tuple[0:3]
+
+swmr_support = True
+
+
+libver_dict = {'earliest': h5f.LIBVER_EARLIEST, 'latest': h5f.LIBVER_LATEST,
+               'v108': h5f.LIBVER_V18, 'v110': h5f.LIBVER_V110}
+libver_dict_r = dict((y, x) for x, y in libver_dict.items())
+
+if hdf5_version >= (1, 11, 4):
+    libver_dict.update({'v112': h5f.LIBVER_V112})
+    libver_dict_r.update({h5f.LIBVER_V112: 'v112'})
+
+if hdf5_version >= (1, 13, 0):
+    libver_dict.update({'v114': h5f.LIBVER_V114})
+    libver_dict_r.update({h5f.LIBVER_V114: 'v114'})
+
+if hdf5_version >= (2, 0, 0):
+    libver_dict.update({'v200': h5f.LIBVER_V200})
+    libver_dict_r.update({h5f.LIBVER_V200: 'v200'})
+
+
+def _set_fapl_mpio(plist, **kwargs):
+    """Set file access property list for mpio driver"""
+    if not mpi:
+        raise ValueError("h5py was built without MPI support, can't use mpio driver")
+
+    import mpi4py.MPI
+    kwargs.setdefault('info', mpi4py.MPI.Info())
+    plist.set_fapl_mpio(**kwargs)
+
+
+def _set_fapl_fileobj(plist, **kwargs):
+    """Set the Python file object driver in a file access property list"""
+    plist.set_fileobj_driver(h5fd.fileobj_driver, kwargs.get('fileobj'))
+
+
+# Registry mapping a driver name to a callable ``f(plist, **kwargs)`` that
+# configures the file access property list for that driver.  make_fapl()
+# looks drivers up here; register_driver()/unregister_driver() modify it.
+_drivers = {
+    'sec2': lambda plist, **kwargs: plist.set_fapl_sec2(**kwargs),
+    'stdio': lambda plist, **kwargs: plist.set_fapl_stdio(**kwargs),
+    'core': lambda plist, **kwargs: plist.set_fapl_core(**kwargs),
+    # The family driver gets a copy of the property list as its member fapl.
+    'family': lambda plist, **kwargs: plist.set_fapl_family(
+        memb_fapl=plist.copy(),
+        **kwargs
+    ),
+    'mpio': _set_fapl_mpio,
+    'fileobj': _set_fapl_fileobj,
+    'split': lambda plist, **kwargs: plist.set_fapl_split(**kwargs),
+}
+
+# The remaining drivers are only registered when the h5 configuration
+# reports support for them.
+if ros3:
+    _drivers['ros3'] = lambda plist, **kwargs: plist.set_fapl_ros3(**kwargs)
+
+if direct_vfd:
+    _drivers['direct'] = lambda plist, **kwargs: plist.set_fapl_direct(**kwargs)  # noqa
+
+
+def register_driver(name, set_fapl):
+    """Register a custom driver.
+
+    Parameters
+    ----------
+    name : str
+        The name of the driver.
+    set_fapl : callable[PropFAID, **kwargs] -> NoneType
+        The function to set the fapl to use your custom driver.
+    """
+    _drivers[name] = set_fapl
+
+
+def unregister_driver(name):
+    """Unregister a custom driver.
+
+    Parameters
+    ----------
+    name : str
+        The name of the driver.
+    """
+    del _drivers[name]
+
+
+def registered_drivers():
+    """Return a frozenset of the names of all of the registered drivers.
+    """
+    return frozenset(_drivers)
+
+
+def make_fapl(
+    driver, libver=None, rdcc_nslots=None, rdcc_nbytes=None, rdcc_w0=None,
+    locking=None, page_buf_size=None, min_meta_keep=0, min_raw_keep=0,
+    alignment_threshold=1, alignment_interval=1, meta_block_size=None,
+    **kwds
+):
+    """ Set up a file access property list """
+    plist = h5p.create(h5p.FILE_ACCESS)
+
+    if libver is not None:
+        if libver in libver_dict:
+            low = libver_dict[libver]
+            high = h5f.LIBVER_LATEST
+        else:
+            low, high = (libver_dict[x] for x in libver)
+    else:
+        # we default to earliest
+        low, high = h5f.LIBVER_EARLIEST, h5f.LIBVER_LATEST
+    plist.set_libver_bounds(low, high)
+    plist.set_alignment(alignment_threshold, alignment_interval)
+
+    cache_settings = list(plist.get_cache())
+    if rdcc_nslots is not None:
+        cache_settings[1] = rdcc_nslots
+    if rdcc_nbytes is not None:
+        cache_settings[2] = rdcc_nbytes
+    if rdcc_w0 is not None:
+        cache_settings[3] = rdcc_w0
+    plist.set_cache(*cache_settings)
+
+    if page_buf_size:
+        plist.set_page_buffer_size(int(page_buf_size), int(min_meta_keep),
+                                   int(min_raw_keep))
+
+    if meta_block_size is not None:
+        plist.set_meta_block_size(int(meta_block_size))
+
+    if locking is not None:
+        if locking in ("false", False):
+            plist.set_file_locking(False, ignore_when_disabled=False)
+        elif locking in ("true", True):
+            plist.set_file_locking(True, ignore_when_disabled=False)
+        elif locking == "best-effort":
+            plist.set_file_locking(True, ignore_when_disabled=True)
+        else:
+            raise ValueError(f"Unsupported locking value: {locking}")
+
+    if driver is None or (driver == 'windows' and sys.platform == 'win32'):
+        # Prevent swallowing unused key arguments
+        if kwds:
+            msg = "'{key}' is an invalid keyword argument for this function" \
+                  .format(key=next(iter(kwds)))
+            raise TypeError(msg)
+        return plist
+
+    try:
+        set_fapl = _drivers[driver]
+    except KeyError as exc:
+        raise ValueError(f'Unknown driver type {driver!r}') from exc
+    else:
+        if driver == 'ros3':
+            token = kwds.pop('session_token', None)
+            set_fapl(plist, **kwds)
+            if token:
+                if hdf5_version < (1, 14, 2):
+                    raise ValueError('HDF5 >= 1.14.2 required for AWS session token')
+                plist.set_fapl_ros3_token(token)
+        else:
+            set_fapl(plist, **kwds)
+
+    return plist
+
+
+def make_fcpl(track_order=False, track_times=False, fs_strategy=None, fs_persist=False,
+              fs_threshold=1, fs_page_size=None):
+    """ Set up a file creation property list """
+    plist = h5p.create(h5p.FILE_CREATE)
+    if track_order:
+        plist.set_link_creation_order(
+            h5p.CRT_ORDER_TRACKED | h5p.CRT_ORDER_INDEXED)
+        plist.set_attr_creation_order(
+            h5p.CRT_ORDER_TRACKED | h5p.CRT_ORDER_INDEXED)
+    if track_times is None:
+        track_times = False  # Allow explicit None to mean h5py's default
+    if track_times in (True, False):
+        plist.set_obj_track_times(track_times)
+    else:
+        raise TypeError("track_times must be either True or False")
+    if fs_strategy:
+        strategies = {
+            'fsm': h5f.FSPACE_STRATEGY_FSM_AGGR,
+            'page': h5f.FSPACE_STRATEGY_PAGE,
+            'aggregate': h5f.FSPACE_STRATEGY_AGGR,
+            'none': h5f.FSPACE_STRATEGY_NONE
+        }
+        fs_strat_num = strategies.get(fs_strategy, -1)
+        if fs_strat_num == -1:
+            raise ValueError("Invalid file space strategy type")
+
+        plist.set_file_space_strategy(fs_strat_num, fs_persist, fs_threshold)
+        if fs_page_size and fs_strategy == 'page':
+            plist.set_file_space_page_size(int(fs_page_size))
+    return plist
+
+
+def make_fid(name, mode, userblock_size, fapl, fcpl=None, swmr=False):
+    """ Get a new FileID by opening or creating a file.
+    Also validates mode argument."""
+
+    if userblock_size is not None:
+        if mode in ('r', 'r+'):
+            raise ValueError("User block may only be specified "
+                             "when creating a file")
+        try:
+            userblock_size = int(userblock_size)
+        except (TypeError, ValueError):
+            raise ValueError("User block size must be an integer") from None
+        if fcpl is None:
+            fcpl = h5p.create(h5p.FILE_CREATE)
+        fcpl.set_userblock(userblock_size)
+
+    if mode == 'r':
+        flags = h5f.ACC_RDONLY
+        if swmr and swmr_support:
+            flags |= h5f.ACC_SWMR_READ
+        fid = h5f.open(name, flags, fapl=fapl)
+    elif mode == 'r+':
+        fid = h5f.open(name, h5f.ACC_RDWR, fapl=fapl)
+    elif mode in ['w-', 'x']:
+        fid = h5f.create(name, h5f.ACC_EXCL, fapl=fapl, fcpl=fcpl)
+    elif mode == 'w':
+        fid = h5f.create(name, h5f.ACC_TRUNC, fapl=fapl, fcpl=fcpl)
+    elif mode == 'a':
+        # Open in append mode (read/write).
+        # If that fails, create a new file only if it won't clobber an
+        # existing one (ACC_EXCL)
+        try:
+            fid = h5f.open(name, h5f.ACC_RDWR, fapl=fapl)
+        # Not all drivers raise FileNotFoundError (commented those that do not)
+        except FileNotFoundError if fapl.get_driver() in (
+            h5fd.SEC2,
+            h5fd.DIRECT if direct_vfd else -1,
+            # h5fd.STDIO,
+            # h5fd.CORE,
+            h5fd.FAMILY,
+            h5fd.WINDOWS,
+            # h5fd.MPIO,
+            # h5fd.MPIPOSIX,
+            h5fd.fileobj_driver,
+            h5fd.ROS3D if ros3 else -1,
+        ) else OSError:
+            fid = h5f.create(name, h5f.ACC_EXCL, fapl=fapl, fcpl=fcpl)
+    else:
+        raise ValueError("Invalid mode; must be one of r, r+, w, w-, x, a")
+
+    try:
+        if userblock_size is not None:
+            existing_fcpl = fid.get_create_plist()
+            if existing_fcpl.get_userblock() != userblock_size:
+                raise ValueError("Requested userblock size (%d) does not match that of existing file (%d)" % (userblock_size, existing_fcpl.get_userblock()))
+    except Exception as e:
+        fid.close()
+        raise e
+
+    return fid
+
+
+class File(Group):
+
+    """
+        Represents an HDF5 file.
+    """
+
+    @property
+    def attrs(self):
+        """ Attributes attached to this object """
+        # hdf5 complains that a file identifier is an invalid location for an
+        # attribute. Instead of self, pass the root group to AttributeManager:
+        from . import attrs
+        with phil:
+            return attrs.AttributeManager(self['/'])
+
+    @property
+    @with_phil
+    def filename(self):
+        """File name on disk"""
+        return filename_decode(h5f.get_name(self.id))
+
+    @property
+    @with_phil
+    def driver(self):
+        """Low-level HDF5 file driver used to open file"""
+        drivers = {h5fd.SEC2: 'sec2',
+                   h5fd.STDIO: 'stdio',
+                   h5fd.CORE: 'core',
+                   h5fd.FAMILY: 'family',
+                   h5fd.WINDOWS: 'windows',
+                   h5fd.MPIO: 'mpio',
+                   h5fd.MPIPOSIX: 'mpiposix',
+                   h5fd.fileobj_driver: 'fileobj'}
+        if ros3:
+            drivers[h5fd.ROS3D] = 'ros3'
+        if direct_vfd:
+            drivers[h5fd.DIRECT] = 'direct'
+        return drivers.get(self.id.get_access_plist().get_driver(), 'unknown')
+
+    @property
+    @with_phil
+    def mode(self):
+        """ Python mode used to open file """
+        write_intent = h5f.ACC_RDWR
+        if swmr_support:
+            write_intent |= h5f.ACC_SWMR_WRITE
+        return 'r+' if self.id.get_intent() & write_intent else 'r'
+
+    @property
+    @with_phil
+    def libver(self):
+        """File format version bounds (2-tuple: low, high)"""
+        bounds = self.id.get_access_plist().get_libver_bounds()
+        return tuple(libver_dict_r[x] for x in bounds)
+
+    @property
+    @with_phil
+    def userblock_size(self):
+        """ User block size (in bytes) """
+        fcpl = self.id.get_create_plist()
+        return fcpl.get_userblock()
+
+    @property
+    @with_phil
+    def meta_block_size(self):
+        """ Meta block size (in bytes) """
+        fapl = self.id.get_access_plist()
+        return fapl.get_meta_block_size()
+
+    if mpi:
+
+        @property
+        @with_phil
+        def atomic(self):
+            """ Set/get MPI-IO atomic mode
+            """
+            return self.id.get_mpi_atomicity()
+
+        @atomic.setter
+        @with_phil
+        def atomic(self, value):
+            # pylint: disable=missing-docstring
+            self.id.set_mpi_atomicity(value)
+
+    @property
+    @with_phil
+    def swmr_mode(self):
+        """ Controls single-writer multiple-reader mode """
+        return swmr_support and bool(self.id.get_intent() & (h5f.ACC_SWMR_READ | h5f.ACC_SWMR_WRITE))
+
+    @swmr_mode.setter
+    @with_phil
+    def swmr_mode(self, value):
+        # pylint: disable=missing-docstring
+        if value:
+            self.id.start_swmr_write()
+        else:
+            raise ValueError("It is not possible to forcibly switch SWMR mode off.")
+
+    def __init__(self, name, mode='r', driver=None, libver=None, userblock_size=None, swmr=False,
+                 rdcc_nslots=None, rdcc_nbytes=None, rdcc_w0=None, track_order=None,
+                 fs_strategy=None, fs_persist=False, fs_threshold=1, fs_page_size=None,
+                 page_buf_size=None, min_meta_keep=0, min_raw_keep=0, locking=None,
+                 alignment_threshold=1, alignment_interval=1, meta_block_size=None,
+                 *, track_times=False, **kwds):
+        """Create a new file object.
+
+        See the h5py user guide for a detailed explanation of the options.
+
+        name
+            Name of the file on disk, or file-like object.  Note: for files
+            created with the 'core' driver, HDF5 still requires this be
+            non-empty.
+        mode
+            r        Readonly, file must exist (default)
+            r+       Read/write, file must exist
+            w        Create file, truncate if exists
+            w- or x  Create file, fail if exists
+            a        Read/write if exists, create otherwise
+        driver
+            Name of the driver to use.  Legal values are None (default,
+            recommended), 'core', 'sec2', 'direct', 'stdio', 'mpio', 'ros3'.
+        libver
+            Library version bounds.  Supported values: 'earliest', 'v108',
+            'v110', 'v112', 'v114', 'v200' and 'latest' depending on the
+            version of libhdf5 h5py is built against.
+        userblock_size
+            Desired size of user block.  Only allowed when creating a new
+            file (mode w, w- or x).
+        swmr
+            Open the file in SWMR read mode. Only used when mode = 'r'.
+        rdcc_nslots
+            The number of chunk slots in the raw data chunk cache for this
+            file. Increasing this value reduces the number of cache collisions,
+            but slightly increases the memory used. Due to the hashing
+            strategy, this value should ideally be a prime number. As a rule of
+            thumb, this value should be at least 10 times the number of chunks
+            that can fit in rdcc_nbytes bytes. For maximum performance, this
+            value should be set approximately 100 times that number of
+            chunks. The default value is 521. Applies to all datasets unless individually changed.
+        rdcc_nbytes
+            Total size of the dataset chunk cache in bytes. The default size per
+            dataset is 1024**2 (1 MiB) for HDF5 before 2.0 and 8 MiB for HDF5
+            2.0 and later. Applies to all datasets unless individually changed.
+        rdcc_w0
+            The chunk preemption policy for all datasets.  This must be
+            between 0 and 1 inclusive and indicates the weighting according to
+            which chunks which have been fully read or written are penalized
+            when determining which chunks to flush from cache.  A value of 0
+            means fully read or written chunks are treated no differently than
+            other chunks (the preemption is strictly LRU) while a value of 1
+            means fully read or written chunks are always preempted before
+            other chunks.  If your application only reads or writes data once,
+            this can be safely set to 1.  Otherwise, this should be set lower
+            depending on how often you re-read or re-write the same data.  The
+            default value is 0.75. Applies to all datasets unless individually changed.
+        track_order
+            Track dataset/group/attribute creation order under root group
+            if True. If None use global default h5.get_config().track_order.
+        track_times: bool or None, default: False
+            If True, store timestamps for this group in the file.
+            If None, fall back to the default value.
+        fs_strategy
+            The file space handling strategy to be used.  Only allowed when
+            creating a new file (mode w, w- or x).  Defined as:
+            "fsm"        FSM, Aggregators, VFD
+            "page"       Paged FSM, VFD
+            "aggregate"  Aggregators, VFD
+            "none"       VFD
+            If None use HDF5 defaults.
+        fs_page_size
+            File space page size in bytes. Only used when fs_strategy="page". If
+            None use the HDF5 default (4096 bytes).
+        fs_persist
+            A boolean value to indicate whether free space should be persistent
+            or not.  Only allowed when creating a new file.  The default value
+            is False.
+        fs_threshold
+            The smallest free-space section size that the free space manager
+            will track.  Only allowed when creating a new file.  The default
+            value is 1.
+        page_buf_size
+            Page buffer size in bytes. Only allowed for HDF5 files created with
+            fs_strategy="page". Must be a power of two value and greater than
+            or equal to the file space page size when creating the file. It is
+            not used by default.
+        min_meta_keep
+            Minimum percentage of metadata to keep in the page buffer before
+            allowing pages containing metadata to be evicted. Applicable only if
+            page_buf_size is set. Default value is zero.
+        min_raw_keep
+            Minimum percentage of raw data to keep in the page buffer before
+            allowing pages containing raw data to be evicted. Applicable only if
+            page_buf_size is set. Default value is zero.
+        locking
+            The file locking behavior. Defined as:
+
+            - False (or "false") --  Disable file locking
+            - True (or "true")   --  Enable file locking
+            - "best-effort"      --  Enable file locking but ignore some errors
+            - None               --  Use HDF5 defaults
+
+            .. warning::
+
+                The HDF5_USE_FILE_LOCKING environment variable can override
+                this parameter.
+
+        alignment_threshold
+            Together with ``alignment_interval``, this property ensures that
+            any file object greater than or equal in size to the alignment
+            threshold (in bytes) will be aligned on an address which is a
+            multiple of alignment interval.
+
+        alignment_interval
+            This property should be used in conjunction with
+            ``alignment_threshold``. See the description above. For more
+            details, see
+            https://support.hdfgroup.org/documentation/hdf5/latest/group___f_a_p_l.html#gab99d5af749aeb3896fd9e3ceb273677a
+
+        meta_block_size
+            Set the current minimum size, in bytes, of new metadata block allocations.
+            See https://support.hdfgroup.org/documentation/hdf5/latest/group___f_a_p_l.html#ga8822e3dedc8e1414f20871a87d533cb1
+
+        Additional keywords
+            Passed on to the selected file driver.
+        """
+        if driver == 'ros3':
+            if not ros3:
+                raise ValueError("h5py was built without ROS3 support, can't use ros3 driver")
+            # Before HDF5 2.0, s3:// locations are rewritten here into an
+            # https:// URL built from the 'aws_region' keyword.
+            if hdf5_version < (2, 0, 0):
+                from urllib.parse import urlparse
+                url = urlparse(name)
+                if url.scheme == 's3':
+                    # 'aws_region' is decoded as ASCII, i.e. expected as bytes.
+                    aws_region = kwds.get('aws_region', b'').decode('ascii')
+                    if len(aws_region) == 0:
+                        raise ValueError('AWS region required for s3:// location')
+                    name = f'https://s3.{aws_region}.amazonaws.com/{url.netloc}{url.path}'
+                elif url.scheme not in ('https', 'http'):
+                    raise ValueError(f'{name}: S3 location must begin with '
+                                     'either "https://", "http://", or "s3://"')
+
+        if isinstance(name, _objects.ObjectID):
+            # Binding to an existing low-level object: look up its file ID
+            # instead of opening or creating anything.
+            if fs_strategy:
+                raise ValueError("Unable to set file space strategy of an existing file")
+
+            with phil:
+                fid = h5i.get_file_id(name)
+        else:
+            if hasattr(name, 'read') and hasattr(name, 'seek'):
+                # File-like object: force the 'fileobj' driver and hand the
+                # object to it through the 'fileobj' keyword.
+                if driver not in (None, 'fileobj'):
+                    raise ValueError("Driver must be 'fileobj' for file-like object if specified.")
+                driver = 'fileobj'
+                if kwds.get('fileobj', name) != name:
+                    raise ValueError("Invalid value of 'fileobj' argument; "
+                                     "must equal to file-like object if specified.")
+                kwds.update(fileobj=name)
+                # The name passed on is just the ASCII repr of the object.
+                name = repr(name).encode('ASCII', 'replace')
+            else:
+                name = filename_encode(name)
+
+            if track_order is None:
+                track_order = h5.get_config().track_order
+
+            if fs_strategy and mode not in ('w', 'w-', 'x'):
+                raise ValueError("Unable to set file space strategy of an existing file")
+
+            if swmr and mode != 'r':
+                warn(
+                    "swmr=True only affects read ('r') mode. For swmr write "
+                    "mode, set f.swmr_mode = True after opening the file.",
+                    stacklevel=2,
+                )
+
+            # Build both property lists and open/create the file while
+            # holding the phil lock.
+            with phil:
+                fapl = make_fapl(driver, libver, rdcc_nslots, rdcc_nbytes, rdcc_w0,
+                                 locking, page_buf_size, min_meta_keep, min_raw_keep,
+                                 alignment_threshold=alignment_threshold,
+                                 alignment_interval=alignment_interval,
+                                 meta_block_size=meta_block_size,
+                                 **kwds)
+                fcpl = make_fcpl(track_order=track_order, track_times=track_times,
+                                 fs_strategy=fs_strategy, fs_persist=fs_persist,
+                                 fs_threshold=fs_threshold, fs_page_size=fs_page_size)
+                fid = make_fid(name, mode, userblock_size, fapl, fcpl, swmr=swmr)
+
+            # Remember the requested bounds; a single value is stored as
+            # (value, 'latest').  Not set when binding to an ObjectID.
+            if isinstance(libver, tuple):
+                self._libver = libver
+            else:
+                self._libver = (libver, 'latest')
+
+        super().__init__(fid)
+
+    _in_memory_file_counter = 0
+
+    @classmethod
+    @with_phil
+    def in_memory(cls, file_image=None, **kwargs):
+        """Create an HDF5 file in memory, without an underlying file
+
+        file_image
+            The initial file contents as bytes (or anything that supports the
+            Python buffer interface). HDF5 takes a copy of this data.
+        block_size
+            Chunk size for new memory alloactions (default 64 KiB).
+
+        Other keyword arguments are like File(), although name, mode,
+        driver and locking can't be passed.
+        """
+        for k in ('driver', 'locking', 'backing_store'):
+            if k in kwargs:
+                raise TypeError(
+                    f"File.in_memory() got an unexpected keyword argument {k!r}"
+                )
+        fcpl_kwargs = {}
+        for k in inspect.signature(make_fcpl).parameters:
+            if k in kwargs:
+                fcpl_kwargs[k] = kwargs.pop(k)
+        fcpl = make_fcpl(**fcpl_kwargs)
+
+        fapl = make_fapl(driver="core", backing_store=False, **kwargs)
+        if file_image:
+            if fcpl_kwargs:
+                kw = ', '.join(fcpl_kwargs)
+                raise TypeError(f"{kw} parameters cannot be used with file_image")
+            fapl.set_file_image(file_image)
+
+        # We have to give HDF5 a filename, but it should never use it.
+        # This is a hint both in memory, and in case a bug ever creates a file.
+        # The name also needs to be different from any other open file;
+        # we use a simple counter (protected by the 'phil' lock) for this.
+        name = b"h5py_in_memory_nonfile_%d"  % cls._in_memory_file_counter
+        cls._in_memory_file_counter += 1
+
+        if file_image:
+            fid = h5f.open(name, h5f.ACC_RDWR, fapl=fapl)
+        else:
+            fid = h5f.create(name, h5f.ACC_EXCL, fapl=fapl, fcpl=fcpl)
+        return cls(fid)
+
+    def close(self):
+        """ Close the file.  All open objects become invalid """
+        with phil:
+            # Check that the file is still open, otherwise skip
+            if self.id.valid:
+                # We have to explicitly murder all open objects related to the file
+
+                # Close file-resident objects first, then the files.
+                # Otherwise we get errors in MPI mode.
+                self.id._close_open_objects(h5f.OBJ_LOCAL | ~h5f.OBJ_FILE)
+                self.id._close_open_objects(h5f.OBJ_LOCAL | h5f.OBJ_FILE)
+
+                self.id.close()
+                _objects.nonlocal_close()
+
+    def flush(self):
+        """ Tell the HDF5 library to flush its buffers.
+        """
+        with phil:
+            h5f.flush(self.id)
+
+    @with_phil
+    def __enter__(self):
+        """ Context manager entry: the file object itself is the target. """
+        return self
+
+    @with_phil
+    def __exit__(self, *args):
+        if self.id:
+            self.close()
+
+    @with_phil
+    def __repr__(self):
+        if not self.id:
+            r = '<Closed HDF5 file>'
+        else:
+            # Filename has to be forced to Unicode if it comes back bytes
+            # Mode is always a "native" string
+            filename = self.filename
+            if isinstance(filename, bytes):  # Can't decode fname
+                filename = filename.decode('utf8', 'replace')
+            r = f'<HDF5 file "{os.path.basename(filename)}" (mode {self.mode})>'
+
+        return r
@@ -0,0 +1,412 @@
+# This file is part of h5py, a Python interface to the HDF5 library.
+#
+# http://www.h5py.org
+#
+# Copyright 2008-2013 Andrew Collette and contributors
+#
+# License:  Standard 3-clause BSD; see "license.txt" for full license terms
+#           and contributor agreement.
+
+"""
+    Implements support for HDF5 compression filters via the high-level
+    interface.  The following types of filter are available:
+
+    "gzip"
+        Standard DEFLATE-based compression, at integer levels from 0 to 9.
+        Built-in to all public versions of HDF5.  Use this if you want a
+        decent-to-good ratio, good portability, and don't mind waiting.
+
+    "lzf"
+        Custom compression filter for h5py.  This filter is much, much faster
+        than gzip (roughly 10x in compression vs. gzip level 4, and 3x faster
+        in decompressing), but at the cost of a worse compression ratio.  Use
+        this if you want cheap compression and portability is not a concern.
+
+    "szip"
+        Access to the HDF5 SZIP encoder.  SZIP is a non-mainstream compression
+        format used in space science on integer and float datasets.  SZIP is
+        subject to license requirements, which means the encoder is not
+        guaranteed to be always available.  However, it is also much faster
+        than gzip.
+
+    The following constants in this module are also useful:
+
+    decode
+        Tuple of available filter names for decoding
+
+    encode
+        Tuple of available filter names for encoding
+"""
+from collections.abc import Mapping
+import operator
+
+import numpy as np
+from .base import product
+from .compat import filename_encode
+from .. import h5z, h5p, h5d, h5f
+
+
+# Filter names used by the high-level interface, mapped to h5z filter codes.
+# _gen_filter_tuples() probes each of these to build `decode` and `encode`.
+_COMP_FILTERS = {'gzip': h5z.FILTER_DEFLATE,
+                'szip': h5z.FILTER_SZIP,
+                'lzf': h5z.FILTER_LZF,
+                'shuffle': h5z.FILTER_SHUFFLE,
+                'fletcher32': h5z.FILTER_FLETCHER32,
+                'scaleoffset': h5z.FILTER_SCALEOFFSET }
+# Accepted values of the `fill_time` argument of fill_dcpl, mapped to the
+# h5d constants handed to the property list.
+_FILL_TIME_ENUM = {'alloc': h5d.FILL_TIME_ALLOC,
+                   'never': h5d.FILL_TIME_NEVER,
+                   'ifset': h5d.FILL_TIME_IFSET,
+                   }
+
+# Gzip level used when gzip is requested without compression_opts.
+DEFAULT_GZIP = 4
+# (method, pixels) pair used when szip is requested without compression_opts.
+DEFAULT_SZIP = ('nn', 8)
+
+def _gen_filter_tuples():
+    """ Bootstrap function to figure out what filters are available. """
+    dec = []
+    enc = []
+    for name, code in _COMP_FILTERS.items():
+        if h5z.filter_avail(code):
+            info = h5z.get_filter_info(code)
+            if info & h5z.FILTER_CONFIG_ENCODE_ENABLED:
+                enc.append(name)
+            if info & h5z.FILTER_CONFIG_DECODE_ENABLED:
+                dec.append(name)
+
+    return tuple(dec), tuple(enc)
+
+decode, encode = _gen_filter_tuples()
+
+def _external_entry(entry):
+    """ Check for and return a well-formed entry tuple for
+    a call to h5p.set_external. """
+    # We require only an iterable entry but also want to guard against
+    # raising a confusing exception from unpacking below a str or bytes that
+    # was mistakenly passed as an entry.  We go further than that and accept
+    # only a tuple, which allows simpler documentation and exception
+    # messages.
+    if not isinstance(entry, tuple):
+        raise TypeError(
+            "Each external entry must be a tuple of (name, offset, size)")
+    name, offset, size = entry  # raise ValueError without three elements
+    name = filename_encode(name)
+    offset = operator.index(offset)
+    size = operator.index(size)
+    return (name, offset, size)
+
+def _normalize_external(external):
+    """ Normalize external into a well-formed list of tuples and return. """
+    if external is None:
+        return []
+    try:
+        # Accept a solitary name---a str, bytes, or os.PathLike acceptable to
+        # filename_encode.
+        return [_external_entry((external, 0, h5f.UNLIMITED))]
+    except TypeError:
+        pass
+    # Check and rebuild each entry to be well-formed.
+    return [_external_entry(entry) for entry in external]
+
+class FilterRefBase(Mapping):
+    """Base class for referring to an HDF5 and describing its options
+
+    Your subclass must define filter_id, and may define a filter_options tuple.
+    """
+    filter_id = None
+    filter_options = ()
+
+    # Mapping interface supports using instances as **kwargs for compatibility
+    # with older versions of h5py
+    @property
+    def _kwargs(self):
+        return {
+            'compression': self.filter_id,
+            'compression_opts': self.filter_options
+        }
+
+    def __hash__(self):
+        return hash((self.filter_id, self.filter_options))
+
+    def __eq__(self, other):
+        return (
+            isinstance(other, FilterRefBase)
+            and self.filter_id == other.filter_id
+            and self.filter_options == other.filter_options
+        )
+
+    def __len__(self):
+        return len(self._kwargs)
+
+    def __iter__(self):
+        return iter(self._kwargs)
+
+    def __getitem__(self, item):
+        return self._kwargs[item]
+
+class Gzip(FilterRefBase):
+    """Filter reference for the DEFLATE (gzip) filter at a given level."""
+    filter_id = h5z.FILTER_DEFLATE
+
+    def __init__(self, level=DEFAULT_GZIP):
+        # The level is the filter's only option, stored as a 1-tuple.
+        self.filter_options = (level,)
+
+def fill_dcpl(plist, shape, dtype, chunks, compression, compression_opts,
+              shuffle, fletcher32, maxshape, scaleoffset, external,
+              allow_unknown_filter=False, *, fill_time=None):
+    """ Generate a dataset creation property list.
+
+    Validates the chunking, filter and external-storage options, applies
+    them to ``plist`` and returns it.  For an empty (``shape is None``) or
+    scalar (``shape == ()``) dataset none of the chunk/filter options may be
+    given, and a freshly created h5p.DATASET_CREATE list is returned instead.
+
+    chunks, maxshape
+        None, True, or a sequence with the same rank as ``shape``.
+        ``chunks=True`` asks for a chunk shape from guess_chunk.
+    compression
+        None, a filter name present in ``encode``, an integer filter number,
+        or a FilterRefBase instance (which also supplies compression_opts).
+    compression_opts
+        Options for the chosen filter; not allowed without ``compression``.
+    shuffle, fletcher32
+        Truthy to add the corresponding filter.
+    scaleoffset
+        None, or the setting for the scale/offset filter (integer and
+        floating-point dtypes only; not combinable with fletcher32).
+    external
+        None, a single file name, or an iterable of (name, offset, size).
+    allow_unknown_filter
+        Skip the availability check for an integer filter number.
+    fill_time
+        None, or one of 'alloc', 'never', 'ifset'.
+
+    Raises TypeError or ValueError when an option is malformed or a
+    combination of options is not supported.
+
+    Undocumented and subject to change without warning.
+    """
+
+    if shape is None or shape == ():
+        shapetype = 'Empty' if shape is None else 'Scalar'
+        if any((chunks, compression, compression_opts, shuffle, fletcher32,
+                scaleoffset is not None)):
+            raise TypeError(
+                f"{shapetype} datasets don't support chunk/filter options"
+            )
+        if maxshape and maxshape != ():
+            raise TypeError(f"{shapetype} datasets cannot be extended")
+        # Note: the plist passed in is not used on this path.
+        return h5p.create(h5p.DATASET_CREATE)
+
+    def rq_tuple(tpl, name):
+        """ Check if chunks/maxshape match dataset rank """
+        # None and True are sentinels handled further down, not shapes.
+        if tpl in (None, True):
+            return
+        try:
+            tpl = tuple(tpl)
+        except TypeError as exc:
+            raise TypeError(f'{name!r} argument must be None or a sequence object') from exc
+        if len(tpl) != len(shape):
+            raise ValueError(f'{name!r} must have same rank as dataset shape')
+
+    rq_tuple(chunks, 'chunks')
+    rq_tuple(maxshape, 'maxshape')
+
+    if compression is not None:
+        # A filter reference object carries both the filter id and options.
+        if isinstance(compression, FilterRefBase):
+            compression_opts = compression.filter_options
+            compression = compression.filter_id
+
+        if compression not in encode and not isinstance(compression, int):
+            raise ValueError('Compression filter "%s" is unavailable' % compression)
+
+        if compression == 'gzip':
+            if compression_opts is None:
+                gzip_level = DEFAULT_GZIP
+            elif compression_opts in range(10):
+                gzip_level = compression_opts
+            else:
+                raise ValueError("GZIP setting must be an integer from 0-9, not %r" % compression_opts)
+
+        elif compression == 'lzf':
+            if compression_opts is not None:
+                raise ValueError("LZF compression filter accepts no options")
+
+        elif compression == 'szip':
+            if compression_opts is None:
+                compression_opts = DEFAULT_SZIP
+
+            err = "SZIP options must be a 2-tuple ('ec'|'nn', even integer 0-32)"
+            try:
+                szmethod, szpix = compression_opts
+            except TypeError as exc:
+                raise TypeError(err) from exc
+            if szmethod not in ('ec', 'nn'):
+                raise ValueError(err)
+            if not (0<szpix<=32 and szpix%2 == 0):
+                raise ValueError(err)
+
+    elif compression_opts is not None:
+        # Can't specify just compression_opts by itself.
+        raise TypeError("Compression method must be specified")
+
+    if scaleoffset is not None:
+        # scaleoffset must be an integer when it is not None or False,
+        # except for integral data, for which scaleoffset == True is
+        # permissible (will use SO_INT_MINBITS_DEFAULT)
+
+        if scaleoffset < 0:
+            raise ValueError('scale factor must be >= 0')
+
+        if dtype.kind == 'f':
+            if scaleoffset is True:
+                raise ValueError('integer scaleoffset must be provided for '
+                                 'floating point types')
+        elif dtype.kind in ('u', 'i'):
+            if scaleoffset is True:
+                scaleoffset = h5z.SO_INT_MINBITS_DEFAULT
+        else:
+            raise TypeError('scale/offset filter only supported for integer '
+                            'and floating-point types')
+
+        # Scale/offset following fletcher32 in the filter chain will (almost?)
+        # always trigger a read error, as most scale/offset settings are
+        # lossy. Since fletcher32 must come first (see comment below) we
+        # simply prohibit the combination of fletcher32 and scale/offset.
+        if fletcher32:
+            raise ValueError('fletcher32 cannot be used with potentially lossy'
+                             ' scale/offset filter')
+
+    external = _normalize_external(external)
+    # End argument validation
+
+    # Pick a chunk shape automatically when it was asked for explicitly, or
+    # when no chunks were given but another option implies chunked storage.
+    if (chunks is True) or (chunks is None and any((
+            shuffle,
+            fletcher32,
+            compression,
+            (maxshape and not len(external)),
+            scaleoffset is not None,
+    ))):
+        chunks = guess_chunk(shape, maxshape, dtype.itemsize)
+
+    # maxshape=True stands for "every axis unlimited".
+    if maxshape is True:
+        maxshape = (None,)*len(shape)
+
+    if chunks is not None:
+        plist.set_chunk(chunks)
+
+    if fill_time is not None:
+        if (ft := _FILL_TIME_ENUM.get(fill_time)) is not None:
+            plist.set_fill_time(ft)
+        else:
+            msg = ("fill_time must be one of the following choices: 'alloc', "
+                   f"'never' or 'ifset', but it is {fill_time}.")
+            raise ValueError(msg)
+
+    # From here on the order of the calls is the order of the filters.
+    # scale-offset must come before shuffle and compression
+    if scaleoffset is not None:
+        if dtype.kind in ('u', 'i'):
+            plist.set_scaleoffset(h5z.SO_INT, scaleoffset)
+        else: # dtype.kind == 'f'
+            plist.set_scaleoffset(h5z.SO_FLOAT_DSCALE, scaleoffset)
+
+    for item in external:
+        plist.set_external(*item)
+
+    if shuffle:
+        plist.set_shuffle()
+
+    if compression == 'gzip':
+        plist.set_deflate(gzip_level)
+    elif compression == 'lzf':
+        plist.set_filter(h5z.FILTER_LZF, h5z.FLAG_OPTIONAL)
+    elif compression == 'szip':
+        opts = {'ec': h5z.SZIP_EC_OPTION_MASK, 'nn': h5z.SZIP_NN_OPTION_MASK}
+        plist.set_szip(opts[szmethod], szpix)
+    elif isinstance(compression, int):
+        # A filter given by number; compression_opts is passed on as-is.
+        if not allow_unknown_filter and not h5z.filter_avail(compression):
+            raise ValueError("Unknown compression filter number: %s" % compression)
+
+        plist.set_filter(compression, h5z.FLAG_OPTIONAL, compression_opts)
+
+    # `fletcher32` must come after `compression`, otherwise, if `compression`
+    # is "szip" and the data is 64bit, the fletcher32 checksum will be wrong
+    # (see GitHub issue #953).
+    if fletcher32:
+        plist.set_fletcher32()
+
+    return plist
+
+def get_filter_name(code):
+    """
+    Return the name of the compression filter for a given filter identifier.
+
+    Undocumented and subject to change without warning.
+    """
+    filters = {h5z.FILTER_DEFLATE: 'gzip', h5z.FILTER_SZIP: 'szip',
+               h5z.FILTER_SHUFFLE: 'shuffle', h5z.FILTER_FLETCHER32: 'fletcher32',
+               h5z.FILTER_LZF: 'lzf', h5z.FILTER_SCALEOFFSET: 'scaleoffset'}
+    return filters.get(code, str(code))
+
+def get_filters(plist):
+    """ Extract a dictionary of active filters from a DCPL, along with
+    their settings.
+
+    Undocumented and subject to change without warning.
+    """
+
+    pipeline = {}
+
+    nfilters = plist.get_nfilters()
+
+    for i in range(nfilters):
+
+        code, _, vals, _ = plist.get_filter(i)
+
+        if code == h5z.FILTER_DEFLATE:
+            vals = vals[0] # gzip level
+
+        elif code == h5z.FILTER_SZIP:
+            mask, pixels = vals[0:2]
+            if mask & h5z.SZIP_EC_OPTION_MASK:
+                mask = 'ec'
+            elif mask & h5z.SZIP_NN_OPTION_MASK:
+                mask = 'nn'
+            else:
+                raise TypeError("Unknown SZIP configuration")
+            vals = (mask, pixels)
+        elif code == h5z.FILTER_LZF:
+            vals = None
+        else:
+            if len(vals) == 0:
+                vals = None
+
+        pipeline[get_filter_name(code)] = vals
+
+    return pipeline
+
+# Tuning constants for guess_chunk, all expressed in bytes.
+CHUNK_BASE = 16*1024    # Multiplier by which chunks are adjusted
+CHUNK_MIN = 8*1024      # Soft lower limit (8k)
+CHUNK_MAX = 1024*1024   # Hard upper limit (1M)
+
+def guess_chunk(shape, maxshape, typesize):
+    """ Guess an appropriate chunk layout for a dataset, given its shape and
+    the size of each element in bytes.  Will allocate chunks only as large
+    as MAX_SIZE.  Chunks are generally close to some power-of-2 fraction of
+    each axis, slightly favoring bigger values for the last index.
+
+    Undocumented and subject to change without warning.
+    """
+    # pylint: disable=unused-argument
+
+    # For unlimited dimensions we have to guess 1024
+    shape = tuple((x if x!=0 else 1024) for i, x in enumerate(shape))
+
+    ndims = len(shape)
+    if ndims == 0:
+        raise ValueError("Chunks not allowed for scalar datasets.")
+
+    chunks = np.array(shape, dtype='=f8')
+    if not np.all(np.isfinite(chunks)):
+        raise ValueError("Illegal value in chunk tuple")
+
+    # Determine the optimal chunk size in bytes using a PyTables expression.
+    # This is kept as a float.
+    dset_size = product(chunks)*typesize
+    target_size = CHUNK_BASE * (2**np.log10(dset_size/(1024.*1024)))
+
+    if target_size > CHUNK_MAX:
+        target_size = CHUNK_MAX
+    elif target_size < CHUNK_MIN:
+        target_size = CHUNK_MIN
+
+    idx = 0
+    while True:
+        # Repeatedly loop over the axes, dividing them by 2.  Stop when:
+        # 1a. We're smaller than the target chunk size, OR
+        # 1b. We're within 50% of the target chunk size, AND
+        #  2. The chunk is smaller than the maximum chunk size
+
+        chunk_bytes = product(chunks)*typesize
+
+        if (chunk_bytes < target_size or \
+         abs(chunk_bytes-target_size)/target_size < 0.5) and \
+         chunk_bytes < CHUNK_MAX:
+            break
+
+        if product(chunks) == 1:
+            break  # Element size larger than CHUNK_MAX
+
+        chunks[idx%ndims] = np.ceil(chunks[idx%ndims] / 2.0)
+        idx += 1
+
+    return tuple(int(x) for x in chunks)
@@ -0,0 +1,811 @@
+# This file is part of h5py, a Python interface to the HDF5 library.
+#
+# http://www.h5py.org
+#
+# Copyright 2008-2013 Andrew Collette and contributors
+#
+# License:  Standard 3-clause BSD; see "license.txt" for full license terms
+#           and contributor agreement.
+
+"""
+    Implements support for high-level access to HDF5 groups.
+"""
+
+from contextlib import contextmanager
+import posixpath as pp
+import numpy
+
+
+from .compat import filename_decode, filename_encode
+
+from .. import h5, h5g, h5i, h5o, h5r, h5t, h5l, h5p
+from . import base
+from .base import HLObject, MutableMappingHDF5, phil, with_phil
+from . import dataset
+from . import datatype
+from .vds import vds_support
+
+
+class Group(HLObject, MutableMappingHDF5):
+
+    """ Represents an HDF5 group.
+    """
+
+    def __init__(self, bind):
+        """ Create a new Group object by binding to a low-level GroupID.
+        """
+        with phil:
+            if not isinstance(bind, h5g.GroupID):
+                raise ValueError("%s is not a GroupID" % bind)
+            super().__init__(bind)
+
+    def create_group(self, name, track_order=None, *, track_times=False):
+        """ Create and return a new subgroup.
+
+        Name may be absolute or relative.  Fails if the target name already
+        exists.
+
+        track_order
+            Track dataset/group/attribute creation order under this group
+            if True. If None use global default h5.get_config().track_order.
+        track_times: bool or None, default: False
+            If True, store timestamps for this group in the file.
+            If None, fall back to the default value.
+        """
+        if track_order is None:
+            track_order = h5.get_config().track_order
+
+        with phil:
+            name, lcpl = self._e(name, lcpl=True)
+            gcpl = h5p.create(h5p.GROUP_CREATE)
+            if track_order:
+                order_flags = h5p.CRT_ORDER_TRACKED | h5p.CRT_ORDER_INDEXED
+                gcpl.set_link_creation_order(order_flags)
+                gcpl.set_attr_creation_order(order_flags)
+            if track_times is None:
+                track_times = False  # Allow explicit None to mean h5py's default
+            if track_times in (True, False):
+                gcpl.set_obj_track_times(track_times)
+            else:
+                raise TypeError("track_times must be either True, False, or None")
+            gid = h5g.create(self.id, name, lcpl=lcpl, gcpl=gcpl)
+            return Group(gid)
+
+    def create_dataset(self, name, shape=None, dtype=None, data=None, **kwds):
+        """ Create a new HDF5 dataset
+
+        name
+            Name of the dataset (absolute or relative).  Provide None to make
+            an anonymous dataset.
+        shape
+            Dataset shape.  Use "()" for scalar datasets.  Required if "data"
+            isn't provided.
+        dtype
+            Numpy dtype or string.  If omitted, dtype('f') will be used.
+            Required if "data" isn't provided; otherwise, overrides data
+            array's dtype.
+        data
+            Provide data to initialize the dataset.  If used, you can omit
+            shape and dtype arguments.
+
+        Keyword-only arguments:
+
+        chunks
+            (Tuple or int) Chunk shape, or True to enable auto-chunking. Integers can
+            be used for 1D shape.
+
+        maxshape
+            (Tuple or int) Make the dataset resizable up to this shape. Use None for
+            axes within the tuple you want to be unlimited. Integers can be used for 1D shape.
+            For 1D datasets with unlimited maxshape, a shape tuple of length 1 must be
+            provided, ``(None,)``. Passing ``None`` sets ``maxshape` to `shape`, making the
+            dataset un-resizable, which is the default.
+        compression
+            (String or int) Compression strategy.  Legal values are 'gzip',
+            'szip', 'lzf'.  If an integer in range(10), this indicates gzip
+            compression level. Otherwise, an integer indicates the number of a
+            dynamically loaded compression filter.
+        compression_opts
+            Compression settings.  This is an integer for gzip, 2-tuple for
+            szip, etc. If specifying a dynamically loaded compression filter
+            number, this must be a tuple of values.
+        scaleoffset
+            (Integer) Enable scale/offset filter for (usually) lossy
+            compression of integer or floating-point data. For integer
+            data, the value of scaleoffset is the number of bits to
+            retain (pass 0 to let HDF5 determine the minimum number of
+            bits necessary for lossless compression). For floating point
+            data, scaleoffset is the number of digits after the decimal
+            place to retain; stored values thus have absolute error
+            less than 0.5*10**(-scaleoffset).
+        shuffle
+            (T/F) Enable shuffle filter.
+        fletcher32
+            (T/F) Enable fletcher32 error detection. Not permitted in
+            conjunction with the scale/offset filter.
+        fillvalue
+            (Scalar) Use this value for uninitialized parts of the dataset.
+        track_times
+            (T/F) Enable dataset creation timestamps.
+        track_order
+            (T/F) Track attribute creation order if True. If omitted use
+            global default h5.get_config().track_order.
+        external
+            (Iterable of tuples) Sets the external storage property, thus
+            designating that the dataset will be stored in one or more
+            non-HDF5 files external to the HDF5 file.  Adds each tuple
+            of (name, offset, size) to the dataset's list of external files.
+            Each name must be a str, bytes, or os.PathLike; each offset and
+            size, an integer.  If only a name is given instead of an iterable
+            of tuples, it is equivalent to [(name, 0, h5py.h5f.UNLIMITED)].
+        efile_prefix
+            (String) External dataset file prefix for dataset access property
+            list. Does not persist in the file.
+        virtual_prefix
+            (String) Virtual dataset file prefix for dataset access property
+            list. Does not persist in the file.
+        allow_unknown_filter
+            (T/F) Do not check that the requested filter is available for use.
+            This should only be used with ``write_direct_chunk``, where the caller
+            compresses the data before handing it to h5py.
+        rdcc_nbytes
+            Total size of the dataset's chunk cache in bytes. The default size
+            is 1024**2 (1 MiB) for HDF5 before 2.0 and 8 MiB for HDF5 2.0 or later.
+        rdcc_w0
+            The chunk preemption policy for this dataset.  This must be
+            between 0 and 1 inclusive and indicates the weighting according to
+            which chunks which have been fully read or written are penalized
+            when determining which chunks to flush from cache.  A value of 0
+            means fully read or written chunks are treated no differently than
+            other chunks (the preemption is strictly LRU) while a value of 1
+            means fully read or written chunks are always preempted before
+            other chunks.  If your application only reads or writes data once,
+            this can be safely set to 1.  Otherwise, this should be set lower
+            depending on how often you re-read or re-write the same data.  The
+            default value is 0.75.
+        rdcc_nslots
+            The number of chunk slots in the dataset's chunk cache. Increasing
+            this value reduces the number of cache collisions, but slightly
+            increases the memory used. Due to the hashing strategy, this value
+            should ideally be a prime number. As a rule of thumb, this value
+            should be at least 10 times the number of chunks that can fit in
+            rdcc_nbytes bytes. For maximum performance, this value should be set
+            approximately 100 times that number of chunks. The default value is
+            521.
+        """
+        if 'track_order' not in kwds:
+            kwds['track_order'] = h5.get_config().track_order
+
+        if 'efile_prefix' in kwds:
+            kwds['efile_prefix'] = self._e(kwds['efile_prefix'])
+
+        if 'virtual_prefix' in kwds:
+            kwds['virtual_prefix'] = self._e(kwds['virtual_prefix'])
+
+        with phil:
+            group = self
+            if name:
+                name = self._e(name)
+                if b'/' in name.lstrip(b'/'):
+                    parent_path, name = name.rsplit(b'/', 1)
+                    group = self.require_group(parent_path)
+
+            dsid = dataset.make_new_dset(group, shape, dtype, data, name, **kwds)
+            dset = dataset.Dataset(dsid)
+            return dset
+
+    if vds_support:
+        def create_virtual_dataset(self, name, layout, fillvalue=None):
+            """Create a new virtual dataset in this group.
+
+            See virtual datasets in the docs for more information.
+
+            name
+                (str) Name of the new dataset
+
+            layout
+                (VirtualLayout) Defines the sources for the virtual dataset
+
+            fillvalue
+                The value to use where there is no data.
+
+            """
+            with phil:
+                group = self
+
+                if name:
+                    name = self._e(name)
+                    if b'/' in name.lstrip(b'/'):
+                        parent_path, name = name.rsplit(b'/', 1)
+                        group = self.require_group(parent_path)
+
+                dsid = layout.make_dataset(
+                    group, name=name, fillvalue=fillvalue,
+                )
+                dset = dataset.Dataset(dsid)
+
+            return dset
+
+        @contextmanager
+        def build_virtual_dataset(
+                self, name, shape, dtype, maxshape=None, fillvalue=None
+        ):
+            """Assemble a virtual dataset in this group.
+
+            This is used as a context manager::
+
+                with f.build_virtual_dataset('virt', (10, 1000), np.uint32) as layout:
+                    layout[0] = h5py.VirtualSource('foo.h5', 'data', (1000,))
+
+            The dataset itself is created when the ``with`` block ends; if
+            the block raises, nothing is created.
+
+            name
+                (str) Name of the new dataset
+            shape
+                (tuple) Shape of the dataset
+            dtype
+                A numpy dtype for data read from the virtual dataset
+            maxshape
+                (tuple, optional) Maximum dimensions if the dataset can grow.
+                Use None for unlimited dimensions.
+            fillvalue
+                The value used where no data is available.
+            """
+            from .vds import VirtualLayout
+            layout = VirtualLayout(shape, dtype, maxshape, self.file.filename)
+            # Hand the layout to the with-block so the caller can fill it in.
+            yield layout
+
+            # Only reached when the with-block finished without an exception.
+            self.create_virtual_dataset(name, layout, fillvalue)
+
+    def require_dataset(self, name, shape, dtype, exact=False, **kwds):
+        """ Open a dataset, creating it if it doesn't exist.
+
+        If keyword "exact" is False (default), an existing dataset must have
+        the same shape and a conversion-compatible dtype to be returned.  If
+        True, the shape and dtype must match exactly.
+
+        If keyword "maxshape" is given, the maxshape and dtype must match
+        instead.
+
+        If any of the keywords "rdcc_nslots", "rdcc_nbytes", or "rdcc_w0" are
+        given, they will be used to configure the dataset's chunk cache.
+
+        Other dataset keywords (see create_dataset) may be provided, but are
+        only used if a new dataset is to be created.
+
+        Raises TypeError if an incompatible object already exists, or if the
+        shape, maxshape or dtype don't match according to the above rules.
+        """
+        if 'efile_prefix' in kwds:
+            kwds['efile_prefix'] = self._e(kwds['efile_prefix'])
+
+        if 'virtual_prefix' in kwds:
+            kwds['virtual_prefix'] = self._e(kwds['virtual_prefix'])
+
+        with phil:
+            if name not in self:
+                return self.create_dataset(name, *(shape, dtype), **kwds)
+
+            if isinstance(shape, int):
+                shape = (shape,)
+
+            try:
+                dsid = dataset.open_dset(self, self._e(name), **kwds)
+                dset = dataset.Dataset(dsid)
+            except KeyError as exc:
+                dset = self[name]
+                raise TypeError(f"Incompatible object ({dset.__class__.__name__}) already exists") from exc
+
+            if shape != dset.shape:
+                if "maxshape" not in kwds:
+                    raise TypeError("Shapes do not match (existing %s vs new %s)" % (dset.shape, shape))
+                elif kwds["maxshape"] != dset.maxshape:
+                    raise TypeError("Max shapes do not match (existing %s vs new %s)" % (dset.maxshape, kwds["maxshape"]))
+
+            if exact:
+                if dtype != dset.dtype:
+                    raise TypeError("Datatypes do not exactly match (existing %s vs new %s)" % (dset.dtype, dtype))
+            elif not numpy.can_cast(dtype, dset.dtype):
+                raise TypeError("Datatypes cannot be safely cast (existing %s vs new %s)" % (dset.dtype, dtype))
+
+            return dset
+
+    def create_dataset_like(self, name, other, **kwupdate):
+        """ Create a dataset similar to `other`.
+
+        name
+            Name of the dataset (absolute or relative).  Provide None to make
+            an anonymous dataset.
+        other
+            The dataset which the new dataset should mimic. All properties, such
+            as shape, dtype, chunking, ... will be taken from it, but no data
+            or attributes are being copied.
+
+        Any dataset keywords (see create_dataset) may be provided, including
+        shape and dtype, in which case the provided values take precedence over
+        those from `other`.
+        """
+        for k in ('shape', 'dtype', 'chunks', 'compression',
+                  'compression_opts', 'scaleoffset', 'shuffle', 'fletcher32',
+                  'fillvalue'):
+            kwupdate.setdefault(k, getattr(other, k))
+        # TODO: more elegant way to pass these (dcpl to create_dataset?)
+        dcpl = other.id.get_create_plist()
+        kwupdate.setdefault('track_times', dcpl.get_obj_track_times())
+        kwupdate.setdefault('track_order', dcpl.get_attr_creation_order() > 0)
+
+        # Special case: the maxshape property always exists, but if we pass it
+        # to create_dataset, the new dataset will automatically get chunked
+        # layout. So we copy it only if it is different from shape.
+        if other.maxshape != other.shape:
+            kwupdate.setdefault('maxshape', other.maxshape)
+
+        return self.create_dataset(name, **kwupdate)
+
+    def require_group(self, name):
+        # TODO: support kwargs like require_dataset
+        """Return a group, creating it if it doesn't exist.
+
+        TypeError is raised if something with that name already exists that
+        isn't a group.
+        """
+        with phil:
+            if name not in self:
+                return self.create_group(name)
+            grp = self[name]
+            if not isinstance(grp, Group):
+                raise TypeError("Incompatible object (%s) already exists" % grp.__class__.__name__)
+            return grp
+
+    @with_phil
+    def __getitem__(self, name):
+        """ Open an object in the file """
+
+        if isinstance(name, h5r.Reference):
+            oid = h5r.dereference(name, self.id)
+            if oid is None:
+                raise ValueError("Invalid HDF5 object reference")
+        elif isinstance(name, (bytes, str)):
+            oid = h5o.open(self.id, self._e(name), lapl=self._lapl)
+        else:
+            raise TypeError("Accessing a group is done with bytes or str, "
+                            "not {}".format(type(name)))
+
+        otype = h5i.get_type(oid)
+        if otype == h5i.GROUP:
+            return Group(oid)
+        elif otype == h5i.DATASET:
+            return dataset.Dataset(oid, readonly=(self.file.mode == 'r'))
+        elif otype == h5i.DATATYPE:
+            return datatype.Datatype(oid)
+        else:
+            raise TypeError("Unknown object type")
+
+    def get(self, name, default=None, getclass=False, getlink=False):
+        """ Retrieve an item, its class, or information about its link.
+
+        Neither flag set:
+            Return the item stored at "name", or "default" if the lookup
+            raises KeyError.
+
+        "getclass" only:
+            Return the class of the object (Group, Dataset or Datatype), or
+            "default" if nothing with that name exists.
+
+        "getlink" only:
+            Return a HardLink, SoftLink or ExternalLink instance describing
+            the link, or "default" if nothing with that name exists.
+
+        Both "getlink" and "getclass":
+            Return the HardLink, SoftLink or ExternalLink class itself, or
+            "default" if nothing with that name exists.
+
+        TypeError is raised for an unrecognised object or link type.
+
+        Example:
+
+        >>> cls = group.get('foo', getclass=True, getlink=True)
+        >>> if cls == SoftLink:
+        ...     print('"foo" is a soft link')
+        """
+        # pylint: disable=arguments-differ
+
+        with phil:
+            if not getclass and not getlink:
+                try:
+                    return self[name]
+                except KeyError:
+                    return default
+
+            if name not in self:
+                return default
+
+            if not getlink:
+                # Only the class of the target object was requested
+                obj_type = h5o.get_info(self.id, self._e(name), lapl=self._lapl).type
+
+                try:
+                    return {h5o.TYPE_GROUP: Group,
+                            h5o.TYPE_DATASET: dataset.Dataset,
+                            h5o.TYPE_NAMED_DATATYPE: datatype.Datatype}[obj_type]
+                except KeyError as exc:
+                    raise TypeError("Unknown object type") from exc
+
+            # From here on the caller asked about the link itself
+            link_type = self.id.links.get_info(self._e(name), lapl=self._lapl).type
+
+            if link_type == h5l.TYPE_SOFT:
+                if getclass:
+                    return SoftLink
+                target = self.id.links.get_val(self._e(name), lapl=self._lapl)
+                return SoftLink(self._d(target))
+
+            if link_type == h5l.TYPE_EXTERNAL:
+                if getclass:
+                    return ExternalLink
+                ext_file, ext_path = self.id.links.get_val(self._e(name), lapl=self._lapl)
+                return ExternalLink(
+                    filename_decode(ext_file), self._d(ext_path)
+                )
+
+            if link_type == h5l.TYPE_HARD:
+                if getclass:
+                    return HardLink
+                return HardLink()
+
+            raise TypeError("Unknown link type")
+
+    def __setitem__(self, name, obj):
+        """ Store "obj" in this group under "name" (which must be unused).
+
+        What happens depends on the type of the assigned object:
+
+        Named HDF5 object (Dataset, Group, Datatype)
+            A new hard link called "name" is made to the existing object.
+
+        SoftLink or ExternalLink
+            The corresponding link is created.
+
+        Numpy dtype
+            A copy of the datatype is committed to the file as a named
+            datatype.
+
+        Anything else (including Numpy ndarrays)
+            The value is handed to create_dataset, which builds an anonymous
+            dataset with default settings; that dataset is then linked at
+            "name".  Scalar values become scalar datasets.  ValueError is
+            raised if the resulting array dtype can't be understood.
+        """
+        with phil:
+            link_name, lcpl = self._e(name, lcpl=True)
+
+            if isinstance(obj, HLObject):
+                h5o.link(obj.id, self.id, link_name, lcpl=lcpl, lapl=self._lapl)
+                return
+
+            if isinstance(obj, SoftLink):
+                self.id.links.create_soft(link_name, self._e(obj.path), lcpl=lcpl, lapl=self._lapl)
+                return
+
+            if isinstance(obj, ExternalLink):
+                target_file = filename_encode(obj.filename)
+                self.id.links.create_external(link_name, target_file, self._e(obj.path),
+                                              lcpl=lcpl, lapl=self._lapl)
+                return
+
+            if isinstance(obj, numpy.dtype):
+                committed = h5t.py_create(obj, logical=True)
+                committed.commit(self.id, link_name, lcpl=lcpl)
+                return
+
+            # Fallback: create an anonymous dataset, then link it by name
+            new_dset = self.create_dataset(None, data=obj)
+            h5o.link(new_dset.id, self.id, link_name, lcpl=lcpl)
+
+    @with_phil
+    def __delitem__(self, name):
+        """ Unlink the member called "name" from this group. """
+        encoded = self._e(name)
+        self.id.unlink(encoded)
+
+    @with_phil
+    def __len__(self):
+        """ Return how many members this group has. """
+        member_count = self.id.get_num_objs()
+        return member_count
+
+    @with_phil
+    def __iter__(self):
+        """ Yield the decoded name of every member. """
+        yield from map(self._d, self.id.__iter__())
+
+    @with_phil
+    def __reversed__(self):
+        """ Yield the decoded name of every member, in reverse order. """
+        yield from map(self._d, self.id.__reversed__())
+
+    @with_phil
+    def __contains__(self, name):
+        """ Report whether a member called "name" exists. """
+        if not hasattr(h5g, "_path_valid"):
+            # No path-validation helper available: ask the group id directly
+            return self._e(name) in self.id
+        if not self.id:
+            return False
+        return h5g._path_valid(self.id, self._e(name), self._lapl)
+
+    def copy(self, source, dest, name=None,
+             shallow=False, expand_soft=False, expand_external=False,
+             expand_refs=False, without_attrs=False):
+        """Copy an object or group.
+
+        "source" may be a path or a Group, Dataset or Datatype object;
+        "dest" may be a path or a Group object.  Paths are interpreted
+        relative to this group.  Source and destination need not live in
+        the same file.
+
+        If the source is a Group object, everything inside it is copied
+        recursively.
+
+        When "dest" is a Group object, the copy is placed in that group
+        under the source's current name (the basename of obj.name) unless
+        "name" is given.  Passing any other kind of object as "dest"
+        raises TypeError.
+
+        The following options all default to "False":
+
+         - shallow: copy only immediate members of a group.
+
+         - expand_soft: expand soft links into new objects.
+
+         - expand_external: expand external links into new objects.
+
+         - expand_refs: copy objects that are pointed to by references.
+
+         - without_attrs: copy object without copying attributes.
+
+        Example:
+
+        >>> f = File('myfile.hdf5', 'w')
+        >>> f.create_group("MyGroup")
+        >>> list(f.keys())
+        ['MyGroup']
+        >>> f.copy('MyGroup', 'MyCopy')
+        >>> list(f.keys())
+        ['MyGroup', 'MyCopy']
+
+        """
+        with phil:
+            if isinstance(source, HLObject):
+                source_path = '.'
+            else:
+                # A plain path is resolved relative to this group
+                source_path, source = source, self
+
+            if isinstance(dest, Group):
+                if name is not None:
+                    dest_path = name
+                else:
+                    # Reuse the basename of the object being copied
+                    named = source if source_path == '.' else source[source_path]
+                    dest_path = pp.basename(h5i.get_name(named.id))
+            elif isinstance(dest, HLObject):
+                raise TypeError("Destination must be path or Group object")
+            else:
+                # A plain path is resolved relative to this group
+                dest_path, dest = dest, self
+
+            # Fold the requested options into a single flag word
+            requested = (
+                (shallow, h5o.COPY_SHALLOW_HIERARCHY_FLAG),
+                (expand_soft, h5o.COPY_EXPAND_SOFT_LINK_FLAG),
+                (expand_external, h5o.COPY_EXPAND_EXT_LINK_FLAG),
+                (expand_refs, h5o.COPY_EXPAND_REFERENCE_FLAG),
+                (without_attrs, h5o.COPY_WITHOUT_ATTR_FLAG),
+            )
+            flags = 0
+            for wanted, bit in requested:
+                if wanted:
+                    flags |= bit
+
+            # A copy property list is only built when some option is set
+            copypl = None
+            if flags:
+                copypl = h5p.create(h5p.OBJECT_COPY)
+                copypl.set_copy_object(flags)
+
+            h5o.copy(source.id, self._e(source_path), dest.id, self._e(dest_path),
+                     copypl, base.dlcpl)
+
+    def move(self, source, dest):
+        """ Move the link "source" to the new location "dest".
+
+        For a hard link this amounts to renaming the object.  For a soft or
+        external link, the link itself is moved and its value is left
+        untouched.  Nothing is done when "source" and "dest" are equal.
+        """
+        with phil:
+            if source == dest:
+                return
+            old_name = self._e(source)
+            new_name = self._e(dest)
+            self.id.links.move(old_name, self.id, new_name,
+                               lapl=self._lapl, lcpl=self._lcpl)
+
+    def visit(self, func):
+        """ Recursively visit every name in this group and its subgroups.
+
+        Note: soft and external links are ignored by visit; use visit_links
+        to see those.
+
+        "func" is any callable (function, method or callable object).  It is
+        called exactly once for each link in this group and in every group
+        below it, and must have the signature:
+
+            func(<member name>) => <None or return value>
+
+        A return value of None continues the iteration; any other value
+        stops it and is returned immediately from visit.  The iteration
+        order is lexicographic.
+
+        Example:
+
+        >>> # List the entire contents of the file
+        >>> f = File("foo.hdf5")
+        >>> list_of_names = []
+        >>> f.visit(list_of_names.append)
+        """
+        with phil:
+            def decode_and_call(raw_name):
+                """ Hand the decoded (text) name to the user callable """
+                return func(self._d(raw_name))
+            return h5o.visit(self.id, decode_and_call)
+
+    def visititems(self, func):
+        """ Recursively visit names and objects in this group.
+
+        Note: soft and external links are ignored by visititems; use
+        visititems_links to see those.
+
+        "func" is any callable (function, method or callable object).  It is
+        called exactly once for each link in this group and in every group
+        below it, and must have the signature:
+
+            func(<member name>, <object>) => <None or return value>
+
+        The object is obtained by indexing this group with the member name.
+        A return value of None continues the iteration; any other value
+        stops it and is returned immediately from visititems.  The iteration
+        order is lexicographic.
+
+        Example:
+
+        # Get a list of all datasets in the file
+        >>> mylist = []
+        >>> def func(name, obj):
+        ...     if isinstance(obj, Dataset):
+        ...         mylist.append(name)
+        ...
+        >>> f = File('foo.hdf5')
+        >>> f.visititems(func)
+        """
+        with phil:
+            def open_and_call(raw_name):
+                """ Pass the decoded name and the opened object to func """
+                text_name = self._d(raw_name)
+                return func(text_name, self[text_name])
+            return h5o.visit(self.id, open_and_call)
+
+    def visit_links(self, func):
+        """ Recursively visit every link name in this group and subgroups.
+        Each link is visited exactly once, regardless of its target.
+
+        "func" is any callable (function, method or callable object).  It is
+        called exactly once for each link in this group and in every group
+        below it, and must have the signature:
+
+            func(<member name>) => <None or return value>
+
+        A return value of None continues the iteration; any other value
+        stops it and is returned immediately from visit_links.  The
+        iteration order is lexicographic.
+
+        Example:
+
+        >>> # List the entire contents of the file
+        >>> f = File("foo.hdf5")
+        >>> list_of_names = []
+        >>> f.visit_links(list_of_names.append)
+        """
+        with phil:
+            def decode_and_call(raw_name):
+                """ Hand the decoded (text) name to the user callable """
+                return func(self._d(raw_name))
+            return self.id.links.visit(decode_and_call)
+
+    def visititems_links(self, func):
+        """ Recursively visit links in this group.
+        Each link is visited exactly once, regardless of its target.
+
+        "func" is any callable (function, method or callable object).  It is
+        called exactly once for each link in this group and in every group
+        below it, and must have the signature:
+
+            func(<member name>, <link>) => <None or return value>
+
+        The link object is what get(name, getlink=True) returns for the
+        member.  A return value of None continues the iteration; any other
+        value stops it and is returned immediately from visititems_links.
+        The iteration order is lexicographic.
+
+        Example:
+
+        # Get a list of all softlinks in the file
+        >>> mylist = []
+        >>> def func(name, link):
+        ...     if isinstance(link, SoftLink):
+        ...         mylist.append(name)
+        ...
+        >>> f = File('foo.hdf5')
+        >>> f.visititems_links(func)
+        """
+        with phil:
+            def describe_and_call(raw_name):
+                """ Pass the decoded name and its link description to func """
+                text_name = self._d(raw_name)
+                return func(text_name, self.get(text_name, getlink=True))
+            return self.id.links.visit(describe_and_call)
+
+    @with_phil
+    def __repr__(self):
+        """ Short description: closed marker, or name plus member count. """
+        if not self:
+            return u"<Closed HDF5 group>"
+
+        own_name = self.name
+        namestr = u"(anonymous)" if own_name is None else '"%s"' % own_name
+        return '<HDF5 group %s (%d members)>' % (namestr, len(self))
+
+
+class HardLink:
+
+    """
+        Marker type standing for a hard link in an HDF5 file.  It exists
+        only so that Group.get has something sensible to return; it carries
+        no state and has no behaviour of its own.
+    """
+
+
+class SoftLink:
+
+    """
+        A symbolic ("soft") link in an HDF5 file.  The stored path can be
+        absolute or relative, and nothing checks that its target exists.
+    """
+
+    def __init__(self, path):
+        # The path is always kept as text, whatever was passed in
+        self._path = str(path)
+
+    @property
+    def path(self):
+        """ Value of the soft link.  May not be a valid path. """
+        return self._path
+
+    def __repr__(self):
+        return '<SoftLink to "%s">' % self.path
+
+
+class ExternalLink:
+
+    """
+        Represents an HDF5 external link.  Paths may be absolute or relative.
+        No checking is performed to ensure either the target or file exists.
+    """
+
+    @property
+    def path(self):
+        """ Path of the link target inside the external HDF5 file. """
+        return self._path
+
+    @property
+    def filename(self):
+        """ Path to the external HDF5 file in the filesystem. """
+        return self._filename
+
+    def __init__(self, filename, path):
+        # Round-trip through encode/decode so the stored filename has the
+        # form filename_decode produces, whatever form was passed in.
+        self._filename = filename_decode(filename_encode(filename))
+        self._path = path
+
+    def __repr__(self):
+        # The closing ">" was missing, leaving the repr unbalanced and
+        # inconsistent with SoftLink's.
+        return '<ExternalLink to "%s" in file "%s">' % (self.path,
+                                                        self.filename)
@@ -0,0 +1,439 @@
+# This file is part of h5py, a Python interface to the HDF5 library.
+#
+# http://www.h5py.org
+#
+# Copyright 2008-2013 Andrew Collette and contributors
+#
+# License:  Standard 3-clause BSD; see "license.txt" for full license terms
+#           and contributor agreement.
+
+"""
+    High-level access to HDF5 dataspace selections
+"""
+
+import numpy as np
+
+from .base import product
+from .. import h5s, h5r, _selector
+
+def select(shape, args, dataset=None):
+    """ Build a selection object from whatever was passed to __getitem__.
+
+    shape
+        Shape of the "source" dataspace.
+
+    args
+        Either a single argument or a tuple of arguments.  See below for
+        supported classes of argument.
+
+    dataset
+        A h5py.Dataset instance representing the source dataset.  Required
+        when a region reference is applied; otherwise optional.
+
+    Argument classes:
+
+    Single Selection instance
+        Returned unchanged, after checking that its shape matches.
+
+    numpy.ndarray
+        Must be a boolean mask with the same shape as the dataspace.
+        Returns a PointSelection instance.
+
+    RegionReference
+        Returns a Selection instance.
+
+    Indices, slices, ellipses, MultiBlockSlices only
+        Returns a SimpleSelection instance
+
+    Indices, slices, ellipses, lists or boolean index arrays
+        Returns a FancySelection instance.
+
+    TypeError is raised when a special argument does not match "shape", or
+    when a region reference is given without a dataset.
+    """
+    if not isinstance(args, tuple):
+        args = (args,)
+
+    # A lone argument may be one of the "special" indexing objects
+    if len(args) == 1:
+        only = args[0]
+
+        if isinstance(only, Selection):
+            if only.shape != shape:
+                raise TypeError("Mismatched selection shape")
+            return only
+
+        if isinstance(only, np.ndarray) and only.dtype.kind == 'b':
+            if only.shape != shape:
+                raise TypeError("Boolean indexing array has incompatible shape")
+            return PointSelection.from_mask(only)
+
+        if isinstance(only, h5r.RegionReference):
+            if dataset is None:
+                raise TypeError("Cannot apply a region reference without a dataset")
+            region_space = h5r.get_region(only, dataset.id)
+            if shape != region_space.shape:
+                raise TypeError("Reference shape does not match dataset shape")
+
+            return Selection(shape, spaceid=region_space)
+
+    # Everything else is delegated to a selector object
+    if dataset is None:
+        selector = _selector.Selector(h5s.create_simple(shape))
+    else:
+        selector = dataset._selector
+
+    return selector.make_selection(args)
+
+
+class Selection:
+
+    """
+        Base class for HDF5 dataspace selections.  Subclasses implement the
+        "selection protocol", i.e. they provide at least these members:
+
+        __init__(shape)   => Create a new selection on "shape"-tuple
+        __getitem__(args) => Perform a selection with the range specified.
+                             What args are allowed depends on the
+                             particular subclass in use.
+
+        id (read-only) =>      h5py.h5s.SpaceID instance
+        shape (read-only) =>   The shape of the dataspace.
+        mshape  (read-only) => The shape of the selection region.
+                               Not guaranteed to fit within "shape", although
+                               the total number of points is less than
+                               product(shape).
+        nselect (read-only) => Number of selected points.  Always equal to
+                               product(mshape).
+
+        broadcast(target_shape) => Return an iterable which yields dataspaces
+                                   for read, based on target_shape.
+
+        The base class itself models "unshaped" (1-D) selections.
+    """
+
+    def __init__(self, shape, spaceid=None):
+        """ Build a selection.  "shape" is not used when spaceid is given. """
+        if spaceid is None:
+            # Make a fresh dataspace with unlimited maximum dimensions and
+            # select everything in it.
+            dims = tuple(shape)
+            self._shape = dims
+            self._id = h5s.create_simple(dims, (h5s.UNLIMITED,)*len(dims))
+            self._id.select_all()
+        else:
+            self._id = spaceid
+            self._shape = spaceid.shape
+
+    @property
+    def id(self):
+        """ The underlying SpaceID instance """
+        return self._id
+
+    @property
+    def shape(self):
+        """ Shape of the entire dataspace """
+        return self._shape
+
+    @property
+    def nselect(self):
+        """ How many elements are currently selected """
+        return self._id.get_select_npoints()
+
+    @property
+    def mshape(self):
+        """ Selection shape (this class always reports a 1-D shape) """
+        return (self.nselect,)
+
+    @property
+    def array_shape(self):
+        """Shape of the array to read/write (1-D for this class)"""
+        return self.mshape
+
+    # expand_shape and broadcast only really make sense for SimpleSelection
+    def expand_shape(self, source_shape):
+        """ Return source_shape unchanged if its size matches the selection """
+        if product(source_shape) == self.nselect:
+            return source_shape
+        raise TypeError("Broadcasting is not supported for point-wise selections")
+
+    def broadcast(self, source_shape):
+        """ Yield the single dataspace to use; sizes must match exactly """
+        if product(source_shape) != self.nselect:
+            raise TypeError("Broadcasting is not supported for point-wise selections")
+        yield self._id
+
+    def __getitem__(self, args):
+        raise NotImplementedError("This class does not support indexing")
+
+class PointSelection(Selection):
+
+    """
+        Represents a point-wise selection.  You can supply sequences of
+        points to the three methods append(), prepend() and set(), or
+        instantiate it with a single boolean array using from_mask().
+    """
+    def __init__(self, shape, spaceid=None, points=None):
+        """ Create the selection; "points", if given, becomes the selection. """
+        super().__init__(shape, spaceid)
+        if points is not None:
+            self._perform_selection(points, h5s.SELECT_SET)
+
+    def _perform_selection(self, points, op):
+        """ Internal method which actually performs the selection.
+
+        "points" is converted to a C-ordered uint64 array; a 1-D input is
+        treated as a single point.  "op" is one of the h5s.SELECT_*
+        operators, and is overridden with SELECT_SET when the dataspace
+        does not currently hold a point selection.
+        """
+        points = np.asarray(points, order='C', dtype='u8')
+        if points.ndim == 1:
+            # np.asarray may hand back the caller's own array, so reshape
+            # (which returns a new view) instead of assigning to .shape,
+            # which would change the caller's array in place.
+            points = points.reshape((1, points.shape[0]))
+
+        if self._id.get_select_type() != h5s.SEL_POINTS:
+            # Nothing point-based to append/prepend to yet: start afresh
+            op = h5s.SELECT_SET
+
+        if len(points) == 0:
+            self._id.select_none()
+        else:
+            self._id.select_elements(points, op)
+
+    @classmethod
+    def from_mask(cls, mask, spaceid=None):
+        """Create a point-wise selection from a NumPy boolean array.
+
+        Raises TypeError if "mask" is not a boolean ndarray.
+        """
+        if not (isinstance(mask, np.ndarray) and mask.dtype.kind == 'b'):
+            raise TypeError("PointSelection.from_mask only works with bool arrays")
+
+        # One row of coordinates per True element of the mask
+        points = np.transpose(mask.nonzero())
+        return cls(mask.shape, spaceid, points=points)
+
+    def append(self, points):
+        """ Add the sequence of points to the end of the current selection """
+        self._perform_selection(points, h5s.SELECT_APPEND)
+
+    def prepend(self, points):
+        """ Add the sequence of points to the beginning of the current selection """
+        self._perform_selection(points, h5s.SELECT_PREPEND)
+
+    def set(self, points):
+        """ Replace the current selection with the given sequence of points"""
+        self._perform_selection(points, h5s.SELECT_SET)
+
+
+class SimpleSelection(Selection):
+
+    """ A single "rectangular" (regular) selection composed of only slices
+        and integer arguments.  Can participate in broadcasting.
+
+        The selection is kept in self._sel as a 4-tuple
+        (start, count, step, scalar), with one entry per dataspace axis.
+    """
+
+    @property
+    def mshape(self):
+        """ Shape of current selection """
+        return self._sel[1]
+
+    @property
+    def array_shape(self):
+        """ Shape of the array to read/write: mshape without the axes
+        flagged as scalar in the selection. """
+        scalar = self._sel[3]
+        return tuple(x for x, s in zip(self.mshape, scalar, strict=True) if not s)
+
+    def __init__(self, shape, spaceid=None, hyperslab=None):
+        """ Create the selection.  "hyperslab", if given, is stored as the
+        (start, count, step, scalar) tuple; otherwise everything is selected.
+        """
+        super().__init__(shape, spaceid)
+        if hyperslab is not None:
+            self._sel = hyperslab
+        else:
+            # No hyperslab specified - select all
+            rank = len(self.shape)
+            self._sel = ((0,)*rank, self.shape, (1,)*rank, (False,)*rank)
+
+    def expand_shape(self, source_shape):
+        """Match the dimensions of an array to be broadcast to the selection
+
+        The returned shape describes an array of the same size as the input
+        shape, but its dimensions are lined up with the axes of the
+        selection: a length-1 entry is inserted for every scalar axis and
+        for every leading axis the source shape does not cover.
+
+        E.g. with a dataset shape (10, 5, 4, 2), writing like this::
+
+            ds[..., 0] = np.ones((5, 4))
+
+        The source shape (5, 4) will expand to (1, 5, 4, 1).
+        Then the broadcast method below repeats that chunk 10
+        times to write to an effective shape of (10, 5, 4, 1).
+
+        Raises TypeError if a source dimension is neither 1 nor equal to the
+        selection count on its axis, or if source dimensions larger than 1
+        are left over.
+        """
+        start, count, step, scalar = self._sel
+
+        rank = len(count)
+        remaining_src_dims = list(source_shape)
+
+        # Walk the selection axes from last to first, consuming source
+        # dimensions from the end of source_shape as they are matched.
+        eshape = []
+        for idx in range(1, rank + 1):
+            if len(remaining_src_dims) == 0 or scalar[-idx]:  # Skip scalar axes
+                eshape.append(1)
+            else:
+                t = remaining_src_dims.pop()
+                if t == 1 or count[-idx] == t:
+                    eshape.append(t)
+                else:
+                    raise TypeError("Can't broadcast %s -> %s" % (source_shape, self.array_shape))  # array shape
+
+        if any([n > 1 for n in remaining_src_dims]):
+            # All dimensions from source_shape should either have been popped
+            # to match the selection shape, or be 1.
+            raise TypeError("Can't broadcast %s -> %s" % (source_shape, self.array_shape))  # array shape
+
+        # We have built eshape backwards, so now reverse it
+        return tuple(eshape[::-1])
+
+
+    def broadcast(self, source_shape):
+        """ Return an iterator over target dataspaces for broadcasting.
+
+        Follows the standard NumPy broadcasting rules against the current
+        selection shape (self.mshape).
+
+        Yields self._id once when a single chunk covers the selection;
+        otherwise yields one copied dataspace repeatedly, moved to a new
+        offset before each yield.  Yields nothing when any selection count
+        is zero.
+        """
+        if self.shape == ():
+            # Scalar dataspace: only a single-element source fits
+            if product(source_shape) != 1:
+                raise TypeError("Can't broadcast %s to scalar" % source_shape)
+            self._id.select_all()
+            yield self._id
+            return
+
+        start, count, step, scalar = self._sel
+
+        rank = len(count)
+        tshape = self.expand_shape(source_shape)
+
+        # Avoid ZeroDivisionError below (after the shape checks in expand_shape)
+        if any(d == 0 for d in count):
+            return
+
+        # Number of times the source chunk repeats along each axis
+        chunks = tuple(x//y for x, y in zip(count, tshape, strict=True))
+        nchunks = product(chunks)
+
+        if nchunks == 1:
+            yield self._id
+        else:
+            # Select one chunk-sized hyperslab at the origin of a copy, then
+            # shift it with offset_simple for each chunk position.
+            sid = self._id.copy()
+            sid.select_hyperslab((0,)*rank, tshape, step)
+            for idx in range(nchunks):
+                # Per axis: chunk index * chunk length * step + start
+                offset = tuple(x*y*z + s for x, y, z, s in zip(np.unravel_index(idx, chunks), tshape, step, start, strict=True))
+                sid.offset_simple(offset)
+                yield sid
+
+
+class FancySelection(Selection):
+
+    """
+        Selection supporting advanced NumPy-style indexing on top of the
+        usual slice-and-int behavior.
+
+        Indexing arguments may be ints, slices, lists of indices, or
+        per-axis (1D) boolean arrays.
+
+        These selections do not support broadcasting.
+    """
+
+    def __init__(self, shape, spaceid=None, mshape=None, array_shape=None):
+        super().__init__(shape, spaceid)
+        # mshape falls back to the dataspace shape, array_shape to mshape
+        self._mshape = self.shape if mshape is None else mshape
+        self._array_shape = self._mshape if array_shape is None else array_shape
+
+    @property
+    def mshape(self):
+        return self._mshape
+
+    @property
+    def array_shape(self):
+        return self._array_shape
+
+    def expand_shape(self, source_shape):
+        """ Return source_shape, which must equal array_shape exactly """
+        if source_shape == self.array_shape:
+            return source_shape
+        raise TypeError("Broadcasting is not supported for complex selections")
+
+    def broadcast(self, source_shape):
+        """ Yield the dataspace once; source_shape must equal array_shape """
+        shapes_match = source_shape == self.array_shape
+        if not shapes_match:
+            raise TypeError("Broadcasting is not supported for complex selections")
+        yield self._id
+
+
+def guess_shape(sid):
+    """ Given a dataspace, try to deduce the shape of the selection.
+
+    Returns one of:
+        * A tuple with the selection shape, same length as the dataspace
+        * A 1D selection shape for point-based and multiple-hyperslab selections
+        * None, for unselected scalars and for NULL dataspaces
+
+    Raises TypeError for an unrecognized dataspace class or selection type.
+    """
+
+    sel_class = sid.get_simple_extent_type()    # Dataspace class
+    sel_type = sid.get_select_type()            # Flavor of selection in use
+
+    if sel_class == h5s.NULL:
+        # NULL dataspaces don't support selections
+        return None
+
+    elif sel_class == h5s.SCALAR:
+        # NumPy has no way of expressing empty 0-rank selections, so we use None
+        if sel_type == h5s.SEL_NONE: return None
+        if sel_type == h5s.SEL_ALL: return tuple()
+        # NOTE(review): any other selection type on a scalar dataspace falls
+        # through to the code for simple dataspaces below — confirm this is
+        # intended.
+
+    elif sel_class != h5s.SIMPLE:
+        raise TypeError("Unrecognized dataspace class %s" % sel_class)
+
+    # We have a "simple" (rank >= 1) dataspace
+
+    N = sid.get_select_npoints()
+    rank = len(sid.shape)
+
+    if sel_type == h5s.SEL_NONE:
+        return (0,)*rank
+
+    elif sel_type == h5s.SEL_ALL:
+        return sid.shape
+
+    elif sel_type == h5s.SEL_POINTS:
+        # Like NumPy, point-based selections yield 1D arrays regardless of
+        # the dataspace rank
+        return (N,)
+
+    elif sel_type != h5s.SEL_HYPERSLABS:
+        raise TypeError("Unrecognized selection method %s" % sel_type)
+
+    # We have a hyperslab-based selection
+
+    if N == 0:
+        return (0,)*rank
+
+    bottomcorner, topcorner = (np.array(x) for x in sid.get_select_bounds())
+
+    # Shape of full selection box
+    # NOTE(review): np.ones defaults to float64, so boxshape (and the "count"
+    # tuple derived from it below) holds floats — confirm select_hyperslab
+    # accepts that.
+    boxshape = topcorner - bottomcorner + np.ones((rank,))
+
+    def get_n_axis(sid, axis):
+        """ Determine the number of elements selected along a particular axis.
+
+        To do this, we "mask off" the axis by making a hyperslab selection
+        which leaves only the first point along the axis.  For a 2D dataset
+        with selection box shape (X, Y), for axis 1, this would leave a
+        selection of shape (X, 1).  We count the number of points N_leftover
+        remaining in the selection and compute the axis selection length by
+        N_axis = N/N_leftover.
+        """
+
+        # A box only one element thick along this axis needs no masking
+        if(boxshape[axis]) == 1:
+            return 1
+
+        # Describe the box minus its first layer along "axis"
+        start = bottomcorner.copy()
+        start[axis] += 1
+        count = boxshape.copy()
+        count[axis] -= 1
+
+        # Throw away all points along this axis
+        # (works on a copy, so the caller's dataspace keeps its selection)
+        masked_sid = sid.copy()
+        masked_sid.select_hyperslab(tuple(start), tuple(count), op=h5s.SELECT_NOTB)
+
+        N_leftover = masked_sid.get_select_npoints()
+
+        return N//N_leftover
+
+
+    shape = tuple(get_n_axis(sid, x) for x in range(rank))
+
+    if product(shape) != N:
+        # This means multiple hyperslab selections are in effect,
+        # so we fall back to a 1D shape
+        return (N,)
+
+    return shape
@@ -0,0 +1,103 @@
+# This file is part of h5py, a Python interface to the HDF5 library.
+#
+# http://www.h5py.org
+#
+# Copyright 2008-2013 Andrew Collette and contributors
+#
+# License:  Standard 3-clause BSD; see "license.txt" for full license terms
+#           and contributor agreement.
+
+"""
+    Implements a portion of the selection operations.
+"""
+
+import numpy as np
+from .. import h5s
+
+def read_dtypes(dataset_dtype, names):
+    """ Work out the dtypes needed to read the fields listed in "names".
+
+    Returns a 2-tuple containing:
+
+    1. Output dataset dtype
+    2. Dtype containing HDF5-appropriate description of destination
+
+    An empty "names" means the whole dtype is read.  ValueError is raised
+    if fields are requested from a non-compound dtype, or if a requested
+    field is not part of the dtype.
+    """
+
+    if len(names) == 0:     # Not compound, or all fields needed
+        return dataset_dtype, dataset_dtype
+
+    if dataset_dtype.names is None:
+        raise ValueError("Field names only allowed for compound types")
+
+    if not all(field in dataset_dtype.names for field in names):
+        raise ValueError("Field does not appear in this type.")
+
+    selected = [(field, dataset_dtype.fields[field][0]) for field in names]
+    format_dtype = np.dtype(selected)
+
+    if len(names) == 1:
+        # A single explicitly selected field is returned without its
+        # compound wrapper.
+        return format_dtype.fields[names[0]][0], format_dtype
+
+    return format_dtype, format_dtype
+
+
+def read_selections_scalar(dsid, args):
+    """ Resolve a read selection on a scalar dataset.
+
+    Returns a 2-tuple containing:
+
+    1. Output dataset shape
+    2. HDF5 dataspace containing source selection.
+
+    Only () and (Ellipsis,) are accepted as "args"; anything else raises
+    ValueError.  RuntimeError is raised if the dataset is not scalar.
+    """
+
+    if dsid.shape != ():
+        raise RuntimeError("Illegal selection function for non-scalar dataset")
+
+    wants_array_scalar = args == ()
+    if not wants_array_scalar and not args == (Ellipsis,):
+        raise ValueError("Illegal slicing argument for scalar dataspace")
+
+    source_space = dsid.get_space()
+    source_space.select_all()
+
+    # None is the signal that an array scalar should be returned instead of
+    # an ndarray with shape ()
+    out_shape = None if wants_array_scalar else ()
+    return out_shape, source_space
+
+class ScalarReadSelection:
+
+    """
+        Slicing support for scalar datasets.  Only the arguments () and
+        (Ellipsis,) are accepted.
+    """
+
+    def __init__(self, fspace, args):
+        wants_array_scalar = args == ()
+        if not wants_array_scalar and not args == (Ellipsis,):
+            raise ValueError("Illegal slicing argument for scalar dataspace")
+
+        # mshape is None for (), and () for (Ellipsis,)
+        self.mshape = None if wants_array_scalar else ()
+        self.mspace = h5s.create(h5s.SCALAR)
+        self.fspace = fspace
+
+    def __iter__(self):
+        # A single (file space, memory space) pair covers the whole read
+        self.mspace.select_all()
+        yield self.fspace, self.mspace
+
+def select_read(fspace, args):
+    """ Top-level dispatch function for reading.
+
+    Only scalar dataspaces are handled for now; any other shape raises
+    NotImplementedError.
+    """
+    if fspace.shape != ():
+        raise NotImplementedError()
+
+    return ScalarReadSelection(fspace, args)
@@ -0,0 +1,250 @@
+# This file is part of h5py, a Python interface to the HDF5 library.
+#
+# http://www.h5py.org
+#
+# Copyright 2008-2013 Andrew Collette and contributors
+#
+# License:  Standard 3-clause BSD; see "license.txt" for full license terms
+#           and contributor agreement.
+
+"""
+    High-level interface for creating HDF5 virtual datasets
+"""
+
+from copy import deepcopy as copy
+from collections import namedtuple
+
+import numpy as np
+
+from .compat import filename_encode
+from .datatype import Datatype
+from .selections import SimpleSelection, select
+from .. import h5d, h5p, h5s, h5t
+
+
+class VDSmap(namedtuple('VDSmap',
+                        'vspace file_name dset_name src_space')):
+    '''A region of a virtual dataset mapped onto part of a source dataset.
+
+    Fields, in order: vspace, file_name, dset_name, src_space.
+    '''
+
+
+# NOTE(review): presumably a flag advertising that virtual dataset support is
+# available. Nothing in this module reads it; confirm against its importers.
+vds_support = True
+
+
+def _convert_space_for_key(space, key):
+    """
+    Re-select a regular hyperslab so that slices in *key* whose stop is
+    h5s.UNLIMITED get an unlimited count along their axis. This is what lets
+    virtual space selections carry unlimited dimensions.
+
+    The space is modified in place and nothing is returned. A selection that
+    is not a regular hyperslab is left untouched.
+    """
+    if not isinstance(key, tuple):
+        key = (key,)
+
+    # h5s.UNLIMITED is only allowed inside a regular hyperslab selection, so
+    # every other kind of selection is left exactly as it is.
+    if space.get_select_type() != h5s.SEL_HYPERSLABS:
+        return
+    if not space.is_regular_hyperslab():
+        return
+
+    rank = space.get_simple_extent_ndims()
+    nargs = len(key)
+    start, stride, count, block = space.get_regular_hyperslab()
+
+    # Walk the key entries. Plain integer indices are ignored. A slice with
+    # an h5s.UNLIMITED stop marks its axis as unlimited. Once an Ellipsis is
+    # seen, later entries are shifted by (rank - nargs) axes.
+    axis_shift = 0
+    for position, item in enumerate(key):
+        if item is Ellipsis:
+            axis_shift = rank - nargs
+        elif isinstance(item, slice) and item.stop == h5s.UNLIMITED:
+            updated = list(count)
+            updated[position + axis_shift] = h5s.UNLIMITED
+            count = tuple(updated)
+
+    space.select_hyperslab(start, count, stride, block)
+
+
+class VirtualSource:
+    """A source dataset, or a slice of one, for use in a virtual dataset.
+
+    Create an instance to stand for a whole source dataset, then index it to
+    pick the region that should appear in the virtual dataset. An instance
+    can only be sliced once.
+
+    path_or_dataset
+        Either the path of the file holding the source dataset, or an h5py
+        Dataset. When a Dataset is given, every other parameter must be left
+        as None, because the values are read from the dataset itself.
+    name
+        Name of the source dataset inside the file. Required with a path.
+    shape
+        Shape of the source dataset: a tuple, or a single int for one
+        dimension. Required with a path.
+    dtype
+        Numpy dtype or string.
+    maxshape
+        Shape the source dataset may be resized up to: a tuple, or a single
+        int. None entries mark unlimited axes. When omitted, the shape is
+        used.
+    """
+    def __init__(self, path_or_dataset, name=None,
+                 shape=None, dtype=None, maxshape=None):
+        from .dataset import Dataset
+        if isinstance(path_or_dataset, Dataset):
+            # Everything is read from the dataset, so any explicitly passed
+            # keyword is reported back to the caller as an error.
+            given = {'name': name, 'shape': shape,
+                     'dtype': dtype, 'maxshape': maxshape}
+            failed = {k: v for k, v in given.items() if v is not None}
+            if failed:
+                raise TypeError("If a Dataset is passed as the first argument "
+                                "then no other arguments may be passed.  You "
+                                "passed {failed}".format(failed=failed))
+            ds = path_or_dataset
+            path, name = ds.file.filename, ds.name
+            shape, dtype, maxshape = ds.shape, ds.dtype, ds.maxshape
+        else:
+            path = path_or_dataset
+            if name is None:
+                raise TypeError("The name parameter is required when "
+                                "specifying a source by path")
+            if shape is None:
+                raise TypeError("The shape parameter is required when "
+                                "specifying a source by path")
+
+            # A bare int is shorthand for a one-dimensional shape.
+            if isinstance(shape, int):
+                shape = (shape,)
+            if isinstance(maxshape, int):
+                maxshape = (maxshape,)
+
+        self.path = path
+        self.name = name
+        self.dtype = dtype
+
+        # None entries in maxshape stand for unlimited axes.
+        self.maxshape = shape if maxshape is None else tuple(
+            h5s.UNLIMITED if extent is None else extent
+            for extent in maxshape
+        )
+        self.sel = SimpleSelection(shape)
+        self._all_selected = True
+
+    @property
+    def shape(self):
+        """Array shape of the current selection."""
+        return self.sel.array_shape
+
+    def __getitem__(self, key):
+        """Return a deep copy of this source with *key* selected.
+
+        Raises RuntimeError if this object is itself the result of slicing.
+        """
+        if not self._all_selected:
+            raise RuntimeError("VirtualSource objects can only be sliced once.")
+        sliced = copy(self)
+        selection = select(self.shape, key, dataset=None)
+        sliced.sel = selection
+        _convert_space_for_key(selection.id, key)
+        sliced._all_selected = False
+        return sliced
+
+class VirtualLayout:
+    """Builder for a virtual dataset.
+
+    Create an instance describing the virtual dataset, assign VirtualSource
+    objects to slices of it, then hand it to group.create_virtual_dataset()
+    to add the virtual dataset to a file.
+
+    The data cannot be read through this class; the virtual dataset has to
+    be created in a file before it can be used.
+
+    shape
+        Shape of the dataset: a tuple, or a single int for one dimension.
+    dtype
+        Numpy dtype or string.
+    maxshape
+        Shape the virtual dataset may be resized up to: a tuple, or a single
+        int. None entries mark unlimited axes.
+    filename
+        Name of the destination file, if it is known in advance. Mappings
+        whose source is that same file are stored with filename '.', so the
+        file can be renamed later.
+    """
+    def __init__(self, shape, dtype, maxshape=None, filename=None):
+        # A bare int is shorthand for a one-dimensional shape.
+        if isinstance(shape, int):
+            shape = (shape,)
+        if isinstance(maxshape, int):
+            maxshape = (maxshape,)
+
+        self.shape = shape
+        self.dtype = dtype
+        self.maxshape = maxshape
+        self._filename = filename
+        self._src_filenames = set()
+        self.dcpl = h5p.create(h5p.DATASET_CREATE)
+        self.dcpl.set_layout(h5d.VIRTUAL)
+
+    def __setitem__(self, key, source):
+        """Map the region *key* of the virtual dataset onto *source*."""
+        sel = select(self.shape, key, dataset=None)
+        _convert_space_for_key(sel.id, key)
+        src_filename = self._source_file_name(source.path, self._filename)
+
+        virtual_space = sel.id
+        dataset_name = source.name.encode('utf-8')
+        self.dcpl.set_virtual(
+            virtual_space, src_filename, dataset_name, source.sel.id
+        )
+        # Without a known destination, remember the source file names so that
+        # _get_dcpl() can later spot mappings into the destination file.
+        if self._filename is None:
+            self._src_filenames.add(src_filename)
+
+    @staticmethod
+    def _source_file_name(src_filename, dst_filename) -> bytes:
+        """Return the encoded file name to store for a mapping.
+
+        The result is b'.' when a destination is given and encodes to the
+        same name as the source; otherwise it is the encoded source name.
+        """
+        src_filename = filename_encode(src_filename)
+        if not dst_filename or src_filename != filename_encode(dst_filename):
+            return filename_encode(src_filename)
+        # Source and destination are the same file: store a relative name so
+        # the virtual dataset stays valid if the file is renamed.
+        return b'.'
+
+    def _get_dcpl(self, dst_filename):
+        """Return the property list holding the virtual dataset mappings.
+
+        If the destination filename was not known when the VirtualLayout was
+        created, it is taken into account here.
+        """
+        dst_filename = filename_encode(dst_filename)
+        if self._filename is not None:
+            # The destination was announced up front; it has to match.
+            if dst_filename != filename_encode(self._filename):
+                raise Exception(f"{dst_filename!r} != {self._filename!r}")
+            return self.dcpl
+
+        # From here on the destination was not known in advance.
+        if dst_filename not in self._src_filenames:
+            return self.dcpl  # every mapping points at some other file
+
+        # At least one source file is the destination file, which was not
+        # known when the mapping was made. Copy all mappings into a fresh
+        # property list, storing '.' wherever the source is the destination.
+        new_dcpl = h5p.create(h5p.DATASET_CREATE)
+        new_dcpl.set_layout(h5d.VIRTUAL)
+        for index in range(self.dcpl.get_virtual_count()):
+            src_filename = self.dcpl.get_virtual_filename(index)
+            virtual_space = self.dcpl.get_virtual_vspace(index)
+            stored_name = self._source_file_name(src_filename, dst_filename)
+            dataset_name = self.dcpl.get_virtual_dsetname(index).encode('utf-8')
+            source_space = self.dcpl.get_virtual_srcspace(index)
+            new_dcpl.set_virtual(
+                virtual_space, stored_name, dataset_name, source_space
+            )
+        return new_dcpl
+
+    def make_dataset(self, parent, name, fillvalue=None):
+        """ Return a new low-level dataset identifier for a virtual dataset """
+        dcpl = self._get_dcpl(parent.file.filename)
+
+        if fillvalue is not None:
+            dcpl.set_fill_value(np.array([fillvalue]))
+
+        maxshape = self.maxshape
+        if maxshape is not None:
+            # None entries stand for unlimited axes.
+            maxshape = tuple(
+                h5s.UNLIMITED if extent is None else extent
+                for extent in maxshape
+            )
+
+        virt_dspace = h5s.create_simple(self.shape, maxshape)
+
+        if isinstance(self.dtype, Datatype):
+            # A named type is passed through unchanged.
+            tid = self.dtype.id
+        else:
+            tid = h5t.py_create(np.dtype(self.dtype), logical=1)
+
+        return h5d.create(parent.id, name=name, tid=tid, space=virt_dspace,
+                          dcpl=dcpl)