From 09a4b163ee3634d57591fd916f31ccf42ef08404 Mon Sep 17 00:00:00 2001
From: Matthew Larson <matthewjlar@gmail.com>
Date: Tue, 14 May 2024 17:07:36 -0500
Subject: [PATCH 1/8] Fix test_dataset.py, add to testall

---
 h5pyd/_hl/dataset.py     | 25 +++++++++++++++----------
 test/hl/test_vlentype.py | 16 +++++++++-------
 testall.py               |  1 +
 3 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/h5pyd/_hl/dataset.py b/h5pyd/_hl/dataset.py
index 6bfabea..4110dfd 100644
--- a/h5pyd/_hl/dataset.py
+++ b/h5pyd/_hl/dataset.py
@@ -1369,25 +1369,29 @@ def __setitem__(self, args, val):
         # Generally we try to avoid converting the arrays on the Python
         # side.  However, for compound literals this is unavoidable.
         # For h5pyd, do extra check and convert type on client side for efficiency
-        vlen = check_dtype(vlen=self.dtype)
-
-        if not isinstance(val, numpy.ndarray) and vlen is not None and vlen not in (bytes, str):
+        vlen_base_class = check_dtype(vlen=self.dtype)
+        if vlen_base_class is not None and vlen_base_class not in (bytes, str):
             try:
-                val = numpy.asarray(val, dtype=vlen)
+                # Attempt to directly convert the input array of vlen data to its base class
+                val = numpy.asarray(val, dtype=vlen_base_class)
 
             except ValueError as ve:
+                # Failed to convert input array to vlen base class directly; instead, create a new array where
+                # each element is an array of the Dataset's dtype
                 self.log.debug(f"asarray ValueError: {ve}")
                 try:
-                    val = numpy.array(
-                        [numpy.array(x, dtype=self.dtype) for x in val],
-                        dtype=self.dtype,
-                    )
+                    # Force output shape
+                    tmp = numpy.empty(shape=val.shape, dtype=self.dtype)
+                    tmp[:] = [numpy.array(x, dtype=self.dtype) for x in val]
+                    val = tmp
                 except ValueError as e:
                     msg = f"ValueError converting value element by element: {e}"
                     self.log.debug(msg)
 
-            if vlen == val.dtype:
+            if vlen_base_class == val.dtype:
                 if val.ndim > 1:
+                    # Reshape array to 2D, where first dim = product of all dims except last, and second dim = last dim
+                    # Then flatten it to 1D
                     tmp = numpy.empty(shape=val.shape[:-1], dtype=self.dtype)
                     tmp.ravel()[:] = [
                         i
@@ -1434,6 +1438,7 @@ def __setitem__(self, args, val):
             else:
                 dtype = self.dtype
                 cast_compound = False
+
             val = numpy.asarray(val, dtype=dtype, order="C")
             if cast_compound:
                 val = val.astype(numpy.dtype([(names[0], dtype)]))
@@ -1520,7 +1525,7 @@ def __setitem__(self, args, val):
             if self.id.uuid.startswith("d-"):
                 # server is HSDS, use binary data, use param values for selection
                 format = "binary"
-                body = arrayToBytes(val, vlen=vlen)
+                body = arrayToBytes(val, vlen=vlen_base_class)
                 self.log.debug(f"writing binary data, {len(body)}")
             else:
                 # h5serv, base64 encode, body json for selection
diff --git a/test/hl/test_vlentype.py b/test/hl/test_vlentype.py
index 48c8236..8152749 100644
--- a/test/hl/test_vlentype.py
+++ b/test/hl/test_vlentype.py
@@ -349,14 +349,16 @@ def test_variable_len_float_dset(self):
         e1 = np.array([1.9, 2.8, 3.7], dtype=np.float64)
 
         data = np.array([e0, e1], dtype=dtvlen)
-        try:
-            # This will fail on HSDS because data is a ndarray of shape (2,3) of floats
+
+        if isinstance(dset.id.id, str):
+            # id is str for HSDS, int for h5py
             dset[...] = data
-            if isinstance(dset.id.id, str):
-                # id is str for HSDS, int for h5py
-                self.assertTrue(False)
-        except ValueError:
-            pass  # expected
+        else:
+            try:
+                # This will fail on h5py due to a difference in internal array handling.
+                dset[...] = data
+            except ValueError:
+                pass  # expected on h5py
 
         data = np.zeros((2,), dtype=dtvlen)
         data[0] = e0
diff --git a/testall.py b/testall.py
index 4914bd5..456fa0c 100755
--- a/testall.py
+++ b/testall.py
@@ -18,6 +18,7 @@
 hl_tests = ('test_attribute',
             'test_committedtype',
             'test_complex_numbers',
+            'test_dataset',
             'test_dataset_compound',
             'test_dataset_create',
             'test_dataset_extend',

From 163130e9881ee45090dfc1777395fb816cafecf1 Mon Sep 17 00:00:00 2001
From: Matthew Larson <matthewjlar@gmail.com>
Date: Wed, 15 May 2024 09:28:07 -0500
Subject: [PATCH 2/8] Fix MultiManager test import

---
 test/hl/test_dataset.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/test/hl/test_dataset.py b/test/hl/test_dataset.py
index c088774..07c4e8c 100644
--- a/test/hl/test_dataset.py
+++ b/test/hl/test_dataset.py
@@ -26,13 +26,12 @@
 
 from common import ut, TestCase
 import config
-from h5pyd import MultiManager
 
 if config.get("use_h5py"):
-    from h5py import File, Dataset
+    from h5py import File, Dataset, MultiManager
     import h5py
 else:
-    from h5pyd import File, Dataset
+    from h5pyd import File, Dataset, MultiManager
     import h5pyd as h5py
 
 

From 568000989c91dcf9dbe8aea11b469785adbee827 Mon Sep 17 00:00:00 2001
From: Matthew Larson <matthewjlar@gmail.com>
Date: Wed, 15 May 2024 09:33:41 -0500
Subject: [PATCH 3/8] Skip expected failures on h5py

---
 test/hl/test_dataset.py | 45 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 45 insertions(+)

diff --git a/test/hl/test_dataset.py b/test/hl/test_dataset.py
index 07c4e8c..fff0453 100644
--- a/test/hl/test_dataset.py
+++ b/test/hl/test_dataset.py
@@ -122,6 +122,10 @@ def test_missing_shape(self):
     @ut.expectedFailure
     def test_long_double(self):
         """ Confirm that the default dtype is float """
+        # Expected failure on HSDS; skip with h5py
+        if config.get('use_h5py', True):
+            self.assertTrue(False)
+
         dset = self.f.create_dataset('foo', (63,), dtype=np.longdouble)
         if platform.machine() in ['ppc64le']:
             print(f"Storage of long double deactivated on {platform.machine()}")
@@ -132,6 +136,10 @@ def test_long_double(self):
     @ut.expectedFailure
     def test_complex256(self):
         """ Confirm that the default dtype is float """
+        # Expected failure on HSDS; skip with h5py
+        if config.get('use_h5py', True):
+            self.assertTrue(False)
+
         dset = self.f.create_dataset('foo', (63,),
                                      dtype=np.dtype('complex256'))
         self.assertEqual(dset.dtype, np.dtype('complex256'))
@@ -1203,6 +1211,10 @@ def test_fixed_ascii(self):
 
     @ut.expectedFailure
     def test_fixed_utf8(self):
+        # Expected failure on HSDS; skip with h5py
+        if config.get('use_h5py', True):
+            self.assertTrue(False)
+
         # TBD: Investigate
         dt = h5py.string_dtype(encoding='utf-8', length=5)
         ds = self.f.create_dataset('x', (100,), dtype=dt)
@@ -1365,6 +1377,10 @@ def test_rt(self):
 
     @ut.expectedFailure
     def test_assign(self):
+        # Expected failure on HSDS; skip with h5py
+        if config.get('use_h5py', True):
+            self.assertTrue(False)
+
         # TBD: field assignment not working
         dt = np.dtype([('weight', (np.float64, 3)),
                        ('endpoint_type', np.uint8), ])
@@ -1384,6 +1400,10 @@ def test_assign(self):
 
     @ut.expectedFailure
     def test_fields(self):
+        # Expected failure on HSDS; skip with h5py
+        if config.get('use_h5py', True):
+            self.assertTrue(False)
+
         # TBD: field assignment not working
         dt = np.dtype([
             ('x', np.float64),
@@ -1414,6 +1434,10 @@ def test_fields(self):
 class TestSubarray(BaseDataset):
     # TBD: Fix subarray
     def test_write_list(self):
+        # Expected failure on HSDS; skip with h5py
+        if config.get('use_h5py', True):
+            self.assertTrue(False)
+
         ds = self.f.create_dataset("a", (1,), dtype="3int8")
         ds[0] = [1, 2, 3]
         np.testing.assert_array_equal(ds[:], [[1, 2, 3]])
@@ -1422,6 +1446,10 @@ def test_write_list(self):
         np.testing.assert_array_equal(ds[:], [[4, 5, 6]])
 
     def test_write_array(self):
+        # Expected failure on HSDS; skip with h5py
+        if config.get('use_h5py', True):
+            self.assertTrue(False)
+
         ds = self.f.create_dataset("a", (1,), dtype="3int8")
         ds[0] = np.array([1, 2, 3])
         np.testing.assert_array_equal(ds[:], [[1, 2, 3]])
@@ -1589,6 +1617,10 @@ class TestAstype(BaseDataset):
 
     @ut.expectedFailure
     def test_astype_wrapper(self):
+        # Expected failure on HSDS; skip with h5py
+        if config.get('use_h5py', True):
+            self.assertTrue(False)
+
         dset = self.f.create_dataset('x', (100,), dtype='i2')
         dset[...] = np.arange(100)
         arr = dset.astype('f4')[:]
@@ -1600,6 +1632,7 @@ def test_astype_wrapper_len(self):
         self.assertEqual(100, len(dset.astype('f4')))
 
 
+# TBD: Are field names supported now? If so, this skip can be removed.
 @ut.skip("field name not supported")
 class TestScalarCompound(BaseDataset):
 
@@ -1641,6 +1674,10 @@ def test_reuse_from_other(self):
 
     @ut.expectedFailure
     def test_reuse_struct_from_other(self):
+        # Expected failure on HSDS; skip with h5py
+        if config.get('use_h5py', True):
+            self.assertTrue(False)
+
         # TBD: unable to resstore object array from mem buffer
         dt = [('a', int), ('b', h5py.vlen_dtype(int))]
         self.f.create_dataset('vlen', (1,), dtype=dt)
@@ -1742,6 +1779,10 @@ def test_numpy_float64_2(self):
     @ut.expectedFailure
     def test_non_contiguous_arrays(self):
         """Test that non-contiguous arrays are stored correctly"""
+        # Expected failure on HSDS; skip with h5py
+        if config.get('use_h5py', True):
+            self.assertTrue(False)
+
         # TBD: boolean type not supported
         self.f.create_dataset('nc', (10,), dtype=h5py.vlen_dtype('bool'))
         x = np.array([True, False, True, True, False, False, False])
@@ -1868,6 +1909,10 @@ def test_numpy_commutative(self,):
         Create a h5py dataset, extract one element convert to numpy
         Check that it returns symmetric response to == and !=
         """
+        # Expected failure on HSDS; skip with h5py
+        if config.get('use_h5py', True):
+            self.assertTrue(False)
+
         # TBD: investigate
         shape = (100, 1)
         dset = self.f.create_dataset("test", shape, dtype=float,

From d795665953de9d7a561a27c1d065c39b0423ad5d Mon Sep 17 00:00:00 2001
From: Matthew Larson <matthewjlar@gmail.com>
Date: Wed, 15 May 2024 09:47:19 -0500
Subject: [PATCH 4/8] Generalize string checks for h5py

---
 test/hl/test_dataset.py | 96 ++++++++++++++++++++++++++---------------
 1 file changed, 62 insertions(+), 34 deletions(-)

diff --git a/test/hl/test_dataset.py b/test/hl/test_dataset.py
index fff0453..966182a 100644
--- a/test/hl/test_dataset.py
+++ b/test/hl/test_dataset.py
@@ -123,7 +123,7 @@ def test_missing_shape(self):
     def test_long_double(self):
         """ Confirm that the default dtype is float """
         # Expected failure on HSDS; skip with h5py
-        if config.get('use_h5py', True):
+        if config.get('use_h5py'):
             self.assertTrue(False)
 
         dset = self.f.create_dataset('foo', (63,), dtype=np.longdouble)
@@ -137,7 +137,7 @@ def test_long_double(self):
     def test_complex256(self):
         """ Confirm that the default dtype is float """
         # Expected failure on HSDS; skip with h5py
-        if config.get('use_h5py', True):
+        if config.get('use_h5py'):
             self.assertTrue(False)
 
         dset = self.f.create_dataset('foo', (63,),
@@ -189,20 +189,34 @@ def test_appropriate_low_level_id(self):
             Dataset(self.f['/'].id)
 
     def check_h5_string(self, dset, cset, length):
-        type_json = dset.id.type_json
-        if "class" not in type_json:
-            raise TypeError()
-        assert type_json["class"] == 'H5T_STRING'
-        if "charSet" not in type_json:
-            raise TypeError()
-        assert type_json['charSet'] == cset
-        if "length" not in type_json:
-            raise TypeError()
-        if length is None:
-            assert type_json["length"] == 'H5T_VARIABLE'
+        if config.get('use_h5py'):
+            type_obj = dset.id.get_type()
+            self.assertEqual(type_obj.get_class(), h5py.h5t.STRING)
+            if cset == 'H5T_CSET_ASCII':
+                self.assertEqual(type_obj.get_cset(), h5py.h5t.CSET_ASCII)
+            elif cset == 'H5T_CSET_UTF8':
+                self.assertEqual(type_obj.get_cset(), h5py.h5t.CSET_UTF8)
+            else:
+                self.assertEqual(type_obj.get_cset(), h5py.h5t.CSET_ERROR)
+
+            if length:
+                self.assertEqual(type_obj.get_size(), length)
+
         else:
-            assert isinstance(type_json["length"], int)
-            assert type_json["length"] == length
+            type_json = dset.id.type_json
+            if "class" not in type_json:
+                raise TypeError()
+            self.assertEqual(type_json["class"], 'H5T_STRING')
+            if "charSet" not in type_json:
+                raise TypeError()
+            self.assertEqual(type_json["charSet"], cset)
+            if "length" not in type_json:
+                raise TypeError()
+            if length is None:
+                self.assertEqual(type_json["length"], 'H5T_VARIABLE')
+            else:
+                self.assertTrue(isinstance(type_json["length"], int))
+                self.assertEqual(type_json["length"], length)
 
     def test_create_bytestring(self):
         """ Creating dataset with byte string yields vlen ASCII dataset """
@@ -915,17 +929,31 @@ class TestAutoCreate(BaseDataset):
         Feature: Datasets auto-created from data produce the correct types
     """
     def assert_string_type(self, ds, cset, variable=True):
-        type_json = ds.id.type_json
-        if "class" not in type_json:
-            raise TypeError()
-        self.assertEqual(type_json["class"], 'H5T_STRING')
-        if "charSet" not in type_json:
-            raise TypeError()
-        self.assertEqual(type_json["charSet"], cset)
-        if variable:
-            if "length" not in type_json:
+        if config.get('use_h5py'):
+            type_obj = ds.id.get_type()
+            self.assertEqual(type_obj.get_class(), h5py.h5t.STRING)
+
+            dset_cset = type_obj.get_cset()
+            if cset == 'H5T_CSET_ASCII':
+                expected_cset = h5py.h5t.CSET_ASCII
+            elif cset == 'H5T_CSET_UTF8':
+                expected_cset = h5py.h5t.CSET_UTF8
+            else:
+                expected_cset = h5py.h5t.CSET_ERROR
+
+            self.assertEqual(dset_cset, expected_cset)
+        else:
+            type_json = ds.id.type_json
+            if "class" not in type_json:
+                raise TypeError()
+            self.assertEqual(type_json["class"], 'H5T_STRING')
+            if "charSet" not in type_json:
                 raise TypeError()
-            self.assertEqual(type_json["length"], 'H5T_VARIABLE')
+            self.assertEqual(type_json["charSet"], cset)
+            if variable:
+                if "length" not in type_json:
+                    raise TypeError()
+                self.assertEqual(type_json["length"], 'H5T_VARIABLE')
 
     def test_vlen_bytes(self):
         """Assigning byte strings produces a vlen string ASCII dataset """
@@ -1212,7 +1240,7 @@ def test_fixed_ascii(self):
     @ut.expectedFailure
     def test_fixed_utf8(self):
         # Expected failure on HSDS; skip with h5py
-        if config.get('use_h5py', True):
+        if config.get('use_h5py'):
             self.assertTrue(False)
 
         # TBD: Investigate
@@ -1378,7 +1406,7 @@ def test_rt(self):
     @ut.expectedFailure
     def test_assign(self):
         # Expected failure on HSDS; skip with h5py
-        if config.get('use_h5py', True):
+        if config.get('use_h5py'):
             self.assertTrue(False)
 
         # TBD: field assignment not working
@@ -1401,7 +1429,7 @@ def test_assign(self):
     @ut.expectedFailure
     def test_fields(self):
         # Expected failure on HSDS; skip with h5py
-        if config.get('use_h5py', True):
+        if config.get('use_h5py'):
             self.assertTrue(False)
 
         # TBD: field assignment not working
@@ -1435,7 +1463,7 @@ class TestSubarray(BaseDataset):
     # TBD: Fix subarray
     def test_write_list(self):
         # Expected failure on HSDS; skip with h5py
-        if config.get('use_h5py', True):
+        if config.get('use_h5py'):
             self.assertTrue(False)
 
         ds = self.f.create_dataset("a", (1,), dtype="3int8")
@@ -1447,7 +1475,7 @@ def test_write_list(self):
 
     def test_write_array(self):
         # Expected failure on HSDS; skip with h5py
-        if config.get('use_h5py', True):
+        if config.get('use_h5py'):
             self.assertTrue(False)
 
         ds = self.f.create_dataset("a", (1,), dtype="3int8")
@@ -1618,7 +1646,7 @@ class TestAstype(BaseDataset):
     @ut.expectedFailure
     def test_astype_wrapper(self):
         # Expected failure on HSDS; skip with h5py
-        if config.get('use_h5py', True):
+        if config.get('use_h5py'):
             self.assertTrue(False)
 
         dset = self.f.create_dataset('x', (100,), dtype='i2')
@@ -1675,7 +1703,7 @@ def test_reuse_from_other(self):
     @ut.expectedFailure
     def test_reuse_struct_from_other(self):
         # Expected failure on HSDS; skip with h5py
-        if config.get('use_h5py', True):
+        if config.get('use_h5py'):
             self.assertTrue(False)
 
         # TBD: unable to resstore object array from mem buffer
@@ -1780,7 +1808,7 @@ def test_numpy_float64_2(self):
     def test_non_contiguous_arrays(self):
         """Test that non-contiguous arrays are stored correctly"""
         # Expected failure on HSDS; skip with h5py
-        if config.get('use_h5py', True):
+        if config.get('use_h5py'):
             self.assertTrue(False)
 
         # TBD: boolean type not supported
@@ -1910,7 +1938,7 @@ def test_numpy_commutative(self,):
         Check that it returns symmetric response to == and !=
         """
         # Expected failure on HSDS; skip with h5py
-        if config.get('use_h5py', True):
+        if config.get('use_h5py'):
             self.assertTrue(False)
 
         # TBD: investigate

From 036375875a4e6b8da44150fef9d9f6987b15f4a3 Mon Sep 17 00:00:00 2001
From: Matthew Larson <matthewjlar@gmail.com>
Date: Wed, 15 May 2024 09:55:15 -0500
Subject: [PATCH 5/8] Generalize empty dset check for h5py

---
 test/hl/test_dataset.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/test/hl/test_dataset.py b/test/hl/test_dataset.py
index 966182a..aa8249f 100644
--- a/test/hl/test_dataset.py
+++ b/test/hl/test_dataset.py
@@ -36,14 +36,18 @@
 
 
 def is_empty_dataspace(obj):
-    shape_json = obj.shape_json
-
-    if "class" not in shape_json:
-        raise KeyError()
-    if shape_json["class"] == 'H5S_NULL':
-        return True
+    if config.get('use_h5py'):
+        space = obj.get_space()
+        return (space.get_simple_extent_type() == h5py.h5s.NULL)
     else:
-        return False
+        shape_json = obj.shape_json
+
+        if "class" not in shape_json:
+            raise KeyError()
+        if shape_json["class"] == 'H5S_NULL':
+            return True
+        else:
+            return False
 
 
 class BaseDataset(TestCase):

From 376f9211d7a1e0b5d84dda91b6bf6d32e1cc267a Mon Sep 17 00:00:00 2001
From: Matthew Larson <matthewjlar@gmail.com>
Date: Wed, 15 May 2024 10:01:21 -0500
Subject: [PATCH 6/8] Generalize string checks to h5py

---
 test/hl/test_dataset.py | 77 ++++++++++++++++++-----------------------
 1 file changed, 33 insertions(+), 44 deletions(-)

diff --git a/test/hl/test_dataset.py b/test/hl/test_dataset.py
index aa8249f..f952a63 100644
--- a/test/hl/test_dataset.py
+++ b/test/hl/test_dataset.py
@@ -60,6 +60,36 @@ def tearDown(self):
         if self.f:
             self.f.close()
 
+    def check_h5_string(self, dset, cset, length):
+        if config.get('use_h5py'):
+            type_obj = dset.id.get_type()
+            self.assertEqual(type_obj.get_class(), h5py.h5t.STRING)
+            if cset == 'H5T_CSET_ASCII':
+                self.assertEqual(type_obj.get_cset(), h5py.h5t.CSET_ASCII)
+            elif cset == 'H5T_CSET_UTF8':
+                self.assertEqual(type_obj.get_cset(), h5py.h5t.CSET_UTF8)
+            else:
+                self.assertEqual(type_obj.get_cset(), h5py.h5t.CSET_ERROR)
+
+            if length:
+                self.assertEqual(type_obj.get_size(), length)
+
+        else:
+            type_json = dset.id.type_json
+            if "class" not in type_json:
+                raise TypeError()
+            self.assertEqual(type_json["class"], 'H5T_STRING')
+            if "charSet" not in type_json:
+                raise TypeError()
+            self.assertEqual(type_json["charSet"], cset)
+            if "length" not in type_json:
+                raise TypeError()
+            if length is None:
+                self.assertEqual(type_json["length"], 'H5T_VARIABLE')
+            else:
+                self.assertTrue(isinstance(type_json["length"], int))
+                self.assertEqual(type_json["length"], length)
+
 
 class TestRepr(BaseDataset):
     """
@@ -192,36 +222,6 @@ def test_appropriate_low_level_id(self):
         with self.assertRaises(ValueError):
             Dataset(self.f['/'].id)
 
-    def check_h5_string(self, dset, cset, length):
-        if config.get('use_h5py'):
-            type_obj = dset.id.get_type()
-            self.assertEqual(type_obj.get_class(), h5py.h5t.STRING)
-            if cset == 'H5T_CSET_ASCII':
-                self.assertEqual(type_obj.get_cset(), h5py.h5t.CSET_ASCII)
-            elif cset == 'H5T_CSET_UTF8':
-                self.assertEqual(type_obj.get_cset(), h5py.h5t.CSET_UTF8)
-            else:
-                self.assertEqual(type_obj.get_cset(), h5py.h5t.CSET_ERROR)
-
-            if length:
-                self.assertEqual(type_obj.get_size(), length)
-
-        else:
-            type_json = dset.id.type_json
-            if "class" not in type_json:
-                raise TypeError()
-            self.assertEqual(type_json["class"], 'H5T_STRING')
-            if "charSet" not in type_json:
-                raise TypeError()
-            self.assertEqual(type_json["charSet"], cset)
-            if "length" not in type_json:
-                raise TypeError()
-            if length is None:
-                self.assertEqual(type_json["length"], 'H5T_VARIABLE')
-            else:
-                self.assertTrue(isinstance(type_json["length"], int))
-                self.assertEqual(type_json["length"], length)
-
     def test_create_bytestring(self):
         """ Creating dataset with byte string yields vlen ASCII dataset """
         def check_vlen_ascii(dset):
@@ -1212,31 +1212,20 @@ def test_vlen_bytes(self):
         """ Vlen bytes dataset maps to vlen ascii in the file """
         dt = h5py.string_dtype(encoding='ascii')
         ds = self.f.create_dataset('x', (100,), dtype=dt)
-        type_json = ds.id.type_json
-        self.assertEqual(type_json["class"], 'H5T_STRING')
-        self.assertEqual(type_json['charSet'], 'H5T_CSET_ASCII')
-        string_info = h5py.check_string_dtype(ds.dtype)
-        self.assertEqual(string_info.encoding, 'ascii')
+        self.check_h5_string(ds, 'H5T_CSET_ASCII', None)
 
     def test_vlen_unicode(self):
         """ Vlen unicode dataset maps to vlen utf-8 in the file """
         dt = h5py.string_dtype()
         ds = self.f.create_dataset('x', (100,), dtype=dt)
-        type_json = ds.id.type_json
-        self.assertEqual(type_json["class"], 'H5T_STRING')
-        self.assertEqual(type_json['charSet'], 'H5T_CSET_UTF8')
-        string_info = h5py.check_string_dtype(ds.dtype)
-        self.assertEqual(string_info.encoding, 'utf-8')
+        self.check_h5_string(ds, 'H5T_CSET_UTF8', None)
 
     def test_fixed_ascii(self):
         """ Fixed-length bytes dataset maps to fixed-length ascii in the file
         """
         dt = np.dtype("|S10")
         ds = self.f.create_dataset('x', (100,), dtype=dt)
-        type_json = ds.id.type_json
-        self.assertEqual(type_json["class"], 'H5T_STRING')
-        self.assertEqual(type_json["length"], 10)
-        self.assertEqual(type_json['charSet'], 'H5T_CSET_ASCII')
+        self.check_h5_string(ds, 'H5T_CSET_ASCII', 10)
         string_info = h5py.check_string_dtype(ds.dtype)
         self.assertEqual(string_info.encoding, 'ascii')
         self.assertEqual(string_info.length, 10)

From bd23e19cd6043831975eb6ac06be90449e162712 Mon Sep 17 00:00:00 2001
From: Matthew Larson <matthewjlar@gmail.com>
Date: Wed, 15 May 2024 10:10:36 -0500
Subject: [PATCH 7/8] Skip MultiManager tests with h5py

---
 test/hl/test_dataset.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/test/hl/test_dataset.py b/test/hl/test_dataset.py
index f952a63..6eb6940 100644
--- a/test/hl/test_dataset.py
+++ b/test/hl/test_dataset.py
@@ -28,7 +28,7 @@
 import config
 
 if config.get("use_h5py"):
-    from h5py import File, Dataset, MultiManager
+    from h5py import File, Dataset
     import h5py
 else:
     from h5pyd import File, Dataset, MultiManager
@@ -1970,6 +1970,7 @@ def test_basetype_commutative(self,):
         assert (val != dset) == (dset != val)
 
 
+@ut.skipIf(config.get('use_h5py'), "h5py does not support MultiManager")
 class TestMultiManager(BaseDataset):
     def test_multi_read_scalar_dataspaces(self):
         """
@@ -2350,16 +2351,17 @@ def test_multi_write_mixed_shapes(self):
             out = self.f["data" + str(i)][...]
             np.testing.assert_array_equal(out[sel_idx, sel_idx], data_in + i)
 
-    def test_multi_selection_rw(self):
+    def test_multi_selection(self):
         """
-        Test reading and writing a unique selection in each dataset
+        Test using a different selection
+        for each dataset in a MultiManager
         """
         shape = (10, 10, 10)
         count = 3
         dt = np.int32
 
         # Create datasets
-        data_in = np.reshape(np.arange(np.prod(shape)), shape)
+        data_in = np.reshape(np.arange(np.prod(shape), dtype=dt), shape)
         data_in_original = data_in.copy()
         datasets = []
 
@@ -2368,7 +2370,7 @@ def test_multi_selection_rw(self):
                                          dtype=dt, data=data_in)
             datasets.append(dset)
 
-        mm = MultiManager(datasets=datasets)
+        mm = h5py.MultiManager(datasets=datasets)
 
         # Selections to read from
         sel = [np.s_[0:10, 0:10, 0:10], np.s_[0:5, 5:10, 1:4:2], np.s_[4, 5, 6]]
@@ -2379,7 +2381,7 @@ def test_multi_selection_rw(self):
 
         # If selection list has only a single element, apply it to all dsets
         sel = [np.s_[0:10, 0:10, 0:10]]
-        data_out = mm[sel[0]]
+        data_out = mm[sel]
 
         for d in data_out:
             np.testing.assert_array_equal(d, data_in[sel[0]])
@@ -2387,7 +2389,7 @@ def test_multi_selection_rw(self):
         # Selections to write to
         sel = [np.s_[0:10, 0:10, 0:10], np.s_[0:5, 0:5, 0:5], np.s_[0, 0, 0]]
         data_in = [np.zeros_like(data_in), np.ones_like(data_in), np.full_like(data_in, 2)]
-        mm[sel] = data_in
+        mm[sel] = [data_in[i][sel[i]] for i in range(count)]
 
         for i in range(count):
             np.testing.assert_array_equal(self.f["data" + str(i)][sel[i]], data_in[i][sel[i]])

From dfa771142a4b1898f1d11dff518d8c75782e9fae Mon Sep 17 00:00:00 2001
From: mattjala <matthewjlar@gmail.com>
Date: Mon, 6 May 2024 10:44:46 -0500
Subject: [PATCH 8/8] Fix dtype in windows tests

---
 test/hl/test_dataset.py | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/test/hl/test_dataset.py b/test/hl/test_dataset.py
index 6eb6940..11173a7 100644
--- a/test/hl/test_dataset.py
+++ b/test/hl/test_dataset.py
@@ -157,7 +157,7 @@ def test_missing_shape(self):
     def test_long_double(self):
         """ Confirm that the default dtype is float """
         # Expected failure on HSDS; skip with h5py
-        if config.get('use_h5py'):
+        if config.get('use_h5py') or platform.system() == 'Windows':
             self.assertTrue(False)
 
         dset = self.f.create_dataset('foo', (63,), dtype=np.longdouble)
@@ -1671,7 +1671,13 @@ def test_scalar_compound(self):
 
 class TestVlen(BaseDataset):
     def test_int(self):
-        dt = h5py.vlen_dtype(int)
+        if platform.system() == "Windows":
+            # default np int type is 32 bit
+            dt = h5py.vlen_dtype(np.int32)
+        else:
+            # default np int type is 64 bit
+            dt = h5py.vlen_dtype(np.int64)
+
         ds = self.f.create_dataset('vlen', (4,), dtype=dt)
         ds[0] = np.arange(3)
         ds[1] = np.arange(0)
@@ -1708,7 +1714,12 @@ def test_reuse_struct_from_other(self):
         self.f.create_dataset('vlen2', (1,), self.f['vlen']['b'][()].dtype)
 
     def test_convert(self):
-        dt = h5py.vlen_dtype(int)
+        if platform.system() == "Windows":
+            # default np int type is 32 bit
+            dt = h5py.vlen_dtype(np.int32)
+        else:
+            # default np int type is 64 bit
+            dt = h5py.vlen_dtype(np.int64)
         ds = self.f.create_dataset('vlen', (3,), dtype=dt)
         ds[0] = np.array([1.4, 1.2])
         ds[1] = np.array([1.2])
@@ -1725,7 +1736,13 @@ def test_convert(self):
         self.assertArrayEqual(ds[1], np.arange(3))
 
     def test_multidim(self):
-        dt = h5py.vlen_dtype(int)
+        if platform.system() == "Windows":
+            # default np int type is 32 bit
+            dt = h5py.vlen_dtype(np.int32)
+        else:
+            # default np int type is 64 bit
+            dt = h5py.vlen_dtype(np.int64)
+
         ds = self.f.create_dataset('vlen', (2, 2), dtype=dt)
         # ds[0, 0] = np.arange(1)
         ds[:, :] = np.array([[np.arange(3), np.arange(2)],