Use variable length strings in VTKHDF files

这个提交包含在:
Nathan Mannall
2025-05-02 11:14:25 +01:00
父节点 fc547ab75e
当前提交 12477223b3

查看文件

@@ -239,39 +239,62 @@ class VtkHdfFile(AbstractContextManager):
and offset are invalid. and offset are invalid.
""" """
# If dtype is a string and using parallel I/O, ensure using # Ensure data is a numpy array
# fixed length strings
if isinstance(dtype, np.dtype) and self.comm is not None:
string_info = h5py.check_string_dtype(dtype)
if string_info is not None and string_info.length is None:
logger.warning(
"HDF5 does not support variable length strings with parallel I/O."
" Using fixed length strings instead."
)
dtype = h5py.string_dtype(encoding="ascii", length=0)
if not isinstance(data, np.ndarray): if not isinstance(data, np.ndarray):
data = np.array(data, dtype=dtype) data = np.array(data, dtype=dtype)
if data.ndim < 1: if data.ndim < 1:
data = np.expand_dims(data, axis=-1) data = np.expand_dims(data, axis=-1)
if data.dtype.kind == "U": string_info = None
if dtype is not None: # Only log warning if user specified a data type
# Warn if string data type will be converted from unicode to a
# byte array (ascii). Only output a warning if the user
# specified dtype
if dtype is not None and np.dtype(dtype).kind == "U":
logger.warning(
"NumPy UTF-32 ('U' dtype) is not supported by HDF5."
" Converting to bytes array ('S' dtype)."
)
# Ensure dtype is a numpy dtype
if dtype is None:
dtype = data.dtype
elif isinstance(dtype, np.dtype):
string_info = h5py.check_string_dtype(dtype)
# Warn if user specified h5py string data type is invalid
if string_info is not None and string_info.encoding == "utf-8":
logger.warning( logger.warning(
"NumPy UTF-32 ('U' dtype) is not supported by HDF5." "utf-8 encoding is not supported by VTKHDF. Converting to ascii encoding."
" Converting to bytes array ('S' dtype)."
) )
data = data.astype("S")
if string_info is not None and string_info.length is not None:
if self.comm is None:
logger.warning(
"Fixed length strings are not supported by VTKHDF."
" Converting to variable length strings."
)
else:
logger.warning(
"HDF5 does not support variable length strings with parallel I/O."
" Using fixed length strings instead."
)
logger.warning(
"VTKHDF does not support fixed length strings. File readers may generate"
" error messages when reading in a VTKHDF file containing fixed length"
" strings."
)
else:
dtype = np.dtype(dtype)
# Explicitly define string datatype # Explicitly define string datatype
# VTKHDF only supports ascii strings (not UTF-8) # VTKHDF only supports variable length ascii strings (not UTF-8)
if data.dtype.kind == "S": if dtype.kind == "U" or dtype.kind == "S" or string_info is not None:
dtype = h5py.string_dtype(encoding="ascii", length=data.dtype.itemsize) # If using parallel I/O, use fixed length strings
length = None if self.comm is None else 0
dtype = h5py.string_dtype(encoding="ascii", length=length)
data = data.astype(dtype) data = data.astype(dtype)
elif dtype is None:
dtype = data.dtype
# VTKHDF stores datasets using ZYX ordering rather than XYZ # VTKHDF stores datasets using ZYX ordering rather than XYZ
if xyz_data_ordering: if xyz_data_ordering:
data = data.transpose() data = data.transpose()
@@ -348,19 +371,30 @@ class VtkHdfFile(AbstractContextManager):
if string_info is not None and string_info.length is None: if string_info is not None and string_info.length is None:
raise TypeError( raise TypeError(
"HDF5 does not support variable length strings with parallel I/O." "HDF5 does not support variable length strings with parallel I/O."
" Use fixed length strings instead." " Use a serial driver or fixed length strings instead."
) )
string_info = h5py.check_string_dtype(dtype)
if dtype.kind == "U": if dtype.kind == "U":
logger.warning( logger.warning(
"NumPy UTF-32 ('U' dtype) is not supported by HDF5." "NumPy UTF-32 ('U' dtype) is not supported by HDF5."
" Converting to bytes array ('S' dtype)." " Converting to bytes array ('S' dtype)."
) )
if string_info is not None and string_info.encoding == "utf-8":
logger.warning(
"utf-8 encoding is not supported by VTKHDF. Converting to ascii encoding."
)
# Explicitly define string datatype # Explicitly define string datatype
# VTKHDF only supports ascii strings (not UTF-8) # VTKHDF only supports variable length ascii strings (not UTF-8)
if dtype.kind == "U" or dtype.kind == "S": if (
dtype = h5py.string_dtype(encoding="ascii", length=dtype.itemsize) dtype.kind == "U"
or dtype.kind == "S"
or (string_info is not None and string_info.encoding == "utf-8")
):
dtype = h5py.string_dtype(encoding="ascii", length=None)
logger.debug(f"Creating dataset '{path}', shape: {shape}, dtype: {dtype}") logger.debug(f"Creating dataset '{path}', shape: {shape}, dtype: {dtype}")