# gpr-sidl-inv/readgssi/translate.py

import h5py
import pandas as pd
import numpy as np
import json
import os
import struct
from readgssi.gps import readdzg
import readgssi.functions as fx
from datetime import datetime
"""
contains translations to common formats
"""
def json_header(header, outfile_abspath, verbose=False):
"""
Save header values as a .json so another script can take what it needs. This is used to export to `GPRPy <https://github.com/NSGeophysics/gprpy>`_.
:param dict header: The file header dictionary
:param str outfile_abspath: Output file path
:param bool verbose: Verbose, defaults to False
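
    Example (a minimal sketch; assumes :code:`header` is a header dict returned by
    :py:func:`readgssi.dzt.readdzt`, and the output path is hypothetical): ::

        from readgssi import translate
        translate.json_header(header=header, outfile_abspath='FILE__001')  # writes FILE__001.json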
"""
with open('%s.json' % (outfile_abspath), 'w') as f:
if verbose:
fx.printmsg('serializing header as %s' % (f.name))
json.dump(obj=header, fp=f, indent=4, sort_keys=True, default=str)
def csv(ar, outfile_abspath, header=None, verbose=False):
"""
Output to csv. Data is read into a :py:class:`pandas.DataFrame`, then written using :py:func:`pandas.DataFrame.to_csv`.
:param numpy.ndarray ar: Radar array
:param str outfile_abspath: Output file path
:param dict header: File header dictionary to write, if desired. Defaults to None.
:param bool verbose: Verbose, defaults to False
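
    Example (a minimal sketch; assumes a single-channel file and a hypothetical
    filename, mirroring the example in :py:func:`readgssi.translate.dzt`): ::

        from readgssi.dzt import readdzt
        from readgssi import translate

        header, data, gps = readdzt('FILE__001.DZT')
        # writes FILE__001-ch0.csv, plus FILE__001-ch0.json because a header is given
        translate.csv(ar=data[0], outfile_abspath='FILE__001-ch0', header=header)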
"""
if verbose:
t = ''
if header:
t = ' with json header'
fx.printmsg('output format is csv%s. writing data to: %s.csv' % (t, outfile_abspath))
data = pd.DataFrame(ar) # using pandas to output csv
data.to_csv('%s.csv' % (outfile_abspath)) # write
if header:
json_header(header=header, outfile_abspath=outfile_abspath, verbose=verbose)
def numpy(ar, outfile_abspath, header=None, verbose=False):
"""
    Output to a numpy binary file (.npy), with the option of writing the header to .json as well.
:param numpy.ndarray ar: Radar array
:param str outfile_abspath: Output file path
:param dict header: File header dictionary to write, if desired. Defaults to None.
:param bool verbose: Verbose, defaults to False
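
    Example (a minimal sketch; :code:`header` and :code:`data` as read in the
    :py:func:`readgssi.translate.csv` example above, output name hypothetical): ::

        import numpy as np
        from readgssi import translate

        translate.numpy(ar=data[0], outfile_abspath='FILE__001-ch0', header=header)
        arr = np.load('FILE__001-ch0.npy')  # load the array back to verify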
"""
if verbose:
t = ''
if header:
t = ' with json header (compatible with GPRPy)'
fx.printmsg('output format is numpy binary%s' % t)
fx.printmsg('writing data to %s.npy' % outfile_abspath)
np.save('%s.npy' % outfile_abspath, ar, allow_pickle=False)
if header:
json_header(header=header, outfile_abspath=outfile_abspath, verbose=verbose)
def gprpy(ar, header, outfile_abspath, verbose=False):
"""
Save in a format `GPRPy <https://github.com/NSGeophysics/gprpy>`_ can open (numpy binary .npy and a .json formatted header file).
.. note:: GPRPy support for this feature is forthcoming (https://github.com/NSGeophysics/GPRPy/issues/3#issuecomment-460462612).
:param numpy.ndarray ar: Radar array
:param str outfile_abspath: Output file path
    :param dict header: File header dictionary to write
:param bool verbose: Verbose, defaults to False
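
    Example (a minimal sketch; :code:`header` and :code:`data` as in the examples
    above, output name hypothetical): ::

        from readgssi import translate
        # writes FILE__001.npy and FILE__001.json for import into GPRPy
        translate.gprpy(ar=data[0], header=header, outfile_abspath='FILE__001')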
"""
numpy(ar=ar, header=header, outfile_abspath=outfile_abspath, verbose=verbose)
def segy(ar, outfile_abspath, header, verbose=False):
"""
.. warning:: SEGY output is not yet available.
In the future, this function will output to SEGY format.
:param numpy.ndarray ar: Radar array
:param str outfile_abspath: Output file path
    :param dict header: File header dictionary to write
:param bool verbose: Verbose, defaults to False
"""
fx.printmsg('ERROR: SEG-Y is not yet supported, please choose another format.')
raise NotImplementedError('SEG-Y is not yet supported.')
def h5(ar, infile_basename, outfile_abspath, header, verbose=False):
"""
.. warning:: HDF5 output is not yet available.
In the future, this function will output to HDF5 format.
:param numpy.ndarray ar: Radar array
:param str infile_basename: Input file basename
:param str outfile_abspath: Output file path
    :param dict header: File header dictionary to write
:param bool verbose: Verbose, defaults to False
"""
'''
Assumptions:
- constant velocity between marks (may be possible to add a check)
- marks are made at same time on GPS and SIR
- gps and gpr are in same location when mark is made
- good quality horizontal solution
single-channel IceRadar h5 structure is
/line_x/location_n/datacapture_0/echogram_0 (/group/group/group/dataset)
each dataset has an 'attributes' item attached, formatted in 'collections.defaultdict' style:
[('PCSavetimestamp', str), ('GPS Cluster- MetaData_xml', str), ('Digitizer-MetaData_xml', str), ('GPS Cluster_UTM-MetaData_xml', str)]
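
    a minimal sketch of reading one trace and its attributes back out of a file
    written this way (output filename hypothetical; paths and attribute names as above):

        with h5py.File('out.h5', 'r') as f:
            eg = f['line_0/location_0/datacapture_0/echogram_0']
            trace = eg[:]                                    # one trace (column) of the array
            gpsxml = eg.attrs['GPS Cluster- MetaData_xml']   # formatted XML metadata string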
'''
if verbose:
fx.printmsg('output format is IceRadar HDF5. writing file to: %s' % outfile_abspath)
# setup formattable strings
svts = 'PCSavetimestamp'
gpsx = 'GPS Cluster- MetaData_xml'
# main gps string. 8 formattable values: gps_sec, lat, lon, qual, num_sats, hdop, altitude, geoid_ht
gpsclstr = '<Cluster>\r\n<Name>GPS Cluster</Name>\r\n<NumElts>10</NumElts>\r\n<String>\r\n<Name>GPS_timestamp_UTC</Name>\r\n<Val>%.2f</Val>\r\n</String>\r\n<String>\r\n<Name>Lat_N</Name>\r\n<Val>%.4f</Val>\r\n</String>\r\n<String>\r\n<Name>Long_ W</Name>\r\n<Val>%.4f</Val>\r\n</String>\r\n<String>\r\n<Name>Fix_Quality</Name>\r\n<Val>%i</Val>\r\n</String>\r\n<String>\r\n<Name>Num _Sat</Name>\r\n<Val>%i</Val>\r\n</String>\r\n<String>\r\n<Name>Dilution</Name>\r\n<Val>%.2f</Val>\r\n</String>\r\n<String>\r\n<Name>Alt_asl_m</Name>\r\n<Val>%.2f</Val>\r\n</String>\r\n<String>\r\n<Name>Geoid_Heigh_m</Name>\r\n<Val>%.2f</Val>\r\n</String>\r\n<Boolean>\r\n<Name>GPS Fix valid</Name>\r\n<Val>1</Val>\r\n</Boolean>\r\n<Boolean>\r\n<Name>GPS Message ok</Name>\r\n<Val>1</Val>\r\n</Boolean>\r\n</Cluster>\r\n'
dimx = 'Digitizer-MetaData_xml'
    # digitizer string. 4 formattable values: rhf_depth, freq, rh_nsamp, stack
dimxstr = '<Cluster>\r\n<Name>Digitizer MetaData</Name>\r\n<NumElts>3</NumElts>\r\n<Cluster>\r\n<Name>Digitizer settings</Name>\r\n<NumElts>5</NumElts>\r\n<Cluster>\r\n<Name>Vertical</Name>\r\n<NumElts>3</NumElts>\r\n<DBL>\r\n<Name>vertical range</Name>\r\n<Val>%f</Val>\r\n</DBL>\r\n<DBL>\r\n<Name>Vertical Offset</Name>\r\n<Val>0.00000000000000</Val>\r\n</DBL>\r\n<I32>\r\n<Name>vertical coupling</Name>\r\n<Val>1</Val>\r\n</I32>\r\n</Cluster>\r\n<Cluster>\r\n<Name>Channel</Name>\r\n<NumElts>1</NumElts>\r\n<DBL>\r\n<Name>maximum input frequency</Name>\r\n<Val>%f</Val>\r\n</DBL>\r\n</Cluster>\r\n<Cluster>\r\n<Name>Horizontal</Name>\r\n<NumElts>2</NumElts>\r\n<DBL>\r\n<Name> Sample Rate</Name>\r\n<Val>250000000.00000000000000</Val>\r\n</DBL>\r\n<I32>\r\n<Name>Record Length</Name>\r\n<Val>%i</Val>\r\n</I32>\r\n</Cluster>\r\n<Cluster>\r\n<Name>Trigger</Name>\r\n<NumElts>12</NumElts>\r\n<U16>\r\n<Name>trigger type</Name>\r\n<Val>0</Val>\r\n</U16>\r\n<DBL>\r\n<Name>trigger delay</Name>\r\n<Val>0.00000000000000</Val>\r\n</DBL>\r\n<DBL>\r\n<Name>reference position</Name>\r\n<Val>10.00000000000000</Val>\r\n</DBL>\r\n<DBL>\r\n<Name>trigger level</Name>\r\n<Val>2.00000000000000E-2</Val>\r\n</DBL>\r\n<DBL>\r\n<Name>hysteresis</Name>\r\n<Val>0.00000000000000</Val>\r\n</DBL>\r\n<DBL>\r\n<Name>low level</Name>\r\n<Val>0.00000000000000</Val>\r\n</DBL>\r\n<DBL>\r\n<Name>high level</Name>\r\n<Val>0.00000000000000</Val>\r\n</DBL>\r\n<U16>\r\n<Name>trigger coupling</Name>\r\n<Val>1</Val>\r\n</U16>\r\n<I32>\r\n<Name>trigger window mode</Name>\r\n<Val>0</Val>\r\n</I32>\r\n<I32>\r\n<Name>trigger slope</Name>\r\n<Val>0</Val>\r\n</I32>\r\n<String>\r\n<Name>trigger source</Name>\r\n<Val>0</Val>\r\n</String>\r\n<I32>\r\n<Name>Trigger Modifier</Name>\r\n<Val>2</Val>\r\n</I32>\r\n</Cluster>\r\n<String>\r\n<Name>channel name</Name>\r\n<Val>0</Val>\r\n</String>\r\n</Cluster>\r\n<U16>\r\n<Name>Stacking</Name>\r\n<Val>%i</Val>\r\n</U16>\r\n<Cluster>\r\n<Name>Radargram extra info</Name>\r\n<NumElts>2</NumElts>\r\n<DBL>\r\n<Name>relativeInitialX</Name>\r\n<Val>-1.51999998365682E-7</Val>\r\n</DBL>\r\n<DBL>\r\n<Name>xIncrement</Name>\r\n<Val>3.99999988687227E-9</Val>\r\n</DBL>\r\n</Cluster>\r\n</Cluster>\r\n'
gutx = 'GPS Cluster_UTM-MetaData_xml'
# gps UTM string. 1 formattable value: num_sats
gpsutmstr = '<Cluster>\r\n<Name>GPS_UTM Cluster</Name>\r\n<NumElts>10</NumElts>\r\n<String>\r\n<Name>Datum</Name>\r\n<Val>NaN</Val>\r\n</String>\r\n<String>\r\n<Name>Easting_m</Name>\r\n<Val></Val>\r\n</String>\r\n<String>\r\n<Name>Northing_m</Name>\r\n<Val>NaN</Val>\r\n</String>\r\n<String>\r\n<Name>Elevation</Name>\r\n<Val>NaN</Val>\r\n</String>\r\n<String>\r\n<Name>Zone</Name>\r\n<Val>NaN</Val>\r\n</String>\r\n<String>\r\n<Name>Satellites (dup)</Name>\r\n<Val>%i</Val>\r\n</String>\r\n<Boolean>\r\n<Name>GPS Fix Valid (dup)</Name>\r\n<Val>1</Val>\r\n</Boolean>\r\n<Boolean>\r\n<Name>GPS Message ok (dup)</Name>\r\n<Val>1</Val>\r\n</Boolean>\r\n<Boolean>\r\n<Name>Flag_1</Name>\r\n<Val>0</Val>\r\n</Boolean>\r\n<Boolean>\r\n<Name>Flag_2</Name>\r\n<Val>0</Val>\r\n</Boolean>\r\n</Cluster>\r\n'
if os.path.exists(infile_basename + '.DZG'):
gps = readdzg(infile_basename + '.DZG', 'dzg', header['rhf_sps'], ar.shape[1], verbose)
else:
gps = '' # if there's no DZG file...need a way to parse another gps source if possible
# make data structure
    n = 0 # trace (location) number, incremented once per trace
f = h5py.File('%s.h5' % (outfile_abspath), 'w') # overwrite existing file
if verbose:
fx.printmsg('exporting to %s.h5' % outfile_abspath)
try:
li = f.create_group('line_0') # create line zero
except ValueError: # the line already exists in the file
li = f['line_0']
for sample in ar.T:
# create strings
# pcsavetimestamp
# formatting: m/d/yyyy_h:m:ss PM
svts_str = gps[n]['timestamp'].astype(datetime).strftime('%m/%d/%Y_%H:%M:%S %p')
# gpscluster
# order we need: (len(list), tracetime, y, x, q, sats, dil, z, gh, 1, 1)
# rows in gps: tracenum, lat, lon, altitude, geoid_ht, qual, num_sats, hdop, timestamp
gpsx_str = gpsclstr % (gps[n]['gps_sec'], gps[n]['lat'], gps[n]['lon'], gps[n]['qual'], gps[n]['num_sats'], gps[n]['hdop'], gps[n]['altitude'], gps[n]['geoid_ht'])
# digitizer
        # NOTE: this export is not yet functional (see warning above); freq (antenna
        # center frequency) is still undefined here and must be set before this will run
        dimx_str = dimxstr % (header['rhf_depth'], freq, header['rh_nsamp'], header['stack'])
# utm gpscluster
gutx_str = gpsutmstr % (gps[n]['num_sats'])
lo = li.create_group('location_' + str(n)) # create a location for each trace
dc = lo.create_group('datacapture_0')
eg = dc.create_dataset('echogram_0', (ar.shape[0],), data=sample)
eg.attrs.create(svts, svts_str) # store pcsavetimestamp attribute
eg.attrs.create(gpsx, gpsx_str) # store gpscluster attribute
eg.attrs.create(dimx, dimx_str) # store digitizer attribute
eg.attrs.create(gutx, gutx_str) # store utm gpscluster attribute
n += 1
f.close()
def writetime(d):
'''
Function to write dates to :code:`rfDateByte` binary objects in DZT headers.
An inverse of the :py:func:`readgssi.dzt.readtime` function.
DZT :code:`rfDateByte` objects are 32 bits of binary (01001010111110011010011100101111),
    structured as little-endian u5u6u5u5u4u7, where each field is an unsigned integer (uX)
    composed of X bits. Four bytes is an unnecessarily high level of compression
for a single date object in a filetype that often contains tens or hundreds of megabytes
of array information anyway.
So this function reads a datetime object and outputs
(seconds/2, min, hr, day, month, year-1980).
For more information on :code:`rfDateByte`, see page 55 of
`GSSI's SIR 3000 manual <https://support.geophysical.com/gssiSupport/Products/Documents/Control%20Unit%20Manuals/GSSI%20-%20SIR-3000%20Operation%20Manual.pdf>`_.
:param datetime d: the :py:class:`datetime.datetime` to be encoded
:rtype: bytes
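
    For example (a worked sketch of the packing, not a reference value from GSSI
    documentation), :code:`datetime(2025, 4, 7, 12, 31, 46)` reduces to the field
    values (sec/2=23, min=31, hr=12, day=7, month=4, year-1980=45), which pack into
    four little-endian bytes: ::

        from datetime import datetime
        writetime(datetime(2025, 4, 7, 12, 31, 46))  # -> bytes([247, 99, 135, 90])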
'''
# get binary values
sec2 = int(bin(int(d.second / 2))[2:])
mins = int(bin(d.minute)[2:])
hr = int(bin(d.hour)[2:])
day = int(bin(d.day)[2:])
mo = int(bin(d.month)[2:])
yr = int(bin(d.year - 1980)[2:])
# create binary string with proper padding
dtbits = '%07d%04d%05d%05d%06d%05d' % (yr, mo, day, hr, mins, sec2)
# create four bytes that make up rfDateByte
byt0 = int(dtbits[24:], 2)
byt1 = int(dtbits[16:24], 2)
byt2 = int(dtbits[8:16], 2)
byt3 = int(dtbits[0:8], 2)
# return a byte array
return bytes([byt0, byt1, byt2, byt3])
def dzt(ar, outfile_abspath, header, verbose=False):
"""
.. warning:: DZT output is only currently compatible with single-channel files.
This function will output a RADAN-compatible DZT file after processing.
This is useful to circumvent RADAN's distance-normalization bug
when the desired outcome is array migration.
Users can set DZT output via the command line by setting the
:code:`-f dzt` flag, or in Python by doing the following: ::
from readgssi.dzt import readdzt
from readgssi import translate
from readgssi.arrayops import stack, distance_normalize
# first, read a data file
header, data, gps = readdzt('FILE__001.DZT')
# do some stuff
# (distance normalization must be done before stacking)
for a in data:
header, data[a], gps = distance_normalize(header=header, ar=data[a], gps=gps)
            header, data[a], stk = stack(header=header, ar=data[a], stack=10)
# output as modified DZT
translate.dzt(ar=data, outfile_abspath='FILE__001-DnS10.DZT', header=header)
This will output :code:`FILE__001-DnS10.DZT` as a distance-normalized DZT.
:param numpy.ndarray ar: Radar array
:param str outfile_abspath: Output file path
    :param dict header: File header dictionary to write
:param bool verbose: Verbose, defaults to False
"""
'''
Assumptions:
- constant velocity or distance between marks (may be possible to add a check)
'''
if len(ar) > 1:
outfile_abspath = outfile_abspath.replace('c1', '')
if not outfile_abspath.endswith(('.DZT', '.dzt')):
outfile_abspath = outfile_abspath + '.DZT'
outfile = open(outfile_abspath, 'wb')
fx.printmsg('writing to: %s' % outfile.name)
for i in range(header['rh_nchan']):
fx.printmsg('writing DZT header for channel %s' % (i))
# header should read all values per-channel no matter what
outfile.write(struct.pack('<h', header['rh_tag']))
outfile.write(struct.pack('<h', header['rh_data']))
outfile.write(struct.pack('<h', header['rh_nsamp']))
        outfile.write(struct.pack('<h', 32)) # rh_bits - for simplicity, just hard-coding 32 bit
outfile.write(struct.pack('<h', header['rh_zero']))
# byte 10
outfile.write(struct.pack('<f', header['rhf_sps']))
outfile.write(struct.pack('<f', header['rhf_spm'])) # dzt.py ln 94-97
outfile.write(struct.pack('<f', header['rhf_mpm']))
outfile.write(struct.pack('<f', header['rhf_position']))
outfile.write(struct.pack('<f', header['rhf_range']))
outfile.write(struct.pack('<h', header['rh_npass']))
# byte 32
outfile.write(writetime(header['rhb_cdt']))
outfile.write(writetime(datetime.now())) # modification date/time
# byte 40
outfile.write(struct.pack('<h', header['rh_rgain']))
outfile.write(struct.pack('<h', header['rh_nrgain']))
outfile.write(struct.pack('<h', header['rh_text']))
outfile.write(struct.pack('<h', header['rh_ntext']))
outfile.write(struct.pack('<h', header['rh_proc']))
outfile.write(struct.pack('<h', header['rh_nproc']))
outfile.write(struct.pack('<h', header['rh_nchan']))
outfile.write(struct.pack('<f', header['rhf_epsr'])) # dzt.py ln 121-126
outfile.write(struct.pack('<f', header['rhf_top']))
outfile.write(struct.pack('<f', header['rhf_depth']))
# byte 66
outfile.write(struct.pack('<f', header['rh_xstart'])) # part of rh_coordx
outfile.write(struct.pack('<f', header['rh_xend'])) # part of rh_coordx
outfile.write(struct.pack('<f', header['rhf_servo_level']))
outfile.write(bytes(3)) # "reserved"
outfile.write(struct.pack('B', header['rh_accomp']))
outfile.write(struct.pack('<h', header['rh_sconfig']))
outfile.write(struct.pack('<h', header['rh_spp']))
outfile.write(struct.pack('<h', header['rh_linenum']))
# byte 88
outfile.write(struct.pack('<f', header['rh_ystart'])) # part of rh_coordy
outfile.write(struct.pack('<f', header['rh_yend'])) # part of rh_coordy
outfile.write(header['rh_96'])
outfile.write(struct.pack('c', header['rh_dtype']))
outfile.write(header['dzt_ant'][i])
outfile.write(header['rh_112'])
# byte 113
outfile.write(header['vsbyte'])
outfile.write(header['rh_name'])
outfile.write(header['rh_chksum'])
# byte 128
outfile.write(header['INFOAREA'])
outfile.write(header['rh_RGPS0'])
outfile.write(header['rh_RGPS1'])
outfile.write(header['header_extra'])
stack = []
    for i in range(header['rh_nchan']):
        # put back the rows above time zero (removed during processing) as zeros,
        # so each channel block regains its original number of samples
        stack.append(np.zeros((header['timezero'][i], ar[i].shape[1]),
                              dtype=np.int32))
        stack.append(ar[i])
writestack = np.vstack(tuple(stack))
sh = writestack.shape
writestack = writestack.T.reshape(-1)
fx.printmsg('writing %s data samples for %s channels (%s x %s)'
% (writestack.shape[0],
int(len(stack)/2),
sh[0], sh[1]))
# hard coded to write 32 bit signed ints to keep lossiness to a minimum
outfile.write(writestack.round().astype(np.int32, casting='unsafe').tobytes(order='C'))
outfile.close()