program

Signed-off-by: 葛峻恺 <202115006@mail.sdu.edu.cn>
2025-11-06 13:33:48 +08:00 · 2025-04-07 12:31:46 +00:00
--- a/readgssi/init.py
+++ b/readgssi/init.py
@@ -0,0 +1,4 @@
 from . import _version
 name = 'readgssi'
 __version__ = _version.version
--- a/readgssi/_version.py
+++ b/readgssi/_version.py
@@ -0,0 +1 @@
 version = "0.0.22"
--- a/readgssi/arrayops.py
+++ b/readgssi/arrayops.py
@@ -0,0 +1,162 @@
 import readgssi.functions as fx
 import numpy as np
 import pandas as pd
 def flip(ar, verbose=False):
    """
    Read the array backwards. Used to reverse line direction. Usage covered in the :ref:`Reversing` tutorial section.
    :param numpy.ndarray ar: Input data array
    :param bool verbose: Verbose, defaults to False
    :rtype: radar array (:py:class:`numpy.ndarray`)
    """
    if verbose:
        fx.printmsg('flipping radargram...')
    return ar.T[::-1].T
 def reducex(ar, header, by=1, chnum=1, number=1, verbose=False):
    """
    Reduce the number of traces in the array by a number. Not the same as :py:func:`stack` since it doesn't sum adjacent traces, however :py:func:`stack` uses it to resize the array prior to stacking.
    Used by :py:func:`stack` and :py:func:`distance_normalize` but not accessible from the command line or :py:func:`readgssi.readgssi`.
    :param numpy.ndarray ar: Input data array
    :param int by: Factor to reduce by. Default is 1.
    :param int chnum: Chunk number to display in console. Default is 1.
    :param int number: Number of chunks to display in console. Default is 1.
    :param bool verbose: Verbose, defaults to False.
    :rtype: radar array (:py:class:`numpy.ndarray`)
    """
    if verbose:
        if chnum/10 == int(chnum/10):
            fx.printmsg('%s/%s reducing %sx%s chunk by a factor of %s...' % (chnum, number, ar.shape[0], ar.shape[1], by))
    return ar[:,::by]
 def stack(ar, header, stack='auto', verbose=False):
    """
    Stacking algorithm. Stacking is the process of summing adjacent traces in order to reduce noise --- the thought being that random noise around zero will cancel out and data will either add or subtract, making it easier to discern.
    It is also useful for displaying long lines on a computer screen. Usage is covered in the :ref:`stacking` section of the tutorial.
    :py:data:`stack='auto'` results in an approximately 2.5:1 x:y axis ratio. :py:data:`stack=3` sums three adjacent traces into a single trace across the width of the array.
    :param numpy.ndarray ar: Input data array
    :param int by: Factor to stack by. Default is "auto".
    :rtype: radar array (:py:class:`numpy.ndarray`)
    """
    stack0 = stack
    if str(stack).lower() in 'auto':
        am = '(automatic)'
        ratio = (ar.shape[1]/ar.shape[0])/(75/30)
        if ratio > 1:
            stack = int(round(ratio))
        else:
            stack = 1
    else:
        am = '(manually set)'
        try:
            stack = int(stack)
        except ValueError:
            fx.printmsg('NOTE: stacking must be indicated with an integer greater than 1, "auto", or None.')
            fx.printmsg('      a stacking value of 1 equates to None. "auto" will attempt to stack to')
            fx.printmsg('      about a 2.5:1 x to y axis ratio. the result will not be stacked.')
            stack = 1
    if stack > 1:
        if verbose:
            fx.printmsg('stacking %sx %s...' % (stack, am))
        i = list(range(stack))
        l = list(range(int(ar.shape[1]/stack)))
        arr = np.copy(reducex(ar=ar, by=stack, header=header, verbose=verbose))
        for s in l:
            arr[:,s] = arr[:,s] + ar[:,s*stack+1:s*stack+stack].sum(axis=1)
    else:
        arr = ar
        if str(stack0).lower() in 'auto': # this happens when distance normalization reduces the file
            pass
        else:
            fx.printmsg('WARNING: no stacking applied. this can result in very large and awkwardly-shaped figures.')
    if header['rh_nchan'] == 1:
        # this is a hack to prevent the rhf_spx vars from being changed multiple times
        # when stacking a multichannel file. this will go away when each array has its
        # own header and thus each can be dealt with individually.
        if header['rhf_sps'] != 0:
            header['rhf_sps'] = header['rhf_sps'] / stack
        if header['rhf_spm'] != 0:
            header['rhf_spm'] = header['rhf_spm'] / stack
    else:
        try:
            if header['spx_updates']:
                pass
            else:
                if header['rhf_sps'] != 0:
                    header['rhf_sps'] = header['rhf_sps'] / stack
                if header['rhf_spm'] != 0:
                    header['rhf_spm'] = header['rhf_spm'] / stack
                header['spx_updates'] = True
        except KeyError:
            header['spx_updates'] = False
    return header, arr, stack
 def distance_normalize(header, ar, gps, verbose=False):
    """
    Distance normalization algorithm. Uses a GPS array to calculate expansion and contraction needed to convert from time-triggered to distance-normalized sampling interval. Then, the samples per meter is recalculated and inserted into the header for proper plotting.
    Usage described in the :ref:`Distance normalization` section of the tutorial.
    :param dict header: Input data array
    :param numpy.ndarray ar: Input data array
    :param pandas.DataFrame gps: GPS data from :py:func:`readgssi.gps.readdzg`. This is used to calculate the expansion and compression needed to normalize traces to distance.
    :param bool verbose: Verbose, defaults to False.
    :rtype: header (:py:class:`dict`), radar array (:py:class:`numpy.ndarray`), gps (False or :py:class:`pandas.DataFrame`)
    """
    if gps.empty:
        if verbose:
            fx.printmsg('no gps information for distance normalization')
    else:
        if verbose:
            fx.printmsg('normalizing GPS velocity records...')
        while np.min(gps['velocity']) < 0.01: # fix zero and negative velocity values
            gps['velocity'].replace(gps['velocity'].min(), 0.01, inplace=True)
        norm_vel = (gps['velocity'] * (1/gps['velocity'].max())*100).to_frame('normalized') # should end up as dataframe with one column
        # upsample to match radar array shape
        nanosec_samp_rate = int((1/header['rhf_sps'])*10**9) # nanoseconds
        start = np.datetime64(str(norm_vel.index[0])) - np.timedelta64(nanosec_samp_rate*(gps.iloc[0,0]), 'ns')
        newdf = pd.DataFrame(index=pd.date_range(start=start, periods=ar.shape[1], freq=str(nanosec_samp_rate)+'N', tz='UTC'))
        norm_vel = pd.concat([norm_vel, newdf], axis=1).interpolate('time').bfill()
        del newdf
        norm_vel = norm_vel.round().astype(int, errors='ignore')#, casting='unsafe')
        try:
            rm = int(round(ar.shape[1] / (norm_vel.shape[0] - ar.shape[1])))
            norm_vel = norm_vel.drop(norm_vel.index[::rm])
        except ZeroDivisionError as e:
            fx.printmsg('equal length radar & velocity arrays; no size adjustment')
        for i in range(0,abs(norm_vel.shape[0]-ar.shape[1])):
            s = pd.DataFrame({'normalized':[norm_vel['normalized'].iloc[-1]]}) # hacky, but necessary
            norm_vel = pd.concat([norm_vel, s])
        nvm = int(round(norm_vel['normalized'].mean()))
        proc = np.ndarray((ar.shape[0], 0))
        if verbose:
            fx.printmsg('expanding array using mean of normalized velocity %.2f' % (norm_vel['normalized'].mean()))
        on, i = 0, 0
        for c in np.array_split(ar, nvm, axis=1):
            # takes (array, [transform values to broadcast], axis)
            p = np.repeat(c, norm_vel['normalized'].astype(int, errors='ignore').values[on:on+c.shape[1]], axis=1)
            p = reducex(p, header=header, by=nvm, chnum=i, number=nvm, verbose=verbose)
            proc = np.concatenate((proc, p), axis=1)
            on = on + c.shape[1]
            i += 1
        if verbose:
            fx.printmsg('total GPS distance: %.2f m' % gps['meters'].iloc[-1])
            fx.printmsg('replacing old traces per meter value of %s with %s' % (header['rhf_spm'],
                                                                            ar.shape[1] / gps['meters'].iloc[-1]))
        header['rhf_spm'] = proc.shape[1] / gps['meters'].iloc[-1]
        header['rhf_sps'] = 0
    return header, proc, gps
--- a/readgssi/config.py
+++ b/readgssi/config.py
@@ -0,0 +1,76 @@
 from datetime import datetime
 from readgssi.__init__ import __version__, name
 import pkg_resources
 import readgssi.plot as plot
 """
 This module contains some things readgssi needs to operate, both command line and python-related.
 """
 dist = pkg_resources.get_distribution(name)
 year = datetime.now().year
 author = 'Ian Nesbitt'
 affil = 'School of Earth and Climate Sciences, University of Maine'
 help_text = u'''Help text:
 ############################################################
 readgssi version %s
 Copyleft %s %s 2017-%s
 %s
 ############################################################
 usage:
 readgssi -i input.DZT [OPTIONS]
 required flags:
     OPTION     |      ARGUMENT       |       FUNCTIONALITY
 -i, --input     | file:  /dir/f.DZT   |  input DZT file
 optional flags:
     OPTION     |      ARGUMENT       |       FUNCTIONALITY
 -o, --output    | file:  /dir/f.ext   |  output file. if not set, will be named similar to input
 -f, --format    | string, eg. "csv"   |  output format (CSV and DZT are the only working formats currently available from the command line)
 -p, --plot      | +integer or "auto"  |  plot size. will be x inches high or "auto". default: 10. see also -D to set DPI
 -D, --dpi       | positive integer    |  set the plot DPI for figure making. defaults to 150
 -T, --titleoff  |                     |  turn the plot title off (useful for figure making)
 -x, --xscale    | string, eg. "dist"  |  x units. will attempt to convert the x-axis to distance, time, or trace units based on header values
 -z, --zscale    | string, eg. "time"  |  z units. attempt to convert the x-axis to depth, time, or sample units based on header values
 -e, --zoom      | list of +int [LRUD] |  set a zoom to automatically jump to. list order is [left,right,up,down] and units are the same as axis
 -n, --noshow    |                     |  suppress matplotlib popup window and simply save a figure (useful for multi-file processing)
 -c, --colormap  | string, eg. "Greys" |  specify the colormap (https://matplotlib.org/users/colormaps.html#grayscale-conversion)
 -g, --gain      | positive float      |  gain constant (higher=greater contrast, default: 1)
 -A, --absval    |                     |  Displays the absolute value of the vertical gradient of the array when plotting. Good for displaying faint array features.
 -r, --bgr       | +integer or zero    |  horizontal background removal (useful to remove ringing). zero=full width; positive=window size (after stacking)
 -R, --reverse   |                     |  reverse (flip array horizontally)
 -w, --dewow     |                     |  trinomial dewow algorithm
 -t, --bandpass  | +int-+int (MHz)     |  triangular FIR bandpass filter applied vertically (positive integer range in megahertz; ex. 70-130)
 -b, --colorbar  |                     |  add a colorbar to the radar figure
 -a, --antfreq   | positive integer    |  set antenna frequency. overrides header value
 -s, --stack     | +integer or "auto"  |  set trace stacking value or "auto" to autostack to ~2.5:1 x:y axis ratio
 -N, --normalize |                     |  distance normalize; reads .DZG NMEA data file if it exists; otherwise tries to read CSV with lat, lon, and time fields
 -P, --pausecorr |                     |  pause correction; fixes decoupling of DZG and DZT trace numbers during survey pauses using low velocity GPS marks
 -d, --spm       | positive float      |  specify the samples per meter (spm). overrides header value
 -m, --histogram |                     |  produce a histogram of data values
 -E, --epsr      | float > 1.0         |  user-defined epsilon sub r (sometimes referred to as "dielectric") if set, ignores value in DZT header
 -Z, --zero      | +int or list of int |  timezero: skip samples before direct wave. samples are removed from the top of the trace. use list for multi-channel
 naming scheme for exports:
  CHARACTERS    |    MEANING
    Ch0         |  Profile from channel 0 (can range from 0 - 3)
    Dn          |  Distance normalization
    Tz233       |  Time zero at 233 samples
    S8          |  Stacked 8 times
    Rv          |  Profile read in reverse (flipped horizontally)
    Bgr75       |  Background removal filter with window size of 75
    Dw          |  Dewow filter
    Bp70-130    |  triangular FIR filter applied from 70 to 130 MHz
    G30         |  30x contrast gain
    Abs         |  Color scale represents absolute value of vertical gradient
    Z10.20.7.5  |  zoom from 10-20 axis units on the x-axis and 5-7 on the z-axis
 matplotlib output formats supported by current backend:
   EXTENSION    |    FILE FORMAT
 %s''' % (__version__, u'\U0001F12F', author, year, affil, plot.fmtst)
 version_text = '%s %s' % (name, __version__)
--- a/readgssi/constants.py
+++ b/readgssi/constants.py
@@ -0,0 +1,107 @@
 import pytz
 """
 This module contains a number of variables that readgssi needs to perform physics calculations and interpret files from DZT files.
 """
 MINHEADSIZE = 1024 # absolute minimum total header size
 PAREASIZE = 128 # fixed info header size
 RGPSSIZE = 9 # GPS record size
 GPSAREASIZE = RGPSSIZE * 2
 INFOAREASIZE = MINHEADSIZE - PAREASIZE - GPSAREASIZE
 TZ = pytz.timezone('UTC')
 # some physical constants for Maxwell's equation for speed of light in a dielectric medium
 C = 299792458                   # speed of light in a vacuum
 Eps_0 = 8.8541878 * 10**(-12)   # epsilon naught (vacuum permittivity)
 Mu_0 = 1.257 * 10**(-6)         # mu naught (vacuum permeability)
 # the GSSI field unit used
 UNIT = {
    0: 'synthetic/gprMax',
    1: 'unknown system type',
    2: 'SIR 2000',
    3: 'SIR 3000',
    4: 'TerraVision',
    6: 'SIR 20',
    7: 'StructureScan Mini',
    8: 'SIR 4000',
    9: 'SIR 30',
    10: 'unknown system type',
    11: 'unknown system type',
    12: 'UtilityScan DF',
    13: 'HS',
    14: 'StructureScan Mini XT',
 }
 # a dictionary of standard gssi antenna codes and frequencies
 # unsure of what they all look like in code, however
 ANT = {
    '100MHz': 100,
    '200MHz': 200,
    '270MHz': 270,
    '300/800D': 300,    # technically two separate antennas and therefore, I think, a bug. users of this antenna will have to use `-a 300,800`
    '350MHz': 350,
    '400MHz': 400,
    '500MHz': 500,
    '800MHz': 800,
    '900MHz': 900,
    '1.5/1.6GHz': 1500,
    '1600MHz': 1600,
    '2000MHz': 2000,
    '2300MHz': 2300,
    '2600MHz': 2600,
    '3200': 'adjustable',
    '3200MLF': 'adjustable',
    'gprMa': 'adjustable',      # gprMax support
    'GSSI': 'adjustable',       # support for issue #11
    'CUSTOM': 'adjustable',
    '3207': 100,
    '3207AP': 100,
    '5106': 200,
    '5106A': 200,
    '50300': 300,
    '350': 350,
    '350HS': 350,
    'D400HS': 350,
    '50270': 270,
    '50270S': 270,
    'D50300': 300,
    '5103': 400,
    '5103A': 400,
    '50400': 400,
    '50400S': 400,
    '3102 500MHZ': 500,
    '3102': 500, # presumed to exist
    '800': 800,
    'D50800': 800,
    '3101': 900,
    '3101A': 900,
    '51600': 1600,
    '51600S': 1600,
    'SS MINI': 1600,
    '4105NR': 2000,
    '42000S': 2000,
    '62000': 2000,
    '62000-003': 2000,
    '62300': 2300,
    '62300XT': 2300,
    '52600': 2600,
    '52600S': 2600,
    'SSMINIXT': 2700,
 }
 # whether or not the file is GPS-enabled (does not guarantee presence of GPS data in file)
 GPS = {
    1: 'no',
    2: 'yes',
 }
 # bits per data word in radar array
 BPS = {
    8: '8 unsigned',
    16: '16 unsigned',
    32: '32 signed'
 }
--- a/readgssi/dzt.py
+++ b/readgssi/dzt.py
@@ -0,0 +1,419 @@
 import struct
 import math
 import os
 import numpy as np
 from pandas import DataFrame
 from datetime import datetime
 from itertools import takewhile
 from readgssi.gps import readdzg
 import readgssi.functions as fx
 from readgssi.constants import *
 from readgssi.dzx import get_user_marks, get_picks
 """
 helper module for reading information from GSSI DZT files
 the main function, readdzt(), returns header information in dictionary format
 and the radar profile in a numpy array.
 readdzt_gprpy() restructures output for use specifically with
 Alain Plattner's GPRPy software (https://github.com/NSGeophysics/GPRPy).
 """
 def readtime(bytez):
    """
    Function to read dates from :code:`rfDateByte` binary objects in DZT headers. 
    DZT :code:`rfDateByte` objects are 32 bits of binary (01001010111110011010011100101111),
    structured as little endian u5u6u5u5u4u7 where all numbers are base 2 unsigned int (uX)
    composed of X number of bits. Four bytes is an unnecessarily high level of compression
    for a single date object in a filetype that often contains tens or hundreds of megabytes
    of array information anyway.
    So this function reads (seconds/2, min, hr, day, month, year-1980) then does
    seconds*2 and year+1980 and returns a datetime object.
    For more information on :code:`rfDateByte`, see page 55 of
    `GSSI's SIR 3000 manual <https://support.geophysical.com/gssiSupport/Products/Documents/Control%20Unit%20Manuals/GSSI%20-%20SIR-3000%20Operation%20Manual.pdf>`_.
    :param bytes bytes: The :code:`rfDateByte` to be decoded
    :rtype: :py:class:`datetime.datetime`
    """
    dtbits = ''
    rfDateByte = (b for b in bytez)
    for byte in rfDateByte:                    # assemble the binary string
        for i in range(8):
            dtbits += str((byte >> i) & 1)
    dtbits = dtbits[::-1]               # flip the string
    sec2 = int(dtbits[27:32], 2) * 2    # seconds are stored as seconds/2 because there's only 5 bytes to work with
    mins = int(dtbits[21:27], 2)        # minutes
    hr = int(dtbits[16:21], 2)          # hours
    day = int(dtbits[11:16], 2)         # day
    mo = int(dtbits[7:11], 2)           # month
    yr = int(dtbits[0:7], 2) + 1980     # year, stored as 1980+(0:127)
    return datetime(yr, mo, day, hr, mins, sec2, 0, tzinfo=pytz.UTC)
 def arraylist(header, data):
    # create a list of n arrays, where n is the number of channels
    data = data.astype(np.int32)
    chans = list(range(header['rh_nchan']))
    # set up list of arrays
    img_arr = data[:header['rh_nchan']*header['rh_nsamp']] # test if we understand data structure. arrays should be stacked nchan*nsamp high
    new_arr = {}
    for ar in chans:
        a = []
        a = img_arr[(ar)*header['rh_nsamp']:(ar+1)*header['rh_nsamp']] # break apart
        new_arr[ar] = a[header['timezero'][ar]:,:int(img_arr.shape[1])] # put into dict form
    return new_arr
 def readdzt(infile, gps=DataFrame(), spm=None, start_scan=0, num_scans=-1,
            epsr=None, antfreq=[None,None,None,None], verbose=False,
            zero=[None,None,None,None]):
    """
    Function to unpack and return things the program needs from the file header, and the data itself.
    :param str infile: The DZT file location
    :param bool gps: Whether a GPS file exists. Defaults to False, but changed to :py:class:`pandas.DataFrame` if a DZG file with the same name as :code:`infile` exists.
    :param float spm: User value of samples per meter, if specified. Defaults to None.
    :param float epsr: User value of relative permittivity, if specified. Defaults to None.
    :param list[int,int,int,int] zero: List of time-zero values per channel. Defaults to a list of :code:`None` values, which resolves to :code:`rh_zero`.
    :param bool verbose: Verbose, defaults to False
    :rtype: header (:py:class:`dict`), radar array (:py:class:`numpy.ndarray`), gps (False or :py:class:`pandas.DataFrame`)
    """
    '''
    currently unused but potentially useful lines:
    # headerstruct = '<5h 5f h 4s 4s 7h 3I d I 3c x 3h d 2x 2c s s 14s s s 12s h 816s 76s' # the structure of the bytewise header and "gps data" as I understand it - 1024 bytes
    # readsize = (2,2,2,2,2,4,4,4,4,4,2,4,4,4,2,2,2,2,2,4,4,4,8,4,3,1,2,2,2,8,1,1,14,1,1,12,2) # the variable size of bytes in the header (most of the time) - 128 bytes
    # fx.printmsg('total header structure size: '+str(calcsize(headerstruct)))
    # packed_size = 0
    # for i in range(len(readsize)): packed_size = packed_size+readsize[i]
    # fx.printmsg('fixed header size: '+str(packed_size)+'\\n')
    '''
    infile_gps = os.path.splitext(infile)[0] + ".DZG"
    infile_dzx = os.path.splitext(infile)[0] + ".DZX"
    infile = open(infile, 'rb')
    header = {}
    header['infile'] = infile.name
    header['known_ant'] = [None, None, None, None]
    header['dzt_ant'] = [None, None, None, None]
    header['rh_ant'] = [None, None, None, None]
    header['rh_antname'] = [None, None, None, None]
    header['antfreq'] = [None, None, None, None]
    header['timezero'] = [None, None, None, None]
    # begin read
    header['rh_tag'] = struct.unpack('<h', infile.read(2))[0] # 0x00ff if header, 0xfnff if old file format
    header['rh_data'] = struct.unpack('<h', infile.read(2))[0] # offset to data from beginning of file
    header['rh_nsamp'] = struct.unpack('<h', infile.read(2))[0] # samples per scan
    header['rh_bits'] = struct.unpack('<h', infile.read(2))[0] # bits per data word
    header['rh_zero'] = struct.unpack('<h', infile.read(2))[0] # if sir-30 or utilityscan df, then repeats per sample; otherwise 0x80 for 8bit and 0x8000 for 16bit
    header['rhf_sps'] = struct.unpack('<f', infile.read(4))[0] # scans per second
    header['dzt_sps'] = header['rhf_sps']
    header['rhf_spm'] = struct.unpack('<f', infile.read(4))[0] # scans per meter
    header['dzt_spm'] = header['rhf_spm']
    if spm:
        header['rhf_spm'] = spm
    header['rhf_mpm'] = struct.unpack('<f', infile.read(4))[0] # meters per mark
    header['rhf_position'] = struct.unpack('<f', infile.read(4))[0] # position (ns)
    header['rhf_range'] = struct.unpack('<f', infile.read(4))[0] # range (ns)
    header['rh_npass'] = struct.unpack('<h', infile.read(2))[0] # number of passes for 2-D files
    # bytes 32-36 and 36-40: creation and modification date and time in bits
    # structured as little endian u5u6u5u5u4u7
    infile.seek(32)
    try:
        header['rhb_cdt'] = readtime(infile.read(4))
    except:
        header['rhb_cdt'] = datetime(1980, 1, 1)
    try:
        header['rhb_mdt'] = readtime(infile.read(4))
    except:
        header['rhb_mdt'] = datetime(1980, 1, 1)
    header['rh_rgain'] = struct.unpack('<h', infile.read(2))[0] # offset to range gain function
    header['rh_nrgain'] = struct.unpack('<h', infile.read(2))[0] # size of range gain function
    infile.seek(header['rh_rgain'])
    try:
        header['rgain_bytes'] = infile.read(header['rh_nrgain'])
    except:
        fx.printmsg('WARNING: Could not read range gain function')
    infile.seek(44)
    header['rh_text'] = struct.unpack('<h', infile.read(2))[0] # offset to text
    header['rh_ntext'] = struct.unpack('<h', infile.read(2))[0] # size of text
    header['rh_proc'] = struct.unpack('<h', infile.read(2))[0] # offset to processing history
    header['rh_nproc'] = struct.unpack('<h', infile.read(2))[0] # size of processing history
    header['rh_nchan'] = struct.unpack('<h', infile.read(2))[0] # number of channels
    if epsr != None: # in this case the user has specified an epsr value
        header['dzt_epsr'] = struct.unpack('<f', infile.read(4))[0]
        header['rhf_epsr'] = epsr
    else:
        header['rhf_epsr'] = struct.unpack('<f', infile.read(4))[0] # epsr (sometimes referred to as "dielectric permittivity")
        header['dzt_epsr'] = header['rhf_epsr']
    # calculate relative wave celerity given epsr value(s)
    header['cr'] = 1 / math.sqrt(Mu_0 * Eps_0 * header['rhf_epsr']+1)
    header['cr_true'] = 1 / math.sqrt(Mu_0 * Eps_0 * header['dzt_epsr']+1)
    header['rhf_top'] = struct.unpack('<f', infile.read(4))[0] # from experimentation, it seems this is the data top position in meters
    header['dzt_depth'] = struct.unpack('<f', infile.read(4))[0] # range in meters based on DZT rhf_epsr, before subtracting rhf_top
    if (header['dzt_depth'] == 0):
        # if dzt depth is 0, we need to calculate it using cr and rhf_range (converted to seconds)
        header['dzt_depth'] = header['cr'] * (header['rhf_range'] * (10 ** (-10)))
    header['rhf_depth'] = header['dzt_depth'] * (math.sqrt(header['dzt_epsr']) / math.sqrt(header['rhf_epsr']+1)) # range based on user epsr, before subtracting rhf_top
    # getting into largely useless territory (under "normal" operation)
    header['rh_xstart'] = struct.unpack('<f', infile.read(4))[0] # starting x grid coordinate? part of rh_coordx
    header['rh_xend'] = struct.unpack('<f', infile.read(4))[0] # ending x grid coordinate? part of rh_coordx
    header['rhf_servo_level'] = struct.unpack('<f', infile.read(4))[0] # gain servo level
    # 3 "reserved" bytes
    infile.seek(81)
    header['rh_accomp'] = struct.unpack('B', infile.read(1))[0] # Ant Conf component
    header['rh_sconfig'] = struct.unpack('<h', infile.read(2))[0] # setup config number
    header['rh_spp'] = struct.unpack('<h', infile.read(2))[0] # scans per pass
    header['rh_linenum'] = struct.unpack('<h', infile.read(2))[0] # line number
    header['rh_ystart'] = struct.unpack('<f', infile.read(4))[0] # starting y grid coordinate? part of rh_coordx
    header['rh_yend'] = struct.unpack('<f', infile.read(4))[0] # ending y grid coordinate? part of rh_coordx
    header['rh_96'] = infile.read(1)
    header['rh_lineorder'] = int('{0:08b}'.format(ord(header['rh_96']))[::-1][4:], 2)
    header['rh_slicetype'] = int('{0:08b}'.format(ord(header['rh_96']))[::-1][:4], 2)
    header['rh_dtype'] = infile.read(1) # no description of dtype
    freq = [None, None, None, None]
    for i in range(header['rh_nchan']):
        if (antfreq != None) and (antfreq != [None, None, None, None]):
            try:
                freq[i] = antfreq[i]
            except (TypeError, IndexError) as e:
                freq[i] = 200
                print('WARNING: due to an error, antenna %s frequency was set to 200 MHz' % (i))
                print('Error detail: %s' % (e))
    curpos = infile.tell()
    # read frequencies for multiple antennae
    for chan in list(range(header['rh_nchan'])):
        if chan == 0:
            infile.seek(98) # start of antenna section
        else:
            infile.seek(98 + (MINHEADSIZE*(chan))) # start of antenna bytes for channel n
        header['dzt_ant'][chan] = infile.read(14)
        header['rh_ant'][chan] = header['dzt_ant'][chan].decode('utf-8').split('\x00')[0]
        header['rh_antname'][chan] = header['rh_ant'][chan].rsplit('x')[0]
        try:
            header['antfreq'][chan] = ANT[header['rh_antname'][chan]]
            header['known_ant'][chan] = True
        except KeyError:
            header['known_ant'][chan] = False
            try:
                header['antfreq'][chan] = int("".join(takewhile(str.isdigit, header['rh_ant'][chan].replace('D5','').replace('D6','')))) # hoping this works
            except ValueError:
                header['antfreq'] = freq
            #header['antfreq'][chan] = int(header['rh_antname'][chan].replace('D5','').replace('D6',''))
    infile.seek(curpos+14)
    header['rh_112'] = infile.read(1)
    header['rh_lineorder'] = int('{0:08b}'.format(ord(header['rh_112']))[4:], 2)
    header['rh_slicetype'] = int('{0:08b}'.format(ord(header['rh_112']))[:4], 2)
    #infile.seek(113) # byte 113
    header['vsbyte'] = infile.read(1) # byte containing versioning bits
    header['rh_version'] = int('{0:08b}'.format(ord(header['vsbyte']))[5:], 2) # ord(vsbyte) >> 5 # whether or not the system is GPS-capable, 1=no 2=yes (does not mean GPS is in file)
    header['rh_system'] = int('{0:08b}'.format(ord(header['vsbyte']))[:5], 2) # ord(vsbyte) >> 3 ## the system type (values in UNIT={...} dictionary in constants.py)
    header['rh_name'] = infile.read(12)
    header['rh_chksum'] = infile.read(2)
    header['INFOAREA'] = infile.read(MINHEADSIZE-PAREASIZE-GPSAREASIZE)
    header['rh_RGPS0'] = infile.read(RGPSSIZE)
    header['rh_RGPS1'] = infile.read(RGPSSIZE)
    if header['rh_system'] == 14:   # hardcoded because this is so frustrating. assuming no other antennas can be paired with SS Mini XT
        header['rh_antname'] = ['SSMINIXT', None, None, None]
        header['antfreq'] = [2700, None, None, None]
        header['known_ant'] = [True, False, False, False]
    if header['rh_data'] < MINHEADSIZE: # whether or not the header is normal or big-->determines offset to data array
        header['data_offset'] = MINHEADSIZE * header['rh_data']
    else:
        header['data_offset'] = MINHEADSIZE * header['rh_nchan']
    infile.seek(MINHEADSIZE * header['rh_nchan'])
    header['header_extra'] = infile.read(header['data_offset'] - (MINHEADSIZE * header['rh_nchan']))
    if header['rh_bits'] == 8:
        dtype = np.uint8 # 8-bit unsigned
    elif header['rh_bits'] == 16:
        dtype = np.uint16 # 16-bit unsigned
    else:
        dtype = np.int32 # 32-bit signed
    header['dtype'] = dtype
    if start_scan != 0:
        try:
            # calculate start offset in bytes:
            start_offset = int(start_scan * header['rh_nchan'] * header['rh_nsamp'] * header['rh_bits']/8)
        except ValueError:
            # if this fails, then fall back to 0 offset.
            start_offset = 0
            fx.printmsg('WARNING: ValueError for scan offset: {start_scan} (reading from start of data)')
            # consider returning No Data?
    else:
        start_offset = 0
    if num_scans != -1:
        try:
            num_items = int(num_scans * header['rh_nsamp']*header['rh_nchan'])
        except ValueError:
            # if this fails then get all scans...
            fx.printmsg('WARNING: ValueError for number of scans: {num_scans} (reading all items from {start_scan} scans)')
            num_items = -1
    else:
        num_items = -1
    # read in and transpose data
    data = np.fromfile(infile, dtype, count=num_items)
    data = data.reshape(-1,(header['rh_nsamp']*header['rh_nchan'])) # offset=start_offset,
    data = data.T
    header['shape'] = data.shape
    header['ns_per_zsample'] = ((header['rhf_depth']-header['rhf_top']) * 2) / (header['rh_nsamp'] * header['cr'])
    header['samp_freq'] = 1 / ((header['dzt_depth'] * 2) / (header['rh_nsamp'] * header['cr_true']))
    try:
        header['sec'] = data.shape[1]/float(header['rhf_sps'])
    except ZeroDivisionError:
        header['sec'] = 1.
    infile.close()
    for i in range(header['rh_nchan']):
        try:
            header['timezero'][i] = int(list(zero)[i])
        except (TypeError, IndexError):
            fx.printmsg('WARNING: no time zero specified for channel %s, defaulting to rh_zero value (%s)' % (i, header['rh_zero']))
            header['timezero'][i] = header['rh_zero']
    if os.path.isfile(infile_gps):
        try:
            if verbose:
                fx.printmsg('reading GPS file...')
            gps = readdzg(infile_gps, 'dzg', header, verbose=verbose)
        except IOError as e0:
            fx.printmsg('WARNING: cannot read DZG file')
            try:
                infile_gps = os.path.splitext(infile_gps)[0] + ".csv"
                gps = readdzg(infile_gps, 'csv', header, verbose=verbose)
            except Exception as e1:
                try:
                    infile_gps = os.path.splitext(infile_gps)[0] + ".CSV"
                    gps = readdzg(infile_gps, 'csv', header, verbose=verbose)
                except Exception as e2:
                    fx.printmsg('ERROR reading GPS. distance normalization will not be possible.')
                    fx.printmsg('   details: %s' % e0)
                    fx.printmsg('            %s' % e1)
                    fx.printmsg('            %s' % e2)
                    gps = DataFrame()
    else:
        fx.printmsg('WARNING: no DZG file found for GPS input')
        gps = DataFrame()
    header['marks'] = []
    header['picks'] = {}
    if os.path.isfile(infile_dzx):
        header['marks'] = get_user_marks(infile_dzx, verbose=verbose)
        header['picks'] = get_picks(infile_dzx, verbose=verbose)
    else:
        fx.printmsg('WARNING: could not find DZX file to read metadata. Trying to read array for marks...')
        tnums = np.ndarray.tolist(data[0])  # the first row of the array is trace number
        usr_marks = np.ndarray.tolist(data[1])  # when the system type is SIR3000, the second row should be user marks (otherwise these are in the DZX, see note below)
        i = 0
        for m in usr_marks:
            if m > 0:
                #print(m)
                header['marks'].append(i)
            i += 1
        if len(header['marks']) == header['shape'][1]:
            fx.printmsg('number of marks matches the number of traces (%s). this is probably wrong, so throwing out the mark list.' % (len(header['marks'])))
            header['marks'] = []
        else:
            fx.printmsg('DZT marks read successfully. marks: %s' % len(header['marks']))
            fx.printmsg('                            traces: %s' % header['marks'])
    # make a list of data by channel
    data = arraylist(header, data) 
    return [header, data, gps]
 def readdzt_gprpy(infile):
    r = readdzt(infile)
    data = r[1]
    header = {
        'sptrace': r[0]['rh_nsamp'],
        'scpsec': r[0]['rhf_sps'],
        'scpmeter': r[0]['rhf_spm'],
        'startposition': r[0]['rhf_position'],
        'nanosecptrace': r[0]['rhf_range'],
        'scansppass': r[0]['rh_npass'],
    }
    return data, header
 def header_info(header, data):
    """
    Function to print relevant header data.
    :param dict header: The header dictionary
    :param numpy.ndarray data: The data array
    """
    fx.printmsg('system:             %s (system code %s)' % (UNIT[header['rh_system']], header['rh_system']))
    fx.printmsg('antennas:           %s' % header['rh_antname'])
    fx.printmsg('ant freqs. (MHz):   %s' % header['antfreq'])
    fx.printmsg('ant time zeros:     %s' % header['timezero'])
    for i in range(header['rh_nchan']):
        if header['known_ant'][i] == True:
            fx.printmsg('ant %s center freq:  %s MHz' % (i, ANT[header['rh_antname'][i]]))
        else:
            fx.printmsg('ant %s center freq:  %s MHz (antenna name %s not in current dictionary)'
                        % (i, header['antfreq'][i], header['rh_antname'][i]))
    fx.printmsg('date created:       %s' % header['rhb_cdt'])
    if header['rhb_mdt'] == datetime(1980, 1, 1):
        fx.printmsg('date modified:      (never modified)')
    else:
        fx.printmsg('date modified:      %s' % header['rhb_mdt'])
    try:
        fx.printmsg('gps-enabled file:   %s' % GPS[header['rh_version']])
    except (TypeError, KeyError) as e:
        fx.printmsg('gps-enabled file:   unknown')
    fx.printmsg('number of channels: %i' % header['rh_nchan'])
    fx.printmsg('samples per trace:  %i' % header['rh_nsamp'])
    fx.printmsg('bits per sample:    %s' % BPS[header['rh_bits']])
    fx.printmsg('traces per second:  %.1f' % header['rhf_sps'])
    if header['dzt_spm'] != header['rhf_spm']:
        fx.printmsg('traces per meter:   %.2f (manually set - value from DZT: %.2f)' % (float(header['rhf_spm']), float(header['dzt_spm'])))
    else:
        fx.printmsg('traces per meter:   %.1f' % (float(header['rhf_spm'])))
    if header['dzt_epsr'] != header['rhf_epsr']:
        fx.printmsg('user epsr:          %.1f (manually set - value from DZT: %.1f)' % (header['rhf_epsr'], header['dzt_epsr']))
    else:
        fx.printmsg('epsr:               %.1f' % header['rhf_epsr'])
    fx.printmsg('speed of wave:      %.2E m/sec (%.2f%% of vacuum)' % (header['cr'], header['cr'] / C * 100))
    fx.printmsg('time range (TWTT):  %.1f ns' % (header['rhf_range']))
    if header['dzt_depth'] != header['rhf_depth']:
        fx.printmsg('sampling depth:     %.1f m (manually set - value from DZT: %.1f)' % (header['rhf_depth'], header['dzt_depth']))
    else:
        fx.printmsg('sampling depth:     %.1f m' % (header['rhf_depth']-header['rhf_top']))
    fx.printmsg('"rhf_top":          %.1f m' % header['rhf_top'])
    fx.printmsg('"rhf_depth":        %.1f m' % header['rhf_depth'])
    fx.printmsg('offset to data:     %i bytes' % header['data_offset'])
    fx.printmsg('traces:             %i' % int(header['shape'][1]/header['rh_nchan']))
    fx.printmsg('seconds:            %.8f' % (header['sec']))
    fx.printmsg('array dimensions:   %i x %i' % (header['shape'][0], header['shape'][1]))
--- a/readgssi/dzx.py
+++ b/readgssi/dzx.py
@@ -0,0 +1,137 @@
 import os, sys
 import xml.etree.ElementTree as et
 import readgssi.functions as fx
 '''
 Much of the logic here is taken from gpx2dzg.io.readdzx
 https://github.com/iannesbitt/gpx2dzg/blob/master/gpx2dzg/io.py#L12
 ## user marks look like this in unproccesed DZX:
 <DZX xmlns="www.geophysical.com/DZX/1.02">
    <File>
        <Profile>
            <WayPt>
                <scan>1351</scan>
                <mark>User</mark>
                <name>Mark1</name>
            </WayPt>
        </Profile>
    </File>
 </DZX>
 ## user marks look like this in processed DZX:
 <DZX xmlns="www.geophysical.com/DZX/1.02">
  <ProfileGroup>
    <File>
      <Profile>
        <WayPt>
          <scan>1351</scan>
          <mark>User</mark>
          <name>Mark1</name>
        </WayPt>
      </Profile>
    </File>
  </ProfileGroup>
 </DZX>
 '''
 def get_user_marks(infile, verbose=False):
    '''
    Find and return a list of user marks from a dzx. Since there are multiple types of dzx without good documentation, this is prone to errors.
    :param str infile: The full DZX file path
    :param bool verbose: Verbosity, defaults to False
    '''
    dzxmarks = []
    with open(infile, 'r') as f:
        tree = et.parse(f)
    root = tree.getroot()
    try:
        # try the 'TargetGroup' type
        if verbose:
            fx.printmsg('testing DZX data type (currently supported types are "File", "TargetGroup", and "ProfileGroup")')
        for item in root:
            if 'TargetGroup' in item.tag:
                for child in item:
                    if 'TargetWayPt' in child.tag:
                        for gchild in child:
                            if 'scanSampChanProp' in gchild.tag:
                                dzxmarks.append(int(gchild.text.split(',')[0]))
        assert len(dzxmarks) > 0
        if verbose:
            fx.printmsg('INFO: DZX type is standard ("TargetGroup")')
    except AssertionError as e:
        try:
            # the 'File' type
            dzxmarks = []
            for b in root:
                if 'File' in b.tag:
                    for child in b:
                        if 'Profile' in child.tag:
                            for gchild in child:
                                if 'WayPt' in gchild.tag:
                                    usermark = False
                                    for ggchild in gchild:
                                        if 'scan' in ggchild.tag:
                                            mark = int(ggchild.text)
                                        if 'mark' in ggchild.tag:
                                            if ggchild.text == 'User':
                                                usermark = True
                                    if usermark:
                                        dzxmarks.append(mark)
            assert len(dzxmarks) > 1
            if verbose:
                fx.printmsg('INFO: DZX type is ("File")')
        except AssertionError as e:
            try:
                # the 'ProfileGroup' type
                dzxmarks = []
                for item in root:
                    if 'ProfileGroup' in item.tag:
                        for child in item:
                            if 'File' in child.tag:
                                for gchild in child:
                                    if 'Profile' in gchild.tag:
                                        for ggchild in gchild:
                                            usermark = False
                                            if 'WayPt' in ggchild.tag:
                                                for gggchild in ggchild:
                                                    if 'scan' in gggchild.tag:
                                                        mark = int(gggchild.text)
                                                    if 'mark' in gggchild.tag:
                                                        if gggchild.text == 'User':
                                                            usermark = True
                                                if usermark:
                                                    dzxmarks.append(mark)
                assert len(dzxmarks) > 0
                if verbose:
                    fx.printmsg('INFO: DZX type is ("ProfileGroup")')
            except AssertionError as e:
                pass
    if verbose:
        if len(dzxmarks) > 0:
            fx.printmsg('DZX read successfully. marks: %s' % len(dzxmarks))
            fx.printmsg('                      traces: %s' % dzxmarks)
        else:
            fx.printmsg('no user marks read from DZX. if you believe this is an error, please send the DZX to ian.nesbitt@gmail.com for testing. thank you.')
    return dzxmarks
 def get_picks(infile, verbose=False):
    picks = {
        'l1': [[], []],
        'l2': [[], []],
        'l3': [[], []],
        'l4': [[], []],
        'l5': [[], []],
        'l6': [[], []],
        'l7': [[], []],
        'l8': [[], []],
    }
    return picks
--- a/readgssi/filtering.py
+++ b/readgssi/filtering.py
@@ -0,0 +1,141 @@
 import numpy as np
 from scipy.ndimage.filters import uniform_filter1d
 from scipy.signal import firwin, lfilter
 import readgssi.functions as fx
 """
 Mathematical filtering routines for array manipulation
 Written in part by François-Xavier Simon (@fxsimon)
 """
 def bgr(ar, header, win=0, verbose=False):
    """
    Horizontal background removal (BGR). Subtracts off row averages for full-width or window-length slices. For usage see :ref:`Getting rid of horizontal noise`.
    :param numpy.ndarray ar: The radar array
    :param dict header: The file header dictionary
    :param int win: The window length to process. 0 resolves to full-width, whereas positive integers dictate the window size in post-stack traces.
    :rtype: :py:class:`numpy.ndarray`
    """
    if (int(win) > 1) & (int(win) < ar.shape[1]):
        window = int(win)
        how = 'boxcar (%s trace window)' % window
    else:
        how = 'full only'
    if verbose:
        fx.printmsg('removing horizontal background using method=%s...' % (how))
    i = 0
    for row in ar:          # each row
        mean = np.mean(row)
        ar[i] = row - mean
        i += 1
    if how != 'full only':
        if window < 10:
            fx.printmsg('WARNING: BGR window size is very short. be careful, this may obscure horizontal layering')
        if window < 3:
            window = 3
        elif (window / 2. == int(window / 2)):
            window = window + 1
        ar -= uniform_filter1d(ar, size=window, mode='constant', cval=0, axis=1)
    return ar
 def dewow(ar, verbose=False):
    """
    Polynomial dewow filter. Written by fxsimon.
    .. warning:: This filter is still experimental.
    :param numpy.ndarray ar: The radar array
    :param bool verbose: Verbose, default is False
    :rtype: :py:class:`numpy.ndarray`
    """
    fx.printmsg('WARNING: dewow filter is experimental')
    if verbose:
        fx.printmsg('dewowing data...')
    signal = list(zip(*ar))[10]
    model = np.polyfit(range(len(signal)), signal, 3)
    predicted = list(np.polyval(model, range(len(signal))))
    i = 0
    for column in ar.T:      # each column
        ar.T[i] = column + predicted
        i += 1
    return ar
 def bp(ar, header, freqmin, freqmax, zerophase=True, verbose=False):
    """
    Vertical butterworth bandpass. This filter is not as effective as :py:func:`triangular` and thus is not available through the command line interface or through :py:func:`readgssi.readgssi.readgssi`.
    Filter design and implementation are dictated by :py:func:`obspy.signal.filter.bandpass`.
    :param np.ndarray ar: The radar array
    :param dict header: The file header dictionary
    :param int freqmin: The lower corner of the bandpass
    :param int freqmax: The upper corner of the bandpass
    :param bool zerophase: Whether to run the filter forwards and backwards in order to counteract the phase shift
    :param bool verbose: Verbose, defaults to False
    :rtype: :py:class:`numpy.ndarray`
    """
    from obspy.signal.filter import bandpass
    if verbose:
        fx.printmsg('vertical butterworth bandpass filter')
        fx.printmsg('NOTE: better results are achieved with readgssi.filtering.triangular()')
    #samp_freq = 1 / ((header['rhf_depth'] * 2) / header['cr'] / header['rh_nsamp'])
    samp_freq = header['samp_freq']
    freqmin = freqmin * 10 ** 6
    freqmax = freqmax * 10 ** 6
    corners = 1
    if verbose:
        fx.printmsg('sampling frequency:       %.2E Hz' % samp_freq)
        fx.printmsg('minimum filter frequency: %.2E Hz' % freqmin)
        fx.printmsg('maximum filter frequency: %.2E Hz' % freqmax)
        fx.printmsg('corners: %s, zerophase: %s' % (corners, zerophase))
    i = 0
    for t in ar.T:
        f = bandpass(data=t, freqmin=freqmin, freqmax=freqmax, df=samp_freq, corners=corners, zerophase=zerophase)
        ar[:,i] = f
        i += 1
    return ar
 def triangular(ar, header, freqmin, freqmax, zerophase=True, verbose=False):
    """
    Vertical triangular FIR bandpass. This filter is designed to closely emulate that of RADAN.
    Filter design is implemented by :py:func:`scipy.signal.firwin` with :code:`numtaps=25` and implemented with :py:func:`scipy.signal.lfilter`.
    .. note:: This function is not compatible with scipy versions prior to 1.3.0.
    :param np.ndarray ar: The radar array
    :param dict header: The file header dictionary
    :param int freqmin: The lower corner of the bandpass
    :param int freqmax: The upper corner of the bandpass
    :param bool zerophase: Whether to run the filter forwards and backwards in order to counteract the phase shift
    :param bool verbose: Verbose, defaults to False
    :rtype: :py:class:`numpy.ndarray`
    """
    if verbose:
        fx.printmsg('vertical triangular FIR bandpass filter')
    #samp_freq = 1 / ((header['rhf_depth'] * 2) / header['cr'] / header['rh_nsamp'])
    samp_freq = header['samp_freq']
    freqmin = freqmin * 10 ** 6
    freqmax = freqmax * 10 ** 6
    numtaps = 25
    if verbose:
        fx.printmsg('sampling frequency:       %.2E Hz' % samp_freq)
        fx.printmsg('minimum filter frequency: %.2E Hz' % freqmin)
        fx.printmsg('maximum filter frequency: %.2E Hz' % freqmax)
        fx.printmsg('numtaps: %s, zerophase: %s' % (numtaps, zerophase))
    filt = firwin(numtaps=numtaps, cutoff=[freqmin, freqmax], window='triangle', pass_zero='bandpass', fs=samp_freq)
    far = lfilter(filt, 1.0, ar, axis=0).copy()
    if zerophase:
        far = lfilter(filt, 1.0, far[::-1], axis=0)[::-1]
    return far
--- a/readgssi/functions.py
+++ b/readgssi/functions.py
@@ -0,0 +1,170 @@
 import math
 from datetime import datetime
 from readgssi.constants import *
 import os
 def printmsg(msg):
    """
    Prints with date/timestamp.
    :param str msg: Message to print
    """
    print('%s - %s' % (datetime.now().strftime('%Y-%m-%d %H:%M:%S'), msg))
 def genericerror(filetype='file'):
    """Prints a standard message for a generic error using the `gpx2dzg.functions.printmsg()` function.
    This is called from functions in `gpx2dzg.io`.
    Parameters
    ----------
    filetype : str
        The type of file this message is about. Used to format error string.
    """
    printmsg('please attach this %s to a new github issue (https://github.com/iannesbitt/readgssi/issues/new)' % filetype)
    printmsg('        or send it to ian.nesbitt@gmail.com in order to have the format assessed. please also')
    printmsg('        include the output of the program (i.e. copy and paste this text and the text above in')
    printmsg('        the message) as this will drastically speed up my ability to help you! thanks!')
    printmsg('  ~!~>  I am happy to help but please note that I am not responsible for the content of your')
    printmsg('  ~!~>  files, only the working-ness of this software. I appreciate your understanding!')
 def dzxerror(e=''):
    """Prints an error message then calls `gpx2dzg.functions.genericerror()` and passes `filetype='DZX'`.
    Parameters
    ----------
    e : str
        The error message to print.
    """
    printmsg('ERROR TEXT: %s' % e)
    genericerror('DZX')
 def dzterror(e=''):
    """Prints an error message then calls `gpx2dzg.functions.genericerror()` and passes `filetype='DZT'`.
    Parameters
    ----------
    e : str
        The error message to print.
    """
    printmsg('ERROR TEXT: %s' % e)
    genericerror('DZT')
 def naming(outfile=None, infile_basename=None, chans=[1], chan=0, normalize=False, zero=None, stack=1, reverse=False,
           bgr=False, win=None, gain=None, dewow=None, freqmin=None, freqmax=None, plotting=None, zoom=None,
           absval=False):
    """
    The Dr. Seth W. Campbell Honorary Naming Scheme
    Descriptive naming, used to indicate the processing steps done on each file, if a specific output filename is not given. The theory behind this naming scheme is simple: it can be tough to remember how you made that plot!
    Named for my graduate advisor, whom I love and cherish, who introduced me to this way of naming outputs.
    .. literalinclude:: naming.txt
    :param str outfile: The base output filename. If None, a new :code:`outfile` will be generated from the input file basename. If it already exists, subsequent arguments will be appended. Defaults to None.
    :param str infile_basename: The input file basename (without file extension). Defaults to None.
    :param list[int,int,int,int] chans: A list of channels, which is converted to the number of channels using :py:func:`len`. Defaults to [1].
    :param int chan: The current channel number. Defaults to 0.
    :param bool normalize: Whether or not the array is distance-normalized. Defaults to False.
    :param int zero: The zero point for this particular channel. Defaults to None.
    :param int stack: The number of times the array was stacked. Defaults to 1.
    :param bool reverse: Whether or not the file was reversed. Defaults to False.
    :param bool bgr: Whether or not BGR was applied. Defaults to False.
    :param int win: The BGR window size if applicable. 0 is full-width BGR, greater than 0 is window size. Defaults to None.
    :param float gain: The gain value applied to the plot. Defaults to None.
    :param bool dewow: Whether or not dewow was applied. Defaults to None.
    :param int freqmin: The lower corner of the bandpass filter if applicable. Defaults to None.
    :param int freqmax: The upper corner of the bandpass filter if applicable. Defaults to None.
    :param int plotting: Stand-in for whether or not a plot was generated. The integer represents the plot height. Defaults to None.
    :param list[int,int,int,int] zoom: The zoom extents applied to the image. Defaults to None.
    :param bool absval: Whether or not the plot is displayed with absolute value of gradient. Defaults to False.
    """
    if outfile == None:
        outfile = '%s' % (os.path.join(infile_basename))
    else:
        if len(chans) == 1:
            return os.path.splitext(outfile)[0]
        else:
            printmsg('Warning: Input file has multiple channels but output filename was explicitly set. Avoiding clobbering by using incremental naming.')
            return '%sCh%s' % (os.path.splitext(outfile)[0], chan)
    if len(chans) > 1:
        outfile = '%sCh%s' % (outfile, chan)
    if zero and (zero > 0):
        outfile = '%sTz%s' % (outfile, zero)
    if normalize:
        outfile = '%sDn' % (outfile)
    if freqmin and freqmax:
        outfile = '%sB%s-%s' % (outfile, freqmin, freqmax)
    if dewow:
        outfile = '%sDw' % (outfile)
    if stack > 1:
        outfile = '%sS%s' % (outfile, stack)
    if bgr:
        outfile = '%sBgr%s' % (outfile, win)
    if reverse:
        outfile = '%sRv' % (outfile)
    if plotting:
        outfile = '%sG%s' % (outfile, int(gain))
    if absval:
        outfile = '%sAbs' % (outfile)
    if zoom:
        outfile = '%sZ' % (outfile)
        for ex in zoom:
            outfile = '%s.%s' % (outfile, ex)
    return outfile
 def zoom(zoom, extent, x, z, verbose=False):
    """
    Logic to figure out how to set zoom extents. If specified limits are out of bounds, they are set back to boundary extents. If limits of a specified axis are equal, they are expanded to the full extent of that axis.
    :param list[int,int,int,int] zoom: Zoom extents to set programmatically for matplotlib plots. Must pass a list of four integers: :py:data:`[left, right, up, down]`. Since the z-axis begins at the top, the "up" value is actually the one that displays lower on the page. All four values are axis units, so if you are working in nanoseconds, 10 will set a limit 10 nanoseconds down. If your x-axis is in seconds, 6 will set a limit 6 seconds from the start of the survey. It may be helpful to display the matplotlib interactive window at full extents first, to determine appropriate extents to set for this parameter. If extents are set outside the boundaries of the image, they will be set back to the boundaries. If two extents on the same axis are the same, the program will default to plotting full extents for that axis.
    :param list[int,int,int,int] extent: Full extent boundaries of the image, in the style :py:data:`[left, right, up, down]`.
    :param str x: X axis units
    :param str z: Z axis units
    :param bool verbose: Verbose, defaults to False
    """
    for i in range(4):
        if zoom[i] < 0:
            zoom[i] = 0
            if verbose:
                printmsg('WARNING: %s zoom limit was negative, now set to zero' % ['left','right','up','down'][i])
    if (zoom[0] > 0) or (zoom[1] > 0): # if a LR value has been set
        if zoom[0] > extent[1]: # if L zoom is beyond extents, set back to extent limit
            zoom[0] = extent[1]
            if verbose:
                printmsg('WARNING: left zoom limit out of bounds (limit is %s %s)' % (extent[1], x))
        if zoom[1] > extent[1]: # if R zoom is beyond extents, set back to extent limit
            zoom[1] = extent[1]
            if verbose:
                printmsg('WARNING: right zoom limit out of bounds (limit is %s %s)' % (extent[1], x))
        if zoom[0] == zoom[1]: # if LR extents are impossible,
            zoom[0] = extent[0] # set both to full extents
            zoom[1] = extent[1]
            if verbose:
                printmsg('WARNING: left and right zoom values were equal or both out of bounds, now set to full extent')
    else:
        zoom[0] = extent[0]
        zoom[1] = extent[1]
    if (zoom[2] > 0) or (zoom[3] > 0): # if a UD value has been set
        if zoom[2] > extent[2]: # if upper zoom is beyond extents, set back to extent limit
            zoom[2] = extent[2]
            if verbose:
                printmsg('WARNING: upper zoom limit out of bounds (limit is %s %s)' % (extent[3], z))
        if zoom[3] > extent[2]: # if lower zoom is beyond extents, set back to extent limit
            zoom[3] = extent[2]
            if verbose:
                printmsg('WARNING: lower zoom limit out of bounds (limit is %s %s)' % (extent[3], z))
        if zoom[2] == zoom[3]: # if UD extents are impossible,
            zoom[2] = extent[2] # set both to full extents
            zoom[3] = extent[3]
            if verbose:
                printmsg('WARNING: top and bottom zoom values were equal or both out of bounds, now set to full extent')
    else:
        zoom[2] = extent[2]
        zoom[3] = extent[3]
    return zoom
--- a/readgssi/gps.py
+++ b/readgssi/gps.py
@@ -0,0 +1,351 @@
 from datetime import datetime, timedelta
 import pynmea2
 import math
 import os
 import numpy as np
 import pandas as pd
 from geopy.distance import geodesic
 import readgssi.functions as fx
 from readgssi.constants import TZ
 from math import sin, cos, sqrt, atan2, radians
 from shutil import copyfile
 """
 contains functions for reading gps data from various formats
 """
 def msgparse(msg):
    """
    .. deprecated:: 0.0.12
    This function returns the NMEA message variables shared by both RMC and GGA.
    :param pynmea2.nmea.NMEASentence msg: A pynmea2 sentence object.
    :rtype: :py:class:`datetime.datetime`, :py:class:`float`, :py:class:`float`
    """
    return msg.timestamp, msg.latitude, msg.longitude
 def readdzg(fi, frmt, header, verbose=False):
    """
    A parser to extract gps data from DZG file format. DZG contains raw NMEA sentences, which should include at least RMC and GGA.
    NMEA RMC sentence string format:
    :py:data:`$xxRMC,UTC hhmmss,status,lat DDmm.sss,lon DDDmm.sss,SOG,COG,date ddmmyy,checksum \*xx`
    NMEA GGA sentence string format:
    :py:data:`$xxGGA,UTC hhmmss.s,lat DDmm.sss,lon DDDmm.sss,fix qual,numsats,hdop,mamsl,wgs84 geoid ht,fix age,dgps sta.,checksum \*xx`
    Shared message variables between GGA and RMC: timestamp, latitude, and longitude
    RMC contains a datestamp which makes it preferable, but this parser will read either.
    :param str fi: File containing gps information
    :param str frmt: GPS information format ('dzg' = DZG file containing gps sentence strings (see below); 'csv' = comma separated file with: lat,lon,elev,time)
    :param dict header: File header produced by :py:func:`readgssi.dzt.readdzt`
    :param bool verbose: Verbose, defaults to False
    :rtype: GPS data (pandas.DataFrame)
        The dataframe contains the following fields:
        * datetimeutc (:py:class:`datetime.datetime`)
        * trace (:py:class:`int` trace number)
        * longitude (:py:class:`float`)
        * latitude (:py:class:`float`)
        * altitude (:py:class:`float`)
        * velocity (:py:class:`float`)
        * sec_elapsed (:py:class:`float`)
        * meters (:py:class:`float` meters traveled)
    """
    if header['rhf_spm'] == 0:
        spu = header['rhf_sps']
    else:
        spu = header['rhf_spm']
    array = pd.DataFrame(columns=['datetimeutc', 'trace', 'longitude', 'latitude', # our dataframe
                                  'altitude', 'velocity', 'sec_elapsed', 'meters'])
    trace = 0 # the elapsed number of traces iterated through
    tracenum = 0 # the sequential increase in trace number
    rownp = 0 # array row number
    rowrmc = 0 # rmc record iterated through (gps file)
    rowgga = 0 # gga record
    sec_elapsed = 0 # number of seconds since the start of the line
    m = 0 # meters traveled over entire line
    m0, m1 = 0, 0 # meters traveled as of last, current loop
    u = 0 # velocity
    u0 = 0 # velocity on last loop
    timestamp = False
    prevtime = False
    init_time = False
    td = False
    prevtrace = False
    rmc, gga = False, False
    rmcwarn = True
    lathem = 'north'
    lonhem = 'east'
    x0, x1, y0, y1 = False, False, False, False # coordinates
    z0, z1 = 0, 0
    x2, y2, z2, sec2 = 0, 0, 0, 0
    with open(fi, 'r') as gf:
        if verbose:
            fx.printmsg('using gps file:     %s' % (fi))
        if frmt == 'dzg': # if we're working with DZG format
            for ln in gf: # loop through the first few sentences, check for RMC
                if 'RMC' in ln: # check to see if RMC sentence (should occur before GGA)
                    rmc = True
                    if rowrmc == 0:
                        msg = pynmea2.parse(ln.rstrip()) # convert gps sentence to pynmea2 named tuple
                        ts0 = datetime.combine(msg.datestamp, msg.timestamp).replace(tzinfo=TZ) # row 0's timestamp (not ideal)
                    if rowrmc == 1:
                        msg = pynmea2.parse(ln.rstrip())
                        ts1 = datetime.combine(msg.datestamp, msg.timestamp).replace(tzinfo=TZ) # row 1's timestamp (not ideal)
                        td = ts1 - ts0 # timedelta = datetime1 - datetime0
                    rowrmc += 1
                if 'GGA' in ln:
                    gga = True
                    if rowgga == 0:
                        msg = pynmea2.parse(ln.rstrip()) # convert gps sentence to pynmea2 named tuple
                        ts0 = datetime.combine(datetime(1980, 1, 1), msg.timestamp).replace(tzinfo=TZ) # row 0's timestamp (not ideal)
                    if rowgga == 1:
                        msg = pynmea2.parse(ln.rstrip())
                        ts1 = datetime.combine(datetime(1980, 1, 1), msg.timestamp).replace(tzinfo=TZ) # row 1's timestamp (not ideal)
                        td = ts1 - ts0 # timedelta = datetime1 - datetime0
                    rowgga += 1
            gpssps = 1 / td.total_seconds() # GPS samples per second
            if (rmcwarn) and (rowrmc == 0):
                fx.printmsg('WARNING: no RMC sentences found in GPS records. this could become an issue if your file goes through 00:00:00.')
                fx.printmsg("         if you get a time jump error please open a github issue at https://github.com/iannesbitt/readgssi/issues")
                fx.printmsg("         and attach the verbose output of this script plus a zip of the DZT and DZG files you're working with.")
                rmcwarn = False
            if (rmc and gga) and (rowrmc != rowgga):
                if verbose:
                    fx.printmsg('WARNING: GGA and RMC sentences are not recorded at the same rate! This could cause unforseen problems!')
                    fx.printmsg('    rmc: %i records' % rowrmc)
                    fx.printmsg('    gga: %i records' % rowgga)
            if verbose:
                ss0, ss1, ss2 = '', '', ''
                if gga:
                    ss0 = 'GGA'
                if rmc:
                    ss2 = 'RMC'
                if gga and rmc:
                    ss1 = ' and '
                fx.printmsg('found %i %s%s%s GPS epochs at rate of ~%.2f Hz' % (rowrmc, ss0, ss1, ss2, gpssps))
                fx.printmsg('reading gps locations to data frame...')
            gf.seek(0) # back to beginning of file
            rowgga, rowrmc = 0, 0
            for ln in gf: # loop over file line by line
                if '$GSSIS' in ln:
                    # if it's a GSSI sentence, grab the scan/trace number
                    trace = int(ln.split(',')[1])
                if (rmc and gga) and ('GGA' in ln):
                    # RMC doesn't use altitude so if it exists we include it from a neighboring GGA
                    z1 = pynmea2.parse(ln.rstrip()).altitude
                    if rowrmc != rowgga:
                        # this takes care of the case where RMC lines occur above GGA
                        z0 = array['altitude'].iat[rowgga]
                        array['altitude'].iat[rowgga] = z1
                    rowgga += 1
                if rmc == True: # if there is RMC, we can use the full datestamp but there is no altitude
                    if 'RMC' in ln:
                        msg = pynmea2.parse(ln.rstrip())
                        timestamp = datetime.combine(msg.datestamp, msg.timestamp).replace(tzinfo=TZ) # set t1 for this loop
                        u = msg.spd_over_grnd * 0.514444444 # convert from knots to m/s
                        sec1 = timestamp.timestamp()
                        x1, y1 = float(msg.longitude), float(msg.latitude)
                        if msg.lon_dir in 'W':
                            lonhem = 'west'
                        if msg.lat_dir in 'S':
                            lathem = 'south'
                        if rowrmc != 0:
                            elapsedelta = timestamp - prevtime # t1 - t0 in timedelta format
                            elapsed = float((timestamp-init_time).total_seconds()) # seconds elapsed
                            m += u * elapsedelta.total_seconds()
                        else:
                            u = 0
                            m = 0
                            elapsed = 0
                            if verbose:
                                fx.printmsg('record starts in %s and %s hemispheres' % (lonhem, lathem))
                        x0, y0, z0, sec0, m0 = x1, y1, z1, sec1, m # set xyzs0 for next loop
                        prevtime = timestamp # set t0 for next loop
                        if rowrmc == 0:
                            init_time = timestamp
                        prevtrace = trace
                        array = array.append({'datetimeutc':timestamp.strftime('%Y-%m-%d %H:%M:%S.%f %z'),
                                              'trace':trace, 'longitude':x1, 'latitude':y1, 'altitude':z1,
                                              'velocity':u, 'sec_elapsed':elapsed, 'meters':m}, ignore_index=True)
                        rowrmc += 1
                else: # if no RMC, we hope there is no UTC 00:00:00 in the file.........
                    if 'GGA' in ln:
                        msg = pynmea2.parse(ln.rstrip())
                        if  msg.timestamp:
                            timestamp = datetime.combine(header['rhb_cdt'], msg.timestamp).replace(tzinfo=TZ) # set t1 for this loop
                            sec1 = timestamp.timestamp()
                            x1, y1 = float(msg.longitude), float(msg.latitude)
                            try:
                                z1 = float(msg.altitude)
                            except AttributeError:
                                z1 = 0
                            if msg.lon_dir in 'W':
                                lonhem = 'west'
                            if msg.lat_dir in 'S':
                                lathem = 'south'
                            if rowgga != 0:
 								#If the difference in Altitude (z) between points is not 0, uses Pythagorean Theorem to better calculate the distance 
                                if abs(z1 - z0) > 1e-6:
                                    m_geodesic = geodesic((y1, x1), (y0, x0)).meters 
                                    m += math.sqrt(m_geodesic**2 + (z1 - z0)**2)
                                else:
                                    m += geodesic((y1, x1, z1), (y0, x0, z0)).meters
                                if rmc == False:
                                    u = float((m - m0) / (sec1 - sec0))
                                elapsedelta = timestamp - prevtime # t1 - t0 in timedelta format
                                elapsed = float((timestamp-init_time).total_seconds()) # seconds elapsed
                                if elapsed > 3600.0:
                                    fx.printmsg("WARNING: Time jumps by more than an hour in this GPS dataset and there are no RMC sentences to anchor the datestamp!")
                                    fx.printmsg("         This dataset may cross over the UTC midnight dateline!\nprevious timestamp: %s\ncurrent timestamp:  %s" % (prevtime, timestamp))
                                    fx.printmsg("         trace number:       %s" % trace)
                            else:
                                u = 0
                                m = 0
                                elapsed = 0
                                if verbose:
                                    fx.printmsg('record starts in %s and %s hemispheres' % (lonhem, lathem))
                            x0, y0, z0, sec0, m0 = x1, y1, z1, sec1, m # set xyzs0 for next loop
                            prevtime = timestamp # set t0 for next loop
                            if rowgga == 0:
                                init_time = timestamp
                            prevtrace = trace
                            array = array.append({'datetimeutc':timestamp.strftime('%Y-%m-%d %H:%M:%S.%f %z'),
                                                  'trace':trace, 'longitude':x1, 'latitude':y1, 'altitude':z1,
                                                  'velocity':u, 'sec_elapsed':elapsed, 'meters':m}, ignore_index=True)
                            rowgga += 1
            if verbose:
                if rmc:
                    fx.printmsg('processed %i gps epochs (RMC)' % (rowrmc))
                else:
                    fx.printmsg('processed %i gps epochs (GGA)' % (rowgga))
        elif frmt == 'csv':
            with open(fi, 'r') as f:
                gps = np.fromfile(f)
    array['datetimeutc'] = pd.to_datetime(array['datetimeutc'], format='%Y-%m-%d %H:%M:%S.%f +0000', utc=True)
    array.set_index('datetimeutc', inplace=True)
    ## testing purposes
    if True:
        if verbose:
            fx.printmsg('writing GPS to %s-gps.csv' % (fi))
        array.to_csv('%s-gps.csv' % (fi))
    return array
 def pause_correct(header, dzg_file, verbose=False, **kwargs):
    '''
    This is a streamlined way of removing pauses from DZG files and re-assigning trace values.
    GSSI controllers have a bug in which GPS sentences are collected with increasing trace numbers even though radar trace collection is stopped.
    This results in a misalignment between GPS and radar traces of the same number.
    The function attempts to realign the trace numbering in the GPS file by identifying "pauses", i.e. when velocity drops below a certain value for longer than a certain number of epochs.
    This assumes that each GPS pause period is correctly timed with the surveyor pressing the pause/unpause button on the radar control unit.
    .. warning:: This function will identify and remove ALL pauses longer than 3 epochs and renumber the traces accordingly. Obviously this can have unintended consequences if the radar controller remains collecting data during these periods. Please be extremely cautious and only use this functionality on files you know have radar pauses that are accompanied by movement pauses. A backup of the original DZG file is made each time this function is run on a file, which means that if you make a mistake, you can simply copy the DZG backup (.DZG.bak) and overwrite the output (.DZG).
    .. note:: Any time you are working with original files, it is always good to have a "working" and "raw" copy of your data. Experimental functionality in readgssi cannot be held responsible for its actions in modifying data. You are responsible for keeping a raw backup of your data just in case.
    A detailed explanation of each step taken by this function is available in the code comments.
    :param dict header: File header produced by :py:func:`readgssi.dzt.readdzt`
    :param str dzg_file: DZG GPS file (the original .DZG, not the backup)
    :param float threshold: Numerical velocities threshold, under which will be considered a "pause" (default: 0.25)
    :param bool verbose: Verbose, defaults to False
    :rtype: corrected, de-paused GPS data (pandas.DataFrame)
    '''
    output_file = dzg_file                  # we need RADAN and readgssi to find the modified DZG file
    backup_file = dzg_file + '.bak'         # so we back it up and rewrite the original
    # ensuring a DZG backup
    if os.path.isfile(dzg_file):            # if there is a DZG
        if not os.path.isfile(backup_file): # if there is not a backup, make one
            if verbose:
                fx.printmsg('backing up original DZG file to %s' % (backup_file))
            copyfile(dzg_file, backup_file)
        else:                               # if there already is a backup
            if verbose:
                fx.printmsg('found backed up DZG file %s' % (backup_file))
        dzg_file = backup_file              # we always want to draw from the backup and write to the main DZG file
        # threshold default
        threshold = 0.25
        # get threshold value from kwargs if it exists
        for arg in kwargs:
            if arg == "threshold":
                if (type(kwargs[arg]) == float) or (type(kwargs[arg]) == int):
                    threshold = float(kwargs[arg])
        # pandas ninja maneuvers to get a list of pause boundaries
        orig_gps = readdzg(fi=backup_file, frmt='dzg', header=header, verbose=False)    # get original GPS values
        orig_gps['groups'] = pd.cut(orig_gps.velocity,[-1,threshold,100000])            # segment file into groups based on velocity
        orig_gps['cats'] = (orig_gps.groups != orig_gps.groups.shift()).cumsum()        # give each group a number
        orig_gps['threshold'] = 0                                                       # create threshold column
        orig_gps.loc[3:-3, 'threshold'] = threshold                                     # ignoring start and end epochs, assign threshold
        orig_gps['mask'] = orig_gps['velocity'] < orig_gps['threshold']                 # where does velocity fall beneath threshold
        exceeds = orig_gps[orig_gps['mask'] == True]                                    # create a view based on non exceedence rows
        bounds = []
        if exceeds.cats.count() > 0:                                                    # test whether any exceedences were detected
            for i in range(exceeds.cats.max() + 1):                                     # for each group in the view
                if exceeds[exceeds['cats'] == i].count().cats > 3:                      # are there more than 3 epochs in the group?
                    bounds.append([exceeds[exceeds['cats'] == i].iloc[0].name,          # if so create a data point in the list
                                   exceeds[exceeds['cats'] == i].iloc[-1].name,         # with time and trace number of boundary
                                   exceeds[exceeds['cats'] == i].iloc[0].trace,
                                   exceeds[exceeds['cats'] == i].iloc[-1].trace])
        def new_row_value(trace):   # quick and dirty function to figure out new trace number
            subtract = 0
            for pause in bounds:
                if (trace >= pause[2]) and (trace >= pause[3]):
                    subtract = subtract + pause[3] - pause[2]
            return trace - subtract
        if verbose:
            fx.printmsg('found %s pause periods' % len(bounds))
        if len(bounds) > 0:
            i = 1
            for pause in bounds:
                fx.printmsg('pause %s' % i)
                fx.printmsg('  start trace: %s (%s)' % (pause[2], pause[0]))
                fx.printmsg('    end trace: %s (%s)' % (pause[3], pause[1]))
                i += 1
            if verbose:
                fx.printmsg('transcribing DZG file with new trace values...')
            with open(dzg_file, 'r') as gf:         # gps file
                with open(output_file, 'w') as tf:  # transcription file
                    subtract_amount = 0
                    for ln in gf: # loop over backup file line by line
                        if '$GSSIS' in ln:
                            write = True            # assume this line will get written 
                            # if it's a GSSI sentence, grab the scan/trace number
                            trace = int(ln.split(',')[1])
                            for pause in bounds:    # for each pause period
                                if (trace >= pause[2]) and (trace <= pause[3]): # if trace is in pause bounds
                                    write = False   # do not write this group of sentences to file
                            if write:               # if it is outside of a pause period
                                tf.write(ln.replace(str(trace), str(new_row_value(trace))))   # replace trace value and write line to file
                        else:                       # if it's not a GSSI proprietary line
                            if write:               # and it's outside a pause priod
                                tf.write(ln)        # transcribe line to new file
            if verbose:
                fx.printmsg('done. reading new values into array...')
    else:
        fx.printmsg('no dzg file found at (%s)' % (dzg_file))
    return readdzg(fi=output_file, frmt='dzg', header=header, verbose=verbose)
--- a/readgssi/plot.py
+++ b/readgssi/plot.py
@@ -0,0 +1,287 @@
 import os
 import numpy as np
 import matplotlib as mpl
 import matplotlib.pyplot as plt
 import matplotlib.colors as colors
 import readgssi.functions as fx
 from readgssi.constants import *
 # figure out what formats are available
 def get_supported_filetypes():
    fig = plt.Figure()
    fmts = fig.canvas.get_supported_filetypes()
    plt.close('all')
    fmtst = ''
    for fmt in fmts:
        fmtst += '    %-10s  |  %s\n' % (fmt, fmts[fmt])
    return fmts, fmtst
 fmts, fmtst = get_supported_filetypes()
 """
 contains several plotting functions
 """
 def histogram(ar, verbose=True):
    """
    Shows a y-log histogram of data value distribution.
    :param numpy.ndarray ar: The radar array
    :param bool verbose: Verbose, defaults to False
    """
    mean = np.mean(ar)
    std = np.std(ar)
    ll = mean - (std * 3) # lower color limit
    ul = mean + (std * 3) # upper color limit
    if verbose:
        fx.printmsg('drawing log histogram...')
        fx.printmsg('mean:               %s (if high, use background removal)' % mean)
        fx.printmsg('stdev:              %s' % std)
        fx.printmsg('lower limit:        %s [mean - (3 * stdev)]' % ll)
        fx.printmsg('upper limit:        %s [mean + (3 * stdev)]' % ul)
    fig = plt.figure()
    hst = plt.hist(ar.ravel(), bins=256, range=(ll, ul), fc='k', ec='k')
    plt.yscale('log', nonposy='clip')
    plt.show()
 def spectrogram(ar, header, freq, tr='auto', verbose=True):
    """
    Displays a spectrogram of the center trace of the array. This is for testing purposes and not accessible from the command prompt.
    :param numpy.ndarray ar: The radar array
    :param dict header: The file header dictionary
    :type tr: int or str
    :param tr: The trace to display the spectrogram for. Defaults to "auto" but can be an integer representing the trace number to plot. "auto" will pick a trace roughly halfway through the array.
    :param bool verbose: Verbose, defaults to False
    """
    import obspy.imaging.spectrogram as sg # buried here, to avoid obspy compatibility issues
    if tr == 'auto':
        tr = int(ar.shape[1] / 2)
    if verbose:
        fx.printmsg('converting trace %s to frequency domain and drawing spectrogram...' % (tr))
    samp_rate = header['samp_freq']
    trace = ar.T[tr]
    sg.spectrogram(data=trace, samp_rate=samp_rate, wlen=samp_rate/1000, per_lap = 0.99, dbscale=True,
             title='Trace %s Spectrogram - Antenna Frequency: %.2E Hz - Sampling Frequency: %.2E Hz' % (tr, freq, samp_rate))
 def radargram(ar, ant, header, freq, figsize='auto', gain=1, stack=1, x='seconds', z='nanoseconds', title=True,
              colormap='gray', colorbar=False, absval=False, noshow=False, win=None, outfile='readgssi_plot',
              fmt='png', zero=2, zoom=[0,0,0,0], dpi=150, showmarks=False, verbose=False):
    """
    Function that creates, modifies, and saves matplotlib plots of radargram images. For usage information, see :doc:`plotting`.
    :param numpy.ndarray ar: The radar array
    :param int ant: Antenna channel number
    :param dict header: Radar file header dictionary
    :param int freq: Antenna frequency
    :param float plotsize: The height of the output plot in inches
    :param float gain: The gain applied to the image. Must be positive but can be between 0 and 1 to reduce gain.
    :param int stack: Number of times the file was stacked horizontally. Used to calculate traces on the X axis.
    :param str x: The units to display on the x-axis during plotting. Defaults to :py:data:`x='seconds'`. Acceptable values are :py:data:`x='distance'` (which sets to meters), :py:data:`'km'`, :py:data:`'m'`, :py:data:`'cm'`, :py:data:`'mm'`, :py:data:`'kilometers'`, :py:data:`'meters'`, etc., for distance; :py:data:`'seconds'`, :py:data:`'s'`, :py:data:`'temporal'` or :py:data:`'time'` for seconds, and :py:data:`'traces'`, :py:data:`'samples'`, :py:data:`'pulses'`, or :py:data:`'columns'` for traces.
    :param str z: The units to display on the z-axis during plotting. Defaults to :py:data:`z='nanoseconds'`. Acceptable values are :py:data:`z='depth'` (which sets to meters), :py:data:`'m'`, :py:data:`'cm'`, :py:data:`'mm'`, :py:data:`'meters'`, etc., for depth; :py:data:`'nanoseconds'`, :py:data:`'ns'`, :py:data:`'temporal'` or :py:data:`'time'` for seconds, and :py:data:`'samples'` or :py:data:`'rows'` for samples.
    :param bool title: Whether to add a title to the figure. Defaults to True.
    :param matplotlib.colors.Colormap colormap: The matplotlib colormap to use, defaults to 'gray' which is to say: the same as the default RADAN colormap
    :param bool colorbar: Whether to draw the colorbar. Defaults to False.
    :param bool absval: Whether to draw the array with an absolute value scale. Defaults to False.
    :param bool noshow: Whether to suppress the matplotlib figure GUI window. Defaults to False, meaning the dialog will be displayed.
    :param int win: Window size for background removal filter :py:func:`readgssi.filtering.bgr` to display in plot title.
    :param str outfile: The name of the output file. Defaults to :py:data:`fmt=readgssi_plot` (a filename :py:data:`readgssi_plot`, not including the extension which is determined by the :py:data:`fmt` variable, in the current directory).
    :param str fmt: The format of the output file. Defaults to :py:data:`fmt='png'`. Acceptable values come from :py:func:`matplotlib.backend_bases.FigureCanvasBase.get_supported_filetypes`.
    :param int zero: The zero point. This represents the number of samples sliced off the top of the profile by the timezero option in :py:func:`readgssi.readgssi.readgssi`.
    :param list[int,int,int,int] zoom: Zoom extents for matplotlib plots. Must pass a list of four integers: :py:data:`[left, right, up, down]`. Since the z-axis begins at the top, the "up" value is actually the one that displays lower on the page. All four values are axis units, so if you are working in nanoseconds, 10 will set a limit 10 nanoseconds down. If your x-axis is in seconds, 6 will set a limit 6 seconds from the start of the survey. It may be helpful to display the matplotlib interactive window at full extents first, to determine appropriate extents to set for this parameter. If extents are set outside the boundaries of the image, they will be set back to the boundaries. If two extents on the same axis are the same, the program will default to plotting full extents for that axis.
    :param int dpi: The dots per inch value to use when creating images. Defaults to 150.
    :param bool showmarks: Whether to plot user marks as vertical lines. Defaults to False.
    :param bool verbose: Verbose, defaults to False
    """
    # having lots of trouble with this line not being friendly with figsize tuple (integer coercion-related errors)
    # so we will force everything to be integers explicitly
    if figsize != 'auto':
        figx, figy = int(int(figsize)*int(int(ar.shape[1])/int(ar.shape[0]))), int(figsize)-1 # force to integer instead of coerce
        if figy <= 1:
            figy += 1 # avoid zero height error in y dimension
        if figx <= 1:
            figx += 1 # avoid zero height error in x dimension
        if verbose:
            fx.printmsg('plotting %sx%sin image with gain=%s...' % (figx, figy, gain))
        fig, ax = plt.subplots(figsize=(figx, figy), dpi=dpi)
    else:
        if verbose:
            fx.printmsg('plotting with gain=%s...' % gain)
        fig, ax = plt.subplots()
    mean = np.mean(ar)
    if verbose:
        fx.printmsg('image stats')
        fx.printmsg('size:               %sx%s' % (ar.shape[0], ar.shape[1]))
        fx.printmsg('mean:               %.3f' % mean)
    if absval:
        fx.printmsg('plotting absolute value of array gradient')
        ar = np.abs(np.gradient(ar, axis=1))
        flip = 1
        ll = np.min(ar)
        ul = np.max(ar)
        std = np.std(ar)
    else:
        if mean > 1000:
            fx.printmsg('WARNING: mean pixel value is very high. consider filtering with -t')
        flip = 1
        std = np.std(ar)
        ll = mean - (std * 3) # lower color limit
        ul = mean + (std * 3) # upper color limit
        fx.printmsg('stdev:              %.3f' % std)
        fx.printmsg('lower color limit:  %.2f [mean - (3 * stdev)]' % (ll))
        fx.printmsg('upper color limit:  %.2f [mean + (3 * stdev)]' % (ul))
    # X scaling routine
    if (x == None) or (x in 'seconds'): # plot x as time by default
        xmax = header['sec']
        xlabel = 'Time (s)'
    else:
        if (x in ('cm', 'm', 'km')) and (header['rhf_spm'] > 0): # plot as distance based on unit
            xmax = ar.shape[1] / header['rhf_spm']
            if 'cm' in x:
                xmax = xmax * 100.
            if 'km' in x:
                xmax = xmax / 1000.
            xlabel = 'Distance (%s)' % (x)
        else: # else we plot in units of stacked traces
            if header['rhf_spm'] == 0:
                fx.printmsg('samples per meter value is zero. plotting trace numbers instead.')
            xmax = ar.shape[1] # * float(stack)
            xlabel = 'Trace number'
            if stack > 1:
                xlabel = 'Trace number (after %sx stacking)' % (stack)
    # finally, relate max scale value back to array shape in order to set matplotlib axis scaling
    try:
        xscale = ar.shape[1]/xmax
    except ZeroDivisionError:
        fx.printmsg('ERROR: cannot plot x-axis in "%s" mode; header value is zero. using time instead.' % (x))
        xmax = header['sec']
        xlabel = 'Time (s)'
        xscale = ar.shape[1]/xmax
    zmin = 0
    # Z scaling routine
    if (z == None) or (z in 'nanoseconds'): # plot z as time by default
        zmax = header['rhf_range'] #could also do: header['ns_per_zsample'] * ar.shape[0] * 10**10 / 2
        zlabel = 'Two-way time (ns)'
    else:
        if z in ('mm', 'cm', 'm'): # plot z as TWTT based on unit and cr/rhf_epsr value
            zmax = header['rhf_depth'] - header['rhf_top']
            if 'cm' in z:
                zmax = zmax * 100.
            if 'mm' in z:
                zmax = zmax * 1000.
            zlabel = r'Depth at $\epsilon_r$=%.2f (%s)' % (header['rhf_epsr'], z)
        else: # else we plot in units of samples
            zmin = zero
            zmax = ar.shape[0] + zero
            zlabel = 'Sample'
    # finally, relate max scale value back to array shape in order to set matplotlib axis scaling
    try:
        zscale = ar.shape[0]/zmax
    except ZeroDivisionError: # apparently this can happen even in genuine GSSI files
        fx.printmsg('ERROR: cannot plot z-axis in "%s" mode; header max value is zero. using samples instead.' % (z))
        zmax = ar.shape[0]
        zlabel = 'Sample'
        zscale = ar.shape[0]/zmax
    if verbose:
        fx.printmsg('xmax: %.4f %s, zmax: %.4f %s' % (xmax, xlabel, zmax, zlabel))
    extent = [0, xmax, zmax, zmin]
    try:
        if verbose:
            fx.printmsg('attempting to plot with colormap %s' % (colormap))
        img = ax.imshow(ar, cmap=colormap, clim=(ll, ul), interpolation='bicubic', aspect=float(zscale)/float(xscale),
                     norm=colors.SymLogNorm(linthresh=float(std)/float(gain), linscale=flip,
                                            vmin=ll, vmax=ul, base=np.e), extent=extent)
    except:
        fx.printmsg('ERROR: matplotlib did not accept colormap "%s", using gray instead' % colormap)
        fx.printmsg('see examples here: https://matplotlib.org/users/colormaps.html#grayscale-conversion')
        img = ax.imshow(ar, cmap='gray', clim=(ll, ul), interpolation='bicubic', aspect=float(zscale)/float(xscale),
                     norm=colors.SymLogNorm(linthresh=float(std)/float(gain), linscale=flip,
                                            vmin=ll, vmax=ul, base=np.e), extent=extent)
    # user marks
    if showmarks:
        if verbose:
            fx.printmsg('plotting marks at traces: %s' % header['marks'])
        for mark in header['marks']:
            plt.axvline(x=mark/xscale, color='r', linestyle=(0, (14,14)), linewidth=1, alpha=0.7)
    # zooming
    if zoom != [0,0,0,0]: # if zoom is set
        zoom = fx.zoom(zoom=zoom, extent=extent, x=x, z=z, verbose=verbose) # figure out if the user set extents properly
    else:
        zoom = extent # otherwise, zoom is full extents
    if zoom != extent: # if zoom is set correctly, then set new axis limits
        if verbose:
            fx.printmsg('zooming in to %s [xmin, xmax, ymax, ymin]' % zoom)
        ax.set_xlim(zoom[0], zoom[1])
        ax.set_ylim(zoom[2], zoom[3])
        # add zoom extents to file name via the Seth W. Campbell honorary naming scheme
        outfile = fx.naming(outfile=outfile, zoom=[int(i) for i in zoom])
    ax.set_xlabel(xlabel)
    ax.set_ylabel(zlabel)
    if colorbar:
        fig.colorbar(img)
    if title:
        try:
            antfreq = freq[ant]
        except TypeError:
            antfreq = freq
        title = '%s - %s MHz - stacking: %s - gain: %s' % (
                    os.path.basename(header['infile']), antfreq, stack, gain)
        if win:
            if win == 0:
                win = 'full'
            title = '%s - bgr: %s' % (title, win)
        plt.title(title)
    if figx / figy >=1: # if x is longer than y (avoids plotting error where data disappears for some reason)
        plt.tight_layout()#pad=fig.get_size_inches()[1]/4.) # then it's ok to call tight_layout()
    else:
        try:
            # the old way of handling
            #plt.tight_layout(w_pad=2, h_pad=1)
            # the new way of handling
            fx.printmsg('WARNING: axis lengths are funky. using alternative sizing method. please adjust manually in matplotlib gui.')
            figManager = plt.get_current_fig_manager()
            try:
                figManager.window.showMaximized()
            except:
                figManager.resize(*figManager.window.maxsize())
            for item in ([ax.xaxis.label, ax.yaxis.label] +
                        ax.get_xticklabels() + ax.get_yticklabels()):
                item.set_fontsize(5)
            ax.title.set_fontsize(7)
            plt.draw()
            fig.canvas.start_event_loop(0.1)
            plt.tight_layout()
        except:
            fx.printmsg('WARNING: tight_layout() raised an error because axis lengths are funky. please adjust manually in matplotlib gui.')
    if outfile != 'readgssi_plot':
        # if outfile doesn't match this then save fig with the outfile name
        if verbose:
            fx.printmsg('saving figure as %s.%s' % (outfile, fmt))
        plt.savefig('%s.%s' % (outfile, fmt), dpi=dpi, bbox_inches='tight')
    else:
        # else someone has called this function from outside and forgotten the outfile field
        if verbose:
            fx.printmsg('saving figure as %s_%sMHz.%s with dpi=%s' % (os.path.splitext(header['infile'])[0], freq, fmt, dpi))
        plt.savefig('%s_%sMHz.%s' % (os.path.splitext(header['infile'])[0], freq, fmt), bbox_inches='tight')
    if noshow:
        if verbose:
            fx.printmsg('not showing matplotlib')
        plt.close()
    else:
        if verbose:
            fx.printmsg('showing matplotlib figure...')
        plt.show()
--- a/readgssi/readgssi.py
+++ b/readgssi/readgssi.py
@@ -0,0 +1,500 @@
 # coding=utf-8
 ## readgssi.py
 ## intended to translate radar data from DZT to other more workable formats.
 ## DZT is a file format maintained by Geophysical Survey Systems Incorporated (GSSI).
 ## specifically, this script is intended for use with radar data recorded
 ## with GSSI SIR 3000 and 4000 field units. Other field unit models may record DZT slightly
 ## differently, in which case this script may need to be modified.
 # readgssi was originally written as matlab code by
 # Gabe Lewis, Dartmouth College Department of Earth Sciences.
 # matlab code was adapted for python with permission by
 # Ian Nesbitt, University of Maine School of Earth and Climate Sciences.
 # Copyleft (c) 2017 Ian Nesbitt
 # this code is freely available under the GNU Affero General Public License 3.0.
 # if you did not receive a copy of the license upon obtaining this software, please visit
 # (https://opensource.org/licenses/AGPL-3.0) to obtain a copy.
 import sys, getopt, os
 import numpy as np
 from datetime import datetime, timedelta
 import readgssi.functions as fx
 import readgssi.plot as plot
 from readgssi import translate
 from readgssi import filtering
 from readgssi import arrayops
 from readgssi import config
 from readgssi.constants import *
 from readgssi.dzt import *
 from readgssi.gps import pause_correct
 def readgssi(infile, outfile=None, verbose=False, antfreq=None, frmt='python',
             plotting=False, figsize=7, dpi=150, stack=1, x='seconds',
             z='nanoseconds', histogram=False, colormap='gray', colorbar=False,
             zero=[None,None,None,None], gain=1, freqmin=None, freqmax=None, 
             reverse=False, bgr=False, win=0, dewow=False, absval=False,
             normalize=False, specgram=False, noshow=False, spm=None,
             start_scan=0, num_scans=-1, epsr=None, title=True, zoom=[0,0,0,0],
             pausecorrect=False, showmarks=False):
    """
    This is the primary directive function. It coordinates calls to reading, filtering, translation, and plotting functions, and should be used as the overarching processing function in most cases.
    :param str infile: Input DZT data file
    :param str outfile: Base output file name for plots, CSVs, and other products. Defaults to :py:data:`None`, which will cause the output filename to take a form similar to the input. The default will let the file be named via the descriptive naming function :py:data:`readgssi.functions.naming()`.
    :param bool verbose: Whether or not to display (a lot of) information about the workings of the program. Defaults to :py:data:`False`. Can be helpful for debugging but also to see various header values and processes taking place.
    :param int antfreq: User setting for antenna frequency. Defaults to :py:data:`None`, which will cause the program to try to determine the frequency from the antenna name in the header of the input file. If the antenna name is not in the dictionary :py:data:`readgssi.constants.ANT`, the function will try to determine the frequency by decoding integers in the antenna name string.
    :param str frmt: The output format to be passed to :py:mod:`readgssi.translate`. Defaults to :py:data:`None`. Presently, this can be set to :py:data:`frmt='dzt'`, :py:data:`frmt='csv'`, :py:data:`'numpy'`, :py:data:`'gprpy'`, or :py:data:`'object'` (which will return the header dictionary, the image arrays, and the gps coordinates as objects). Plotting will not interfere with output (i.e. you can output to CSV and plot a PNG in the same command).
    :param bool plotting: Whether to plot the radargram using :py:func:`readgssi.plot.radargram`. Defaults to :py:data:`False`.
    :param int figsize: Plot size in inches to be passed to :py:func:`readgssi.plot.radargram`.
    :param int dpi: Dots per inch (DPI) for figure creation.
    :param int stack: Number of consecutive traces to stack (horizontally) using :py:func:`readgssi.arrayops.stack`. Defaults to 1 (no stacking). Especially good for handling long radar lines. Algorithm combines consecutive traces together using addition, which reduces noise and enhances signal. The more stacking is done, generally the clearer signal will become. The tradeoff is that you will reduce the length of the X-axis. Sometimes this is desirable (i.e. for long survey lines).
    :param str x: The units to display on the x-axis during plotting. Defaults to :py:data:`x='seconds'`. Acceptable values are :py:data:`x='distance'` (which sets to meters), :py:data:`'km'`, :py:data:`'m'`, :py:data:`'cm'`, :py:data:`'mm'`, :py:data:`'kilometers'`, :py:data:`'meters'`, etc., for distance; :py:data:`'seconds'`, :py:data:`'s'`, :py:data:`'temporal'` or :py:data:`'time'` for seconds, and :py:data:`'traces'`, :py:data:`'samples'`, :py:data:`'pulses'`, or :py:data:`'columns'` for traces.
    :param str z: The units to display on the z-axis during plotting. Defaults to :py:data:`z='nanoseconds'`. Acceptable values are :py:data:`z='depth'` (which sets to meters), :py:data:`'m'`, :py:data:`'cm'`, :py:data:`'mm'`, :py:data:`'meters'`, etc., for depth; :py:data:`'nanoseconds'`, :py:data:`'ns'`, :py:data:`'temporal'` or :py:data:`'time'` for seconds, and :py:data:`'samples'` or :py:data:`'rows'` for samples.
    :param bool histogram: Whether to plot a histogram of array values at plot time.
    :type colormap: :py:class:`str` or :class:`matplotlib.colors.Colormap`
    :param colormap: Plot using a Matplotlib colormap. Defaults to :py:data:`gray` which is colorblind-friendly and behaves similarly to the RADAN default, but :py:data:`seismic` is a favorite of many due to its diverging nature.
    :param bool colorbar: Whether to display a graded color bar at plot time.
    :param list[int,int,int,int] zero: A list of values representing the amount of samples to slice off each channel. Defaults to :py:data:`None` for all channels, which will end up being set by the :code:`rh_zero` variable in :py:func:`readgssi.dzt.readdzt`.
    :param int gain: The amount of gain applied to plots. Defaults to 1. Gain is applied as a ratio of the standard deviation of radargram values to the value set here.
    :param int freqmin: Minimum frequency value to feed to the vertical triangular FIR bandpass filter :py:func:`readgssi.filtering.triangular`. Defaults to :py:data:`None` (no filter).
    :param int freqmax: Maximum frequency value to feed to the vertical triangular FIR bandpass filter :py:func:`readgssi.filtering.triangular`. Defaults to :py:data:`None` (no filter).
    :param bool reverse: Whether to read the array backwards (i.e. flip horizontally; :py:func:`readgssi.arrayops.flip`). Defaults to :py:data:`False`. Useful for lining up travel directions of files run opposite each other.
    :param int bgr: Background removal filter applied after stacking (:py:func:`readgssi.filtering.bgr`). Defaults to :py:data:`False` (off). :py:data:`bgr=True` must be accompanied by a valid value for :py:data:`win`.
    :param int win: Window size for background removal filter (:py:func:`readgssi.filtering.bgr`). If :py:data:`bgr=True` and :py:data:`win=0`, the full-width row average will be subtracted from each row. If :py:data:`bgr=True` and :py:data:`win=50`, a moving window will calculate the average of 25 cells on either side of the current cell, and subtract that average from the cell value, using :py:func:`scipy.ndimage.uniform_filter1d` with :py:data:`mode='constant'` and :py:data:`cval=0`. This is useful for removing non-uniform horizontal average, but the tradeoff is that it creates ghost data half the window size away from vertical figures, and that a window size set too low will obscure any horizontal layering longer than the window size.
    :param bool dewow: Whether to apply a vertical dewow filter (experimental). See :py:func:`readgssi.filtering.dewow`.
    :param bool absval: If :py:data:`True`, displays the absolute value of the vertical gradient of the array when plotting. Good for displaying faint array features.
    :param bool normalize: Distance normalization (:py:func:`readgssi.arrayops.distance_normalize`). Defaults to :py:data:`False`.
    :param bool specgram: Produce a spectrogram of a trace in the array using :py:func:`readgssi.plot.spectrogram`. Defaults to :py:data:`False` (if :py:data:`True`, defaults to a trace roughly halfway across the profile). This is mostly for debugging and is not currently accessible from the command line.
    :param bool noshow: If :py:data:`True`, this will suppress the matplotlib interactive window and simply save a file. This is useful for processing many files in a folder without user input.
    :param float spm: User-set samples per meter. This overrides the value read from the header, and typically doesn't need to be set if the samples per meter value was set correctly at survey time. This value does not need to be set if GPS input (DZG file) is present and the user sets :py:data:`normalize=True`.
    :param int start_scan: zero based start scan to read data from. Defaults to zero.
    :param int num_scans: number of scans to read from the file, Defaults to -1, which reads from start_scan to end of file.
    :param float epsr: Epsilon_r, otherwise known as relative permittivity, or dielectric constant. This determines the speed at which waves travel through the first medium they encounter. It is used to calculate the profile depth if depth units are specified on the Z-axis of plots.
    :param bool title: Whether to display descriptive titles on plots. Defaults to :py:data:`True`.
    :param list[int,int,int,int] zoom: Zoom extents to set programmatically for matplotlib plots. Must pass a list of four integers: :py:data:`[left, right, up, down]`. Since the z-axis begins at the top, the "up" value is actually the one that displays lower on the page. All four values are axis units, so if you are working in nanoseconds, 10 will set a limit 10 nanoseconds down. If your x-axis is in seconds, 6 will set a limit 6 seconds from the start of the survey. It may be helpful to display the matplotlib interactive window at full extents first, to determine appropriate extents to set for this parameter. If extents are set outside the boundaries of the image, they will be set back to the boundaries. If two extents on the same axis are the same, the program will default to plotting full extents for that axis.
    :rtype: header (:py:class:`dict`), radar arrays by channel :py:class:`dict` {0: :py:class:`numpy.ndarray`, 1: :py:class:`numpy.ndarray`, etc.}, gps (False or :py:class:`pandas.DataFrame`)
    :param bool pausecorrect: If :py:data:`True` or minimum speed given as :py:data:`+float`, search the DZG file for pauses, where GPS keeps recording but radar unit does not, and correct them if necessary. Defaults to :py:data:`False`. Minimum speed defaults to 0.25 m/s.
    :param bool showmarks: If :py:data:`True`, display mark locations in plot. Defaults to :py:data:`False`.
    """
    if infile:
        # read the file
        try:
            if verbose:
                fx.printmsg('reading...')
                fx.printmsg('input file:         %s' % (infile))
            header, data, gps = readdzt(infile, gps=normalize, spm=spm,
                                        start_scan=start_scan, num_scans=num_scans,
                                        epsr=epsr, antfreq=antfreq, zero=zero,
                                        verbose=verbose)
            # print a bunch of header info
            if verbose:
                fx.printmsg('success. header values:')
                header_info(header, data)
        except IOError as e: # the user has selected an inaccessible or nonexistent file
            fx.printmsg("ERROR: DZT file is inaccessable or does not exist at %s" % (os.path.abspath(infile)))
            raise IOError(e)
        infile_ext = os.path.splitext(infile)[1]
        infile_basename = os.path.splitext(infile)[0]
    else:
        raise IOError('ERROR: no input file specified')
    for chan in list(range(header['rh_nchan'])):
        try:
            ANT[header['rh_antname'][chan]]
        except KeyError as e:
            print('--------------------WARNING - PLEASE READ---------------------')
            fx.printmsg('WARNING: could not read frequency for antenna name "%s"' % e)
            if (antfreq != None) and (antfreq != [None, None, None, None]):
                fx.printmsg('using user-specified antenna frequency. Please ensure frequency value or list of values is correct.')
                fx.printmsg('old values: %s' % (header['antfreq']))
                fx.printmsg('new values: %s' % (antfreq))
                header['antfreq'] = antfreq
            else:
                fx.printmsg('WARNING: trying to use frequencies of %s MHz (estimated)...' % (header['antfreq'][chan]))
            fx.printmsg('more info: rh_ant=%s' % (header['rh_ant']))
            fx.printmsg('           known_ant=%s' % (header['known_ant']))
            fx.printmsg("please submit a bug report with this warning, the antenna name and frequency")
            fx.printmsg('at https://github.com/iannesbitt/readgssi/issues/new')
            fx.printmsg('or send via email to ian (dot) nesbitt (at) gmail (dot) com.')
            fx.printmsg('if possible, please attach a ZIP file with the offending DZT inside.')
            print('--------------------------------------------------------------')
    chans = list(range(header['rh_nchan']))
    outfiles = {}
    if (pausecorrect) and (not gps.empty):
        kwargs = {}
        fx.printmsg('correcting GPS errors created by user-initiated recording pauses...')
        if (type(pausecorrect) == float) or (type(pausecorrect) == int):
            kwargs['threshold'] = pausecorrect
            fx.printmsg('pause velocity threshold is %s m/s (user-specified)' % (kwargs['threshold']))
        else:
            fx.printmsg('pause velocity threshold is 0.25 m/s (default)')
        gps = pause_correct(header=header, dzg_file=os.path.splitext(infile)[0] + ".DZG", verbose=verbose, **kwargs)
    elif (pausecorrect) and (gps.empty):
        fx.printmsg("can't correct pauses without a valid DZG file to look for. are you sure the DZG has the same name as the DZT file?")
    for ar in data:
        """
        filter and construct an output file or plot from the current channel's array
        """
        if verbose:
            fx.printmsg('beginning processing for channel %s (antenna %s)' % (ar, header['rh_antname'][ar]))
        # execute filtering functions if necessary
        if normalize:
            header, data[ar], gps = arrayops.distance_normalize(header=header, ar=data[ar], gps=gps,
                                                                  verbose=verbose)
        if dewow:
            # dewow
            data[ar] = filtering.dewow(ar=data[ar], verbose=verbose)
        if freqmin and freqmax:
            # vertical triangular bandpass
            data[ar] = filtering.triangular(ar=data[ar], header=header, freqmin=freqmin, freqmax=freqmax,
                                               zerophase=True, verbose=verbose)
        if stack != 1:
            # horizontal stacking
            header, data[ar], stack = arrayops.stack(ar=data[ar],
                                                      header=header,
                                                      stack=stack,
                                                      verbose=verbose)
        else:
            stack = 1 # just in case it's not an integer
        if bgr:
            # background removal
            data[ar] = filtering.bgr(ar=data[ar], header=header, win=win, verbose=verbose)
        else:
            win = None
        if reverse:
            # read array backwards
            data[ar] = arrayops.flip(data[ar], verbose=verbose)
        ## file naming
        # name the output file
        if ar == 0: # first channel?
            orig_outfile = outfile # preserve the original
        else:
            outfile = orig_outfile # recover the original
        outfiles[ar] = fx.naming(outfile=outfile, infile_basename=infile_basename, chans=chans, chan=ar,
                                 normalize=normalize, zero=header['timezero'][ar], stack=stack, reverse=reverse,
                                 bgr=bgr, win=win, dewow=dewow, freqmin=freqmin, freqmax=freqmax, plotting=plotting,
                                 gain=gain, absval=absval)
        if plotting:
            plot.radargram(ar=data[ar], ant=ar, header=header, freq=header['antfreq'][ar], verbose=verbose,
                           figsize=figsize, dpi=dpi, stack=stack, x=x, z=z, gain=gain, colormap=colormap,
                           colorbar=colorbar, noshow=noshow, outfile=outfiles[ar], fmt=frmt, win=win, title=title,
                           zero=header['timezero'][ar], zoom=zoom, absval=absval, showmarks=showmarks)
        if histogram:
            plot.histogram(ar=data[ar], verbose=verbose)
        if specgram:
            plot.spectrogram(ar=data[ar], header=header, freq=header['antfreq'][ar], verbose=verbose)
    if frmt != None:
        if verbose:
            fx.printmsg('outputting to %s...' % frmt)
        for ar in data:
            # is there an output filepath given?
            outfile_abspath = os.path.abspath(outfiles[ar]) # set output to given location
            # what is the output format
            if frmt in 'csv':
                translate.csv(ar=data[ar], outfile_abspath=outfile_abspath,
                              header=header, verbose=verbose)
            elif frmt in 'h5':
                translate.h5(ar=data[ar], infile_basename=infile_basename,
                             outfile_abspath=outfile_abspath, verbose=verbose)
            elif frmt in 'segy':
                translate.segy(ar=data[ar], outfile_abspath=outfile_abspath,
                               verbose=verbose)
            elif frmt in 'numpy':
                translate.numpy(ar=data[ar], outfile_abspath=outfile_abspath,
                                verbose=verbose)
            elif frmt in 'gprpy':
                translate.gprpy(ar=data[ar], outfile_abspath=outfile_abspath,
                                header=header, verbose=verbose)
            elif frmt in 'dzt':
                if ar == 0:
                    translate.dzt(ar=data, outfile_abspath=outfile_abspath,
                                  header=header, verbose=verbose)
        if frmt in ('object', 'python'):
            return header, data, gps
 def main():
    """
    This function gathers and parses command line arguments with which to create function calls. It is not for use from the python console.
    """
    verbose = True
    title = True
    stack = 1
    win = 0
    dpi = 150
    zero = [None,None,None,None]
    zoom = [0,0,0,0]
    infile, outfile, antfreq, plotting, figsize, histogram, colorbar, dewow, bgr, noshow = None, None, None, None, None, None, None, None, None, None
    reverse, freqmin, freqmax, specgram, normalize, spm, epsr, absval, pausecorrect, showmarks = None, None, None, None, None, None, None, None, None, None
    colormap = 'gray'
    x, z = 'seconds', 'nanoseconds'
    frmt = 'png'
    gain = 1
 # some of this needs to be tweaked to formulate a command call to one of the main body functions
 # variables that can be passed to a body function: (infile, outfile, antfreq=None, frmt, plotting=False, stack=1)
    try:
        opts, args = getopt.getopt(sys.argv[1:],'hVqd:i:a:o:f:p:s:r:RNwnmc:bg:Z:E:t:x:z:Te:D:APM',
            ['help', 'version', 'quiet','spm=','input=','antfreq=','output=','format=','plot=','stack=','bgr=',
            'reverse', 'normalize','dewow','noshow','histogram','colormap=','colorbar','gain=',
            'zero=','epsr=','bandpass=', 'xscale=', 'zscale=', 'titleoff', 'zoom=', 'dpi=', 'absval','pausecorrect',
            'showmarks'])
    # the 'no option supplied' error
    except getopt.GetoptError as e:
        fx.printmsg('ERROR: invalid argument(s) supplied')
        fx.printmsg('error text: %s' % e)
        fx.printmsg(config.help_text)
        sys.exit(2)
    for opt, arg in opts: 
        if opt in ('-h', '--help'): # the help case
            fx.printmsg(config.help_text)
            sys.exit()
        if opt in ('-V', '--version'): # the help case
            print(config.version_text)
            sys.exit()
        if opt in ('-q', '--quiet'):
            verbose = False
        if opt in ('-i', '--input'): # the input file
            if arg:
                infile = arg
                if '~' in infile:
                    infile = os.path.expanduser(infile) # if using --input=~/... tilde needs to be expanded 
        if opt in ('-o', '--output'): # the output file
            if arg:
                outfile = arg
                if '~' in outfile:
                    outfile = os.path.expanduser(outfile) # expand tilde, see above
        if opt in ('-a', '--antfreq'):
            try:
                antfreq = [None, None, None, None]
                freq_arg = arg.split(',')
                for i in range(len(freq_arg)):
                    antfreq[i] = round(abs(float(freq_arg[i])))
            except ValueError:
                fx.printmsg('ERROR: %s is not a valid number or list of frequency values.' % arg)
                fx.printmsg(config.help_text)
                sys.exit(2)
            fx.printmsg('user specified frequency values of %s MHz will be overwritten if DZT header has valid antenna information.' % antfreq)
        if opt in ('-f', '--format'): # the format string
            # check whether the string is a supported format
            if arg:
                arg = arg.lower()
                if arg in ('.dzt', 'dzt', 'gssi', 'radan'):
                    frmt = 'dzt'
                elif arg in ('csv', '.csv'):
                    frmt = 'csv'
                elif arg in ('sgy', 'segy', 'seg-y', '.sgy', '.segy', '.seg-y'):
                    frmt = 'segy'
                elif arg in ('h5', 'hdf5', '.h5', '.hdf5'):
                    frmt = 'h5'
                elif arg in ('numpy', 'npy', '.npy', 'np'):
                    frmt = 'numpy'
                elif arg in ('gprpy'):
                    frmt = 'gprpy'
                elif arg in ('plot'):
                    frmt = 'png'
                    plotting = True
                elif arg in (plot.fmtst):
                    frmt = arg
                    plotting = True
                else:
                    # else the user has given an invalid format
                    fx.printmsg('Invalid file format given.')
                    fx.printmsg(config.help_text)
                    sys.exit(2)
            else:
                fx.printmsg('No file format specified.')
                fx.printmsg(config.help_text)
                sys.exit(2)
        if opt in ('-s', '--stack'):
            if arg:
                if 'auto' in str(arg).lower():
                    stack = 'auto'
                else:
                    try:
                        stack = abs(int(arg))
                    except ValueError:
                        fx.printmsg('ERROR: stacking argument must be a positive integer or "auto".')
                        fx.printmsg(config.help_text)
                        sys.exit(2)
        if opt in ('-r', '--bgr'):
            bgr = True
            if arg:
                try:
                    win = abs(int(arg))
                except:
                    fx.printmsg('ERROR: background removal window must be a positive integer. defaulting to full width.')
        if opt in ('-w', '--dewow'):
            dewow = True
        if opt in ('-R', '--reverse'):
            reverse = True
        if opt in ('-P', '--pausecorr'):
            pausecorrect = True
        if opt in ('-N', '--normalize'):
            normalize = True
        if opt in ('-Z', '--zero'):
            if arg:
                try:
                    zero = list(map(int, arg.split(',')))
                except:
                    fx.printmsg('ERROR: zero correction must be an integer or list')
            else:
                fx.printmsg('WARNING: no zero correction argument supplied')
        if opt in ('-t', '--bandpass'):
            if arg:
                freqmin, freqmax = arg.split('-')
                try:
                    freqmin = int(freqmin)
                    freqmax = int(freqmax)
                except:
                    fx.printmsg('ERROR: filter frequency must be integers separated by a dash (-)')
                    freqmin, freqmax = None, None
            else:
                fx.printmsg('WARNING: no filter frequency argument supplied')
        if opt in ('-n', '--noshow'):
            noshow = True
        if opt in ('-p', '--plot'):
            plotting = True
            if arg:
                if 'auto' in arg.lower():
                    figsize = 8
                else:
                    try:
                        figsize = abs(int(arg))
                    except ValueError:
                        fx.printmsg('ERROR: plot size argument must be a positive integer or "auto".')
                        fx.printmsg(config.help_text)
                        sys.exit(2)
        if opt in ('-d', '--spm'):
            if arg:
                try:
                    spm = float(arg)
                    assert spm > 0
                except:
                    fx.printmsg('ERROR: samples per meter must be positive. defaulting to read from header')
                    spm = None
            else:
                fx.printmsg('WARNING: no samples per meter value given')
        if opt in ('-x', '--xscale'):
            if arg:
                if arg in ('temporal', 'time', 'seconds', 's'):
                    x = 'seconds'
                elif arg in ('spatial', 'distance', 'dist', 'length', 'meters', 'm'):
                    x = 'm'
                elif arg in ('centimeters', 'cm'):
                    x = 'cm'
                elif arg in ('kilometers', 'km'):
                    x = 'km'
                elif arg in ('traces', 'samples', 'pulses', 'columns'):
                    x = 'traces'
                else:
                    fx.printmsg('WARNING: invalid xscale type specified. defaulting to --xscale="seconds"')
                    x = 'seconds'
            else:
                fx.printmsg('WARNING: no xscale type specified. defaulting to --xscale="seconds"')
                x = 'seconds'
        if opt in ('-z', '--zscale'):
            if arg:
                if arg in ('temporal', 'time', 'nanoseconds', 'ns'):
                    z = 'nanoseconds'
                elif arg in ('spatial', 'distance', 'depth', 'length', 'meters', 'm'):
                    z = 'm'
                elif arg in ('centimeters', 'cm'):
                    z = 'cm'
                elif arg in ('millimeters', 'mm'):
                    z = 'mm'
                elif arg in ('samples', 'rows'):
                    z = 'samples'
                else:
                    fx.printmsg('WARNING: invalid zscale type specified. defaulting to --zscale="nanoseconds"')
                    z = 'nanoseconds'
            else:
                fx.printmsg('WARNING: no zscale type specified. defaulting to --zscale="nanoseconds"')
                z = 'nanoseconds'
        if opt in ('-E', '--epsr'):
            try:
                epsr = float(arg)
                if epsr <= 1:
                    raise Exception
            except:
                fx.printmsg('ERROR: invalid value for epsr (epsilon sub r "dielectric permittivity"). using DZT value instead.')
                epsr = None
        if opt in ('-m', '--histogram'):
            histogram = True
        if opt in ('-M', '--showmarks'):
            showmarks = True
        if opt in ('-c', '--colormap'):
            if arg:
                colormap = arg
        if opt in ('-b', '--colorbar'):
            colorbar = True
        if opt in ('-A', '--absval'):
            absval = True
        if opt in ('-g', '--gain'):
            if arg:
                try:
                    gain = abs(float(arg))
                except:
                    fx.printmsg('ERROR: gain must be positive. defaulting to gain=1.')
                    gain = 1
        if opt in ('-T', '--titleoff'):
            title = False
        if opt in ('-e', '--zoom'):
            if arg:
                if True:#try:
                    zoom = list(map(int, arg.split(',')))
                    if len(zoom) != 4:
                        fx.printmsg('ERROR: zoom must be a list of four numbers (zeros are accepted).')
                        fx.printmsg('       defaulting to full extents.')
                        zoom = [0,0,0,0]
                # except Exception as e:
                #     fx.printmsg('ERROR setting zoom values. zoom must be a list of four numbers (zeros are accepted).')
                #     fx.printmsg('       defaulting to full extents.')
                #     fx.printmsg('details: %s' % e)
        if opt in ('-D', '--dpi'):
            if arg:
                try:
                    dpi = abs(int(arg))
                    assert dpi > 0
                except:
                    fx.printmsg('WARNING: DPI could not be set. did you supply a positive integer?')
    # call the function with the values we just got
    if infile:
        if verbose:
            fx.printmsg(config.dist)
        readgssi(infile=infile, outfile=outfile, antfreq=antfreq, frmt=frmt, plotting=plotting, dpi=dpi,
                 figsize=figsize, stack=stack, verbose=verbose, histogram=histogram, x=x, z=z,
                 colormap=colormap, colorbar=colorbar, reverse=reverse, gain=gain, bgr=bgr, win=win,
                 zero=zero, normalize=normalize, dewow=dewow, noshow=noshow, freqmin=freqmin, freqmax=freqmax,
                 spm=spm, epsr=epsr, title=title, zoom=zoom, absval=absval, pausecorrect=pausecorrect,
                 showmarks=showmarks)
        if verbose:
            fx.printmsg('done with %s' % infile)
        print('')
    else:
        fx.printmsg('ERROR: no input file was specified')
        fx.printmsg(config.help_text)
        sys.exit(2)
 if __name__ == "__main__":
    """
    directs the command line call to the main argument parsing function.
    """
    main()
--- a/readgssi/readgssi2.py
+++ b/readgssi/readgssi2.py
@@ -0,0 +1,501 @@
 # coding=utf-8
 ## readgssi.py
 ## intended to translate radar data from DZT to other more workable formats.
 ## DZT is a file format maintained by Geophysical Survey Systems Incorporated (GSSI).
 ## specifically, this script is intended for use with radar data recorded
 ## with GSSI SIR 3000 and 4000 field units. Other field unit models may record DZT slightly
 ## differently, in which case this script may need to be modified.
 # readgssi was originally written as matlab code by
 # Gabe Lewis, Dartmouth College Department of Earth Sciences.
 # matlab code was adapted for python with permission by
 # Ian Nesbitt, University of Maine School of Earth and Climate Sciences.
 # Copyleft (c) 2017 Ian Nesbitt
 # this code is freely available under the GNU Affero General Public License 3.0.
 # if you did not receive a copy of the license upon obtaining this software, please visit
 # (https://opensource.org/licenses/AGPL-3.0) to obtain a copy.
 import sys, getopt, os
 import numpy as np
 from datetime import datetime, timedelta
 import readgssi.functions as fx
 import readgssi.plot as plot
 from readgssi import translate
 from readgssi import filtering
 from readgssi import arrayops
 from readgssi import config
 from readgssi.constants import *
 from readgssi.dzt import *
 from readgssi.gps import pause_correct
 def readgssi2(infile, outfile=None, verbose=False, antfreq=None, frmt='python',
             plotting=False, figsize=7, dpi=150, stack=1, x='seconds',
             z='nanoseconds', histogram=False, colormap='gray', colorbar=False,
             zero=[None,None,None,None], gain=1, freqmin=None, freqmax=None, 
             reverse=False, bgr=False, win=0, dewow=False, absval=False,
             normalize=False, specgram=False, noshow=False, spm=None,
             start_scan=0, num_scans=-1, epsr=None, title=True, zoom=[0,0,0,0],
             pausecorrect=False, showmarks=False):
    """
    This is the primary directive function. It coordinates calls to reading, filtering, translation, and plotting functions, and should be used as the overarching processing function in most cases.
    :param str infile: Input DZT data file
    :param str outfile: Base output file name for plots, CSVs, and other products. Defaults to :py:data:`None`, which will cause the output filename to take a form similar to the input. The default will let the file be named via the descriptive naming function :py:data:`readgssi.functions.naming()`.
    :param bool verbose: Whether or not to display (a lot of) information about the workings of the program. Defaults to :py:data:`False`. Can be helpful for debugging but also to see various header values and processes taking place.
    :param int antfreq: User setting for antenna frequency. Defaults to :py:data:`None`, which will cause the program to try to determine the frequency from the antenna name in the header of the input file. If the antenna name is not in the dictionary :py:data:`readgssi.constants.ANT`, the function will try to determine the frequency by decoding integers in the antenna name string.
    :param str frmt: The output format to be passed to :py:mod:`readgssi.translate`. Defaults to :py:data:`None`. Presently, this can be set to :py:data:`frmt='dzt'`, :py:data:`frmt='csv'`, :py:data:`'numpy'`, :py:data:`'gprpy'`, or :py:data:`'object'` (which will return the header dictionary, the image arrays, and the gps coordinates as objects). Plotting will not interfere with output (i.e. you can output to CSV and plot a PNG in the same command).
    :param bool plotting: Whether to plot the radargram using :py:func:`readgssi.plot.radargram`. Defaults to :py:data:`False`.
    :param int figsize: Plot size in inches to be passed to :py:func:`readgssi.plot.radargram`.
    :param int dpi: Dots per inch (DPI) for figure creation.
    :param int stack: Number of consecutive traces to stack (horizontally) using :py:func:`readgssi.arrayops.stack`. Defaults to 1 (no stacking). Especially good for handling long radar lines. Algorithm combines consecutive traces together using addition, which reduces noise and enhances signal. The more stacking is done, generally the clearer signal will become. The tradeoff is that you will reduce the length of the X-axis. Sometimes this is desirable (i.e. for long survey lines).
    :param str x: The units to display on the x-axis during plotting. Defaults to :py:data:`x='seconds'`. Acceptable values are :py:data:`x='distance'` (which sets to meters), :py:data:`'km'`, :py:data:`'m'`, :py:data:`'cm'`, :py:data:`'mm'`, :py:data:`'kilometers'`, :py:data:`'meters'`, etc., for distance; :py:data:`'seconds'`, :py:data:`'s'`, :py:data:`'temporal'` or :py:data:`'time'` for seconds, and :py:data:`'traces'`, :py:data:`'samples'`, :py:data:`'pulses'`, or :py:data:`'columns'` for traces.
    :param str z: The units to display on the z-axis during plotting. Defaults to :py:data:`z='nanoseconds'`. Acceptable values are :py:data:`z='depth'` (which sets to meters), :py:data:`'m'`, :py:data:`'cm'`, :py:data:`'mm'`, :py:data:`'meters'`, etc., for depth; :py:data:`'nanoseconds'`, :py:data:`'ns'`, :py:data:`'temporal'` or :py:data:`'time'` for seconds, and :py:data:`'samples'` or :py:data:`'rows'` for samples.
    :param bool histogram: Whether to plot a histogram of array values at plot time.
    :type colormap: :py:class:`str` or :class:`matplotlib.colors.Colormap`
    :param colormap: Plot using a Matplotlib colormap. Defaults to :py:data:`gray` which is colorblind-friendly and behaves similarly to the RADAN default, but :py:data:`seismic` is a favorite of many due to its diverging nature.
    :param bool colorbar: Whether to display a graded color bar at plot time.
    :param list[int,int,int,int] zero: A list of values representing the amount of samples to slice off each channel. Defaults to :py:data:`None` for all channels, which will end up being set by the :code:`rh_zero` variable in :py:func:`readgssi.dzt.readdzt`.
    :param int gain: The amount of gain applied to plots. Defaults to 1. Gain is applied as a ratio of the standard deviation of radargram values to the value set here.
    :param int freqmin: Minimum frequency value to feed to the vertical triangular FIR bandpass filter :py:func:`readgssi.filtering.triangular`. Defaults to :py:data:`None` (no filter).
    :param int freqmax: Maximum frequency value to feed to the vertical triangular FIR bandpass filter :py:func:`readgssi.filtering.triangular`. Defaults to :py:data:`None` (no filter).
    :param bool reverse: Whether to read the array backwards (i.e. flip horizontally; :py:func:`readgssi.arrayops.flip`). Defaults to :py:data:`False`. Useful for lining up travel directions of files run opposite each other.
    :param int bgr: Background removal filter applied after stacking (:py:func:`readgssi.filtering.bgr`). Defaults to :py:data:`False` (off). :py:data:`bgr=True` must be accompanied by a valid value for :py:data:`win`.
    :param int win: Window size for background removal filter (:py:func:`readgssi.filtering.bgr`). If :py:data:`bgr=True` and :py:data:`win=0`, the full-width row average will be subtracted from each row. If :py:data:`bgr=True` and :py:data:`win=50`, a moving window will calculate the average of 25 cells on either side of the current cell, and subtract that average from the cell value, using :py:func:`scipy.ndimage.uniform_filter1d` with :py:data:`mode='constant'` and :py:data:`cval=0`. This is useful for removing non-uniform horizontal average, but the tradeoff is that it creates ghost data half the window size away from vertical figures, and that a window size set too low will obscure any horizontal layering longer than the window size.
    :param bool dewow: Whether to apply a vertical dewow filter (experimental). See :py:func:`readgssi.filtering.dewow`.
    :param bool absval: If :py:data:`True`, displays the absolute value of the vertical gradient of the array when plotting. Good for displaying faint array features.
    :param bool normalize: Distance normalization (:py:func:`readgssi.arrayops.distance_normalize`). Defaults to :py:data:`False`.
    :param bool specgram: Produce a spectrogram of a trace in the array using :py:func:`readgssi.plot.spectrogram`. Defaults to :py:data:`False` (if :py:data:`True`, defaults to a trace roughly halfway across the profile). This is mostly for debugging and is not currently accessible from the command line.
    :param bool noshow: If :py:data:`True`, this will suppress the matplotlib interactive window and simply save a file. This is useful for processing many files in a folder without user input.
    :param float spm: User-set samples per meter. This overrides the value read from the header, and typically doesn't need to be set if the samples per meter value was set correctly at survey time. This value does not need to be set if GPS input (DZG file) is present and the user sets :py:data:`normalize=True`.
    :param int start_scan: zero based start scan to read data from. Defaults to zero.
    :param int num_scans: number of scans to read from the file, Defaults to -1, which reads from start_scan to end of file.
    :param float epsr: Epsilon_r, otherwise known as relative permittivity, or dielectric constant. This determines the speed at which waves travel through the first medium they encounter. It is used to calculate the profile depth if depth units are specified on the Z-axis of plots.
    :param bool title: Whether to display descriptive titles on plots. Defaults to :py:data:`True`.
    :param list[int,int,int,int] zoom: Zoom extents to set programmatically for matplotlib plots. Must pass a list of four integers: :py:data:`[left, right, up, down]`. Since the z-axis begins at the top, the "up" value is actually the one that displays lower on the page. All four values are axis units, so if you are working in nanoseconds, 10 will set a limit 10 nanoseconds down. If your x-axis is in seconds, 6 will set a limit 6 seconds from the start of the survey. It may be helpful to display the matplotlib interactive window at full extents first, to determine appropriate extents to set for this parameter. If extents are set outside the boundaries of the image, they will be set back to the boundaries. If two extents on the same axis are the same, the program will default to plotting full extents for that axis.
    :rtype: header (:py:class:`dict`), radar arrays by channel :py:class:`dict` {0: :py:class:`numpy.ndarray`, 1: :py:class:`numpy.ndarray`, etc.}, gps (False or :py:class:`pandas.DataFrame`)
    :param bool pausecorrect: If :py:data:`True` or minimum speed given as :py:data:`+float`, search the DZG file for pauses, where GPS keeps recording but radar unit does not, and correct them if necessary. Defaults to :py:data:`False`. Minimum speed defaults to 0.25 m/s.
    :param bool showmarks: If :py:data:`True`, display mark locations in plot. Defaults to :py:data:`False`.
    """
    if infile:
        # read the file
        try:
            if verbose:
                fx.printmsg('reading...')
                fx.printmsg('input file:         %s' % (infile))
            header, data, gps = readdzt(infile, gps=normalize, spm=spm,
                                        start_scan=start_scan, num_scans=num_scans,
                                        epsr=epsr, antfreq=antfreq, zero=zero,
                                        verbose=verbose)
            # print a bunch of header info
            if verbose:
                fx.printmsg('success. header values:')
                header_info(header, data)
        except IOError as e: # the user has selected an inaccessible or nonexistent file
            fx.printmsg("ERROR: DZT file is inaccessable or does not exist at %s" % (os.path.abspath(infile)))
            raise IOError(e)
        infile_ext = os.path.splitext(infile)[1]
        infile_basename = os.path.splitext(infile)[0]
    else:
        raise IOError('ERROR: no input file specified')
    for chan in list(range(header['rh_nchan'])):
        try:
            ANT[header['rh_antname'][chan]]
        except KeyError as e:
            print('--------------------WARNING - PLEASE READ---------------------')
            fx.printmsg('WARNING: could not read frequency for antenna name "%s"' % e)
            if (antfreq != None) and (antfreq != [None, None, None, None]):
                fx.printmsg('using user-specified antenna frequency. Please ensure frequency value or list of values is correct.')
                fx.printmsg('old values: %s' % (header['antfreq']))
                fx.printmsg('new values: %s' % (antfreq))
                header['antfreq'] = antfreq
            else:
                fx.printmsg('WARNING: trying to use frequencies of %s MHz (estimated)...' % (header['antfreq'][chan]))
            fx.printmsg('more info: rh_ant=%s' % (header['rh_ant']))
            fx.printmsg('           known_ant=%s' % (header['known_ant']))
            fx.printmsg("please submit a bug report with this warning, the antenna name and frequency")
            fx.printmsg('at https://github.com/iannesbitt/readgssi/issues/new')
            fx.printmsg('or send via email to ian (dot) nesbitt (at) gmail (dot) com.')
            fx.printmsg('if possible, please attach a ZIP file with the offending DZT inside.')
            print('--------------------------------------------------------------')
    chans = list(range(header['rh_nchan']))
    outfiles = {}
    if (pausecorrect) and (not gps.empty):
        kwargs = {}
        fx.printmsg('correcting GPS errors created by user-initiated recording pauses...')
        if (type(pausecorrect) == float) or (type(pausecorrect) == int):
            kwargs['threshold'] = pausecorrect
            fx.printmsg('pause velocity threshold is %s m/s (user-specified)' % (kwargs['threshold']))
        else:
            fx.printmsg('pause velocity threshold is 0.25 m/s (default)')
        gps = pause_correct(header=header, dzg_file=os.path.splitext(infile)[0] + ".DZG", verbose=verbose, **kwargs)
    elif (pausecorrect) and (gps.empty):
        fx.printmsg("can't correct pauses without a valid DZG file to look for. are you sure the DZG has the same name as the DZT file?")
    for ar in data:
        """
        filter and construct an output file or plot from the current channel's array
        """
        if verbose:
            fx.printmsg('beginning processing for channel %s (antenna %s)' % (ar, header['rh_antname'][ar]))
        # execute filtering functions if necessary
        if normalize:
            header, data[ar], gps = arrayops.distance_normalize(header=header, ar=data[ar], gps=gps,
                                                                  verbose=verbose)
        if dewow:
            # dewow
            data[ar] = filtering.dewow(ar=data[ar], verbose=verbose)
        if freqmin and freqmax:
            # vertical triangular bandpass
            data[ar] = filtering.triangular(ar=data[ar], header=header, freqmin=freqmin, freqmax=freqmax,
                                               zerophase=True, verbose=verbose)
        if stack != 1:
            # horizontal stacking
            header, data[ar], stack = arrayops.stack(ar=data[ar],
                                                      header=header,
                                                      stack=stack,
                                                      verbose=verbose)
        else:
            stack = 1 # just in case it's not an integer
        if bgr:
            # background removal
            data[ar] = filtering.bgr(ar=data[ar], header=header, win=win, verbose=verbose)
        else:
            win = None
        if reverse:
            # read array backwards
            data[ar] = arrayops.flip(data[ar], verbose=verbose)
        ## file naming
        # name the output file
        if ar == 0: # first channel?
            orig_outfile = outfile # preserve the original
        else:
            outfile = orig_outfile # recover the original
        outfiles[ar] = fx.naming(outfile=outfile, infile_basename=infile_basename, chans=chans, chan=ar,
                                 normalize=normalize, zero=header['timezero'][ar], stack=stack, reverse=reverse,
                                 bgr=bgr, win=win, dewow=dewow, freqmin=freqmin, freqmax=freqmax, plotting=plotting,
                                 gain=gain, absval=absval)
        if plotting:
            plot.radargram(ar=data[ar], ant=ar, header=header, freq=header['antfreq'][ar], verbose=verbose,
                           figsize=figsize, dpi=dpi, stack=stack, x=x, z=z, gain=gain, colormap=colormap,
                           colorbar=colorbar, noshow=noshow, outfile=outfiles[ar], fmt=frmt, win=win, title=title,
                           zero=header['timezero'][ar], zoom=zoom, absval=absval, showmarks=showmarks)
        if histogram:
            plot.histogram(ar=data[ar], verbose=verbose)
        if specgram:
            plot.spectrogram(ar=data[ar], header=header, freq=header['antfreq'][ar], verbose=verbose)
    if frmt != None:
        if verbose:
            fx.printmsg('outputting to %s...' % frmt)
        for ar in data:
            # is there an output filepath given?
            outfile_abspath = os.path.abspath(outfiles[ar]) # set output to given location
            # what is the output format
            if frmt in 'csv':
                translate.csv(ar=data[ar], outfile_abspath=outfile_abspath,
                              header=header, verbose=verbose)
            elif frmt in 'h5':
                translate.h5(ar=data[ar], infile_basename=infile_basename,
                             outfile_abspath=outfile_abspath, verbose=verbose)
            elif frmt in 'segy':
                translate.segy(ar=data[ar], outfile_abspath=outfile_abspath,
                               verbose=verbose)
            elif frmt in 'numpy':
                translate.numpy(ar=data[ar], outfile_abspath=outfile_abspath,
                                verbose=verbose)
            elif frmt in 'gprpy':
                translate.gprpy(ar=data[ar], outfile_abspath=outfile_abspath,
                                header=header, verbose=verbose)
            elif frmt in 'dzt':
                if ar == 0:
                    translate.dzt(ar=data, outfile_abspath=outfile_abspath,
                                  header=header, verbose=verbose)
        if frmt in ('object', 'python'):
            return header, data, gps
 '''
 def main():
    """
    This function gathers and parses command line arguments with which to create function calls. It is not for use from the python console.
    """
    verbose = True
    title = True
    stack = 1
    win = 0
    dpi = 150
    zero = [None,None,None,None]
    zoom = [0,0,0,0]
    infile, outfile, antfreq, plotting, figsize, histogram, colorbar, dewow, bgr, noshow = None, None, None, None, None, None, None, None, None, None
    reverse, freqmin, freqmax, specgram, normalize, spm, epsr, absval, pausecorrect, showmarks = None, None, None, None, None, None, None, None, None, None
    colormap = 'gray'
    x, z = 'seconds', 'nanoseconds'
    frmt = 'png'
    gain = 1
 # some of this needs to be tweaked to formulate a command call to one of the main body functions
 # variables that can be passed to a body function: (infile, outfile, antfreq=None, frmt, plotting=False, stack=1)
    try:
        opts, args = getopt.getopt(sys.argv[1:],'hVqd:i:a:o:f:p:s:r:RNwnmc:bg:Z:E:t:x:z:Te:D:APM',
            ['help', 'version', 'quiet','spm=','input=','antfreq=','output=','format=','plot=','stack=','bgr=',
            'reverse', 'normalize','dewow','noshow','histogram','colormap=','colorbar','gain=',
            'zero=','epsr=','bandpass=', 'xscale=', 'zscale=', 'titleoff', 'zoom=', 'dpi=', 'absval','pausecorrect',
            'showmarks'])
    # the 'no option supplied' error
    except getopt.GetoptError as e:
        fx.printmsg('ERROR: invalid argument(s) supplied')
        fx.printmsg('error text: %s' % e)
        fx.printmsg(config.help_text)
        sys.exit(2)
    for opt, arg in opts: 
        if opt in ('-h', '--help'): # the help case
            fx.printmsg(config.help_text)
            sys.exit()
        if opt in ('-V', '--version'): # the help case
            print(config.version_text)
            sys.exit()
        if opt in ('-q', '--quiet'):
            verbose = False
        if opt in ('-i', '--input'): # the input file
            if arg:
                infile = arg
                if '~' in infile:
                    infile = os.path.expanduser(infile) # if using --input=~/... tilde needs to be expanded 
        if opt in ('-o', '--output'): # the output file
            if arg:
                outfile = arg
                if '~' in outfile:
                    outfile = os.path.expanduser(outfile) # expand tilde, see above
        if opt in ('-a', '--antfreq'):
            try:
                antfreq = [None, None, None, None]
                freq_arg = arg.split(',')
                for i in range(len(freq_arg)):
                    antfreq[i] = round(abs(float(freq_arg[i])))
            except ValueError:
                fx.printmsg('ERROR: %s is not a valid number or list of frequency values.' % arg)
                fx.printmsg(config.help_text)
                sys.exit(2)
            fx.printmsg('user specified frequency values of %s MHz will be overwritten if DZT header has valid antenna information.' % antfreq)
        if opt in ('-f', '--format'): # the format string
            # check whether the string is a supported format
            if arg:
                arg = arg.lower()
                if arg in ('.dzt', 'dzt', 'gssi', 'radan'):
                    frmt = 'dzt'
                elif arg in ('csv', '.csv'):
                    frmt = 'csv'
                elif arg in ('sgy', 'segy', 'seg-y', '.sgy', '.segy', '.seg-y'):
                    frmt = 'segy'
                elif arg in ('h5', 'hdf5', '.h5', '.hdf5'):
                    frmt = 'h5'
                elif arg in ('numpy', 'npy', '.npy', 'np'):
                    frmt = 'numpy'
                elif arg in ('gprpy'):
                    frmt = 'gprpy'
                elif arg in ('plot'):
                    frmt = 'png'
                    plotting = True
                elif arg in (plot.fmtst):
                    frmt = arg
                    plotting = True
                else:
                    # else the user has given an invalid format
                    fx.printmsg('Invalid file format given.')
                    fx.printmsg(config.help_text)
                    sys.exit(2)
            else:
                fx.printmsg('No file format specified.')
                fx.printmsg(config.help_text)
                sys.exit(2)
        if opt in ('-s', '--stack'):
            if arg:
                if 'auto' in str(arg).lower():
                    stack = 'auto'
                else:
                    try:
                        stack = abs(int(arg))
                    except ValueError:
                        fx.printmsg('ERROR: stacking argument must be a positive integer or "auto".')
                        fx.printmsg(config.help_text)
                        sys.exit(2)
        if opt in ('-r', '--bgr'):
            bgr = True
            if arg:
                try:
                    win = abs(int(arg))
                except:
                    fx.printmsg('ERROR: background removal window must be a positive integer. defaulting to full width.')
        if opt in ('-w', '--dewow'):
            dewow = True
        if opt in ('-R', '--reverse'):
            reverse = True
        if opt in ('-P', '--pausecorr'):
            pausecorrect = True
        if opt in ('-N', '--normalize'):
            normalize = True
        if opt in ('-Z', '--zero'):
            if arg:
                try:
                    zero = list(map(int, arg.split(',')))
                except:
                    fx.printmsg('ERROR: zero correction must be an integer or list')
            else:
                fx.printmsg('WARNING: no zero correction argument supplied')
        if opt in ('-t', '--bandpass'):
            if arg:
                freqmin, freqmax = arg.split('-')
                try:
                    freqmin = int(freqmin)
                    freqmax = int(freqmax)
                except:
                    fx.printmsg('ERROR: filter frequency must be integers separated by a dash (-)')
                    freqmin, freqmax = None, None
            else:
                fx.printmsg('WARNING: no filter frequency argument supplied')
        if opt in ('-n', '--noshow'):
            noshow = True
        if opt in ('-p', '--plot'):
            plotting = True
            if arg:
                if 'auto' in arg.lower():
                    figsize = 8
                else:
                    try:
                        figsize = abs(int(arg))
                    except ValueError:
                        fx.printmsg('ERROR: plot size argument must be a positive integer or "auto".')
                        fx.printmsg(config.help_text)
                        sys.exit(2)
        if opt in ('-d', '--spm'):
            if arg:
                try:
                    spm = float(arg)
                    assert spm > 0
                except:
                    fx.printmsg('ERROR: samples per meter must be positive. defaulting to read from header')
                    spm = None
            else:
                fx.printmsg('WARNING: no samples per meter value given')
        if opt in ('-x', '--xscale'):
            if arg:
                if arg in ('temporal', 'time', 'seconds', 's'):
                    x = 'seconds'
                elif arg in ('spatial', 'distance', 'dist', 'length', 'meters', 'm'):
                    x = 'm'
                elif arg in ('centimeters', 'cm'):
                    x = 'cm'
                elif arg in ('kilometers', 'km'):
                    x = 'km'
                elif arg in ('traces', 'samples', 'pulses', 'columns'):
                    x = 'traces'
                else:
                    fx.printmsg('WARNING: invalid xscale type specified. defaulting to --xscale="seconds"')
                    x = 'seconds'
            else:
                fx.printmsg('WARNING: no xscale type specified. defaulting to --xscale="seconds"')
                x = 'seconds'
        if opt in ('-z', '--zscale'):
            if arg:
                if arg in ('temporal', 'time', 'nanoseconds', 'ns'):
                    z = 'nanoseconds'
                elif arg in ('spatial', 'distance', 'depth', 'length', 'meters', 'm'):
                    z = 'm'
                elif arg in ('centimeters', 'cm'):
                    z = 'cm'
                elif arg in ('millimeters', 'mm'):
                    z = 'mm'
                elif arg in ('samples', 'rows'):
                    z = 'samples'
                else:
                    fx.printmsg('WARNING: invalid zscale type specified. defaulting to --zscale="nanoseconds"')
                    z = 'nanoseconds'
            else:
                fx.printmsg('WARNING: no zscale type specified. defaulting to --zscale="nanoseconds"')
                z = 'nanoseconds'
        if opt in ('-E', '--epsr'):
            try:
                epsr = float(arg)
                if epsr <= 1:
                    raise Exception
            except:
                fx.printmsg('ERROR: invalid value for epsr (epsilon sub r "dielectric permittivity"). using DZT value instead.')
                epsr = None
        if opt in ('-m', '--histogram'):
            histogram = True
        if opt in ('-M', '--showmarks'):
            showmarks = True
        if opt in ('-c', '--colormap'):
            if arg:
                colormap = arg
        if opt in ('-b', '--colorbar'):
            colorbar = True
        if opt in ('-A', '--absval'):
            absval = True
        if opt in ('-g', '--gain'):
            if arg:
                try:
                    gain = abs(float(arg))
                except:
                    fx.printmsg('ERROR: gain must be positive. defaulting to gain=1.')
                    gain = 1
        if opt in ('-T', '--titleoff'):
            title = False
        if opt in ('-e', '--zoom'):
            if arg:
                if True:#try:
                    zoom = list(map(int, arg.split(',')))
                    if len(zoom) != 4:
                        fx.printmsg('ERROR: zoom must be a list of four numbers (zeros are accepted).')
                        fx.printmsg('       defaulting to full extents.')
                        zoom = [0,0,0,0]
                # except Exception as e:
                #     fx.printmsg('ERROR setting zoom values. zoom must be a list of four numbers (zeros are accepted).')
                #     fx.printmsg('       defaulting to full extents.')
                #     fx.printmsg('details: %s' % e)
        if opt in ('-D', '--dpi'):
            if arg:
                try:
                    dpi = abs(int(arg))
                    assert dpi > 0
                except:
                    fx.printmsg('WARNING: DPI could not be set. did you supply a positive integer?')
    # call the function with the values we just got
    if infile:
        if verbose:
            fx.printmsg(config.dist)
        readgssi2(infile=infile, outfile=outfile, antfreq=antfreq, frmt=frmt, plotting=plotting, dpi=dpi,
                 figsize=figsize, stack=stack, verbose=verbose, histogram=histogram, x=x, z=z,
                 colormap=colormap, colorbar=colorbar, reverse=reverse, gain=gain, bgr=bgr, win=win,
                 zero=zero, normalize=normalize, dewow=dewow, noshow=noshow, freqmin=freqmin, freqmax=freqmax,
                 spm=spm, epsr=epsr, title=title, zoom=zoom, absval=absval, pausecorrect=pausecorrect,
                 showmarks=showmarks)
        if verbose:
            fx.printmsg('done with %s' % infile)
        print('')
    else:
        fx.printmsg('ERROR: no input file was specified')
        fx.printmsg(config.help_text)
        sys.exit(2)
 if __name__ == "__main__":
    """
    directs the command line call to the main argument parsing function.
    """
    main()
 '''
--- a/readgssi/translate.py
+++ b/readgssi/translate.py
@@ -0,0 +1,338 @@
 import h5py
 import pandas as pd
 import numpy as np
 import json
 import struct
 from readgssi.gps import readdzg
 import readgssi.functions as fx
 from datetime import datetime
 """
 contains translations to common formats
 """
 def json_header(header, outfile_abspath, verbose=False):
    """
    Save header values as a .json so another script can take what it needs. This is used to export to `GPRPy <https://github.com/NSGeophysics/gprpy>`_.
    :param dict header: The file header dictionary
    :param str outfile_abspath: Output file path
    :param bool verbose: Verbose, defaults to False
    """
    with open('%s.json' % (outfile_abspath), 'w') as f:
        if verbose:
            fx.printmsg('serializing header as %s' % (f.name))
        json.dump(obj=header, fp=f, indent=4, sort_keys=True, default=str)
 def csv(ar, outfile_abspath, header=None, verbose=False):
    """
    Output to csv. Data is read into a :py:class:`pandas.DataFrame`, then written using :py:func:`pandas.DataFrame.to_csv`.
    :param numpy.ndarray ar: Radar array
    :param str outfile_abspath: Output file path
    :param dict header: File header dictionary to write, if desired. Defaults to None.
    :param bool verbose: Verbose, defaults to False
    """
    if verbose:
        t = ''
        if header:
            t = ' with json header'
        fx.printmsg('output format is csv%s. writing data to: %s.csv' % (t, outfile_abspath))
    data = pd.DataFrame(ar) # using pandas to output csv
    data.to_csv('%s.csv' % (outfile_abspath)) # write
    if header:
        json_header(header=header, outfile_abspath=outfile_abspath, verbose=verbose)
 def numpy(ar, outfile_abspath, header=None, verbose=False):
    """
    Output to binary numpy binary file (.npy) with the option of writing the header to .json as well.
    :param numpy.ndarray ar: Radar array
    :param str outfile_abspath: Output file path
    :param dict header: File header dictionary to write, if desired. Defaults to None.
    :param bool verbose: Verbose, defaults to False
    """
    if verbose:
        t = ''
        if header:
            t = ' with json header (compatible with GPRPy)'
        fx.printmsg('output format is numpy binary%s' % t)
        fx.printmsg('writing data to %s.npy' % outfile_abspath)
    np.save('%s.npy' % outfile_abspath, ar, allow_pickle=False)
    if header:
        json_header(header=header, outfile_abspath=outfile_abspath, verbose=verbose)
 def gprpy(ar, header, outfile_abspath, verbose=False):
    """
    Save in a format `GPRPy <https://github.com/NSGeophysics/gprpy>`_ can open (numpy binary .npy and a .json formatted header file).
    .. note:: GPRPy support for this feature is forthcoming (https://github.com/NSGeophysics/GPRPy/issues/3#issuecomment-460462612).
    :param numpy.ndarray ar: Radar array
    :param str outfile_abspath: Output file path
    :param dict header: File header dictionary to write, if desired. Defaults to None.
    :param bool verbose: Verbose, defaults to False
    """
    numpy(ar=ar, header=header, outfile_abspath=outfile_abspath, verbose=verbose)
 def segy(ar, outfile_abspath, header, verbose=False):
    """
    .. warning:: SEGY output is not yet available.
    In the future, this function will output to SEGY format.
    :param numpy.ndarray ar: Radar array
    :param str outfile_abspath: Output file path
    :param dict header: File header dictionary to write, if desired. Defaults to None.
    :param bool verbose: Verbose, defaults to False
    """
    fx.printmsg('ERROR: SEG-Y is not yet supported, please choose another format.')
    raise NotImplementedError('SEG-Y is not yet supported.')
 def h5(ar, infile_basename, outfile_abspath, header, verbose=False):
    """
    .. warning:: HDF5 output is not yet available.
    In the future, this function will output to HDF5 format.
    :param numpy.ndarray ar: Radar array
    :param str infile_basename: Input file basename
    :param str outfile_abspath: Output file path
    :param dict header: File header dictionary to write, if desired. Defaults to None.
    :param bool verbose: Verbose, defaults to False
    """
    '''
    Assumptions:
    - constant velocity between marks (may be possible to add a check)
    - marks are made at same time on GPS and SIR
    - gps and gpr are in same location when mark is made
    - good quality horizontal solution
    single-channel IceRadar h5 structure is
    /line_x/location_n/datacapture_0/echogram_0 (/group/group/group/dataset)
    each dataset has an 'attributes' item attached, formatted in 'collections.defaultdict' style:
    [('PCSavetimestamp', str), ('GPS Cluster- MetaData_xml', str), ('Digitizer-MetaData_xml', str), ('GPS Cluster_UTM-MetaData_xml', str)]
    '''
    if verbose:
        fx.printmsg('output format is IceRadar HDF5. writing file to: %s' % outfile_abspath)
    # setup formattable strings
    svts = 'PCSavetimestamp'
    gpsx = 'GPS Cluster- MetaData_xml'
    # main gps string. 8 formattable values: gps_sec, lat, lon, qual, num_sats, hdop, altitude, geoid_ht
    gpsclstr = '<Cluster>\r\n<Name>GPS Cluster</Name>\r\n<NumElts>10</NumElts>\r\n<String>\r\n<Name>GPS_timestamp_UTC</Name>\r\n<Val>%.2f</Val>\r\n</String>\r\n<String>\r\n<Name>Lat_N</Name>\r\n<Val>%.4f</Val>\r\n</String>\r\n<String>\r\n<Name>Long_ W</Name>\r\n<Val>%.4f</Val>\r\n</String>\r\n<String>\r\n<Name>Fix_Quality</Name>\r\n<Val>%i</Val>\r\n</String>\r\n<String>\r\n<Name>Num _Sat</Name>\r\n<Val>%i</Val>\r\n</String>\r\n<String>\r\n<Name>Dilution</Name>\r\n<Val>%.2f</Val>\r\n</String>\r\n<String>\r\n<Name>Alt_asl_m</Name>\r\n<Val>%.2f</Val>\r\n</String>\r\n<String>\r\n<Name>Geoid_Heigh_m</Name>\r\n<Val>%.2f</Val>\r\n</String>\r\n<Boolean>\r\n<Name>GPS Fix valid</Name>\r\n<Val>1</Val>\r\n</Boolean>\r\n<Boolean>\r\n<Name>GPS Message ok</Name>\r\n<Val>1</Val>\r\n</Boolean>\r\n</Cluster>\r\n'
    dimx = 'Digitizer-MetaData_xml'
    # digitizer string. 3 formattable values: rhf_depth, rh_nsamp, stack
    dimxstr = '<Cluster>\r\n<Name>Digitizer MetaData</Name>\r\n<NumElts>3</NumElts>\r\n<Cluster>\r\n<Name>Digitizer settings</Name>\r\n<NumElts>5</NumElts>\r\n<Cluster>\r\n<Name>Vertical</Name>\r\n<NumElts>3</NumElts>\r\n<DBL>\r\n<Name>vertical range</Name>\r\n<Val>%f</Val>\r\n</DBL>\r\n<DBL>\r\n<Name>Vertical Offset</Name>\r\n<Val>0.00000000000000</Val>\r\n</DBL>\r\n<I32>\r\n<Name>vertical coupling</Name>\r\n<Val>1</Val>\r\n</I32>\r\n</Cluster>\r\n<Cluster>\r\n<Name>Channel</Name>\r\n<NumElts>1</NumElts>\r\n<DBL>\r\n<Name>maximum input frequency</Name>\r\n<Val>%f</Val>\r\n</DBL>\r\n</Cluster>\r\n<Cluster>\r\n<Name>Horizontal</Name>\r\n<NumElts>2</NumElts>\r\n<DBL>\r\n<Name> Sample Rate</Name>\r\n<Val>250000000.00000000000000</Val>\r\n</DBL>\r\n<I32>\r\n<Name>Record Length</Name>\r\n<Val>%i</Val>\r\n</I32>\r\n</Cluster>\r\n<Cluster>\r\n<Name>Trigger</Name>\r\n<NumElts>12</NumElts>\r\n<U16>\r\n<Name>trigger type</Name>\r\n<Val>0</Val>\r\n</U16>\r\n<DBL>\r\n<Name>trigger delay</Name>\r\n<Val>0.00000000000000</Val>\r\n</DBL>\r\n<DBL>\r\n<Name>reference position</Name>\r\n<Val>10.00000000000000</Val>\r\n</DBL>\r\n<DBL>\r\n<Name>trigger level</Name>\r\n<Val>2.00000000000000E-2</Val>\r\n</DBL>\r\n<DBL>\r\n<Name>hysteresis</Name>\r\n<Val>0.00000000000000</Val>\r\n</DBL>\r\n<DBL>\r\n<Name>low level</Name>\r\n<Val>0.00000000000000</Val>\r\n</DBL>\r\n<DBL>\r\n<Name>high level</Name>\r\n<Val>0.00000000000000</Val>\r\n</DBL>\r\n<U16>\r\n<Name>trigger coupling</Name>\r\n<Val>1</Val>\r\n</U16>\r\n<I32>\r\n<Name>trigger window mode</Name>\r\n<Val>0</Val>\r\n</I32>\r\n<I32>\r\n<Name>trigger slope</Name>\r\n<Val>0</Val>\r\n</I32>\r\n<String>\r\n<Name>trigger source</Name>\r\n<Val>0</Val>\r\n</String>\r\n<I32>\r\n<Name>Trigger Modifier</Name>\r\n<Val>2</Val>\r\n</I32>\r\n</Cluster>\r\n<String>\r\n<Name>channel name</Name>\r\n<Val>0</Val>\r\n</String>\r\n</Cluster>\r\n<U16>\r\n<Name>Stacking</Name>\r\n<Val>%i</Val>\r\n</U16>\r\n<Cluster>\r\n<Name>Radargram extra info</Name>\r\n<NumElts>2</NumElts>\r\n<DBL>\r\n<Name>relativeInitialX</Name>\r\n<Val>-1.51999998365682E-7</Val>\r\n</DBL>\r\n<DBL>\r\n<Name>xIncrement</Name>\r\n<Val>3.99999988687227E-9</Val>\r\n</DBL>\r\n</Cluster>\r\n</Cluster>\r\n'
    gutx = 'GPS Cluster_UTM-MetaData_xml'
    # gps UTM string. 1 formattable value: num_sats
    gpsutmstr = '<Cluster>\r\n<Name>GPS_UTM Cluster</Name>\r\n<NumElts>10</NumElts>\r\n<String>\r\n<Name>Datum</Name>\r\n<Val>NaN</Val>\r\n</String>\r\n<String>\r\n<Name>Easting_m</Name>\r\n<Val></Val>\r\n</String>\r\n<String>\r\n<Name>Northing_m</Name>\r\n<Val>NaN</Val>\r\n</String>\r\n<String>\r\n<Name>Elevation</Name>\r\n<Val>NaN</Val>\r\n</String>\r\n<String>\r\n<Name>Zone</Name>\r\n<Val>NaN</Val>\r\n</String>\r\n<String>\r\n<Name>Satellites (dup)</Name>\r\n<Val>%i</Val>\r\n</String>\r\n<Boolean>\r\n<Name>GPS Fix Valid (dup)</Name>\r\n<Val>1</Val>\r\n</Boolean>\r\n<Boolean>\r\n<Name>GPS Message ok (dup)</Name>\r\n<Val>1</Val>\r\n</Boolean>\r\n<Boolean>\r\n<Name>Flag_1</Name>\r\n<Val>0</Val>\r\n</Boolean>\r\n<Boolean>\r\n<Name>Flag_2</Name>\r\n<Val>0</Val>\r\n</Boolean>\r\n</Cluster>\r\n'
    if os.path.exists(infile_basename + '.DZG'):
        gps = readdzg(infile_basename + '.DZG', 'dzg', header['rhf_sps'], ar.shape[1], verbose)
    else:
        gps = '' # if there's no DZG file...need a way to parse another gps source if possible
    # make data structure
    n = 0 # line number, iteratively increased
    f = h5py.File('%s.h5' % (outfile_abspath), 'w') # overwrite existing file
    if verbose:
        fx.printmsg('exporting to %s.h5' % outfile_abspath)
    try:
        li = f.create_group('line_0') # create line zero
    except ValueError: # the line already exists in the file
        li = f['line_0']
    for sample in ar.T:
        # create strings
        # pcsavetimestamp
        # formatting: m/d/yyyy_h:m:ss PM
        svts_str = gps[n]['timestamp'].astype(datetime).strftime('%m/%d/%Y_%H:%M:%S %p')
        # gpscluster
        # order we need: (len(list), tracetime, y, x, q, sats, dil, z, gh, 1, 1)
        # rows in gps: tracenum, lat, lon, altitude, geoid_ht, qual, num_sats, hdop, timestamp
        gpsx_str = gpsclstr % (gps[n]['gps_sec'], gps[n]['lat'], gps[n]['lon'], gps[n]['qual'], gps[n]['num_sats'], gps[n]['hdop'], gps[n]['altitude'], gps[n]['geoid_ht'])
        # digitizer
        dimx_str = dimxstr % (r[0]['rhf_depth'], freq, r[0]['rh_nsamp'], r[0]['stack'])
        # utm gpscluster
        gutx_str = gpsutmstr % (gps[n]['num_sats'])
        lo = li.create_group('location_' + str(n)) # create a location for each trace
        dc = lo.create_group('datacapture_0')
        eg = dc.create_dataset('echogram_0', (ar.shape[0],), data=sample)
        eg.attrs.create(svts, svts_str) # store pcsavetimestamp attribute
        eg.attrs.create(gpsx, gpsx_str) # store gpscluster attribute
        eg.attrs.create(dimx, dimx_str) # store digitizer attribute
        eg.attrs.create(gutx, gutx_str) # store utm gpscluster attribute
        n += 1
    f.close()
 def writetime(d):
    '''
    Function to write dates to :code:`rfDateByte` binary objects in DZT headers.
    An inverse of the :py:func:`readgssi.dzt.readtime` function.
    DZT :code:`rfDateByte` objects are 32 bits of binary (01001010111110011010011100101111),
    structured as little endian u5u6u5u5u4u7 where all numbers are base 2 unsigned int (uX)
    composed of X number of bits. Four bytes is an unnecessarily high level of compression
    for a single date object in a filetype that often contains tens or hundreds of megabytes
    of array information anyway.
    So this function reads a datetime object and outputs
    (seconds/2, min, hr, day, month, year-1980).
    For more information on :code:`rfDateByte`, see page 55 of
    `GSSI's SIR 3000 manual <https://support.geophysical.com/gssiSupport/Products/Documents/Control%20Unit%20Manuals/GSSI%20-%20SIR-3000%20Operation%20Manual.pdf>`_.
    :param datetime d: the :py:class:`datetime.datetime` to be encoded
    :rtype: bytes
    '''
    # get binary values
    sec2 = int(bin(int(d.second / 2))[2:])
    mins = int(bin(d.minute)[2:])
    hr = int(bin(d.hour)[2:])
    day = int(bin(d.day)[2:])
    mo = int(bin(d.month)[2:])
    yr = int(bin(d.year - 1980)[2:])
    # create binary string with proper padding
    dtbits = '%07d%04d%05d%05d%06d%05d' % (yr, mo, day, hr, mins, sec2)
    # create four bytes that make up rfDateByte
    byt0 = int(dtbits[24:], 2)
    byt1 = int(dtbits[16:24], 2)
    byt2 = int(dtbits[8:16], 2)
    byt3 = int(dtbits[0:8], 2)
    # return a byte array
    return bytes([byt0, byt1, byt2, byt3])
 def dzt(ar, outfile_abspath, header, verbose=False):
    """
    .. warning:: DZT output is only currently compatible with single-channel files.
    This function will output a RADAN-compatible DZT file after processing.
    This is useful to circumvent RADAN's distance-normalization bug
    when the desired outcome is array migration.
    Users can set DZT output via the command line by setting the
    :code:`-f dzt` flag, or in Python by doing the following: ::
        from readgssi.dzt import readdzt
        from readgssi import translate
        from readgssi.arrayops import stack, distance_normalize
        # first, read a data file
        header, data, gps = readdzt('FILE__001.DZT')
        # do some stuff
        # (distance normalization must be done before stacking)
        for a in data:
            header, data[a], gps = distance_normalize(header=header, ar=data[a], gps=gps)
            header, data[a], stack = stack(header=header, ar=data[a], stack=10)
        # output as modified DZT
        translate.dzt(ar=data, outfile_abspath='FILE__001-DnS10.DZT', header=header)
    This will output :code:`FILE__001-DnS10.DZT` as a distance-normalized DZT.
    :param numpy.ndarray ar: Radar array
    :param str infile_basename: Input file basename
    :param str outfile_abspath: Output file path
    :param dict header: File header dictionary to write, if desired. Defaults to None.
    :param bool verbose: Verbose, defaults to False
    """
    '''
    Assumptions:
    - constant velocity or distance between marks (may be possible to add a check)
    '''
    if len(ar) > 1:
        outfile_abspath = outfile_abspath.replace('c1', '')
    if not outfile_abspath.endswith(('.DZT', '.dzt')):
        outfile_abspath = outfile_abspath + '.DZT'
    outfile = open(outfile_abspath, 'wb')
    fx.printmsg('writing to: %s' % outfile.name)
    for i in range(header['rh_nchan']):
        fx.printmsg('writing DZT header for channel %s' % (i))
        # header should read all values per-channel no matter what
        outfile.write(struct.pack('<h', header['rh_tag']))
        outfile.write(struct.pack('<h', header['rh_data']))
        outfile.write(struct.pack('<h', header['rh_nsamp']))
        outfile.write(struct.pack('<h', 32)) # rhf_bits - for simplicity, just hard-coding 32 bit
        outfile.write(struct.pack('<h', header['rh_zero']))
        # byte 10
        outfile.write(struct.pack('<f', header['rhf_sps']))
        outfile.write(struct.pack('<f', header['rhf_spm'])) # dzt.py ln 94-97
        outfile.write(struct.pack('<f', header['rhf_mpm']))
        outfile.write(struct.pack('<f', header['rhf_position']))
        outfile.write(struct.pack('<f', header['rhf_range']))
        outfile.write(struct.pack('<h', header['rh_npass']))
        # byte 32
        outfile.write(writetime(header['rhb_cdt']))
        outfile.write(writetime(datetime.now())) # modification date/time
        # byte 40
        outfile.write(struct.pack('<h', header['rh_rgain']))
        outfile.write(struct.pack('<h', header['rh_nrgain']))
        outfile.write(struct.pack('<h', header['rh_text']))
        outfile.write(struct.pack('<h', header['rh_ntext']))
        outfile.write(struct.pack('<h', header['rh_proc']))
        outfile.write(struct.pack('<h', header['rh_nproc']))
        outfile.write(struct.pack('<h', header['rh_nchan']))
        outfile.write(struct.pack('<f', header['rhf_epsr'])) # dzt.py ln 121-126
        outfile.write(struct.pack('<f', header['rhf_top']))
        outfile.write(struct.pack('<f', header['rhf_depth']))
        # byte 66
        outfile.write(struct.pack('<f', header['rh_xstart'])) # part of rh_coordx
        outfile.write(struct.pack('<f', header['rh_xend'])) # part of rh_coordx
        outfile.write(struct.pack('<f', header['rhf_servo_level']))
        outfile.write(bytes(3)) # "reserved"
        outfile.write(struct.pack('B', header['rh_accomp']))
        outfile.write(struct.pack('<h', header['rh_sconfig']))
        outfile.write(struct.pack('<h', header['rh_spp']))
        outfile.write(struct.pack('<h', header['rh_linenum']))
        # byte 88
        outfile.write(struct.pack('<f', header['rh_ystart'])) # part of rh_coordy
        outfile.write(struct.pack('<f', header['rh_yend'])) # part of rh_coordy
        outfile.write(header['rh_96'])
        outfile.write(struct.pack('c', header['rh_dtype']))
        outfile.write(header['dzt_ant'][i])
        outfile.write(header['rh_112'])
        # byte 113
        outfile.write(header['vsbyte'])
        outfile.write(header['rh_name'])
        outfile.write(header['rh_chksum'])
        # byte 128
        outfile.write(header['INFOAREA'])
        outfile.write(header['rh_RGPS0'])
        outfile.write(header['rh_RGPS1'])
        i += 1
    outfile.write(header['header_extra'])
    stack = []
    i = 0
    for i in range(header['rh_nchan']):
        # replace zeroed rows
        stack.append(np.zeros((header['timezero'][i], ar[i].shape[1]),
                                    dtype=np.int32))
        stack.append(ar[i])
        i += 1
    writestack = np.vstack(tuple(stack))
    sh = writestack.shape
    writestack = writestack.T.reshape(-1)
    fx.printmsg('writing %s data samples for %s channels (%s x %s)'
          % (writestack.shape[0],
             int(len(stack)/2),
             sh[0], sh[1]))
    # hard coded to write 32 bit signed ints to keep lossiness to a minimum
    outfile.write(writestack.round().astype(np.int32, casting='unsafe').tobytes(order='C'))
    outfile.close()