gpr-sidl-inv/readgssi/gps.py

from datetime import datetime, timedelta
import pynmea2
import math
import os
import numpy as np
import pandas as pd
from geopy.distance import geodesic
import readgssi.functions as fx
from readgssi.constants import TZ
from math import sin, cos, sqrt, atan2, radians
from shutil import copyfile

"""
contains functions for reading gps data from various formats
"""

def msgparse(msg):
    """
    .. deprecated:: 0.0.12

    This function returns the NMEA message variables shared by both RMC and GGA.

    :param pynmea2.nmea.NMEASentence msg: A pynmea2 sentence object.
    :rtype: :py:class:`datetime.datetime`, :py:class:`float`, :py:class:`float`
    """
    return msg.timestamp, msg.latitude, msg.longitude

def readdzg(fi, frmt, header, verbose=False):
    """
    A parser to extract gps data from DZG file format. DZG contains raw NMEA sentences, which should include at least RMC and GGA.

    NMEA RMC sentence string format:
    :py:data:`$xxRMC,UTC hhmmss,status,lat DDmm.sss,lon DDDmm.sss,SOG,COG,date ddmmyy,checksum \*xx`

    NMEA GGA sentence string format:
    :py:data:`$xxGGA,UTC hhmmss.s,lat DDmm.sss,lon DDDmm.sss,fix qual,numsats,hdop,mamsl,wgs84 geoid ht,fix age,dgps sta.,checksum \*xx`

    Shared message variables between GGA and RMC: timestamp, latitude, and longitude

    RMC contains a datestamp which makes it preferable, but this parser will read either.

    :param str fi: File containing gps information
    :param str frmt: GPS information format ('dzg' = DZG file containing gps sentence strings (see below); 'csv' = comma separated file with: lat,lon,elev,time)
    :param dict header: File header produced by :py:func:`readgssi.dzt.readdzt`
    :param bool verbose: Verbose, defaults to False
    :rtype: GPS data (pandas.DataFrame)

        The dataframe contains the following fields:
        * datetimeutc (:py:class:`datetime.datetime`)
        * trace (:py:class:`int` trace number)
        * longitude (:py:class:`float`)
        * latitude (:py:class:`float`)
        * altitude (:py:class:`float`)
        * velocity (:py:class:`float`)
        * sec_elapsed (:py:class:`float`)
        * meters (:py:class:`float` meters traveled)

    """
    if header['rhf_spm'] == 0:
        spu = header['rhf_sps']
    else:
        spu = header['rhf_spm']
    array = pd.DataFrame(columns=['datetimeutc', 'trace', 'longitude', 'latitude', # our dataframe
                                  'altitude', 'velocity', 'sec_elapsed', 'meters'])

    trace = 0 # the elapsed number of traces iterated through
    tracenum = 0 # the sequential increase in trace number
    rownp = 0 # array row number
    rowrmc = 0 # rmc record iterated through (gps file)
    rowgga = 0 # gga record
    sec_elapsed = 0 # number of seconds since the start of the line
    m = 0 # meters traveled over entire line
    m0, m1 = 0, 0 # meters traveled as of last, current loop
    u = 0 # velocity
    u0 = 0 # velocity on last loop
    timestamp = False
    prevtime = False
    init_time = False
    td = False
    prevtrace = False
    rmc, gga = False, False
    rmcwarn = True
    lathem = 'north'
    lonhem = 'east'
    x0, x1, y0, y1 = False, False, False, False # coordinates
    z0, z1 = 0, 0
    x2, y2, z2, sec2 = 0, 0, 0, 0
    with open(fi, 'r') as gf:
        if verbose:
            fx.printmsg('using gps file:     %s' % (fi))
        if frmt == 'dzg': # if we're working with DZG format
            for ln in gf: # loop through the first few sentences, check for RMC
                if 'RMC' in ln: # check to see if RMC sentence (should occur before GGA)
                    rmc = True
                    if rowrmc == 0:
                        msg = pynmea2.parse(ln.rstrip()) # convert gps sentence to pynmea2 named tuple
                        ts0 = datetime.combine(msg.datestamp, msg.timestamp).replace(tzinfo=TZ) # row 0's timestamp (not ideal)
                    if rowrmc == 1:
                        msg = pynmea2.parse(ln.rstrip())
                        ts1 = datetime.combine(msg.datestamp, msg.timestamp).replace(tzinfo=TZ) # row 1's timestamp (not ideal)
                        td = ts1 - ts0 # timedelta = datetime1 - datetime0
                    rowrmc += 1
                if 'GGA' in ln:
                    gga = True
                    if rowgga == 0:
                        msg = pynmea2.parse(ln.rstrip()) # convert gps sentence to pynmea2 named tuple
                        ts0 = datetime.combine(datetime(1980, 1, 1), msg.timestamp).replace(tzinfo=TZ) # row 0's timestamp (not ideal)
                    if rowgga == 1:
                        msg = pynmea2.parse(ln.rstrip())
                        ts1 = datetime.combine(datetime(1980, 1, 1), msg.timestamp).replace(tzinfo=TZ) # row 1's timestamp (not ideal)
                        td = ts1 - ts0 # timedelta = datetime1 - datetime0
                    rowgga += 1
            gpssps = 1 / td.total_seconds() # GPS samples per second
            if (rmcwarn) and (rowrmc == 0):
                fx.printmsg('WARNING: no RMC sentences found in GPS records. this could become an issue if your file goes through 00:00:00.')
                fx.printmsg("         if you get a time jump error please open a github issue at https://github.com/iannesbitt/readgssi/issues")
                fx.printmsg("         and attach the verbose output of this script plus a zip of the DZT and DZG files you're working with.")
                rmcwarn = False
            if (rmc and gga) and (rowrmc != rowgga):
                if verbose:
                    fx.printmsg('WARNING: GGA and RMC sentences are not recorded at the same rate! This could cause unforseen problems!')
                    fx.printmsg('    rmc: %i records' % rowrmc)
                    fx.printmsg('    gga: %i records' % rowgga)
            if verbose:
                ss0, ss1, ss2 = '', '', ''
                if gga:
                    ss0 = 'GGA'
                if rmc:
                    ss2 = 'RMC'
                if gga and rmc:
                    ss1 = ' and '
                fx.printmsg('found %i %s%s%s GPS epochs at rate of ~%.2f Hz' % (rowrmc, ss0, ss1, ss2, gpssps))
                fx.printmsg('reading gps locations to data frame...')

            gf.seek(0) # back to beginning of file
            rowgga, rowrmc = 0, 0
            for ln in gf: # loop over file line by line
                if '$GSSIS' in ln:
                    # if it's a GSSI sentence, grab the scan/trace number
                    trace = int(ln.split(',')[1])

                if (rmc and gga) and ('GGA' in ln):
                    # RMC doesn't use altitude so if it exists we include it from a neighboring GGA
                    z1 = pynmea2.parse(ln.rstrip()).altitude
                    if rowrmc != rowgga:
                        # this takes care of the case where RMC lines occur above GGA
                        z0 = array['altitude'].iat[rowgga]
                        array['altitude'].iat[rowgga] = z1
                    rowgga += 1

                if rmc == True: # if there is RMC, we can use the full datestamp but there is no altitude
                    if 'RMC' in ln:
                        msg = pynmea2.parse(ln.rstrip())
                        timestamp = datetime.combine(msg.datestamp, msg.timestamp).replace(tzinfo=TZ) # set t1 for this loop
                        u = msg.spd_over_grnd * 0.514444444 # convert from knots to m/s

                        sec1 = timestamp.timestamp()
                        x1, y1 = float(msg.longitude), float(msg.latitude)
                        if msg.lon_dir in 'W':
                            lonhem = 'west'
                        if msg.lat_dir in 'S':
                            lathem = 'south'
                        if rowrmc != 0:
                            elapsedelta = timestamp - prevtime # t1 - t0 in timedelta format
                            elapsed = float((timestamp-init_time).total_seconds()) # seconds elapsed
                            m += u * elapsedelta.total_seconds()
                        else:
                            u = 0
                            m = 0
                            elapsed = 0
                            if verbose:
                                fx.printmsg('record starts in %s and %s hemispheres' % (lonhem, lathem))
                        x0, y0, z0, sec0, m0 = x1, y1, z1, sec1, m # set xyzs0 for next loop
                        prevtime = timestamp # set t0 for next loop
                        if rowrmc == 0:
                            init_time = timestamp
                        prevtrace = trace
                        array = array.append({'datetimeutc':timestamp.strftime('%Y-%m-%d %H:%M:%S.%f %z'),
                                              'trace':trace, 'longitude':x1, 'latitude':y1, 'altitude':z1,
                                              'velocity':u, 'sec_elapsed':elapsed, 'meters':m}, ignore_index=True)
                        rowrmc += 1

                else: # if no RMC, we hope there is no UTC 00:00:00 in the file.........
                    if 'GGA' in ln:
                        msg = pynmea2.parse(ln.rstrip())
                        if  msg.timestamp:
                            timestamp = datetime.combine(header['rhb_cdt'], msg.timestamp).replace(tzinfo=TZ) # set t1 for this loop

                            sec1 = timestamp.timestamp()
                            x1, y1 = float(msg.longitude), float(msg.latitude)
                            try:
                                z1 = float(msg.altitude)
                            except AttributeError:
                                z1 = 0
                            if msg.lon_dir in 'W':
                                lonhem = 'west'
                            if msg.lat_dir in 'S':
                                lathem = 'south'
                            if rowgga != 0:
								#If the difference in Altitude (z) between points is not 0, uses Pythagorean Theorem to better calculate the distance
                                if abs(z1 - z0) > 1e-6:
                                    m_geodesic = geodesic((y1, x1), (y0, x0)).meters
                                    m += math.sqrt(m_geodesic**2 + (z1 - z0)**2)
                                else:
                                    m += geodesic((y1, x1, z1), (y0, x0, z0)).meters
                                if rmc == False:
                                    u = float((m - m0) / (sec1 - sec0))
                                elapsedelta = timestamp - prevtime # t1 - t0 in timedelta format
                                elapsed = float((timestamp-init_time).total_seconds()) # seconds elapsed
                                if elapsed > 3600.0:
                                    fx.printmsg("WARNING: Time jumps by more than an hour in this GPS dataset and there are no RMC sentences to anchor the datestamp!")
                                    fx.printmsg("         This dataset may cross over the UTC midnight dateline!\nprevious timestamp: %s\ncurrent timestamp:  %s" % (prevtime, timestamp))
                                    fx.printmsg("         trace number:       %s" % trace)
                            else:
                                u = 0
                                m = 0
                                elapsed = 0
                                if verbose:
                                    fx.printmsg('record starts in %s and %s hemispheres' % (lonhem, lathem))
                            x0, y0, z0, sec0, m0 = x1, y1, z1, sec1, m # set xyzs0 for next loop
                            prevtime = timestamp # set t0 for next loop
                            if rowgga == 0:
                                init_time = timestamp
                            prevtrace = trace
                            array = array.append({'datetimeutc':timestamp.strftime('%Y-%m-%d %H:%M:%S.%f %z'),
                                                  'trace':trace, 'longitude':x1, 'latitude':y1, 'altitude':z1,
                                                  'velocity':u, 'sec_elapsed':elapsed, 'meters':m}, ignore_index=True)
                            rowgga += 1

            if verbose:
                if rmc:
                    fx.printmsg('processed %i gps epochs (RMC)' % (rowrmc))
                else:
                    fx.printmsg('processed %i gps epochs (GGA)' % (rowgga))

        elif frmt == 'csv':
            with open(fi, 'r') as f:
                gps = np.fromfile(f)

    array['datetimeutc'] = pd.to_datetime(array['datetimeutc'], format='%Y-%m-%d %H:%M:%S.%f +0000', utc=True)
    array.set_index('datetimeutc', inplace=True)
    ## testing purposes
    if True:
        if verbose:
            fx.printmsg('writing GPS to %s-gps.csv' % (fi))
        array.to_csv('%s-gps.csv' % (fi))
    return array


def pause_correct(header, dzg_file, verbose=False, **kwargs):
    '''
    This is a streamlined way of removing pauses from DZG files and re-assigning trace values.
    GSSI controllers have a bug in which GPS sentences are collected with increasing trace numbers even though radar trace collection is stopped.
    This results in a misalignment between GPS and radar traces of the same number.
    The function attempts to realign the trace numbering in the GPS file by identifying "pauses", i.e. when velocity drops below a certain value for longer than a certain number of epochs.
    This assumes that each GPS pause period is correctly timed with the surveyor pressing the pause/unpause button on the radar control unit.

    .. warning:: This function will identify and remove ALL pauses longer than 3 epochs and renumber the traces accordingly. Obviously this can have unintended consequences if the radar controller remains collecting data during these periods. Please be extremely cautious and only use this functionality on files you know have radar pauses that are accompanied by movement pauses. A backup of the original DZG file is made each time this function is run on a file, which means that if you make a mistake, you can simply copy the DZG backup (.DZG.bak) and overwrite the output (.DZG).

    .. note:: Any time you are working with original files, it is always good to have a "working" and "raw" copy of your data. Experimental functionality in readgssi cannot be held responsible for its actions in modifying data. You are responsible for keeping a raw backup of your data just in case.

    A detailed explanation of each step taken by this function is available in the code comments.

    :param dict header: File header produced by :py:func:`readgssi.dzt.readdzt`
    :param str dzg_file: DZG GPS file (the original .DZG, not the backup)
    :param float threshold: Numerical velocities threshold, under which will be considered a "pause" (default: 0.25)
    :param bool verbose: Verbose, defaults to False
    :rtype: corrected, de-paused GPS data (pandas.DataFrame)
    '''
    output_file = dzg_file                  # we need RADAN and readgssi to find the modified DZG file
    backup_file = dzg_file + '.bak'         # so we back it up and rewrite the original
    # ensuring a DZG backup
    if os.path.isfile(dzg_file):            # if there is a DZG
        if not os.path.isfile(backup_file): # if there is not a backup, make one
            if verbose:
                fx.printmsg('backing up original DZG file to %s' % (backup_file))
            copyfile(dzg_file, backup_file)
        else:                               # if there already is a backup
            if verbose:
                fx.printmsg('found backed up DZG file %s' % (backup_file))
        dzg_file = backup_file              # we always want to draw from the backup and write to the main DZG file

        # threshold default
        threshold = 0.25
        # get threshold value from kwargs if it exists
        for arg in kwargs:
            if arg == "threshold":
                if (type(kwargs[arg]) == float) or (type(kwargs[arg]) == int):
                    threshold = float(kwargs[arg])

        # pandas ninja maneuvers to get a list of pause boundaries
        orig_gps = readdzg(fi=backup_file, frmt='dzg', header=header, verbose=False)    # get original GPS values
        orig_gps['groups'] = pd.cut(orig_gps.velocity,[-1,threshold,100000])            # segment file into groups based on velocity
        orig_gps['cats'] = (orig_gps.groups != orig_gps.groups.shift()).cumsum()        # give each group a number
        orig_gps['threshold'] = 0                                                       # create threshold column
        orig_gps.loc[3:-3, 'threshold'] = threshold                                     # ignoring start and end epochs, assign threshold
        orig_gps['mask'] = orig_gps['velocity'] < orig_gps['threshold']                 # where does velocity fall beneath threshold
        exceeds = orig_gps[orig_gps['mask'] == True]                                    # create a view based on non exceedence rows
        bounds = []
        if exceeds.cats.count() > 0:                                                    # test whether any exceedences were detected
            for i in range(exceeds.cats.max() + 1):                                     # for each group in the view
                if exceeds[exceeds['cats'] == i].count().cats > 3:                      # are there more than 3 epochs in the group?
                    bounds.append([exceeds[exceeds['cats'] == i].iloc[0].name,          # if so create a data point in the list
                                   exceeds[exceeds['cats'] == i].iloc[-1].name,         # with time and trace number of boundary
                                   exceeds[exceeds['cats'] == i].iloc[0].trace,
                                   exceeds[exceeds['cats'] == i].iloc[-1].trace])

        def new_row_value(trace):   # quick and dirty function to figure out new trace number
            subtract = 0
            for pause in bounds:
                if (trace >= pause[2]) and (trace >= pause[3]):
                    subtract = subtract + pause[3] - pause[2]
            return trace - subtract

        if verbose:
            fx.printmsg('found %s pause periods' % len(bounds))
        if len(bounds) > 0:
            i = 1
            for pause in bounds:
                fx.printmsg('pause %s' % i)
                fx.printmsg('  start trace: %s (%s)' % (pause[2], pause[0]))
                fx.printmsg('    end trace: %s (%s)' % (pause[3], pause[1]))
                i += 1

            if verbose:
                fx.printmsg('transcribing DZG file with new trace values...')
            with open(dzg_file, 'r') as gf:         # gps file
                with open(output_file, 'w') as tf:  # transcription file
                    subtract_amount = 0
                    for ln in gf: # loop over backup file line by line
                        if '$GSSIS' in ln:
                            write = True            # assume this line will get written
                            # if it's a GSSI sentence, grab the scan/trace number
                            trace = int(ln.split(',')[1])
                            for pause in bounds:    # for each pause period
                                if (trace >= pause[2]) and (trace <= pause[3]): # if trace is in pause bounds
                                    write = False   # do not write this group of sentences to file
                            if write:               # if it is outside of a pause period
                                tf.write(ln.replace(str(trace), str(new_row_value(trace))))   # replace trace value and write line to file
                        else:                       # if it's not a GSSI proprietary line
                            if write:               # and it's outside a pause priod
                                tf.write(ln)        # transcribe line to new file

            if verbose:
                fx.printmsg('done. reading new values into array...')

    else:
        fx.printmsg('no dzg file found at (%s)' % (dzg_file))

    return readdzg(fi=output_file, frmt='dzg', header=header, verbose=verbose)