文件
gpr-sidl-inv/readgssi/gps.py
葛峻恺 a02133baed program
Signed-off-by: 葛峻恺 <202115006@mail.sdu.edu.cn>
2025-04-07 12:31:46 +00:00

351 行
20 KiB
Python

from datetime import datetime, timedelta
import pynmea2
import math
import os
import numpy as np
import pandas as pd
from geopy.distance import geodesic
import readgssi.functions as fx
from readgssi.constants import TZ
from math import sin, cos, sqrt, atan2, radians
from shutil import copyfile
"""
contains functions for reading gps data from various formats
"""
def msgparse(msg):
"""
.. deprecated:: 0.0.12
This function returns the NMEA message variables shared by both RMC and GGA.
:param pynmea2.nmea.NMEASentence msg: A pynmea2 sentence object.
:rtype: :py:class:`datetime.datetime`, :py:class:`float`, :py:class:`float`
"""
return msg.timestamp, msg.latitude, msg.longitude
def readdzg(fi, frmt, header, verbose=False):
"""
A parser to extract gps data from DZG file format. DZG contains raw NMEA sentences, which should include at least RMC and GGA.
NMEA RMC sentence string format:
:py:data:`$xxRMC,UTC hhmmss,status,lat DDmm.sss,lon DDDmm.sss,SOG,COG,date ddmmyy,checksum \*xx`
NMEA GGA sentence string format:
:py:data:`$xxGGA,UTC hhmmss.s,lat DDmm.sss,lon DDDmm.sss,fix qual,numsats,hdop,mamsl,wgs84 geoid ht,fix age,dgps sta.,checksum \*xx`
Shared message variables between GGA and RMC: timestamp, latitude, and longitude
RMC contains a datestamp which makes it preferable, but this parser will read either.
:param str fi: File containing gps information
:param str frmt: GPS information format ('dzg' = DZG file containing gps sentence strings (see below); 'csv' = comma separated file with: lat,lon,elev,time)
:param dict header: File header produced by :py:func:`readgssi.dzt.readdzt`
:param bool verbose: Verbose, defaults to False
:rtype: GPS data (pandas.DataFrame)
The dataframe contains the following fields:
* datetimeutc (:py:class:`datetime.datetime`)
* trace (:py:class:`int` trace number)
* longitude (:py:class:`float`)
* latitude (:py:class:`float`)
* altitude (:py:class:`float`)
* velocity (:py:class:`float`)
* sec_elapsed (:py:class:`float`)
* meters (:py:class:`float` meters traveled)
"""
if header['rhf_spm'] == 0:
spu = header['rhf_sps']
else:
spu = header['rhf_spm']
array = pd.DataFrame(columns=['datetimeutc', 'trace', 'longitude', 'latitude', # our dataframe
'altitude', 'velocity', 'sec_elapsed', 'meters'])
trace = 0 # the elapsed number of traces iterated through
tracenum = 0 # the sequential increase in trace number
rownp = 0 # array row number
rowrmc = 0 # rmc record iterated through (gps file)
rowgga = 0 # gga record
sec_elapsed = 0 # number of seconds since the start of the line
m = 0 # meters traveled over entire line
m0, m1 = 0, 0 # meters traveled as of last, current loop
u = 0 # velocity
u0 = 0 # velocity on last loop
timestamp = False
prevtime = False
init_time = False
td = False
prevtrace = False
rmc, gga = False, False
rmcwarn = True
lathem = 'north'
lonhem = 'east'
x0, x1, y0, y1 = False, False, False, False # coordinates
z0, z1 = 0, 0
x2, y2, z2, sec2 = 0, 0, 0, 0
with open(fi, 'r') as gf:
if verbose:
fx.printmsg('using gps file: %s' % (fi))
if frmt == 'dzg': # if we're working with DZG format
for ln in gf: # loop through the first few sentences, check for RMC
if 'RMC' in ln: # check to see if RMC sentence (should occur before GGA)
rmc = True
if rowrmc == 0:
msg = pynmea2.parse(ln.rstrip()) # convert gps sentence to pynmea2 named tuple
ts0 = datetime.combine(msg.datestamp, msg.timestamp).replace(tzinfo=TZ) # row 0's timestamp (not ideal)
if rowrmc == 1:
msg = pynmea2.parse(ln.rstrip())
ts1 = datetime.combine(msg.datestamp, msg.timestamp).replace(tzinfo=TZ) # row 1's timestamp (not ideal)
td = ts1 - ts0 # timedelta = datetime1 - datetime0
rowrmc += 1
if 'GGA' in ln:
gga = True
if rowgga == 0:
msg = pynmea2.parse(ln.rstrip()) # convert gps sentence to pynmea2 named tuple
ts0 = datetime.combine(datetime(1980, 1, 1), msg.timestamp).replace(tzinfo=TZ) # row 0's timestamp (not ideal)
if rowgga == 1:
msg = pynmea2.parse(ln.rstrip())
ts1 = datetime.combine(datetime(1980, 1, 1), msg.timestamp).replace(tzinfo=TZ) # row 1's timestamp (not ideal)
td = ts1 - ts0 # timedelta = datetime1 - datetime0
rowgga += 1
gpssps = 1 / td.total_seconds() # GPS samples per second
if (rmcwarn) and (rowrmc == 0):
fx.printmsg('WARNING: no RMC sentences found in GPS records. this could become an issue if your file goes through 00:00:00.')
fx.printmsg(" if you get a time jump error please open a github issue at https://github.com/iannesbitt/readgssi/issues")
fx.printmsg(" and attach the verbose output of this script plus a zip of the DZT and DZG files you're working with.")
rmcwarn = False
if (rmc and gga) and (rowrmc != rowgga):
if verbose:
fx.printmsg('WARNING: GGA and RMC sentences are not recorded at the same rate! This could cause unforseen problems!')
fx.printmsg(' rmc: %i records' % rowrmc)
fx.printmsg(' gga: %i records' % rowgga)
if verbose:
ss0, ss1, ss2 = '', '', ''
if gga:
ss0 = 'GGA'
if rmc:
ss2 = 'RMC'
if gga and rmc:
ss1 = ' and '
fx.printmsg('found %i %s%s%s GPS epochs at rate of ~%.2f Hz' % (rowrmc, ss0, ss1, ss2, gpssps))
fx.printmsg('reading gps locations to data frame...')
gf.seek(0) # back to beginning of file
rowgga, rowrmc = 0, 0
for ln in gf: # loop over file line by line
if '$GSSIS' in ln:
# if it's a GSSI sentence, grab the scan/trace number
trace = int(ln.split(',')[1])
if (rmc and gga) and ('GGA' in ln):
# RMC doesn't use altitude so if it exists we include it from a neighboring GGA
z1 = pynmea2.parse(ln.rstrip()).altitude
if rowrmc != rowgga:
# this takes care of the case where RMC lines occur above GGA
z0 = array['altitude'].iat[rowgga]
array['altitude'].iat[rowgga] = z1
rowgga += 1
if rmc == True: # if there is RMC, we can use the full datestamp but there is no altitude
if 'RMC' in ln:
msg = pynmea2.parse(ln.rstrip())
timestamp = datetime.combine(msg.datestamp, msg.timestamp).replace(tzinfo=TZ) # set t1 for this loop
u = msg.spd_over_grnd * 0.514444444 # convert from knots to m/s
sec1 = timestamp.timestamp()
x1, y1 = float(msg.longitude), float(msg.latitude)
if msg.lon_dir in 'W':
lonhem = 'west'
if msg.lat_dir in 'S':
lathem = 'south'
if rowrmc != 0:
elapsedelta = timestamp - prevtime # t1 - t0 in timedelta format
elapsed = float((timestamp-init_time).total_seconds()) # seconds elapsed
m += u * elapsedelta.total_seconds()
else:
u = 0
m = 0
elapsed = 0
if verbose:
fx.printmsg('record starts in %s and %s hemispheres' % (lonhem, lathem))
x0, y0, z0, sec0, m0 = x1, y1, z1, sec1, m # set xyzs0 for next loop
prevtime = timestamp # set t0 for next loop
if rowrmc == 0:
init_time = timestamp
prevtrace = trace
array = array.append({'datetimeutc':timestamp.strftime('%Y-%m-%d %H:%M:%S.%f %z'),
'trace':trace, 'longitude':x1, 'latitude':y1, 'altitude':z1,
'velocity':u, 'sec_elapsed':elapsed, 'meters':m}, ignore_index=True)
rowrmc += 1
else: # if no RMC, we hope there is no UTC 00:00:00 in the file.........
if 'GGA' in ln:
msg = pynmea2.parse(ln.rstrip())
if msg.timestamp:
timestamp = datetime.combine(header['rhb_cdt'], msg.timestamp).replace(tzinfo=TZ) # set t1 for this loop
sec1 = timestamp.timestamp()
x1, y1 = float(msg.longitude), float(msg.latitude)
try:
z1 = float(msg.altitude)
except AttributeError:
z1 = 0
if msg.lon_dir in 'W':
lonhem = 'west'
if msg.lat_dir in 'S':
lathem = 'south'
if rowgga != 0:
#If the difference in Altitude (z) between points is not 0, uses Pythagorean Theorem to better calculate the distance
if abs(z1 - z0) > 1e-6:
m_geodesic = geodesic((y1, x1), (y0, x0)).meters
m += math.sqrt(m_geodesic**2 + (z1 - z0)**2)
else:
m += geodesic((y1, x1, z1), (y0, x0, z0)).meters
if rmc == False:
u = float((m - m0) / (sec1 - sec0))
elapsedelta = timestamp - prevtime # t1 - t0 in timedelta format
elapsed = float((timestamp-init_time).total_seconds()) # seconds elapsed
if elapsed > 3600.0:
fx.printmsg("WARNING: Time jumps by more than an hour in this GPS dataset and there are no RMC sentences to anchor the datestamp!")
fx.printmsg(" This dataset may cross over the UTC midnight dateline!\nprevious timestamp: %s\ncurrent timestamp: %s" % (prevtime, timestamp))
fx.printmsg(" trace number: %s" % trace)
else:
u = 0
m = 0
elapsed = 0
if verbose:
fx.printmsg('record starts in %s and %s hemispheres' % (lonhem, lathem))
x0, y0, z0, sec0, m0 = x1, y1, z1, sec1, m # set xyzs0 for next loop
prevtime = timestamp # set t0 for next loop
if rowgga == 0:
init_time = timestamp
prevtrace = trace
array = array.append({'datetimeutc':timestamp.strftime('%Y-%m-%d %H:%M:%S.%f %z'),
'trace':trace, 'longitude':x1, 'latitude':y1, 'altitude':z1,
'velocity':u, 'sec_elapsed':elapsed, 'meters':m}, ignore_index=True)
rowgga += 1
if verbose:
if rmc:
fx.printmsg('processed %i gps epochs (RMC)' % (rowrmc))
else:
fx.printmsg('processed %i gps epochs (GGA)' % (rowgga))
elif frmt == 'csv':
with open(fi, 'r') as f:
gps = np.fromfile(f)
array['datetimeutc'] = pd.to_datetime(array['datetimeutc'], format='%Y-%m-%d %H:%M:%S.%f +0000', utc=True)
array.set_index('datetimeutc', inplace=True)
## testing purposes
if True:
if verbose:
fx.printmsg('writing GPS to %s-gps.csv' % (fi))
array.to_csv('%s-gps.csv' % (fi))
return array
def pause_correct(header, dzg_file, verbose=False, **kwargs):
'''
This is a streamlined way of removing pauses from DZG files and re-assigning trace values.
GSSI controllers have a bug in which GPS sentences are collected with increasing trace numbers even though radar trace collection is stopped.
This results in a misalignment between GPS and radar traces of the same number.
The function attempts to realign the trace numbering in the GPS file by identifying "pauses", i.e. when velocity drops below a certain value for longer than a certain number of epochs.
This assumes that each GPS pause period is correctly timed with the surveyor pressing the pause/unpause button on the radar control unit.
.. warning:: This function will identify and remove ALL pauses longer than 3 epochs and renumber the traces accordingly. Obviously this can have unintended consequences if the radar controller remains collecting data during these periods. Please be extremely cautious and only use this functionality on files you know have radar pauses that are accompanied by movement pauses. A backup of the original DZG file is made each time this function is run on a file, which means that if you make a mistake, you can simply copy the DZG backup (.DZG.bak) and overwrite the output (.DZG).
.. note:: Any time you are working with original files, it is always good to have a "working" and "raw" copy of your data. Experimental functionality in readgssi cannot be held responsible for its actions in modifying data. You are responsible for keeping a raw backup of your data just in case.
A detailed explanation of each step taken by this function is available in the code comments.
:param dict header: File header produced by :py:func:`readgssi.dzt.readdzt`
:param str dzg_file: DZG GPS file (the original .DZG, not the backup)
:param float threshold: Numerical velocities threshold, under which will be considered a "pause" (default: 0.25)
:param bool verbose: Verbose, defaults to False
:rtype: corrected, de-paused GPS data (pandas.DataFrame)
'''
output_file = dzg_file # we need RADAN and readgssi to find the modified DZG file
backup_file = dzg_file + '.bak' # so we back it up and rewrite the original
# ensuring a DZG backup
if os.path.isfile(dzg_file): # if there is a DZG
if not os.path.isfile(backup_file): # if there is not a backup, make one
if verbose:
fx.printmsg('backing up original DZG file to %s' % (backup_file))
copyfile(dzg_file, backup_file)
else: # if there already is a backup
if verbose:
fx.printmsg('found backed up DZG file %s' % (backup_file))
dzg_file = backup_file # we always want to draw from the backup and write to the main DZG file
# threshold default
threshold = 0.25
# get threshold value from kwargs if it exists
for arg in kwargs:
if arg == "threshold":
if (type(kwargs[arg]) == float) or (type(kwargs[arg]) == int):
threshold = float(kwargs[arg])
# pandas ninja maneuvers to get a list of pause boundaries
orig_gps = readdzg(fi=backup_file, frmt='dzg', header=header, verbose=False) # get original GPS values
orig_gps['groups'] = pd.cut(orig_gps.velocity,[-1,threshold,100000]) # segment file into groups based on velocity
orig_gps['cats'] = (orig_gps.groups != orig_gps.groups.shift()).cumsum() # give each group a number
orig_gps['threshold'] = 0 # create threshold column
orig_gps.loc[3:-3, 'threshold'] = threshold # ignoring start and end epochs, assign threshold
orig_gps['mask'] = orig_gps['velocity'] < orig_gps['threshold'] # where does velocity fall beneath threshold
exceeds = orig_gps[orig_gps['mask'] == True] # create a view based on non exceedence rows
bounds = []
if exceeds.cats.count() > 0: # test whether any exceedences were detected
for i in range(exceeds.cats.max() + 1): # for each group in the view
if exceeds[exceeds['cats'] == i].count().cats > 3: # are there more than 3 epochs in the group?
bounds.append([exceeds[exceeds['cats'] == i].iloc[0].name, # if so create a data point in the list
exceeds[exceeds['cats'] == i].iloc[-1].name, # with time and trace number of boundary
exceeds[exceeds['cats'] == i].iloc[0].trace,
exceeds[exceeds['cats'] == i].iloc[-1].trace])
def new_row_value(trace): # quick and dirty function to figure out new trace number
subtract = 0
for pause in bounds:
if (trace >= pause[2]) and (trace >= pause[3]):
subtract = subtract + pause[3] - pause[2]
return trace - subtract
if verbose:
fx.printmsg('found %s pause periods' % len(bounds))
if len(bounds) > 0:
i = 1
for pause in bounds:
fx.printmsg('pause %s' % i)
fx.printmsg(' start trace: %s (%s)' % (pause[2], pause[0]))
fx.printmsg(' end trace: %s (%s)' % (pause[3], pause[1]))
i += 1
if verbose:
fx.printmsg('transcribing DZG file with new trace values...')
with open(dzg_file, 'r') as gf: # gps file
with open(output_file, 'w') as tf: # transcription file
subtract_amount = 0
for ln in gf: # loop over backup file line by line
if '$GSSIS' in ln:
write = True # assume this line will get written
# if it's a GSSI sentence, grab the scan/trace number
trace = int(ln.split(',')[1])
for pause in bounds: # for each pause period
if (trace >= pause[2]) and (trace <= pause[3]): # if trace is in pause bounds
write = False # do not write this group of sentences to file
if write: # if it is outside of a pause period
tf.write(ln.replace(str(trace), str(new_row_value(trace)))) # replace trace value and write line to file
else: # if it's not a GSSI proprietary line
if write: # and it's outside a pause priod
tf.write(ln) # transcribe line to new file
if verbose:
fx.printmsg('done. reading new values into array...')
else:
fx.printmsg('no dzg file found at (%s)' % (dzg_file))
return readdzg(fi=output_file, frmt='dzg', header=header, verbose=verbose)