import ipykernel
import folium
import pandas as pd
import numpy as np
from scipy import stats
import nvector
import math
import colorlover
from functools import lru_cache
%matplotlib inline
import plotly
plotly.offline.init_notebook_mode()
from plotly.graph_objs import *
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot
import cufflinks as cf
cf.set_config_file(offline=True, world_readable=True, theme='pearl')
gps_points = pd.read_csv('./data/sample3.csv')
def extract_time(point):
try:
time_str = str(int(point['time_value']))
time_str = "0"*(8-len(time_str)) + time_str
datetime_str = str(int(point['date_value'])) + time_str
return pd.to_datetime(datetime_str, format='%d%m%y%H%M%S%f')
except Exception as e:
# print(point, e)
pass
def GEO_2_ECEF(lat, lng, alt):
wgs84 = nvector.FrameE(name='WGS84')
point = wgs84.GeoPoint(latitude=lat, longitude=lng, z=alt, degrees=True)
p_EB_E = point.to_ecef_vector()
return p_EB_E.pvector.ravel()
def ECEF_2_GEO(x, y, z):
wgs84 = nvector.FrameE(name='WGS84')
pos = np.vstack((x, y, z)) # m
point = wgs84.ECEFvector(pos).to_geo_point()
return point.latitude_deg[0], point.longitude_deg[0], point.z[0]
def GEO_dist(lat, lng, alt, lat_ref, lng_ref, alt_ref):
wgs84 = nvector.FrameE(name='WGS84')
point = wgs84.GeoPoint(latitude=lat, longitude=lng, z=alt, degrees=True)
point_ref = wgs84.GeoPoint(latitude=lat_ref, longitude=lng_ref, z=alt_ref, degrees=True)
ellipsoidal_dist_12, _, _ = point_ref.distance_and_azimuth(point)
return ellipsoidal_dist_12
def GEO_2_NEU(lat, lng, alt, lat_ref, lng_ref, alt_ref):
wgs84 = nvector.FrameE(name='WGS84')
pointA = wgs84.GeoPoint(latitude=lat_ref, longitude=lng_ref, z=alt_ref, degrees=True)
pointB = wgs84.GeoPoint(latitude=lat, longitude=lng, z=alt, degrees=True)
p_AB_ECEF = nvector.diff_positions(pointA, pointB)
frame_NEU = nvector.FrameN(pointA)
p_AB_NEU = p_AB_ECEF.change_frame(frame_NEU).pvector.ravel()
return p_AB_NEU
def pre_processing(gps_points):
# Need more than 10 points to accept a path
gps_points = gps_points.groupby('id').filter(lambda x: len(x) > 10)
# Need more than 3 satellites to accept a point
gps_points = gps_points[gps_points['satellites_value'] > 3]
gps_points['time'] = gps_points.apply(extract_time, axis=1)
gps_points = gps_points[gps_points['time'] > pd.Timestamp('2016-01-01')] # Remove Error
def PD_GEO_2_ECEF(row):
x, y, z = GEO_2_ECEF(row['location_lat'],
row['location_lng'],
row['location_alt'])
return pd.Series({'location_x': x, 'location_y': y, 'location_z': z})
ecef = gps_points.apply(PD_GEO_2_ECEF, axis=1)
gps_points = gps_points.join(ecef)
@lru_cache(maxsize=None)
def get_center(path_id):
path = gps_points[gps_points.id==path_id]
return path[path.satellites_value >= 6].mean()[['location_x', 'location_y', 'location_z']]
center_ecef = gps_points['id'].apply(get_center)
center_ecef.columns = ['center_x', 'center_y', 'center_z']
gps_points = gps_points.join(center_ecef)
def PD_ECEF_2_GEO(row):
lat, lng, alt = ECEF_2_GEO(row['center_x'],
row['center_y'],
row['center_z'])
return pd.Series({'center_lat': lat, 'center_lng': lng, 'center_alt': alt})
center_geo = gps_points.apply(PD_ECEF_2_GEO, axis=1)
gps_points = gps_points.join(center_geo)
def get_distance(row, flat=False):
return GEO_dist(row['location_lat'],
row['location_lng'],
0 if flat else row['location_alt'],
row['center_lat'],
row['center_lng'],
0 if flat else row['center_alt'])
gps_points['distance'] = gps_points.apply(get_distance, axis=1)
gps_points['distance_2D'] = gps_points.apply(get_distance, axis=1, flat=True)
def PD_GEO_2_NEU(row):
n, e, d = GEO_2_NEU(row['location_lat'], row['location_lng'], row['location_alt'],
row['center_lat'], row['center_lng'] , row['center_alt'])
return pd.Series({'north': n, 'east': e, 'up': d})
neu = gps_points.apply(PD_GEO_2_NEU, axis=1)
gps_points = gps_points.join(neu)
gps_points['delta_lat'] = gps_points['location_lat'] - gps_points['center_lat']
gps_points['delta_lng'] = gps_points['location_lng'] - gps_points['center_lng']
gps_points['delta_alt'] = gps_points['location_alt'] - gps_points['center_alt']
gps_points['delta_x'] = gps_points['location_x'] - gps_points['center_x']
gps_points['delta_y'] = gps_points['location_y'] - gps_points['center_y']
gps_points['delta_z'] = gps_points['location_z'] - gps_points['center_z']
return gps_points
gps_points = pre_processing(gps_points)
gps_points.columns
groupedby = gps_points.groupby('satellites_value')
# https://plot.ly/~ben_derv/8/number-of-extracted-positions-by-satellites/
groupedby.size().iplot(
kind='bar',
title="Number of extracted positions by satellites",
yTitle="Number of extracted positions",
xTitle="Number of satellites",
dimensions=(500,400)
)
# https://plot.ly/~ben_derv/12/distribution-of-recorded-positions/
def draw_points_by_satellites(path):
iplot({
'data': [
{
'x': path[path['satellites_value']==sat]['location_lng'],
'y': path[path['satellites_value']==sat]['location_lat'],
'name': sat,
'mode': 'markers',
} for sat in range(3, path['satellites_value'].max()+1)
],
'layout': {
'xaxis': {'title': 'Longitude'},
'yaxis': {'title': 'Latitude'},
'title': 'Distribution of recorded positions'
}
})
draw_points_by_satellites(gps_points[gps_points.id==8])
# https://plot.ly/~ben_derv/4/cumulated-of-gps-record-by-distance/
def draw_distributed_cumsum(path):
def distributed_cumsum(path):
values, base = np.histogram(path['distance_2D'], bins=40)
cumulative = np.cumsum(values)
return cumulative/cumulative[-1], base
iplot({
'data': [{
'y': distributed_cumsum(path[path['satellites_value'] == sat])[0],
'x': distributed_cumsum(path[path['satellites_value'] == sat])[1],
'name': sat,
} for sat in range(3, path['satellites_value'].max()+1)
],
'layout': {
'xaxis': {'title': 'Distance from center (meters)'},
'yaxis': {'title': '% cumulated of positions recorded'},
'title': 'Cumulated % of GPS record by distance '
}
})
draw_distributed_cumsum(gps_points)
v
# https://plot.ly/~ben_derv/14/lat-vs-lng/
def draw_2d_distribution(path):
scl = colorlover.scales['9']['seq']['Blues']
colorscale = [ [ float(i)/float(len(scl)-1), scl[i] ] for i in range(len(scl)) ]
path = path[path['distance_2D'] < 20]
x_grid = np.linspace(
path['location_lng'].min(),
path['location_lng'].max(),
100)
y_grid = np.linspace(
path['location_lat'].min(),
path['location_lat'].max(),
100)
def kde_scipy(x, x_grid, bandwidth=0.2 ):
kde = stats.gaussian_kde(x, bw_method=bandwidth / x.std(ddof=1) )
return kde.evaluate(x_grid)
trace1 = Histogram2dContour(
x=path['location_lng'],
y=path['location_lat'],
name='density',
ncontours=20,
colorscale=colorscale,
showscale=False
)
trace2 = Histogram(
x=path['location_lng'],
name='x density',
yaxis='y2',
histnorm='probability density',
marker=dict(color='rgb(217, 217, 217)'),
nbinsx=25
)
trace2s = Scatter(
x=x_grid,
y=kde_scipy( path['location_lng'].as_matrix(), x_grid ),
yaxis='y2',
line = dict( color='rgb(31, 119, 180)' ),
fill='tonexty',
)
trace3 = Histogram(
y=path['location_lat'],
name='y density',
xaxis='x2',
histnorm='probability density',
marker=dict(color='rgb(217, 217, 217)'),
nbinsy=50
)
trace3s = Scatter(
y=y_grid,
x=kde_scipy( path['location_lat'].as_matrix(), y_grid ),
xaxis='x2',
line = dict( color='rgb(31, 119, 180)' ),
fill='tonextx',
)
data = [trace1, trace2, trace2s, trace3, trace3s]
layout = Layout(
showlegend=False,
autosize=False,
width=700,
height=700,
hovermode='closest',
bargap=0,
xaxis=dict(domain=[0, 0.746], linewidth=2, linecolor='#444', title='Lng',
showgrid=False, zeroline=False, ticks='', showline=True, mirror=True),
yaxis=dict(domain=[0, 0.746],linewidth=2, linecolor='#444', title='Lat',
showgrid=False, zeroline=False, ticks='', showline=True, mirror=True),
xaxis2=dict(domain=[0.75, 1], showgrid=False, zeroline=False, ticks='',
showticklabels=False ),
yaxis2=dict(domain=[0.75, 1], showgrid=False, zeroline=False, ticks='',
showticklabels=False ),
)
iplot(Figure(data=data, layout=layout))
draw_2d_distribution(gps_points)
#https://plot.ly/22/~ben_derv/#
groupedby.mean()['hdop_value'].iplot(
kind='bar',
title="HDOP average by number of satellites",
xTitle="Number of satellites",
yTitle='HDOP',
dimensions=(800,500)
)
# LATITUDE
groupedby['north'].mean().iplot(
kind='bar',
title="Average latitude distance from center by satellites",
xTitle='Number of satellites',
yTitle='Latitude distance (meters)',
dimensions=(500,400)
)
groupedby['north'].std().iplot(
kind='bar',
title="Standard deviation of latitude distance from center by satellites",
xTitle='Number of satellites',
yTitle='Latitude distance (meters)',
dimensions=(500,400)
)
# LATITUDE
groupedby['east'].mean().iplot(
kind='bar',
title="Average longitude distance from center by satellites",
xTitle='Number of satellites',
yTitle='Longitude distance (meters)',
dimensions=(500,400)
)
groupedby['east'].std().iplot(
kind='bar',
title="STD of longitude distance from center by satellites",
xTitle='Number of satellites',
yTitle='Longitude distance (meters)',
dimensions=(500,400)
)
groupedby['up'].mean().iplot(
kind='bar',
title="Altitude mean by number of satellites (meters)",
xTitle='Number of satellites',
yTitle='Altitude distance',
dimensions=(500,400)
)
groupedby['up'].std().iplot(
kind='bar',
title="Altitude std by number of satellites (meters)",
xTitle='Number of satellites',
yTitle='Altitude distance',
dimensions=(500,400)
)
def auto_correlation(x):
res = np.correlate(x, x, mode='full')
return res / max(res)
path_time = gps_points.set_index(gps_points['time']).sort_index()
path = path_time[path_time.id==7]
iplot({'data': [
{
'y': auto_correlation(list(path[col].values - path[col].values.mean())),
# 'x': (path_time.max()['time'] - path.time).astype('timedelta64[m]'),
'name': col
} for col in ['north', 'east', 'satellites_value', 'hdop_value']
],
'layout': {
'xaxis': {'title': 'Time shift (5 seconds delta)'},
'yaxis': {'title': 'Normalized auto-correlation'},
'title': 'Auto-correlation'
}
})
gps_points = pd.read_csv('./data/sample.csv')
def create_map(data, filename='gps.html', max_points=100):
center = [data['location_lat'].mean(), data['location_lng'].mean()]
map_osm = folium.Map(location=center, zoom_start=18)
for _, record in data.iterrows():
if _ > max_points:
print('WARNING: DATASET TOO LARGE')
break
map_osm.add_children(folium.Marker(location=[record['location_lat'], record['location_lng']], popup=str(record['id'])))
map_osm.save(outfile=filename)
path = gps_points[gps_points.id==452]
create_map(path)
%%HTML
<iframe width=100% height=500 src="gps.html"></iframe>
def linear_regression(path):
slope, intercept, r_value, p_value, std_err = stats.linregress(path['location_lat'], path['location_lng'])
line = slope*path['location_lat'] + intercept
data = Data([{
'x': path['location_lat'],
'y': path['location_lng'],
'mode': 'markers',
'name':'Data'
}, {
'y': line,
'x': path['location_lat'],
'name':'Linear Fit'
}])
layout = Layout(
annotations = [Annotation(
x=path['location_lat'].mean(),
y=path['location_lng'].mean()+3*path['location_lng'].std(),
text='r_value = {}'.format(r_value),
showarrow=False,
font=Font(size=16)
)]
)
iplot(Figure(data=data, layout=layout))
linear_regression(path)
# https://plot.ly/~ben_derv/0/gps/
# This is a special record in 'mode 1', which record every 5 meters. It allow us to 'investigate'
# the noise received by the satellites
gps_points = pd.read_csv('./data/sample2.csv')
draw_points_by_satellites(gps_points)