In [3]:
%load_ext autoreload
%autoreload 2

import plotly
plotly.tools.set_credentials_file(username='jewang', api_key='KTVYkl1qBwfdOOKPjF2G')
import plotly.plotly as py
from  plotly.offline import iplot
import plotly.graph_objs as go
plotly.offline.init_notebook_mode(connected=True)
import cufflinks as cf
cf.go_offline()

import pandas as pd
import numpy as np
import glob
import random
from sklearn import preprocessing

# Make sure we are running Python 3
import sys
print(sys.version)
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
2.7.14 |Anaconda, Inc.| (default, Dec  7 2017, 11:07:58) 
[GCC 4.2.1 Compatible Clang 4.0.1 (tags/RELEASE_401/final)]

Read & trim data

In [4]:
FOLDER_NAMES = ["wingardium", "flippendo", "negative", "test"]
GESTURES = ["wingardium", "flippendo", "negative_trim"]

# Manually determined trims (copied from Google Sheets)
TRIMS = pd.read_csv('trim_ms.csv', header=0, index_col=0, squeeze=True)
TRIMS = pd.to_numeric(TRIMS, errors='coerce').to_dict()

SEGMENT_LEN_MS = 1500
SAMPLE_FREQ_HZ = 100
SEGMENT_LEN = int(SEGMENT_LEN_MS / SAMPLE_FREQ_HZ * 10) - 5
In [5]:
min_max_scaler = preprocessing.MinMaxScaler()

def read_data(folder_name):
    traces = []
    trace_names = []
    for f in sorted(glob.glob(folder_name + "/*.csv")):
        print(f)
        filename = f[f.find('/') + 1:-4]
        trace = pd.read_csv(f, header=0, index_col="time_ms")
        print(trace.shape)
        trace = trace.apply(pd.to_numeric, errors='coerce')
        trace["accel"] = np.linalg.norm((trace["accel_ms2_x"], trace["accel_ms2_y"], trace["accel_ms2_z"]), axis=0)
        trace["gyro"] = np.linalg.norm((trace['gyro_degs_x'], trace[u'gyro_degs_y'], trace[u'gyro_degs_z']), axis=0)
        
        
        normalized = min_max_scaler.fit_transform(trace['accel_ms2_z'].values.reshape(-1, 1))
        normalized = normalized[:, 0]
        trace["normalized_accel_z"] = normalized
        
        trace_names.append(filename)
        traces.append(trace)
    return trace_names, traces
        
traces = {}
trace_names = {}
for folder_name in FOLDER_NAMES:
    trace_names[folder_name], traces[folder_name] = read_data(folder_name)
wingardium/wingardium000.csv
(301, 24)
wingardium/wingardium001.csv
(297, 24)
wingardium/wingardium002.csv
(298, 24)
wingardium/wingardium003.csv
(301, 24)
wingardium/wingardium004.csv
(301, 24)
wingardium/wingardium005.csv
(300, 24)
wingardium/wingardium006.csv
(300, 24)
wingardium/wingardium007.csv
(301, 24)
wingardium/wingardium008.csv
(300, 24)
wingardium/wingardium009.csv
(301, 24)
wingardium/wingardium010.csv
(301, 24)
wingardium/wingardium011.csv
(300, 24)
wingardium/wingardium012.csv
(300, 24)
wingardium/wingardium013.csv
(300, 24)
wingardium/wingardium014.csv
(300, 24)
wingardium/wingardium015.csv
(299, 24)
wingardium/wingardium016.csv
(300, 24)
wingardium/wingardium017.csv
(300, 24)
wingardium/wingardium018.csv
(301, 24)
wingardium/wingardium019.csv
(300, 24)
wingardium/wingardium020.csv
(298, 24)
wingardium/wingardium021.csv
(299, 24)
wingardium/wingardium022.csv
(300, 24)
wingardium/wingardium023.csv
(300, 24)
wingardium/wingardium024.csv
(300, 24)
wingardium/wingardium025.csv
(301, 24)
wingardium/wingardium026.csv
(300, 24)
wingardium/wingardium027.csv
(300, 24)
wingardium/wingardium028.csv
(300, 24)
wingardium/wingardium029.csv
(300, 24)
wingardium/wingardium030.csv
(300, 24)
wingardium/wingardium031.csv
(300, 24)
wingardium/wingardium032.csv
(297, 24)
wingardium/wingardium033.csv
(299, 24)
wingardium/wingardium034.csv
(292, 24)
wingardium/wingardium035.csv
(300, 24)
wingardium/wingardium036.csv
(301, 24)
wingardium/wingardium037.csv
(300, 24)
wingardium/wingardium038.csv
(300, 24)
wingardium/wingardium039.csv
(300, 24)
wingardium/wingardium040.csv
(300, 24)
wingardium/wingardium041.csv
(301, 24)
wingardium/wingardium042.csv
(300, 24)
wingardium/wingardium043.csv
(300, 24)
wingardium/wingardium044.csv
(301, 24)
wingardium/wingardium045.csv
(300, 24)
wingardium/wingardium046.csv
(296, 24)
wingardium/wingardium047.csv
(299, 24)
wingardium/wingardium048.csv
(301, 24)
wingardium/wingardium049.csv
(300, 24)
wingardium/wingardium050.csv
(300, 24)
wingardium/wingardium051.csv
(300, 24)
wingardium/wingardium052.csv
(299, 24)
wingardium/wingardium053.csv
(300, 24)
wingardium/wingardium054.csv
(296, 24)
wingardium/wingardium055.csv
(300, 24)
wingardium/wingardium056.csv
(300, 24)
wingardium/wingardium057.csv
(300, 24)
wingardium/wingardium058.csv
(301, 24)
wingardium/wingardium059.csv
(300, 24)
wingardium/wingardium060.csv
(300, 24)
wingardium/wingardium061.csv
(301, 24)
wingardium/wingardium062.csv
(300, 24)
wingardium/wingardium063.csv
(300, 24)
wingardium/wingardium064.csv
(300, 24)
wingardium/wingardium065.csv
(296, 24)
wingardium/wingardium066.csv
(300, 24)
wingardium/wingardium067.csv
(300, 24)
wingardium/wingardium068.csv
(300, 24)
wingardium/wingardium069.csv
(300, 24)
wingardium/wingardium070.csv
(300, 24)
wingardium/wingardium071.csv
(300, 24)
wingardium/wingardium072.csv
(300, 24)
wingardium/wingardium073.csv
(301, 24)
wingardium/wingardium074.csv
(300, 24)
wingardium/wingardium075.csv
(301, 24)
wingardium/wingardium076.csv
(300, 24)
wingardium/wingardium077.csv
(300, 24)
wingardium/wingardium078.csv
(300, 24)
wingardium/wingardium079.csv
(300, 24)
wingardium/wingardium080.csv
(297, 24)
wingardium/wingardium081.csv
(301, 24)
wingardium/wingardium082.csv
(299, 24)
wingardium/wingardium083.csv
(300, 24)
wingardium/wingardium084.csv
(300, 24)
wingardium/wingardium085.csv
(300, 24)
wingardium/wingardium086.csv
(300, 24)
wingardium/wingardium087.csv
(300, 24)
wingardium/wingardium088.csv
(301, 24)
wingardium/wingardium089.csv
(301, 24)
wingardium/wingardium090.csv
(301, 24)
wingardium/wingardium091.csv
(300, 24)
wingardium/wingardium092.csv
(301, 24)
wingardium/wingardium093.csv
(301, 24)
wingardium/wingardium094.csv
(300, 24)
wingardium/wingardium095.csv
(299, 24)
wingardium/wingardium096.csv
(300, 24)
wingardium/wingardium097.csv
(298, 24)
wingardium/wingardium098.csv
(300, 24)
wingardium/wingardium099.csv
(300, 24)
flippendo/flippendo000.csv
(301, 24)
flippendo/flippendo001.csv
(300, 24)
flippendo/flippendo002.csv
(300, 24)
flippendo/flippendo003.csv
(300, 24)
flippendo/flippendo004.csv
(300, 24)
flippendo/flippendo005.csv
(301, 24)
flippendo/flippendo006.csv
(300, 24)
flippendo/flippendo007.csv
(300, 24)
flippendo/flippendo008.csv
(300, 24)
flippendo/flippendo009.csv
(301, 24)
flippendo/flippendo010.csv
(300, 24)
flippendo/flippendo011.csv
(300, 24)
flippendo/flippendo012.csv
(300, 24)
flippendo/flippendo013.csv
(298, 24)
flippendo/flippendo014.csv
(300, 24)
flippendo/flippendo015.csv
(300, 24)
flippendo/flippendo016.csv
(300, 24)
flippendo/flippendo017.csv
(300, 24)
flippendo/flippendo018.csv
(300, 24)
flippendo/flippendo019.csv
(301, 24)
flippendo/flippendo020.csv
(301, 24)
flippendo/flippendo021.csv
(300, 24)
flippendo/flippendo022.csv
(300, 24)
flippendo/flippendo023.csv
(300, 24)
flippendo/flippendo024.csv
(300, 24)
flippendo/flippendo025.csv
(297, 24)
flippendo/flippendo026.csv
(300, 24)
flippendo/flippendo027.csv
(300, 24)
flippendo/flippendo028.csv
(300, 24)
flippendo/flippendo029.csv
(300, 24)
flippendo/flippendo030.csv
(300, 24)
flippendo/flippendo031.csv
(300, 24)
flippendo/flippendo032.csv
(300, 24)
flippendo/flippendo033.csv
(299, 24)
flippendo/flippendo034.csv
(300, 24)
flippendo/flippendo035.csv
(301, 24)
flippendo/flippendo036.csv
(300, 24)
flippendo/flippendo037.csv
(300, 24)
flippendo/flippendo038.csv
(300, 24)
flippendo/flippendo039.csv
(300, 24)
flippendo/flippendo040.csv
(300, 24)
flippendo/flippendo041.csv
(300, 24)
flippendo/flippendo042.csv
(300, 24)
flippendo/flippendo043.csv
(300, 24)
flippendo/flippendo044.csv
(301, 24)
flippendo/flippendo045.csv
(300, 24)
flippendo/flippendo046.csv
(300, 24)
flippendo/flippendo047.csv
(300, 24)
flippendo/flippendo048.csv
(300, 24)
flippendo/flippendo049.csv
(301, 24)
flippendo/flippendo050.csv
(301, 24)
flippendo/flippendo051.csv
(300, 24)
flippendo/flippendo052.csv
(300, 24)
flippendo/flippendo053.csv
(297, 24)
flippendo/flippendo054.csv
(299, 24)
flippendo/flippendo055.csv
(300, 24)
flippendo/flippendo056.csv
(301, 24)
flippendo/flippendo057.csv
(300, 24)
flippendo/flippendo058.csv
(300, 24)
flippendo/flippendo059.csv
(301, 24)
flippendo/flippendo060.csv
(300, 24)
flippendo/flippendo061.csv
(299, 24)
flippendo/flippendo062.csv
(300, 24)
flippendo/flippendo063.csv
(300, 24)
flippendo/flippendo064.csv
(300, 24)
flippendo/flippendo065.csv
(300, 24)
flippendo/flippendo066.csv
(301, 24)
flippendo/flippendo067.csv
(298, 24)
flippendo/flippendo068.csv
(300, 24)
flippendo/flippendo069.csv
(301, 24)
flippendo/flippendo070.csv
(300, 24)
flippendo/flippendo071.csv
(299, 24)
flippendo/flippendo072.csv
(300, 24)
flippendo/flippendo073.csv
(300, 24)
flippendo/flippendo074.csv
(300, 24)
flippendo/flippendo075.csv
(300, 24)
flippendo/flippendo076.csv
(300, 24)
flippendo/flippendo077.csv
(301, 24)
flippendo/flippendo078.csv
(300, 24)
flippendo/flippendo079.csv
(300, 24)
flippendo/flippendo080.csv
(301, 24)
flippendo/flippendo081.csv
(299, 24)
flippendo/flippendo082.csv
(300, 24)
flippendo/flippendo083.csv
(301, 24)
flippendo/flippendo084.csv
(301, 24)
flippendo/flippendo085.csv
(300, 24)
flippendo/flippendo086.csv
(300, 24)
flippendo/flippendo087.csv
(300, 24)
flippendo/flippendo088.csv
(300, 24)
flippendo/flippendo089.csv
(300, 24)
flippendo/flippendo090.csv
(299, 24)
flippendo/flippendo091.csv
(300, 24)
flippendo/flippendo092.csv
(301, 24)
flippendo/flippendo093.csv
(299, 24)
flippendo/flippendo094.csv
(300, 24)
flippendo/flippendo095.csv
(300, 24)
flippendo/flippendo096.csv
(300, 24)
flippendo/flippendo097.csv
(301, 24)
flippendo/flippendo098.csv
(300, 24)
flippendo/flippendo099.csv
(300, 24)
negative/negative000.csv
(5987, 24)
negative/negative001.csv
(5986, 24)
negative/negative002.csv
(5972, 24)
negative/negative003.csv
(5971, 24)
negative/negative004.csv
(5982, 24)
negative/negative005.csv
(5980, 24)
negative/negative006.csv
(5982, 24)
negative/negative007.csv
(5984, 24)
test/test000.csv
(1997, 24)
test/test001.csv
(1996, 24)
test/test002.csv
(1996, 24)
test/test003.csv
(1994, 24)
test/test004.csv
(1997, 24)
test/test005.csv
(1998, 24)
test/test006.csv
(1994, 24)
test/test007.csv
(1995, 24)
In [6]:
# Split the long negative traces in 1.5 second segments
traces["negative_trim"] = []
trace_names["negative_trim"] = []
for i, trace in enumerate(traces["negative"]):
    for j in range(0, len(trace), 500):
        # Pad an extra 10 to make sure are generous with data points
        traces["negative_trim"].append(trace[j:j + SEGMENT_LEN_MS + 10])
        trace_names["negative_trim"].append(trace_names["negative"][i] + '_' + str(j))
In [7]:
# Trim traces
for gesture in GESTURES:
    for i, trace in enumerate(traces[gesture]):
        filename = trace_names[gesture][i]
        
        if filename in TRIMS:
            trim = TRIMS[filename]  
            if np.isnan(trim) or trim < SEGMENT_LEN_MS:
                print('DROPPING TRACE', filename, 'TRIM IS TOO EARLY / BLACKLISTED')
                del traces[gesture][i]
                del trace_names[gesture][i]
                continue
            trim += random.randrange(0, 200)
            trace = trace[trim - SEGMENT_LEN_MS:trim]
            
        if len(trace) < SEGMENT_LEN:
            print('DROPPING TRACE', filename, 'NOT ENOUGH DATA POINTS')
            del traces[gesture][i]
            del trace_names[gesture][i]
            continue
            
        traces[gesture][i] = trace.iloc[len(trace) - SEGMENT_LEN:]
        print(filename)
        trace.index = trace.index - trace.index[0]
        traces[gesture][i] = trace
wingardium000
wingardium001
wingardium002
wingardium003
wingardium004
wingardium005
wingardium006
wingardium007
wingardium008
wingardium009
wingardium010
wingardium011
wingardium012
wingardium013
wingardium014
wingardium015
wingardium016
wingardium017
wingardium018
wingardium019
wingardium020
wingardium021
wingardium022
wingardium023
wingardium024
wingardium025
wingardium026
wingardium027
wingardium028
wingardium029
wingardium030
wingardium031
wingardium032
wingardium033
wingardium034
wingardium035
wingardium036
wingardium037
wingardium038
wingardium039
wingardium040
wingardium041
wingardium042
('DROPPING TRACE', 'wingardium043', 'TRIM IS TOO EARLY / BLACKLISTED')
wingardium045
('DROPPING TRACE', 'wingardium046', 'TRIM IS TOO EARLY / BLACKLISTED')
wingardium048
wingardium049
wingardium050
wingardium051
wingardium052
wingardium053
wingardium054
wingardium055
wingardium056
wingardium057
wingardium058
wingardium059
wingardium060
wingardium061
wingardium062
wingardium063
wingardium064
wingardium065
wingardium066
wingardium067
wingardium068
wingardium069
wingardium070
wingardium071
wingardium072
wingardium073
wingardium074
wingardium075
wingardium076
wingardium077
wingardium078
wingardium079
wingardium080
wingardium081
wingardium082
wingardium083
wingardium084
wingardium085
wingardium086
wingardium087
wingardium088
wingardium089
wingardium090
wingardium091
wingardium092
wingardium093
wingardium094
wingardium095
wingardium096
wingardium097
wingardium098
wingardium099
flippendo000
flippendo001
flippendo002
flippendo003
flippendo004
('DROPPING TRACE', 'flippendo005', 'TRIM IS TOO EARLY / BLACKLISTED')
flippendo007
flippendo008
flippendo009
flippendo010
flippendo011
flippendo012
flippendo013
flippendo014
flippendo015
flippendo016
flippendo017
flippendo018
flippendo019
flippendo020
flippendo021
flippendo022
flippendo023
flippendo024
flippendo025
flippendo026
flippendo027
flippendo028
flippendo029
flippendo030
flippendo031
flippendo032
flippendo033
flippendo034
flippendo035
flippendo036
flippendo037
flippendo038
flippendo039
flippendo040
flippendo041
flippendo042
flippendo043
flippendo044
flippendo045
flippendo046
flippendo047
flippendo048
flippendo049
flippendo050
flippendo051
flippendo052
flippendo053
flippendo054
flippendo055
flippendo056
flippendo057
flippendo058
flippendo059
flippendo060
flippendo061
flippendo062
flippendo063
flippendo064
flippendo065
flippendo066
flippendo067
flippendo068
flippendo069
flippendo070
flippendo071
flippendo072
flippendo073
flippendo074
flippendo075
flippendo076
flippendo077
flippendo078
flippendo079
flippendo080
flippendo081
flippendo082
flippendo083
flippendo084
flippendo085
flippendo086
flippendo087
flippendo088
flippendo089
flippendo090
flippendo091
flippendo092
flippendo093
flippendo094
flippendo095
flippendo096
flippendo097
flippendo098
flippendo099
negative000_0
negative000_500
negative000_1000
negative000_1500
negative000_2000
negative000_2500
negative000_3000
negative000_3500
negative000_4000
negative000_4500
negative000_5000
negative000_5500
('DROPPING TRACE', 'negative001_0', 'NOT ENOUGH DATA POINTS')
negative001_1000
negative001_1500
negative001_2000
negative001_2500
negative001_3000
negative001_3500
negative001_4000
negative001_4500
negative001_5000
negative001_5500
('DROPPING TRACE', 'negative002_0', 'NOT ENOUGH DATA POINTS')
negative002_1000
negative002_1500
negative002_2000
negative002_2500
negative002_3000
negative002_3500
negative002_4000
negative002_4500
negative002_5000
negative002_5500
('DROPPING TRACE', 'negative003_0', 'NOT ENOUGH DATA POINTS')
negative003_1000
negative003_1500
negative003_2000
negative003_2500
negative003_3000
negative003_3500
negative003_4000
negative003_4500
negative003_5000
negative003_5500
('DROPPING TRACE', 'negative004_0', 'NOT ENOUGH DATA POINTS')
negative004_1000
negative004_1500
negative004_2000
negative004_2500
negative004_3000
negative004_3500
negative004_4000
negative004_4500
negative004_5000
negative004_5500
('DROPPING TRACE', 'negative005_0', 'NOT ENOUGH DATA POINTS')
negative005_1000
negative005_1500
negative005_2000
negative005_2500
negative005_3000
negative005_3500
negative005_4000
negative005_4500
negative005_5000
negative005_5500
('DROPPING TRACE', 'negative006_0', 'NOT ENOUGH DATA POINTS')
negative006_1000
negative006_1500
negative006_2000
negative006_2500
negative006_3000
negative006_3500
negative006_4000
negative006_4500
negative006_5000
negative006_5500
('DROPPING TRACE', 'negative007_0', 'NOT ENOUGH DATA POINTS')
negative007_1000
negative007_1500
negative007_2000
negative007_2500
negative007_3000
negative007_3500
negative007_4000
negative007_4500
negative007_5000
negative007_5500

Visualize our data

In [8]:
# List of all sensors
print(traces["wingardium"][0].columns)
Index([u'Unnamed: 0', u'delta_ms', u'accel_ms2_x', u'accel_ms2_y',
       u'accel_ms2_z', u'mag_uT_x', u'mag_uT_y', u'mag_uT_z', u'gyro_degs_x',
       u'gyro_degs_y', u'gyro_degs_z', u'euler_deg_x', u'euler_deg_y',
       u'euler_deg_z', u'quaternion_w', u'quaternion_x', u'quaternion_y',
       u'quaternion_z', u'lin_accel_ms2_x', u'lin_accel_ms2_y',
       u'lin_accel_ms2_z', u'gravity_ms2_x', u'gravity_ms2_y',
       u'gravity_ms2_z', u'accel', u'gyro', u'normalized_accel_z'],
      dtype='object')
In [9]:
SENSORS_LIST = ["accel_ms2_x", "accel_ms2_y", "accel_ms2_z"]
# SENSORS_LIST = ["gyro_degs_x", "gyro_degs_y", "gyro_degs_z"]
# SENSORS_LIST = ["accel"]
SENSORS_LIST = ["normalized_accel_z"]
def plot_all(gesture, large=False):
    for i, trace in enumerate(traces[gesture]):
        data = [] 
        annotations = []
        for sensor in SENSORS_LIST:
            data.append(go.Scatter(
                x = trace.index,
                y = trace[sensor],
                name = sensor,
                line = dict(width = 4 if large else 1)))
            
#             annotations.append(dict(xref='paper', x=1, y=trace[sensor].iloc[-1],
#                                   xanchor='left', yanchor='middle',
#                                   text=sensor,
#                                   showarrow=False))
        layout = go.Layout(
            title = trace_names[gesture][i],
            annotations = annotations,
            font=dict(size=28 if large else 8),
            width=1000,
            margin=go.layout.Margin(r=200, pad=5),
            xaxis = dict(title='Time (ms)'))
            #showlegend=False,
            #yaxis = dict(range=[0, 25])
            #              )
        iplot({'data': data, 'layout': layout}, filename='jupyter-basic_bar')
In [10]:
plot_all("wingardium")
In [11]:
plot_all("flippendo")
In [12]:
plot_all("test")
In [13]:
plot_all("negative_trim")

Traditional Machine Learning: Feature Extraction

In [14]:
import utils

print(utils)

def get_all_features(trace, generate_feature_names=False):
    features = utils.get_model_features(trace, generate_feature_names)
    
    if generate_feature_names:
        features.append('accel_z_peaks')
    else:
        normalized = min_max_scaler.fit_transform(trace['accel_ms2_z'].values.reshape(-1, 1))[:, 0] # normalize
        normalized = normalized[0:len(normalized):5] # subsample
        normalized = np.diff((normalized > 0.77).astype(int)) # convert to binary classifier
        normalized = normalized[normalized>0]
        features.append(sum(normalized))
    
    return features
<module 'utils' from 'utils.py'>
In [15]:
# # Sandbox for developing new features

import scipy.fftpack
import matplotlib.pyplot as plot

d = {'wingardium': [], 'flippendo': []}
for gesture in d.keys():
    for trace in traces[gesture]:
        normalized = min_max_scaler.fit_transform(trace['accel_ms2_z'].values.reshape(-1, 1))
        normalized = normalized[:, 0]
        normalized = normalized[0:len(normalized):5]
        normalized = np.diff((normalized > 0.77).astype(int))
        normalized = normalized[normalized>0]
        d[gesture].append(sum(normalized))

    viz = pd.Series(d[gesture])
    if gesture is 'wingardium':
        print('Wingardium peak count accuracy', float(len(viz[viz == 2])) / len(viz))
    else: 
        print('Flippendo peak count accuracy', float(len(viz[viz == 3])) / len(viz))
        
    plot.hist(d[gesture],density=1, bins=20) 
    plot.show()
('Flippendo peak count accuracy', 0.7474747474747475)
<matplotlib.figure.Figure at 0x1098e62d0>
('Wingardium peak count accuracy', 0.7551020408163265)
<matplotlib.figure.Figure at 0x10b93b810>
In [16]:
# Feature Extraction

Y = GESTURES[:]
X = []
FEATURE_NAMES = []
AVG_X = []

for gesture in GESTURES:
    samples = []
    sum_samples = []
    for trace in traces[gesture]:
        if not FEATURE_NAMES:
            FEATURE_NAMES = get_all_features(trace, True)
    
        feature_values = get_all_features(trace)
        
        if not sum_samples:
            sum_samples = feature_values[:]
        else:
            sum_samples = [sum_samples[i] + feature_values[i] for i in range(len(feature_values))]
        samples.append(feature_values)
        
    AVG_X.append([x / float(len(traces[gesture])) for x in sum_samples])
    X.append(samples)
In [17]:
# Average features for each gesture

average_sample = pd.DataFrame(AVG_X, Y, columns=FEATURE_NAMES).transpose()
display(average_sample)
average_sample.iplot()
wingardium flippendo negative_trim
max_accel 22.140673 24.141960 14.908660
min_accel 1.300394 2.239699 6.627922
range_accel 20.840278 21.902261 8.280737
mean_accel 9.910532 12.481682 9.976962
std_accel 5.636141 5.405400 1.478801
max_gyro 13.245858 15.395331 3.693669
min_gyro 0.627105 0.614052 0.220477
range_gyro 12.618753 14.781279 3.473192
mean_gyro 5.938019 7.582554 1.276698
std_gyro 3.801472 3.913202 0.757407
accel_z_peaks 2.183673 2.949495 2.033708
accel_z_peaks 2.183673 2.949495 2.033708

Traditional Machine Learning: Training

In [18]:
# Put data in SKLearn format and split into train/test datasets

for gesture in GESTURES:
    print(gesture, len(traces[gesture]))
    
from sklearn.model_selection import train_test_split

# Flatten the data
X_flat = []
y_flat = []
for i in range(len(X)):
    X_flat += X[i]
    y_flat += [GESTURES[i]] * len(X[i])
X_flat = np.array(X_flat)
y_flat = np.array(y_flat)

# Generate pretty table of all for display
pretty_table = pd.DataFrame(X_flat, columns=FEATURE_NAMES)
pretty_table['y'] = pd.DataFrame(y_flat)
# display(pretty_table)

# Split into training and test set
X_train, X_test, y_train, y_test = train_test_split(X_flat, y_flat, test_size=0.33, random_state=42)
print(X_train.shape)
# print(y_train)
print(X_test.shape)
# print(y_test)
('wingardium', 98)
('flippendo', 99)
('negative_trim', 89)
(191, 12)
(95, 12)
In [19]:
# Linear SVC Model

# Actually train the model
from sklearn import svm
from sklearn.metrics import confusion_matrix
model = svm.LinearSVC(max_iter=100000)
model.fit(X_train, y_train) 

# How did we do? 
print("Score:", model.score(X_test, y_test))
predictions = model.predict(X_test)
display(confusion_matrix(y_test, predictions))
df = pd.concat([pd.Series(predictions), pd.Series(y_test)], axis=1)
df.columns=["predicted", "actual"]
display(df)

# Save the model
from sklearn.externals import joblib
joblib.dump(model, 'models/' + str(len(X_flat)) + 'pt_model.joblib') 
('Score:', 1.0)
array([[30,  0,  0],
       [ 0, 29,  0],
       [ 0,  0, 36]])
predicted actual
0 wingardium wingardium
1 negative_trim negative_trim
2 flippendo flippendo
3 negative_trim negative_trim
4 negative_trim negative_trim
5 flippendo flippendo
6 negative_trim negative_trim
7 wingardium wingardium
8 flippendo flippendo
9 wingardium wingardium
10 flippendo flippendo
11 flippendo flippendo
12 flippendo flippendo
13 negative_trim negative_trim
14 wingardium wingardium
15 negative_trim negative_trim
16 negative_trim negative_trim
17 negative_trim negative_trim
18 wingardium wingardium
19 negative_trim negative_trim
20 flippendo flippendo
21 wingardium wingardium
22 negative_trim negative_trim
23 wingardium wingardium
24 flippendo flippendo
25 negative_trim negative_trim
26 negative_trim negative_trim
27 negative_trim negative_trim
28 flippendo flippendo
29 wingardium wingardium
... ... ...
65 wingardium wingardium
66 negative_trim negative_trim
67 wingardium wingardium
68 wingardium wingardium
69 wingardium wingardium
70 flippendo flippendo
71 wingardium wingardium
72 wingardium wingardium
73 flippendo flippendo
74 negative_trim negative_trim
75 wingardium wingardium
76 flippendo flippendo
77 flippendo flippendo
78 negative_trim negative_trim
79 flippendo flippendo
80 negative_trim negative_trim
81 flippendo flippendo
82 wingardium wingardium
83 flippendo flippendo
84 negative_trim negative_trim
85 wingardium wingardium
86 flippendo flippendo
87 flippendo flippendo
88 flippendo flippendo
89 wingardium wingardium
90 flippendo flippendo
91 negative_trim negative_trim
92 wingardium wingardium
93 flippendo flippendo
94 wingardium wingardium

95 rows × 2 columns

Out[19]:
['models/286pt_model.joblib']

Set up for deep learning (incomplete)

In [20]:
from sklearn.model_selection import train_test_split

# Put data in SKLearn format 
traces_flat = []
y_flat = []

# We cap the number of data points in a trace so they are 
# all the same length for our model. Subtract a bit because we are running off
# a non-real-time-system
trace_len_start = 2
trace_len_end = int(SEGMENT_LEN_MS / SAMPLE_FREQ_HZ * 10) - 2

for gesture in GESTURES:
    for trace in traces[gesture]:
        segment = trace[
                ["accel_ms2_x", 
                 "accel_ms2_y", 
                 "accel_ms2_z", 
                 "gyro_degs_x", 
                 "gyro_degs_y", 
                 "gyro_degs_z", 
                 "accel", 
                 "gyro"]]
        traces_flat.append(segment.iloc[trace_len_start:trace_len_end]) 
        y_flat.append(gesture)
    
print("Number of traces: ", len(traces_flat), 
      "\nNumber of features: ", len(traces_flat[0].columns) * len(traces_flat[0]), 
      "\nNumber of sensor streams: ", len(traces_flat[0].columns), 
      "\nNumber of data points per trace", len(traces_flat[0]))

# Split into training and test set
traces_train, traces_test, y_train, y_test = train_test_split(traces_flat, y_flat, test_size=0.33, random_state=42)
print("Number of training traces: ", len(traces_train))
# print(y_train)
print("Number of test traces: ", len(traces_test))
# print(y_test)
('Number of traces: ', 286, '\nNumber of features: ', 1168, '\nNumber of sensor streams: ', 8, '\nNumber of data points per trace', 146)
('Number of training traces: ', 191)
('Number of test traces: ', 95)