Gesture-Phase-Detection


https://archive.ics.uci.edu/ml/datasets/Gesture+Phase+Segmentation

The dataset is composed of features extracted from 7 videos of people gesticulating, aimed at studying gesture phase segmentation. Each video is represented by two files: a raw file, which contains the position of the hands, wrists, head and spine of the user in each frame; and a processed file, which contains the velocity and acceleration of the hands and wrists. See the dataset description for more information.

Raw files: 18 numeric attributes (double), a timestamp and a class attribute (nominal). Processed files: 32 numeric attributes (double) and a class attribute (nominal). A feature vector with up to 50 numeric attributes can be generated by combining the two files.
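
As a rough sketch, assuming the a1_raw.csv / a1_va3.csv pair loaded below, the two files for one video can be combined into that 50-attribute feature vector like this (the raw file contributes the 18 positional columns and the processed file the 32 kinematic ones, with the timestamp and the duplicate class column dropped):

import pandas as pd

raw = pd.read_csv("a1_raw.csv", skiprows=[1, 2, 3, 4])   # positions + timestamp + phase
processed = pd.read_csv("a1_va3.csv")                    # velocities/accelerations + Phase

# 18 positional attributes from the raw file + 32 kinematic attributes from the processed file
features = pd.concat([raw.drop(['timestamp', 'phase'], axis=1),
                      processed.drop('Phase', axis=1)], axis=1)
labels = processed['Phase']
print(features.shape)   # expected: (n_frames, 50)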

In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
from sklearn.cross_validation import train_test_split
from sklearn import cross_validation, metrics
from sklearn import preprocessing
import matplotlib
import matplotlib.pyplot as plt
In [2]:
# read .csv from provided dataset
csv_filename1="a1_raw.csv"
csv_filename2="a1_va3.csv"

# df=pd.read_csv(csv_filename,index_col=0)
df1=pd.read_csv(csv_filename1 , skiprows=[1,2,3,4])
df2=pd.read_csv(csv_filename2)
In [3]:
df1.head()
Out[3]:
lhx lhy lhz rhx rhy rhz hx hy hz sx sy sz lwx lwy lwz rwx rwy rwz timestamp phase
0 5.003160 4.278530 1.542866 4.985812 4.182155 1.520330 5.037557 1.619226 1.778925 5.052367 4.225485 1.775536 4.983912 4.296833 1.569889 5.193762 4.335417 1.560144 5702167 Rest
1 5.064488 4.290401 1.542146 4.955739 4.163175 1.511876 5.037724 1.618397 1.779722 5.045395 4.223284 1.777401 5.000410 4.301358 1.566544 5.164159 4.313107 1.552097 5702307 Rest
2 5.067825 4.290883 1.542058 4.928284 4.157637 1.511306 5.038332 1.618043 1.780080 5.045374 4.223690 1.777571 5.001656 4.299812 1.566537 5.136817 4.307087 1.551576 5702338 Rest
3 5.070332 4.290677 1.541985 4.916637 4.151067 1.510510 5.038742 1.618044 1.780114 5.045767 4.224827 1.777669 5.002672 4.298810 1.566489 5.125220 4.300282 1.550805 5702370 Rest
4 5.071611 4.290927 1.542046 4.906132 4.143034 1.509449 5.042224 1.618561 1.780209 5.047422 4.223671 1.778054 5.012298 4.298582 1.565061 5.114789 4.292008 1.549765 5702432 Rest
In [4]:
df1.shape
Out[4]:
(1743, 20)
In [5]:
df1['phase'].unique()
Out[5]:
array(['Rest', 'Preparation', 'Stroke', 'Hold', 'Retraction'], dtype=object)
In [6]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df1['phase'] = le.fit_transform(df1['phase'])
In [7]:
df1['phase'].unique()
Out[7]:
array([2, 1, 4, 0, 3], dtype=int64)
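
LabelEncoder assigns integer codes in sorted order of the class names, so the mapping behind these codes can be recovered from the fitted encoder (a quick check using the le object above):

# Hold -> 0, Preparation -> 1, Rest -> 2, Retraction -> 3, Stroke -> 4
print(dict(zip(le.classes_, le.transform(le.classes_))))
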
In [8]:
df2.head()
Out[8]:
1 2 3 4 5 6 7 8 9 10 ... 24 25 26 27 28 29 30 31 32 Phase
0 -0.005009 -0.000964 0.000573 0.008623 0.005667 0.001302 -0.000631 0.000130 -0.000048 0.007762 ... 1.880800e-04 0.005133 0.010400 0.000646 0.007871 0.004631 0.000963 0.000092 0.000438 D
1 0.004905 0.001209 -0.000649 0.004737 0.003166 0.000819 -0.000572 -0.000015 0.000023 0.002706 ... -7.500000e-07 0.005093 0.005756 0.000573 0.003459 0.000730 0.000332 0.000012 0.000433 D
2 -0.002393 -0.000216 0.000136 0.003028 0.001212 0.000336 -0.000449 0.000017 0.000047 0.002868 ... -3.920000e-05 0.002406 0.003279 0.000452 0.003261 0.002412 0.000852 0.000042 0.000202 D
3 -0.001394 -0.000242 0.000056 0.001182 0.000575 0.000225 -0.000479 -0.000050 0.000104 0.001171 ... -3.184000e-05 0.001416 0.001334 0.000493 0.001358 0.000313 0.000611 0.000029 0.000596 D
4 -0.000156 -0.000004 0.000023 0.001585 0.000630 0.000094 -0.000303 0.000097 0.000065 0.001579 ... -2.015000e-05 0.000158 0.001709 0.000325 0.001713 0.000203 0.000069 0.000038 0.000069 D

5 rows × 33 columns

In [9]:
df2.shape
Out[9]:
(1743, 33)
In [10]:
df2['Phase'].unique()
Out[10]:
array(['D', 'P', 'S', 'H', 'R'], dtype=object)
In [11]:
df1.columns
Out[11]:
Index([u'lhx', u'lhy', u'lhz', u'rhx', u'rhy', u'rhz', u'hx', u'hy', u'hz',
       u'sx', u'sy', u'sz', u'lwx', u'lwy', u'lwz', u'rwx', u'rwy', u'rwz',
       u'timestamp', u'phase'],
      dtype='object')
In [12]:
df2.columns
Out[12]:
Index([u'1', u'2', u'3', u'4', u'5', u'6', u'7', u'8', u'9', u'10', u'11',
       u'12', u'13', u'14', u'15', u'16', u'17', u'18', u'19', u'20', u'21',
       u'22', u'23', u'24', u'25', u'26', u'27', u'28', u'29', u'30', u'31',
       u'32', u'Phase'],
      dtype='object')
In [13]:
df2.rename(columns={'Phase': 'phase'}, inplace=True)
In [14]:
df1.phase.unique()
Out[14]:
array([2, 1, 4, 0, 3], dtype=int64)
In [15]:
df2.phase.unique()
Out[15]:
array(['D', 'P', 'S', 'H', 'R'], dtype=object)
In [16]:
a = df2.phase == 'D'
b = df2.phase == 'P'
c = df2.phase == 'S'
d = df2.phase == 'H'
e = df2.phase == 'R'

df2.loc[a,'phase'] = 'Rest'
df2.loc[b,'phase'] = 'Preparation'
df2.loc[c,'phase'] = 'Stroke'
df2.loc[d,'phase'] = 'Hold'
df2.loc[e,'phase'] = 'Retraction'
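
The same recoding could be done in one step with Series.map; this is only an equivalent sketch of the boolean-mask block above, not an additional cell to run after it:

# replaces the five masks above; unmatched codes would become NaN
df2['phase'] = df2['phase'].map({'D': 'Rest', 'P': 'Preparation', 'S': 'Stroke',
                                 'H': 'Hold', 'R': 'Retraction'})
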
In [17]:
df2.head(3)
Out[17]:
1 2 3 4 5 6 7 8 9 10 ... 24 25 26 27 28 29 30 31 32 phase
0 -0.005009 -0.000964 0.000573 0.008623 0.005667 0.001302 -0.000631 0.000130 -0.000048 0.007762 ... 1.880800e-04 0.005133 0.010400 0.000646 0.007871 0.004631 0.000963 0.000092 0.000438 Rest
1 0.004905 0.001209 -0.000649 0.004737 0.003166 0.000819 -0.000572 -0.000015 0.000023 0.002706 ... -7.500000e-07 0.005093 0.005756 0.000573 0.003459 0.000730 0.000332 0.000012 0.000433 Rest
2 -0.002393 -0.000216 0.000136 0.003028 0.001212 0.000336 -0.000449 0.000017 0.000047 0.002868 ... -3.920000e-05 0.002406 0.003279 0.000452 0.003261 0.002412 0.000852 0.000042 0.000202 Rest

3 rows × 33 columns

In [18]:
df2.phase.unique()
Out[18]:
array(['Rest', 'Preparation', 'Stroke', 'Hold', 'Retraction'], dtype=object)
In [19]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df2['phase'] = le.fit_transform(df2['phase'])
In [20]:
df2.phase.unique()
Out[20]:
array([2, 1, 4, 0, 3], dtype=int64)
In [21]:
df1.groupby('phase').count()
Out[21]:
lhx lhy lhz rhx rhy rhz hx hy hz sx sy sz lwx lwy lwz rwx rwy rwz timestamp
phase
0 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39 39
1 163 163 163 163 163 163 163 163 163 163 163 163 163 163 163 163 163 163 163
2 694 694 694 694 694 694 694 694 694 694 694 694 694 694 694 694 694 694 694
3 191 191 191 191 191 191 191 191 191 191 191 191 191 191 191 191 191 191 191
4 656 656 656 656 656 656 656 656 656 656 656 656 656 656 656 656 656 656 656
In [22]:
df2.groupby('phase').count()
Out[22]:
1 2 3 4 5 6 7 8 9 10 ... 23 24 25 26 27 28 29 30 31 32
phase
0 39 39 39 39 39 39 39 39 39 39 ... 39 39 39 39 39 39 39 39 39 39
1 163 163 163 163 163 163 163 163 163 163 ... 163 163 163 163 163 163 163 163 163 163
2 694 694 694 694 694 694 694 694 694 694 ... 694 694 694 694 694 694 694 694 694 694
3 191 191 191 191 191 191 191 191 191 191 ... 191 191 191 191 191 191 191 191 191 191
4 656 656 656 656 656 656 656 656 656 656 ... 656 656 656 656 656 656 656 656 656 656

5 rows × 32 columns

In [23]:
df1.sort_values(by='phase', inplace=True)
In [24]:
df2.sort_values(by='phase', inplace=True)
In [25]:
df2.tail()
Out[25]:
1 2 3 4 5 6 7 8 9 10 ... 24 25 26 27 28 29 30 31 32 phase
1117 0.013652 -0.005854 -0.000167 0.007993 0.011719 -0.001821 0.011699 -0.006116 -0.000023 0.007367 ... -0.000594 0.014855 0.014302 0.013201 0.012085 0.004621 0.012173 0.004385 0.009172 4
1118 0.025679 0.029140 0.004168 0.034294 0.008579 0.001769 0.025674 0.029158 0.004169 0.017613 ... 0.000532 0.039063 0.035395 0.039073 0.018241 0.012018 0.008623 0.012314 0.005680 4
1119 0.036604 0.044513 0.005200 0.032754 -0.018750 -0.000209 0.024677 0.029102 0.003881 0.026038 ... -0.000077 0.057864 0.037742 0.038353 0.028690 0.006093 0.008853 0.000335 0.003600 4
371 -0.000753 0.010647 -0.004304 0.005471 0.010892 -0.005108 0.003455 0.007538 -0.003794 0.001323 ... 0.000180 0.011508 0.013216 0.009119 0.008922 0.005335 0.005696 0.004074 0.003962 4
871 -0.001645 0.001315 0.000080 0.010487 0.026691 -0.000251 -0.005443 -0.005227 0.000392 -0.007014 ... -0.000067 0.002108 0.028678 0.007557 0.017659 0.000165 0.001954 0.004185 0.004051 4

5 rows × 33 columns

In [26]:
left = pd.DataFrame({'key2': ['0', '2', '1', '3', '0', '1'],
                     'A': ['A0', 'A1', 'A2', 'A3', 'A4', 'A5'],
                     'B': ['B0', 'B1', 'B2', 'B3', 'B4', 'B5']})

right = pd.DataFrame({'key2': ['0', '1', '2', '0', '1', '3'],
                      'C': ['C0', 'C1', 'C2', 'C3', 'C4', 'C5'],
                      'D': ['D0', 'D1', 'D2', 'D3', 'D4', 'D5']})
In [244]:
left
Out[244]:
A B key2
0 A0 B0 0
1 A1 B1 2
2 A2 B2 1
3 A3 B3 3
4 A4 B4 0
5 A5 B5 1
In [245]:
right
Out[245]:
C D key2
0 C0 D0 0
1 C1 D1 1
2 C2 D2 2
3 C3 D3 0
4 C4 D4 1
5 C5 D5 3
In [248]:
left.sort_values(by='key2', inplace=True)
In [249]:
left
Out[249]:
A B key2
0 A0 B0 0
4 A4 B4 0
2 A2 B2 1
5 A5 B5 1
1 A1 B1 2
3 A3 B3 3
In [246]:
right.sort_values(by='key2', inplace=True)
In [247]:
right
Out[247]:
C D key2
0 C0 D0 0
3 C3 D3 0
1 C1 D1 1
4 C4 D4 1
2 C2 D2 2
5 C5 D5 3
In [250]:
result = pd.merge(left, right, on=['key2'])
In [251]:
result
Out[251]:
A B key2 C D
0 A0 B0 0 C0 D0
1 A0 B0 0 C3 D3
2 A4 B4 0 C0 D0
3 A4 B4 0 C3 D3
4 A2 B2 1 C1 D1
5 A2 B2 1 C4 D4
6 A5 B5 1 C1 D1
7 A5 B5 1 C4 D4
8 A1 B1 2 C2 D2
9 A3 B3 3 C5 D5
In [60]:
result2 = pd.merge(left, right, on=['key2'], how='right')
In [61]:
result2
Out[61]:
A B key2 C D
0 A0 B0 0 C0 D0
1 A4 B4 0 C0 D0
2 A0 B0 0 C3 D3
3 A4 B4 0 C3 D3
4 A1 B1 2 C2 D2
5 A2 B2 1 C1 D1
6 A5 B5 1 C1 D1
In [212]:
df = pd.merge(df1, df2, on='phase')
In [213]:
df.head()
Out[213]:
lhx lhy lhz rhx rhy rhz hx hy hz sx ... 23 24 25 26 27 28 29 30 31 32
0 1.843293 3.074965 1.644773 8.228968 3.020815 1.570013 5.03905 1.756163 1.734586 5.049521 ... -0.000049 0.000047 0.001516 0.001352 0.001703 0.001068 0.000416 0.000397 0.000240 0.000422
1 1.843293 3.074965 1.644773 8.228968 3.020815 1.570013 5.03905 1.756163 1.734586 5.049521 ... -0.000273 0.000059 0.007427 0.017248 0.003709 0.012792 0.000039 0.000174 0.000440 0.000281
2 1.843293 3.074965 1.644773 8.228968 3.020815 1.570013 5.03905 1.756163 1.734586 5.049521 ... -0.002584 -0.000602 0.007392 0.017023 0.002659 0.012178 0.002543 0.005297 0.001246 0.004187
3 1.843293 3.074965 1.644773 8.228968 3.020815 1.570013 5.03905 1.756163 1.734586 5.049521 ... 0.000008 0.000070 0.000688 0.001083 0.001302 0.000407 0.000281 0.000334 0.000434 0.000264
4 1.843293 3.074965 1.644773 8.228968 3.020815 1.570013 5.03905 1.756163 1.734586 5.049521 ... -0.000092 -0.000110 0.001527 0.001989 0.001715 0.001108 0.000086 0.000280 0.000341 0.000221

5 rows × 52 columns

In [214]:
df.columns
Out[214]:
Index([u'lhx', u'lhy', u'lhz', u'rhx', u'rhy', u'rhz', u'hx', u'hy', u'hz',
       u'sx', u'sy', u'sz', u'lwx', u'lwy', u'lwz', u'rwx', u'rwy', u'rwz',
       u'timestamp', u'phase', u'1', u'2', u'3', u'4', u'5', u'6', u'7', u'8',
       u'9', u'10', u'11', u'12', u'13', u'14', u'15', u'16', u'17', u'18',
       u'19', u'20', u'21', u'22', u'23', u'24', u'25', u'26', u'27', u'28',
       u'29', u'30', u'31', u'32'],
      dtype='object')
In [215]:
df[:1]
Out[215]:
lhx lhy lhz rhx rhy rhz hx hy hz sx ... 23 24 25 26 27 28 29 30 31 32
0 1.843293 3.074965 1.644773 8.228968 3.020815 1.570013 5.03905 1.756163 1.734586 5.049521 ... -0.000049 0.000047 0.001516 0.001352 0.001703 0.001068 0.000416 0.000397 0.00024 0.000422

1 rows × 52 columns

In [216]:
df1.shape,df2.shape,df.shape
Out[216]:
((1743, 20), (1743, 33), (976543, 52))
In [ ]:
df.drop('timestamp', axis=1, inplace=True)
In [187]:
cols = list(df.columns)
features = cols
features.remove('phase')
In [188]:
len(features)
Out[188]:
50
In [191]:
df1.shape,df2.shape,df.shape
Out[191]:
((1743, 20), (1743, 33), (976543, 51))
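
The 976,543 rows come from merging on the non-unique phase column: within each class every row of df1 is paired with every row of df2, so the merged length is the sum of the squared class counts shown earlier. A quick check confirms this, and it is why the two frames are combined with a column-wise concat below instead.

counts = [39, 163, 694, 191, 656]       # rows per phase in each frame
print(sum(c * c for c in counts))       # 976543, matching df.shape[0]
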
In [28]:
df1.drop('phase',axis=1,inplace=True)
In [29]:
df_1 = pd.concat([df1,df2],axis=1)
In [32]:
df_1.drop('timestamp' , axis=1, inplace=True )
In [35]:
df_1.shape
Out[35]:
(1743, 51)
In [40]:
cols = list(df_1.columns)
features = cols
features.remove('phase')
In [41]:
X = df_1[features]
y = df_1['phase']
In [42]:
# split dataset to 60% training and 40% testing
X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.4, random_state=0)
In [43]:
print X_train.shape, y_train.shape
(1045, 50) (1045L,)

Unsupervised Learning

PCA

In [44]:
len(features)
Out[44]:
50
In [45]:
# Apply PCA with the same number of dimensions as variables in the dataset
from sklearn.decomposition import PCA
pca = PCA(n_components=50)
pca.fit(X)

# Print the components and the amount of variance in the data contained in each dimension
print(pca.components_)
print(pca.explained_variance_ratio_)
[[  3.24581191e-01   2.92251490e-01  -7.53364165e-03 ...,  -3.24728085e-04
   -3.41055456e-04  -3.57808090e-04]
 [  3.61481275e-01   3.43404679e-01   9.08432680e-04 ...,   5.07423576e-04
   -9.78392399e-05   2.70505189e-04]
 [ -2.37097518e-01   5.92270728e-01   1.24632609e-02 ...,  -3.58624880e-04
   -3.90482340e-04  -1.93933193e-04]
 ..., 
 [ -4.04553884e-05   7.12943663e-05  -1.15402630e-03 ...,   4.86619567e-03
   -1.48671225e-02   7.35656948e-03]
 [ -1.29660243e-04  -2.43834901e-05  -9.59580183e-04 ...,  -2.71273032e-03
    1.29078542e-02   4.33512842e-03]
 [ -6.46273141e-05  -7.94059286e-05  -8.03733060e-04 ...,  -3.07743917e-05
    5.64481537e-03   4.41337238e-04]]
[  5.43602019e-01   1.94609502e-01   1.79945381e-01   6.06757941e-02
   6.51627887e-03   3.58008210e-03   2.67915903e-03   2.33392917e-03
   1.79531854e-03   1.48063948e-03   8.24068490e-04   6.16949441e-04
   5.22681332e-04   3.02808424e-04   9.48140982e-05   8.22443484e-05
   6.86532264e-05   5.74845805e-05   5.44861553e-05   3.78224060e-05
   3.29970596e-05   2.22630456e-05   1.86548452e-05   1.34322604e-05
   4.24253117e-06   3.74622695e-06   3.12518674e-06   2.80866166e-06
   2.68473778e-06   2.38735187e-06   2.18231402e-06   1.95711839e-06
   1.74097538e-06   1.57779695e-06   1.51902148e-06   1.15606438e-06
   1.09387478e-06   4.78227553e-07   3.75018054e-07   3.06011531e-07
   2.81524044e-07   2.41916791e-07   2.23069620e-07   1.76570338e-07
   8.01344334e-08   5.95234827e-08   5.05723238e-08   2.85968368e-08
   8.08258475e-09   6.73808980e-09]
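
The ratios drop off quickly; a cumulative sum over the fitted pca above makes explicit how few components carry most of the variance (about 4 components for 95% and 7 for 99%, given the values printed here):

cumvar = np.cumsum(pca.explained_variance_ratio_)
print("95%: {} components, 99%: {} components".format(
    np.argmax(cumvar >= 0.95) + 1, np.argmax(cumvar >= 0.99) + 1))
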
In [46]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.plot(list(pca.explained_variance_ratio_),'-o')
plt.title('Explained variance ratio as function of PCA components')
plt.ylabel('Explained variance ratio')
plt.xlabel('Component')
plt.show()
In [47]:
# First we reduce the data to two dimensions using PCA to capture variation
pca = PCA(n_components=2)
reduced_data = pca.fit_transform(X)
print(reduced_data[:10])  # print the first 10 elements
[[-4.97920203 -1.99254767]
 [ 1.05764711  0.24918246]
 [ 1.0436656   0.27278704]
 [ 0.54357051  0.36484743]
 [ 0.55093989  0.38420189]
 [ 0.55136204  0.39983171]
 [-4.89826696 -1.9399872 ]
 [-4.90657144 -1.94518439]
 [-4.9156538  -1.95073956]
 [-4.92688358 -1.95743474]]
In [48]:
# Import clustering modules
from sklearn.cluster import KMeans
from sklearn.mixture import GMM
In [49]:
kmeans = KMeans(n_clusters=5)
clusters = kmeans.fit(reduced_data)
print(clusters)
KMeans(copy_x=True, init='k-means++', max_iter=300, n_clusters=5, n_init=10,
    n_jobs=1, precompute_distances='auto', random_state=None, tol=0.0001,
    verbose=0)
In [50]:
# Plot the decision boundary by building a mesh grid to populate a graph.
x_min, x_max = reduced_data[:, 0].min() - 1, reduced_data[:, 0].max() + 1
y_min, y_max = reduced_data[:, 1].min() - 1, reduced_data[:, 1].max() + 1
hx = (x_max-x_min)/1000.
hy = (y_max-y_min)/1000.
xx, yy = np.meshgrid(np.arange(x_min, x_max, hx), np.arange(y_min, y_max, hy))

# Obtain labels for each point in mesh. Use last trained model.
Z = clusters.predict(np.c_[xx.ravel(), yy.ravel()])
In [51]:
# Find the centroids for KMeans or the cluster means for GMM 

centroids = kmeans.cluster_centers_
print('*** K MEANS CENTROIDS ***')
print(centroids)

# Transform the centroids back to the original feature space
print('*** CENTROIDS TRANSFORMED TO ORIGINAL SPACE ***')
print(pca.inverse_transform(centroids))
*** K MEANS CENTROIDS ***
[[ 1.18134022 -0.11051211]
 [-3.756748   -0.90595973]
 [-1.37556782  1.94203578]
 [-0.26097948  0.37070527]
 [-0.75118636 -1.29505563]]
*** CENTROIDS TRANSFORMED TO ORIGINAL SPACE ***
[[  5.07568233e+00   4.41700467e+00   1.48298762e+00   4.89710937e+00
    4.42560169e+00   1.47296874e+00   5.11747990e+00   1.65395293e+00
    1.75892126e+00   5.11537035e+00   4.27823846e+00   1.74234865e+00
    4.87822818e+00   4.39828185e+00   1.52204475e+00   5.11203670e+00
    4.41814204e+00   1.50415463e+00  -5.40001340e-04  -4.95434852e-04
    1.45821777e-05   9.43749927e-04  -1.36177541e-03  -5.34799531e-05
   -3.36676110e-04  -4.48647088e-04   6.41964045e-06   7.17965460e-04
   -1.20434182e-03  -7.65163255e-05   1.61552576e-05   8.24982621e-05
   -6.09655572e-06  -5.56746164e-05   9.84436408e-05  -2.72900068e-05
    3.50530897e-05   8.09844769e-05  -3.20020859e-06  -4.32869491e-05
    9.64489989e-05  -2.32896165e-05   6.89508901e-03   8.86421552e-03
    5.54440265e-03   6.75897898e-03   1.65032799e-03   2.08923404e-03
    1.34705981e-03   1.49257474e-03]
 [  3.18533235e+00   2.70068060e+00   1.51946680e+00   7.06738949e+00
    2.76322387e+00   1.51883638e+00   5.04868835e+00   1.69280837e+00
    1.73204626e+00   5.06855381e+00   4.22837301e+00   1.74226038e+00
    3.24120591e+00   2.83158943e+00   1.51399715e+00   6.97957185e+00
    2.87339666e+00   1.52325939e+00   3.33804012e-03   3.10702353e-03
    1.62356900e-04  -3.39428443e-03   9.19754390e-04   2.69057051e-05
    2.53180975e-03   2.46599099e-03   2.22584285e-04  -2.57895704e-03
    5.55667517e-04   7.48502470e-05  -8.49234522e-05  -2.37471298e-04
    4.58835118e-05   2.01223183e-04  -1.01230998e-04   2.06735427e-05
   -8.76506093e-05  -2.58024720e-04   3.60118245e-05   1.86209802e-04
   -1.85442476e-04   2.50613353e-05   2.21797955e-02   2.05765979e-02
    2.02969083e-02   1.95150932e-02   3.57096803e-03   3.28914110e-03
    3.10904773e-03   3.04428995e-03]
 [  4.98771570e+00   4.37459904e+00   1.50411505e+00   5.89679390e+00
    2.18246266e+00   1.49581184e+00   4.94424483e+00   1.65108646e+00
    1.75484332e+00   5.05201274e+00   4.24233827e+00   1.74417394e+00
    4.82799864e+00   4.29479389e+00   1.52855231e+00   5.91331642e+00
    2.41359544e+00   1.49878739e+00  -1.35401238e-03  -1.94494888e-03
   -2.42667660e-04  -1.32071378e-03   6.37096219e-03   5.62992337e-04
   -9.79705922e-04  -1.70384624e-03  -2.86780524e-04  -7.80355989e-04
    5.72597346e-03   6.18188927e-04   5.84445623e-05   1.84671185e-04
   -3.96331029e-06  -1.08409138e-04  -4.30351003e-04   1.26243978e-04
    3.75654223e-05   1.44406641e-04  -3.05494341e-06  -9.10790822e-05
   -4.49592362e-04   1.17900119e-04   8.67067690e-03   1.81212553e-02
    7.71432387e-03   1.46152719e-02   2.28001091e-03   3.96104508e-03
    2.01828752e-03   2.96268198e-03]
 [  4.78148356e+00   4.16073689e+00   1.49429069e+00   5.49508225e+00
    3.53980021e+00   1.48610322e+00   5.05754195e+00   1.65864659e+00
    1.75392003e+00   5.09037036e+00   4.26075509e+00   1.74286460e+00
    4.63096846e+00   4.14559960e+00   1.52278492e+00   5.60952556e+00
    3.61814528e+00   1.50531649e+00  -2.24395379e-04  -4.03009480e-04
   -3.88948087e-05  -3.28592540e-04   1.20150760e-03   1.36441548e-04
   -1.15080717e-04  -3.97738913e-04  -4.77442492e-05  -1.84544765e-04
    1.05246858e-03   1.46147349e-04   1.40305745e-05   6.65952051e-05
    1.91050127e-06  -3.44232845e-05  -8.30494961e-05   2.39827940e-05
    1.83371711e-05   5.11548441e-05   2.41616527e-06  -2.44998778e-05
   -1.01725571e-04   2.44640900e-05   9.58204152e-03   1.32095199e-02
    8.26987410e-03   1.08470848e-02   2.10568184e-03   2.80177680e-03
    1.79188887e-03   2.13882020e-03]
 [  4.02023025e+00   3.44544311e+00   1.49647050e+00   5.79041452e+00
    4.26485748e+00   1.49124042e+00   5.13932083e+00   1.67730375e+00
    1.74491779e+00   5.11090912e+00   4.26229537e+00   1.74165114e+00
    3.95502367e+00   3.53437635e+00   1.51511289e+00   5.90161893e+00
    4.24047319e+00   1.51703837e+00   1.97763401e-03   2.08897226e-03
    1.90680800e-04  -7.47474831e-04  -2.79031040e-03  -2.25712922e-04
    1.54005503e-03   1.67152957e-03   2.34569198e-04  -6.46275984e-04
   -2.64833004e-03  -2.35668684e-04  -5.69321928e-05  -1.37632478e-04
    2.30270405e-05   1.10682843e-04   1.71038523e-04  -5.41225320e-05
   -3.63703968e-05  -1.36359800e-04   1.92884112e-05   1.05568392e-04
    1.27894712e-04  -4.55253926e-05   1.50519728e-02   1.23166580e-02
    1.32556356e-02   1.13077260e-02   2.53123989e-03   2.11571438e-03
    2.12205338e-03   1.86362322e-03]]
In [52]:
# Put the result into a color plot
Z = Z.reshape(xx.shape)
plt.figure(1)
plt.clf()
plt.imshow(Z, interpolation='nearest',
           extent=(xx.min(), xx.max(), yy.min(), yy.max()),
           cmap=plt.cm.Paired,
           aspect='auto', origin='lower')

plt.plot(reduced_data[:, 0], reduced_data[:, 1], 'k.', markersize=2)
plt.scatter(centroids[:, 0], centroids[:, 1],
            marker='x', s=169, linewidths=3,
            color='w', zorder=10)
plt.title('Clustering on the gesture phase dataset (PCA-reduced data)\n'
          'Centroids are marked with white cross')
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
plt.xticks(())
plt.yticks(())
plt.show()

Applying agglomerative clustering via scikit-learn

In [53]:
from sklearn.cluster import AgglomerativeClustering

ac = AgglomerativeClustering(n_clusters=5, affinity='euclidean', linkage='complete')
labels = ac.fit_predict(X)
print('Cluster labels: %s' % labels)
Cluster labels: [3 2 2 ..., 2 3 0]
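
The agglomerative labels are not scored above; a short sketch of the same adjusted-Rand comparison used for the other clusterers, against the encoded phase labels y defined earlier:

from sklearn import metrics
print("Adjusted rand score: {:.2}".format(metrics.adjusted_rand_score(y, labels)))
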

In [54]:
X = df_1[features]
y = df_1['phase']
# split dataset to 60% training and 40% testing
X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.4, random_state=0)

K Means

In [55]:
from sklearn import cluster
clf = cluster.KMeans(init='k-means++', n_clusters=5, random_state=5)
clf.fit(X_train)
print clf.labels_.shape
print clf.labels_
(1045L,)
[0 2 4 ..., 2 1 2]
In [56]:
# Predict clusters on testing data
y_pred = clf.predict(X_test)
In [57]:
from sklearn import metrics
print "Addjusted rand score:{:.2}".format(metrics.adjusted_rand_score(y_test, y_pred))
print "Homogeneity score:{:.2} ".format(metrics.homogeneity_score(y_test, y_pred)) 
print "Completeness score: {:.2} ".format(metrics.completeness_score(y_test, y_pred))
print "Confusion matrix"
print metrics.confusion_matrix(y_test, y_pred)
Adjusted rand score:0.16
Homogeneity score:0.22 
Completeness score: 0.21 
Confusion matrix
[[  8   0   5   0   5]
 [  6  17  20   3  11]
 [  0  80 200   0   0]
 [ 12   7  45   8  10]
 [ 24  13  79  66  79]]
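
Note that K-means cluster IDs are arbitrary, so the rows of this confusion matrix are not aligned with the true phases. One way to make it readable (a sketch using the y_test and y_pred from the cells above) is to relabel each cluster by its most frequent true phase first:

# majority true phase within each predicted cluster
mapping = pd.crosstab(y_pred, np.asarray(y_test)).idxmax(axis=1)
y_pred_mapped = pd.Series(y_pred).map(mapping)
print(metrics.confusion_matrix(y_test, y_pred_mapped))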

Affinity Propagation

In [58]:
# Affinity propagation
aff = cluster.AffinityPropagation()
aff.fit(X_train)
print aff.cluster_centers_indices_.shape
(41L,)
In [59]:
y_pred = aff.predict(X_test)
In [60]:
from sklearn import metrics
print "Addjusted rand score:{:.2}".format(metrics.adjusted_rand_score(y_test, y_pred))
print "Homogeneity score:{:.2} ".format(metrics.homogeneity_score(y_test, y_pred)) 
print "Completeness score: {:.2} ".format(metrics.completeness_score(y_test, y_pred))
print "Confusion matrix"
print metrics.confusion_matrix(y_test, y_pred)
Adjusted rand score:0.078
Homogeneity score:0.57 
Completeness score: 0.21 
Confusion matrix
[[ 0  0  4 ...,  0  4  0]
 [ 0  0  5 ...,  0  0  3]
 [15 24  0 ...,  0  0  0]
 ..., 
 [ 0  0  0 ...,  0  0  0]
 [ 0  0  0 ...,  0  0  0]
 [ 0  0  0 ...,  0  0  0]]

MeanShift

In [61]:
ms = cluster.MeanShift()
ms.fit(X_train)
Out[61]:
MeanShift(bandwidth=None, bin_seeding=False, cluster_all=True, min_bin_freq=1,
     n_jobs=1, seeds=None)
In [62]:
y_pred = ms.predict(X_test)
In [63]:
from sklearn import metrics
print "Addjusted rand score:{:.2}".format(metrics.adjusted_rand_score(y_test, y_pred))
print "Homogeneity score:{:.2} ".format(metrics.homogeneity_score(y_test, y_pred)) 
print "Completeness score: {:.2} ".format(metrics.completeness_score(y_test, y_pred))
print "Confusion matrix"
print metrics.confusion_matrix(y_test, y_pred)
Adjusted rand score:0.048
Homogeneity score:0.041 
Completeness score: 0.19 
Confusion matrix
[[ 10   8   0   0   0]
 [ 51   6   0   0   0]
 [280   0   0   0   0]
 [ 68  14   0   0   0]
 [234  27   0   0   0]]

Mixture of Gaussian Models

In [369]:
from sklearn import mixture

# Define a heldout dataset to estimate covariance type
X_train_heldout, X_test_heldout, y_train_heldout, y_test_heldout = train_test_split(
        X_train, y_train,test_size=0.25, random_state=42)
for covariance_type in ['spherical','tied','diag','full']:
    gm=mixture.GMM(n_components=100, covariance_type=covariance_type, random_state=42, n_init=5)
    gm.fit(X_train_heldout)
    y_pred=gm.predict(X_test_heldout)
    print "Adjusted rand score for covariance={}:{:.2}".format(covariance_type, 
                                                               metrics.adjusted_rand_score(y_test_heldout, y_pred))
Adjusted rand score for covariance=spherical:0.046
Adjusted rand score for covariance=tied:0.048
Adjusted rand score for covariance=diag:0.05
Adjusted rand score for covariance=full:0.046
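
The held-out comparison above only selects a covariance type; as a follow-up sketch (assuming 'diag', the best value found), the mixture could be refit on the full training split and scored against the test split:

gm = mixture.GMM(n_components=100, covariance_type='diag', random_state=42, n_init=5)
gm.fit(X_train)
print("Adjusted rand score: {:.2}".format(metrics.adjusted_rand_score(y_test, gm.predict(X_test))))
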

In [64]:
pca = PCA(n_components=2)
X = pca.fit_transform(X)
In [65]:
c = []
from matplotlib.pyplot import cm 
n=6
color=iter(cm.rainbow(np.linspace(0,1,n)))
for i in range(n):
    c.append(next(color))
In [66]:
n = 5
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(8,6))

km = KMeans(n_clusters= n , random_state=0)
y_km = km.fit_predict(X)

for i in range(n):
    ax1.scatter(X[y_km==i,0], X[y_km==i,1], c=c[i], marker='o', s=40, label='cluster{}'.format(i))
ax1.set_title('K-means clustering')

ac = AgglomerativeClustering(n_clusters=n, affinity='euclidean', linkage='complete')
y_ac = ac.fit_predict(X)
for i in range(n):
    ax2.scatter(X[y_ac==i,0], X[y_ac==i,1], c=c[i], marker='o', s=40, label='cluster{}'.format(i))
ax2.set_title('Agglomerative clustering')

# Put a legend below current axis
plt.legend()
    
plt.tight_layout()
#plt.savefig('./figures/kmeans_and_ac.png', dpi=300)
plt.show()

Classification

In [67]:
import os
from sklearn.tree import DecisionTreeClassifier, export_graphviz
import pandas as pd
import numpy as np
from sklearn.cross_validation import train_test_split
from sklearn import cross_validation, metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import BernoulliNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from time import time
from sklearn.pipeline import Pipeline
from sklearn.metrics import roc_auc_score , classification_report
from sklearn.grid_search import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.metrics import precision_score, recall_score, accuracy_score, classification_report
In [68]:
X = df_1[features]
y = df_1['phase']
# split dataset to 60% training and 40% testing
X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, y, test_size=0.4, random_state=0)
In [69]:
print X_train.shape, y_train.shape,X_test.shape, y_test.shape
(1045, 50) (1045L,) (698, 50) (698L,)

Decision Tree accuracy and time elapsed calculation

In [70]:
t0=time()
print "DecisionTree"

dt = DecisionTreeClassifier(min_samples_split=20,random_state=99)
# dt = DecisionTreeClassifier(min_samples_split=20,max_depth=5,random_state=99)

clf_dt=dt.fit(X_train,y_train)

print "Acurracy: ", clf_dt.score(X_test,y_test)
t1=time()
print "time elapsed: ", t1-t0
DecisionTree
Accuracy:  0.823782234957
time elapsed:  0.154000043869
In [71]:
tt0=time()
print "cross result========"
scores = cross_validation.cross_val_score(dt, X,y, cv=5)
print scores
print scores.mean()
tt1=time()
print "time elapsed: ", tt1-tt0
print "\n"
cross result========
[ 0.49287749  0.7277937   0.64756447  0.68678161  0.5433526 ]
0.619673973884
time elapsed:  0.986000061035


In [72]:
from sklearn.metrics import classification_report

pipeline = Pipeline([
    ('clf', DecisionTreeClassifier(criterion='entropy'))
])

parameters = {
    'clf__max_depth': (5, 25 , 50),
    'clf__min_samples_split': (1, 5, 10),
    'clf__min_samples_leaf': (1, 2, 3)
}

grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1, scoring='f1')
grid_search.fit(X_train, y_train)

print 'Best score: %0.3f' % grid_search.best_score_
print 'Best parameters set:'

best_parameters = grid_search.best_estimator_.get_params()
for param_name in sorted(parameters.keys()):
    print '\t%s: %r' % (param_name, best_parameters[param_name])

predictions = grid_search.predict(X_test)

print classification_report(y_test, predictions)
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   16.9s
[Parallel(n_jobs=-1)]: Done  81 out of  81 | elapsed:   18.7s finished
Fitting 3 folds for each of 27 candidates, totalling 81 fits
Best score: 0.814
Best parameters set:
	clf__max_depth: 50
	clf__min_samples_leaf: 1
	clf__min_samples_split: 10
             precision    recall  f1-score   support

          0       0.80      0.67      0.73        18
          1       0.43      0.46      0.44        57
          2       0.92      0.90      0.91       280
          3       0.75      0.62      0.68        82
          4       0.85      0.92      0.89       261

avg / total       0.83      0.83      0.83       698

Random Forest accuracy and time elapsed calculation

In [73]:
t2=time()
print "RandomForest"
rf = RandomForestClassifier(n_estimators=100,n_jobs=-1)
clf_rf = rf.fit(X_train,y_train)
print "Acurracy: ", clf_rf.score(X_test,y_test)
t3=time()
print "time elapsed: ", t3-t2
RandomForest
Accuracy:  0.901146131805
time elapsed:  1.0569999218
In [74]:
tt2=time()
print "cross result========"
scores = cross_validation.cross_val_score(dt, X,y, cv=5)
print scores
print scores.mean()
tt3=time()
print "time elapsed: ", tt3-tt2
print "\n"
cross result========
[ 0.49287749  0.7277937   0.64756447  0.68678161  0.5433526 ]
0.619673973884
time elapsed:  0.889999866486


In [75]:
pipeline2 = Pipeline([
('clf', RandomForestClassifier(criterion='entropy'))
])

parameters = {
    'clf__n_estimators': (5, 25, 50, 100),
    'clf__max_depth': (5, 25 , 50),
    'clf__min_samples_split': (1, 5, 10),
    'clf__min_samples_leaf': (1, 2, 3)
}

grid_search = GridSearchCV(pipeline2, parameters, n_jobs=-1, verbose=1, scoring='accuracy', cv=3)

grid_search.fit(X_train, y_train)

print 'Best score: %0.3f' % grid_search.best_score_

print 'Best parameters set:'
best_parameters = grid_search.best_estimator_.get_params()

for param_name in sorted(parameters.keys()):
    print '\t%s: %r' % (param_name, best_parameters[param_name])

predictions = grid_search.predict(X_test)
print 'Accuracy:', accuracy_score(y_test, predictions)
print classification_report(y_test, predictions)
    
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   22.2s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done 324 out of 324 | elapsed:  1.8min finished
Fitting 3 folds for each of 108 candidates, totalling 324 fits
Best score: 0.888
Best parameters set:
	clf__max_depth: 25
	clf__min_samples_leaf: 1
	clf__min_samples_split: 1
	clf__n_estimators: 50
Accuracy: 0.901146131805
             precision    recall  f1-score   support

          0       0.93      0.78      0.85        18
          1       0.93      0.44      0.60        57
          2       0.89      0.99      0.94       280
          3       0.96      0.66      0.78        82
          4       0.89      0.99      0.94       261

avg / total       0.91      0.90      0.89       698

Naive Bayes accuracy and time elapsed calculation

In [76]:
t4=time()
print "NaiveBayes"
nb = BernoulliNB()
clf_nb=nb.fit(X_train,y_train)
print "Acurracy: ", clf_nb.score(X_test,y_test)
t5=time()
print "time elapsed: ", t5-t4
NaiveBayes
Accuracy:  0.385386819484
time elapsed:  0.0239999294281
In [77]:
tt4=time()
print "cross result========"
scores = cross_validation.cross_val_score(dt, X,y, cv=5)
print scores
print scores.mean()
tt5=time()
print "time elapsed: ", tt5-tt4
print "\n"
cross result========
[ 0.49287749  0.7277937   0.64756447  0.68678161  0.5433526 ]
0.619673973884
time elapsed:  0.963999986649


KNN accuracy and time elapsed calculation

In [78]:
t6=time()
print "KNN"
# knn = KNeighborsClassifier(n_neighbors=3)
knn = KNeighborsClassifier()
clf_knn=knn.fit(X_train, y_train)
print "Acurracy: ", clf_knn.score(X_test,y_test) 
t7=time()
print "time elapsed: ", t7-t6
KNN
Accuracy:  0.843839541547
time elapsed:  0.063000202179
In [79]:
tt6=time()
print "cross result========"
scores = cross_validation.cross_val_score(dt, X,y, cv=5)
print scores
print scores.mean()
tt7=time()
print "time elapsed: ", tt7-tt6
print "\n"
cross result========
[ 0.49287749  0.7277937   0.64756447  0.68678161  0.5433526 ]
0.619673973884
time elapsed:  0.888999938965


SVM accuracy and time elapsed calculation

In [80]:
t7=time()
print "SVM"

svc = SVC()
clf_svc=svc.fit(X_train, y_train)
print "Acurracy: ", clf_svc.score(X_test,y_test) 
t8=time()
print "time elapsed: ", t8-t7
SVM
Accuracy:  0.753581661891
time elapsed:  0.262000083923
In [81]:
tt7=time()
print "cross result========"
scores = cross_validation.cross_val_score(dt, X,y, cv=5)
print scores
print scores.mean()
tt8=time()
print "time elapsed: ", tt7-tt6
print "\n"
cross result========
[ 0.49287749  0.7277937   0.64756447  0.68678161  0.5433526 ]
0.619673973884
time elapsed:  7.63800001144


In [82]:
from sklearn.svm import SVC
from sklearn.cross_validation import cross_val_score
from sklearn.pipeline import Pipeline
from sklearn import grid_search

svc = SVC()

parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}

grid = grid_search.GridSearchCV(svc, parameters, n_jobs=-1, verbose=1, scoring='accuracy')


grid.fit(X_train, y_train)

print 'Best score: %0.3f' % grid.best_score_

print 'Best parameters set:'
best_parameters = grid.best_estimator_.get_params()

for param_name in sorted(parameters.keys()):
    print '\t%s: %r' % (param_name, best_parameters[param_name])
    
predictions = grid.predict(X_test)
print classification_report(y_test, predictions)
Fitting 3 folds for each of 4 candidates, totalling 12 fits
Best score: 0.779
Best parameters set:
	C: 10
	kernel: 'linear'
             precision    recall  f1-score   support

          0       1.00      0.22      0.36        18
          1       0.32      0.19      0.24        57
          2       0.84      0.98      0.91       280
          3       0.76      0.27      0.40        82
          4       0.79      0.92      0.85       261

avg / total       0.77      0.79      0.76       698

[Parallel(n_jobs=-1)]: Done  12 out of  12 | elapsed:   19.0s finished
In [83]:
pipeline = Pipeline([
    ('clf', SVC(kernel='rbf', gamma=0.01, C=100))
])

parameters = {
    'clf__gamma': (0.01, 0.03, 0.1, 0.3, 1),
    'clf__C': (0.1, 0.3, 1, 3, 10, 30),
}

grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1, scoring='accuracy')

grid_search.fit(X_train, y_train)

print 'Best score: %0.3f' % grid_search.best_score_

print 'Best parameters set:'
best_parameters = grid_search.best_estimator_.get_params()

for param_name in sorted(parameters.keys()):
    print '\t%s: %r' % (param_name, best_parameters[param_name])
    
predictions = grid_search.predict(X_test)
print classification_report(y_test, predictions)
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   17.6s
[Parallel(n_jobs=-1)]: Done  90 out of  90 | elapsed:   18.7s finished
Fitting 3 folds for each of 30 candidates, totalling 90 fits
Best score: 0.879
Best parameters set:
	clf__C: 30
	clf__gamma: 1
             precision    recall  f1-score   support

          0       0.89      0.89      0.89        18
          1       0.55      0.63      0.59        57
          2       0.94      0.96      0.95       280
          3       0.85      0.77      0.81        82
          4       0.93      0.91      0.92       261

avg / total       0.89      0.89      0.89       698

Ensemble Learning

In [84]:
from sklearn.base import BaseEstimator
from sklearn.base import ClassifierMixin
from sklearn.preprocessing import LabelEncoder
from sklearn.externals import six
from sklearn.base import clone
from sklearn.pipeline import _name_estimators
import numpy as np
import operator


class MajorityVoteClassifier(BaseEstimator, 
                             ClassifierMixin):
    """ A majority vote ensemble classifier

    Parameters
    ----------
    classifiers : array-like, shape = [n_classifiers]
      Different classifiers for the ensemble

    vote : str, {'classlabel', 'probability'} (default='classlabel')
      If 'classlabel' the prediction is based on the argmax of
        class labels. Else if 'probability', the argmax of
        the sum of probabilities is used to predict the class label
        (recommended for calibrated classifiers).

    weights : array-like, shape = [n_classifiers], optional (default=None)
      If a list of `int` or `float` values is provided, the classifiers
      are weighted by importance; uses uniform weights if `weights=None`.

    """
    def __init__(self, classifiers, vote='classlabel', weights=None):

        self.classifiers = classifiers
        self.named_classifiers = {key: value for key, value
                                  in _name_estimators(classifiers)}
        self.vote = vote
        self.weights = weights

    def fit(self, X, y):
        """ Fit classifiers.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Matrix of training samples.

        y : array-like, shape = [n_samples]
            Vector of target class labels.

        Returns
        -------
        self : object

        """
        if self.vote not in ('probability', 'classlabel'):
            raise ValueError("vote must be 'probability' or 'classlabel'"
                             "; got (vote=%r)"
                             % self.vote)

        if self.weights and len(self.weights) != len(self.classifiers):
            raise ValueError('Number of classifiers and weights must be equal'
                             '; got %d weights, %d classifiers'
                             % (len(self.weights), len(self.classifiers)))

        # Use LabelEncoder to ensure class labels start with 0, which
        # is important for np.argmax call in self.predict
        self.lablenc_ = LabelEncoder()
        self.lablenc_.fit(y)
        self.classes_ = self.lablenc_.classes_
        self.classifiers_ = []
        for clf in self.classifiers:
            fitted_clf = clone(clf).fit(X, self.lablenc_.transform(y))
            self.classifiers_.append(fitted_clf)
        return self

    def predict(self, X):
        """ Predict class labels for X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Matrix of training samples.

        Returns
        ----------
        maj_vote : array-like, shape = [n_samples]
            Predicted class labels.
            
        """
        if self.vote == 'probability':
            maj_vote = np.argmax(self.predict_proba(X), axis=1)
        else:  # 'classlabel' vote

            #  Collect results from clf.predict calls
            predictions = np.asarray([clf.predict(X)
                                      for clf in self.classifiers_]).T

            maj_vote = np.apply_along_axis(
                                      lambda x:
                                      np.argmax(np.bincount(x,
                                                weights=self.weights)),
                                      axis=1,
                                      arr=predictions)
        maj_vote = self.lablenc_.inverse_transform(maj_vote)
        return maj_vote

    def predict_proba(self, X):
        """ Predict class probabilities for X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples and
            n_features is the number of features.

        Returns
        ----------
        avg_proba : array-like, shape = [n_samples, n_classes]
            Weighted average probability for each class per sample.

        """
        probas = np.asarray([clf.predict_proba(X)
                             for clf in self.classifiers_])
        avg_proba = np.average(probas, axis=0, weights=self.weights)
        return avg_proba

    def get_params(self, deep=True):
        """ Get classifier parameter names for GridSearch"""
        if not deep:
            return super(MajorityVoteClassifier, self).get_params(deep=False)
        else:
            out = self.named_classifiers.copy()
            for name, step in six.iteritems(self.named_classifiers):
                for key, value in six.iteritems(step.get_params(deep=True)):
                    out['%s__%s' % (name, key)] = value
            return out
In [109]:
from sklearn.cross_validation import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier 
from sklearn.pipeline import Pipeline
import numpy as np
from sklearn.preprocessing import StandardScaler

clf1 = LogisticRegression(penalty='l2', 
                          C=0.001, 
                          random_state=0)

clf2 = DecisionTreeClassifier(max_depth=1, 
                              criterion='entropy', 
                              random_state=0)

clf3 = KNeighborsClassifier(n_neighbors=1, 
                            p=2, 
                            metric='minkowski')

pipe1 = Pipeline([['sc', StandardScaler()],
                  ['clf', clf1]])
pipe3 = Pipeline([['sc', StandardScaler()],
                  ['clf', clf3]])

clf_labels = ['Logistic Regression', 'Decision Tree', 'KNN']

print('10-fold cross validation:\n')
for clf, label in zip([pipe1, clf2, pipe3], clf_labels):
    scores = cross_val_score(estimator=clf, 
                             X=X_train, 
                             y=y_train, 
                             cv=10, 
                             scoring='accuracy')
    print("Accuracy: %0.2f (+/- %0.2f) [%s]" 
               % (scores.mean(), scores.std(), label))
10-fold cross validation:

Accuracy: 0.74 (+/- 0.02) [Logistic Regression]
Accuracy: 0.66 (+/- 0.02) [Decision Tree]
Accuracy: 0.86 (+/- 0.03) [KNN]
In [110]:
# Majority Rule (hard) Voting

mv_clf = MajorityVoteClassifier(
                classifiers=[pipe1, clf2, pipe3])

clf_labels += ['Majority Voting']
all_clf = [pipe1, clf2, pipe3, mv_clf]

for clf, label in zip(all_clf, clf_labels):
    scores = cross_val_score(estimator=clf, 
                             X=X_train, 
                             y=y_train, 
                             cv=10, 
                             scoring='accuracy')
    print("Accuracy: %0.2f (+/- %0.2f) [%s]" 
               % (scores.mean(), scores.std(), label))
Accuracy: 0.74 (+/- 0.02) [Logistic Regression]
Accuracy: 0.66 (+/- 0.02) [Decision Tree]
Accuracy: 0.86 (+/- 0.03) [KNN]
Accuracy: 0.76 (+/- 0.02) [Majority Voting]
In [111]:
mv_clf.get_params()
Out[111]:
{'decisiontreeclassifier': DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=1,
             max_features=None, max_leaf_nodes=None, min_samples_leaf=1,
             min_samples_split=2, min_weight_fraction_leaf=0.0,
             presort=False, random_state=0, splitter='best'),
 'decisiontreeclassifier__class_weight': None,
 'decisiontreeclassifier__criterion': 'entropy',
 'decisiontreeclassifier__max_depth': 1,
 'decisiontreeclassifier__max_features': None,
 'decisiontreeclassifier__max_leaf_nodes': None,
 'decisiontreeclassifier__min_samples_leaf': 1,
 'decisiontreeclassifier__min_samples_split': 2,
 'decisiontreeclassifier__min_weight_fraction_leaf': 0.0,
 'decisiontreeclassifier__presort': False,
 'decisiontreeclassifier__random_state': 0,
 'decisiontreeclassifier__splitter': 'best',
 'pipeline-1': Pipeline(steps=[['sc', StandardScaler(copy=True, with_mean=True, with_std=True)], ['clf', LogisticRegression(C=0.001, class_weight=None, dual=False, fit_intercept=True,
           intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
           penalty='l2', random_state=0, solver='liblinear', tol=0.0001,
           verbose=0, warm_start=False)]]),
 'pipeline-1__clf': LogisticRegression(C=0.001, class_weight=None, dual=False, fit_intercept=True,
           intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
           penalty='l2', random_state=0, solver='liblinear', tol=0.0001,
           verbose=0, warm_start=False),
 'pipeline-1__clf__C': 0.001,
 'pipeline-1__clf__class_weight': None,
 'pipeline-1__clf__dual': False,
 'pipeline-1__clf__fit_intercept': True,
 'pipeline-1__clf__intercept_scaling': 1,
 'pipeline-1__clf__max_iter': 100,
 'pipeline-1__clf__multi_class': 'ovr',
 'pipeline-1__clf__n_jobs': 1,
 'pipeline-1__clf__penalty': 'l2',
 'pipeline-1__clf__random_state': 0,
 'pipeline-1__clf__solver': 'liblinear',
 'pipeline-1__clf__tol': 0.0001,
 'pipeline-1__clf__verbose': 0,
 'pipeline-1__clf__warm_start': False,
 'pipeline-1__sc': StandardScaler(copy=True, with_mean=True, with_std=True),
 'pipeline-1__sc__copy': True,
 'pipeline-1__sc__with_mean': True,
 'pipeline-1__sc__with_std': True,
 'pipeline-1__steps': [['sc',
   StandardScaler(copy=True, with_mean=True, with_std=True)],
  ['clf',
   LogisticRegression(C=0.001, class_weight=None, dual=False, fit_intercept=True,
             intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
             penalty='l2', random_state=0, solver='liblinear', tol=0.0001,
             verbose=0, warm_start=False)]],
 'pipeline-2': Pipeline(steps=[['sc', StandardScaler(copy=True, with_mean=True, with_std=True)], ['clf', KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
            metric_params=None, n_jobs=1, n_neighbors=1, p=2,
            weights='uniform')]]),
 'pipeline-2__clf': KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
            metric_params=None, n_jobs=1, n_neighbors=1, p=2,
            weights='uniform'),
 'pipeline-2__clf__algorithm': 'auto',
 'pipeline-2__clf__leaf_size': 30,
 'pipeline-2__clf__metric': 'minkowski',
 'pipeline-2__clf__metric_params': None,
 'pipeline-2__clf__n_jobs': 1,
 'pipeline-2__clf__n_neighbors': 1,
 'pipeline-2__clf__p': 2,
 'pipeline-2__clf__weights': 'uniform',
 'pipeline-2__sc': StandardScaler(copy=True, with_mean=True, with_std=True),
 'pipeline-2__sc__copy': True,
 'pipeline-2__sc__with_mean': True,
 'pipeline-2__sc__with_std': True,
 'pipeline-2__steps': [['sc',
   StandardScaler(copy=True, with_mean=True, with_std=True)],
  ['clf',
   KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
              metric_params=None, n_jobs=1, n_neighbors=1, p=2,
              weights='uniform')]]}
In [112]:
from sklearn.grid_search import GridSearchCV

params = {'decisiontreeclassifier__max_depth': [1, 2],
          'pipeline-1__clf__C': [0.001, 0.1, 100.0]}

grid = GridSearchCV(estimator=mv_clf, 
                    param_grid=params, 
                    cv=10, 
                    scoring='accuracy')
grid.fit(X_train, y_train)

for params, mean_score, scores in grid.grid_scores_:
    print("%0.3f+/-%0.2f %r"
            % (mean_score, scores.std() / 2, params))
0.757+/-0.01 {'pipeline-1__clf__C': 0.001, 'decisiontreeclassifier__max_depth': 1}
0.802+/-0.01 {'pipeline-1__clf__C': 0.1, 'decisiontreeclassifier__max_depth': 1}
0.836+/-0.01 {'pipeline-1__clf__C': 100.0, 'decisiontreeclassifier__max_depth': 1}
0.782+/-0.01 {'pipeline-1__clf__C': 0.001, 'decisiontreeclassifier__max_depth': 2}
0.815+/-0.01 {'pipeline-1__clf__C': 0.1, 'decisiontreeclassifier__max_depth': 2}
0.845+/-0.01 {'pipeline-1__clf__C': 100.0, 'decisiontreeclassifier__max_depth': 2}
In [113]:
print('Best parameters: %s' % grid.best_params_)
print('Accuracy: %.2f' % grid.best_score_)
Best parameters: {'pipeline-1__clf__C': 100.0, 'decisiontreeclassifier__max_depth': 2}
Accuracy: 0.84
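
The score above is cross-validated on the training split only; a short follow-up sketch (using grid.best_estimator_, which GridSearchCV refits on the full training split) would evaluate the tuned ensemble on the held-out test set:

from sklearn.metrics import accuracy_score
best_mv = grid.best_estimator_
print('Test accuracy: %.2f' % accuracy_score(y_test, best_mv.predict(X_test)))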

Bagging -- Building an ensemble of classifiers from bootstrap samples

In [114]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

tree = DecisionTreeClassifier(criterion='entropy', 
                              max_depth=None)

bag = BaggingClassifier(base_estimator=tree,
                        n_estimators=500, 
                        max_samples=1.0, 
                        max_features=1.0, 
                        bootstrap=True, 
                        bootstrap_features=False, 
                        n_jobs=1, 
                        random_state=1)

from sklearn.metrics import accuracy_score

tree = tree.fit(X_train, y_train)
y_train_pred = tree.predict(X_train)
y_test_pred = tree.predict(X_test)

tree_train = accuracy_score(y_train, y_train_pred)
tree_test = accuracy_score(y_test, y_test_pred)
print('Decision tree train/test accuracies %.3f/%.3f'
      % (tree_train, tree_test))

bag = bag.fit(X_train, y_train)
y_train_pred = bag.predict(X_train)
y_test_pred = bag.predict(X_test)

bag_train = accuracy_score(y_train, y_train_pred) 
bag_test = accuracy_score(y_test, y_test_pred) 
print('Bagging train/test accuracies %.3f/%.3f'
      % (bag_train, bag_test))
Decision tree train/test accuracies 1.000/0.842
Bagging train/test accuracies 1.000/0.903

Leveraging weak learners via adaptive boosting

In [115]:
from sklearn.ensemble import AdaBoostClassifier

tree = DecisionTreeClassifier(criterion='entropy', 
                              max_depth=1)

ada = AdaBoostClassifier(base_estimator=tree,
                         n_estimators=500, 
                         learning_rate=0.1,
                         random_state=0)

tree = tree.fit(X_train, y_train)
y_train_pred = tree.predict(X_train)
y_test_pred = tree.predict(X_test)

tree_train = accuracy_score(y_train, y_train_pred)
tree_test = accuracy_score(y_test, y_test_pred)
print('Decision tree train/test accuracies %.3f/%.3f'
      % (tree_train, tree_test))

ada = ada.fit(X_train, y_train)
y_train_pred = ada.predict(X_train)
y_test_pred = ada.predict(X_test)

ada_train = accuracy_score(y_train, y_train_pred) 
ada_test = accuracy_score(y_test, y_test_pred) 
print('AdaBoost train/test accuracies %.3f/%.3f'
      % (ada_train, ada_test))
Decision tree train/test accuracies 0.665/0.672
AdaBoost train/test accuracies 0.772/0.734