taxi

Winning entry to the Kaggle taxi competition
git clone https://esimon.eu/repos/taxi.git
Log | Files | Refs | README

commit 9adfe767010e23823089b4db94cb4dc53cc3c12a
parent 71bb4d90da2bad933fdca48d1879886fe7aa9bc8
Author: Alex Auvolat <alex.auvolat@ens.fr>
Date:   Mon,  4 May 2015 13:15:42 -0400

Merge branch 'master' of github.com:adbrebs/taxi

Diffstat:
M convert_data.py | 3 ++-
M hdist.py | 16 +++++++++++-----
2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/convert_data.py b/convert_data.py
@@ -104,8 +104,9 @@ def convert(input_directory, save_path):
     h5file = h5py.File(save_path, 'w')
     split = {}
     split.update(read_stands(input_directory, h5file))
-    split.update(read_taxis(input_directory, h5file, 'test', 'test_'))
     split.update(read_taxis(input_directory, h5file, 'train', ''))
+    print 'First origin_call not present in training set: ', len(origin_call_dict)
+    split.update(read_taxis(input_directory, h5file, 'test', 'test_'))
     split.update(unique(h5file))
     h5file.attrs['split'] = H5PYDataset.create_split_array(split)
     h5file.flush()
diff --git a/hdist.py b/hdist.py
@@ -2,6 +2,9 @@ from theano import tensor
 import theano
 import numpy
 
+rearth = const(6371)
+deg2rad = const(3.141592653589793 / 180)
+
 def const(v):
     if theano.config.floatX == 'float32':
         return numpy.float32(v)
@@ -9,9 +12,6 @@ def const(v):
         return numpy.float64(v)
 
 def hdist(a, b):
-    rearth = const(6371)
-    deg2rad = const(3.141592653589793 / 180)
-
     lat1 = a[:, 0] * deg2rad
     lon1 = a[:, 1] * deg2rad
     lat2 = b[:, 0] * deg2rad
@@ -27,5 +27,11 @@ def hdist(a, b):
 
     return tensor.switch(tensor.eq(hd, float('nan')), (a-b).norm(2, axis=1), hd)
 
-
-
+def erdist(a, b):
+    lat1 = a[:, 0] * deg2rad
+    lon1 = a[:, 1] * deg2rad
+    lat2 = b[:, 0] * deg2rad
+    lon2 = b[:, 1] * deg2rad
+    x = (lon2-lon1) * tensor.cos((lat1+lat2)/2)
+    y = (lat2-lat1)
+    return tensor.sqrt(tensor.sqr(x) + tensor.sqr(y)) * rearth