taxi

Winning entry to the Kaggle taxi competition
git clone https://esimon.eu/repos/taxi.git
Log | Files | Refs | README

commit 1795dfe742bcb75085a909413b723b64a8eeb4fc
parent fe608831c62c7dba60a3bf57433d97b999e567c8
Author: Alex Auvolat <alex.auvolat@ens.fr>
Date:   Thu, 23 Jul 2015 19:00:52 -0400

Memory network with bidirectionnal RNN

Diffstat:
Aconfig/memory_network_bidir.py | 56++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mmodel/bidirectional.py | 2+-
Mmodel/memory_network.py | 321++++++++++++++++++++++++++++++++++++++++++++++---------------------------------
Amodel/memory_network_bidir.py | 143+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amodel/memory_network_mlp.py | 106+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 493 insertions(+), 135 deletions(-)

diff --git a/config/memory_network_bidir.py b/config/memory_network_bidir.py @@ -0,0 +1,56 @@ +from blocks.initialization import IsotropicGaussian, Constant + +from blocks.bricks import Tanh + +import data +from model.memory_network_bidir import Model, Stream + + +dim_embeddings = [ + ('origin_call', data.origin_call_train_size, 10), + ('origin_stand', data.stands_size, 10), + ('week_of_year', 52, 10), + ('day_of_week', 7, 10), + ('qhour_of_day', 24 * 4, 10), + ('day_type', 3, 10), +] + +embed_weights_init = IsotropicGaussian(0.001) + + +class RNNConfig(object): + __slots__ = ('rec_state_dim', 'dim_embeddings', 'embed_weights_init', + 'dim_hidden', 'weights_init', 'biases_init') + +prefix_encoder = RNNConfig() +prefix_encoder.dim_embeddings = dim_embeddings +prefix_encoder.embed_weights_init = embed_weights_init +prefix_encoder.rec_state_dim = 100 +prefix_encoder.dim_hidden = [100, 100] +prefix_encoder.weights_init = IsotropicGaussian(0.01) +prefix_encoder.biases_init = Constant(0.001) + +candidate_encoder = RNNConfig() +candidate_encoder.dim_embeddings = dim_embeddings +candidate_encoder.embed_weights_init = embed_weights_init +candidate_encoder.rec_state_dim = 100 +candidate_encoder.dim_hidden = [100, 100] +candidate_encoder.weights_init = IsotropicGaussian(0.01) +candidate_encoder.biases_init = Constant(0.001) + +representation_size = 100 +representation_activation = Tanh + +normalize_representation = True + + +batch_size = 32 +batch_sort_size = 20 + +valid_set = 'cuts/test_times_0' +max_splits = 100 +num_cuts = 1000 + +train_candidate_size = 100 +valid_candidate_size = 100 +test_candidate_size = 100 diff --git a/model/bidirectional.py b/model/bidirectional.py @@ -121,8 +121,8 @@ class Stream(object): stream = transformers.balanced_batch(stream, key='latitude', batch_size=self.config.batch_size, batch_sort_size=self.config.batch_sort_size) stream = Padding(stream, mask_sources=['latitude', 'longitude']) + stream = transformers.Select(stream, req_vars) - stream = MultiProcessing(stream) return stream diff --git a/model/memory_network.py b/model/memory_network.py @@ -1,7 +1,7 @@ from theano import tensor -from fuel.transformers import Batch, MultiProcessing, Merge +from fuel.transformers import Batch, MultiProcessing, Merge, Padding from fuel.streams import DataStream from fuel.schemes import ConstantScheme, ShuffledExampleScheme, SequentialExampleScheme from blocks.bricks import application, MLP, Rectifier, Initializable, Softmax @@ -13,98 +13,100 @@ from data.hdf5 import TaxiDataset, TaxiStream import error from model import ContextEmbedder - -class Model(Initializable): +class MemoryNetworkBase(Initializable): def __init__(self, config, **kwargs): - super(Model, self).__init__(**kwargs) + super(MemoryNetworkBase, self).__init__(**kwargs) + self.config = config - self.context_embedder = ContextEmbedder(config) - - self.prefix_encoder = MLP(activations=[Rectifier() for _ in config.prefix_encoder.dim_hidden] + [config.representation_activation()], - dims=[config.prefix_encoder.dim_input] + config.prefix_encoder.dim_hidden + [config.representation_size], - name='prefix_encoder') - self.candidate_encoder = MLP(activations=[Rectifier() for _ in config.candidate_encoder.dim_hidden] + [config.representation_activation()], - dims=[config.candidate_encoder.dim_input] + config.candidate_encoder.dim_hidden + [config.representation_size], - name='candidate_encoder') - self.softmax = Softmax() - - self.prefix_extremities = {'%s_k_%s' % (side, ['latitude', 'longitude'][axis]): axis for side in ['first', 'last'] for axis in [0, 1]} - self.candidate_extremities = {'candidate_%s_k_%s' % (side, ['latitude', 'longitude'][axis]): axis for side in ['first', 'last'] for axis in [0, 1]} - - self.inputs = self.context_embedder.inputs + ['candidate_%s'%k for k in self.context_embedder.inputs] + self.prefix_extremities.keys() + self.candidate_extremities.keys() - self.children = [ self.context_embedder, self.prefix_encoder, self.candidate_encoder, self.softmax ] - - def _push_initialization_config(self): - for (mlp, config) in [[self.prefix_encoder, self.config.prefix_encoder], [self.candidate_encoder, self.config.candidate_encoder]]: - mlp.weights_init = config.weights_init - mlp.biases_init = config.biases_init - - @application(outputs=['destination']) - def predict(self, **kwargs): - prefix_embeddings = tuple(self.context_embedder.apply(**{k: kwargs[k] for k in self.context_embedder.inputs })) - prefix_extremities = tuple((kwargs[k] - data.train_gps_mean[v]) / data.train_gps_std[v] for k, v in self.prefix_extremities.items()) - prefix_inputs = tensor.concatenate(prefix_extremities + prefix_embeddings, axis=1) - prefix_representation = self.prefix_encoder.apply(prefix_inputs) - if self.config.normalize_representation: - prefix_representation = prefix_representation / tensor.sqrt((prefix_representation ** 2).sum(axis=1, keepdims=True)) - - candidate_embeddings = tuple(self.context_embedder.apply(**{k: kwargs['candidate_%s'%k] for k in self.context_embedder.inputs })) - candidate_extremities = tuple((kwargs[k] - data.train_gps_mean[v]) / data.train_gps_std[v] for k, v in self.candidate_extremities.items()) - candidate_inputs = tensor.concatenate(candidate_extremities + candidate_embeddings, axis=1) - candidate_representation = self.candidate_encoder.apply(candidate_inputs) - if self.config.normalize_representation: - candidate_representation = candidate_representation / tensor.sqrt((candidate_representation ** 2).sum(axis=1, keepdims=True)) - - similarity_score = tensor.dot(prefix_representation, candidate_representation.T) - similarity = self.softmax.apply(similarity_score) - - candidate_destination = tensor.concatenate( - (tensor.shape_padright(kwargs['candidate_last_k_latitude'][:,-1]), - tensor.shape_padright(kwargs['candidate_last_k_longitude'][:,-1])), - axis=1) - - return tensor.dot(similarity, candidate_destination) - - @predict.property('inputs') - def predict_inputs(self): - return self.inputs - - @application(outputs=['cost']) - def cost(self, **kwargs): - y_hat = self.predict(**kwargs) - y = tensor.concatenate((kwargs['destination_latitude'][:, None], - kwargs['destination_longitude'][:, None]), axis=1) - - return error.erdist(y_hat, y).mean() - - @cost.property('inputs') - def cost_inputs(self): - return self.inputs + ['destination_latitude', 'destination_longitude'] - -class Stream(object): + +class StreamBase(object): def __init__(self, config): self.config = config - def train(self, req_vars): + self.prefix_inputs = [ + ('call_type', tensor.bvector), + ('origin_call', tensor.ivector), + ('origin_stand', tensor.bvector), + ('taxi_id', tensor.wvector), + ('timestamp', tensor.ivector), + ('day_type', tensor.bvector), + ('missing_data', tensor.bvector), + ('latitude', tensor.matrix), + ('longitude', tensor.matrix), + ('destination_latitude', tensor.vector), + ('destination_longitude', tensor.vector), + ('travel_time', tensor.ivector), + ('input_time', tensor.ivector), + ('week_of_year', tensor.bvector), + ('day_of_week', tensor.bvector), + ('qhour_of_day', tensor.bvector) + ] + self.candidate_inputs = self.prefix_inputs + + def inputs(self): + prefix_inputs = { name: constructor(name) + for name, constructor in self.prefix_inputs } + candidate_inputs = { 'candidate_'+name: constructor('candidate_'+name) + for name, constructor in self.candidate_inputs } + return dict(prefix_inputs.items() + candidate_inputs.items()) + + @property + def valid_dataset(self): + return TaxiDataset(self.config.valid_set, 'valid.hdf5') + + @property + def valid_trips_ids(self): valid = TaxiDataset(self.config.valid_set, 'valid.hdf5', sources=('trip_id',)) - valid_trips_ids = valid.get_data(None, slice(0, valid.num_examples))[0] + return valid.get_data(None, slice(0, valid.num_examples))[0] - dataset = TaxiDataset('train') + @property + def train_dataset(self): + return TaxiDataset('train') + + @property + def test_dataset(self): + return TaxiDataset('test') - prefix_stream = DataStream(dataset, iteration_scheme=TaxiTimeCutScheme(self.config.num_cuts)) - prefix_stream = transformers.TaxiExcludeTrips(prefix_stream, valid_trips_ids) - prefix_stream = transformers.TaxiGenerateSplits(prefix_stream, max_splits=self.config.max_splits) - prefix_stream = transformers.taxi_add_datetime(prefix_stream) - prefix_stream = transformers.taxi_add_first_last_len(prefix_stream, self.config.n_begin_end_pts) - prefix_stream = Batch(prefix_stream, iteration_scheme=ConstantScheme(self.config.batch_size)) - candidate_stream = DataStream(dataset, iteration_scheme=ShuffledExampleScheme(dataset.num_examples)) - candidate_stream = transformers.TaxiExcludeTrips(candidate_stream, valid_trips_ids) +class StreamSimple(StreamBase): + def __init__(self, config): + super(StreamSimple, self).__init__(config) + + self.prefix_inputs += [ + ('first_k_latitude', tensor.matrix), + ('first_k_longitude', tensor.matrix), + ('last_k_latitude', tensor.matrix), + ('last_k_longitude', tensor.matrix), + ] + self.candidate_inputs = self.prefix_inputs + + def candidate_stream(self, n_candidates): + candidate_stream = DataStream(self.train_dataset, + iteration_scheme=ShuffledExampleScheme(dataset.num_examples)) + candidate_stream = transformers.TaxiExcludeTrips(candidate_stream, self.valid_trips_ids) candidate_stream = transformers.TaxiExcludeEmptyTrips(candidate_stream) candidate_stream = transformers.taxi_add_datetime(candidate_stream) - candidate_stream = transformers.taxi_add_first_last_len(candidate_stream, self.config.n_begin_end_pts) - candidate_stream = Batch(candidate_stream, iteration_scheme=ConstantScheme(self.config.train_candidate_size)) + candidate_stream = transformers.taxi_add_first_last_len(candidate_stream, + self.config.n_begin_end_pts) + return Batch(candidate_stream, + iteration_scheme=ConstantScheme(n_candidates)) + + def train(self, req_vars): + prefix_stream = DataStream(self.train_dataset, + iteration_scheme=ShuffledExampleScheme(self.train_dataset.num_examples)) + + prefix_stream = transformers.TaxiExcludeTrips(prefix_stream, self.valid_trips_ids) + prefix_stream = transformers.TaxiExcludeEmptyTrips(prefix_stream) + prefix_stream = transformers.TaxiGenerateSplits(prefix_stream, + max_splits=self.config.max_splits) + prefix_stream = transformers.taxi_add_datetime(prefix_stream) + prefix_stream = transformers.taxi_add_first_last_len(prefix_stream, + self.config.n_begin_end_pts) + prefix_stream = Batch(prefix_stream, + iteration_scheme=ConstantScheme(self.config.batch_size)) + + candidate_stream = self.candidate_stream(self.config.train_candidate_size) sources = prefix_stream.sources + tuple('candidate_%s' % k for k in candidate_stream.sources) stream = Merge((prefix_stream, candidate_stream), sources) @@ -113,66 +115,117 @@ class Stream(object): return stream def valid(self, req_vars): - valid_dataset = TaxiDataset(self.config.valid_set, 'valid.hdf5') - train_dataset = TaxiDataset('train') - valid_trips_ids = valid_dataset.get_data(None, slice(0, valid_dataset.num_examples))[valid_dataset.sources.index('trip_id')] - - prefix_stream = DataStream(valid_dataset, iteration_scheme=SequentialExampleScheme(valid_dataset.num_examples)) + prefix_stream = DataStream( + self.valid_dataset, + iteration_scheme=SequentialExampleScheme(self.valid_dataset.num_examples)) prefix_stream = transformers.taxi_add_datetime(prefix_stream) - prefix_stream = transformers.taxi_add_first_last_len(prefix_stream, self.config.n_begin_end_pts) - prefix_stream = Batch(prefix_stream, iteration_scheme=ConstantScheme(self.config.batch_size)) + prefix_stream = transformers.taxi_add_first_last_len(prefix_stream, + self.config.n_begin_end_pts) + prefix_stream = Batch(prefix_stream, + iteration_scheme=ConstantScheme(self.config.batch_size)) + + candidate_stream = self.candidate_stream(self.config.valid_candidate_size) - candidate_stream = DataStream(train_dataset, iteration_scheme=ShuffledExampleScheme(train_dataset.num_examples)) - candidate_stream = transformers.TaxiExcludeTrips(candidate_stream, valid_trips_ids) + sources = prefix_stream.sources + tuple('candidate_%s' % k for k in candidate_stream.sources) + stream = Merge((prefix_stream, candidate_stream), sources) + stream = transformers.Select(stream, tuple(req_vars)) + stream = MultiProcessing(stream) + return stream + + +class StreamRecurrent(StreamBase): + def __init__(self, config): + super(StreamRecurrent, self).__init__(config) + + self.prefix_inputs += [ + ('latitude_mask', tensor.matrix), + ('longitude_mask', tensor.matrix), + ] + self.candidate_inputs = self.prefix_inputs + + def candidate_stream(self, n_candidates): + candidate_stream = DataStream(self.train_dataset, + iteration_scheme=ShuffledExampleScheme(self.train_dataset.num_examples)) + candidate_stream = transformers.TaxiExcludeTrips(candidate_stream, self.valid_trips_ids) candidate_stream = transformers.TaxiExcludeEmptyTrips(candidate_stream) candidate_stream = transformers.taxi_add_datetime(candidate_stream) - candidate_stream = transformers.taxi_add_first_last_len(candidate_stream, self.config.n_begin_end_pts) - candidate_stream = Batch(candidate_stream, iteration_scheme=ConstantScheme(self.config.valid_candidate_size)) + + candidate_stream = Batch(candidate_stream, + iteration_scheme=ConstantScheme(n_candidates)) + + candidate_stream = Padding(candidate_stream, + mask_sources=['latitude', 'longitude']) + + return candidate_stream + + def train(self, req_vars): + prefix_stream = DataStream(self.train_dataset, + iteration_scheme=ShuffledExampleScheme(self.train_dataset.num_examples)) + + prefix_stream = transformers.TaxiExcludeTrips(prefix_stream, self.valid_trips_ids) + prefix_stream = transformers.TaxiExcludeEmptyTrips(prefix_stream) + prefix_stream = transformers.TaxiGenerateSplits(prefix_stream, + max_splits=self.config.max_splits) + + prefix_stream = transformers.taxi_add_datetime(prefix_stream) + + prefix_stream = transformers.balanced_batch(prefix_stream, + key='latitude', + batch_size=self.config.batch_size, + batch_sort_size=self.config.batch_sort_size) + + prefix_stream = Padding(prefix_stream, mask_sources=['latitude', 'longitude']) + + candidate_stream = self.candidate_stream(self.config.train_candidate_size) sources = prefix_stream.sources + tuple('candidate_%s' % k for k in candidate_stream.sources) stream = Merge((prefix_stream, candidate_stream), sources) + stream = transformers.Select(stream, tuple(req_vars)) - stream = MultiProcessing(stream) + # stream = MultiProcessing(stream) return stream - def inputs(self): - return {'call_type': tensor.bvector('call_type'), - 'origin_call': tensor.ivector('origin_call'), - 'origin_stand': tensor.bvector('origin_stand'), - 'taxi_id': tensor.wvector('taxi_id'), - 'timestamp': tensor.ivector('timestamp'), - 'day_type': tensor.bvector('day_type'), - 'missing_data': tensor.bvector('missing_data'), - 'latitude': tensor.matrix('latitude'), - 'longitude': tensor.matrix('longitude'), - 'destination_latitude': tensor.vector('destination_latitude'), - 'destination_longitude': tensor.vector('destination_longitude'), - 'travel_time': tensor.ivector('travel_time'), - 'first_k_latitude': tensor.matrix('first_k_latitude'), - 'first_k_longitude': tensor.matrix('first_k_longitude'), - 'last_k_latitude': tensor.matrix('last_k_latitude'), - 'last_k_longitude': tensor.matrix('last_k_longitude'), - 'input_time': tensor.ivector('input_time'), - 'week_of_year': tensor.bvector('week_of_year'), - 'day_of_week': tensor.bvector('day_of_week'), - 'qhour_of_day': tensor.bvector('qhour_of_day'), - 'candidate_call_type': tensor.bvector('candidate_call_type'), - 'candidate_origin_call': tensor.ivector('candidate_origin_call'), - 'candidate_origin_stand': tensor.bvector('candidate_origin_stand'), - 'candidate_taxi_id': tensor.wvector('candidate_taxi_id'), - 'candidate_timestamp': tensor.ivector('candidate_timestamp'), - 'candidate_day_type': tensor.bvector('candidate_day_type'), - 'candidate_missing_data': tensor.bvector('candidate_missing_data'), - 'candidate_latitude': tensor.matrix('candidate_latitude'), - 'candidate_longitude': tensor.matrix('candidate_longitude'), - 'candidate_destination_latitude': tensor.vector('candidate_destination_latitude'), - 'candidate_destination_longitude': tensor.vector('candidate_destination_longitude'), - 'candidate_travel_time': tensor.ivector('candidate_travel_time'), - 'candidate_first_k_latitude': tensor.matrix('candidate_first_k_latitude'), - 'candidate_first_k_longitude': tensor.matrix('candidate_first_k_longitude'), - 'candidate_last_k_latitude': tensor.matrix('candidate_last_k_latitude'), - 'candidate_last_k_longitude': tensor.matrix('candidate_last_k_longitude'), - 'candidate_input_time': tensor.ivector('candidate_input_time'), - 'candidate_week_of_year': tensor.bvector('candidate_week_of_year'), - 'candidate_day_of_week': tensor.bvector('candidate_day_of_week'), - 'candidate_qhour_of_day': tensor.bvector('candidate_qhour_of_day')} + def valid(self, req_vars): + prefix_stream = DataStream( + self.valid_dataset, + iteration_scheme=SequentialExampleScheme(self.valid_dataset.num_examples)) + + prefix_stream = transformers.TaxiExcludeEmptyTrips(prefix_stream) + + prefix_stream = transformers.taxi_add_datetime(prefix_stream) + + prefix_stream = Batch(prefix_stream, + iteration_scheme=ConstantScheme(self.config.batch_size)) + prefix_stream = Padding(prefix_stream, mask_sources=['latitude', 'longitude']) + + candidate_stream = self.candidate_stream(self.config.valid_candidate_size) + + sources = prefix_stream.sources + tuple('candidate_%s' % k for k in candidate_stream.sources) + stream = Merge((prefix_stream, candidate_stream), sources) + + stream = transformers.Select(stream, tuple(req_vars)) + # stream = MultiProcessing(stream) + + return stream + + def test(self, req_vars): + prefix_stream = DataStream( + self.test_dataset, + iteration_scheme=SequentialExampleScheme(self.test_dataset.num_examples)) + + prefix_stream = transformers.taxi_add_datetime(prefix_stream) + prefix_stream = transformers.taxi_remove_test_only_clients(prefix_stream) + + prefix_stream = Batch(prefix_stream, + iteration_scheme=ConstantScheme(self.config.batch_size)) + prefix_stream = Padding(prefix_stream, mask_sources=['latitude', 'longitude']) + + candidate_stream = self.candidate_stream(self.config.test_candidate_size) + + sources = prefix_stream.sources + tuple('candidate_%s' % k for k in candidate_stream.sources) + stream = Merge((prefix_stream, candidate_stream), sources) + + stream = transformers.Select(stream, tuple(req_vars)) + # stream = MultiProcessing(stream) + + return stream diff --git a/model/memory_network_bidir.py b/model/memory_network_bidir.py @@ -0,0 +1,143 @@ +from theano import tensor + +from blocks.bricks import application, MLP, Rectifier, Initializable, Softmax, Linear +from blocks.bricks.parallel import Fork +from blocks.bricks.recurrent import Bidirectional, LSTM + +import data +from data import transformers +from data.cut import TaxiTimeCutScheme +from data.hdf5 import TaxiDataset, TaxiStream +import error +from model import ContextEmbedder + +from memory_network import StreamRecurrent as Stream +from memory_network import MemoryNetworkBase + +class RecurrentEncoder(Initializable): + def __init__(self, config, output_dim, activation, **kwargs): + super(RecurrentEncoder, self).__init__(**kwargs) + + self.config = config + self.context_embedder = ContextEmbedder(config) + + self.rec = Bidirectional(LSTM(dim=config.rec_state_dim, name='encoder_recurrent')) + self.fork = Fork( + [name for name in self.rec.prototype.apply.sequences + if name != 'mask'], + prototype=Linear()) + + rto_in = config.rec_state_dim * 2 + sum(x[2] for x in config.dim_embeddings) + self.rec_to_output = MLP( + activations=[Rectifier() for _ in config.dim_hidden] + [activation], + dims=[rto_in] + config.dim_hidden + [output_dim], + name='encoder_rto') + + self.children = [self.context_embedder, self.rec, self.fork, self.rec_to_output] + + self.rec_inputs = ['latitude', 'longitude', 'latitude_mask'] + self.inputs = self.context_embedder.inputs + self.rec_inputs + + def _push_allocation_config(self): + self.fork.input_dim = 2 + self.fork.output_dims = [ self.rec.children[0].get_dim(name) + for name in self.fork.output_names ] + + def _push_initialization_config(self): + for brick in self.children: + brick.weights_init = self.config.weights_init + brick.biases_init = self.config.biases_init + + @application + def apply(self, latitude, longitude, latitude_mask, **kwargs): + latitude = (latitude.T - data.train_gps_mean[0]) / data.train_gps_std[0] + longitude = (longitude.T - data.train_gps_mean[1]) / data.train_gps_std[1] + latitude_mask = latitude_mask.T + + rec_in = tensor.concatenate((latitude[:, :, None], longitude[:, :, None]), + axis=2) + path = self.rec.apply(self.fork.apply(rec_in), mask=latitude_mask)[0] + + last_id = tensor.cast(latitude_mask.sum(axis=0) - 1, dtype='int64') + + path_representation = (path[0][:, -self.config.rec_state_dim:], + path[last_id - 1, tensor.arange(last_id.shape[0])] + [:, :self.config.rec_state_dim]) + + embeddings = tuple(self.context_embedder.apply( + **{k: kwargs[k] for k in self.context_embedder.inputs })) + + inputs = tensor.concatenate(path_representation + embeddings, axis=1) + outputs = self.rec_to_output.apply(inputs) + + return outputs + + +class Model(MemoryNetworkBase): + def __init__(self, config, **kwargs): + super(Model, self).__init__(config, **kwargs) + + # Build prefix encoder : recurrent then MLP + self.prefix_encoder = RecurrentEncoder(self.config.prefix_encoder, + self.config.representation_size, + self.config.representation_activation(), + name='prefix_encoder') + + # Build candidate encoder + self.candidate_encoder = RecurrentEncoder(self.config.candidate_encoder, + self.config.representation_size, + self.config.representation_activation(), + name='candidate_encoder') + + # Rest of the stuff + self.softmax = Softmax() + + self.inputs = self.prefix_encoder.inputs \ + + ['candidate_'+k for k in self.candidate_encoder.inputs] + + self.children = [ self.prefix_encoder, + self.candidate_encoder, + self.softmax ] + + + @application(outputs=['destination']) + def predict(self, **kwargs): + prefix_representation = self.prefix_encoder.apply( + **{ name: kwargs[name] for name in self.prefix_encoder.inputs }) + + candidate_representation = self.prefix_encoder.apply( + **{ name: kwargs['candidate_'+name] for name in self.candidate_encoder.inputs }) + + if self.config.normalize_representation: + candidate_representation = candidate_representation \ + / tensor.sqrt((candidate_representation ** 2).sum(axis=1, keepdims=True)) + + similarity_score = tensor.dot(prefix_representation, candidate_representation.T) + similarity = self.softmax.apply(similarity_score) + + candidate_mask = kwargs['candidate_latitude_mask'] + candidate_last = tensor.cast(candidate_mask.sum(axis=1) - 1, 'int64') + candidate_destination = tensor.concatenate( + (kwargs['candidate_latitude'][tensor.arange(candidate_mask.shape[0]), candidate_last] + [:, None], + kwargs['candidate_longitude'][tensor.arange(candidate_mask.shape[0]), candidate_last] + [:, None]), + axis=1) + + return tensor.dot(similarity, candidate_destination) + + @predict.property('inputs') + def predict_inputs(self): + return self.inputs + + @application(outputs=['cost']) + def cost(self, **kwargs): + y_hat = self.predict(**kwargs) + y = tensor.concatenate((kwargs['destination_latitude'][:, None], + kwargs['destination_longitude'][:, None]), axis=1) + + return error.erdist(y_hat, y).mean() + + @cost.property('inputs') + def cost_inputs(self): + return self.inputs + ['destination_latitude', 'destination_longitude'] diff --git a/model/memory_network_mlp.py b/model/memory_network_mlp.py @@ -0,0 +1,106 @@ + +from theano import tensor + +from fuel.transformers import Batch, MultiProcessing, Merge +from fuel.streams import DataStream +from fuel.schemes import ConstantScheme, ShuffledExampleScheme, SequentialExampleScheme +from blocks.bricks import application, MLP, Rectifier, Initializable, Softmax + +import data +from data import transformers +from data.cut import TaxiTimeCutScheme +from data.hdf5 import TaxiDataset, TaxiStream +import error +from model import ContextEmbedder + +from memory_network import StreamSimple as Stream +from memory_network import MemoryNetworkBase + + +class Model(MemoryNetworkBase): + def __init__(self, **kwargs): + super(Model, self).__init__(**kwargs) + + self.prefix_encoder = MLP(activations=[Rectifier() for _ in config.prefix_encoder.dim_hidden] + + [config.representation_activation()], + dims=[config.prefix_encoder.dim_input] + + config.prefix_encoder.dim_hidden + + [config.representation_size], + name='prefix_encoder') + + self.candidate_encoder = MLP( + activations=[Rectifier() for _ in config.candidate_encoder.dim_hidden] + + [config.representation_activation()], + dims=[config.candidate_encoder.dim_input] + + config.candidate_encoder.dim_hidden + + [config.representation_size], + name='candidate_encoder') + self.softmax = Softmax() + + self.prefix_extremities = {'%s_k_%s' % (side, ['latitude', 'longitude'][axis]): axis + for side in ['first', 'last'] for axis in [0, 1]} + self.candidate_extremities = {'candidate_%s_k_%s' % (side, axname): axis + for side in ['first', 'last'] + for axis, axname in enumerate(['latitude', 'longitude'])} + + self.inputs = self.context_embedder.inputs \ + + ['candidate_%s'%k for k in self.context_embedder.inputs] \ + + self.prefix_extremities.keys() + self.candidate_extremities.keys() + self.children = [ self.context_embedder, + self.prefix_encoder, + self.candidate_encoder, + self.softmax ] + + def _push_initialization_config(self): + for (mlp, config) in [[self.prefix_encoder, self.config.prefix_encoder], + [self.candidate_encoder, self.config.candidate_encoder]]: + mlp.weights_init = config.weights_init + mlp.biases_init = config.biases_init + + @application(outputs=['destination']) + def predict(self, **kwargs): + prefix_embeddings = tuple(self.context_embedder.apply( + **{k: kwargs[k] for k in self.context_embedder.inputs })) + prefix_extremities = tuple((kwargs[k] - data.train_gps_mean[v]) / data.train_gps_std[v] + for k, v in self.prefix_extremities.items()) + prefix_inputs = tensor.concatenate(prefix_extremities + prefix_embeddings, axis=1) + prefix_representation = self.prefix_encoder.apply(prefix_inputs) + if self.config.normalize_representation: + prefix_representation = prefix_representation \ + / tensor.sqrt((prefix_representation ** 2).sum(axis=1, keepdims=True)) + + candidate_embeddings = tuple(self.context_embedder.apply(**{k: kwargs['candidate_%s'%k] + for k in self.context_embedder.inputs })) + candidate_extremities = tuple((kwargs[k] - data.train_gps_mean[v]) / data.train_gps_std[v] + for k, v in self.candidate_extremities.items()) + candidate_inputs = tensor.concatenate(candidate_extremities + candidate_embeddings, axis=1) + candidate_representation = self.candidate_encoder.apply(candidate_inputs) + if self.config.normalize_representation: + candidate_representation = candidate_representation \ + / tensor.sqrt((candidate_representation ** 2).sum(axis=1, keepdims=True)) + + similarity_score = tensor.dot(prefix_representation, candidate_representation.T) + similarity = self.softmax.apply(similarity_score) + + candidate_destination = tensor.concatenate( + (tensor.shape_padright(kwargs['candidate_last_k_latitude'][:,-1]), + tensor.shape_padright(kwargs['candidate_last_k_longitude'][:,-1])), + axis=1) + + return tensor.dot(similarity, candidate_destination) + + @predict.property('inputs') + def predict_inputs(self): + return self.inputs + + @application(outputs=['cost']) + def cost(self, **kwargs): + y_hat = self.predict(**kwargs) + y = tensor.concatenate((kwargs['destination_latitude'][:, None], + kwargs['destination_longitude'][:, None]), axis=1) + + return error.erdist(y_hat, y).mean() + + @cost.property('inputs') + def cost_inputs(self): + return self.inputs + ['destination_latitude', 'destination_longitude']