commit 1795dfe742bcb75085a909413b723b64a8eeb4fc
parent fe608831c62c7dba60a3bf57433d97b999e567c8
Author: Alex Auvolat <alex.auvolat@ens.fr>
Date: Thu, 23 Jul 2015 19:00:52 -0400
Memory network with bidirectional RNN
Diffstat:
5 files changed, 493 insertions(+), 135 deletions(-)
diff --git a/config/memory_network_bidir.py b/config/memory_network_bidir.py
@@ -0,0 +1,56 @@
+from blocks.initialization import IsotropicGaussian, Constant
+
+from blocks.bricks import Tanh
+
+import data
+from model.memory_network_bidir import Model, Stream
+
+
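+# One entry per discrete input: (source name, vocabulary size, embedding dim).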
+dim_embeddings = [
+ ('origin_call', data.origin_call_train_size, 10),
+ ('origin_stand', data.stands_size, 10),
+ ('week_of_year', 52, 10),
+ ('day_of_week', 7, 10),
+ ('qhour_of_day', 24 * 4, 10),
+ ('day_type', 3, 10),
+]
+
+embed_weights_init = IsotropicGaussian(0.001)
+
+
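+# Hyper-parameters for one encoder; __slots__ makes a mistyped attribute
+# name raise an error instead of being silently ignored.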
+class RNNConfig(object):
+ __slots__ = ('rec_state_dim', 'dim_embeddings', 'embed_weights_init',
+ 'dim_hidden', 'weights_init', 'biases_init')
+
+prefix_encoder = RNNConfig()
+prefix_encoder.dim_embeddings = dim_embeddings
+prefix_encoder.embed_weights_init = embed_weights_init
+prefix_encoder.rec_state_dim = 100
+prefix_encoder.dim_hidden = [100, 100]
+prefix_encoder.weights_init = IsotropicGaussian(0.01)
+prefix_encoder.biases_init = Constant(0.001)
+
+candidate_encoder = RNNConfig()
+candidate_encoder.dim_embeddings = dim_embeddings
+candidate_encoder.embed_weights_init = embed_weights_init
+candidate_encoder.rec_state_dim = 100
+candidate_encoder.dim_hidden = [100, 100]
+candidate_encoder.weights_init = IsotropicGaussian(0.01)
+candidate_encoder.biases_init = Constant(0.001)
+
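+# Size and activation of the final trip representation; normalizing it makes
+# the prefix/candidate dot product a cosine similarity.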
+representation_size = 100
+representation_activation = Tanh
+
+normalize_representation = True
+
+
+batch_size = 32
+batch_sort_size = 20
+
+valid_set = 'cuts/test_times_0'
+max_splits = 100
+num_cuts = 1000
+
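+# Number of candidate trips drawn from the training set for each batch.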
+train_candidate_size = 100
+valid_candidate_size = 100
+test_candidate_size = 100
diff --git a/model/bidirectional.py b/model/bidirectional.py
@@ -121,8 +121,8 @@ class Stream(object):
stream = transformers.balanced_batch(stream, key='latitude', batch_size=self.config.batch_size, batch_sort_size=self.config.batch_sort_size)
stream = Padding(stream, mask_sources=['latitude', 'longitude'])
+
stream = transformers.Select(stream, req_vars)
-
stream = MultiProcessing(stream)
return stream
diff --git a/model/memory_network.py b/model/memory_network.py
@@ -1,7 +1,7 @@
from theano import tensor
-from fuel.transformers import Batch, MultiProcessing, Merge
+from fuel.transformers import Batch, MultiProcessing, Merge, Padding
from fuel.streams import DataStream
from fuel.schemes import ConstantScheme, ShuffledExampleScheme, SequentialExampleScheme
from blocks.bricks import application, MLP, Rectifier, Initializable, Softmax
@@ -13,98 +13,100 @@ from data.hdf5 import TaxiDataset, TaxiStream
import error
from model import ContextEmbedder
-
-class Model(Initializable):
+class MemoryNetworkBase(Initializable):
def __init__(self, config, **kwargs):
- super(Model, self).__init__(**kwargs)
+ super(MemoryNetworkBase, self).__init__(**kwargs)
+
self.config = config
- self.context_embedder = ContextEmbedder(config)
-
- self.prefix_encoder = MLP(activations=[Rectifier() for _ in config.prefix_encoder.dim_hidden] + [config.representation_activation()],
- dims=[config.prefix_encoder.dim_input] + config.prefix_encoder.dim_hidden + [config.representation_size],
- name='prefix_encoder')
- self.candidate_encoder = MLP(activations=[Rectifier() for _ in config.candidate_encoder.dim_hidden] + [config.representation_activation()],
- dims=[config.candidate_encoder.dim_input] + config.candidate_encoder.dim_hidden + [config.representation_size],
- name='candidate_encoder')
- self.softmax = Softmax()
-
- self.prefix_extremities = {'%s_k_%s' % (side, ['latitude', 'longitude'][axis]): axis for side in ['first', 'last'] for axis in [0, 1]}
- self.candidate_extremities = {'candidate_%s_k_%s' % (side, ['latitude', 'longitude'][axis]): axis for side in ['first', 'last'] for axis in [0, 1]}
-
- self.inputs = self.context_embedder.inputs + ['candidate_%s'%k for k in self.context_embedder.inputs] + self.prefix_extremities.keys() + self.candidate_extremities.keys()
- self.children = [ self.context_embedder, self.prefix_encoder, self.candidate_encoder, self.softmax ]
-
- def _push_initialization_config(self):
- for (mlp, config) in [[self.prefix_encoder, self.config.prefix_encoder], [self.candidate_encoder, self.config.candidate_encoder]]:
- mlp.weights_init = config.weights_init
- mlp.biases_init = config.biases_init
-
- @application(outputs=['destination'])
- def predict(self, **kwargs):
- prefix_embeddings = tuple(self.context_embedder.apply(**{k: kwargs[k] for k in self.context_embedder.inputs }))
- prefix_extremities = tuple((kwargs[k] - data.train_gps_mean[v]) / data.train_gps_std[v] for k, v in self.prefix_extremities.items())
- prefix_inputs = tensor.concatenate(prefix_extremities + prefix_embeddings, axis=1)
- prefix_representation = self.prefix_encoder.apply(prefix_inputs)
- if self.config.normalize_representation:
- prefix_representation = prefix_representation / tensor.sqrt((prefix_representation ** 2).sum(axis=1, keepdims=True))
-
- candidate_embeddings = tuple(self.context_embedder.apply(**{k: kwargs['candidate_%s'%k] for k in self.context_embedder.inputs }))
- candidate_extremities = tuple((kwargs[k] - data.train_gps_mean[v]) / data.train_gps_std[v] for k, v in self.candidate_extremities.items())
- candidate_inputs = tensor.concatenate(candidate_extremities + candidate_embeddings, axis=1)
- candidate_representation = self.candidate_encoder.apply(candidate_inputs)
- if self.config.normalize_representation:
- candidate_representation = candidate_representation / tensor.sqrt((candidate_representation ** 2).sum(axis=1, keepdims=True))
-
- similarity_score = tensor.dot(prefix_representation, candidate_representation.T)
- similarity = self.softmax.apply(similarity_score)
-
- candidate_destination = tensor.concatenate(
- (tensor.shape_padright(kwargs['candidate_last_k_latitude'][:,-1]),
- tensor.shape_padright(kwargs['candidate_last_k_longitude'][:,-1])),
- axis=1)
-
- return tensor.dot(similarity, candidate_destination)
-
- @predict.property('inputs')
- def predict_inputs(self):
- return self.inputs
-
- @application(outputs=['cost'])
- def cost(self, **kwargs):
- y_hat = self.predict(**kwargs)
- y = tensor.concatenate((kwargs['destination_latitude'][:, None],
- kwargs['destination_longitude'][:, None]), axis=1)
-
- return error.erdist(y_hat, y).mean()
-
- @cost.property('inputs')
- def cost_inputs(self):
- return self.inputs + ['destination_latitude', 'destination_longitude']
-
-class Stream(object):
+
+class StreamBase(object):
def __init__(self, config):
self.config = config
- def train(self, req_vars):
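+ # Theano variable constructors for every source of a prefix trip; the
+ # candidate memory exposes the same sources under a 'candidate_' prefix.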
+ self.prefix_inputs = [
+ ('call_type', tensor.bvector),
+ ('origin_call', tensor.ivector),
+ ('origin_stand', tensor.bvector),
+ ('taxi_id', tensor.wvector),
+ ('timestamp', tensor.ivector),
+ ('day_type', tensor.bvector),
+ ('missing_data', tensor.bvector),
+ ('latitude', tensor.matrix),
+ ('longitude', tensor.matrix),
+ ('destination_latitude', tensor.vector),
+ ('destination_longitude', tensor.vector),
+ ('travel_time', tensor.ivector),
+ ('input_time', tensor.ivector),
+ ('week_of_year', tensor.bvector),
+ ('day_of_week', tensor.bvector),
+ ('qhour_of_day', tensor.bvector)
+ ]
+ self.candidate_inputs = self.prefix_inputs
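+ # (candidate_inputs aliases prefix_inputs; subclasses re-bind it after
+ # extending prefix_inputs in place)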
+
+ def inputs(self):
+ prefix_inputs = { name: constructor(name)
+ for name, constructor in self.prefix_inputs }
+ candidate_inputs = { 'candidate_'+name: constructor('candidate_'+name)
+ for name, constructor in self.candidate_inputs }
+ return dict(prefix_inputs.items() + candidate_inputs.items())
+
+ @property
+ def valid_dataset(self):
+ return TaxiDataset(self.config.valid_set, 'valid.hdf5')
+
+ @property
+ def valid_trips_ids(self):
valid = TaxiDataset(self.config.valid_set, 'valid.hdf5', sources=('trip_id',))
- valid_trips_ids = valid.get_data(None, slice(0, valid.num_examples))[0]
+ return valid.get_data(None, slice(0, valid.num_examples))[0]
- dataset = TaxiDataset('train')
+ @property
+ def train_dataset(self):
+ return TaxiDataset('train')
+
+ @property
+ def test_dataset(self):
+ return TaxiDataset('test')
- prefix_stream = DataStream(dataset, iteration_scheme=TaxiTimeCutScheme(self.config.num_cuts))
- prefix_stream = transformers.TaxiExcludeTrips(prefix_stream, valid_trips_ids)
- prefix_stream = transformers.TaxiGenerateSplits(prefix_stream, max_splits=self.config.max_splits)
- prefix_stream = transformers.taxi_add_datetime(prefix_stream)
- prefix_stream = transformers.taxi_add_first_last_len(prefix_stream, self.config.n_begin_end_pts)
- prefix_stream = Batch(prefix_stream, iteration_scheme=ConstantScheme(self.config.batch_size))
- candidate_stream = DataStream(dataset, iteration_scheme=ShuffledExampleScheme(dataset.num_examples))
- candidate_stream = transformers.TaxiExcludeTrips(candidate_stream, valid_trips_ids)
+class StreamSimple(StreamBase):
+ def __init__(self, config):
+ super(StreamSimple, self).__init__(config)
+
+ self.prefix_inputs += [
+ ('first_k_latitude', tensor.matrix),
+ ('first_k_longitude', tensor.matrix),
+ ('last_k_latitude', tensor.matrix),
+ ('last_k_longitude', tensor.matrix),
+ ]
+ self.candidate_inputs = self.prefix_inputs
+
+ def candidate_stream(self, n_candidates):
+ candidate_stream = DataStream(self.train_dataset,
+ iteration_scheme=ShuffledExampleScheme(self.train_dataset.num_examples))
+ candidate_stream = transformers.TaxiExcludeTrips(candidate_stream, self.valid_trips_ids)
candidate_stream = transformers.TaxiExcludeEmptyTrips(candidate_stream)
candidate_stream = transformers.taxi_add_datetime(candidate_stream)
- candidate_stream = transformers.taxi_add_first_last_len(candidate_stream, self.config.n_begin_end_pts)
- candidate_stream = Batch(candidate_stream, iteration_scheme=ConstantScheme(self.config.train_candidate_size))
+ candidate_stream = transformers.taxi_add_first_last_len(candidate_stream,
+ self.config.n_begin_end_pts)
+ return Batch(candidate_stream,
+ iteration_scheme=ConstantScheme(n_candidates))
+
+ def train(self, req_vars):
+ prefix_stream = DataStream(self.train_dataset,
+ iteration_scheme=ShuffledExampleScheme(self.train_dataset.num_examples))
+
+ prefix_stream = transformers.TaxiExcludeTrips(prefix_stream, self.valid_trips_ids)
+ prefix_stream = transformers.TaxiExcludeEmptyTrips(prefix_stream)
+ prefix_stream = transformers.TaxiGenerateSplits(prefix_stream,
+ max_splits=self.config.max_splits)
+ prefix_stream = transformers.taxi_add_datetime(prefix_stream)
+ prefix_stream = transformers.taxi_add_first_last_len(prefix_stream,
+ self.config.n_begin_end_pts)
+ prefix_stream = Batch(prefix_stream,
+ iteration_scheme=ConstantScheme(self.config.batch_size))
+
+ candidate_stream = self.candidate_stream(self.config.train_candidate_size)
sources = prefix_stream.sources + tuple('candidate_%s' % k for k in candidate_stream.sources)
stream = Merge((prefix_stream, candidate_stream), sources)
@@ -113,66 +115,117 @@ class Stream(object):
return stream
def valid(self, req_vars):
- valid_dataset = TaxiDataset(self.config.valid_set, 'valid.hdf5')
- train_dataset = TaxiDataset('train')
- valid_trips_ids = valid_dataset.get_data(None, slice(0, valid_dataset.num_examples))[valid_dataset.sources.index('trip_id')]
-
- prefix_stream = DataStream(valid_dataset, iteration_scheme=SequentialExampleScheme(valid_dataset.num_examples))
+ prefix_stream = DataStream(
+ self.valid_dataset,
+ iteration_scheme=SequentialExampleScheme(self.valid_dataset.num_examples))
prefix_stream = transformers.taxi_add_datetime(prefix_stream)
- prefix_stream = transformers.taxi_add_first_last_len(prefix_stream, self.config.n_begin_end_pts)
- prefix_stream = Batch(prefix_stream, iteration_scheme=ConstantScheme(self.config.batch_size))
+ prefix_stream = transformers.taxi_add_first_last_len(prefix_stream,
+ self.config.n_begin_end_pts)
+ prefix_stream = Batch(prefix_stream,
+ iteration_scheme=ConstantScheme(self.config.batch_size))
+
+ candidate_stream = self.candidate_stream(self.config.valid_candidate_size)
- candidate_stream = DataStream(train_dataset, iteration_scheme=ShuffledExampleScheme(train_dataset.num_examples))
- candidate_stream = transformers.TaxiExcludeTrips(candidate_stream, valid_trips_ids)
+ sources = prefix_stream.sources + tuple('candidate_%s' % k for k in candidate_stream.sources)
+ stream = Merge((prefix_stream, candidate_stream), sources)
+ stream = transformers.Select(stream, tuple(req_vars))
+ stream = MultiProcessing(stream)
+ return stream
+
+
+class StreamRecurrent(StreamBase):
+ def __init__(self, config):
+ super(StreamRecurrent, self).__init__(config)
+
+ self.prefix_inputs += [
+ ('latitude_mask', tensor.matrix),
+ ('longitude_mask', tensor.matrix),
+ ]
+ self.candidate_inputs = self.prefix_inputs
+
+ def candidate_stream(self, n_candidates):
+ candidate_stream = DataStream(self.train_dataset,
+ iteration_scheme=ShuffledExampleScheme(self.train_dataset.num_examples))
+ candidate_stream = transformers.TaxiExcludeTrips(candidate_stream, self.valid_trips_ids)
candidate_stream = transformers.TaxiExcludeEmptyTrips(candidate_stream)
candidate_stream = transformers.taxi_add_datetime(candidate_stream)
- candidate_stream = transformers.taxi_add_first_last_len(candidate_stream, self.config.n_begin_end_pts)
- candidate_stream = Batch(candidate_stream, iteration_scheme=ConstantScheme(self.config.valid_candidate_size))
+
+ candidate_stream = Batch(candidate_stream,
+ iteration_scheme=ConstantScheme(n_candidates))
+
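+ # Padding pads the variable-length GPS sequences and adds the matching
+ # 'latitude_mask' / 'longitude_mask' sources.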
+ candidate_stream = Padding(candidate_stream,
+ mask_sources=['latitude', 'longitude'])
+
+ return candidate_stream
+
+ def train(self, req_vars):
+ prefix_stream = DataStream(self.train_dataset,
+ iteration_scheme=ShuffledExampleScheme(self.train_dataset.num_examples))
+
+ prefix_stream = transformers.TaxiExcludeTrips(prefix_stream, self.valid_trips_ids)
+ prefix_stream = transformers.TaxiExcludeEmptyTrips(prefix_stream)
+ prefix_stream = transformers.TaxiGenerateSplits(prefix_stream,
+ max_splits=self.config.max_splits)
+
+ prefix_stream = transformers.taxi_add_datetime(prefix_stream)
+
+ prefix_stream = transformers.balanced_batch(prefix_stream,
+ key='latitude',
+ batch_size=self.config.batch_size,
+ batch_sort_size=self.config.batch_sort_size)
+
+ prefix_stream = Padding(prefix_stream, mask_sources=['latitude', 'longitude'])
+
+ candidate_stream = self.candidate_stream(self.config.train_candidate_size)
sources = prefix_stream.sources + tuple('candidate_%s' % k for k in candidate_stream.sources)
stream = Merge((prefix_stream, candidate_stream), sources)
+
stream = transformers.Select(stream, tuple(req_vars))
- stream = MultiProcessing(stream)
+ # stream = MultiProcessing(stream)
return stream
- def inputs(self):
- return {'call_type': tensor.bvector('call_type'),
- 'origin_call': tensor.ivector('origin_call'),
- 'origin_stand': tensor.bvector('origin_stand'),
- 'taxi_id': tensor.wvector('taxi_id'),
- 'timestamp': tensor.ivector('timestamp'),
- 'day_type': tensor.bvector('day_type'),
- 'missing_data': tensor.bvector('missing_data'),
- 'latitude': tensor.matrix('latitude'),
- 'longitude': tensor.matrix('longitude'),
- 'destination_latitude': tensor.vector('destination_latitude'),
- 'destination_longitude': tensor.vector('destination_longitude'),
- 'travel_time': tensor.ivector('travel_time'),
- 'first_k_latitude': tensor.matrix('first_k_latitude'),
- 'first_k_longitude': tensor.matrix('first_k_longitude'),
- 'last_k_latitude': tensor.matrix('last_k_latitude'),
- 'last_k_longitude': tensor.matrix('last_k_longitude'),
- 'input_time': tensor.ivector('input_time'),
- 'week_of_year': tensor.bvector('week_of_year'),
- 'day_of_week': tensor.bvector('day_of_week'),
- 'qhour_of_day': tensor.bvector('qhour_of_day'),
- 'candidate_call_type': tensor.bvector('candidate_call_type'),
- 'candidate_origin_call': tensor.ivector('candidate_origin_call'),
- 'candidate_origin_stand': tensor.bvector('candidate_origin_stand'),
- 'candidate_taxi_id': tensor.wvector('candidate_taxi_id'),
- 'candidate_timestamp': tensor.ivector('candidate_timestamp'),
- 'candidate_day_type': tensor.bvector('candidate_day_type'),
- 'candidate_missing_data': tensor.bvector('candidate_missing_data'),
- 'candidate_latitude': tensor.matrix('candidate_latitude'),
- 'candidate_longitude': tensor.matrix('candidate_longitude'),
- 'candidate_destination_latitude': tensor.vector('candidate_destination_latitude'),
- 'candidate_destination_longitude': tensor.vector('candidate_destination_longitude'),
- 'candidate_travel_time': tensor.ivector('candidate_travel_time'),
- 'candidate_first_k_latitude': tensor.matrix('candidate_first_k_latitude'),
- 'candidate_first_k_longitude': tensor.matrix('candidate_first_k_longitude'),
- 'candidate_last_k_latitude': tensor.matrix('candidate_last_k_latitude'),
- 'candidate_last_k_longitude': tensor.matrix('candidate_last_k_longitude'),
- 'candidate_input_time': tensor.ivector('candidate_input_time'),
- 'candidate_week_of_year': tensor.bvector('candidate_week_of_year'),
- 'candidate_day_of_week': tensor.bvector('candidate_day_of_week'),
- 'candidate_qhour_of_day': tensor.bvector('candidate_qhour_of_day')}
+ def valid(self, req_vars):
+ prefix_stream = DataStream(
+ self.valid_dataset,
+ iteration_scheme=SequentialExampleScheme(self.valid_dataset.num_examples))
+
+ prefix_stream = transformers.TaxiExcludeEmptyTrips(prefix_stream)
+
+ prefix_stream = transformers.taxi_add_datetime(prefix_stream)
+
+ prefix_stream = Batch(prefix_stream,
+ iteration_scheme=ConstantScheme(self.config.batch_size))
+ prefix_stream = Padding(prefix_stream, mask_sources=['latitude', 'longitude'])
+
+ candidate_stream = self.candidate_stream(self.config.valid_candidate_size)
+
+ sources = prefix_stream.sources + tuple('candidate_%s' % k for k in candidate_stream.sources)
+ stream = Merge((prefix_stream, candidate_stream), sources)
+
+ stream = transformers.Select(stream, tuple(req_vars))
+ # stream = MultiProcessing(stream)
+
+ return stream
+
+ def test(self, req_vars):
+ prefix_stream = DataStream(
+ self.test_dataset,
+ iteration_scheme=SequentialExampleScheme(self.test_dataset.num_examples))
+
+ prefix_stream = transformers.taxi_add_datetime(prefix_stream)
+ prefix_stream = transformers.taxi_remove_test_only_clients(prefix_stream)
+
+ prefix_stream = Batch(prefix_stream,
+ iteration_scheme=ConstantScheme(self.config.batch_size))
+ prefix_stream = Padding(prefix_stream, mask_sources=['latitude', 'longitude'])
+
+ candidate_stream = self.candidate_stream(self.config.test_candidate_size)
+
+ sources = prefix_stream.sources + tuple('candidate_%s' % k for k in candidate_stream.sources)
+ stream = Merge((prefix_stream, candidate_stream), sources)
+
+ stream = transformers.Select(stream, tuple(req_vars))
+ # stream = MultiProcessing(stream)
+
+ return stream
diff --git a/model/memory_network_bidir.py b/model/memory_network_bidir.py
@@ -0,0 +1,143 @@
+from theano import tensor
+
+from blocks.bricks import application, MLP, Rectifier, Initializable, Softmax, Linear
+from blocks.bricks.parallel import Fork
+from blocks.bricks.recurrent import Bidirectional, LSTM
+
+import data
+from data import transformers
+from data.cut import TaxiTimeCutScheme
+from data.hdf5 import TaxiDataset, TaxiStream
+import error
+from model import ContextEmbedder
+
+from memory_network import StreamRecurrent as Stream
+from memory_network import MemoryNetworkBase
+
+class RecurrentEncoder(Initializable):
+ def __init__(self, config, output_dim, activation, **kwargs):
+ super(RecurrentEncoder, self).__init__(**kwargs)
+
+ self.config = config
+ self.context_embedder = ContextEmbedder(config)
+
+ self.rec = Bidirectional(LSTM(dim=config.rec_state_dim, name='encoder_recurrent'))
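+ # Fork maps the 2-D (lat, lon) points onto the recurrent transition's
+ # sequence inputs (everything in apply.sequences except the mask).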
+ self.fork = Fork(
+ [name for name in self.rec.prototype.apply.sequences
+ if name != 'mask'],
+ prototype=Linear())
+
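+ # Input to the output MLP: forward and backward final states
+ # (2 * rec_state_dim) concatenated with all context embeddings.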
+ rto_in = config.rec_state_dim * 2 + sum(x[2] for x in config.dim_embeddings)
+ self.rec_to_output = MLP(
+ activations=[Rectifier() for _ in config.dim_hidden] + [activation],
+ dims=[rto_in] + config.dim_hidden + [output_dim],
+ name='encoder_rto')
+
+ self.children = [self.context_embedder, self.rec, self.fork, self.rec_to_output]
+
+ self.rec_inputs = ['latitude', 'longitude', 'latitude_mask']
+ self.inputs = self.context_embedder.inputs + self.rec_inputs
+
+ def _push_allocation_config(self):
+ self.fork.input_dim = 2
+ self.fork.output_dims = [ self.rec.children[0].get_dim(name)
+ for name in self.fork.output_names ]
+
+ def _push_initialization_config(self):
+ for brick in self.children:
+ brick.weights_init = self.config.weights_init
+ brick.biases_init = self.config.biases_init
+
+ @application
+ def apply(self, latitude, longitude, latitude_mask, **kwargs):
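+ # Transpose to time-major layout and standardize coordinates with the
+ # training-set GPS mean / std.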
+ latitude = (latitude.T - data.train_gps_mean[0]) / data.train_gps_std[0]
+ longitude = (longitude.T - data.train_gps_mean[1]) / data.train_gps_std[1]
+ latitude_mask = latitude_mask.T
+
+ rec_in = tensor.concatenate((latitude[:, :, None], longitude[:, :, None]),
+ axis=2)
+ path = self.rec.apply(self.fork.apply(rec_in), mask=latitude_mask)[0]
+
+ last_id = tensor.cast(latitude_mask.sum(axis=0) - 1, dtype='int64')
+
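+ # Trip representation: the backward pass's state at step 0 plus the
+ # forward pass's state at the last unmasked step.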
+ path_representation = (path[0][:, -self.config.rec_state_dim:],
+ path[last_id, tensor.arange(last_id.shape[0])]
+ [:, :self.config.rec_state_dim])
+
+ embeddings = tuple(self.context_embedder.apply(
+ **{k: kwargs[k] for k in self.context_embedder.inputs }))
+
+ inputs = tensor.concatenate(path_representation + embeddings, axis=1)
+ outputs = self.rec_to_output.apply(inputs)
+
+ return outputs
+
+
+class Model(MemoryNetworkBase):
+ def __init__(self, config, **kwargs):
+ super(Model, self).__init__(config, **kwargs)
+
+ # Build the prefix encoder: a bidirectional RNN followed by an MLP
+ self.prefix_encoder = RecurrentEncoder(self.config.prefix_encoder,
+ self.config.representation_size,
+ self.config.representation_activation(),
+ name='prefix_encoder')
+
+ # Build candidate encoder
+ self.candidate_encoder = RecurrentEncoder(self.config.candidate_encoder,
+ self.config.representation_size,
+ self.config.representation_activation(),
+ name='candidate_encoder')
+
+ # Shared softmax over prefix/candidate similarity scores
+ self.softmax = Softmax()
+
+ self.inputs = self.prefix_encoder.inputs \
+ + ['candidate_'+k for k in self.candidate_encoder.inputs]
+
+ self.children = [ self.prefix_encoder,
+ self.candidate_encoder,
+ self.softmax ]
+
+
+ @application(outputs=['destination'])
+ def predict(self, **kwargs):
+ prefix_representation = self.prefix_encoder.apply(
+ **{ name: kwargs[name] for name in self.prefix_encoder.inputs })
+
+ candidate_representation = self.candidate_encoder.apply(
+ **{ name: kwargs['candidate_'+name] for name in self.candidate_encoder.inputs })
+
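+ # Normalize both representations so the dot product below is a cosine
+ # similarity.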
+ if self.config.normalize_representation:
+ prefix_representation = prefix_representation \
+ / tensor.sqrt((prefix_representation ** 2).sum(axis=1, keepdims=True))
+ candidate_representation = candidate_representation \
+ / tensor.sqrt((candidate_representation ** 2).sum(axis=1, keepdims=True))
+
+ similarity_score = tensor.dot(prefix_representation, candidate_representation.T)
+ similarity = self.softmax.apply(similarity_score)
+
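+ # The prediction is the similarity-weighted average of each candidate
+ # trip's final recorded GPS point.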
+ candidate_mask = kwargs['candidate_latitude_mask']
+ candidate_last = tensor.cast(candidate_mask.sum(axis=1) - 1, 'int64')
+ candidate_destination = tensor.concatenate(
+ (kwargs['candidate_latitude'][tensor.arange(candidate_mask.shape[0]), candidate_last]
+ [:, None],
+ kwargs['candidate_longitude'][tensor.arange(candidate_mask.shape[0]), candidate_last]
+ [:, None]),
+ axis=1)
+
+ return tensor.dot(similarity, candidate_destination)
+
+ @predict.property('inputs')
+ def predict_inputs(self):
+ return self.inputs
+
+ @application(outputs=['cost'])
+ def cost(self, **kwargs):
+ y_hat = self.predict(**kwargs)
+ y = tensor.concatenate((kwargs['destination_latitude'][:, None],
+ kwargs['destination_longitude'][:, None]), axis=1)
+
+ return error.erdist(y_hat, y).mean()
+
+ @cost.property('inputs')
+ def cost_inputs(self):
+ return self.inputs + ['destination_latitude', 'destination_longitude']
diff --git a/model/memory_network_mlp.py b/model/memory_network_mlp.py
@@ -0,0 +1,106 @@
+
+from theano import tensor
+
+from fuel.transformers import Batch, MultiProcessing, Merge
+from fuel.streams import DataStream
+from fuel.schemes import ConstantScheme, ShuffledExampleScheme, SequentialExampleScheme
+from blocks.bricks import application, MLP, Rectifier, Initializable, Softmax
+
+import data
+from data import transformers
+from data.cut import TaxiTimeCutScheme
+from data.hdf5 import TaxiDataset, TaxiStream
+import error
+from model import ContextEmbedder
+
+from memory_network import StreamSimple as Stream
+from memory_network import MemoryNetworkBase
+
+
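+# MLP variant of the memory network: trips are encoded from their first/last
+# k GPS points and context embeddings, with no recurrent component.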
+class Model(MemoryNetworkBase):
+ def __init__(self, config, **kwargs):
+ super(Model, self).__init__(config, **kwargs)
+
+ self.context_embedder = ContextEmbedder(config)
+
+ self.prefix_encoder = MLP(activations=[Rectifier() for _ in config.prefix_encoder.dim_hidden]
+ + [config.representation_activation()],
+ dims=[config.prefix_encoder.dim_input]
+ + config.prefix_encoder.dim_hidden
+ + [config.representation_size],
+ name='prefix_encoder')
+
+ self.candidate_encoder = MLP(
+ activations=[Rectifier() for _ in config.candidate_encoder.dim_hidden]
+ + [config.representation_activation()],
+ dims=[config.candidate_encoder.dim_input]
+ + config.candidate_encoder.dim_hidden
+ + [config.representation_size],
+ name='candidate_encoder')
+ self.softmax = Softmax()
+
+ self.prefix_extremities = {'%s_k_%s' % (side, ['latitude', 'longitude'][axis]): axis
+ for side in ['first', 'last'] for axis in [0, 1]}
+ self.candidate_extremities = {'candidate_%s_k_%s' % (side, axname): axis
+ for side in ['first', 'last']
+ for axis, axname in enumerate(['latitude', 'longitude'])}
+
+ self.inputs = self.context_embedder.inputs \
+ + ['candidate_%s'%k for k in self.context_embedder.inputs] \
+ + self.prefix_extremities.keys() + self.candidate_extremities.keys()
+ self.children = [ self.context_embedder,
+ self.prefix_encoder,
+ self.candidate_encoder,
+ self.softmax ]
+
+ def _push_initialization_config(self):
+ for (mlp, config) in [[self.prefix_encoder, self.config.prefix_encoder],
+ [self.candidate_encoder, self.config.candidate_encoder]]:
+ mlp.weights_init = config.weights_init
+ mlp.biases_init = config.biases_init
+
+ @application(outputs=['destination'])
+ def predict(self, **kwargs):
+ prefix_embeddings = tuple(self.context_embedder.apply(
+ **{k: kwargs[k] for k in self.context_embedder.inputs }))
+ prefix_extremities = tuple((kwargs[k] - data.train_gps_mean[v]) / data.train_gps_std[v]
+ for k, v in self.prefix_extremities.items())
+ prefix_inputs = tensor.concatenate(prefix_extremities + prefix_embeddings, axis=1)
+ prefix_representation = self.prefix_encoder.apply(prefix_inputs)
+ if self.config.normalize_representation:
+ prefix_representation = prefix_representation \
+ / tensor.sqrt((prefix_representation ** 2).sum(axis=1, keepdims=True))
+
+ candidate_embeddings = tuple(self.context_embedder.apply(**{k: kwargs['candidate_%s'%k]
+ for k in self.context_embedder.inputs }))
+ candidate_extremities = tuple((kwargs[k] - data.train_gps_mean[v]) / data.train_gps_std[v]
+ for k, v in self.candidate_extremities.items())
+ candidate_inputs = tensor.concatenate(candidate_extremities + candidate_embeddings, axis=1)
+ candidate_representation = self.candidate_encoder.apply(candidate_inputs)
+ if self.config.normalize_representation:
+ candidate_representation = candidate_representation \
+ / tensor.sqrt((candidate_representation ** 2).sum(axis=1, keepdims=True))
+
+ similarity_score = tensor.dot(prefix_representation, candidate_representation.T)
+ similarity = self.softmax.apply(similarity_score)
+
+ candidate_destination = tensor.concatenate(
+ (tensor.shape_padright(kwargs['candidate_last_k_latitude'][:,-1]),
+ tensor.shape_padright(kwargs['candidate_last_k_longitude'][:,-1])),
+ axis=1)
+
+ return tensor.dot(similarity, candidate_destination)
+
+ @predict.property('inputs')
+ def predict_inputs(self):
+ return self.inputs
+
+ @application(outputs=['cost'])
+ def cost(self, **kwargs):
+ y_hat = self.predict(**kwargs)
+ y = tensor.concatenate((kwargs['destination_latitude'][:, None],
+ kwargs['destination_longitude'][:, None]), axis=1)
+
+ return error.erdist(y_hat, y).mean()
+
+ @cost.property('inputs')
+ def cost_inputs(self):
+ return self.inputs + ['destination_latitude', 'destination_longitude']