joint_mlp_tgtcls_111_cswdtx_bigger_dropout.py (1557B)
# Configuration for the joint destination/time MLP model: embedding sizes,
# layer dimensions, initialisation schemes, dropout and batching parameters.
# Everything here is a plain module-level value read by whoever imports this
# configuration.

import os
import cPickle

from blocks.initialization import IsotropicGaussian, Constant
from blocks.bricks import Rectifier, Tanh, Logistic
from blocks.filter import VariableFilter
from blocks import roles

import data
from model.joint_mlp_tgtcls import Model, Stream


# how many points we consider at the beginning and end of the known trajectory
n_begin_end_pts = 10

# Pickles are binary data: open the file in 'rb' mode so the bytes are never
# subjected to newline translation (text mode can corrupt binary-protocol
# pickles on some platforms).
# NOTE(review): unpickling executes arbitrary code; this file is assumed to be
# a trusted, locally generated artifact under data.path.
with open(os.path.join(data.path, 'arrival-clusters.pkl'), 'rb') as f:
    dest_tgtcls = cPickle.load(f)

# generate target classes for time prediction as a Fibonacci sequence
# (two seed values plus 21 generated terms: 23 classes, from 1 up to 46368)
time_tgtcls = [1, 2]
for i in range(21):
    time_tgtcls.append(time_tgtcls[-1] + time_tgtcls[-2])

# One entry per embedded input feature. The third field of each tuple is
# summed into dim_input below; presumably the fields are
# (feature name, number of distinct values, embedding size) -- TODO confirm
# against the model code.
dim_embeddings = [
    ('origin_call', data.origin_call_size, 15),
    ('origin_stand', data.stands_size, 10),
    ('week_of_year', 52, 10),
    ('day_of_week', 7, 10),
    ('qhour_of_day', 24 * 4, 10),
    ('day_type', 3, 10),
    ('taxi_id', 448, 10),
]

# Common network part
# Input width = trajectory points + all embedding outputs. The `2 * 2` factor
# presumably stands for (begin, end) x (two coordinates per point) -- TODO
# confirm against the stream/model code.
dim_input = n_begin_end_pts * 2 * 2 + sum(x for (_, _, x) in dim_embeddings)
dim_hidden = [5000]

# Destination prediction part
dim_hidden_dest = [1000]
# one output unit per row of the loaded destination target-class array
dim_output_dest = dest_tgtcls.shape[0]

# Time prediction part
dim_hidden_time = [500]
# one output unit per time target class
dim_output_time = len(time_tgtcls)

# Cost ratio between distance cost and time cost
time_cost_factor = 4

embed_weights_init = IsotropicGaussian(0.01)
mlp_weights_init = IsotropicGaussian(0.1)
mlp_biases_init = Constant(0.01)

dropout = 0.5
# Filter matching variables named 'output' that belong to Rectifier bricks;
# presumably the consumer applies dropout to the variables it selects --
# verify in the training code.
dropout_inputs = VariableFilter(bricks=[Rectifier], name='output')

# use adadelta, so no learning_rate or momentum
batch_size = 200

# NOTE(review): not used in this file; meaning is defined by the consumer
# (presumably the data stream) -- confirm there.
max_splits = 100