# deep_bidir_lstm_2x128.py
# Hyper-parameter configuration for the deep bidirectional LSTM model.
#
# Every module-level name below is a setting. Presumably the training script
# imports this module and reads these names as attributes -- the consumer is
# not visible from this file, so confirm before renaming or removing anything
# (including the imports that look unused here).

from blocks.algorithms import (
    BasicMomentum,
    AdaDelta,
    RMSProp,
    Adam,
    CompositeRule,
    StepClipping,
)
from blocks.initialization import IsotropicGaussian, Constant
from blocks.bricks import Tanh

from model.deep_bidir_lstm import Model


# --- Data pipeline ---------------------------------------------------------
batch_size = 32
# NOTE(review): presumably the number of batches pooled together and sorted
# (e.g. by length) before being split again -- confirm against the data code.
sort_batch_count = 20

shuffle_questions = True
shuffle_entities = True

concat_ctx_and_question = True
# Should not matter for a bidirectional network.
concat_question_before = True

# --- Model -----------------------------------------------------------------
embed_size = 200

# NOTE(review): presumably one entry per stacked LSTM layer -- confirm in Model.
lstm_size = [128, 128]
skip_connections = True

n_entities = 550
# Both lists are empty; presumably this means no hidden layer in the output
# MLP -- confirm in Model.
out_mlp_hidden = []
out_mlp_activations = []

# --- Optimisation ----------------------------------------------------------
# CompositeRule applies its step rules in the order given: RMSProp first, then
# momentum on top of the RMSProp-scaled step.
step_rule = CompositeRule([
    RMSProp(decay_rate=0.95, learning_rate=5e-5),
    BasicMomentum(momentum=0.9),
])

# --- Regularisation --------------------------------------------------------
dropout = 0.1
# NOTE(review): presumably the scale of noise added to the weights during
# training -- confirm where w_noise is consumed.
w_noise = 0.05

# --- Monitoring / checkpointing --------------------------------------------
# NOTE(review): units are not shown here (presumably training iterations).
valid_freq = 1000
save_freq = 1000
print_freq = 100

# --- Parameter initialisation ----------------------------------------------
weights_init = IsotropicGaussian(0.01)
biases_init = Constant(0.)