commit b637e0bc7b123fe41ea2247ebb7aa311c88b81e0
parent 3f3ab2bfe3ebfa266d433012be1c89c722d63352
Author: Alex Auvolat <alex.auvolat@ens.fr>
Date: Thu, 2 Jul 2015 11:30:41 -0400
Step rule & dropout params cleanup
Diffstat:
 M config/joint_simple_mlp_tgtcls_111_cswdtx_bigger_dropout.py | 7 +++++--
 M train.py | 7 ++++++-

2 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/config/joint_simple_mlp_tgtcls_111_cswdtx_bigger_dropout.py b/config/joint_simple_mlp_tgtcls_111_cswdtx_bigger_dropout.py
@@ -2,6 +2,9 @@ import os
import cPickle
from blocks.initialization import IsotropicGaussian, Constant
+from blocks.bricks import Rectifier, Tanh, Logistic
+from blocks.filter import VariableFilter
+from blocks import roles
import data
from model.joint_simple_mlp_tgtcls import Model, Stream
@@ -46,8 +49,8 @@ embed_weights_init = IsotropicGaussian(0.01)
mlp_weights_init = IsotropicGaussian(0.1)
mlp_biases_init = Constant(0.01)
-apply_dropout = True
-dropout_p = 0.5
+dropout = 0.5
+dropout_inputs = VariableFilter(bricks=[Rectifier], name='output')
# use adadelta, so no learning_rate or momentum
batch_size = 200
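The config no longer carries an apply_dropout flag: the presence of the dropout/dropout_inputs pair is the switch, and the VariableFilter selects every Rectifier's output as a dropout site. A minimal sketch of how a training script could consume these two values with Blocks' stock apply_dropout helper (the cg and cost names are placeholders, not lines from this commit):

# Hypothetical consumer of the new config values, not part of this diff.
from blocks.graph import ComputationGraph, apply_dropout

cg = ComputationGraph(cost)
if hasattr(config, 'dropout_inputs'):
    # Calling the VariableFilter on cg.variables yields the matching
    # Theano variables (the output of every Rectifier brick).
    dropped = config.dropout_inputs(cg.variables)
    cg = apply_dropout(cg, dropped, config.dropout)
    cost = cg.outputs[0]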
diff --git a/train.py b/train.py
@@ -132,12 +132,17 @@ if __name__ == "__main__":
parameters_size += reduce(operator.mul, value.get_value().shape, 1)
logger.info('Total number of parameters: %d in %d matrices' % (parameters_size, len(cg.get_params())))
+ if hasattr(config, 'step_rule'):
+ step_rule = config.step_rule
+ else:
+ step_rule = AdaDelta()
+
params = cg.parameters
algorithm = GradientDescent(
cost=cost,
step_rule=CompositeRule([
ElementwiseRemoveNotFinite(),
- config.step_rule,
+ step_rule
]),
params=params)
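With this fallback in place, AdaDelta stays the default and a config module only defines step_rule when it wants something else. A hypothetical override in a config file (the values are illustrative, not from this commit):

# e.g. in a config module; Momentum is a stock Blocks step rule.
from blocks.algorithms import Momentum

# train.py picks this up via hasattr(config, 'step_rule'); when the
# attribute is absent, the AdaDelta() default above applies.
step_rule = Momentum(learning_rate=0.001, momentum=0.9)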