gcn_mtb.py (1487B)

# Experiment configuration: "matching the blanks" (MTB) unsupervised training
# with a GCN component on the T-REx dataset, evaluated as 5-way 1-shot on
# FewRel.
#
# Every module-level name below is a configuration value. Documentation is
# kept in `#` comments so that no extra name is added to the module namespace.
# NOTE(review): "Section N" references presumably point to the paper that
# introduced matching the blanks -- confirm.

from gbure.model.matching_the_blanks import Model
from gbure.model.fewshot import Model as EvalModel
from torch.optim import Adam as Optimizer
from torch.optim.lr_scheduler import LinearLR as Scheduler


# Training data
dataset_name = "T-REx"
graph_name = "T-REx"
unsupervised = "mtb"

# Evaluation data and few-shot episode shape
eval_dataset_name = "FewRel"
valid_name = "7def1330ba9527d6"
shot = 1
way = 5

# From Section 4.1
linguistic_similarity = "dot"
undefined_poison_whole_meta = True

# Observed to be better
latent_metric_scale = "standard"
latent_dot_mean = 1067.65
latent_dot_std = 111.17

# GCN
neighborhood_size = 3
gcn_aggregator = "mean"

# From Section 4.3
blank_probability = 0.7

# From Section 5
transformer_model = "bert-base-cased"
sample_per_epoch = 100000
learning_rate = 3e-5
accumulated_batch_size = 2048

# Stated to be 10 in Section 5, but 5 was found to be better on the T-REx dataset.
max_epoch = 5

# From BERT
mlm_probability = 0.15
mlm_masked_probability = 0.8
mlm_random_probability = 0.1

# Guessed
# post_transformer_layer might need to be changed depending on the subsequent task;
# the "layer_norm" gives results within expectations for non-finetuned few-shot.
# NOTE(review): post_transformer_layer is not assigned in this file; presumably
# a default is applied elsewhere -- confirm.
max_sentence_length = 100  # Maybe should be 40 (from footnote 2, guessed from ACL slides)
language_model_weight = 1
edge_sampling = "uniform-inverse degree"
clip_gradient = 1

strong_negative_probability = 0.5
weak_negative_probability = 0.0

# Implementation details
seed = 0
amp = True
initial_grad_scale = 1
batch_size = 2
eval_batch_size = 1
workers = 2