gbure

Graph-based approaches on unsupervised relation extraction evaluated as a few-shot problem
git clone https://esimon.eu/repos/gbure.git
Log | Files | Refs | README | LICENSE

gcn_mtb.py (1487B)


      1 from gbure.model.matching_the_blanks import Model
      2 from gbure.model.fewshot import Model as EvalModel
      3 from torch.optim import Adam as Optimizer
      4 from torch.optim.lr_scheduler import LinearLR as Scheduler
      5 
      6 
      7 dataset_name = "T-REx"
      8 graph_name = "T-REx"
      9 unsupervised = "mtb"
     10 
     11 eval_dataset_name = "FewRel"
     12 valid_name = "7def1330ba9527d6"
     13 shot = 1
     14 way = 5
     15 
     16 # From Section 4.1
     17 linguistic_similarity = "dot"
     18 undefined_poison_whole_meta = True
     19 
     20 # Observed to be better
     21 latent_metric_scale = "standard"
     22 latent_dot_mean = 1067.65
     23 latent_dot_std = 111.17
     24 
     25 # GCN
     26 neighborhood_size = 3
     27 gcn_aggregator = "mean"
     28 
     29 # From Section 4.3
     30 blank_probability = 0.7
     31 
     32 # From Section 5
     33 transformer_model = "bert-base-cased"
     34 sample_per_epoch = 100000
     35 learning_rate = 3e-5
     36 accumulated_batch_size = 2048
     37 
     38 # Stated to be 10 in Section 5, but found 5 was better on T-REx dataset.
     39 max_epoch = 5
     40 
     41 # From BERT
     42 mlm_probability = 0.15
     43 mlm_masked_probability = 0.8
     44 mlm_random_probability = 0.1
     45 
     46 # Guessed
     47 # post_transformer_layer might need to be changed depending on the subsequent task
     48 # the "layer_norm" gives results within expectations for non-finetuned few-shot.
     49 max_sentence_length = 100  # Maybe should be 40 (from footnote 2, guessed from ACL slides)
     50 language_model_weight = 1
     51 edge_sampling = "uniform-inverse degree"
     52 clip_gradient = 1
     53 
     54 strong_negative_probability = 0.5
     55 weak_negative_probability = 0.0
     56 
     57 # Implementation details
     58 seed = 0
     59 amp = True
     60 initial_grad_scale = 1
     61 batch_size = 2
     62 eval_batch_size = 1
     63 workers = 2