Fine-Tuning Transformers Models
 
from adaptnlp import LMFineTuner

Initialize the train and evaluation data paths; these files use the same format you would load into a classifier

OUTPUT_DIR = "Path/to/output_dir"
train_data_file = "Path/to/train.csv" 
eval_data_file = "Path/to/test.csv"
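
If you need to build these files, a minimal sketch with pandas follows; the text and label column names are assumptions for illustration, so match whatever schema your classifier data actually uses.

import pandas as pd

# Hypothetical two-column layout; adjust the columns to your classifier's schema.
pd.DataFrame({
    "text": ["The acting was superb.", "The plot never came together."],
    "label": [1, 0],
}).to_csv(train_data_file, index=False)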

Initialize the fine-tuner and freeze the model up to its last layer

ft_configs = {
    "train_data_file": train_data_file,
    "eval_data_file": eval_data_file,
    "model_type": "bert",
    "model_name_or_path": "bert-base-cased",
    "mlm": True,              # train with the masked language modeling objective
    "mlm_probability": 0.15,  # fraction of tokens masked per sequence
    "config_name": None,
    "tokenizer_name": None,
    "cache_dir": None,
    "block_size": -1,         # -1 falls back to the model's maximum input length
    "no_cuda": False,
    "overwrite_cache": False,
    "seed": 42,
    "fp16": False,
    "fp16_opt_level": "O1",   # Apex AMP optimization level; only used when fp16 is True
    "local_rank": -1,         # -1 disables distributed training
}
finetuner = LMFineTuner(**ft_configs)
finetuner.freeze()
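
freeze() leaves only the final layer group trainable so the pretrained weights are not disturbed early on. Conceptually it amounts to disabling gradients on the earlier encoder layers; the sketch below shows the idea in plain PyTorch and is not AdaptNLP's exact implementation.

from transformers import AutoModelForMaskedLM

model = AutoModelForMaskedLM.from_pretrained("bert-base-cased")

# Disable gradients for every encoder layer except the last one;
# the LM head (and, in this sketch, the embeddings) stay trainable.
for layer in model.bert.encoder.layer[:-1]:
    for param in layer.parameters():
        param.requires_grad = False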

Find a suitable learning rate for the fine-tuner with the automated learning rate finder

learning_rate_finder_configs = {
    "output_dir": OUTPUT_DIR,
    "file_name": "learning_rate.tsv",
    "start_learning_rate": 1e-7,
    "end_learning_rate": 10,
    "iterations": 100,
    "mini_batch_size": 8,
    "stop_early": True,
    "smoothing_factor": 0.7,
    "adam_epsilon": 1e-8,
    "weight_decay": 0.0,
}
learning_rate = finetuner.find_learning_rate(**learning_rate_finder_configs)
finetuner.freeze()  # re-freeze before training; the search can leave layers unfrozen
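
find_learning_rate performs a standard learning-rate range test: the rate grows exponentially from start_learning_rate to end_learning_rate over the given iterations while the training loss is tracked with an exponential moving average (the smoothing_factor), and a rate just below the point where the loss diverges is returned. A hand-rolled sketch of that schedule, under those assumptions:

def range_test_lr(step, iterations=100, start_lr=1e-7, end_lr=10.0):
    # Exponential interpolation: start_lr at step 0, end_lr at the last step.
    return start_lr * (end_lr / start_lr) ** (step / iterations)

print(range_test_lr(0))    # 1e-07
print(range_test_lr(100))  # 10.0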

Train one cycle with the frozen model, then unfreeze and continue training

train_configs = {
    "output_dir": OUTPUT_DIR,
    "should_continue": False,
    "overwrite_output_dir": True,
    "evaluate_during_training": True,
    "per_gpu_train_batch_size": 2,
    "gradient_accumulation_steps": 1,
    "learning_rate": learning_rate,
    "weight_decay": 0.0,
    "adam_epsilon": 1e-8,
    "max_grad_norm": 1.0,
    "num_train_epochs": 10.0,
    "max_steps": -1,
    "warmup_steps": 0,
    "logging_steps": 50,
    "save_steps": 50,
    "save_total_limit": None,
    "use_tensorboard": False,
}
finetuner.train_one_cycle(**train_configs)  # one-cycle pass over the frozen model
finetuner.unfreeze()                        # make all layers trainable again
finetuner.train(**train_configs)            # continue training the full model
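
train_one_cycle follows the one-cycle policy: the learning rate ramps up to a peak and anneals back down within a single training run. If you want to reproduce that shape outside AdaptNLP, PyTorch ships an equivalent scheduler; below is a minimal sketch with a placeholder model and step count.

import torch
from torch.optim.lr_scheduler import OneCycleLR

model = torch.nn.Linear(4, 2)  # stand-in for the fine-tuned transformer
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
scheduler = OneCycleLR(optimizer, max_lr=learning_rate, total_steps=1000)  # placeholder step count

for _ in range(1000):
    optimizer.step()   # would follow loss.backward() in a real loop
    scheduler.step()   # advances the ramp-up-then-anneal schedule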

Evaluate all saved checkpoints on the evaluation data

finetuner.evaluate_all_checkpoints(
    output_dir=OUTPUT_DIR,
    per_gpu_eval_batch_size=2
)
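
The checkpoints written to OUTPUT_DIR are standard Transformers artifacts, so, assuming the fine-tuner saved them with save_pretrained() as the underlying library does, the best one can be reloaded directly with Hugging Face for downstream use:

from transformers import AutoModelForMaskedLM, AutoTokenizer

# Assumes a standard save_pretrained() checkpoint lives in OUTPUT_DIR.
tokenizer = AutoTokenizer.from_pretrained(OUTPUT_DIR)
model = AutoModelForMaskedLM.from_pretrained(OUTPUT_DIR)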