all_nli_de.py (gist by @PhilipMay, created September 29, 2020)
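# Optuna hyperparameter search for a German SentenceTransformer (SBERT) model:
# trains on machine-translated MNLI + SNLI pairs with softmax loss and scores
# each trial by embedding similarity on the German STS benchmark dev/test data.
# Searched: batch size, epochs, learning rate, eps, weight decay, warmup
# fraction, and the learning-rate scheduler.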
import csv
import logging
import math

import numpy as np
import optuna
import pandas as pd
import transformers  # used by the (currently disabled) optimizer-choice block below
from torch.utils.data import DataLoader

from sentence_transformers import models, losses
from sentence_transformers import SentencesDataset, LoggingHandler, SentenceTransformer, InputExample
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator, SimilarityFunction

evaluation_steps = 1000
base_save_dir = '/srv/data/nlp/sentence_transformers'
model_name = 'dbmdz/bert-base-german-uncased'
study_name = 'all_nli_de_08'

logging.basicConfig(format='%(asctime)s - %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S',
                    level=logging.INFO,
                    handlers=[LoggingHandler()])

def callback(score, epoch, steps):
    # fit() reports the evaluator score after each evaluation; prune the
    # trial as soon as training diverges and the score becomes NaN.
    print('callback:', score, epoch, steps)
    if math.isnan(score):
        raise optuna.exceptions.TrialPruned()

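# Build, train, and evaluate one model for the given trial. `i` numbers the
# repetition within the trial: objective() trains three models per trial and
# averages their best dev scores.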
def train(trial, i):
    train_batch_size = trial.suggest_int('train_batch_size', 16, 60)
    num_epochs = trial.suggest_int('num_epochs', 1, 5)
    lr = trial.suggest_uniform('lr', 2e-6, 2e-4)  # fit() default: 2e-5
    eps = trial.suggest_uniform('eps', 1e-7, 1e-5)  # fit() default: 1e-6
    weight_decay = trial.suggest_uniform('weight_decay', 0.001, 0.1)  # fit() default: 0.01
    warmup_steps_mul = trial.suggest_uniform('warmup_steps_mul', 0.1, 0.5)

    model_save_path = f'{base_save_dir}/{study_name}_t{trial.number:02d}_i{i}'
    label2int = {"contradiction": 0, "entailment": 1, "neutral": 2}

    # create model: transformer encoder + mean pooling
    word_embedding_model = models.Transformer(model_name)
    pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension(),
                                   pooling_mode_mean_tokens=True,
                                   pooling_mode_cls_token=False,
                                   pooling_mode_max_tokens=False)
    model = SentenceTransformer(modules=[word_embedding_model, pooling_model])

    # read MNLI; drop pairs without a gold label ('-') and incomplete rows
    mnli_df = pd.read_csv('./mnli/mnli_all_en_de.csv')
    mnli_df.drop(mnli_df[mnli_df['gold_label'] == '-'].index, inplace=True)
    mnli_df.dropna(inplace=True)
    s1_de = mnli_df['sentence1_de'].tolist()
    s2_de = mnli_df['sentence2_de'].tolist()
    label = mnli_df['gold_label'].tolist()

    # read and add SNLI, filtered the same way
    snli_df = pd.read_csv('./snli/snli_all_en_de.csv')
    snli_df.drop(snli_df[snli_df['gold_label'] == '-'].index, inplace=True)
    snli_df.dropna(inplace=True)
    s1_de.extend(snli_df['sentence1_de'].tolist())
    s2_de.extend(snli_df['sentence2_de'].tolist())
    label.extend(snli_df['gold_label'].tolist())

    assert len(s1_de) == len(s2_de) == len(label)

    # convert NLI pairs to InputExamples with integer labels for SoftmaxLoss
    train_samples = []
    for _s1_de, _s2_de, _label in zip(s1_de, s2_de, label):
        label_id = label2int[_label]
        assert type(_s1_de) == str
        assert len(_s1_de) > 0
        assert type(_s2_de) == str
        assert len(_s2_de) > 0
        assert type(label_id) == int
        train_samples.append(InputExample(texts=[_s1_de, _s2_de], label=label_id))

    train_dataset = SentencesDataset(train_samples, model=model)
    train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=train_batch_size)
    train_loss = losses.SoftmaxLoss(model=model,
                                    sentence_embedding_dimension=model.get_sentence_embedding_dimension(),
                                    num_labels=len(label2int))


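    # Dev data for the evaluator: the German STS benchmark. Gold similarity
    # scores from 0 to 5 are rescaled to [0, 1], and the dev and test splits
    # are pooled into one evaluation set.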
    stsb_dev = pd.read_csv('./data/stsbenchmark/de/sts_dev_de.csv', sep='\t', quoting=csv.QUOTE_NONE, names=['label', 's1', 's2'])
    s1 = stsb_dev['s1'].tolist()
    s2 = stsb_dev['s2'].tolist()
    label = stsb_dev['label'].tolist()

    stsb_test = pd.read_csv('./data/stsbenchmark/de/sts_test_de.csv', sep='\t', quoting=csv.QUOTE_NONE, names=['label', 's1', 's2'])
    s1.extend(stsb_test['s1'].tolist())
    s2.extend(stsb_test['s2'].tolist())
    label.extend(stsb_test['label'].tolist())

    dev_samples = []
    for _s1, _s2, _label in zip(s1, s2, label):
        score = _label / 5.0
        assert type(_s1) == str
        assert len(_s1) > 0
        assert type(_s2) == str
        assert len(_s2) > 0
        assert type(score) == float
        assert 0.0 <= score <= 1.0
        dev_samples.append(InputExample(texts=[_s1, _s2], label=score))

    # 1500 dev pairs + 1379 test pairs from the STS benchmark
    assert len(dev_samples) == 1500 + 1379

    dev_evaluator = EmbeddingSimilarityEvaluator.from_input_examples(
        dev_samples,
        batch_size=train_batch_size,
        name='sts-dev',
        main_similarity=SimilarityFunction.COSINE,
    )

    # warm up the learning rate over a tuned fraction of the total training steps
    warmup_steps = math.ceil(len(train_dataset) * num_epochs / train_batch_size * warmup_steps_mul)  # 0.1 is the fraction typically used in the sentence-transformers examples
    logging.info("Warmup-steps: {}".format(warmup_steps))

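    # An optimizer choice (AdamW vs. Adafactor) was also explored but is left
    # disabled here; uncomment this block and the optimizer_class argument in
    # model.fit() to re-enable it.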
    #optimizer_class = None
    #optimizer_class_str = trial.suggest_categorical('optimizer_class', ['AdamW', 'Adafactor'])
    #if optimizer_class_str == 'Adafactor':
    #    optimizer_class = transformers.optimization.Adafactor
    #elif optimizer_class_str == 'AdamW':
    #    optimizer_class = transformers.optimization.AdamW
    #else:
    #    assert False

    # Train the model
    model.fit(train_objectives=[(train_dataloader, train_loss)],
              evaluator=dev_evaluator,
              epochs=num_epochs,
              scheduler=trial.suggest_categorical('scheduler', ['WarmupLinear', 'warmupcosine', 'warmupcosinewithhardrestarts']),
              #optimizer_class=optimizer_class,
              evaluation_steps=evaluation_steps,
              warmup_steps=warmup_steps,
              output_path=model_save_path,
              optimizer_params={'lr': lr, 'eps': eps, 'correct_bias': False},
              weight_decay=weight_decay,
              callback=callback,
              )

    best_score = model.best_score
    print(best_score)
    return best_score

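# Each trial trains three models with the same sampled hyperparameters and
# returns the mean of their best dev scores; per-run results and any exception
# are stored as trial user attributes for later inspection in the Optuna DB.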
def objective(trial):
    try:
        results = []
        for i in range(3):
            result = train(trial, i)
            results.append(result)
        trial.set_user_attr('results', str(results))
        mean_result = np.mean(results)
        trial.set_user_attr('mean_result', str(mean_result))
        return mean_result
    except Exception as e:
        trial.set_user_attr('exception', str(e))
        print(e)
        return 0

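# Maximize the mean dev score; the study is persisted in a local SQLite file,
# so it can be resumed or inspected later (load_if_exists=True).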
study = optuna.create_study(
    study_name=study_name,
    storage='sqlite:///optuna.db',
    load_if_exists=True,
    direction='maximize',
)
study.optimize(objective)