@tokestermw
Last active September 6, 2018 21:27

Revisions

  1. tokestermw revised this gist Sep 6, 2018. 1 changed file with 4 additions and 0 deletions.
    4 changes: 4 additions & 0 deletions play_elmo_embeddings_softmax.py
    @@ -26,6 +26,10 @@
                grp.create_dataset('W', W.shape, dtype='float32', data=W)
                grp.create_dataset('b', b.shape, dtype='float32', data=b)

    To test:
        pytest allennlp/tests/modules/elmo_test.py
    """

    from allennlp.modules.elmo import _ElmoCharacterEncoder, _ElmoBiLm, _ElmoSoftmax, Elmo, batch_to_ids
  2. tokestermw revised this gist Sep 6, 2018. 1 changed file with 58 additions and 0 deletions.
    58 changes: 58 additions & 0 deletions play_elmo_embeddings_softmax.py
    @@ -1,3 +1,61 @@
    """
    To use it inside ELMo script
    To get the embeddings:
    allennlp elmo sample_sents.txt out1.hdf5 --top
    python -c "import h5py; f = h5py.File('out1.hdf5'); print(f['0'][:], f['0'].shape)"
    To get probabilities:
    allennlp elmo sample_sents.txt out2.hdf5 --top \
    --softmax-weight-file https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_softmax_weights.hdf5 \
    --softmax-vocab-file https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/vocab-2016-09-10.txt
    python -c "import h5py; f = h5py.File('out2.hdf5'); print(f['0'][:], f['0'].shape)"
    Save new example softmax file
    import h5py
    import numpy as np
    e = h5py.File('./allennlp/tests/fixtures/elmo/elmo_token_embeddings.hdf5')
    W = e['embedding'][:]
    b = np.zeros_like(W[:, 0])
    with h5py.File('elmo_softmax_weights.hdf5', 'w') as f:
    grp = f.create_group('softmax')
    grp.create_dataset('W', W.shape, dtype='float32', data=W)
    grp.create_dataset('b', b.shape, dtype='float32', data=b)
    """

    from allennlp.modules.elmo import _ElmoCharacterEncoder, _ElmoBiLm, _ElmoSoftmax, Elmo, batch_to_ids
    from allennlp.commands.elmo import DEFAULT_OPTIONS_FILE, DEFAULT_WEIGHT_FILE, DEFAULT_SOFTMAX_FILE, DEFAULT_VOCAB_FILE


    def _tokenize(text):
        return text.split()


    if __name__ == '__main__':
        # elmo_char_encoder - _ElmoCharacterEncoder
        elmo_bilm = _ElmoBiLm(DEFAULT_OPTIONS_FILE, DEFAULT_WEIGHT_FILE)
        elmo_softmax = _ElmoSoftmax(DEFAULT_SOFTMAX_FILE, DEFAULT_VOCAB_FILE)

        sentences = [
            'How are you ?',
            'how are you ?',
            'How are you .',
            'You are how ?',
        ]
        sentences = [_tokenize(i) for i in sentences]

        char_ids, word_ids = batch_to_ids(sentences, elmo_softmax.vocab)

        bilm_outputs = elmo_bilm(char_ids)

        softmax_log_probs, softmax_mask = elmo_softmax(
            bilm_outputs, word_ids, aggregation_fun='mean')

        # average backward and forward log probs
        print(softmax_log_probs.shape)
        print(softmax_mask.shape)
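
    For reference, a minimal sketch of turning these outputs into per-sentence scores, assuming softmax_log_probs and softmax_mask are torch.Tensors of shape (batch_size, num_timesteps) and the mask is 1 on real tokens and 0 on padding (shapes assumed, not confirmed by this gist):

        import torch

        def sentence_perplexity(log_probs, mask):
            # Mean log prob over real tokens only, then exponentiate.
            mask = mask.float()
            mean_log_prob = (log_probs * mask).sum(dim=-1) / mask.sum(dim=-1)
            return torch.exp(-mean_log_prob)  # lower = more probable sentence

        # e.g. sentence_perplexity(softmax_log_probs, softmax_mask) -> shape (batch_size,)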
  3. tokestermw revised this gist Sep 6, 2018. 1 changed file with 1 addition and 59 deletions.
    60 changes: 1 addition & 59 deletions play_elmo_embeddings_softmax.py
    @@ -1,61 +1,3 @@
    """
    To use it inside ELMo script
    To get the embeddings:
    allennlp elmo sample_sents.txt out1.hdf5 --top
    python -c "import h5py; f = h5py.File('out1.hdf5'); print(f['0'][:], f['0'].shape)"
    To get probabilities:
    allennlp elmo sample_sents.txt out2.hdf5 --top \
    --softmax-weight-file https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_softmax_weights.hdf5 \
    --vocab-file https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/vocab-2016-09-10.txt
    python -c "import h5py; f = h5py.File('out2.hdf5'); print(f['0'][:], f['0'].shape)"
    Save new example softmax file
    import h5py
    import numpy as np
    e = h5py.File('./allennlp/tests/fixtures/elmo/elmo_token_embeddings.hdf5')
    W = e['embedding'][:]
    b = np.zeros_like(W[:, 0])
    with h5py.File('elmo_softmax_weights.hdf5', 'w') as f:
    grp = f.create_group('softmax')
    grp.create_dataset('W', W.shape, dtype='float32', data=W)
    grp.create_dataset('b', b.shape, dtype='float32', data=b)
    """

    from allennlp.modules.elmo import _ElmoCharacterEncoder, _ElmoBiLm, _ElmoSoftmax, Elmo, batch_to_ids
    from allennlp.commands.elmo import DEFAULT_OPTIONS_FILE, DEFAULT_WEIGHT_FILE, DEFAULT_SOFTMAX_FILE, DEFAULT_VOCAB_FILE


    def _tokenize(text):
        return text.split()


    if __name__ == '__main__':
        # elmo_char_encoder - _ElmoCharacterEncoder
        elmo_bilm = _ElmoBiLm(DEFAULT_OPTIONS_FILE, DEFAULT_WEIGHT_FILE)
        elmo_softmax = _ElmoSoftmax(DEFAULT_SOFTMAX_FILE, DEFAULT_VOCAB_FILE)

        sentences = [
            'How are you ?',
            'how are you ?',
            'How are you .',
            'You are how ?',
        ]
        sentences = [_tokenize(i) for i in sentences]

        char_ids, word_ids = batch_to_ids(sentences, elmo_softmax.vocab)

        bilm_outputs = elmo_bilm(char_ids)

        softmax_log_probs, softmax_mask = elmo_softmax(
            bilm_outputs, word_ids, aggregation_fun='mean')

        # average backward and forward log probs
        print(softmax_log_probs.shape)
        print(softmax_mask.shape)
    --softmax-vocab-file https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/vocab-2016-09-10.txt
  4. tokestermw revised this gist Sep 6, 2018. 1 changed file with 15 additions and 2 deletions.
    17 changes: 15 additions & 2 deletions play_elmo_embeddings_softmax.py
    @@ -13,6 +13,19 @@
            --vocab-file https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/vocab-2016-09-10.txt
        python -c "import h5py; f = h5py.File('out2.hdf5'); print(f['0'][:], f['0'].shape)"

    Save a new example softmax file:
        import h5py
        import numpy as np

        e = h5py.File('./allennlp/tests/fixtures/elmo/elmo_token_embeddings.hdf5')
        W = e['embedding'][:]
        b = np.zeros_like(W[:, 0])

        with h5py.File('elmo_softmax_weights.hdf5', 'w') as f:
            grp = f.create_group('softmax')
            grp.create_dataset('W', W.shape, dtype='float32', data=W)
            grp.create_dataset('b', b.shape, dtype='float32', data=b)
    """

    from allennlp.modules.elmo import _ElmoCharacterEncoder, _ElmoBiLm, _ElmoSoftmax, Elmo, batch_to_ids
    @@ -44,5 +57,5 @@ def _tokenize(text):
            bilm_outputs, word_ids, aggregation_fun='mean')

        # average backward and forward log probs
        print(softmax_log_probs)
        print(softmax_mask)
        print(softmax_log_probs.shape)
        print(softmax_mask.shape)
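
    The snippet added in this revision stores the weights in an HDF5 group named softmax with datasets W and b. A quick sanity check of the resulting file, sketched with plain h5py using only the layout shown in the snippet:

        import h5py

        # Read back the file written by the snippet above.
        with h5py.File('elmo_softmax_weights.hdf5', 'r') as f:
            W = f['softmax/W'][:]  # (vocab_size, embedding_dim), per the saving snippet
            b = f['softmax/b'][:]  # (vocab_size,), all zeros in the example
            print(W.shape, b.shape, W.dtype)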
  5. tokestermw revised this gist Sep 6, 2018. 1 changed file with 3 additions and 1 deletion.
    4 changes: 3 additions & 1 deletion play_elmo_embeddings_softmax.py
    @@ -8,7 +8,9 @@
    To get probabilities:
        allennlp elmo sample_sents.txt out2.hdf5 --top --softmax-weight-file https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_softmax_weights.hdf5 --vocab-file https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/vocab-2016-09-10.txt
        allennlp elmo sample_sents.txt out2.hdf5 --top \
            --softmax-weight-file https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_softmax_weights.hdf5 \
            --vocab-file https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/vocab-2016-09-10.txt
        python -c "import h5py; f = h5py.File('out2.hdf5'); print(f['0'][:], f['0'].shape)"
    """
  6. tokestermw revised this gist Sep 5, 2018. 1 changed file with 2 additions and 55 deletions.
    57 changes: 2 additions & 55 deletions play_elmo_embeddings_softmax.py
    @@ -13,61 +13,8 @@
    """

    from allennlp.modules.elmo import (
        _ElmoCharacterEncoder, _ElmoBiLm, _ElmoSoftmax, Elmo, batch_to_ids
    )

    DEFAULT_OPTIONS_FILE = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json" # pylint: disable=line-too-long
    DEFAULT_WEIGHT_FILE = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5" # pylint: disable=line-too-long
    # TODO: add softmax as an option to the elmo command
    DEFAULT_SOFTMAX_FILE = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_softmax_weights.hdf5" # pylint: disable=line-too-long
    DEFAULT_VOCAB_FILE = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/vocab-2016-09-10.txt" # pylint: disable=line-too-long


    #
    # def batch_to_ids(batch: List[List[str]], vocab: Vocabulary = None) -> Tuple[torch.Tensor, torch.Tensor]:
    #     """
    #     Converts a batch of tokenized sentences to a tensor representing the sentences with encoded characters
    #     (len(batch), max sentence length, max word length).
    #
    #     Parameters
    #     ----------
    #     batch : ``List[List[str]]``, required
    #         A list of tokenized sentences.
    #     vocab : ``Vocabulary``, optional
    #         A vocab of words if you need to return word ids.
    #
    #     Returns
    #     -------
    #     If vocab is present, returns a tuple of char ids and word ids.
    #     If not, it returns a tensor of char ids.
    #     """
    #     instances = []
    #     char_indexer = ELMoTokenCharactersIndexer()
    #     if vocab:
    #         token_indexer = SingleIdTokenIndexer(
    #             namespace='tokens', lowercase_tokens=False)
    #     else:
    #         token_indexer = None
    #     for sentence in batch:
    #         tokens = [Token(token) for token in sentence]
    #         if vocab:
    #             field = TextField(tokens, {
    #                 'character_ids': char_indexer,
    #                 'word_ids': token_indexer,
    #             })
    #         else:
    #             field = TextField(tokens, {'character_ids': char_indexer})
    #         instance = Instance({"elmo": field})
    #         instances.append(instance)
    #
    #     dataset = Batch(instances)
    #     dataset.index_instances(vocab)
    #     elmo_tensor_dict = dataset.as_tensor_dict()['elmo']
    #     if vocab:
    #         return elmo_tensor_dict['character_ids'], elmo_tensor_dict['word_ids']
    #     else:
    #         return elmo_tensor_dict['character_ids']
    from allennlp.modules.elmo import _ElmoCharacterEncoder, _ElmoBiLm, _ElmoSoftmax, Elmo, batch_to_ids
    from allennlp.commands.elmo import DEFAULT_OPTIONS_FILE, DEFAULT_WEIGHT_FILE, DEFAULT_SOFTMAX_FILE, DEFAULT_VOCAB_FILE


    def _tokenize(text):
  7. tokestermw revised this gist Sep 5, 2018. 1 changed file with 62 additions and 55 deletions.
    117 changes: 62 additions & 55 deletions play_elmo_embeddings_softmax.py
    @@ -1,13 +1,20 @@
    from typing import List, Tuple
    """
    To use it inside the ELMo script.
    import torch
    To get the embeddings:
    allennlp elmo sample_sents.txt out1.hdf5 --top
    python -c "import h5py; f = h5py.File('out1.hdf5'); print(f['0'][:], f['0'].shape)"
    To get probabilities:
    allennlp elmo sample_sents.txt out2.hdf5 --top --softmax-weight-file https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_softmax_weights.hdf5 --vocab-file https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/vocab-2016-09-10.txt
    python -c "import h5py; f = h5py.File('out2.hdf5'); print(f['0'][:], f['0'].shape)"
    """

    from allennlp.data import Token, Instance, Vocabulary
    from allennlp.data.dataset import Batch
    from allennlp.data.fields import TextField
    from allennlp.data.token_indexers import ELMoTokenCharactersIndexer, SingleIdTokenIndexer
    from allennlp.modules.elmo import (
        _ElmoCharacterEncoder, _ElmoBiLm, _ElmoSoftmax, Elmo, #batch_to_ids
        _ElmoCharacterEncoder, _ElmoBiLm, _ElmoSoftmax, Elmo, batch_to_ids
    )

    DEFAULT_OPTIONS_FILE = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json" # pylint: disable=line-too-long
    @@ -17,57 +24,61 @@
    DEFAULT_VOCAB_FILE = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/vocab-2016-09-10.txt" # pylint: disable=line-too-long



    def batch_to_ids(batch: List[List[str]], vocab: Vocabulary = None) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Converts a batch of tokenized sentences to a tensor representing the sentences with encoded characters
        (len(batch), max sentence length, max word length).

        Parameters
        ----------
        batch : ``List[List[str]]``, required
            A list of tokenized sentences.
        vocab : ``Vocabulary``, optional
            A vocab of words if you need to return word ids.

        Returns
        -------
        If vocab is present, returns a tuple of char ids and word ids.
        If not, it returns a tensor of char ids.
        """
        instances = []
        char_indexer = ELMoTokenCharactersIndexer()
        if vocab:
            token_indexer = SingleIdTokenIndexer(
                namespace='tokens', lowercase_tokens=False)
        else:
            token_indexer = None
        for sentence in batch:
            tokens = [Token(token) for token in sentence]
            if vocab:
                field = TextField(tokens, {
                    'character_ids': char_indexer,
                    'word_ids': token_indexer,
                })
            else:
                field = TextField(tokens, {'character_ids': char_indexer})
            instance = Instance({"elmo": field})
            instances.append(instance)

        dataset = Batch(instances)
        dataset.index_instances(vocab)
        elmo_tensor_dict = dataset.as_tensor_dict()['elmo']
        if vocab:
            return elmo_tensor_dict['character_ids'], elmo_tensor_dict['word_ids']
        else:
            return elmo_tensor_dict['character_ids']
    #
    # def batch_to_ids(batch: List[List[str]], vocab: Vocabulary = None) -> Tuple[torch.Tensor, torch.Tensor]:
    #     """
    #     Converts a batch of tokenized sentences to a tensor representing the sentences with encoded characters
    #     (len(batch), max sentence length, max word length).
    #
    #     Parameters
    #     ----------
    #     batch : ``List[List[str]]``, required
    #         A list of tokenized sentences.
    #     vocab : ``Vocabulary``, optional
    #         A vocab of words if you need to return word ids.
    #
    #     Returns
    #     -------
    #     If vocab is present, returns a tuple of char ids and word ids.
    #     If not, it returns a tensor of char ids.
    #     """
    #     instances = []
    #     char_indexer = ELMoTokenCharactersIndexer()
    #     if vocab:
    #         token_indexer = SingleIdTokenIndexer(
    #             namespace='tokens', lowercase_tokens=False)
    #     else:
    #         token_indexer = None
    #     for sentence in batch:
    #         tokens = [Token(token) for token in sentence]
    #         if vocab:
    #             field = TextField(tokens, {
    #                 'character_ids': char_indexer,
    #                 'word_ids': token_indexer,
    #             })
    #         else:
    #             field = TextField(tokens, {'character_ids': char_indexer})
    #         instance = Instance({"elmo": field})
    #         instances.append(instance)
    #
    #     dataset = Batch(instances)
    #     dataset.index_instances(vocab)
    #     elmo_tensor_dict = dataset.as_tensor_dict()['elmo']
    #     if vocab:
    #         return elmo_tensor_dict['character_ids'], elmo_tensor_dict['word_ids']
    #     else:
    #         return elmo_tensor_dict['character_ids']


    def _tokenize(text):
        return text.split()


    if __name__ == '__main__':
        # elmo_char_encoder - _ElmoCharacterEncoder
        elmo_bilm = _ElmoBiLm(DEFAULT_OPTIONS_FILE, DEFAULT_WEIGHT_FILE)
        elmo_softmax = _ElmoSoftmax(DEFAULT_SOFTMAX_FILE, DEFAULT_VOCAB_FILE)

        sentences = [
            'How are you ?',
            'how are you ?',
    @@ -76,10 +87,6 @@ def _tokenize(text):
        ]
        sentences = [_tokenize(i) for i in sentences]

        # elmo_char_encoder - _ElmoCharacterEncoder
        elmo_bilm = _ElmoBiLm(DEFAULT_OPTIONS_FILE, DEFAULT_WEIGHT_FILE)
        elmo_softmax = _ElmoSoftmax(DEFAULT_SOFTMAX_FILE, DEFAULT_VOCAB_FILE)

        char_ids, word_ids = batch_to_ids(sentences, elmo_softmax.vocab)

        bilm_outputs = elmo_bilm(char_ids)
  8. tokestermw revised this gist Sep 5, 2018. 1 changed file with 7 additions and 4 deletions.
    11 changes: 7 additions & 4 deletions play_elmo_embeddings_softmax.py
    @@ -2,10 +2,6 @@

    import torch

    from allennlp.commands.elmo import (
        DEFAULT_OPTIONS_FILE, DEFAULT_WEIGHT_FILE, DEFAULT_SOFTMAX_FILE, DEFAULT_VOCAB_FILE,
        ElmoEmbedder,
    )
    from allennlp.data import Token, Instance, Vocabulary
    from allennlp.data.dataset import Batch
    from allennlp.data.fields import TextField
    @@ -14,6 +10,13 @@
        _ElmoCharacterEncoder, _ElmoBiLm, _ElmoSoftmax, Elmo, #batch_to_ids
    )

    DEFAULT_OPTIONS_FILE = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json" # pylint: disable=line-too-long
    DEFAULT_WEIGHT_FILE = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5" # pylint: disable=line-too-long
    # TODO: add softmax as an option to the elmo command
    DEFAULT_SOFTMAX_FILE = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_softmax_weights.hdf5" # pylint: disable=line-too-long
    DEFAULT_VOCAB_FILE = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/vocab-2016-09-10.txt" # pylint: disable=line-too-long



    def batch_to_ids(batch: List[List[str]], vocab: Vocabulary = None) -> Tuple[torch.Tensor, torch.Tensor]:
        """
  9. tokestermw created this gist Sep 5, 2018.
    89 changes: 89 additions & 0 deletions play_elmo_embeddings_softmax.py
    @@ -0,0 +1,89 @@
    from typing import List, Tuple

    import torch

    from allennlp.commands.elmo import (
        DEFAULT_OPTIONS_FILE, DEFAULT_WEIGHT_FILE, DEFAULT_SOFTMAX_FILE, DEFAULT_VOCAB_FILE,
        ElmoEmbedder,
    )
    from allennlp.data import Token, Instance, Vocabulary
    from allennlp.data.dataset import Batch
    from allennlp.data.fields import TextField
    from allennlp.data.token_indexers import ELMoTokenCharactersIndexer, SingleIdTokenIndexer
    from allennlp.modules.elmo import (
        _ElmoCharacterEncoder, _ElmoBiLm, _ElmoSoftmax, Elmo, #batch_to_ids
    )


    def batch_to_ids(batch: List[List[str]], vocab: Vocabulary = None) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Converts a batch of tokenized sentences to a tensor representing the sentences with encoded characters
        (len(batch), max sentence length, max word length).

        Parameters
        ----------
        batch : ``List[List[str]]``, required
            A list of tokenized sentences.
        vocab : ``Vocabulary``, optional
            A vocab of words if you need to return word ids.

        Returns
        -------
        If vocab is present, returns a tuple of char ids and word ids.
        If not, it returns a tensor of char ids.
        """
        instances = []
        char_indexer = ELMoTokenCharactersIndexer()
        if vocab:
            token_indexer = SingleIdTokenIndexer(
                namespace='tokens', lowercase_tokens=False)
        else:
            token_indexer = None
        for sentence in batch:
            tokens = [Token(token) for token in sentence]
            if vocab:
                field = TextField(tokens, {
                    'character_ids': char_indexer,
                    'word_ids': token_indexer,
                })
            else:
                field = TextField(tokens, {'character_ids': char_indexer})
            instance = Instance({"elmo": field})
            instances.append(instance)

        dataset = Batch(instances)
        dataset.index_instances(vocab)
        elmo_tensor_dict = dataset.as_tensor_dict()['elmo']
        if vocab:
            return elmo_tensor_dict['character_ids'], elmo_tensor_dict['word_ids']
        else:
            return elmo_tensor_dict['character_ids']


    def _tokenize(text):
        return text.split()


    if __name__ == '__main__':
        sentences = [
            'How are you ?',
            'how are you ?',
            'How are you .',
            'You are how ?',
        ]
        sentences = [_tokenize(i) for i in sentences]

        # elmo_char_encoder - _ElmoCharacterEncoder
        elmo_bilm = _ElmoBiLm(DEFAULT_OPTIONS_FILE, DEFAULT_WEIGHT_FILE)
        elmo_softmax = _ElmoSoftmax(DEFAULT_SOFTMAX_FILE, DEFAULT_VOCAB_FILE)

        char_ids, word_ids = batch_to_ids(sentences, elmo_softmax.vocab)

        bilm_outputs = elmo_bilm(char_ids)

        softmax_log_probs, softmax_mask = elmo_softmax(
            bilm_outputs, word_ids, aggregation_fun='mean')

        # average backward and forward log probs
        print(softmax_log_probs)
        print(softmax_mask)
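
    As a usage note on the custom batch_to_ids in this first version: with a Vocabulary it returns both tensors, and without one it returns character ids only, matching the stock allennlp helper. A hedged sketch (the 50-character-per-token shape comes from ELMoTokenCharactersIndexer and is assumed here):

        # With a vocab: both character ids and word ids come back.
        char_ids, word_ids = batch_to_ids([['How', 'are', 'you', '?']], elmo_softmax.vocab)
        # char_ids: (batch_size, num_tokens, 50); word_ids: (batch_size, num_tokens)

        # Without a vocab: a single tensor of character ids.
        char_ids = batch_to_ids([['How', 'are', 'you', '?']])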