class DataMapCallback(tf.keras.callbacks.Callback):
    """
    Gather training dynamics for data map generation.
    Assumes a binary or multi-class model, no support for multi label.

    At the end of every epoch the callback iterates over `dataset`, asks the model for predictions
    and records the probability assigned to the gold (true) label of every sample.

    Arguments
    ---------

    - `dataset` (``tf.data.Dataset``): Usually, as the paper suggests, this is the training dataset. It should be:

        1. Non-shuffled, so each iteration over the dataset should yield samples in the same order
        2. Already batched, the ``.batch(n)`` method should already be applied on this dataset
        3. Should yield batches of ``(features, labels)``, sample weights are not supported

    - | `outputs_to_probabilities` (``Optional[Callable[[Any], tf.Tensor]]``): Callable to convert model's output to
        probabilities. Use this if the model outputs logits, dictionary or any other form which is not a tensor
        of probabilities. Defaults to ``None``.
    - | `sparse_labels` (``bool``): Set to ``True`` if the labels are given as integers (not one hot encoded).
        Defaults to ``False``.

    Attributes
    ----------

    - | `gold_labels_probabilities` (``np.ndarray``): Gold label predicted probabilities. With the shape of
        ``(n_samples, n_epochs)`` and ``(i, j)`` is the probability of the gold label for sample ``i`` at
        epoch ``j``. ``None`` until the first epoch has ended.
    - `confidence` (``np.ndarray``): Mean of true label probability across epochs.
    - `variability` (``np.ndarray``): Standard deviation of true label probability across epochs.
    - `correctness` (``np.ndarray``): Fraction of times correctly predicted across epochs

    Examples
    --------

    Calculate training dynamics during training

    .. code-block:: python3

        import tensorflow as tf
        import tavolo as tvl

        # Load dataset
        train = ...  # Instance of dataset
        train_unshuffled = ...  # Instance of dataset, unshuffled so that each iteration over the dataset would yield
                                # samples in the same order

        # Prepare
        train = train.shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
        # No gradient updates in data map, can use bigger batches
        train_unshuffled = train_unshuffled.batch(BATCH_SIZE * 10)

        # Create the datamap callback
        datamap = tvl.learning.DataMapCallback(dataset=train_unshuffled)

        # Train
        model.fit(train, epochs=N_EPOCHS, callbacks=[datamap])

        # Get training dynamics
        confidence, variability, correctness = datamap.confidence, datamap.variability, datamap.correctness

    Calculate training dynamics from a model that outputs logits (and NOT probabilities)

    .. code-block:: python3

        import tensorflow as tf
        import tavolo as tvl

        # Create the datamap callback - using the outputs_to_probabilities option
        datamap = tvl.learning.DataMapCallback(dataset=train_unshuffled, outputs_to_probabilities=tf.nn.softmax)

        # Train
        model.fit(train, epochs=N_EPOCHS, callbacks=[datamap])

    References
    ----------
    - `Dataset Cartography: Mapping and Diagnosing Datasets with Training Dynamics`_

    .. _`Dataset Cartography: Mapping and Diagnosing Datasets with Training Dynamics`:
        https://arxiv.org/pdf/2009.10795
    """

    # TODO - The implementation saves all the gold label probabilities across epochs for the training dynamics
    #        computations. This can be optimized by calculating a running version of each training dynamic.
    #        Once tfp.stats releases RunningVariance and RunningMean to the stable tfp versions - training dynamics
    #        calculations should be reimplemented doing this, thus avoiding (n_epochs - 1) * n_samples floating points
    #        memory usage.

    def __init__(self, dataset: tf.data.Dataset,
                 outputs_to_probabilities: Optional[Callable[[Any], tf.Tensor]] = None,
                 sparse_labels: bool = False):
        """
        :param dataset: Dataset. Usually, as the paper suggests, this is the training dataset. It should be:

            - Non-shuffled, so each iteration over the dataset should yield samples in the same order
            - Already batched, the ``.batch(n)`` method should already be applied on this dataset
            - Should yield batches of ``(features, labels)``, sample weights are not supported

        :param outputs_to_probabilities: Callable to convert model's output to probabilities. Use this if the model
            outputs logits, dictionary or any other form which is not a vector of probabilities.
        :param sparse_labels: Set to ``True`` if the labels are given as integers (not one hot encoded)
        """
        # Let the Keras base class set up its own state before adding ours
        super().__init__()

        self._dataset = dataset
        self._outputs2probabilities = outputs_to_probabilities
        self._sparse_labels = sparse_labels

        # Matrix of shape (n_samples, n_epochs), one column is added per finished epoch
        self._gold_labels_probabilities = None

    def on_epoch_end(self, epoch, logs=None):
        """
        Record the gold label probability of every sample in the dataset for the epoch that just ended.

        :param epoch: Index of the finished epoch (unused)
        :param logs: Metric results of the finished epoch (unused)
        :raises ValueError: If the labels are multi-label, or are neither binary nor multi-class shaped
        """
        # Collect per-batch results and concatenate once, instead of re-copying a growing array on every batch
        batches = []

        for x, y in self._dataset:
            # The dataset is already batched, so run each batch as-is in a single forward pass
            probabilities = self.model.predict_on_batch(x)

            if self._outputs2probabilities is not None:
                probabilities = self._outputs2probabilities(probabilities)

            batch_gold_label_probabilities = self._batch_gold_label_probabilities(probabilities, y)
            batches.append(np.asarray(batch_gold_label_probabilities, dtype=np.float64).reshape(-1))

        if batches:
            gold_label_probabilities = np.concatenate(batches)
        else:
            # Empty dataset - record an empty column
            gold_label_probabilities = np.empty((0,), dtype=np.float64)

        epoch_column = np.expand_dims(gold_label_probabilities, axis=-1)

        if self._gold_labels_probabilities is None:  # First epoch
            self._gold_labels_probabilities = epoch_column
        else:
            self._gold_labels_probabilities = np.hstack([self._gold_labels_probabilities, epoch_column])

    def _batch_gold_label_probabilities(self, probabilities, labels) -> np.ndarray:
        """
        Extract the probability assigned to the gold label for every sample of a single batch.

        :param probabilities: Predicted probabilities for the batch
        :param labels: Labels of the batch as yielded by the dataset
        :return: One dimensional array with one gold label probability per sample
        :raises ValueError: If the labels are multi-label, if binary labels are paired with multi-class
            probabilities, or if the labels are neither of rank 1 nor 2 once singleton axes are removed
        """
        # Only singleton axes *after* the batch axis are removed, so a batch holding a single sample
        # keeps its batch axis and is still recognized as binary / multi-class correctly
        probabilities = self._squeeze_non_batch_axes(probabilities)
        labels = self._squeeze_non_batch_axes(labels)

        # A single-output (binary) model already has labels in the 0 / 1 form the binary branch expects;
        # one hot encoding them with depth 1 would flip them, so only multi-class outputs are encoded
        if self._sparse_labels and probabilities.shape.rank == 2:
            labels = tf.one_hot(labels, depth=probabilities.shape[-1])

        labels_rank = labels.shape.rank

        if labels_rank == 1:  # Binary classification
            if probabilities.shape.rank != 1:
                raise ValueError(
                    'Labels are of rank 1 (binary classification) but the probabilities are of shape {}. '
                    'If the labels are class indices of a multi class problem, '
                    'set sparse_labels=True'.format(tuple(probabilities.shape)))

            # Probability of the positive class is given, the negative class gets the complement
            return tf.where(labels == 0, 1 - probabilities, probabilities).numpy()

        if labels_rank == 2:  # Multi-class classification
            # Every row must have exactly one positive class
            is_one_hot = tf.reduce_sum(tf.cast(labels == 1, tf.int32), axis=-1) == 1
            if not tf.reduce_all(is_one_hot):
                raise ValueError('DataMapCallback does not support multi-label classification')

            return tf.boolean_mask(probabilities, tf.cast(labels, tf.bool)).numpy()

        raise ValueError(
            'tf.squeeze(y) (y == labels from the dataset) must be of rank 1 for binary classification or '
            '2 for multi class. Instead got ({})'.format(labels_rank))

    @staticmethod
    def _squeeze_non_batch_axes(tensor) -> tf.Tensor:
        """
        Remove all axes of size 1 except the first (batch) axis.

        :param tensor: Tensor, or any value convertible to a tensor
        :return: Tensor without singleton axes after the batch axis
        """
        tensor = tf.convert_to_tensor(tensor)
        unit_axes = [axis for axis, size in enumerate(tensor.shape) if axis > 0 and size == 1]

        # An empty ``axis`` list makes tf.squeeze remove every singleton axis, including the batch axis
        if not unit_axes:
            return tensor

        return tf.squeeze(tensor, axis=unit_axes)

    @property
    def gold_labels_probabilities(self) -> Optional[np.ndarray]:
        """
        Gold label predicted probabilities. With the shape of ``(n_samples, n_epochs)`` and ``(i, j)`` is the
        probability of the gold label for sample ``i`` at epoch ``j``

        :return: Gold label probabilities, ``None`` if no epoch has ended yet
        """
        return self._gold_labels_probabilities

    @property
    def confidence(self) -> np.ndarray:
        """
        Mean of true label probability across epochs.
        Available only after at least one epoch has ended.

        :return: Confidence
        """
        return np.mean(self._gold_labels_probabilities, axis=-1)

    @property
    def variability(self) -> np.ndarray:
        """
        Standard deviation of true label probability across epochs.
        Available only after at least one epoch has ended.

        :return: Variability
        """
        return np.std(self._gold_labels_probabilities, axis=-1)

    @property
    def correctness(self) -> np.ndarray:
        """
        Fraction of times correctly predicted across epochs.
        Available only after at least one epoch has ended.

        A prediction is counted as correct when the gold label probability is greater than 0.5.
        For multi-class models this is a lower bound, since a class can be the most probable one
        with a probability of 0.5 or less.

        :return: Correctness
        """
        return np.mean(self._gold_labels_probabilities > 0.5, axis=-1)