Skip to content

Instantly share code, notes, and snippets.

@mindis
Forked from LeegleechN/verify_tfrecords.py
Created April 3, 2020 08:24
Show Gist options
  • Select an option

  • Save mindis/9cfb757d2320de914e75c08e8a49b5e5 to your computer and use it in GitHub Desktop.

Select an option

Save mindis/9cfb757d2320de914e75c08e8a49b5e5 to your computer and use it in GitHub Desktop.

Revisions

  1. @LeegleechN LeegleechN revised this gist Feb 19, 2017. 1 changed file with 1 addition and 5 deletions.
    6 changes: 1 addition & 5 deletions verify_tfrecords.py
    Original file line number Diff line number Diff line change
    @@ -5,20 +5,16 @@
    CRCs.
    """
    import struct

    #import google3

    import tensorflow as tf

    from tensorflow import app
    from tensorflow import flags
    #from google3.pyglib import gfile
    from tensorflow import gfile
    from tensorflow import logging

    flags = tf.app.flags
    FLAGS = flags.FLAGS
    flags.DEFINE_string("input_data_pattern", "/media/ndk/YT8M/Train/[i-p]*",
    flags.DEFINE_string("input_data_pattern", "",
    "File glob defining for the TFRecords files.")


  2. @LeegleechN LeegleechN created this gist Feb 19, 2017.
    54 changes: 54 additions & 0 deletions verify_tfrecords.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,54 @@
    """Checks if a set of TFRecords appear to be valid.
    Specifically, this checks whether the provided record sizes are consistent and
    that the file does not end in the middle of a record. It does not verify the
    CRCs.
    """
    import struct

    #import google3

    import tensorflow as tf

    from tensorflow import app
    from tensorflow import flags
    #from google3.pyglib import gfile
    from tensorflow import gfile
    from tensorflow import logging

    # Rebinds the already-imported name `flags` to tf.app.flags, then exposes
    # the FLAGS object whose `input_data_pattern` attribute main() reads.
    flags = tf.app.flags
    FLAGS = flags.FLAGS
    # Glob pattern selecting the TFRecords files to check.
    # NOTE(review): the default below is a machine-specific local path;
    # override this flag when running anywhere else.
    flags.DEFINE_string("input_data_pattern", "/media/ndk/YT8M/Train/[i-p]*",
    "File glob defining for the TFRecords files.")


    def main(unused_argv):
        """Check the record framing of every file matching the input glob.

        Each file is walked record by record: an 8-byte length prefix is read,
        then `length + 8` further bytes (the payload plus the two 4-byte CRC
        fields). A file is reported as good when it ends exactly on a record
        boundary after at least one record. CRC values are not verified.

        Args:
            unused_argv: Positional command-line arguments; ignored.
        """
        logging.set_verbosity(tf.logging.INFO)
        paths = gfile.Glob(FLAGS.input_data_pattern)
        logging.info("Found %s files.", len(paths))
        for path in paths:
            # TFRecords are binary: open in "rb" so the bytes are never decoded
            # as text (text mode breaks the length prefix on Python 3).
            with gfile.Open(path, "rb") as f:
                first_read = True
                while True:
                    length_raw = f.read(8)
                    if not length_raw:
                        # Clean end of file: fine unless nothing was ever read.
                        if first_read:
                            logging.fatal("File %s has no data.", path)
                        else:
                            logging.info("File %s looks good.", path)
                        break
                    first_read = False
                    if len(length_raw) != 8:
                        logging.fatal(
                            "File ends when reading record length: %s", path)
                        break
                    # The length prefix is a little-endian uint64. "<Q" is
                    # always 8 bytes, whereas native "L" is only 4 bytes on
                    # some platforms and would fail to unpack.
                    (length,) = struct.unpack("<Q", length_raw)
                    # +8 to include the crc values (4 bytes after the length,
                    # 4 bytes after the payload).
                    expected = length + 8
                    record = f.read(expected)
                    if len(record) != expected:
                        logging.fatal(
                            "File ends in the middle of a record: %s", path)
                        break


    if __name__ == "__main__":
        # Script entry point: hand main() to the TensorFlow app runner.
        app.run(main)