#!/usr/bin/python
"""
Simple Python command line program to estimate the number of lines in a
CSV or TSV file without reading the whole file.

The first chunk of the file is sampled, the newline density of that chunk
is measured, and the density is extrapolated to the full file size.

See also:
https://stackoverflow.com/questions/27642656/estimating-number-of-lines-in-a-file-mismatch-between-file-size-and-size-of-al
https://databio.org/posts/wcle.html
http://nathansheffield.com/wordpress/

Modern update by Abe Usher
"""
import os
import sys

try:
    import humanize
except ImportError:
    # humanize only prettifies the output; fall back to digit grouping.
    humanize = None

# Number of bytes read from the start of a file to learn its line density.
LEARN_SIZE = 65536


def estimate_lines(filename, sample_size=LEARN_SIZE):
    """
    Estimate the number of newline-terminated lines in the given file.

    Only the first ``sample_size`` bytes are read. The estimate is
    ``file_size * newlines_in_sample / sample_length``, so a file that fits
    entirely in the sample yields its exact newline count.

    :param filename: path of the file to inspect.
    :param sample_size: number of leading bytes to sample (must be > 0).
    :return: estimated line count as an int; 0 for an empty file or when the
        sample contains no newline at all.
    :raises ValueError: if ``sample_size`` is not positive.
    :raises OSError: if the file cannot be stat'ed or opened.
    """
    if sample_size <= 0:
        raise ValueError('sample_size must be a positive number of bytes')

    total_bytes = os.path.getsize(filename)
    with open(filename, 'rb') as handle:
        sample = handle.read(sample_size)

    # An empty sample would make the density undefined (division by zero).
    if not sample:
        return 0

    newline_count = sample.count(b'\n')
    # Scale with true division: flooring the average line length first (as
    # the naive size / (len // count) form does) skews the estimate and
    # divides by zero when the sample holds no newline.
    return int(round(float(total_bytes) * newline_count / len(sample)))


def _format_count(count):
    """Return a human-friendly rendering of ``count``."""
    if humanize is not None:
        return humanize.intword(count)
    return '{:,}'.format(count)


def main():
    """Print an estimated line count for every file named on the command line."""
    paths = sys.argv[1:]
    if not paths:
        sys.stderr.write('usage: %s FILE [FILE ...]\n' % sys.argv[0])
        sys.exit(2)

    for path in paths:
        total_lines = estimate_lines(path)
        print('Estimated lines in %s: %s' % (path, _format_count(total_lines)))


if __name__ == '__main__':
    main()