-
-
Save void52/ad0943b76a37a984eb42dacf188d296c to your computer and use it in GitHub Desktop.
Plotting a time series
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| """ | |
| tsplot: a matplotlib wrapper that plots a time series read from a file. | |
| It tokenizes each line into fields with the specified set of delimiters | |
| (whitespaces by default). One of these fields should be designated as the time | |
| column so that tsplot can interpret the time series. If none is specified, | |
| the first column is used. For tsplot to understand the time values, the time | |
| format, which should use the same syntax used by strptime(3) and has a default | |
| value of "%H:%M:%S", can be specified. The rest of the fields are considered | |
| data points unless a discriminator field index is specified by the | |
| "--discriminator" option. For example, with the time column being 1, | |
| fields in the second column of the following input are considered data points: | |
| 09:00:00 3.5 | |
| 09:00:03 9.2 | |
| 09:01:07 4.3 | |
| If we add one more discriminator column (the last column): | |
| 09:00:00 3.5 A | |
| 09:00:03 9.2 B | |
| 09:01:07 4.3 A | |
| We'll end up with 2 lines in the plot, one for column 2 of A and the other for | |
| column 2 of B. So if there are N data-point fields and M discriminators (M>=0), | |
| we shall see N (if M=0) or MxN lines in the plot. | |
| """ | |
| import sys | |
| from datetime import datetime | |
| import re, argparse | |
| import numpy | |
| import matplotlib.pyplot as plt | |
| from matplotlib import dates | |
# Single-letter matplotlib colour codes cycled through for successive lines.
# White ('w') is intentionally left out: a white line cannot be seen on the
# default white plot background.
PALETTE = 'bgrcmyk'


def parse_args(argv=None):
    """Parse the command line.

    argv -- list of argument strings; None makes argparse read sys.argv[1:].

    Returns the argparse namespace with the attributes delimiter, time_col,
    input, timefmt, start, end, discriminator and legend.
    """
    parser = argparse.ArgumentParser(description='Plot time series')
    parser.add_argument('-d', dest='delimiter', help='delimiter(s)',
                        default=' \t')
    parser.add_argument('-t', dest='time_col', type=int,
                        help='the index of the time column (1-based)')
    parser.add_argument('input', nargs='?',
                        help='input file (stdin if not specified)')
    parser.add_argument('--timefmt', default='%H:%M:%S',
                        help='the format of the time column (see strptime(3))')
    parser.add_argument('-s', '--start', dest='start',
                        help='the start time (should have the same format as '
                             '--timefmt)')
    parser.add_argument('-e', '--end', dest='end',
                        help='the end time (should have the same format as '
                             '--timefmt)')
    parser.add_argument('--discriminator', type=int, default=0,
                        help='the index of the discriminator column (1-based)')
    parser.add_argument('--legend', type=str, help='comma-separated legends')
    return parser.parse_args(argv)


def compile_delimiter(chars):
    """Return a compiled regex matching any single character of *chars*.

    Every character is escaped so that delimiters such as '|' or '.' are
    matched literally instead of being interpreted as regex syntax.
    """
    return re.compile('|'.join(re.escape(c) for c in chars))


def read_records(lines, delimiter, timefmt, time_col=1, discriminator=0,
                 start=None, end=None):
    """Tokenize *lines* and group the data points by discriminator value.

    lines         -- iterable of text lines (an open file, a list, ...)
    delimiter     -- compiled regex used to split each line into fields
    timefmt       -- strptime format of the time column
    time_col      -- 1-based index of the time column
    discriminator -- 1-based index of the discriminator column, 0 for none
    start, end    -- optional datetime bounds (inclusive); rows outside them
                     are dropped

    Returns a dict mapping the discriminator value ('' when there is no
    discriminator) to a tuple (times, data), where times is the list of
    datetime objects of the accepted rows and data maps each 1-based data
    column index to the list of float values read from that column.

    Raises ValueError when a time field does not match *timefmt* or a data
    field is not a number, and IndexError when a row has no field at
    *time_col* or *discriminator*.
    """
    records = {}
    for line in lines:
        line = line.strip()
        if not line:
            # Blank lines carry no data; without this guard strptime would
            # fail on the empty field.
            continue
        cols = delimiter.split(line)
        time_point = datetime.strptime(cols[time_col - 1], timefmt)
        if start is not None and time_point < start:
            continue
        if end is not None and time_point > end:
            continue
        key = cols[discriminator - 1] if discriminator else ''
        times, data_vals = records.setdefault(key, ([], {}))
        # Exactly one time point per accepted row, so that the x series has
        # the same length as every y series of this record.
        times.append(time_point)
        for i, field in enumerate(cols, 1):
            if i not in (time_col, discriminator):
                data_vals.setdefault(i, []).append(float(field))
    return records


def build_plot_args(records, user_legends):
    """Turn *records* into matplotlib plot arguments and legend labels.

    records      -- mapping as returned by read_records()
    user_legends -- list of labels supplied by the user; the n-th label names
                    the n-th data column of every record, and columns without
                    a user label fall back to 'col-<index>'

    Returns (plot_args, legends): plot_args is a flat list of x, y, style
    triples suitable for plot(*plot_args) and legends holds one label per
    triple, in the same order.
    """
    plot_args = []
    legends = []
    style_count = 0
    # Sort keys and column indexes explicitly: dict ordering is not
    # guaranteed to be sorted, and labels must line up with the plotted lines.
    for key in sorted(records):
        times, data_vals = records[key]
        x = numpy.array(times)
        prefix = '%s-' % key if key else ''
        for col_count, col_idx in enumerate(sorted(data_vals)):
            if col_count < len(user_legends):
                label = user_legends[col_count]
            else:
                label = 'col-%d' % col_idx
            legends.append(prefix + label)
            colour = PALETTE[style_count % len(PALETTE)]
            plot_args.extend([x, numpy.array(data_vals[col_idx]),
                              '%s-' % colour])
            style_count += 1
    return plot_args, legends


def main(argv=None):
    """Read the time series selected on the command line and plot it."""
    opts = parse_args(argv)
    time_col = opts.time_col if opts.time_col else 1
    if opts.discriminator and opts.discriminator == time_col:
        # sys.exit() with a string prints it to stderr and exits with code 1.
        sys.exit('discriminator column cannot be the same as the time column')
    start = datetime.strptime(opts.start, opts.timefmt) if opts.start else None
    end = datetime.strptime(opts.end, opts.timefmt) if opts.end else None
    delimiter = compile_delimiter(opts.delimiter)

    if opts.input:
        # The context manager closes the file even if parsing fails.
        with open(opts.input, 'r') as input_file:
            records = read_records(input_file, delimiter, opts.timefmt,
                                   time_col, opts.discriminator, start, end)
    else:
        records = read_records(sys.stdin, delimiter, opts.timefmt,
                               time_col, opts.discriminator, start, end)

    user_legends = opts.legend.split(',') if opts.legend else []
    plot_args, legends = build_plot_args(records, user_legends)

    fig, ax = plt.subplots()
    ax.xaxis_date()
    ax.xaxis.set_major_formatter(dates.DateFormatter(opts.timefmt))
    lines = ax.plot(*plot_args)
    # The legend location is passed by keyword rather than positionally.
    fig.legend(lines, legends, loc='upper right')
    plt.show()


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment