Skip to content

Instantly share code, notes, and snippets.

@void52
Forked from queba/tsplot
Created January 15, 2023 23:41
Show Gist options
  • Select an option

  • Save void52/ad0943b76a37a984eb42dacf188d296c to your computer and use it in GitHub Desktop.

Select an option

Save void52/ad0943b76a37a984eb42dacf188d296c to your computer and use it in GitHub Desktop.
Plotting a time series
#!/usr/bin/env python
"""
tsplot: a matplotlib wrapper that plots a time series read from a file.
It tokenizes each line into fields with the specified set of delimiters
(whitespaces by default). One of these fields should be designated as the time
column so that tsplot can interpret the time series. If none is specified,
the first column is used. For tsplot to understand the time values, the time
format, which should use the same syntax used by strptime(3) and has a default
value of "%H:%M:%S", can be specified. The rest of the fields are considered
data points unless a discriminator field index is specified by the
"--discriminator" option. For example, with time column being 1,
fields in the second column of the following input are considered data points:
09:00:00 3.5
09:00:03 9.2
09:01:07 4.3
If we add one more discriminator column (the last column):
09:00:00 3.5 A
09:00:03 9.2 B
09:01:07 4.3 A
We'll end up with 2 lines in the plot, one for column 2 of A and the other for
column 2 of B. So if there are N data-point fields and M discriminators (M>=0),
we shall see N (if M=0) or MxN lines in the plot.
"""
import sys
from datetime import datetime
import re, argparse
import numpy
import matplotlib.pyplot as plt
from matplotlib import dates
# Line colours cycled through for successive series. White ('w') is left out
# on purpose: it is invisible on matplotlib's default white background.
PALETTE = 'bgrcmyk'


def build_delimiter(chars):
    """Return a compiled regex that matches any single character of *chars*.

    Every character is escaped, so regex metacharacters such as '|' or '.'
    can be used as literal field delimiters.
    """
    return re.compile('|'.join(re.escape(c) for c in chars))


def read_records(lines, delimiter, timefmt, time_col=1, discriminator=0,
                 start=None, end=None):
    """Parse *lines* into per-discriminator time series.

    Args:
        lines: iterable of text lines (an open file works).
        delimiter: compiled regex used to split a line into fields.
        timefmt: strptime() format of the time column.
        time_col: 1-based index of the time column.
        discriminator: 1-based index of the discriminator column, or 0 when
            there is none.
        start: if given, rows with an earlier time are dropped.
        end: if given, rows with a later time are dropped.

    Returns:
        A dict mapping the discriminator value ('' when there is no
        discriminator column) to a pair ``(times, columns)``. ``times`` is
        the list of parsed datetimes, one per accepted row, and ``columns``
        maps each 1-based data-column index to the list of raw string fields
        of that column.

    Raises:
        ValueError: a time field does not match *timefmt*.
        IndexError: a row has fewer fields than *time_col* or
            *discriminator* requires.
    """
    records = {}
    for line in lines:
        line = line.strip()
        if not line:
            # A blank line has no time field to parse.
            continue
        cols = delimiter.split(line)
        time_point = datetime.strptime(cols[time_col - 1], timefmt)
        if start is not None and time_point < start:
            continue
        if end is not None and time_point > end:
            continue
        data_cols = [i for i in range(1, len(cols) + 1)
                     if i not in (time_col, discriminator)]
        if not data_cols:
            continue
        key = cols[discriminator - 1] if discriminator else ''
        times, data_vals = records.setdefault(key, ([], {}))
        # Exactly one time point per row, so that the x values stay aligned
        # with every data column regardless of how many columns there are.
        times.append(time_point)
        for i in data_cols:
            data_vals.setdefault(i, []).append(cols[i - 1])
    return records


def build_plot_args(records, user_legends=()):
    """Turn the output of read_records() into plot arguments and legends.

    Discriminator values and column indices are visited in sorted order so
    that the result is deterministic and *user_legends* are assigned to the
    data columns from left to right.

    Args:
        records: mapping as returned by read_records().
        user_legends: optional legend names, one per data column; columns
            beyond the end of this sequence are named 'col-<index>'.

    Returns:
        A pair ``(plot_args, legends)``. ``plot_args`` is a flat list of
        ``x, y, format`` triples (x and y being numpy arrays) and ``legends``
        holds one label per triple.

    Raises:
        ValueError: a data field cannot be converted to float.
    """
    legends = []
    plot_args = []
    style_count = 0
    for key in sorted(records):
        times, data_vals = records[key]
        x = numpy.array(times)
        prefix = '%s-' % key if key else ''
        for col_count, col_idx in enumerate(sorted(data_vals)):
            if col_count < len(user_legends):
                name = user_legends[col_count]
            else:
                name = 'col-%d' % col_idx
            legends.append(prefix + name)
            y = numpy.array([float(v) for v in data_vals[col_idx]])
            colour = PALETTE[style_count % len(PALETTE)]
            plot_args.extend([x, y, '%s-' % colour])
            style_count += 1
    return plot_args, legends


def main(argv=None):
    """Parse the command line, read the time series and show the plot.

    Args:
        argv: argument list to parse; defaults to sys.argv[1:].
    """
    parser = argparse.ArgumentParser(description='Plot time series')
    parser.add_argument('-d', dest='delimiter', help='delimiter(s)',
                        default=' \t')
    parser.add_argument('-t', dest='time_col', type=int,
                        help='the index of the time column (1-based)')
    parser.add_argument('input', nargs='?',
                        help='input file (stdin if not specified)')
    parser.add_argument('--timefmt', default='%H:%M:%S',
                        help='the format of the time column '
                             '(see strptime(3))')
    parser.add_argument('-s', '--start', dest='start',
                        help='the start time (should have the same format '
                             'as --timefmt)')
    parser.add_argument('-e', '--end', dest='end',
                        help='the end time (should have the same format '
                             'as --timefmt)')
    parser.add_argument('--discriminator', type=int, default=0,
                        help='the index of the discriminator column '
                             '(1-based)')
    parser.add_argument('--legend', type=str,
                        help='comma-separated legends')
    opts = parser.parse_args(argv)

    if not opts.delimiter:
        parser.error('delimiter must not be empty')
    time_col = opts.time_col or 1
    if opts.discriminator and opts.discriminator == time_col:
        # sys.exit() with a string prints it to stderr and exits with 1.
        sys.exit('discriminator column cannot be the same as the time column')
    start = datetime.strptime(opts.start, opts.timefmt) if opts.start else None
    end = datetime.strptime(opts.end, opts.timefmt) if opts.end else None

    input_file = open(opts.input, 'r') if opts.input else sys.stdin
    try:
        records = read_records(input_file, build_delimiter(opts.delimiter),
                               opts.timefmt, time_col, opts.discriminator,
                               start, end)
    finally:
        # Only close what was opened here; stdin belongs to the caller.
        if input_file is not sys.stdin:
            input_file.close()

    user_legends = opts.legend.split(',') if opts.legend else []
    plot_args, legends = build_plot_args(records, user_legends)

    fig, ax = plt.subplots()
    ax.xaxis_date()
    ax.xaxis.set_major_formatter(dates.DateFormatter(opts.timefmt))
    lines = ax.plot(*plot_args)
    fig.legend(lines, legends, loc='upper right')
    plt.show()


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment