Skip to content

Instantly share code, notes, and snippets.

@tomron
Created September 15, 2022 10:11
Show Gist options
  • Select an option

  • Save tomron/8a8e2e17538c303f3bef60cd7f41f315 to your computer and use it in GitHub Desktop.

Select an option

Save tomron/8a8e2e17538c303f3bef60cd7f41f315 to your computer and use it in GitHub Desktop.

Revisions

  1. tomron created this gist Sep 15, 2022.
    281 changes: 281 additions & 0 deletions missleading_plots.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,281 @@
    import os
    import random
    import sys

    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    import plotly
    import plotly.express as px
    import plotly.graph_objects as go
    from plotly.subplots import make_subplots

    # Shared configuration for the figures produced by this script.
    output_folder = 'datatlv'
    color = "#F7931E"
    # Quantile estimator name passed to np.percentile when box plots are annotated.
    method = 'averaged_inverted_cdf'

    # Seed NumPy's global generator so every run draws the same samples.
    np.random.seed(1)
    mu, sigma = 0, 4

    small_sample_size, medium_sample_size, large_sample_size = 10, 100, 1000
    # The three draws happen in this exact order (small, medium, large); the
    # medium draw is kept because it advances the generator before the large one.
    small_sample, medium_sample, large_sample = (
        np.random.normal(mu, sigma, size)
        for size in (small_sample_size, medium_sample_size, large_sample_size)
    )
    data = [small_sample, large_sample]

    # Integers 0..100, one of each.
    uniform_data = list(range(101))
    # The same 0..100 range, but with the observations piled onto five values.
    interval_data = [
        value
        for value, count in ((0, 25), (25, 24), (50, 3), (75, 24), (100, 25))
        for _ in range(count)
    ]

    def get_basic_layout(title=''):
        """Build the plotly layout shared by the figures in this script.

        Both axes are drawn as a mirrored black line of width 2 with the grid
        and zero line switched off. The legend is hidden, the paper and plot
        backgrounds are fully transparent, the font size is 30 and the title
        is positioned at x=0.5.

        Args:
            title: Text used as the figure title; empty by default.

        Returns:
            A ``go.Layout`` configured as described above.
        """
        frame_style = {
            'showgrid': False,
            'zeroline': False,
            'showline': True,
            'mirror': True,
            'linewidth': 2,
            'linecolor': 'black',
        }
        transparent = 'rgba(0,0,0,0)'
        layout_options = {
            # Each axis gets its own copy of the shared frame styling.
            'xaxis': dict(frame_style),
            'yaxis': dict(frame_style),
            'showlegend': False,
            'paper_bgcolor': transparent,
            'plot_bgcolor': transparent,
            'title_x': 0.5,
            'font': dict(size=30),
            'title': title,
        }
        return go.Layout(**layout_options)

    ###############################################################################################################

    def get_width(sample):
        """Return a box width that grows with the logarithm of the sample size.

        The width is ``log2(len(sample)) / 20``; for example a sample of 1024
        observations maps to a width of 0.5.

        Args:
            sample: Sized collection of observations (anything ``len`` accepts).

        Returns:
            The box width as a float. An empty sample yields ``0.0`` rather
            than the ``-inf`` that ``log2(0)`` would produce; a single-element
            sample also yields 0. Plotly's box trace documents a width of 0 as
            "choose the width automatically".
        """
        sample_size = len(sample)
        if sample_size == 0:
            # log2(0) is -inf (and emits a RuntimeWarning), which is not a
            # usable width for a box trace.
            return 0.0
        return np.log2(sample_size) / 20

    def build_traces(samples, names, args=None, width_func=None, output_path=None, layout=None, show_text=True, only_traces=False):
        """Build one box trace per sample and, optionally, a finished figure.

        Args:
            samples: Iterable of samples; each sample becomes one ``go.Box``.
            names: Trace names, paired with ``samples`` in order.
            args: Extra keyword arguments forwarded to every ``go.Box``.
                ``None`` (the default) means no extra arguments.
            width_func: Optional callable taking a sample and returning the
                ``width`` for its box. When ``None`` no width is passed.
            output_path: If truthy, the figure is written to this path with
                ``fig.write_image(output_path, scale=10)``.
            layout: Layout for the figure. ``None`` (the default) builds a
                fresh ``get_basic_layout()`` on every call instead of sharing
                one object created when the function was defined.
            show_text: When true, annotate each box with its min, q1, median,
                q3 and max.
            only_traces: When true, return the list of traces and skip figure
                creation, annotation and export.

        Returns:
            The list of ``go.Box`` traces if ``only_traces`` is true, otherwise
            the ``go.Figure`` holding them.
        """
        # The samples are walked twice (traces, then annotations); materialise
        # them so a one-shot iterable still gets annotated.
        samples = list(samples)
        box_args = {} if args is None else args

        traces = []
        for sample, name in zip(samples, names):
            # Only pass ``width`` when a width function was supplied.
            width_args = {} if width_func is None else {'width': width_func(sample)}
            traces.append(go.Box(y=sample, name=name, **box_args, **width_args))
        if only_traces:
            return traces

        if layout is None:
            layout = get_basic_layout()
        fig = go.Figure(layout=layout, data=traces)
        if show_text:
            summary_labels = ["min", "q1", "med", "q3", "max"]
            for sample_idx, sample in enumerate(samples):
                # ``method`` is the module-level quantile estimator name.
                # NOTE(review): the ``method`` keyword needs NumPy >= 1.22 - confirm
                # the target environment.
                summary = np.percentile(sample, [0, 25, 50, 75, 100], method=method)
                for label, value in zip(summary_labels, summary):
                    fig.add_annotation(
                        # Offset from the sample's index on the x axis;
                        # presumably this puts the text just right of its box.
                        x=0.33 + sample_idx,
                        y=value,
                        text=f"{label}: {round(value, 2)}",
                        font_size=20,
                        showarrow=False
                    )

        if output_path:
            fig.write_image(output_path, scale=10)
        return fig

    basic_args = {}
    # Box-trace options that additionally draw every sample point, jittered
    # and offset from the box.
    points_args = {'boxpoints': 'all', 'jitter': 0.3, 'pointpos': -1.8}
    points_args.update(basic_args)

    # write_image needs the target directory to exist, so create it up front.
    os.makedirs(output_folder, exist_ok=True)

    # 1) Plain box plot of the two samples.
    fig = build_traces(
        data,
        names=['sample1', 'sample2'],
        args=basic_args,
        output_path=os.path.join(output_folder, 'simple_box.png'),
        layout=get_basic_layout('Simple Box Plot'),
        show_text=False)
    fig.show()

    # 2) Same plot with the five-number summary annotated. It is saved under
    #    its own file name so that it no longer overwrites simple_box.png.
    fig = build_traces(
        data,
        names=['sample1', 'sample2'],
        args=basic_args,
        output_path=os.path.join(output_folder, 'simple_box_with_text.png'),
        layout=get_basic_layout('Simple Box Plot'),
        show_text=True)
    fig.show()

    # 3) Box plot with the individual observations drawn next to each box.
    fig = build_traces(
        data,
        names=['sample1', 'sample2'],
        args=points_args,
        output_path=os.path.join(output_folder, 'simple_box_with_points.png'),
        layout=get_basic_layout('Simple Box Plot with Points'),
        show_text=False)
    fig.show()

    # 4) Box plot whose box widths come from get_width (sample-size based).
    fig = build_traces(
        data,
        names=['sample1', 'sample2'],
        args=basic_args,
        width_func=get_width,
        output_path=os.path.join(output_folder, 'simple_box_with_width.png'),
        layout=get_basic_layout('Simple Box Plot adjusted width'),
        show_text=False)
    fig.show()

    # Side by side: default box widths (left) vs. get_width-scaled widths (right).
    fig = make_subplots(rows=1, cols=2)

    traces = build_traces(
        data,
        names=['sample1', 'sample2'],
        args=basic_args,
        only_traces=True)
    fig.add_traces(traces, rows=1, cols=1)

    traces = build_traces(
        data,
        names=['sample1', 'sample2'],
        args=basic_args,
        width_func=get_width,
        only_traces=True)
    # add_traces takes ``rows`` / ``cols`` (unlike add_trace's ``row`` / ``col``);
    # the singular keyword names raise a TypeError here.
    fig.add_traces(traces, rows=1, cols=2)

    # One frame style applied to the axes of both subplots.
    axis_style = dict(
        showgrid=False,
        zeroline=False,
        showline=True,
        mirror=True,
        linewidth=2,
        linecolor='black')
    fig.update_layout(
        title="Box Plot with and without Width",
        xaxis=dict(axis_style),
        yaxis=dict(axis_style),
        xaxis2=dict(axis_style),
        yaxis2=dict(axis_style),
        showlegend=False,
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
        title_x=0.5,
        font=dict(size=30))

    fig.show()
    ###############################################################################################################

    # Box plots of the evenly spread sample and the clustered sample, first
    # as plain boxes and then with every observation drawn as well.
    named_samples = (("sample1", uniform_data), ("sample2", interval_data))

    fig = go.Figure(
        data=[
            go.Box(y=values, quartilemethod="inclusive", name=label)
            for label, values in named_samples
        ],
        layout=get_basic_layout('Simple Box Plot'))
    fig.show()

    fig = go.Figure(
        data=[
            go.Box(y=values, quartilemethod="inclusive", name=label)
            for label, values in named_samples
        ],
        layout=get_basic_layout('Simple Box Plot with Points'))
    # Turn on the individual points for both traces at once.
    fig.update_traces(boxpoints='all', jitter=0.3)
    fig.show()

    ###############################################################################################################

    # (low, high, count) for each ten-wide bucket, drawn in this order so the
    # random sequence is unchanged. There is no entry for the 10..19 bucket.
    bucket_specs = [
        (90, 100, 7),
        (80, 90, 5),
        (70, 80, 11),
        (60, 70, 14),
        (50, 60, 17),
        (40, 50, 5),
        (30, 40, 7),
        (20, 30, 8),
        (0, 10, 26),
    ]
    histogram = np.concatenate(
        [np.random.randint(low, high, count) for low, high, count in bucket_specs]
    )


    # Tick labels for all ten buckets, highest bucket first.
    labels = [f"{low} to <{low + 10}" for low in range(90, -1, -10)]
    # Histogram with the values on the y axis; the ticks sit at the bucket
    # midpoints (95, 85, ..., 5) and show the bucket labels instead of numbers.
    bucket_midpoints = list(range(95, 0, -10))
    fig = go.Figure(
        data=[go.Histogram(y=histogram)],
        layout=get_basic_layout('Patients Age Histogram'))
    fig.update_layout(
        yaxis={
            'tickmode': 'array',
            'tickvals': bucket_midpoints,
            'ticktext': labels,
        })
    fig.show()

    # The same data as a box plot, first plain and then notched.
    box_variants = (
        ('Patients Age Box Plot', {}),
        ('Patients Age Box Plot Notched', {'notched': True}),
    )
    for box_title, box_options in box_variants:
        fig = go.Figure(
            data=[go.Box(y=histogram, name='Patients', **box_options)],
            layout=get_basic_layout(box_title))
        fig.show()


    # Histogram (left) and box plot (right) of the same data, side by side.
    fig = make_subplots(rows=1, cols=2)

    panels = (
        (go.Histogram(y=histogram), 1),
        (go.Box(y=histogram, marker_color = '#636EFA', name='Patients'), 2),
    )
    for panel_trace, panel_col in panels:
        fig.add_trace(panel_trace, row=1, col=panel_col)

    # Frame styling repeated for the axes of both subplots.
    frame_style = {
        'showgrid': False,
        'zeroline': False,
        'showline': True,
        'mirror': True,
        'linewidth': 2,
        'linecolor': 'black',
    }
    transparent = 'rgba(0,0,0,0)'
    layout_options = {'title': "Histogram vs Box Plot"}
    for axis_name in ('xaxis', 'yaxis', 'xaxis2', 'yaxis2'):
        layout_options[axis_name] = dict(frame_style)
    layout_options.update({
        'showlegend': False,
        'paper_bgcolor': transparent,
        'plot_bgcolor': transparent,
        'title_x': 0.5,
        'font': dict(size=30),
    })
    fig.update_layout(**layout_options)
    fig.show()