Skip to content

Instantly share code, notes, and snippets.

@ifnull
Last active February 10, 2021 06:26
Show Gist options
  • Save ifnull/859454d076791c99a52510af4bcf6088 to your computer and use it in GitHub Desktop.
Save ifnull/859454d076791c99a52510af4bcf6088 to your computer and use it in GitHub Desktop.

Revisions

  1. Daniel Smith revised this gist Feb 10, 2021. 1 changed file with 0 additions and 121 deletions.
    121 changes: 0 additions & 121 deletions challenger-data.csv
    Original file line number Diff line number Diff line change
    @@ -1,121 +0,0 @@
    Observation,Y,X
    1,1,53
    2,1,53
    3,1,53
    4,0,53
    5,0,53
    6,1,57
    7,0,57
    8,0,57
    9,0,57
    10,0,57
    11,1,58
    12,0,58
    13,0,58
    14,0,58
    15,0,58
    16,1,63
    17,0,63
    18,0,63
    19,0,63
    20,0,63
    21,0,66
    22,0,66
    23,0,66
    24,0,66
    25,0,66
    26,0,67
    27,0,67
    28,0,67
    29,0,67
    30,0,67
    31,0,67
    32,0,67
    33,0,67
    34,0,67
    35,0,67
    36,0,67
    37,0,67
    38,0,67
    39,0,67
    40,0,67
    41,0,68
    42,0,68
    43,0,68
    44,0,68
    45,0,68
    46,0,69
    47,0,69
    48,0,69
    49,0,69
    50,0,69
    51,1,70
    52,0,70
    53,0,70
    54,0,70
    55,0,70
    56,1,70
    57,0,70
    58,0,70
    59,0,70
    60,0,70
    61,0,70
    62,0,70
    63,0,70
    64,0,70
    65,0,70
    66,0,70
    67,0,70
    68,0,70
    69,0,70
    70,0,70
    71,0,72
    72,0,72
    73,0,72
    74,0,72
    75,0,72
    76,0,73
    77,0,73
    78,0,73
    79,0,73
    80,0,73
    81,1,75
    82,1,75
    83,0,75
    84,0,75
    85,0,75
    86,0,75
    87,0,75
    88,0,75
    89,0,75
    90,0,75
    91,0,76
    92,0,76
    93,0,76
    94,0,76
    95,0,76
    96,0,76
    97,0,76
    98,0,76
    99,0,76
    100,0,76
    101,0,78
    102,0,78
    103,0,78
    104,0,78
    105,0,78
    106,0,79
    107,0,79
    108,0,79
    109,0,79
    110,0,79
    111,0,80
    112,0,80
    113,0,80
    114,0,80
    115,0,80
    116,0,81
    117,0,81
    118,0,81
    119,0,81
    120,0,81
  2. Daniel Smith revised this gist Feb 10, 2021. No changes.
  3. Daniel Smith created this gist Feb 10, 2021.
    121 changes: 121 additions & 0 deletions challenger-data.csv
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,121 @@
    Observation,Y,X
    1,1,53
    2,1,53
    3,1,53
    4,0,53
    5,0,53
    6,1,57
    7,0,57
    8,0,57
    9,0,57
    10,0,57
    11,1,58
    12,0,58
    13,0,58
    14,0,58
    15,0,58
    16,1,63
    17,0,63
    18,0,63
    19,0,63
    20,0,63
    21,0,66
    22,0,66
    23,0,66
    24,0,66
    25,0,66
    26,0,67
    27,0,67
    28,0,67
    29,0,67
    30,0,67
    31,0,67
    32,0,67
    33,0,67
    34,0,67
    35,0,67
    36,0,67
    37,0,67
    38,0,67
    39,0,67
    40,0,67
    41,0,68
    42,0,68
    43,0,68
    44,0,68
    45,0,68
    46,0,69
    47,0,69
    48,0,69
    49,0,69
    50,0,69
    51,1,70
    52,0,70
    53,0,70
    54,0,70
    55,0,70
    56,1,70
    57,0,70
    58,0,70
    59,0,70
    60,0,70
    61,0,70
    62,0,70
    63,0,70
    64,0,70
    65,0,70
    66,0,70
    67,0,70
    68,0,70
    69,0,70
    70,0,70
    71,0,72
    72,0,72
    73,0,72
    74,0,72
    75,0,72
    76,0,73
    77,0,73
    78,0,73
    79,0,73
    80,0,73
    81,1,75
    82,1,75
    83,0,75
    84,0,75
    85,0,75
    86,0,75
    87,0,75
    88,0,75
    89,0,75
    90,0,75
    91,0,76
    92,0,76
    93,0,76
    94,0,76
    95,0,76
    96,0,76
    97,0,76
    98,0,76
    99,0,76
    100,0,76
    101,0,78
    102,0,78
    103,0,78
    104,0,78
    105,0,78
    106,0,79
    107,0,79
    108,0,79
    109,0,79
    110,0,79
    111,0,80
    112,0,80
    113,0,80
    114,0,80
    115,0,80
    116,0,81
    117,0,81
    118,0,81
    119,0,81
    120,0,81
    35 changes: 35 additions & 0 deletions main.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,35 @@
    import numpy as np
    import statsmodels.discrete.discrete_model as sm
    import matplotlib as mpl
    import pandas as pd

    from patsy import dmatrices
    from matplotlib import pyplot as plt

    # Load the observations; the columns used below are Y and X.
    data = pd.read_csv("challenger-data.csv")

    # Split the rows on the Y indicator: rows with Y == 1 are treated as
    # failures, rows with Y == 0 as non-failures.
    failures = data[data["Y"] == 1]
    no_failures = data[data["Y"] == 0]

    # Count how many rows of each subset fall on every distinct X value.
    failures_freq = failures["X"].value_counts()
    no_failures_freq = no_failures["X"].value_counts()

    # Scatter plot: red markers show the failure count per X value, blue
    # markers sit on the zero line for every X value that has at least one
    # non-failure row.
    plt.scatter(failures_freq.index, failures_freq, s=40, c="red")
    zero_line = np.zeros(len(no_failures_freq))
    plt.scatter(no_failures_freq.index, zero_line, s=40, c="blue")
    plt.xlabel("X: Temperature")
    plt.ylabel("Number of Failures")
    plt.show()

    # Build the response (y) and design (X) frames from the formula.
    y, X = dmatrices("Y ~ X", data=data, return_type="dataframe")

    # Fit a logistic regression of Y on X.
    logit = sm.Logit(endog=y, exog=X)
    result = logit.fit()

    # Report the fitted model.
    summary = result.summary()
    print(summary)
    5 changes: 5 additions & 0 deletions requirements.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,5 @@
    matplotlib==3.3.4
    numpy==1.18.0
    pandas==1.2.2
    patsy==0.5.1
    statsmodels==0.12.2