Skip to content

Instantly share code, notes, and snippets.

@haydenflinner
Created January 17, 2024 12:38
Show Gist options
  • Save haydenflinner/1c713169250170a21b34ecc25e4d4992 to your computer and use it in GitHub Desktop.
Save haydenflinner/1c713169250170a21b34ecc25e4d4992 to your computer and use it in GitHub Desktop.

Revisions

  1. haydenflinner created this gist Jan 17, 2024.
    75 changes: 75 additions & 0 deletions stats.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,75 @@
    import pandas as pd

    from dataclasses import dataclass

    @dataclass
    class PositionSample:
        """One rider's running position on one lap of a race."""

        lap: int  # lap number the sample belongs to
        rider: str  # rider identifier
        pos: int  # running position on that lap; lower means further ahead


    # Short alias so the literal sample table below stays compact.
    Sample = PositionSample

    # 5 riders, 4 lap race. Real data should reveal more noise / trends.
    # Each string spells out one lap's running order: its first letter is the
    # rider in position 1, its last letter the rider in position 5.
    df = pd.DataFrame(
        [
            Sample(lap, rider, pos)
            for lap, running_order in (
                (1, "abcde"),
                (2, "abced"),  # <-- d and e traded places
                (3, "adecb"),  # b took out c and himself. or c took out b and himself.
                (4, "adecb"),  # Finishing lap unchanged from prior lap.
            )
            for pos, rider in enumerate(running_order, start=1)
        ]
    )

    # Our goal here is to find out whether there is a correlation between being
    # near certain riders and changes in position. For example, maybe a certain
    # rider is known to ride a wide bike, so being just behind him means you have
    # a below-average chance of passing. To really weigh that you'd need to
    # correct for speed with something like Elo ratings, or maybe just the
    # finishing position in the current race.
    # Another example would be that having Jett Lawrence behind you is a recipe
    # for losing one spot by the end of the lap.
    # This may also reveal riders who have a tendency
    # to put other riders far down the results sheet.

    # Neighbours within a lap: order every lap by running position, so that the
    # previous / next row inside the same lap group is the rider directly ahead /
    # behind. The leader gets NaN for rider_ahead, the last rider NaN for
    # rider_behind.
    df = df.sort_values(by=["lap", "pos"]).assign(
        rider_ahead=lambda d: d.groupby("lap")["rider"].shift(),
        rider_behind=lambda d: d.groupby("lap")["rider"].shift(-1),
    )

    # History per rider: order each rider's samples by lap, so that the previous
    # row inside the same rider group is that rider's previous sample.
    df = df.sort_values(by=["rider", "lap"]).assign(
        prev_pos=lambda d: d.groupby("rider")["pos"].shift(),
        rider_ahead_last_lap=lambda d: d.groupby("rider")["rider_ahead"].shift(),
    )

    # Drop rows where there is no previous lap. Not a big loss since first laps are
    # especially hectic. The new column is added with assign() rather than
    # `df[...] = ...` because item assignment on the frame returned by dropna()
    # can emit SettingWithCopyWarning on pandas without copy-on-write.
    df = (
        df.dropna(subset=["prev_pos"])
        # pos_change < 0: gained places since the previous lap; > 0: lost places.
        .assign(pos_change=lambda d: d["pos"] - d["prev_pos"])
        # Rows were filtered and re-sorted, so give them a clean 0..n-1 index.
        .reset_index(drop=True)
    )


    # `display` is only predefined inside IPython/Jupyter sessions; fall back to
    # print so the file also runs as a plain script instead of raising NameError.
    try:
        display
    except NameError:
        display = print

    # Show the enriched table in lap / running order. sort_values() returns a new
    # frame, so it has to be handed to display explicitly; df itself keeps its
    # current row order.
    display(df.sort_values(by=["lap", "pos"]))

    # Largest pos_change (i.e. most places lost in one lap) per rider that was
    # directly ahead on the previous lap.
    display(df.groupby("rider_ahead_last_lap")["pos_change"].max())

    import plotly.express as px

    # Distribution of position changes per "rider ahead last lap". show() renders
    # the figure explicitly instead of relying on a notebook echoing the last
    # expression of a cell.
    px.box(df, x='rider_ahead_last_lap', y='pos_change').show()