Created
January 17, 2024 12:38
-
-
Save haydenflinner/1c713169250170a21b34ecc25e4d4992 to your computer and use it in GitHub Desktop.
Revisions
-
haydenflinner created this gist
Jan 17, 2024 .There are no files selected for viewing
from dataclasses import dataclass

import pandas as pd


@dataclass
class PositionSample:
    """One rider's running position at the end of one lap."""

    lap: int    # lap number; the sample data below starts at 1
    rider: str  # rider identifier
    pos: int    # running position on that lap; 1 is the front of the field


# Short alias so the literal table below stays compact.
Sample = PositionSample

# 5 riders, 4 lap race. Real data should reveal more noise / trends.
df = pd.DataFrame([
    Sample(1, 'a', 1),
    Sample(1, 'b', 2),
    Sample(1, 'c', 3),
    Sample(1, 'd', 4),
    Sample(1, 'e', 5),

    Sample(2, 'a', 1),
    Sample(2, 'b', 2),
    Sample(2, 'c', 3),
    Sample(2, 'e', 4),
    Sample(2, 'd', 5),  # <-- d and e traded places

    Sample(3, 'a', 1),
    Sample(3, 'd', 2),
    Sample(3, 'e', 3),
    Sample(3, 'c', 4),  # b took out c and himself. or c took out b and himself.
    Sample(3, 'b', 5),

    # Finishing lap unchanged from prior lap.
    Sample(4, 'a', 1),
    Sample(4, 'd', 2),
    Sample(4, 'e', 3),
    Sample(4, 'c', 4),
    Sample(4, 'b', 5),
])

# Our goal here is to find if there's a correlation between being near certain riders
# and changes in Position. For example, maybe a certain rider is known to ride
# a wide bike and so being just behind him means you have a below average
# chance of passing. To really weigh that you'd need to correct for speed with
# something like ELO or maybe just finishing position in the current race.
# Another example would be that having Jett Lawrence behind you is a recipe
# for losing one spot by the end of the lap.
# This may also reveal riders who have a tendency
# to put other riders far down the results sheet.

# For each lap sample, this is important info.
# Within each lap, order riders front to back so that shifting the "rider"
# column by one row yields the neighbour directly ahead / behind.
# The leader gets NaN for rider_ahead; the last rider gets NaN for rider_behind.
df = df.sort_values(by=["lap", "pos"])
df["rider_ahead"] = df.groupby("lap")["rider"].shift()
df["rider_behind"] = df.groupby("lap")["rider"].shift(-1)

# Group by "rider" and then use shift to get the previous lap's "pos"
# (and who was directly ahead of this rider on the previous lap).
df = df.sort_values(by=["rider", "lap"])
df["prev_pos"] = df.groupby("rider")["pos"].shift()
df["rider_ahead_last_lap"] = df.groupby("rider")["rider_ahead"].shift()
# Debugging aid: display(df.sort_values(by=["lap", "pos"]))

# Drop rows where there is no previous lap. Not a big loss since first laps are especially hectic.
df = df.dropna(subset=["prev_pos"])

# Positive pos_change = the rider dropped back; negative = moved forward.
df["pos_change"] = df.pos - df.prev_pos

# Put the rows back in race order and renumber them. sort_values returns a
# new frame, so the result has to be assigned; sorting before the index
# reset keeps the final index sequential.
df = df.sort_values(by=["lap", "pos"]).reset_index(drop=True)

# Worst (largest) position change seen after following each rider.
# Rows with no rider ahead on the previous lap have a NaN key and are left
# out by groupby's default handling of missing keys.
# NOTE(review): `display` is not imported here; presumably the IPython/Jupyter
# builtin, i.e. this is expected to run as a notebook cell - confirm.
display(df.groupby("rider_ahead_last_lap")["pos_change"].max())

import plotly.express as px

# Distribution of position changes, one box per rider followed on the
# previous lap. Left as the trailing expression so a notebook renders it.
px.box(df, x='rider_ahead_last_lap', y='pos_change')