Last active
October 31, 2019 02:59
-
-
Save ClaytonJY/ac6853d55cb61d9a2f6885e0679bf66f to your computer and use it in GitHub Desktop.
Revisions
-
ClaytonJY revised this gist
Oct 31, 2019 . 1 changed file with 53 additions and 53 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -21,16 +21,16 @@ input_tbl #> # A tibble: 27 x 7 #> MarketDay HourEnding FuelType Month Region consumption withdrawal #> <date> <int> <chr> <dbl> <chr> <dbl> <dbl> #> 1 2016-01-01 1 Coal 1 North -11.3 23.8 #> 2 2016-01-02 1 Coal 1 North 9.01 16.9 #> 3 2016-01-03 1 Coal 1 North 42.1 47.2 #> 4 2016-01-01 2 Coal 1 North 33.0 94.1 #> 5 2016-01-02 2 Coal 1 North 43.0 88.3 #> 6 2016-01-03 2 Coal 1 North 55.4 137. #> 7 2016-01-01 3 Coal 1 North 68.6 165. #> 8 2016-01-02 3 Coal 1 North 82.0 153. #> 9 2016-01-03 3 Coal 1 North 77.2 158. #> 10 2016-01-01 1 Gas 1 North 84.7 171. #> # … with 17 more rows # we want total consumption & withdrawal per MarketDay & HourEnding. @@ -47,15 +47,15 @@ summary_tbl #> # A tibble: 9 x 4 #> MarketDay HourEnding hourly_total_consumption hourly_total_withdrawal #> <date> <int> <dbl> <dbl> #> 1 2016-01-01 1 261. 549. #> 2 2016-01-01 2 388. 753. #> 3 2016-01-01 3 484. 941. #> 4 2016-01-02 1 319. 680. #> 5 2016-01-02 2 398. 877. #> 6 2016-01-02 3 507. 987. #> 7 2016-01-03 1 362. 696. #> 8 2016-01-03 2 464. 947. #> 9 2016-01-03 3 537. 1075. # now we compute deltas by subtracting a lagged value from our value # this would be trickier to implement and reason about if our hourly_total_* columns @@ -73,15 +73,15 @@ summary_tbl #> # A tibble: 9 x 6 #> MarketDay HourEnding hourly_total_co… hourly_total_wi… delta_hourly_co… #> <date> <int> <dbl> <dbl> <dbl> #> 1 2016-01-01 1 261. 549. NA #> 2 2016-01-01 2 388. 753. 127. #> 3 2016-01-01 3 484. 941. 96.4 #> 4 2016-01-02 1 319. 680. -165. #> 5 2016-01-02 2 398. 877. 78.5 #> 6 2016-01-02 3 507. 987. 109. #> 7 2016-01-03 1 362. 696. -144. #> 8 2016-01-03 2 464. 947. 102. #> 9 2016-01-03 3 537. 1075. 73.2 #> # … with 1 more variable: delta_hourly_withdrawal <dbl> # now we can join this back in to our original table @@ -93,37 +93,37 @@ tbl #> # A tibble: 27 x 11 #> MarketDay HourEnding FuelType Month Region consumption withdrawal #> <date> <int> <chr> <dbl> <chr> <dbl> <dbl> #> 1 2016-01-01 1 Coal 1 North -11.3 23.8 #> 2 2016-01-01 1 Gas 1 North 84.7 171. #> 3 2016-01-01 1 Hydro 1 North 188. 354. #> 4 2016-01-01 2 Coal 1 North 33.0 94.1 #> 5 2016-01-01 2 Gas 1 North 122. 249. #> 6 2016-01-01 2 Hydro 1 North 233. 410. #> 7 2016-01-01 3 Coal 1 North 68.6 165. #> 8 2016-01-01 3 Gas 1 North 161. 296. #> 9 2016-01-01 3 Hydro 1 North 255. 481. #> 10 2016-01-02 1 Coal 1 North 9.01 16.9 #> # … with 17 more rows, and 4 more variables: #> # hourly_total_consumption <dbl>, hourly_total_withdrawal <dbl>, #> # delta_hourly_consumption <dbl>, delta_hourly_withdrawal <dbl> # too many columns to show in this format, so we'll focus on comsumption-only tbl %>% select(-Month, -Region, -FuelType, -contains("withdrawal")) #> # A tibble: 27 x 5 #> MarketDay HourEnding consumption hourly_total_consu… delta_hourly_cons… #> <date> <int> <dbl> <dbl> <dbl> #> 1 2016-01-01 1 -11.3 261. NA #> 2 2016-01-01 1 84.7 261. NA #> 3 2016-01-01 1 188. 261. NA #> 4 2016-01-01 2 33.0 388. 127. #> 5 2016-01-01 2 122. 388. 127. #> 6 2016-01-01 2 233. 388. 127. #> 7 2016-01-01 3 68.6 484. 96.4 #> 8 2016-01-01 3 161. 484. 96.4 #> 9 2016-01-01 3 255. 484. 96.4 #> 10 2016-01-02 1 9.01 319. -165. #> # … with 17 more rows ``` <sup>Created on 2019-10-30 by the [reprex package](https://reprex.tidyverse.org) (v0.3.0)</sup> -
ClaytonJY revised this gist
Oct 31, 2019 . 1 changed file with 92 additions and 62 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -13,87 +13,117 @@ input_tbl <- lst( mutate( MarketDay = as.Date(MarketDay, origin = "1970-01-01"), # skipping GenMISObyFuel, consumption_factor, withdrawal_factor consumption = 10 * (1:n() + rnorm(n())), # changed this up to make later steps more clear withdrawal = 20 * (1:n() + rnorm(n())) ) input_tbl #> # A tibble: 27 x 7 #> MarketDay HourEnding FuelType Month Region consumption withdrawal #> <date> <int> <chr> <dbl> <chr> <dbl> <dbl> #> 1 2016-01-01 1 Coal 1 North 4.47 22.0 #> 2 2016-01-02 1 Coal 1 North -2.44 64.8 #> 3 2016-01-03 1 Coal 1 North 26.9 62.4 #> 4 2016-01-01 2 Coal 1 North 47.6 104. #> 5 2016-01-02 2 Coal 1 North 50.4 86.8 #> 6 2016-01-03 2 Coal 1 North 71.7 79.2 #> 7 2016-01-01 3 Coal 1 North 77.6 138. #> 8 2016-01-02 3 Coal 1 North 65.4 126. #> 9 2016-01-03 3 Coal 1 North 73.6 202. #> 10 2016-01-01 1 Gas 1 North 101. 204. #> # … with 17 more rows # we want total consumption & withdrawal per MarketDay & HourEnding. # we'll do this as a separate tibble because it'll simplify the next step summary_tbl <- input_tbl %>% group_by(MarketDay, HourEnding) %>% summarize( hourly_total_consumption = sum(consumption), hourly_total_withdrawal = sum(withdrawal) ) %>% ungroup() summary_tbl #> # A tibble: 9 x 4 #> MarketDay HourEnding hourly_total_consumption hourly_total_withdrawal #> <date> <int> <dbl> <dbl> #> 1 2016-01-01 1 292. 584. #> 2 2016-01-01 2 397. 826. #> 3 2016-01-01 3 489. 971. #> 4 2016-01-02 1 306. 661. #> 5 2016-01-02 2 418. 877. #> 6 2016-01-02 3 486. 987. #> 7 2016-01-03 1 387. 705. #> 8 2016-01-03 2 455. 867. #> 9 2016-01-03 3 515. 1146. # now we compute deltas by subtracting a lagged value from our value # this would be trickier to implement and reason about if our hourly_total_* columns # were already in our bigger input_tbl summary_tbl <- summary_tbl %>% arrange(MarketDay, HourEnding) %>% # to be extra sure our order is chronological mutate( delta_hourly_consumption = hourly_total_consumption - lag(hourly_total_consumption), delta_hourly_withdrawal = hourly_total_withdrawal - lag(hourly_total_withdrawal) ) # notice the NA's at the top here! # there's nothing to subtract; the lag starts with NA, and x - NA = NA summary_tbl #> # A tibble: 9 x 6 #> MarketDay HourEnding hourly_total_co… hourly_total_wi… delta_hourly_co… #> <date> <int> <dbl> <dbl> <dbl> #> 1 2016-01-01 1 292. 584. NA #> 2 2016-01-01 2 397. 826. 105. #> 3 2016-01-01 3 489. 971. 92.0 #> 4 2016-01-02 1 306. 661. -183. #> 5 2016-01-02 2 418. 877. 112. #> 6 2016-01-02 3 486. 987. 67.6 #> 7 2016-01-03 1 387. 705. -98.3 #> 8 2016-01-03 2 455. 867. 67.1 #> 9 2016-01-03 3 515. 1146. 60.4 #> # … with 1 more variable: delta_hourly_withdrawal <dbl> # now we can join this back in to our original table tbl <- input_tbl %>% left_join(summary_tbl, by = c("MarketDay", "HourEnding")) %>% arrange(MarketDay, HourEnding) tbl #> # A tibble: 27 x 11 #> MarketDay HourEnding FuelType Month Region consumption withdrawal #> <date> <int> <chr> <dbl> <chr> <dbl> <dbl> #> 1 2016-01-01 1 Coal 1 North 4.47 22.0 #> 2 2016-01-01 1 Gas 1 North 101. 204. #> 3 2016-01-01 1 Hydro 1 North 187. 358. #> 4 2016-01-01 2 Coal 1 North 47.6 104. #> 5 2016-01-01 2 Gas 1 North 126. 263. #> 6 2016-01-01 2 Hydro 1 North 224. 460. #> 7 2016-01-01 3 Coal 1 North 77.6 138. #> 8 2016-01-01 3 Gas 1 North 169. 326. #> 9 2016-01-01 3 Hydro 1 North 242. 508. #> 10 2016-01-02 1 Coal 1 North -2.44 64.8 #> # … with 17 more rows, and 4 more variables: #> # hourly_total_consumption <dbl>, hourly_total_withdrawal <dbl>, #> # delta_hourly_consumption <dbl>, delta_hourly_withdrawal <dbl> # too many columns to show in this format, so we'll focus on comsumption-only tbl %>% select(-contains("withdrawal")) #> # A tibble: 27 x 8 #> MarketDay HourEnding FuelType Month Region consumption hourly_total_co… #> <date> <int> <chr> <dbl> <chr> <dbl> <dbl> #> 1 2016-01-01 1 Coal 1 North 4.47 292. #> 2 2016-01-01 1 Gas 1 North 101. 292. #> 3 2016-01-01 1 Hydro 1 North 187. 292. #> 4 2016-01-01 2 Coal 1 North 47.6 397. #> 5 2016-01-01 2 Gas 1 North 126. 397. #> 6 2016-01-01 2 Hydro 1 North 224. 397. #> 7 2016-01-01 3 Coal 1 North 77.6 489. #> 8 2016-01-01 3 Gas 1 North 169. 489. #> 9 2016-01-01 3 Hydro 1 North 242. 489. #> 10 2016-01-02 1 Coal 1 North -2.44 306. #> # … with 17 more rows, and 1 more variable: delta_hourly_consumption <dbl> ``` <sup>Created on 2019-10-30 by the [reprex package](https://reprex.tidyverse.org) (v0.3.0)</sup> -
ClaytonJY revised this gist
Oct 30, 2019 . 1 changed file with 99 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,99 @@ ``` r library(tidyverse) # fake some data input_tbl <- lst( MarketDay = as.Date(c("2016-01-01", "2016-01-02", "2016-01-03")), HourEnding = 1:3, FuelType = c("Coal", "Gas", "Hydro"), Month = 1, # because only Jan dates Region = "North" ) %>% cross_df() %>% mutate( MarketDay = as.Date(MarketDay, origin = "1970-01-01"), # skipping GenMISObyFuel, consumption_factor, withdrawal_factor consumption = rnorm(nrow(.)), # random draws from normal dist; doesn't look like your data, but doesn't matter withdrawal = rnorm(nrow(.)) ) input_tbl #> # A tibble: 27 x 7 #> MarketDay HourEnding FuelType Month Region consumption withdrawal #> <date> <int> <chr> <dbl> <chr> <dbl> <dbl> #> 1 2016-01-01 1 Coal 1 North -0.746 1.70 #> 2 2016-01-02 1 Coal 1 North -0.670 -1.04 #> 3 2016-01-03 1 Coal 1 North -1.07 0.634 #> 4 2016-01-01 2 Coal 1 North -0.375 0.534 #> 5 2016-01-02 2 Coal 1 North -0.0111 0.613 #> 6 2016-01-03 2 Coal 1 North 0.537 -1.64 #> 7 2016-01-01 3 Coal 1 North -1.19 0.645 #> 8 2016-01-02 3 Coal 1 North -0.456 0.631 #> 9 2016-01-03 3 Coal 1 North -0.866 -0.488 #> 10 2016-01-01 1 Gas 1 North 0.131 -0.913 #> # … with 17 more rows # we want sum consumption & withdrawal by MarketDay & HourEnding # will produce 1 row per MarketDay-HourEnding combo # used 3 values for each above, so 9 rows expected input_tbl %>% group_by(MarketDay, HourEnding) %>% summarize( hourly_total_consumption = sum(consumption), hourly_total_withdrawal = sum(withdrawal) ) %>% ungroup() # important so we aren't still grouped during later work #> # A tibble: 9 x 4 #> MarketDay HourEnding hourly_total_consumption hourly_total_withdrawal #> <date> <int> <dbl> <dbl> #> 1 2016-01-01 1 -0.270 1.88 #> 2 2016-01-01 2 -0.840 2.15 #> 3 2016-01-01 3 -2.79 2.35 #> 4 2016-01-02 1 0.343 -0.866 #> 5 2016-01-02 2 -0.552 0.288 #> 6 2016-01-02 3 0.793 0.960 #> 7 2016-01-03 1 0.201 -0.292 #> 8 2016-01-03 2 -0.753 -1.77 #> 9 2016-01-03 3 -0.567 -0.401 # notice everything but the group cols and summarize cols is _gone_ # that's expected! the colums I didn't add to input_tbl would also go away, # hence why not including them didnt matter here #### BONUS #### # remember count()? input_tbl %>% count(MarketDay, HourEnding) #> # A tibble: 9 x 3 #> MarketDay HourEnding n #> <date> <int> <int> #> 1 2016-01-01 1 3 #> 2 2016-01-01 2 3 #> 3 2016-01-01 3 3 #> 4 2016-01-02 1 3 #> 5 2016-01-02 2 3 #> 6 2016-01-02 3 3 #> 7 2016-01-03 1 3 #> 8 2016-01-03 2 3 #> 9 2016-01-03 3 3 # thats really shorthand for a group_by and summarize! input_tbl %>% group_by(MarketDay, HourEnding) %>% summarize(n = n()) %>% # n() is a special function for use in certain dplyr functions; numer of rows in group ungroup() #> # A tibble: 9 x 3 #> MarketDay HourEnding n #> <date> <int> <int> #> 1 2016-01-01 1 3 #> 2 2016-01-01 2 3 #> 3 2016-01-01 3 3 #> 4 2016-01-02 1 3 #> 5 2016-01-02 2 3 #> 6 2016-01-02 3 3 #> 7 2016-01-03 1 3 #> 8 2016-01-03 2 3 #> 9 2016-01-03 3 3 ``` <sup>Created on 2019-10-30 by the [reprex package](https://reprex.tidyverse.org) (v0.3.0)</sup> -
ClaytonJY renamed this gist
Oct 30, 2019 . 1 changed file with 0 additions and 0 deletions.There are no files selected for viewing
File renamed without changes. -
ClaytonJY revised this gist
Oct 30, 2019 . 1 changed file with 56 additions and 26 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,3 +1,4 @@ ``` r library(tidyverse) # all the good stuff library(lubridate) #> @@ -27,16 +28,16 @@ df2016north #> # A tibble: 20,886 x 6 #> MarketDay HourEnding FuelType Month Region GenMISObyFuel #> <date> <dbl> <chr> <dbl> <chr> <int> #> 1 2019-01-01 1 Coal 1 North 7 #> 2 2019-01-02 1 Coal 1 North 7 #> 3 2019-01-03 1 Coal 1 North 18 #> 4 2019-01-04 1 Coal 1 North 8 #> 5 2019-01-05 1 Coal 1 North 11 #> 6 2019-01-06 1 Coal 1 North 20 #> 7 2019-01-07 1 Coal 1 North 18 #> 8 2019-01-08 1 Coal 1 North 2 #> 9 2019-01-09 1 Coal 1 North 5 #> 10 2019-01-10 1 Coal 1 North 7 #> # … with 20,876 more rows # to map (FuelType, Month) to impact_factor, we'd like a table with each as a column @@ -55,12 +56,12 @@ impact_df #> # A tibble: 6 x 3 #> FuelType Month impact_factor #> <chr> <dbl> <int> #> 1 Coal 1 88 #> 2 Gas 1 22 #> 3 Other 1 113 #> 4 Coal 2 140 #> 5 Gas 2 130 #> 6 Other 2 67 # now we can join to get impact_factor into our df2016north tbl # left join means we'll keep all the columns from the left one, df2016north, @@ -72,15 +73,44 @@ df2016north #> # A tibble: 20,886 x 7 #> MarketDay HourEnding FuelType Month Region GenMISObyFuel impact_factor #> <date> <dbl> <chr> <dbl> <chr> <int> <int> #> 1 2019-01-01 1 Coal 1 North 7 88 #> 2 2019-01-02 1 Coal 1 North 7 88 #> 3 2019-01-03 1 Coal 1 North 18 88 #> 4 2019-01-04 1 Coal 1 North 8 88 #> 5 2019-01-05 1 Coal 1 North 11 88 #> 6 2019-01-06 1 Coal 1 North 20 88 #> 7 2019-01-07 1 Coal 1 North 18 88 #> 8 2019-01-08 1 Coal 1 North 2 88 #> 9 2019-01-09 1 Coal 1 North 5 88 #> 10 2019-01-10 1 Coal 1 North 7 88 #> # … with 20,876 more rows # that only showed one FuelType-Month combo # we can randomly sample to better understand what happened sample_n(df2016north, 20) #> # A tibble: 20 x 7 #> MarketDay HourEnding FuelType Month Region GenMISObyFuel impact_factor #> <date> <dbl> <chr> <dbl> <chr> <int> <int> #> 1 2019-02-01 2 Coal 2 North 1 140 #> 2 2019-01-18 2 Coal 2 North 6 140 #> 3 2019-01-31 1 Gas 2 North 3 130 #> 4 2019-02-11 2 Other 2 North 4 67 #> 5 2019-01-07 1 Coal 2 North 13 140 #> 6 2019-01-18 1 Other 2 North 12 67 #> 7 2019-01-20 1 Other 2 North 17 67 #> 8 2019-01-10 1 Other 1 North 1 113 #> 9 2019-01-30 2 Other 2 North 14 67 #> 10 2019-01-23 2 Other 1 North 2 113 #> 11 2019-01-09 1 Gas 1 North 11 22 #> 12 2019-01-04 2 Other 1 North 13 113 #> 13 2019-01-02 2 Coal 1 North 4 88 #> 14 2019-02-24 2 Gas 1 North 9 22 #> 15 2019-01-18 1 Other 1 North 5 113 #> 16 2019-02-20 2 Coal 1 North 14 88 #> 17 2019-02-04 1 Other 2 North 15 67 #> 18 2019-02-26 2 Coal 1 North 1 88 #> 19 2019-02-09 1 Other 2 North 19 67 #> 20 2019-01-21 2 Coal 1 North 19 88 ``` <sup>Created on 2019-10-30 by the [reprex package](https://reprex.tidyverse.org) (v0.3.0)</sup> -
ClaytonJY revised this gist
Oct 30, 2019 . 1 changed file with 31 additions and 35 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,4 +1,3 @@ library(tidyverse) # all the good stuff library(lubridate) #> @@ -28,16 +27,16 @@ df2016north #> # A tibble: 20,886 x 6 #> MarketDay HourEnding FuelType Month Region GenMISObyFuel #> <date> <dbl> <chr> <dbl> <chr> <int> #> 1 2019-01-01 1 Coal 1 North 15 #> 2 2019-01-02 1 Coal 1 North 4 #> 3 2019-01-03 1 Coal 1 North 19 #> 4 2019-01-04 1 Coal 1 North 6 #> 5 2019-01-05 1 Coal 1 North 2 #> 6 2019-01-06 1 Coal 1 North 20 #> 7 2019-01-07 1 Coal 1 North 14 #> 8 2019-01-08 1 Coal 1 North 16 #> 9 2019-01-09 1 Coal 1 North 13 #> 10 2019-01-10 1 Coal 1 North 15 #> # … with 20,876 more rows # to map (FuelType, Month) to impact_factor, we'd like a table with each as a column @@ -56,35 +55,32 @@ impact_df #> # A tibble: 6 x 3 #> FuelType Month impact_factor #> <chr> <dbl> <int> #> 1 Coal 1 100 #> 2 Gas 1 142 #> 3 Other 1 148 #> 4 Coal 2 129 #> 5 Gas 2 97 #> 6 Other 2 43 # now we can join to get impact_factor into our df2016north tbl # left join means we'll keep all the columns from the left one, df2016north, # even the rows with no match in impact_df (all rows have matched in this example) df2016north <- left_join(df2016north, impact_df, by = c("FuelType", "Month")) # peek df2016north #> # A tibble: 20,886 x 7 #> MarketDay HourEnding FuelType Month Region GenMISObyFuel impact_factor #> <date> <dbl> <chr> <dbl> <chr> <int> <int> #> 1 2019-01-01 1 Coal 1 North 15 100 #> 2 2019-01-02 1 Coal 1 North 4 100 #> 3 2019-01-03 1 Coal 1 North 19 100 #> 4 2019-01-04 1 Coal 1 North 6 100 #> 5 2019-01-05 1 Coal 1 North 2 100 #> 6 2019-01-06 1 Coal 1 North 20 100 #> 7 2019-01-07 1 Coal 1 North 14 100 #> 8 2019-01-08 1 Coal 1 North 16 100 #> 9 2019-01-09 1 Coal 1 North 13 100 #> 10 2019-01-10 1 Coal 1 North 15 100 #> # … with 20,876 more rows Created on 2019-10-30 by the reprex package (v0.3.0) -
ClaytonJY revised this gist
Oct 30, 2019 . No changes.There are no files selected for viewing
-
ClaytonJY revised this gist
Oct 30, 2019 . 1 changed file with 41 additions and 35 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,3 +1,4 @@ ``` r library(tidyverse) # all the good stuff library(lubridate) #> @@ -16,24 +17,27 @@ df2016north <- lst( Region = "North" ) %>% cross_df() %>% mutate( MarketDay = as_date(MarketDay), # this one got messed up by cross_df GenMISObyFuel = sample.int(20:7000, nrow(.), replace = TRUE) ) # let's take a peek # should be one row for every possible combo of the stuff in the lst above df2016north #> # A tibble: 20,886 x 6 #> MarketDay HourEnding FuelType Month Region GenMISObyFuel #> <date> <dbl> <chr> <dbl> <chr> <int> #> 1 2019-01-01 1 Coal 1 North 10 #> 2 2019-01-02 1 Coal 1 North 1 #> 3 2019-01-03 1 Coal 1 North 13 #> 4 2019-01-04 1 Coal 1 North 16 #> 5 2019-01-05 1 Coal 1 North 11 #> 6 2019-01-06 1 Coal 1 North 17 #> 7 2019-01-07 1 Coal 1 North 20 #> 8 2019-01-08 1 Coal 1 North 5 #> 9 2019-01-09 1 Coal 1 North 4 #> 10 2019-01-10 1 Coal 1 North 20 #> # … with 20,876 more rows # to map (FuelType, Month) to impact_factor, we'd like a table with each as a column @@ -52,33 +56,35 @@ impact_df #> # A tibble: 6 x 3 #> FuelType Month impact_factor #> <chr> <dbl> <int> #> 1 Coal 1 17 #> 2 Gas 1 18 #> 3 Other 1 24 #> 4 Coal 2 148 #> 5 Gas 2 28 #> 6 Other 2 107 # now we can join to get impact_factor into our df2016north tbl # left join means we'll keep all the columns from the left one, df2016north, # even the rows with no match in impact_df (all rows have matched in this example) df2016north <- left_join(df2016north, impact_df, by = c("Fuel Type", "Month")) #> `by` can't contain join column `Fuel Type` which is missing from LHS # peek df2016north #> # A tibble: 20,886 x 6 #> MarketDay HourEnding FuelType Month Region GenMISObyFuel #> <date> <dbl> <chr> <dbl> <chr> <int> #> 1 2019-01-01 1 Coal 1 North 10 #> 2 2019-01-02 1 Coal 1 North 1 #> 3 2019-01-03 1 Coal 1 North 13 #> 4 2019-01-04 1 Coal 1 North 16 #> 5 2019-01-05 1 Coal 1 North 11 #> 6 2019-01-06 1 Coal 1 North 17 #> 7 2019-01-07 1 Coal 1 North 20 #> 8 2019-01-08 1 Coal 1 North 5 #> 9 2019-01-09 1 Coal 1 North 4 #> 10 2019-01-10 1 Coal 1 North 20 #> # … with 20,876 more rows ``` <sup>Created on 2019-10-30 by the [reprex package](https://reprex.tidyverse.org) (v0.3.0)</sup> -
ClaytonJY created this gist
Oct 30, 2019 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,84 @@ library(tidyverse) # all the good stuff library(lubridate) #> #> Attaching package: 'lubridate' #> The following object is masked from 'package:base': #> #> date # I want to build something kinda like your actual data, but simpler # fewer levels of important cols, only a few extra columns, but same general idea df2016north <- lst( MarketDay = seq.Date(from = as_date("2019-01-01"), to = as_date("2019-02-28"), by = "day"), HourEnding = c(1, 2), FuelType = c("Coal", "Gas", "Other"), Month = month(MarketDay), Region = "North" ) %>% cross_df() %>% mutate(GenMISObyFuel = sample.int(20:7000, nrow(.), replace = TRUE)) # let's take a peek # should be one row for every possible combo of the stuff in the lst above df2016north #> # A tibble: 20,886 x 6 #> MarketDay HourEnding FuelType Month Region GenMISObyFuel #> <dbl> <dbl> <chr> <dbl> <chr> <int> #> 1 17897 1 Coal 1 North 3 #> 2 17898 1 Coal 1 North 18 #> 3 17899 1 Coal 1 North 2 #> 4 17900 1 Coal 1 North 18 #> 5 17901 1 Coal 1 North 20 #> 6 17902 1 Coal 1 North 18 #> 7 17903 1 Coal 1 North 5 #> 8 17904 1 Coal 1 North 14 #> 9 17905 1 Coal 1 North 19 #> 10 17906 1 Coal 1 North 2 #> # … with 20,876 more rows # to map (FuelType, Month) to impact_factor, we'd like a table with each as a column # it might look like this, after we've read it in and formatted it appropriately impact_df <- lst( FuelType = unique(df2016north$FuelType), Month = unique(df2016north$Month) ) %>% cross_df() %>% mutate( impact_factor = sample.int(150:800, nrow(.)) ) # take a peek impact_df #> # A tibble: 6 x 3 #> FuelType Month impact_factor #> <chr> <dbl> <int> #> 1 Coal 1 22 #> 2 Gas 1 118 #> 3 Other 1 133 #> 4 Coal 2 130 #> 5 Gas 2 114 #> 6 Other 2 90 # now we can join to get impact_factor into our df2016north tbl # left join means we'll keep all the columns from the left one, df2016north, # even the rows with no match in impact_df (all rows have matched in this example) df2016north <- left_join(df2016north, impact_df) #> Joining, by = c("FuelType", "Month") # peek df2016north #> # A tibble: 20,886 x 7 #> MarketDay HourEnding FuelType Month Region GenMISObyFuel impact_factor #> <dbl> <dbl> <chr> <dbl> <chr> <int> <int> #> 1 17897 1 Coal 1 North 3 22 #> 2 17898 1 Coal 1 North 18 22 #> 3 17899 1 Coal 1 North 2 22 #> 4 17900 1 Coal 1 North 18 22 #> 5 17901 1 Coal 1 North 20 22 #> 6 17902 1 Coal 1 North 18 22 #> 7 17903 1 Coal 1 North 5 22 #> 8 17904 1 Coal 1 North 14 22 #> 9 17905 1 Coal 1 North 19 22 #> 10 17906 1 Coal 1 North 2 22 #> # … with 20,876 more rows Created on 2019-10-30 by the reprex package (v0.3.0)