Last active
July 16, 2020 21:03
-
-
Save mcshaz/b4793c19096669a89ef83522437ec79d to your computer and use it in GitHub Desktop.
Revisions
-
mcshaz revised this gist
Jul 16, 2020 . 1 changed file with 2 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -104,8 +104,8 @@ oneSampleSurvival <- function(df, description) { df <- byfirstsurg[, c("Male", "Dead", "DateofBirth", "DateOfEvent", "DatelastKnown")] # rename variables to required dataframe names for oneSampleSurvival function names(df) <- c("male", "dead", "dob", "entry", "exit") df$male <- df$male == 1 df$dead <- df$dead == 1 # looking at subsets of the data - trying to identify where/which subset the proportional hazard assumption might hold oneSampleSurvival (df[df$exit - df$entry > 30,], "surviving >30 days from operation") -
mcshaz revised this gist
Jul 16, 2020 . 1 changed file with 11 additions and 8 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,10 +1,12 @@ # see http://hedwig.mgh.harvard.edu/biostatistics/node/30 for the original paper and an excel spreadsheet with macros # the spreadsheet does much the same in VBA, but with considerably more code # unlike the vba spreadsheet, this also takes into acount the fraction of age in years at entry and exit (in creating per patient risk) # if the age is greater than the maximum data (101 years old), each subsequent year is a repeat of the final annual risk available library(survival) oneSampleSurvival <- function(df, description) { # NZ data between 0 and 100 for each year # 10 years of population and death data was averaged (2009-2018) for each year of age until 89 years old # data is then repeated in 5 yearly blocks as more granular data was hard to obtain # mortality at 0 years excludes mortality before 28 days (not relevant) maleMortality <- c(0.001843361228536, 0.000426012398328, 0.000205162390909, 0.000159933000221, 0.000131106236558, 0.000109847867736, 7.54877026981029E-05, 0.000104275266973, 8.46092661589332E-05, 0.000105655560873, 9.83646791092508E-05, 9.85469708619511E-05, 0.000107171040591, 0.000136745787619, 0.000211693246977, 0.000409680498681, 0.00047908243582, 0.00068725838029, 0.000792558666334, 0.00084616668714, 0.000794068160984, 0.000907776641227, 0.000800008665019, 0.000828096770177, 0.000806997128346, 0.000840005277716, 0.000802987629156, 0.000825605870321, 0.000765479018619, 0.000763137039117, 0.000806246482394, 0.000903749143428, 0.000924492304174, 0.000857232269108, 0.00092020098225, 0.000998230025349, 0.001085433512175, 0.001027818977332, 0.001122918474693, 0.00119988265766, 0.00137785701308, 0.001442949518615, 0.00153845943647, 0.00164270871642, 0.00188256662831, 0.002023312990214, 0.002030541981486, 0.002272865187846, 0.002607033588397, 0.002696261347275, 0.002969802371852, 0.003330445145048, 0.003450994308472, 0.003766824109133, 0.004382106432976, 0.004619214946603, 0.00507076963448, 0.005168789456058, 0.006025729167394, 0.006502154100548, 0.006825708117681, 0.00758116723691, 0.008280373380387, 0.009053044757868, 0.00965343239435, 0.010853634625976, 0.012058271659544, 0.013235643983316, 0.014237071816918, 0.016323379469387, 0.01765435043422, 0.019773974552901, 0.021931935366736, 0.024825087322466, 0.027006375178927, 0.029820680952757, 0.033902362368077, 0.03837648923878, 0.042932432116485, 0.046894126378063, 0.054108256726034, 0.060411289976837, 0.068653770714218, 0.077960340417016, 0.088759284493938, 0.099283037280737, 0.112244152400086, 0.128687561933202, 0.144916978613787, 0.157930508606968, 0.229411764705882, 0.229411764705882, 0.229411764705882, 0.229411764705882, 0.229411764705882, 0.362290502793296, 0.362290502793296, 0.362290502793296, 0.362290502793296, 0.362290502793296, 0.4025) @@ -67,10 +69,11 @@ oneSampleSurvival <- function(df, description) { }) #main output cat(description, "(using proportional hazards assumption)", "\n") totalDeaths <- sum(df$dead) totalRisk <- sum(riskOfDeath) cat("observed / expected =", totalDeaths, "/", totalRisk, "\n") print(smrProportionalHazard(totalDeaths, totalRisk)) df$yrsOldAtEntry <- as.numeric(df$entry - df$dob) / 365.25 df$yrsInStudy <- as.numeric(df$exit - df$entry) / 365.25 @@ -105,8 +108,8 @@ df$male = df$male == 1 df$dead = df$dead == 1 # looking at subsets of the data - trying to identify where/which subset the proportional hazard assumption might hold oneSampleSurvival (df[df$exit - df$entry > 30,], "surviving >30 days from operation") oneSampleSurvival (df[df$exit - df$entry > 30 & df$entry - df$dob > 365.25 * 15,], "surviving >30 days and >14 yr old at operation") oneSampleSurvival (df[df$exit - df$entry > 365,], "surviving at least 1 yr from operation") -
mcshaz revised this gist
Jul 16, 2020 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -99,7 +99,7 @@ oneSampleSurvival <- function(df, description) { } df <- byfirstsurg[, c("Male", "Dead", "DateofBirth", "DateOfEvent", "DatelastKnown")] # rename variables to required dataframe names for oneSampleSurvival function names(df) <- c("male", "dead", "dob", "entry", "exit") df$male = df$male == 1 df$dead = df$dead == 1 -
mcshaz revised this gist
Jul 16, 2020 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -4,7 +4,7 @@ library(survival) oneSampleSurvival <- function(df, description) { # NZ data between 0 and 100 for each year # 10 years of population and death data was averaged (2009-2018) for each year until 89 years old # data is then repeated in 5 yearly blocks as more granular data was hard to obtain # mortality at 0 years excludes mortality before 28 days (not relevant) maleMortality <- c(0.001843361228536, 0.000426012398328, 0.000205162390909, 0.000159933000221, 0.000131106236558, 0.000109847867736, 7.54877026981029E-05, 0.000104275266973, 8.46092661589332E-05, 0.000105655560873, 9.83646791092508E-05, 9.85469708619511E-05, 0.000107171040591, 0.000136745787619, 0.000211693246977, 0.000409680498681, 0.00047908243582, 0.00068725838029, 0.000792558666334, 0.00084616668714, 0.000794068160984, 0.000907776641227, 0.000800008665019, 0.000828096770177, 0.000806997128346, 0.000840005277716, 0.000802987629156, 0.000825605870321, 0.000765479018619, 0.000763137039117, 0.000806246482394, 0.000903749143428, 0.000924492304174, 0.000857232269108, 0.00092020098225, 0.000998230025349, 0.001085433512175, 0.001027818977332, 0.001122918474693, 0.00119988265766, 0.00137785701308, 0.001442949518615, 0.00153845943647, 0.00164270871642, 0.00188256662831, 0.002023312990214, 0.002030541981486, 0.002272865187846, 0.002607033588397, 0.002696261347275, 0.002969802371852, 0.003330445145048, 0.003450994308472, 0.003766824109133, 0.004382106432976, 0.004619214946603, 0.00507076963448, 0.005168789456058, 0.006025729167394, 0.006502154100548, 0.006825708117681, 0.00758116723691, 0.008280373380387, 0.009053044757868, 0.00965343239435, 0.010853634625976, 0.012058271659544, 0.013235643983316, 0.014237071816918, 0.016323379469387, 0.01765435043422, 0.019773974552901, 0.021931935366736, 0.024825087322466, 0.027006375178927, 0.029820680952757, 0.033902362368077, 0.03837648923878, 0.042932432116485, 0.046894126378063, 0.054108256726034, 0.060411289976837, 0.068653770714218, 0.077960340417016, 0.088759284493938, 0.099283037280737, 0.112244152400086, 0.128687561933202, 0.144916978613787, 0.157930508606968, 0.229411764705882, 0.229411764705882, 0.229411764705882, 0.229411764705882, 0.229411764705882, 0.362290502793296, 0.362290502793296, 0.362290502793296, 0.362290502793296, 0.362290502793296, 0.4025) -
mcshaz revised this gist
Jul 16, 2020 . 1 changed file with 2 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -93,8 +93,8 @@ oneSampleSurvival <- function(df, description) { lines(0:(as.integer(maxYears) + 1L), hazards, col="grey") legend("bottomleft", inset = 0.05, legend = c("general pop. matched for age/sex", "cardiac", "95% conf. int."), col = c("grey", "black", "black"), lty = c(1, 1, 2)) } -
mcshaz revised this gist
Jul 16, 2020 . 1 changed file with 2 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,4 +1,5 @@ # see http://hedwig.mgh.harvard.edu/biostatistics/node/30 for the original paper and an excel spreadsheet with macros # the spreadsheet does much the same in VBA, but with considerably more code and complexity library(survival) oneSampleSurvival <- function(df, description) { @@ -78,7 +79,6 @@ oneSampleSurvival <- function(df, description) { maxYears <- ceiling(max(df$yrsInStudy)) hazards <- by(df, seq_len(nrow(df)), function(rw){ mortVector <- getYrMortality(rw$yrsOldAtEntry, rw$yrsOldAtEntry + maxYears, rw$male) return(exp(-cumsum(mortVector))) }) hazards <- c(1, rowMeans(simplify2array(hazards))) -
mcshaz created this gist
Jul 16, 2020 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,112 @@ # see http://hedwig.mgh.harvard.edu/biostatistics/node/30 library(survival) oneSampleSurvival <- function(df, description) { # NZ data between 0 and 100 for each year # 10 years of population and death data was averaged (2009-2018) for each year until 89 # data is then repeated in 5 yearly blocks as more granular data was hard to obtain # mortality at 0 years excludes mortality before 28 days (not relevant) maleMortality <- c(0.001843361228536, 0.000426012398328, 0.000205162390909, 0.000159933000221, 0.000131106236558, 0.000109847867736, 7.54877026981029E-05, 0.000104275266973, 8.46092661589332E-05, 0.000105655560873, 9.83646791092508E-05, 9.85469708619511E-05, 0.000107171040591, 0.000136745787619, 0.000211693246977, 0.000409680498681, 0.00047908243582, 0.00068725838029, 0.000792558666334, 0.00084616668714, 0.000794068160984, 0.000907776641227, 0.000800008665019, 0.000828096770177, 0.000806997128346, 0.000840005277716, 0.000802987629156, 0.000825605870321, 0.000765479018619, 0.000763137039117, 0.000806246482394, 0.000903749143428, 0.000924492304174, 0.000857232269108, 0.00092020098225, 0.000998230025349, 0.001085433512175, 0.001027818977332, 0.001122918474693, 0.00119988265766, 0.00137785701308, 0.001442949518615, 0.00153845943647, 0.00164270871642, 0.00188256662831, 0.002023312990214, 0.002030541981486, 0.002272865187846, 0.002607033588397, 0.002696261347275, 0.002969802371852, 0.003330445145048, 0.003450994308472, 0.003766824109133, 0.004382106432976, 0.004619214946603, 0.00507076963448, 0.005168789456058, 0.006025729167394, 0.006502154100548, 0.006825708117681, 0.00758116723691, 0.008280373380387, 0.009053044757868, 0.00965343239435, 0.010853634625976, 0.012058271659544, 0.013235643983316, 0.014237071816918, 0.016323379469387, 0.01765435043422, 0.019773974552901, 0.021931935366736, 0.024825087322466, 0.027006375178927, 0.029820680952757, 0.033902362368077, 0.03837648923878, 0.042932432116485, 0.046894126378063, 0.054108256726034, 0.060411289976837, 0.068653770714218, 0.077960340417016, 0.088759284493938, 0.099283037280737, 0.112244152400086, 0.128687561933202, 0.144916978613787, 0.157930508606968, 0.229411764705882, 0.229411764705882, 0.229411764705882, 0.229411764705882, 0.229411764705882, 0.362290502793296, 0.362290502793296, 0.362290502793296, 0.362290502793296, 0.362290502793296, 0.4025) femaleMortality <- c(0.001440725276587, 0.000378599537757, 0.00021709924675, 9.97502891438584E-05, 9.85006425048784E-05, 8.92050090982164E-05, 8.95206096251493E-05, 7.96114330270264E-05, 6.92762143799792E-05, 0.000101432919309, 5.08040463311748E-05, 9.22608680532037E-05, 0.000122942485987, 0.000153294565916, 0.000172502569892, 0.000260513869301, 0.000255523432279, 0.000368255538044, 0.000356330469863, 0.000267139191191, 0.000358230732596, 0.000369833546194, 0.000305476109726, 0.000319588939803, 0.000297956887714, 0.000363010043724, 0.000362859918462, 0.000432002032646, 0.000321026034948, 0.000361625954016, 0.00040452437153, 0.000431745162574, 0.000512770712544, 0.00054317376104, 0.000488063712367, 0.000486563128689, 0.00057339122496, 0.00072400070304, 0.00072060329081, 0.000754517692419, 0.000849463968011, 0.000831645042867, 0.001042677971853, 0.001132957559664, 0.001207180709918, 0.00131585417895, 0.001513946743796, 0.001606617997058, 0.001723479078906, 0.001933808631927, 0.002226989986555, 0.002231269823877, 0.002626438738956, 0.002696431092572, 0.002966902852488, 0.003080837093037, 0.003413401888315, 0.003427147560834, 0.004010391720794, 0.004217567593114, 0.004606013461916, 0.005031607028952, 0.005503746741787, 0.006108782143772, 0.006845816438536, 0.007141344621113, 0.007955395638506, 0.009127032877493, 0.010042400111416, 0.010803760162666, 0.012057728392066, 0.01323141371974, 0.014562716593054, 0.015873768931767, 0.018880147634345, 0.02056309402693, 0.022568564901875, 0.025811635811795, 0.029461369991122, 0.032920253916872, 0.038426882965907, 0.042458470397799, 0.050308505460449, 0.05592788582796, 0.063922777668064, 0.074695945529539, 0.084828676018797, 0.095857321745606, 0.110779421454886, 0.129589230549757, 0.185325602140946, 0.185325602140946, 0.185325602140946, 0.185325602140946, 0.185325602140946, 0.320109190172884, 0.320109190172884, 0.320109190172884, 0.320109190172884, 0.320109190172884, 0.444805194805195) getYrMortality <- function(startAgeYr, endAgeYr, isMale){ if (isMale) { lookupVector <- maleMortality } else { lookupVector <- femaleMortality } startIndx <- as.integer(startAgeYr) + 1L # add 1 as mortality begins at 0 years of age (died <28d excluded) endIndx <- as.integer(endAgeYr) + 1L if (endIndx > length(lookupVector)) { lastIndx <- length(lookupVector) returnVar <- c(lookupVector[startIndx:lastIndx], rep(lookupVector[lastIndx], endIndx - lastIndx)) } else { returnVar <- lookupVector[startIndx:endIndx] } return(returnVar) } smrProportionalHazard <- function(observed, expected, ci = 95) { stat <- (observed - expected) ^ 2 / expected smr <- observed / expected chiterm <- qchisq((100 + ci) / 200, 1) ub <- smr + chiterm / (2 * expected) + ((chiterm * (4 * observed + chiterm)) ^ 0.5) / (2 * expected) lb <- smr + chiterm / (2 * expected) - ((chiterm * (4 * observed + chiterm)) ^ 0.5) / (2 * expected) return(c("smr" = smr, "p_value" = 1 - pchisq(stat, 1), "lower_ci" = lb, "upper_ci" = ub)) } riskOfDeath <- by(df, seq_len(nrow(df)), function(rw){ # extracts mortality data for each year of age between entry and exit date # first and last date will be fractionally reduced according to how much of thay age year was remaining startAgeYr <- as.numeric(rw$entry - rw$dob) / 365.25 startProportion <- startAgeYr %% 1 startAgeYr <- as.integer(startAgeYr) endAgeYr <- as.numeric(rw$exit - rw$dob) / 365.25 endProportion <- endAgeYr %% 1 endAgeYr <- as.integer(endAgeYr) returnVar <- getYrMortality(startAgeYr, endAgeYr, rw$male) if (startAgeYr == endAgeYr) { returnVar <- returnVar * (endProportion - startProportion) } else { returnVar[1] <- returnVar[1] * (1 - startProportion) len <- length(returnVar) returnVar[len] <- returnVar[len] * endProportion } return(sum(returnVar)) }) #main output print(description) print("using proportional hazards assumption") totalDeaths <- sum(df$dead) print(smrProportionalHazard(totalDeaths, sum(riskOfDeath))) df$yrsOldAtEntry <- as.numeric(df$entry - df$dob) / 365.25 df$yrsInStudy <- as.numeric(df$exit - df$entry) / 365.25 # create reference for population hazard maxYears <- ceiling(max(df$yrsInStudy)) hazards <- by(df, seq_len(nrow(df)), function(rw){ mortVector <- getYrMortality(rw$yrsOldAtEntry, rw$yrsOldAtEntry + maxYears, rw$male) return(exp(-cumsum(mortVector))) }) hazards <- c(1, rowMeans(simplify2array(hazards))) survData <- survfit(Surv(df$yrsInStudy, df$dead) ~ 1, data = df) lowerY <- floor(survData$lower[length(survData$lower)] * 100) / 100 par(cex.sub = 0.8) plot(survData, xlab = "Years from op", ylab = "Survival probability", ylim = c(lowerY, 1), sub = description) lines(0:(as.integer(maxYears) + 1L), hazards, col="grey") legend("bottomleft", inset = 0.05, legend=c("general pop. matched for age/sex", "cardiac", "95% conf. int."), col=c("grey", "black", "black"), lty = c(1, 1, 2)) } df <- byfirstsurg[, c("Male", "Dead", "DateofBirth", "DateOfEvent", "DatelastKnown")] # rename variables to required names for getStats function names(df) <- c("male", "dead", "dob", "entry", "exit") df$male = df$male == 1 df$dead = df$dead == 1 # looking at subsets of the data - trying to identify where/which subset the proportional hazard assumption might hold oneSampleSurvival(df[df$exit - df$entry > 30,], "alive >30 days from operation") oneSampleSurvival(df[df$exit - df$entry > 30 & df$entry - df$dob > 365.25 * 15,], "alive >30 days from operation and >14yo at op.") oneSampleSurvival(df[df$exit - df$entry > 365,], "alive at least 1 yr from op")