mcshaz · July 16, 2020 21:03 · Jul 16, 2020 · Jul 16, 2020 · Jul 16, 2020 · Jul 16, 2020
diff --git a/oneSampleSurvival.R b/oneSampleSurvival.R
@@ -104,8 +104,8 @@ oneSampleSurvival <- function(df, description) {
 df <- byfirstsurg[, c("Male", "Dead", "DateofBirth", "DateOfEvent", "DatelastKnown")]
 # rename variables to required dataframe names for oneSampleSurvival function
 names(df) <- c("male", "dead", "dob", "entry", "exit")
-df$male = df$male == 1
-df$dead = df$dead == 1
+df$male <- df$male == 1
+df$dead <- df$dead == 1
 
 # looking at subsets of the data - trying to identify where/which subset the proportional hazard assumption might hold
 oneSampleSurvival (df[df$exit - df$entry > 30,], "surviving >30 days from operation")

diff --git a/oneSampleSurvival.R b/oneSampleSurvival.R
@@ -1,10 +1,12 @@
 # see http://hedwig.mgh.harvard.edu/biostatistics/node/30 for the original paper and an excel spreadsheet with macros
-# the spreadsheet does much the same in VBA, but with considerably more code and complexity
+# the spreadsheet does much the same in VBA, but with considerably more code
+# unlike the vba spreadsheet, this also takes into acount the fraction of age in years at entry and exit (in creating per patient risk)
+# if the age is greater than the maximum data (101 years old), each subsequent year is a repeat of the final annual risk available
 library(survival)
 
 oneSampleSurvival <- function(df, description) {
   # NZ data between 0 and 100 for each year
-  # 10 years of population and death data was averaged (2009-2018) for each year until 89 years old
+  # 10 years of population and death data was averaged (2009-2018) for each year of age until 89 years old
   # data is then repeated in 5 yearly blocks as more granular data was hard to obtain
   # mortality at 0 years excludes mortality before 28 days (not relevant)
   maleMortality <- c(0.001843361228536, 0.000426012398328, 0.000205162390909, 0.000159933000221, 0.000131106236558, 0.000109847867736, 7.54877026981029E-05, 0.000104275266973, 8.46092661589332E-05, 0.000105655560873, 9.83646791092508E-05, 9.85469708619511E-05, 0.000107171040591, 0.000136745787619, 0.000211693246977, 0.000409680498681, 0.00047908243582, 0.00068725838029, 0.000792558666334, 0.00084616668714, 0.000794068160984, 0.000907776641227, 0.000800008665019, 0.000828096770177, 0.000806997128346, 0.000840005277716, 0.000802987629156, 0.000825605870321, 0.000765479018619, 0.000763137039117, 0.000806246482394, 0.000903749143428, 0.000924492304174, 0.000857232269108, 0.00092020098225, 0.000998230025349, 0.001085433512175, 0.001027818977332, 0.001122918474693, 0.00119988265766, 0.00137785701308, 0.001442949518615, 0.00153845943647, 0.00164270871642, 0.00188256662831, 0.002023312990214, 0.002030541981486, 0.002272865187846, 0.002607033588397, 0.002696261347275, 0.002969802371852, 0.003330445145048, 0.003450994308472, 0.003766824109133, 0.004382106432976, 0.004619214946603, 0.00507076963448, 0.005168789456058, 0.006025729167394, 0.006502154100548, 0.006825708117681, 0.00758116723691, 0.008280373380387, 0.009053044757868, 0.00965343239435, 0.010853634625976, 0.012058271659544, 0.013235643983316, 0.014237071816918, 0.016323379469387, 0.01765435043422, 0.019773974552901, 0.021931935366736, 0.024825087322466, 0.027006375178927, 0.029820680952757, 0.033902362368077, 0.03837648923878, 0.042932432116485, 0.046894126378063, 0.054108256726034, 0.060411289976837, 0.068653770714218, 0.077960340417016, 0.088759284493938, 0.099283037280737, 0.112244152400086, 0.128687561933202, 0.144916978613787, 0.157930508606968, 0.229411764705882, 0.229411764705882, 0.229411764705882, 0.229411764705882, 0.229411764705882, 0.362290502793296, 0.362290502793296, 0.362290502793296, 0.362290502793296, 0.362290502793296, 0.4025)
@@ -67,10 +69,11 @@ oneSampleSurvival <- function(df, description) {
   })
 
   #main output
-  print(description)
-  print("using proportional hazards assumption")
+  cat(description, "(using proportional hazards assumption)", "\n")
   totalDeaths <- sum(df$dead)
-  print(smrProportionalHazard(totalDeaths, sum(riskOfDeath)))
+  totalRisk <- sum(riskOfDeath)
+  cat("observed / expected =", totalDeaths, "/", totalRisk, "\n")
+  print(smrProportionalHazard(totalDeaths, totalRisk))
 
   df$yrsOldAtEntry <- as.numeric(df$entry - df$dob) / 365.25
   df$yrsInStudy <- as.numeric(df$exit - df$entry) / 365.25
@@ -105,8 +108,8 @@ df$male = df$male == 1
 df$dead = df$dead == 1
 
 # looking at subsets of the data - trying to identify where/which subset the proportional hazard assumption might hold
-oneSampleSurvival(df[df$exit - df$entry > 30,], "alive >30 days from operation")
+oneSampleSurvival (df[df$exit - df$entry > 30,], "surviving >30 days from operation")
 
-oneSampleSurvival(df[df$exit - df$entry > 30 & df$entry - df$dob > 365.25 * 15,], "alive >30 days from operation and >14yo at op.")
+oneSampleSurvival (df[df$exit - df$entry > 30 & df$entry - df$dob > 365.25 * 15,], "surviving >30 days and >14 yr old at operation")
 
-oneSampleSurvival(df[df$exit - df$entry > 365,], "alive at least 1 yr from op")
+oneSampleSurvival (df[df$exit - df$entry > 365,], "surviving at least 1 yr from operation")
diff --git a/oneSampleSurvival.R b/oneSampleSurvival.R
@@ -99,7 +99,7 @@ oneSampleSurvival <- function(df, description) {
 }
 
 df <- byfirstsurg[, c("Male", "Dead", "DateofBirth", "DateOfEvent", "DatelastKnown")]
-# rename variables to required names for getStats function
+# rename variables to required dataframe names for oneSampleSurvival function
 names(df) <- c("male", "dead", "dob", "entry", "exit")
 df$male = df$male == 1
 df$dead = df$dead == 1

diff --git a/oneSampleSurvival.R b/oneSampleSurvival.R
@@ -4,7 +4,7 @@ library(survival)
 
 oneSampleSurvival <- function(df, description) {
   # NZ data between 0 and 100 for each year
-  # 10 years of population and death data was averaged (2009-2018) for each year until 89
+  # 10 years of population and death data was averaged (2009-2018) for each year until 89 years old
   # data is then repeated in 5 yearly blocks as more granular data was hard to obtain
   # mortality at 0 years excludes mortality before 28 days (not relevant)
   maleMortality <- c(0.001843361228536, 0.000426012398328, 0.000205162390909, 0.000159933000221, 0.000131106236558, 0.000109847867736, 7.54877026981029E-05, 0.000104275266973, 8.46092661589332E-05, 0.000105655560873, 9.83646791092508E-05, 9.85469708619511E-05, 0.000107171040591, 0.000136745787619, 0.000211693246977, 0.000409680498681, 0.00047908243582, 0.00068725838029, 0.000792558666334, 0.00084616668714, 0.000794068160984, 0.000907776641227, 0.000800008665019, 0.000828096770177, 0.000806997128346, 0.000840005277716, 0.000802987629156, 0.000825605870321, 0.000765479018619, 0.000763137039117, 0.000806246482394, 0.000903749143428, 0.000924492304174, 0.000857232269108, 0.00092020098225, 0.000998230025349, 0.001085433512175, 0.001027818977332, 0.001122918474693, 0.00119988265766, 0.00137785701308, 0.001442949518615, 0.00153845943647, 0.00164270871642, 0.00188256662831, 0.002023312990214, 0.002030541981486, 0.002272865187846, 0.002607033588397, 0.002696261347275, 0.002969802371852, 0.003330445145048, 0.003450994308472, 0.003766824109133, 0.004382106432976, 0.004619214946603, 0.00507076963448, 0.005168789456058, 0.006025729167394, 0.006502154100548, 0.006825708117681, 0.00758116723691, 0.008280373380387, 0.009053044757868, 0.00965343239435, 0.010853634625976, 0.012058271659544, 0.013235643983316, 0.014237071816918, 0.016323379469387, 0.01765435043422, 0.019773974552901, 0.021931935366736, 0.024825087322466, 0.027006375178927, 0.029820680952757, 0.033902362368077, 0.03837648923878, 0.042932432116485, 0.046894126378063, 0.054108256726034, 0.060411289976837, 0.068653770714218, 0.077960340417016, 0.088759284493938, 0.099283037280737, 0.112244152400086, 0.128687561933202, 0.144916978613787, 0.157930508606968, 0.229411764705882, 0.229411764705882, 0.229411764705882, 0.229411764705882, 0.229411764705882, 0.362290502793296, 0.362290502793296, 0.362290502793296, 0.362290502793296, 0.362290502793296, 0.4025)

diff --git a/oneSampleSurvival.R b/oneSampleSurvival.R
@@ -93,8 +93,8 @@ oneSampleSurvival <- function(df, description) {
   lines(0:(as.integer(maxYears) + 1L), hazards, col="grey")
   legend("bottomleft",
          inset = 0.05,
-         legend=c("general pop. matched for age/sex", "cardiac", "95% conf. int."),
-         col=c("grey", "black", "black"),
+         legend = c("general pop. matched for age/sex", "cardiac", "95% conf. int."),
+         col = c("grey", "black", "black"),
          lty = c(1, 1, 2))
 }
 

diff --git a/oneSampleSurvival.R b/oneSampleSurvival.R
@@ -1,4 +1,5 @@
-# see http://hedwig.mgh.harvard.edu/biostatistics/node/30
+# see http://hedwig.mgh.harvard.edu/biostatistics/node/30 for the original paper and an excel spreadsheet with macros
+# the spreadsheet does much the same in VBA, but with considerably more code and complexity
 library(survival)
 
 oneSampleSurvival <- function(df, description) {
@@ -78,7 +79,6 @@ oneSampleSurvival <- function(df, description) {
   maxYears <- ceiling(max(df$yrsInStudy))
   hazards <- by(df, seq_len(nrow(df)), function(rw){
     mortVector <- getYrMortality(rw$yrsOldAtEntry, rw$yrsOldAtEntry + maxYears, rw$male)
-
     return(exp(-cumsum(mortVector)))
   })
   hazards <- c(1, rowMeans(simplify2array(hazards)))

diff --git a/oneSampleSurvival.R b/oneSampleSurvival.R
@@ -0,0 +1,112 @@
+# see http://hedwig.mgh.harvard.edu/biostatistics/node/30
+library(survival)
+
+oneSampleSurvival <- function(df, description) {
+  # NZ data between 0 and 100 for each year
+  # 10 years of population and death data was averaged (2009-2018) for each year until 89
+  # data is then repeated in 5 yearly blocks as more granular data was hard to obtain
+  # mortality at 0 years excludes mortality before 28 days (not relevant)
+  maleMortality <- c(0.001843361228536, 0.000426012398328, 0.000205162390909, 0.000159933000221, 0.000131106236558, 0.000109847867736, 7.54877026981029E-05, 0.000104275266973, 8.46092661589332E-05, 0.000105655560873, 9.83646791092508E-05, 9.85469708619511E-05, 0.000107171040591, 0.000136745787619, 0.000211693246977, 0.000409680498681, 0.00047908243582, 0.00068725838029, 0.000792558666334, 0.00084616668714, 0.000794068160984, 0.000907776641227, 0.000800008665019, 0.000828096770177, 0.000806997128346, 0.000840005277716, 0.000802987629156, 0.000825605870321, 0.000765479018619, 0.000763137039117, 0.000806246482394, 0.000903749143428, 0.000924492304174, 0.000857232269108, 0.00092020098225, 0.000998230025349, 0.001085433512175, 0.001027818977332, 0.001122918474693, 0.00119988265766, 0.00137785701308, 0.001442949518615, 0.00153845943647, 0.00164270871642, 0.00188256662831, 0.002023312990214, 0.002030541981486, 0.002272865187846, 0.002607033588397, 0.002696261347275, 0.002969802371852, 0.003330445145048, 0.003450994308472, 0.003766824109133, 0.004382106432976, 0.004619214946603, 0.00507076963448, 0.005168789456058, 0.006025729167394, 0.006502154100548, 0.006825708117681, 0.00758116723691, 0.008280373380387, 0.009053044757868, 0.00965343239435, 0.010853634625976, 0.012058271659544, 0.013235643983316, 0.014237071816918, 0.016323379469387, 0.01765435043422, 0.019773974552901, 0.021931935366736, 0.024825087322466, 0.027006375178927, 0.029820680952757, 0.033902362368077, 0.03837648923878, 0.042932432116485, 0.046894126378063, 0.054108256726034, 0.060411289976837, 0.068653770714218, 0.077960340417016, 0.088759284493938, 0.099283037280737, 0.112244152400086, 0.128687561933202, 0.144916978613787, 0.157930508606968, 0.229411764705882, 0.229411764705882, 0.229411764705882, 0.229411764705882, 0.229411764705882, 0.362290502793296, 0.362290502793296, 0.362290502793296, 0.362290502793296, 0.362290502793296, 0.4025)
+  femaleMortality <- c(0.001440725276587, 0.000378599537757, 0.00021709924675, 9.97502891438584E-05, 9.85006425048784E-05, 8.92050090982164E-05, 8.95206096251493E-05, 7.96114330270264E-05, 6.92762143799792E-05, 0.000101432919309, 5.08040463311748E-05, 9.22608680532037E-05, 0.000122942485987, 0.000153294565916, 0.000172502569892, 0.000260513869301, 0.000255523432279, 0.000368255538044, 0.000356330469863, 0.000267139191191, 0.000358230732596, 0.000369833546194, 0.000305476109726, 0.000319588939803, 0.000297956887714, 0.000363010043724, 0.000362859918462, 0.000432002032646, 0.000321026034948, 0.000361625954016, 0.00040452437153, 0.000431745162574, 0.000512770712544, 0.00054317376104, 0.000488063712367, 0.000486563128689, 0.00057339122496, 0.00072400070304, 0.00072060329081, 0.000754517692419, 0.000849463968011, 0.000831645042867, 0.001042677971853, 0.001132957559664, 0.001207180709918, 0.00131585417895, 0.001513946743796, 0.001606617997058, 0.001723479078906, 0.001933808631927, 0.002226989986555, 0.002231269823877, 0.002626438738956, 0.002696431092572, 0.002966902852488, 0.003080837093037, 0.003413401888315, 0.003427147560834, 0.004010391720794, 0.004217567593114, 0.004606013461916, 0.005031607028952, 0.005503746741787, 0.006108782143772, 0.006845816438536, 0.007141344621113, 0.007955395638506, 0.009127032877493, 0.010042400111416, 0.010803760162666, 0.012057728392066, 0.01323141371974, 0.014562716593054, 0.015873768931767, 0.018880147634345, 0.02056309402693, 0.022568564901875, 0.025811635811795, 0.029461369991122, 0.032920253916872, 0.038426882965907, 0.042458470397799, 0.050308505460449, 0.05592788582796, 0.063922777668064, 0.074695945529539, 0.084828676018797, 0.095857321745606, 0.110779421454886, 0.129589230549757, 0.185325602140946, 0.185325602140946, 0.185325602140946, 0.185325602140946, 0.185325602140946, 0.320109190172884, 0.320109190172884, 0.320109190172884, 0.320109190172884, 0.320109190172884, 0.444805194805195)
+
+  getYrMortality <- function(startAgeYr, endAgeYr, isMale){
+    if (isMale) {
+      lookupVector <- maleMortality
+    } else {
+      lookupVector <- femaleMortality
+    }
+    startIndx <- as.integer(startAgeYr) + 1L # add 1 as mortality begins at 0 years of age (died <28d excluded)
+    endIndx <- as.integer(endAgeYr) + 1L
+    if (endIndx > length(lookupVector)) {
+      lastIndx <- length(lookupVector)
+      returnVar <- c(lookupVector[startIndx:lastIndx], rep(lookupVector[lastIndx], endIndx - lastIndx))
+    } else {
+      returnVar <- lookupVector[startIndx:endIndx]
+    }
+    return(returnVar)
+  }
+
+  smrProportionalHazard <- function(observed, expected, ci = 95) {
+    stat <- (observed - expected) ^ 2 / expected
+
+    smr <- observed / expected
+
+    chiterm <- qchisq((100 + ci) / 200, 1)
+    ub <- smr + chiterm / (2 * expected) + ((chiterm * (4 * observed + chiterm)) ^ 0.5) / (2 * expected)
+    lb <- smr + chiterm / (2 * expected) - ((chiterm * (4 * observed + chiterm)) ^ 0.5) / (2 * expected)
+
+    return(c("smr" = smr,
+            "p_value" = 1 - pchisq(stat, 1),
+            "lower_ci" = lb,
+            "upper_ci" = ub))
+  }
+
+  riskOfDeath <- by(df, seq_len(nrow(df)), function(rw){
+    # extracts mortality data for each year of age between entry and exit date
+    # first and last date will be fractionally reduced according to how much of thay age year was remaining
+    startAgeYr <- as.numeric(rw$entry - rw$dob) / 365.25
+    startProportion <- startAgeYr %% 1
+    startAgeYr <- as.integer(startAgeYr)
+
+    endAgeYr <- as.numeric(rw$exit - rw$dob) / 365.25
+    endProportion <- endAgeYr %% 1
+    endAgeYr <- as.integer(endAgeYr)
+
+    returnVar <- getYrMortality(startAgeYr, endAgeYr, rw$male)
+
+    if (startAgeYr == endAgeYr) {
+      returnVar <- returnVar * (endProportion - startProportion)
+    } else {
+      returnVar[1] <- returnVar[1] * (1 - startProportion)
+      len <- length(returnVar)
+      returnVar[len] <- returnVar[len] * endProportion
+    }
+
+    return(sum(returnVar))
+  })
+
+  #main output
+  print(description)
+  print("using proportional hazards assumption")
+  totalDeaths <- sum(df$dead)
+  print(smrProportionalHazard(totalDeaths, sum(riskOfDeath)))
+
+  df$yrsOldAtEntry <- as.numeric(df$entry - df$dob) / 365.25
+  df$yrsInStudy <- as.numeric(df$exit - df$entry) / 365.25
+
+  # create reference for population hazard
+  maxYears <- ceiling(max(df$yrsInStudy))
+  hazards <- by(df, seq_len(nrow(df)), function(rw){
+    mortVector <- getYrMortality(rw$yrsOldAtEntry, rw$yrsOldAtEntry + maxYears, rw$male)
+
+    return(exp(-cumsum(mortVector)))
+  })
+  hazards <- c(1, rowMeans(simplify2array(hazards)))
+  survData <- survfit(Surv(df$yrsInStudy, df$dead) ~ 1, data = df)
+  lowerY <- floor(survData$lower[length(survData$lower)] * 100) / 100
+  par(cex.sub = 0.8)
+  plot(survData, 
+       xlab = "Years from op", 
+       ylab = "Survival probability",
+       ylim = c(lowerY, 1),
+       sub = description)
+  lines(0:(as.integer(maxYears) + 1L), hazards, col="grey")
+  legend("bottomleft",
+         inset = 0.05,
+         legend=c("general pop. matched for age/sex", "cardiac", "95% conf. int."),
+         col=c("grey", "black", "black"),
+         lty = c(1, 1, 2))
+}
+
+df <- byfirstsurg[, c("Male", "Dead", "DateofBirth", "DateOfEvent", "DatelastKnown")]
+# rename variables to required names for getStats function
+names(df) <- c("male", "dead", "dob", "entry", "exit")
+df$male = df$male == 1
+df$dead = df$dead == 1
+
+# looking at subsets of the data - trying to identify where/which subset the proportional hazard assumption might hold
+oneSampleSurvival(df[df$exit - df$entry > 30,], "alive >30 days from operation")
+
+oneSampleSurvival(df[df$exit - df$entry > 30 & df$entry - df$dob > 365.25 * 15,], "alive >30 days from operation and >14yo at op.")
+
+oneSampleSurvival(df[df$exit - df$entry > 365,], "alive at least 1 yr from op")
No results found