Created
March 3, 2018 11:10
-
-
Save nbremer/a8cdd4bee75b71fe201e1b54b9ab6f44 to your computer and use it in GitHub Desktop.
Revisions
-
nbremer created this gist
Mar 3, 2018 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,246 @@ #' Create a Kaplan-Meier plot using ggplot2 #' #' @param sfit: a survfit object #' @param table: logical: Create a table graphic below the K-M plot, indicating at-risk numbers? #' @param returns logical: if TRUE, return an arrangeGrob object #' @param xlabs: x-axis label #' @param ylabs: y-axis label #' @param ystratalabs: The strata labels. Default = levels(summary(sfit)$strata) #' @param ystrataname: The legend name. Default = "Strata" #' @param timeby numeric: control the granularity along the time-axis #' @param main plot title #' @param pval logical: add the pvalue to the plot? #' @param marks logical: should censoring marks be added? #' @param shape: what shape should the censoring marks be, default is a vertical line #' @param legend logical: should a legend be added to the plot? #' #' @return a ggplot is made. if return=TRUE, then an arrangeGlob object #' is returned #' @author Abhijit Dasgupta with contributions by Gil Tomas #' \url{http://statbandit.wordpress.com/2011/03/08/an-enhanced-kaplan-meier-plot/} #' slight adjustment to cope with none strata calls (e.g. Surv(time,event)~1), #' option to remove the legend and also draw marks at censoring locations by Nadieh Bremer #' #' @examples #' library(survival) #' data(colon) #' fit <- survfit(Surv(time,status)~rx, data=colon) #' ggkm(fit, timeby=500) ggkm <- function(sfit, table = TRUE, returns = FALSE, xlabs = "Time", ylabs = "Survival Probability", xlims = c(0,max(sfit$time)), ylims = c(0,1), ystratalabs = NULL, ystrataname = NULL, timeby = 100, main = "Kaplan-Meier Plot", pval = TRUE, marks = FALSE, shape = 3, legend = TRUE, subs = NULL, ...) { ############# # libraries # ############# #Check if the following packages have been installed. If not, install them if (!"ggplot2" %in% installed.packages()) install.packages("ggplot2") if (!"survival" %in% installed.packages()) install.packages("survival") if (!"gridExtra" %in% installed.packages()) install.packages("gridExtra") if (!"reshape" %in% installed.packages()) install.packages("reshape") suppressPackageStartupMessages(library(ggplot2, warn.conflicts=FALSE)) suppressPackageStartupMessages(library(survival, warn.conflicts=FALSE)) suppressPackageStartupMessages(library(gridExtra, warn.conflicts=FALSE)) suppressPackageStartupMessages(library(reshape, warn.conflicts=FALSE)) ################################# # sorting the use of subsetting # ################################# times <- seq(0, max(sfit$time), by = timeby) if(is.null(subs)){ if(length(levels(summary(sfit)$strata)) == 0) { subs1 <- 1 subs2 <- 1:length(summary(sfit,censored=T)$time) subs3 <- 1:length(summary(sfit,times = times,extend = TRUE)$time) } else { subs1 <- 1:length(levels(summary(sfit)$strata)) subs2 <- 1:length(summary(sfit,censored=T)$strata) subs3 <- 1:length(summary(sfit,times = times,extend = TRUE)$strata) } } else{ for(i in 1:length(subs)){ if(i==1){ ssvar <- paste("(?=.*\\b=",subs[i],sep="") } if(i==length(subs)){ ssvar <- paste(ssvar,"\\b)(?=.*\\b=",subs[i],"\\b)",sep="") } if(!i %in% c(1, length(subs))){ ssvar <- paste(ssvar,"\\b)(?=.*\\b=",subs[i],sep="") } if(i==1 & i==length(subs)){ ssvar <- paste("(?=.*\\b=",subs[i],"\\b)",sep="") } } subs1 <- which(regexpr(ssvar,levels(summary(sfit)$strata), perl=T)!=-1) subs2 <- which(regexpr(ssvar,summary(sfit,censored=T)$strata, perl=T)!=-1) subs3 <- which(regexpr(ssvar,summary(sfit,times = times,extend = TRUE)$strata, perl=T)!=-1) } if(!is.null(subs)) pval <- FALSE ################################## # data manipulation pre-plotting # ################################## if(length(levels(summary(sfit)$strata)) == 0) { #[subs1] if(is.null(ystratalabs)) ystratalabs <- as.character(sub("group=*","","All")) } else { #[subs1] if(is.null(ystratalabs)) ystratalabs <- as.character(sub("group=*","",names(sfit$strata))) } if(is.null(ystrataname)) ystrataname <- "Strata" m <- max(nchar(ystratalabs)) times <- seq(0, max(sfit$time), by = timeby) if(length(levels(summary(sfit)$strata)) == 0) { Factor <- factor(rep("All",length(subs2))) } else { Factor <- factor(summary(sfit, censored = T)$strata[subs2]) } #Data to be used in the survival plot .df <- data.frame( time = sfit$time[subs2], n.risk = sfit$n.risk[subs2], n.event = sfit$n.event[subs2], n.censor = sfit$n.censor[subs2], surv = sfit$surv[subs2], strata = Factor, upper = sfit$upper[subs2], lower = sfit$lower[subs2] ) #Final changes to data for survival plot levels(.df$strata) <- ystratalabs zeros <- data.frame(time = 0, surv = 1, strata = factor(ystratalabs, levels=levels(.df$strata)), upper = 1, lower = 1) .df <- rbind.fill(zeros, .df) d <- length(levels(.df$strata)) ################################### # specifying plot parameteres etc # ################################### p <- ggplot( .df, aes(time, surv)) + geom_step(aes(linetype = strata), size = 0.7) + theme_bw() + theme(axis.title.x = element_text(vjust = 0.5)) + scale_x_continuous(xlabs, breaks = times, limits = xlims) + scale_y_continuous(ylabs, limits = ylims) + theme(panel.grid.minor = element_blank()) + # MOVE LEGEND HERE BELOW [first is x dim, second is y dim] theme(legend.position = c(ifelse(m < 10, .85, .75),ifelse(d < 4, .85, .8))) + theme(legend.key = element_rect(colour = NA)) + theme(panel.border = element_blank()) + labs(linetype = ystrataname) + theme(plot.margin = unit(c(0, 1, .5,ifelse(m < 10, 1.5, 2.5)),"lines")) + ggtitle(main) #Removes the legend: if(legend == FALSE) p <- p + theme(legend.position="none") #Add censoring marks to the line: if(marks == TRUE) p <- p + geom_point(data = subset(.df, n.censor >= 1), aes(x = time, y = surv), shape = shape) ## Create a blank plot for place-holding blank.pic <- ggplot(.df, aes(time, surv)) + geom_blank() + theme_bw() + theme(axis.text.x = element_blank(),axis.text.y = element_blank(), axis.title.x = element_blank(),axis.title.y = element_blank(), axis.ticks = element_blank(), panel.grid.major = element_blank(),panel.border = element_blank()) ##################### # p-value placement # #####################a if(length(levels(summary(sfit)$strata)) == 0) pval <- FALSE if(pval == TRUE) { sdiff <- survdiff(eval(sfit$call$formula), data = eval(sfit$call$data)) pvalue <- pchisq(sdiff$chisq,length(sdiff$n) - 1,lower.tail = FALSE) pvaltxt <- ifelse(pvalue < 0.0001,"p < 0.0001",paste("p =", signif(pvalue, 3))) # MOVE P-VALUE LEGEND HERE BELOW [set x and y] p <- p + annotate("text",x = 150, y = 0.1,label = pvaltxt) }#if ################################################### # Create table graphic to include at-risk numbers # ################################################### if(length(levels(summary(sfit)$strata)) == 0) { Factor <- factor(rep("All",length(subs3))) } else { Factor <- factor(summary(sfit,times = times,extend = TRUE)$strata[subs3]) } if(table) { risk.data <- data.frame( strata = Factor, time = summary(sfit,times = times,extend = TRUE)$time[subs3], n.risk = summary(sfit,times = times,extend = TRUE)$n.risk[subs3] ) risk.data$strata <- factor(risk.data$strata, levels=rev(levels(risk.data$strata))) data.table <- ggplot(risk.data,aes(x = time, y = strata, label = format(n.risk, nsmall = 0))) + geom_text(size = 3.5) + theme_bw() + scale_y_discrete(breaks = as.character(levels(risk.data$strata)), labels = rev(ystratalabs)) + scale_x_continuous("Numbers at risk", limits = xlims) + theme(axis.title.x = element_text(size = 10, vjust = 1), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_blank(),axis.text.x = element_blank(), axis.ticks = element_blank(),axis.text.y = element_text(face = "bold",hjust = 1)) data.table <- data.table + theme(legend.position = "none") + xlab(NULL) + ylab(NULL) # ADJUST POSITION OF TABLE FOR AT RISK data.table <- data.table + theme(plot.margin = unit(c(-1.5, 1, 0.1, ifelse(m < 10, 2.5, 3.5) - 0.15 * m), "lines")) ####################### # Plotting the graphs # ####################### grid.arrange(p, blank.pic, data.table, clip = FALSE, nrow = 3, ncol = 1, heights = unit(c(2, .1, .25),c("null", "null", "null"))) if(returns) { a <- arrangeGrob(p, blank.pic, data.table, clip = FALSE, nrow = 3, ncol = 1, heights = unit(c(2, .1, .25), c("null", "null", "null"))) return(a) }#if } else { if(returns) return(p) }#else } ################################################################################################## ################################################################################################## ##################################################################################################