#' Create a Kaplan-Meier plot using ggplot2 #' #' @param sfit: a survfit object #' @param table: logical: Create a table graphic below the K-M plot, indicating at-risk numbers? #' @param returns logical: if TRUE, return an arrangeGrob object #' @param xlabs: x-axis label #' @param ylabs: y-axis label #' @param ystratalabs: The strata labels. Default = levels(summary(sfit)$strata) #' @param ystrataname: The legend name. Default = "Strata" #' @param timeby numeric: control the granularity along the time-axis #' @param main plot title #' @param pval logical: add the pvalue to the plot? #' @param marks logical: should censoring marks be added? #' @param shape: what shape should the censoring marks be, default is a vertical line #' @param legend logical: should a legend be added to the plot? #' #' @return a ggplot is made. if return=TRUE, then an arrangeGlob object #' is returned #' @author Abhijit Dasgupta with contributions by Gil Tomas #' \url{http://statbandit.wordpress.com/2011/03/08/an-enhanced-kaplan-meier-plot/} #' slight adjustment to cope with none strata calls (e.g. Surv(time,event)~1), #' option to remove the legend and also draw marks at censoring locations by Nadieh Bremer #' #' @examples #' library(survival) #' data(colon) #' fit <- survfit(Surv(time,status)~rx, data=colon) #' ggkm(fit, timeby=500) ggkm <- function(sfit, table = TRUE, returns = FALSE, xlabs = "Time", ylabs = "Survival Probability", xlims = c(0,max(sfit$time)), ylims = c(0,1), ystratalabs = NULL, ystrataname = NULL, timeby = 100, main = "Kaplan-Meier Plot", pval = TRUE, marks = FALSE, shape = 3, legend = TRUE, subs = NULL, ...) { ############# # libraries # ############# #Check if the following packages have been installed. If not, install them if (!"ggplot2" %in% installed.packages()) install.packages("ggplot2") if (!"survival" %in% installed.packages()) install.packages("survival") if (!"gridExtra" %in% installed.packages()) install.packages("gridExtra") if (!"reshape" %in% installed.packages()) install.packages("reshape") suppressPackageStartupMessages(library(ggplot2, warn.conflicts=FALSE)) suppressPackageStartupMessages(library(survival, warn.conflicts=FALSE)) suppressPackageStartupMessages(library(gridExtra, warn.conflicts=FALSE)) suppressPackageStartupMessages(library(reshape, warn.conflicts=FALSE)) ################################# # sorting the use of subsetting # ################################# times <- seq(0, max(sfit$time), by = timeby) if(is.null(subs)){ if(length(levels(summary(sfit)$strata)) == 0) { subs1 <- 1 subs2 <- 1:length(summary(sfit,censored=T)$time) subs3 <- 1:length(summary(sfit,times = times,extend = TRUE)$time) } else { subs1 <- 1:length(levels(summary(sfit)$strata)) subs2 <- 1:length(summary(sfit,censored=T)$strata) subs3 <- 1:length(summary(sfit,times = times,extend = TRUE)$strata) } } else{ for(i in 1:length(subs)){ if(i==1){ ssvar <- paste("(?=.*\\b=",subs[i],sep="") } if(i==length(subs)){ ssvar <- paste(ssvar,"\\b)(?=.*\\b=",subs[i],"\\b)",sep="") } if(!i %in% c(1, length(subs))){ ssvar <- paste(ssvar,"\\b)(?=.*\\b=",subs[i],sep="") } if(i==1 & i==length(subs)){ ssvar <- paste("(?=.*\\b=",subs[i],"\\b)",sep="") } } subs1 <- which(regexpr(ssvar,levels(summary(sfit)$strata), perl=T)!=-1) subs2 <- which(regexpr(ssvar,summary(sfit,censored=T)$strata, perl=T)!=-1) subs3 <- which(regexpr(ssvar,summary(sfit,times = times,extend = TRUE)$strata, perl=T)!=-1) } if(!is.null(subs)) pval <- FALSE ################################## # data manipulation pre-plotting # ################################## if(length(levels(summary(sfit)$strata)) == 0) { #[subs1] if(is.null(ystratalabs)) ystratalabs <- as.character(sub("group=*","","All")) } else { #[subs1] if(is.null(ystratalabs)) ystratalabs <- as.character(sub("group=*","",names(sfit$strata))) } if(is.null(ystrataname)) ystrataname <- "Strata" m <- max(nchar(ystratalabs)) times <- seq(0, max(sfit$time), by = timeby) if(length(levels(summary(sfit)$strata)) == 0) { Factor <- factor(rep("All",length(subs2))) } else { Factor <- factor(summary(sfit, censored = T)$strata[subs2]) } #Data to be used in the survival plot .df <- data.frame( time = sfit$time[subs2], n.risk = sfit$n.risk[subs2], n.event = sfit$n.event[subs2], n.censor = sfit$n.censor[subs2], surv = sfit$surv[subs2], strata = Factor, upper = sfit$upper[subs2], lower = sfit$lower[subs2] ) #Final changes to data for survival plot levels(.df$strata) <- ystratalabs zeros <- data.frame(time = 0, surv = 1, strata = factor(ystratalabs, levels=levels(.df$strata)), upper = 1, lower = 1) .df <- rbind.fill(zeros, .df) d <- length(levels(.df$strata)) ################################### # specifying plot parameteres etc # ################################### p <- ggplot( .df, aes(time, surv)) + geom_step(aes(linetype = strata), size = 0.7) + theme_bw() + theme(axis.title.x = element_text(vjust = 0.5)) + scale_x_continuous(xlabs, breaks = times, limits = xlims) + scale_y_continuous(ylabs, limits = ylims) + theme(panel.grid.minor = element_blank()) + # MOVE LEGEND HERE BELOW [first is x dim, second is y dim] theme(legend.position = c(ifelse(m < 10, .85, .75),ifelse(d < 4, .85, .8))) + theme(legend.key = element_rect(colour = NA)) + theme(panel.border = element_blank()) + labs(linetype = ystrataname) + theme(plot.margin = unit(c(0, 1, .5,ifelse(m < 10, 1.5, 2.5)),"lines")) + ggtitle(main) #Removes the legend: if(legend == FALSE) p <- p + theme(legend.position="none") #Add censoring marks to the line: if(marks == TRUE) p <- p + geom_point(data = subset(.df, n.censor >= 1), aes(x = time, y = surv), shape = shape) ## Create a blank plot for place-holding blank.pic <- ggplot(.df, aes(time, surv)) + geom_blank() + theme_bw() + theme(axis.text.x = element_blank(),axis.text.y = element_blank(), axis.title.x = element_blank(),axis.title.y = element_blank(), axis.ticks = element_blank(), panel.grid.major = element_blank(),panel.border = element_blank()) ##################### # p-value placement # #####################a if(length(levels(summary(sfit)$strata)) == 0) pval <- FALSE if(pval == TRUE) { sdiff <- survdiff(eval(sfit$call$formula), data = eval(sfit$call$data)) pvalue <- pchisq(sdiff$chisq,length(sdiff$n) - 1,lower.tail = FALSE) pvaltxt <- ifelse(pvalue < 0.0001,"p < 0.0001",paste("p =", signif(pvalue, 3))) # MOVE P-VALUE LEGEND HERE BELOW [set x and y] p <- p + annotate("text",x = 150, y = 0.1,label = pvaltxt) }#if ################################################### # Create table graphic to include at-risk numbers # ################################################### if(length(levels(summary(sfit)$strata)) == 0) { Factor <- factor(rep("All",length(subs3))) } else { Factor <- factor(summary(sfit,times = times,extend = TRUE)$strata[subs3]) } if(table) { risk.data <- data.frame( strata = Factor, time = summary(sfit,times = times,extend = TRUE)$time[subs3], n.risk = summary(sfit,times = times,extend = TRUE)$n.risk[subs3] ) risk.data$strata <- factor(risk.data$strata, levels=rev(levels(risk.data$strata))) data.table <- ggplot(risk.data,aes(x = time, y = strata, label = format(n.risk, nsmall = 0))) + geom_text(size = 3.5) + theme_bw() + scale_y_discrete(breaks = as.character(levels(risk.data$strata)), labels = rev(ystratalabs)) + scale_x_continuous("Numbers at risk", limits = xlims) + theme(axis.title.x = element_text(size = 10, vjust = 1), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.border = element_blank(),axis.text.x = element_blank(), axis.ticks = element_blank(),axis.text.y = element_text(face = "bold",hjust = 1)) data.table <- data.table + theme(legend.position = "none") + xlab(NULL) + ylab(NULL) # ADJUST POSITION OF TABLE FOR AT RISK data.table <- data.table + theme(plot.margin = unit(c(-1.5, 1, 0.1, ifelse(m < 10, 2.5, 3.5) - 0.15 * m), "lines")) ####################### # Plotting the graphs # ####################### grid.arrange(p, blank.pic, data.table, clip = FALSE, nrow = 3, ncol = 1, heights = unit(c(2, .1, .25),c("null", "null", "null"))) if(returns) { a <- arrangeGrob(p, blank.pic, data.table, clip = FALSE, nrow = 3, ncol = 1, heights = unit(c(2, .1, .25), c("null", "null", "null"))) return(a) }#if } else { if(returns) return(p) }#else } ################################################################################################## ################################################################################################## ##################################################################################################