-
-
Save kintero/da8ce35317cec5eae158 to your computer and use it in GitHub Desktop.
Revisions
-
jkeirstead revised this gist
Feb 17, 2015 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -68,7 +68,7 @@ waterfall <- function(df, offset=0.3) { xmax=order + offset, ymin=min, ymax=max, fill=sector)) + scale_x_continuous(breaks=unique(df$order), labels=unique(df$category)) return(gg) } -
jkeirstead revised this gist
Feb 17, 2015 . 3 changed files with 0 additions and 0 deletions.There are no files selected for viewing
File renamed without changes.File renamed without changes.File renamed without changes. -
jkeirstead revised this gist
Feb 17, 2015 . 3 changed files with 0 additions and 0 deletions.There are no files selected for viewing
File renamed without changes.File renamed without changes.File renamed without changes. -
jkeirstead created this gist
Feb 17, 2015 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,41 @@ source("waterfall.r") ## Load the data and set correct column names df <- read.csv("data.csv", stringsAsFactors=FALSE) ## ----further-prep-------------------------------------------------------- ## Tidy the levels df$category <- factor(df$category, levels=unique(df$category)) levels(df$category) <- gsub("flows ", "flows \n", levels(df$category)) levels(df$category) <- gsub(" emissions", "\nemissions", levels(df$category)) df$sector <- factor(df$sector, levels=c("UK", "EU", "Annex 1", "Non-Annex 1", "China", "Other non-Annex 1")) ## ----prepare-plot, echo=TRUE--------------------------------------------- ## Determines the spacing between columns in the waterfall chart offset <- 0.3 gg <- waterfall(df, offset=offset) + coord_cartesian(ylim=c(600, 900)) + scale_fill_manual(guide="none", values=c(rgb(81, 34, 112, max=255), rgb(125, 96, 153, max=255), rgb(116, 173, 226, max=255), rgb(17, 135, 146, max=255), rgb(69, 171, 183, max=255), rgb(17, 135, 146, max=255))) + labs(x="", y="Mt CO2", title="UK embodied emissions balance (import and export) with major regions, 2004") + theme_classic() + annotate("text", x=6, y=838, label="China", colour="white") + annotate("text", x=8 + offset, y=900, hjust=1, vjust=1, size=3, label="Data source: Carbon Trust (2011)", fontface="italic") + theme(plot.title=element_text(face="bold", hjust=0, vjust=2)) ## ----plot, dev='png', fig.width=12, fig.height=7.5----------------------- print(gg) This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,10 @@ category,value,sector UK production emissions,632,UK Carbon flows from EU,88,EU Carbon flows to EU,-61,EU Carbon flows from other Annex 1,82,Annex 1 Carbon flows to other Annex 1,-39,Annex 1 Carbon flows from non-Annex 1,104,Other non-Annex 1 Carbon flows from non-Annex 1,64,China Carbon flows to non-Annex 1,-25,Non-Annex 1 UK consumption emissions,845,UK This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,75 @@ #' Makes a waterfall plot #' #' Makes a waterfall plot using ggplot2. The bars will be plotted in #' the order specified by the factoring of the 'category' column. #' Values should represent the positive or negative changes relative #' to the previous bar. #' #' @param df a dataframe with columns 'category' (an ordered factor), #' 'value' (numeric), and 'sector' (character) #' @param offset the spacing between the columns, default = 0.3 #' #' @examples #' raw <- data.frame(category=c("A", "B", "C", "D"), #' value=c(100, -20, 10, 90), #' sector=1) #' #' df1 <- transform(raw, category=factor(category)) #' waterfall(df1) + theme_bw() + labs(x="", y="Value") #' #' df2 <- transform(raw, category=factor(category, levels=c("A", "C", "B", "D"))) #' waterfall(df2) + theme_bw() + labs(x="", y="Value") #' #' @return a ggplot2 object waterfall <- function(df, offset=0.3) { library(ggplot2) library(scales) library(dplyr) ## Add the order column to the raw data frame and order appropriately df <- df %>% mutate(order=as.numeric(category)) %>% arrange(order) ## The last value needs to be negated so that it goes down to ## zero. Throws a warning if the cumulative sum doesn't match. last.id <- nrow(df) df$value[last.id] <- -df$value[last.id] ## Calculate the cumulative sums df <- df %>% mutate(cs1=cumsum(value)) ## Throw a warning if the values don't match zero as expected final_value <- tail(df$cs1, 1) if (final_value!=0) { warning(sprintf("Final value doesn't return to 0. %.2d instead.", final_value)) } ## Calculate the max and mins for each category and sector df <- transform(df, min.val=c(0, head(cs1, -1)), max.val=c(head(cs1, -1), 0)) df <- df %>% group_by(order, category, sector, value, cs1) %>% summarize(min=min(min.val, max.val), max=max(min.val, max.val)) ## Create the lines data frame to link the bars lines <- df %>% group_by(order) %>% summarize(cs=max(cs1)) lines <- with(lines, data.frame(x=head(order, -1), xend=tail(order, -1), y=head(cs, -1), yend=head(cs, -1))) ## Add the offset parameter df <- transform(df, offset=offset) ## Make the plot gg <- ggplot() + geom_segment(data=lines, aes(x=x, y=y, xend=xend, yend=yend), linetype="dashed") + geom_rect(data=df, aes(xmin=order - offset, xmax=order + offset, ymin=min, ymax=max, fill=sector)) + scale_x_continuous(breaks=df$order, labels=df$category) return(gg) }