kintero · August 29, 2015 14:15 · Feb 17, 2015 · Feb 17, 2015 · Feb 17, 2015 · Feb 17, 2015
diff --git a/waterfall.r b/waterfall.r
@@ -68,7 +68,7 @@ waterfall <- function(df, offset=0.3) {
                           xmax=order + offset, 
                           ymin=min,
                           ymax=max, fill=sector)) +
-                              scale_x_continuous(breaks=df$order, labels=df$category)
+                              scale_x_continuous(breaks=unique(df$order), labels=unique(df$category))
 
     return(gg)
 }

diff --git a/2-data.csv → data.csv b/2-data.csv → data.csv
diff --git a/3-blog-post.r → waterfall-demo.r b/3-blog-post.r → waterfall-demo.r
diff --git a/1-waterfall.r → waterfall.r b/1-waterfall.r → waterfall.r
diff --git a/waterfall.r → 1-waterfall.r b/waterfall.r → 1-waterfall.r
diff --git a/data.csv → 2-data.csv b/data.csv → 2-data.csv
diff --git a/blog-post.r → 3-blog-post.r b/blog-post.r → 3-blog-post.r
diff --git a/blog-post.r b/blog-post.r
@@ -0,0 +1,41 @@
+source("waterfall.r")
+
+## Load the data and set correct column names
+df <- read.csv("data.csv", stringsAsFactors=FALSE)
+
+## ----further-prep--------------------------------------------------------
+## Tidy the levels
+df$category <- factor(df$category, levels=unique(df$category))
+levels(df$category) <- gsub("flows ", "flows \n", levels(df$category))
+levels(df$category) <- gsub(" emissions", "\nemissions", levels(df$category))
+df$sector <- factor(df$sector, levels=c("UK", "EU", "Annex 1", "Non-Annex 1",
+                                   "China", "Other non-Annex 1"))
+
+## ----prepare-plot, echo=TRUE---------------------------------------------
+
+## Determines the spacing between columns in the waterfall chart
+offset <- 0.3
+gg <- waterfall(df, offset=offset) +
+    coord_cartesian(ylim=c(600, 900)) +
+    scale_fill_manual(guide="none", values=c(rgb(81, 34, 112, max=255),
+                                        rgb(125, 96, 153, max=255),
+                                        rgb(116, 173, 226, max=255),
+                                        rgb(17, 135, 146, max=255),
+                                        rgb(69, 171, 183, max=255),
+                                        rgb(17, 135, 146, max=255))) +
+    labs(x="", y="Mt CO2", 
+         title="UK embodied emissions balance (import and export) with major regions, 2004") +
+    theme_classic() +
+    annotate("text", x=6, y=838, label="China", colour="white") +
+    annotate("text", x=8 + offset, y=900,
+             hjust=1, vjust=1,
+             size=3,
+             label="Data source: Carbon Trust (2011)",
+             fontface="italic") +
+    theme(plot.title=element_text(face="bold", hjust=0, vjust=2))
+
+
+## ----plot, dev='png', fig.width=12, fig.height=7.5-----------------------
+print(gg)
+
+
diff --git a/data.csv b/data.csv
@@ -0,0 +1,10 @@
+category,value,sector
+UK production emissions,632,UK
+Carbon flows from EU,88,EU
+Carbon flows to EU,-61,EU
+Carbon flows from other Annex 1,82,Annex 1
+Carbon flows to other Annex 1,-39,Annex 1
+Carbon flows from non-Annex 1,104,Other non-Annex 1
+Carbon flows from non-Annex 1,64,China
+Carbon flows to non-Annex 1,-25,Non-Annex 1
+UK consumption emissions,845,UK
diff --git a/waterfall.r b/waterfall.r
@@ -0,0 +1,75 @@
+#' Makes a waterfall plot
+#'
+#' Makes a waterfall plot using ggplot2.  The bars will be plotted in
+#' the order specified by the factoring of the 'category' column.
+#' Values should represent the positive or negative changes relative
+#' to the previous bar.  
+#'
+#' @param df a dataframe with columns 'category' (an ordered factor),
+#' 'value' (numeric), and 'sector' (character)
+#' @param offset the spacing between the columns, default = 0.3
+#'
+#' @examples
+#'  raw <- data.frame(category=c("A", "B", "C", "D"),
+#'                    value=c(100, -20, 10, 90),
+#'                    sector=1)
+#'
+#'  df1 <- transform(raw, category=factor(category))                
+#'  waterfall(df1) + theme_bw() + labs(x="", y="Value")
+#'
+#'  df2 <- transform(raw, category=factor(category, levels=c("A", "C", "B", "D")))
+#'  waterfall(df2) + theme_bw() + labs(x="", y="Value")
+#'
+#' @return a ggplot2 object
+waterfall <- function(df, offset=0.3) {
+
+    library(ggplot2)
+    library(scales)
+    library(dplyr)
+
+    ## Add the order column to the raw data frame and order appropriately
+    df <- df %>% mutate(order=as.numeric(category)) %>% arrange(order)
+
+    ## The last value needs to be negated so that it goes down to
+    ## zero.  Throws a warning if the cumulative sum doesn't match.
+    last.id <- nrow(df)
+    df$value[last.id] <- -df$value[last.id]
+
+    ## Calculate the cumulative sums
+    df <- df %>% mutate(cs1=cumsum(value))
+
+    ## Throw a warning if the values don't match zero as expected
+    final_value <- tail(df$cs1, 1)
+    if (final_value!=0) {
+        warning(sprintf("Final value doesn't return to 0.  %.2d instead.", final_value))
+    }
+
+    ## Calculate the max and mins for each category and sector
+    df <- transform(df, min.val=c(0, head(cs1, -1)),
+                    max.val=c(head(cs1, -1), 0))    
+    df <- df %>% group_by(order, category, sector, value, cs1) %>%
+        summarize(min=min(min.val, max.val), max=max(min.val, max.val))
+
+    ## Create the lines data frame to link the bars
+    lines <- df %>% group_by(order) %>% summarize(cs=max(cs1))
+    lines <- with(lines, data.frame(x=head(order, -1),
+                                    xend=tail(order, -1),
+                                    y=head(cs, -1),
+                                    yend=head(cs, -1)))
+
+
+    ## Add the offset parameter
+    df <- transform(df, offset=offset)
+
+    ## Make the plot    
+    gg <- ggplot() +
+        geom_segment(data=lines, aes(x=x, y=y, xend=xend, yend=yend), linetype="dashed")  +
+            geom_rect(data=df, aes(xmin=order - offset,
+                          xmax=order + offset, 
+                          ymin=min,
+                          ymax=max, fill=sector)) +
+                              scale_x_continuous(breaks=df$order, labels=df$category)
+
+    return(gg)
+}
+
No results found