Skip to content

Instantly share code, notes, and snippets.

@kintero
Forked from jkeirstead/data.csv
Last active August 29, 2015 14:15
Show Gist options
  • Select an option

  • Save kintero/da8ce35317cec5eae158 to your computer and use it in GitHub Desktop.

Select an option

Save kintero/da8ce35317cec5eae158 to your computer and use it in GitHub Desktop.

Revisions

  1. @jkeirstead jkeirstead revised this gist Feb 17, 2015. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion waterfall.r
    Original file line number Diff line number Diff line change
    @@ -68,7 +68,7 @@ waterfall <- function(df, offset=0.3) {
    xmax=order + offset,
    ymin=min,
    ymax=max, fill=sector)) +
    scale_x_continuous(breaks=df$order, labels=df$category)
    scale_x_continuous(breaks=unique(df$order), labels=unique(df$category))

    return(gg)
    }
  2. @jkeirstead jkeirstead revised this gist Feb 17, 2015. 3 changed files with 0 additions and 0 deletions.
    File renamed without changes.
    File renamed without changes.
    File renamed without changes.
  3. @jkeirstead jkeirstead revised this gist Feb 17, 2015. 3 changed files with 0 additions and 0 deletions.
    File renamed without changes.
    File renamed without changes.
    File renamed without changes.
  4. @jkeirstead jkeirstead created this gist Feb 17, 2015.
    41 changes: 41 additions & 0 deletions blog-post.r
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,41 @@
    source("waterfall.r")

    ## Load the data and set correct column names
    df <- read.csv("data.csv", stringsAsFactors=FALSE)

    ## ----further-prep--------------------------------------------------------
    ## Tidy the levels
    df$category <- factor(df$category, levels=unique(df$category))
    levels(df$category) <- gsub("flows ", "flows \n", levels(df$category))
    levels(df$category) <- gsub(" emissions", "\nemissions", levels(df$category))
    df$sector <- factor(df$sector, levels=c("UK", "EU", "Annex 1", "Non-Annex 1",
    "China", "Other non-Annex 1"))

    ## ----prepare-plot, echo=TRUE---------------------------------------------

    ## Determines the spacing between columns in the waterfall chart
    offset <- 0.3
    gg <- waterfall(df, offset=offset) +
    coord_cartesian(ylim=c(600, 900)) +
    scale_fill_manual(guide="none", values=c(rgb(81, 34, 112, max=255),
    rgb(125, 96, 153, max=255),
    rgb(116, 173, 226, max=255),
    rgb(17, 135, 146, max=255),
    rgb(69, 171, 183, max=255),
    rgb(17, 135, 146, max=255))) +
    labs(x="", y="Mt CO2",
    title="UK embodied emissions balance (import and export) with major regions, 2004") +
    theme_classic() +
    annotate("text", x=6, y=838, label="China", colour="white") +
    annotate("text", x=8 + offset, y=900,
    hjust=1, vjust=1,
    size=3,
    label="Data source: Carbon Trust (2011)",
    fontface="italic") +
    theme(plot.title=element_text(face="bold", hjust=0, vjust=2))


    ## ----plot, dev='png', fig.width=12, fig.height=7.5-----------------------
    print(gg)


    10 changes: 10 additions & 0 deletions data.csv
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,10 @@
    category,value,sector
    UK production emissions,632,UK
    Carbon flows from EU,88,EU
    Carbon flows to EU,-61,EU
    Carbon flows from other Annex 1,82,Annex 1
    Carbon flows to other Annex 1,-39,Annex 1
    Carbon flows from non-Annex 1,104,Other non-Annex 1
    Carbon flows from non-Annex 1,64,China
    Carbon flows to non-Annex 1,-25,Non-Annex 1
    UK consumption emissions,845,UK
    75 changes: 75 additions & 0 deletions waterfall.r
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,75 @@
    #' Makes a waterfall plot
    #'
    #' Makes a waterfall plot using ggplot2. The bars will be plotted in
    #' the order specified by the factoring of the 'category' column.
    #' Values should represent the positive or negative changes relative
    #' to the previous bar.
    #'
    #' @param df a dataframe with columns 'category' (an ordered factor),
    #' 'value' (numeric), and 'sector' (character)
    #' @param offset the spacing between the columns, default = 0.3
    #'
    #' @examples
    #' raw <- data.frame(category=c("A", "B", "C", "D"),
    #' value=c(100, -20, 10, 90),
    #' sector=1)
    #'
    #' df1 <- transform(raw, category=factor(category))
    #' waterfall(df1) + theme_bw() + labs(x="", y="Value")
    #'
    #' df2 <- transform(raw, category=factor(category, levels=c("A", "C", "B", "D")))
    #' waterfall(df2) + theme_bw() + labs(x="", y="Value")
    #'
    #' @return a ggplot2 object
    waterfall <- function(df, offset=0.3) {

    library(ggplot2)
    library(scales)
    library(dplyr)

    ## Add the order column to the raw data frame and order appropriately
    df <- df %>% mutate(order=as.numeric(category)) %>% arrange(order)

    ## The last value needs to be negated so that it goes down to
    ## zero. Throws a warning if the cumulative sum doesn't match.
    last.id <- nrow(df)
    df$value[last.id] <- -df$value[last.id]

    ## Calculate the cumulative sums
    df <- df %>% mutate(cs1=cumsum(value))

    ## Throw a warning if the values don't match zero as expected
    final_value <- tail(df$cs1, 1)
    if (final_value!=0) {
    warning(sprintf("Final value doesn't return to 0. %.2d instead.", final_value))
    }

    ## Calculate the max and mins for each category and sector
    df <- transform(df, min.val=c(0, head(cs1, -1)),
    max.val=c(head(cs1, -1), 0))
    df <- df %>% group_by(order, category, sector, value, cs1) %>%
    summarize(min=min(min.val, max.val), max=max(min.val, max.val))

    ## Create the lines data frame to link the bars
    lines <- df %>% group_by(order) %>% summarize(cs=max(cs1))
    lines <- with(lines, data.frame(x=head(order, -1),
    xend=tail(order, -1),
    y=head(cs, -1),
    yend=head(cs, -1)))


    ## Add the offset parameter
    df <- transform(df, offset=offset)

    ## Make the plot
    gg <- ggplot() +
    geom_segment(data=lines, aes(x=x, y=y, xend=xend, yend=yend), linetype="dashed") +
    geom_rect(data=df, aes(xmin=order - offset,
    xmax=order + offset,
    ymin=min,
    ymax=max, fill=sector)) +
    scale_x_continuous(breaks=df$order, labels=df$category)

    return(gg)
    }