Skip to content

Instantly share code, notes, and snippets.

@jdavidson
Created January 13, 2014 19:25
Show Gist options
  • Select an option

  • Save jdavidson/8406379 to your computer and use it in GitHub Desktop.

Select an option

Save jdavidson/8406379 to your computer and use it in GitHub Desktop.

Revisions

  1. jdavidson created this gist Jan 13, 2014.
    114 changes: 114 additions & 0 deletions lifetime.R
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,114 @@
    library(ggplot2)
    library(ggthemes)
    library(plyr)
    library(lubridate)
    library(scales)
    library(data.table)
    options(scipen=999)
    options(stringsAsFactors = FALSE)

    # Load the CrunchBase funding-round export.
    rounds <- read.csv("2014-01-06-crunchbase_monthly_export_rounds.csv")

    # Keep only the early venture-style rounds ("series-c+" is deliberately left
    # out of the list) and drop rows carrying the placeholder month "1960-01".
    rounds <- subset(
      rounds,
      funding_round_type %in% c("venture", "angel", "series-a", "series-b") &
        funded_month != "1960-01"
    )

    # Turn the "YYYY-MM" month into a date on the first day of that month.
    rounds$funded_at <- ymd(paste0(rounds$funded_month, "-01"))

    # Bucket the amount raised into left-closed USD ranges
    # (an earlier idea was to round to the nearest 500k instead).
    rounds$round_raised_amount_usd <- cut(
      rounds$raised_amount_usd,
      breaks = c(0, 0.5, 1, 5, 10, 20, 40, 80, Inf) * 1e6,
      right = FALSE
    )

    # dedup: keep a single row per company / funding month / round type
    rounds <- data.table(rounds)
    setkeyv(rounds, c("company_name", "funded_at", "funding_round_type"))
    # Name the key columns explicitly. unique() on a data.table compares all
    # columns by default in current data.table releases, so relying on the key
    # implicitly would keep rows that differ only in some other column.
    rounds <- unique(rounds, by = key(rounds))

    # restrict to companies first funded after 2008
    # Earliest funding date seen for each company.
    companies <- rounds[, list(first_funded_at = min(funded_at)), by = company_name]
    # Attach first_funded_at to every round. No `by` is given, so the join is
    # presumably on the shared company_name column -- TODO confirm.
    rounds <- join(rounds, companies)
    # Strict comparison: funded_at is always the first day of a month, so a
    # company whose first round is dated 2008-01-01 is excluded.
    rounds <- subset(rounds, first_funded_at > ymd("2008-01-01"))

    # fix strange difference in units from diff
    #
    # Lagged differences of a date-time vector expressed in 30-day "months":
    # the gap between x[i + lag] and x[i] is taken in days, divided by 30 and
    # rounded.
    #
    # x:   vector of dates / date-times accepted by difftime().
    # lag: number of positions between the two elements being compared.
    #
    # Returns max(length(x) - lag, 0) values (the rounded difftime result).
    my.diff <- function(x, lag=1) {
      n <- length(x)
      # Not enough elements to form a single lagged pair. Without this guard the
      # index ranges (1+lag):n and 1:(n-lag) count backwards, and a one-element
      # input produced two bogus values instead of none.
      if (n <= lag) {
        return(round(difftime(x[0], x[0], units="days") / 30))
      }
      later <- x[seq.int(1 + lag, n)]
      earlier <- x[seq_len(n - lag)]
      round(difftime(later, earlier, units="days") / 30)
    }

    # round sequences
    # Number each company's rounds 1, 2, ... in their current row order.
    # NOTE(review): `:=` is data.table's by-reference assignment, so `rounds`
    # presumably gains these columns as well -- confirm if that matters.
    rounds_index <- rounds[, id := seq_along(funded_at), by=company_name]
    # diff = gap (in 30-day months, via my.diff) from a round to the company's
    # next round; an NA is appended for the company's last round.
    rounds_index <- rounds_index[, diff := c(my.diff(funded_at), NA), by=company_name]
    # lifetime starts as a copy of diff ...
    rounds_index$lifetime <- rounds_index$diff
    # ... and rounds without a later round (NA) instead get the number of 30-day
    # months between their funded_at and the latest funded_at in the data.
    rounds_index[is.na(rounds_index$lifetime),]$lifetime <- round(as.numeric(difftime(max(rounds_index$funded_at), rounds_index[is.na(rounds_index$lifetime),]$funded_at, units="days") / 30))

    # rounds_index <- ddply(rounds, .(company_name), transform, index=seq_along(funded_at), diff=c(my.diff(funded_at), NA))


    # aggregate
    # Per round type: number of rounds plus median / mean months to the next round.
    # TRUE is spelled out throughout: `T` is an ordinary variable that can be
    # reassigned (or shadowed by a column inside ddply / data.table scopes).
    medians <- ddply(
      rounds_index, .(funding_round_type), summarize,
      rounds=length(id), median=median(diff, na.rm=TRUE), mean=mean(diff, na.rm=TRUE)
    )
    medians <- medians[order(medians$median),]

    # Rounds that did get a follow-on, counted per round type and months-to-next.
    diff_summary <- rounds_index[, list(rounds = length(id)), by = c("funding_round_type", "diff")]
    setnames(diff_summary, "diff", "lifetime")
    diff_summary <- diff_summary[!is.na(diff_summary$lifetime),]
    # Sorted descending so the running sum at a given lifetime counts the rounds
    # whose follow-on came at that many months or later.
    diff_summary <- diff_summary[order(funding_round_type, lifetime, decreasing=TRUE),]
    diff_summary <- diff_summary[, cum_rounds := cumsum(rounds), by= funding_round_type]

    # The same running count over the lifetime column, which also has a value
    # for rounds without a follow-on.
    round_lifetimes <- rounds_index[, list(total_rounds = length(id)), by = c("funding_round_type", "lifetime")]
    round_lifetimes <- round_lifetimes[order(funding_round_type, lifetime, decreasing=TRUE),]
    round_lifetimes <- round_lifetimes[, cum_total_rounds := cumsum(total_rounds), by= funding_round_type]
    # No `by` is given; presumably joined on the shared funding_round_type and
    # lifetime columns -- TODO confirm.
    diff_summary <- join(diff_summary, round_lifetimes)
    diff_summary <- diff_summary[order(funding_round_type, lifetime),]
    diff_summary$percent <- diff_summary$rounds / diff_summary$cum_total_rounds

    # Raw number of follow-on financings by months since the previous round.
    ggplot(diff_summary, aes(x=lifetime, y=rounds, color=funding_round_type)) +
      geom_point() +
      scale_x_continuous(breaks = 0:4 * 12, limits=c(0,48)) +
      geom_smooth() +
      ggtitle("Financings") +
      ylab("Financings") +
      xlab("Months After Funding") +
      scale_color_discrete(name = "Round")
    # Ratio of the two running counts; this is the chart that gets saved, since
    # ggsave() is called without an explicit plot.
    ggplot(diff_summary, aes(x=lifetime, y= cum_rounds / cum_total_rounds, color=funding_round_type)) +
      geom_line() +
      scale_x_continuous(breaks = 0:4 * 12, limits=c(0,48)) +
      ggtitle("Likelihood Of Raising A Follow On Round By Time") +
      ylab("Percent of Companies that Raise a Follow On Round") +
      xlab("Months After Funding") +
      scale_y_continuous(labels = percent_format()) +
      scale_color_discrete(name = "Round")
    ggsave("follow-on-likelihood-by-time.png")

    #### round_raised_amount_usd
    # Follow-on timing split by round type and by the bucketed amount raised.
    # TRUE is spelled out throughout: `T` is an ordinary variable that can be
    # reassigned (or shadowed by a column inside ddply / data.table scopes).
    medians <- ddply(
      rounds_index, .(funding_round_type, round_raised_amount_usd), summarize,
      rounds=length(id), median=median(diff, na.rm=TRUE), mean=mean(diff, na.rm=TRUE)
    )
    # NOTE(review): the sorted table is not assigned back here, so this line only
    # auto-prints it when run at top level -- confirm that is intended.
    medians[order(medians$funding_round_type, medians$median),]

    # Rounds that did get a follow-on, counted per type / amount bucket / months-to-next.
    diff_summary <- rounds_index[, list(rounds = length(id)), by = c("funding_round_type", "round_raised_amount_usd", "diff")]
    setnames(diff_summary, "diff", "lifetime")
    diff_summary <- diff_summary[!is.na(diff_summary$lifetime),]
    # Sorted descending so the running sum at a given lifetime counts the rounds
    # whose follow-on came at that many months or later.
    diff_summary <- diff_summary[order(funding_round_type, round_raised_amount_usd, lifetime, decreasing=TRUE),]
    diff_summary <- diff_summary[, cum_rounds := cumsum(rounds), by= c("funding_round_type", "round_raised_amount_usd")]

    # The same running count over the lifetime column, which also has a value
    # for rounds without a follow-on.
    round_lifetimes <- rounds_index[, list(total_rounds = length(id)), by = c("funding_round_type", "round_raised_amount_usd", "lifetime")]
    round_lifetimes <- round_lifetimes[order(funding_round_type, round_raised_amount_usd, lifetime, decreasing=TRUE),]
    round_lifetimes <- round_lifetimes[, cum_total_rounds := cumsum(total_rounds), by=c("funding_round_type", "round_raised_amount_usd")]
    # No `by` is given; presumably joined on all shared columns -- TODO confirm.
    diff_summary <- join(diff_summary, round_lifetimes)
    diff_summary <- diff_summary[order(funding_round_type, round_raised_amount_usd, lifetime),]
    diff_summary$percent <- diff_summary$rounds / diff_summary$cum_total_rounds
    # Rounds with no amount bucket are dropped only after the counts are built.
    diff_summary <- diff_summary[!is.na(diff_summary$round_raised_amount_usd),]

    # Exploratory charts for angel rounds only (displayed, not saved).
    ggplot(subset(diff_summary, funding_round_type == "angel"), aes(x=lifetime, y=rounds, color= as.factor(round_raised_amount_usd))) +
      geom_point() +
      xlim(0,48) +
      geom_smooth() +
      ggtitle("") +
      ylab("Rounds") +
      xlab("Months After Funding")
    ggplot(subset(diff_summary, funding_round_type == "angel"), aes(x=lifetime, y= cum_rounds / cum_total_rounds, color= as.factor(round_raised_amount_usd))) +
      geom_line() +
      xlim(0,48) +
      ggtitle("") +
      ylab("Likelihood of a Follow On Round") +
      xlab("Months After Funding") +
      scale_y_continuous(labels = percent_format())

    # Fix the facet order and replace the interval labels produced by cut() with
    # readable dollar ranges.
    diff_summary$funding_round_type <- factor(diff_summary$funding_round_type, levels=c("angel", "venture", "series-a", "series-b"))
    diff_summary$round_raised_amount_usd <- mapvalues(
      diff_summary$round_raised_amount_usd,
      from = c("[0,5e+05)", "[5e+05,1e+06)", "[1e+06,5e+06)", "[5e+06,1e+07)", "[1e+07,2e+07)", "[2e+07,4e+07)", "[4e+07,8e+07)", "[8e+07,Inf)"),
      to = c("$0-.5M", "$.5-1M", "$1-5M", "$5-10M", "$10-20M", "$20-40M", "$40-80M", "$80M+")
    )

    # Only points backed by more than 20 rounds are drawn, and the $80M+ bucket
    # is left out; this is the chart that gets saved.
    ggplot(subset(diff_summary, cum_total_rounds > 20 & round_raised_amount_usd != "$80M+"), aes(x=lifetime, y= cum_rounds / cum_total_rounds, color=round_raised_amount_usd)) +
      geom_line() +
      xlim(0,48) +
      ggtitle("Follow On Likelihood by Round and Amount") +
      ylab("Likelihood of a Follow On Round") +
      xlab("Months After Funding") +
      scale_y_continuous(labels = percent_format()) +
      facet_wrap(~ funding_round_type) +
      scale_colour_few()
    ggsave("follow-on-likelihood-by-round-size.png")


    #### company_category_code
    # Follow-on timing split by round type and company category.
    # TRUE is spelled out throughout: `T` is an ordinary variable that can be
    # reassigned (or shadowed by a column inside ddply / data.table scopes).
    medians <- ddply(
      rounds_index, .(funding_round_type, company_category_code), summarize,
      rounds=length(id), median=median(diff, na.rm=TRUE), mean=mean(diff, na.rm=TRUE)
    )
    medians <- medians[order(medians$funding_round_type, medians$median),]

    # Rounds that did get a follow-on, counted per type / category / months-to-next.
    diff_summary <- rounds_index[, list(rounds = length(id)), by = c("funding_round_type", "company_category_code", "diff")]
    setnames(diff_summary, "diff", "lifetime")
    diff_summary <- diff_summary[!is.na(diff_summary$lifetime),]
    # Sorted descending so the running sum at a given lifetime counts the rounds
    # whose follow-on came at that many months or later.
    diff_summary <- diff_summary[order(funding_round_type, company_category_code, lifetime, decreasing=TRUE),]
    diff_summary <- diff_summary[, cum_rounds := cumsum(rounds), by= c("funding_round_type", "company_category_code")]

    # The same running count over the lifetime column, which also has a value
    # for rounds without a follow-on.
    round_lifetimes <- rounds_index[, list(total_rounds = length(id)), by = c("funding_round_type", "company_category_code", "lifetime")]
    round_lifetimes <- round_lifetimes[order(funding_round_type, company_category_code, lifetime, decreasing=TRUE),]
    round_lifetimes <- round_lifetimes[, cum_total_rounds := cumsum(total_rounds), by=c("funding_round_type", "company_category_code")]
    # No `by` is given; presumably joined on all shared columns -- TODO confirm.
    diff_summary <- join(diff_summary, round_lifetimes)
    diff_summary <- diff_summary[order(funding_round_type, company_category_code, lifetime),]
    diff_summary$percent <- diff_summary$rounds / diff_summary$cum_total_rounds

    # Categories ranked by number of distinct companies; the plots below are
    # limited to the nine largest.
    category_counts <- ddply(rounds, .(company_category_code), summarize, counts=length(unique(company_name)))
    category_counts <- category_counts[order(category_counts$counts, decreasing=TRUE),]

    # Raw follow-on counts per category (displayed, not saved).
    ggplot(subset(diff_summary, company_category_code %in% category_counts[1:9, "company_category_code"]), aes(x=lifetime, y=rounds, color=funding_round_type)) +
      geom_point() +
      xlim(0,48) +
      geom_smooth() +
      ggtitle("") +
      ylab("Rounds") +
      xlab("Months After Funding") +
      facet_wrap(~ company_category_code)
    # Ratio of the two running counts per category; this is the chart that gets saved.
    ggplot(subset(diff_summary, company_category_code %in% category_counts[1:9, "company_category_code"]), aes(x=lifetime, y= cum_rounds / cum_total_rounds, color= funding_round_type)) +
      geom_line() +
      xlim(0,48) +
      ggtitle("Follow On Likelihood by Category") +
      ylab("Likelihood of a Follow On Round") +
      xlab("Months After Funding") +
      scale_y_continuous(labels = percent_format()) +
      facet_wrap(~ company_category_code)
    ggsave("follow-on-likelihood-by-round-category.png")