Skip to content

Instantly share code, notes, and snippets.

@milesgrimshaw
Created March 17, 2015 14:42
Show Gist options
  • Select an option

  • Save milesgrimshaw/477b32e661518de9b6c2 to your computer and use it in GitHub Desktop.

Select an option

Save milesgrimshaw/477b32e661518de9b6c2 to your computer and use it in GitHub Desktop.

Revisions

  1. milesgrimshaw created this gist Mar 17, 2015.
    133 changes: 133 additions & 0 deletions mindbody.R
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,133 @@
    # Load libraries
    library(ggplot2)

    # Work from the project folder; the relative file names used by this script
    # are resolved against it
    setwd("~/Dropbox (Personal)/Personal/Github/MindBody/New/")

    # Load the CSV export (first row is the header); as.is keeps text columns
    # as character vectors instead of converting them to factors
    data <- read.csv(file = "mind_new.csv", header = TRUE, as.is = TRUE)

    # Lower-case the country, city and business-name fields
    data[["Country"]] <- tolower(data[["Country"]])
    data[["City"]] <- tolower(data[["City"]])
    data[["Name"]] <- tolower(data[["Name"]])

    # Eliminate duplicates
    # A row is a duplicate when its name, longitude and latitude all match an
    # earlier row; the first occurrence is kept. The three columns are compared
    # directly rather than through one pasted-together string with no separator,
    # so different combinations that happen to concatenate to the same text
    # (e.g. "gym1" + "2.5" versus "gym" + "12.5") are not merged by mistake, and
    # no helper column is left behind in `data`.
    data <- data[!duplicated(data[, c("Name", "Lon", "Lat")]), ]

    # Quick size checks: number of rows, then distinct countries and cities
    dim(data)[1]
    length(unique(data[["Country"]]))
    length(unique(data[["City"]]))

    # Country breakdown
    # One row per distinct country (in order of first appearance) with the
    # number of businesses located there.
    country <- data.frame(Country = unique(data$Country))
    # tabulate(match(...)) counts every country in a single pass over the column
    # and always yields an integer vector. match() pairs NA with NA, so rows
    # with a missing country are counted under the NA key instead of showing 0.
    country$Count <- tabulate(match(data$Country, country$Country),
                              nbins = nrow(country))
    country <- country[order(country$Count, decreasing = FALSE), ]
    summary(country$Count)

    # Countries with more than 10 businesses, for the shorter chart
    country_top <- country[country$Count > 10, ]

    # Re-level Country by count so ggplot2 lays the bars out in count order
    country <- transform(country, Country = reorder(Country, Count))
    country_top <- transform(country_top, Country = reorder(Country, Count))

    # Bar chart of client counts for every country; coord_flip() puts the
    # country names on the vertical axis
    countryplot <- ggplot(data = country, mapping = aes(x = Country, y = Count)) +
      geom_bar(stat = "identity") +
      coord_flip() +
      ggtitle("MindBodyOnline Clients by Country")
    countryplot

    # Same chart for the countries with more than 10 clients. This reuses the
    # countryplot name, so from here on it refers to this second chart.
    countryplot <- ggplot(data = country_top, mapping = aes(x = Country, y = Count)) +
      geom_bar(stat = "identity") +
      coord_flip() +
      ggtitle("MindBodyOnline Clients by Country (10+ Customers)")
    countryplot

    # Give the industry label containing an HTML-escaped apostrophe a shorter,
    # readable name
    data$Industry[data$Industry %in% 'Children&#39;s Programs'] <- 'Child Programs'

    # Store industry as a factor and list the distinct levels
    data$Industry <- factor(x = data$Industry)
    levels(data$Industry)

    # Keep only the rows whose country is the United States
    US <- data[data$Country %in% "united states", ]

    # City breakdown across all countries
    # One row per distinct city name (in order of first appearance) with the
    # number of businesses listed under it.
    city <- data.frame(City = unique(data$City))
    # tabulate(match(...)) counts every city in a single pass over the column
    # and always yields an integer vector. match() pairs NA with NA, so rows
    # with a missing city are counted under the NA key instead of showing 0.
    city$Count <- tabulate(match(data$City, city$City), nbins = nrow(city))

    # Cities with more than 50 businesses, for the chart
    city_top <- city[city$Count > 50, ]

    # Re-level City by count so ggplot2 lays the bars out in count order
    city <- transform(city, City = reorder(City, Count))
    city_top <- transform(city_top, City = reorder(City, Count))

    # Horizontal bar chart of the cities with more than 50 clients
    cityplot <- ggplot(data = city_top, mapping = aes(x = City, y = Count)) +
      geom_bar(stat = "identity") +
      coord_flip() +
      ggtitle("MindBodyOnline Clients by City (50+ Customers)")
    cityplot

    # Count businesses per industry tag.
    #
    # industry: vector of industry tags, one element per business.
    # Returns a data frame with one row per distinct tag, sorted by ascending
    # count, with columns:
    #   Industry - the tag, re-levelled by count so ggplot2 lays the bars out in
    #              count order
    #   Count    - integer number of businesses carrying that tag
    count_by_industry <- function(industry) {
      counts <- data.frame(Industry = unique(industry))
      # Single pass over the input; match() pairs NA with NA, so untagged rows
      # are counted under the NA key instead of showing 0.
      counts$Count <- tabulate(match(industry, counts$Industry),
                               nbins = nrow(counts))
      counts <- counts[order(counts$Count, decreasing = FALSE), ]
      transform(counts, Industry = reorder(Industry, Count))
    }

    # Create a data frame of count by industry tags
    all <- count_by_industry(data$Industry)

    # Data frame by industry just for the U.S.
    us <- count_by_industry(US$Industry)

    # Data frame by industry just for NYC (U.S. rows whose city is "new york")
    NYC <- US[which(US$City == "new york"), ]
    nyc <- count_by_industry(NYC$Industry)

    # Horizontal bar charts of client counts per industry segment:
    # all clients first, then the U.S. only, then New York City only
    allplot <- ggplot(data = all, mapping = aes(x = Industry, y = Count)) +
      geom_bar(stat = "identity") +
      coord_flip() +
      ggtitle("MindBodyOnline Clients by Segment")
    allplot

    usplot <- ggplot(data = us, mapping = aes(x = Industry, y = Count)) +
      geom_bar(stat = "identity") +
      coord_flip() +
      ggtitle("MindBodyOnline Clients by Segment in the U.S.")
    usplot

    nycplot <- ggplot(data = nyc, mapping = aes(x = Industry, y = Count)) +
      geom_bar(stat = "identity") +
      coord_flip() +
      ggtitle("MindBodyOnline Clients by Segment in New York City")
    nycplot

    # Number of business names matching each keyword, for all clients and,
    # where shown, for the U.S. subset
    sum(grepl('crossfit', data$Name))
    sum(grepl('crossfit', US$Name))
    sum(grepl('zumba', data$Name))
    sum(grepl('yoga', data$Name))
    sum(grepl('bikram', data$Name))
    sum(grepl('bikram', US$Name))

    # Save the plots
    # Write one ggplot to an 11 x 8.5 inch PDF.
    #
    # plot: the ggplot object to draw.
    # file: path of the PDF to create.
    # Returns `file` invisibly.
    #
    # print() is called explicitly: a bare plot name is only drawn through
    # top-level auto-printing, which does not happen when the script is run with
    # source(), so the PDF would otherwise come out empty. on.exit() closes the
    # device even if drawing fails. No par() tweaks are made here because
    # ggplot2 draws with grid, which ignores base-graphics settings such as
    # las and mar.
    save_plot_pdf <- function(plot, file) {
      pdf(file = file, width = 11, height = 8.5)
      on.exit(dev.off(), add = TRUE)
      print(plot)
      invisible(file)
    }

    # NOTE(review): countryplot is assigned twice earlier in this script, so the
    # chart saved here is the second one (countries with 10+ customers) --
    # confirm that is the intended content of this file.
    save_plot_pdf(countryplot, "MindBody Countries.pdf")
    save_plot_pdf(cityplot, "MindBody Cities.pdf")
    save_plot_pdf(allplot, "MindBody Industry Segments.pdf")
    save_plot_pdf(usplot, "MindBody Industry Segments US.pdf")