# http://www.amazon.com/Lattice-Multivariate-Data-Visualization-Use/dp/0387759689/ref=cm_cr_pr_product_top library(rbokeh) library(dplyr) library(pipeR) bp <- figure( height = 400, width = 700 ) data(Chem97,package = "mlmRev") ##### figure 1_01 ##### bp %>% ly_hist( gcsescore, Chem97, breaks = seq(0,8,0.5) ) # now do the facetted version lapply( as.character(sort(unique(Chem97$score))) , function(s){ figure( 300, 300, title = paste0("Score: ",s) ) %>% ly_hist( gcsescore ,filter(Chem97, score == as.numeric(s)) ,breaks = seq(0,8,0.5) ) } ) %>% grid_plot( nrow = 2, ncol = 3, same_axes=T ) ##### figure 1_02 ##### bp %>% ly_hist( gcsescore, Chem97, breaks = seq(0,8,0.5) ) # now do the facetted version lapply( as.character(sort(unique(Chem97$score))) , function(s){ figure( 300, 300, title = paste0("Score: ",s) ) %>% ly_density( gcsescore , data = filter(Chem97, score == as.numeric(s)) # get error with color mapping , color = substr(RColorBrewer::brewer.pal(name="Dark2",n=6)[as.numeric(s)/2+1],2,7) ) } ) %>>% (~show(grid_plot( ., nrow = 2, ncol = 3, same_axes=T ))) %>>% (~show(grid_plot( ., nrow = 2, ncol = 3, same_axes=T, byrow = F ))) %>>% (~show(grid_plot( ., nrow = 3, ncol = 2, same_axes=T ))) %>>% (grid_plot( ., nrow = 3, ncol = 2, same_axes=T, byrow = F )) ##### figure 1_03 ##### local({ bp2<- bp lapply( as.character(sort(unique(Chem97$score))) , function(s){ bp2 <<- bp2 %>% ly_density( gcsescore , data = filter(Chem97, score == as.numeric(s)) # get error with color mapping , color = substr(RColorBrewer::brewer.pal(name="Dark2",n=6)[as.numeric(s)/2+1],2,7) ) } ) bp2 }) ##### figure 2_01 ##### data(Oats, package = "MEMSS") apply( unique(expand.grid( levels(Oats$Block) , levels(Oats$Variety) , stringsAsFactors = F ) ) , MARGIN = 1 , function(tuple){ figure( width = 400, height = 400, title = paste0(tuple) ) %>>% ly_lines( nitro, yield , data = Oats %>>% filter( Variety == tuple[[2]] & Block == tuple[[1]] ) %>>% group_by ( nitro ) %>>% summarise ( yield = mean( yield ) ) , line_color = substr( RColorBrewer::brewer.pal(n=3,name="Set1")[which(tuple[[2]]==levels(Oats$Variety))] ,2,7 ) ) } ) -> bp2 bp2 %>% grid_plot( nrow = 6, ncol = 3, same_axes = T, byrow = F) ##### figure 2_02 ##### grid_plot(bp2[seq(1,18,6)], nrow = 1, ncol = 3, same_axes = T ) ##### figure 2_06 ##### data(barley,package="lattice") # no facet to get started bp %>% ly_points( yield, variety, barley, color = year, hover = list( variety, yield ) ) # now show facet lapply( levels( barley$site ) ,function(s){ figure( height = 200, width = 700, title = s ) %>% ly_points( yield, variety # for fun do without dplyr , data = subset(barley, site == s) , color = year , hover = list( variety, yield ) , size = 6 ) } ) %>% grid_plot( nrow = length(.), ncol = 1, same_axes = T ) ##### figure 2_07 ##### # no facet first bp %>% ly_lines( nitro, yield ,data = Oats ,color = Variety ,group = Variety ) # now with facets lapply( levels(Oats$Block) ,function(b){ figure( height = 400, width = 200, title = b ) %>>% ly_lines( nitro, yield ,data = subset( Oats, Block == b ) ,color = Variety ,group = Variety ) } ) %>>% grid_plot( nrow = 1, ncol = length(.), same_axes = T ) ##### figure 2_08 - 2_11 ##### # bar charts not yet supported but support is in process ##### figure 3_01 ##### data(faithful) bp %>>% # draw the density line using parameters in example ly_density( eruptions, faithful, bw = 0.2, n = 200, kernel = "rect" ) %>>% # draw density with defaults ly_density( eruptions, faithful, color = "blue" ) %>>% #add random y to jitter for rugplot of points at bottom ly_points( eruptions # note use of inline expression , runif(n = nrow(faithful), min = 0, max = 0.025) , faithful , size = 5 ) ##### figure 3_02 ##### # like above but a rug plot bp %>>% # draw the density line using parameters in example ly_density( eruptions, faithful, bw = 0.2, n = 200, kernel = "rect" ) %>>% # sort of a hack but hey it works ly_multi_line( xs = cbind(faithful$eruptions,faithful$eruptions) # note use of inline expression , ys = cbind(rep(0,nrow(faithful)),rep(0.025,nrow(faithful))) ) ##### figure 3_03 ######## data(gvhd10, package = "latticeExtra") #get density data for the plot lapply( levels(gvhd10$Days) ,function(d){ figure( height = 200, width = 400, title = paste0("Days: ", d) ) %>>% ly_density( log(FSC.H) ,data = subset(gvhd10, Days == d) ) } ) %>>% grid_plot( nrow = 4, ncol = 2, same_axes = T ) ##### figure 3_05 ######## data(Chem97, package = "mlmRev") # no facet first for simplicity # since first example of ly_quantile bp %>>% ly_quantile( x = gcsescore ,group = score ,data = Chem97 ,distn = qnorm ) lapply( sort(unique(Chem97$score)) ,function(d){ figure( width = 300 , height = 300, title = paste0("Score: ", d) ) %>>% ly_quantile( x = gcsescore ,data = subset(Chem97,score == d) ,distn = qnorm ,color = substr(RColorBrewer::brewer.pal(name="Dark2",n=6)[as.numeric(d)/2+1],2,7) ) } ) %>>% grid_plot( nrow = 2, ncol = 3, same_axes = T ) ##### figure 3_06 ######## local({ lapply( levels(Chem97$gender) ,function(g){ bp <- figure( width = 300 , height = 300, title = paste0("Gender: ", g) ) Reduce( function(x,y){ bp <<- ly_quantile( bp ,x = gcsescore ,data = subset(Chem97, score == y & gender == g) ,distn = qnorm ,color = substr(RColorBrewer::brewer.pal(name="Dark2",n=6)[as.numeric(y)/2+1],2,7) ,size = 5 ) } ,sort(unique(Chem97$score)) ) return(bp) } ) }) %>>% grid_plot( nrow = 1, ncol = 2, same_axes = T ) ##### figure 3_08 ####### # use a less functional approach to get the data in ECDF form data(Chem97, package = "mlmRev") data <- data.frame() gender <- unique(Chem97$gender) scores <- unique(Chem97$score) for (i in 1 : length(scores) ) { for (j in 1 : length(gender) ) { tempdata <- list() #code primarily from lattice panel.ecdfplot #note: example subset gcsescore > 0 n = sum(!is.na(Chem97[which( Chem97$gender == gender[j] & Chem97$score == scores[i] & Chem97$gcsescore > 0 ),]$gcsescore)) tempdata$x = sort(Chem97[which( Chem97$gender == gender[j] & Chem97$score == scores[i] & Chem97$gcsescore > 0 ),]$gcsescore) tempdata$y = seq_len(n)/n tempdata$gender = rep( gender[j], length(tempdata$x) ) tempdata$score = rep(scores[i], length(tempdata$x) ) data <- rbind( data, data.frame( tempdata ) ) } } colnames(data) <- c("x1", "y1", "gender", "score") local({ lapply( sort(unique(data$score)) ,function(s){ bp <- figure( width = 300 , height = 300, title = paste0("Score: ", s) ) Reduce( function(x,y){ print(y) bp <<- ly_points( bp ,x = x1 ,y = y1 ,data = subset(data, score == s & gender == y ) ,color = gender ,size = 3 ) } ,levels(data$gender) ,init = levels(data$gender)[1] ) return(bp) } ) }) %>>% grid_plot( nrow = 2, ncol = 3, same_axes = T ) ##### figure 3_09 ####### data(Chem97, package = "mlmRev") data <- data.frame() gender <- unique(Chem97$gender) scores <- unique(Chem97$score) for (i in 1 : length(scores) ) { for (j in 1 : length(gender) ) { tempdata <- list() #code primarily from lattice panel.qqmath n <- sum(!is.na( Chem97[which(Chem97$gcsescore > 0 & Chem97$score == scores[i] & Chem97$gender == gender[j] ),]$gcsescore)) tempdata$x = qunif(ppoints(n)) tempdata$y = quantile( x = Chem97[which(Chem97$gcsescore > 0 & Chem97$score == scores[i] & Chem97$gender == gender[j] ),]$gcsescore, ppoints(n), names = FALSE, type = 7, na.rm = TRUE) tempdata$gender = rep( gender[j], n ) tempdata$score = rep(scores[i], n ) data <- rbind( data, data.frame( tempdata ) ) } } colnames(data) <- c("x1", "y1", "gender", "score") local({ lapply( sort(unique(data$score)) ,function(s){ bp <- figure( width = 200 , height = 500, title = paste0("Score: ", s) ) Reduce( function(x,y){ print(y) bp <<- ly_points( bp ,x = x1 ,y = y1 ,data = subset(data, score == s & gender == y ) ,color = gender ,size = 1 ,hover = list(x1,y1) ) %>>% x_axis( label = "Std Normal ") %>>% y_axis( label = "GCSEScore") } ,levels(data$gender) ,init = levels(data$gender)[1] ) return(bp) } ) }) %>>% grid_plot( nrow = 1, ncol = 6, same_axes = T ) ##### figure 3_10 ####### data(Chem97, package = "mlmRev") Chem97 %>>% dplyr::group_by( gender, score ) %>% do( data.frame( x = qnorm(ppoints(100)) ,y = quantile(.$gcsescore, ppoints(100), names = F, type = 7, na.rm = F) ,score = unique(.$score) ,gender = unique(.$gender) ) ) %>>% (reshape2::dcast(., x + score ~ gender, value.var = "y")) %>>% (dat~ lapply( sort(unique(dat$score)) ,function(s){ figure( width = 200, height = 200, title = paste0("score: ", s) ) %>>% ly_points( M ,F ,data = filter( dat, score == s ) ,size = 4 ,hover = list(score,M,F) ) %>>% ly_abline( a= 0, b = 1 ) } ) ) %>>% grid_plot( nrow = 2, ncol = 3, same_axes = T ) ##### figure 3_11 ####### data(Chem97, package = "mlmRev") # non facet first since first boxplot figure( width = 700, height = 400 # little clunky to force sort on categorical axis , xlim = as.character(sort(unique(Chem97$score))) ) %>>% ly_boxplot( as.character(score) ,gcsescore ,data = Chem97 ) %>>% x_axis(label = "Average GCSE Score") # now facet it lapply( levels(Chem97$gender) ,function(g){ # non facet first since first boxplot figure( width = 500, height = 300 # little clunky to force sort on categorical axis , xlim = as.character(sort(unique(Chem97$score))) ) %>>% ly_boxplot( as.character(score) ,gcsescore ,data = filter(Chem97, gender == g) ) %>>% ly_lines( as.character(score) ,mean_gcse ,data = filter(Chem97, gender == g) %>>% group_by( score) %>>% summarize( mean_gcse = mean(gcsescore) ) , color = "black" ) %>>% x_axis(label = "Average GCSE Score") } ) %>>% grid_plot( nrow = 1, ncol = 2 )#, same_axes messes up sort ) ##### figure 3_12 ####### data(Chem97, package = "mlmRev") lapply( sort(unique(Chem97$score)) ,function(s){ # non facet first since first boxplot figure( width = 200, height = 500, title = paste0("score: ", s) ) %>>% ly_boxplot( gender ,gcsescore ,data = filter(Chem97, score == s) ) %>>% ly_points( gender ,mean_gcse ,data = filter(Chem97, score == s) %>>% group_by( gender ) %>>% summarize( mean_gcse = mean(gcsescore) ) , color = "black" , glyph = 9 ) } ) %>>% grid_plot( nrow = 1, ncol = 2, same_axes = T ) ##### figure 3_15 ####### data(quakes) figure( height = 400, width = 700, ylim = as.character(seq(4,6.5,0.1)) ) %>>% ly_points( depth ,factor(mag) ,data = quakes ,size = 4 ,color = "black" ,alpha = 1 ) ##### figure 3_16 ####### # don't think the jitter is possible with a categorical scale/axis data(quakes) figure( height = 400, width = 700, xlim = as.character(seq(4,6.5,0.1)) ) %>>% ly_points( factor(mag) ,depth ,data = quakes ,size = 4 ,color = "black" ,alpha = 1 ) ##### figure 3_17 ####### # again, not sure jitter possible with categorical scale/axis data(barley,package="lattice") barley %>>% ( data.frame( . ,residuals = sqrt(abs(residuals(lm(yield~variety+year+site,.)))) ) ) %>>% (dat~ ly_points( figure( width = 700, height = 400, xlim = levels(dat$site) ) ,site ,residuals ,data = dat ,color = year ) %>>% ly_points( site ,median_residual ,color = year ,data = dat %>>% group_by( site, year ) %>>% summarize( median_residual = median(residuals) ) ,glyph = 3 ,size = 30 ) %>>% ly_lines( site ,median_residual ,color = year ,data = dat %>>% group_by( site, year ) %>>% summarize( median_residual = median(residuals) ) ) )