calpolystat · January 6, 2023 14:38 · Jun 20, 2015 · Jun 20, 2015
diff --git a/bimodal.R b/bimodal.R
@@ -1,5 +1,4 @@
 
-
 ## WANT: FIND mu and sigma such that when
 ## X is defined by P(X<=x) = .5 Phi ((x+mu)/sigma)) + .5 Phi ((x-mu)/sigma))
 ## we have Var[X] = 1

diff --git a/server.R b/server.R
@@ -1,4 +1,3 @@
-
 # ------------------
 # App Title: Sampling distribution demonstration
 #    Author: Gail Potter

diff --git a/ui.R b/ui.R
@@ -58,7 +58,7 @@ shinyUI(fluidPage(
                  "Gail Potter"),align="right", style = "font-size: 8pt"),
 
            div("Shiny source files:",
-               a(href="https://gist.github.com/calpolystat/d896c5848934484181be",
+               a(href="https://gist.github.com/calpolystat/d7ed9873137267ee557b",
                  target="_blank","GitHub Gist"),align="right", style = "font-size: 8pt"),
 
            div(a(href="http://www.statistics.calpoly.edu/shiny",target="_blank", 

diff --git a/#Sampling_Distribution.txt b/#Sampling_Distribution.txt
@@ -0,0 +1,7 @@
+Sampling Distributions of Various Statistics Shiny App
+
+Base R code created by Gail Potter
+Shiny app files created by Gail Potter
+
+Cal Poly Statistics Dept Shiny Series 
+http://statistics.calpoly.edu/shiny
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -0,0 +1,7 @@
+Title: Sampling Distributions of Various Statistics
+Author: Gail Potter
+AuthorUrl: http://www.gailpotter.org
+License: MIT
+DisplayMode: Normal
+Tags: Sampling Distributions
+Type: Shiny
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Gail Potter
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/bimodal.R b/bimodal.R
@@ -0,0 +1,47 @@
+
+
+## WANT: FIND mu and sigma such that when
+## X is defined by P(X<=x) = .5 Phi ((x+mu)/sigma)) + .5 Phi ((x-mu)/sigma))
+## we have Var[X] = 1
+## NOTE THAT WE will satisfy E[X] = 0 since the means of the 2 normals are -mu and mu.
+## We just need to find sigma so that Var[X]=1. 
+
+## Find the PDF:
+## f(x) = d/dx F(x) =  .5 psi ((x+mu)/sigma)) (1/sigma) + 
+##                    .5 psi ((x-mu)/sigma))(1/sigma),
+## where psi is the PDF of the standard normal.
+
+pdf = function(x, mu, sigma) {
+  .5* dnorm ((x+mu)/sigma) *(1/sigma) + 
+                   .5 *dnorm ((x-mu)/sigma)*(1/sigma)
+}
+
+
+## E[X] = \int_{-infty}^{infty} x f(x) dx = 0   (by symmetry)
+## E[X^2] = \int_{-infty}^{infty} x^2 f(x) dx = 
+##   \int   x^2 .5 dnorm((x+mu)/sigma)(1/sigma) dx + 
+##   \int   x^2 .5 dnorm((x-mu)/sigma)(1/sigma) dx = 
+
+##   .5*E[Y^2] +  .5*E[Z^2], where Y~normal(-mu, sigma) and Z~normal(mu, sigma)
+##   = .5(2)(sigma^2 + mu^2) = sigma^2 + mu^2
+
+##   Var[X] = E[X^2]-(E[X])^2 = sigma^2 + mu^2, so Var[X] = 1 when sigma = sqrt(1 - mu^2)
+##   Var[Y] = sigma^2 = E[Y^2] - mu^2 
+
+##   E[Y^2] = sigma^2 + mu^2   (and likewise for Z)
+
+## Simulation check: draw from the two-normal mixture and confirm mean ~ 0 and sd ~ 1.
+numsim = 100000   # fixed size; 'input' only exists inside the Shiny app, not in this script
+mu = .92
+sigma = sqrt(1-mu^2)
+bimodal = rnorm(numsim, mu*2*(rbinom(n=numsim, 
+            size=1, prob=.5)-.5), sd=sigma)
+hist(bimodal)
+sd(bimodal)
+mean(bimodal)
+
+
+## Compute Q1, Q3:  the mixture CDF at -mu is (approximately) .25, so Q1 = -mu and Q3 = mu
+x = -mu
+.5*pnorm(x, -mu, sigma) + .5*pnorm(x, mu, sigma)
+
diff --git a/server.R b/server.R
@@ -0,0 +1,306 @@
+
+# ------------------
+# App Title: Sampling distribution demonstration
+#    Author: Gail Potter
+# ------------------
+
+Q1=function(x) quantile(x, .25)
+Q3=function(x) quantile(x, .75)
+CV=function(x) sd(x)/mean(x)
+
+## Compute population parameters.  Populations are standardized so that they all have mean =0, 
+##  standard deviation = 1
+
+parameters = data.frame(
+  row.names= c("mean", "standard deviation", "Q1", "median", "Q3",  "minimum", "maximum"),
+     bimodal=rep(NA,7), normal=rep(NA,7), left.skewed=rep(NA,7), right.skewed=rep(NA,7),uniform=rep(NA,7))
+
+  parameters[1,]=0
+  parameters[2,]=1
+
+## normal quantiles
+  parameters$normal[3:5] = qnorm(c(.25, .5, .75))
+
+## left-skewed quantiles
+  parameters$left.skewed[3:5] = c(
+    (10-qgamma(1-.25, shape=2, scale=5)) / (5*sqrt(2)), ## Q1
+    (10-qgamma(1-.5, shape=2, scale=5)) / (5*sqrt(2)) , ## Q2
+    (10-qgamma(1-.75, shape=2, scale=5)) / (5*sqrt(2))) ## Q3
+
+
+## right-skewed quantiles:
+ parameters$right.skewed[3:5] = c(
+    (qgamma(.25, shape=2, scale=5)-10 ) / (5*sqrt(2)),
+    (qgamma(.5, shape=2, scale=5)-10 )  / (5*sqrt(2)),
+    (qgamma(.75, shape=2, scale=5)-10 ) / (5*sqrt(2)))
+
+## uniform quantiles
+parameters$uniform[3:5]=c(qunif(.25, -sqrt(3), sqrt(3)), 0, qunif(.75, -sqrt(3), sqrt(3)))
+
+parameters$bimodal[3:5]= c(-.92, 0, .92)
+
+parameters[6,] = -Inf 
+parameters[7,] =  Inf
+parameters[7, "left.skewed"] = 10/(5*sqrt(2))
+parameters[6, "right.skewed"] = -10/(5*sqrt(2))
+parameters[6:7, "uniform"] = c(-sqrt(3), sqrt(3))
+
+
+shinyServer(function(input, output, session) {
+
+  draw.sample <- reactiveValues()
+
+   ## Alert on "shinyalert1" unless the sample size is a whole number in 1..1000.
+   observe({ n = input$n   # type/NA checks run first so NULL or NA is never compared
+      if (is.numeric(n) && length(n) == 1 && !is.na(n) && 
+            n > 0 && n <= 1000 && (n %% 1==0)) return()
+      showshinyalert(session, "shinyalert1", 
+                     paste("Please enter an integer between 1 and 1000:"))
+    })
+
+    ## Alert on "shinyalert2" unless the number of samples is a whole number in 1..100000.
+    observe({ nsim = input$nsim   # type/NA checks run first so NULL or NA is never compared
+      if (is.numeric(nsim) && length(nsim) == 1 && !is.na(nsim) && 
+            nsim > 0 && nsim <= 100000 && (nsim %% 1==0)) return()
+      showshinyalert(session, "shinyalert2", 
+                     paste("Please enter an integer between 1 and 100,000:"))
+    })
+
+    ## Alert on "shinyalert3" unless the population mean is a single non-missing number.
+    observe({ m = input$popmean   # scalar && short-circuits, so NULL never reaches is.na()
+      if (is.numeric(m) && length(m) == 1 && !is.na(m)) return()
+      showshinyalert(session, "shinyalert3", 
+                     paste("Please enter a number for the population mean:"))
+    })
+
+
+    ## Alert on "shinyalert4" unless the population sd is a single non-missing number.
+    observe({ s = input$popsd   # scalar && short-circuits, so NULL never reaches is.na()
+      if (is.numeric(s) && length(s) == 1 && !is.na(s)) return()
+      showshinyalert(session, "shinyalert4", 
+                     paste("Please enter a number for the population standard deviation:"))
+    })
+
+## Draw nsim samples of size n from the chosen population when 'Draw samples' is pressed,
+## compute the chosen statistic for each sample, and prepend the results to draw.sample.
+observe({
+      input$go      # the only reactive dependency; every other input is read with isolate()
+      n = isolate(input$n); nsim = isolate(input$nsim)
+      popmean = isolate(input$popmean); popsd = isolate(input$popsd)
+      ## The alert observers only warn about bad inputs; without this guard the generators
+      ## below would be called with them and raise an error inside this observer.
+      is.number = function(v) is.numeric(v) && length(v) == 1 && !is.na(v)
+      is.count = function(v, upper) is.number(v) && v > 0 && v <= upper && v %% 1 == 0
+      if (!is.count(n, 1000) || !is.count(nsim, 100000) ||
+            !is.number(popmean) || !is.number(popsd)) return()
+      total = n*nsim
+
+      ## Each population is standardized to mean 0, sd 1; each row of x is one sample.
+      x = switch(isolate(input$popdist), 
+        "normal"= matrix(rnorm(total, 0,1), ncol=n),
+        "right.skewed" = matrix(rgamma(total, shape=2, scale=5)/(5*sqrt(2))-10/(5*sqrt(2)), ncol=n),
+        "left.skewed" = matrix(10/(5*sqrt(2))-rgamma(total, shape=2, scale=5)/(5*sqrt(2)), ncol=n),
+        "uniform" = matrix(runif(total, -sqrt(3),sqrt(3)), ncol=n),
+        ## equal mixture of normals centred at -.92 and .92
+        "bimodal" = matrix(rnorm(total, 2*.92*(rbinom(n=total, size=1, prob=.5)-.5), 
+                                 sd=sqrt(1-.92^2)), ncol=n))
+
+      x = popsd*x + popmean
+
+      f=switch(isolate(input$statistic),
+               mean=mean,
+               median=median,
+               Q1=Q1,
+               Q3=Q3,
+               "standard deviation"=sd,
+               maximum=max,
+               minimum=min,
+               CV=CV)
+
+    withProgress(session, {
+      if(nsim>1000) setProgress(message = "Calculating, please wait.",
+                   detail = " ", value=.5)
+      sample.statistics = apply(x, 1, f)
+      draw.sample$sample.statistics <- 
+        c(sample.statistics, isolate(draw.sample$sample.statistics))
+      draw.sample$x = x[1,]   # first sample of this draw; output$dotplot plots it
+    })  
+})
+
+
+  observe({
+    input$n
+    input$clear
+    input$popdist
+    input$statistic
+    input$popmean
+    input$popsd
+    draw.sample$x<-NULL
+    draw.sample$sample.statistics=NULL
+  })
+
+  ## Plot the standardized population density curve, shifted/scaled along the x-axis to
+  ## the chosen mean and sd, and mark the population value of the selected statistic in red.
+  output$popdistn <- renderPlot({
+    popname = switch(input$popdist,
+      "normal" = "Normal" ,
+      "left.skewed"= "Left-skewed", 
+      "uniform" = "Uniform",
+      "right.skewed" =  "Right-skewed" ,
+      "bimodal" = "Bimodal")
+    ## Density of each standardized population (before shifting/scaling).
+      pdf = switch(input$popdist,
+      "normal"= dnorm,
+      "right.skewed" = function(x) 5*sqrt(2)*dgamma(5*sqrt(2)*x+10, shape=2, scale=5),
+      "left.skewed" = function(x) 5*sqrt(2)*dgamma(10-5*sqrt(2)*x, shape=2, scale=5),
+      "uniform" = function(x) dunif(x, -sqrt(3), sqrt(3)),
+      "bimodal" = function(x) (dnorm(x, mean=-.92, sd=sqrt(1-.92^2))+
+                                  dnorm(x, mean=.92, sd=sqrt(1-.92^2)))/2
+    )
+
+    xlim = switch(input$popdist,
+                "normal"=c(-3,3),
+                "right.skewed" = c(-3,3),
+                "left.skewed" = c(-3,3),
+                "uniform" = c(-2,2),
+                "bimodal" = c(-2,2))                        
+    par(mfrow=c(1,2), mar=rep(2,4))
+
+    xlim = input$popsd*xlim + input$popmean
+
+    ## Shift/scale the standardized parameter table; the sd row is simply popsd.
+    parameters = input$popsd*parameters + input$popmean
+    parameters[2,]=input$popsd
+    pop.parameter = parameters[input$statistic, input$popdist]
+
+    title = paste(popname, "population,", 
+        input$statistic, "=", round(pop.parameter, 2))
+    if (input$statistic=="standard deviation") title = 
+      paste(popname,", ", input$statistic, " = ", round(pop.parameter, 2), sep="")
+    curve(pdf((x-input$popmean)/input$popsd), xlim=xlim, xlab="", ylab="", main=title, cex=.75)
+
+    if (input$statistic=="standard deviation"){
+      height=.2
+      if (input$popdist=="uniform") height=.1
+      abline(v=input$popmean, lty=2, col="red")
+          segments(input$popmean, height, (input$popmean+input$popsd), height,  col="red") 
+          ## bquote() substitutes the numeric sd; expression(sigma==s) printed a literal "s".
+          text(input$popmean + .5*input$popsd, height+.05, bquote(sigma == .(input$popsd)), cex=1.25)
+    } else abline(v=pop.parameter, col="red")
+  })
+
+  ## Left panel: histogram of the most recent sample.  Right panel: histogram of all
+  ## sample statistics drawn so far (the sampling distribution).  Nothing is drawn
+  ## until a sample exists.
+  output$dotplot <- renderPlot({
+    input$n
+    x = draw.sample$x
+    stats=draw.sample$sample.statistics
+    this.statistic = stats[1]   # statistic of the plotted sample; new draws are prepended
+    par(mfrow=c(1,2))
+    if (!is.null(x)){
+      ## Compute lower and upper limits for the histogram
+      default.lower  = -4*(input$popdist=="normal")+
+        (-1.5)*(input$popdist=="right.skewed")+
+          (-2)*(input$popdist=="uniform") + 
+          (-2.5)*(input$popdist=="bimodal")+
+          (-1.5)*(input$popdist=="left.skewed")
+
+      default.lower = input$popsd*default.lower + input$popmean
+
+      default.upper = 4*(input$popdist!="uniform" & input$popdist!= "bimodal" ) + 
+        2*(input$popdist == "bimodal" | input$popdist=="uniform")  
+
+      default.upper = input$popsd*default.upper + input$popmean
+
+      xmin = min(default.lower, floor(min(x)-.5))
+      xmax = max(default.upper, ceiling(max(x)+.5))
+
+      hist1.details = hist(x, col="slategray1", border="darkgray",
+                           main=paste("Histogram of sample",input$statistic,"=",
+                           round(this.statistic,2)), 
+                           xlab="Data from a single sample",breaks=seq(xmin,xmax,length.out=20))
+      abline(h=0)
+      height = max(hist1.details$counts)/2
+
+      if (input$statistic=="standard deviation") {
+          abline(v=mean(x), lty=2, col="red")
+          segments(mean(x), height, mean(x)+sd(x), height, lwd=2, col="red") 
+          text(mean(x)+.5*sd(x), height+.2, paste("s=", round(sd(x),2)), cex=1.25)
+          } else if (input$statistic!="CV") abline(v=this.statistic, col="red", lwd=2)
+
+     parameters = input$popsd*parameters + input$popmean
+     parameters[2,]=input$popsd
+     pop.parameter = parameters[input$statistic, input$popdist]
+     ## The population minimum/maximum is infinite for unbounded populations (and NA
+     ## for CV); seq() needs finite endpoints, so fall back to the observed range.
+     pop.range = if (is.finite(pop.parameter)) pop.parameter + c(-1,1)*input$popsd else range(stats)
+        xmin=min(pop.range[1], floor(min(stats)-.5))
+        xmax=max(pop.range[2], ceiling(max(stats)+.5))
+
+      hist.details = hist(draw.sample$sample.statistics,  
+       breaks=seq(xmin, xmax, length.out = 20), plot=FALSE)
+      ylim = c(0, max(6, max(hist.details$counts)+2))
+
+     title.end = switch(isolate(input$statistic),
+        mean="of the sample mean",
+        median = "of the sample median",
+        minimum = "of the sample minimum", 
+        maximum = "of the sample maximum",
+        Q1 = "of the first quartile (Q1)",
+        Q3 = "of the third quartile (Q3)",
+        "standard deviation"= "of the standard deviation",
+        CV = "of the coefficient of variation (CV)")
+
+     hist2.details = hist(draw.sample$sample.statistics,  col="tomato",#572, 
+           xlab=paste("Sample ", input$statistic, "s", sep=""), ylim=ylim,
+           main=paste("Sampling distribution \n",title.end),
+               breaks=seq(xmin,xmax,length.out=20) , border="darkgray")
+     abline(h=0)
+     if(input$display ){
+       n.stats = length(draw.sample$sample.statistics)
+       height2 = (max(hist2.details$counts)/2)
+         textheight = (max(hist2.details$counts)/2)*(n.stats>10)*1.1 + 
+         ((max(hist2.details$counts)/2)+1)*(n.stats<=10)
+
+       abline(v=mean(stats), lty=2, lwd=1.25)
+          segments(mean(stats),  lwd=1.25,
+                   height2, mean(stats)+
+                     sd(stats), height2) 
+          text(mean(stats)+.5*sd(stats),textheight, 
+               round(sd(stats),2), cex=1.2)
+          text(mean(stats)+.5*sd(stats), 
+               max(max(hist2.details$counts)*.9, ylim[2]*.9),
+              round(mean(stats),2), cex=1.25)
+          }
+
+    } 
+  })
+
+
+output$numsims  = renderText({  
+ paste("Total samples drawn =",
+        as.character(length(draw.sample$sample.statistics)),
+        "                          ")
+})
+
+## Text summary (mean and standard deviation) of the sample statistics drawn so far.
+## Produced only while the 'display' checkbox is ticked; otherwise the block returns NULL.
+output$display = renderText({
+
+  if (input$display) {
+    stats = draw.sample$sample.statistics
+
+    ## Both lines use the layout "<summary> of <statistic>s = <value rounded to 2 places>".
+    str1 = paste("Mean of ", input$statistic, "s = ", 
+                 round(mean(stats),2), sep="")
+    str2 = paste("Standard deviation of ",input$statistic, "s = ", 
+                 round(sd(stats),2), sep="")
+
+    ## The two lines are joined with an HTML line break.
+    HTML(paste(str1, str2, sep = '<br/>'))
+  }
+
+})
+
+})
diff --git a/styles.css b/styles.css
@@ -0,0 +1,6 @@
+.shiny-progress {
+  top: 50% !important;
+  left: 50% !important;
+  margin-top: -220px !important;
+  margin-left: 50px !important;
+}
diff --git a/ui.R b/ui.R
@@ -0,0 +1,98 @@
+# ------------------
+# App Title: Sampling distribution demonstration
+#    Author: Gail Potter
+# ------------------
+
+
+if (!require("devtools")) install.packages("devtools")
+
+if (!require("shinyBS")) install.packages("shinyBS")
+  library(shinyBS)
+
+if (!require(shinyIncubator)) devtools::install_github("rstudio/shiny-incubator")
+library(shinyIncubator)
+
+if (!require("shinysky")) devtools::install_github("ShinySky","AnalytixWare")
+library(shinysky)
+
+
+shinyUI(fluidPage(
+  includeCSS('styles.css'),
+
+  progressInit(),
+
+  tags$head(tags$link(rel = "icon", type = "image/x-icon", href =  
+  "https://webresource.its.calpoly.edu/cpwebtemplate/5.0.1/common/images_html/favicon.ico")),  
+
+  h3("Sampling distribution demonstration"),
+  fluidRow(
+    column(3,
+           wellPanel( 
+             selectInput("popdist", label = h5("Population distribution"), 
+                         choices = list("Normal" = "normal", "Left-skewed" = "left.skewed",
+                                        "Uniform" = "uniform", "Right-skewed" = "right.skewed",
+                                        "Bimodal"="bimodal"), selected = "normal"),
+             br(),
+              shinyalert("shinyalert3", TRUE, auto.close.after=5),
+            numericInput("popmean", label = h5("Population mean"), value=0),
+            br(),
+              shinyalert("shinyalert4", TRUE, auto.close.after=5),
+            numericInput("popsd", label = h5("Population standard deviation"), value=1),
+            br(),
+              shinyalert("shinyalert1", TRUE, auto.close.after=5),
+
+             numericInput("n", label=h5("Sample size"), value=10, min=1, max=1000),
+             selectInput("statistic", label = h5("Statistic"), 
+                         choices = list("Mean" = "mean", "Median" = "median",
+                                        "1st quartile (Q1)" = "Q1",
+                                        "3rd quartile (Q3)" = "Q3", 
+                                        "Standard deviation" = "standard deviation",
+                                        "Maximum"="maximum", "Minimum"="minimum"), selected = "mean"),
+
+           div("Shiny app by", 
+               a(href="http://www.gailpotter.org",target="_blank", 
+                 "Gail Potter"),align="right", style = "font-size: 8pt"),
+
+           div("Base R code by", 
+               a(href="http://www.gailpotter.org",target="_blank", 
+                 "Gail Potter"),align="right", style = "font-size: 8pt"),
+
+           div("Shiny source files:",
+               a(href="https://gist.github.com/calpolystat/d896c5848934484181be",
+                 target="_blank","GitHub Gist"),align="right", style = "font-size: 8pt"),
+
+           div(a(href="http://www.statistics.calpoly.edu/shiny",target="_blank", 
+                 "Cal Poly Statistics Dept Shiny Series"),align="right", style = "font-size: 8pt"))
+
+    ),
+    tags$style(type="text/css",
+               ".shiny-output-error { visibility: hidden; }",
+               ".shiny-output-error:before { visibility: hidden; }"
+    ),
+
+    column(9, wellPanel(
+      p("In the left panel, specify a population shape, sample size, and statistic of interest.  When you press the 
+ 'Draw samples' button, a sample from that population will be generated and plotted below left.  The statistic will be
+calculated and added to the histogram at right.  By generating many different samples, you can see how the statistic tends to vary from one sample to the next.  
+That distribution is called the 'sampling distribution'.  You can change the population distribution 
+to see how that impacts your sample histogram as well as the sampling distribution."),
+      shinyalert("shinyalert2", TRUE, auto.close.after=5),
+
+      numericInput("nsim", label=h5("Number of samples"), value=1, min=1, max=1000000),
+      actionButton("go", label = "Draw samples"),
+      actionButton("clear",label="Clear"),
+
+      bsCollapse(multiple = FALSE, open = NULL, id = "collapse1",
+                 bsCollapsePanel("Click here to display population characteristics.  (Click again to hide.)", 
+                                 plotOutput("popdistn", height="200px"), 
+                                 id="popcurve", value="test3")
+      ) ,
+
+      plotOutput("dotplot", height="290px"),
+      textOutput("numsims"),
+      checkboxInput("display", label="Display summaries of sampling distribution"),
+      htmlOutput("display")
+      ))
+  )
+
+))