pm2r · December 20, 2017 07:34 · Jun 18, 2014
diff --git a/complete.R b/complete.R
@@ -0,0 +1,27 @@
+## Write a function that reads a directory full of files and reports the number of completely observed cases in each data file.
+## The function should return a data frame where the first column is the name of the file and the second column is the number
+## of complete cases. A prototype of this function follows
+
+complete <- function(directory, id = 1:332) {
+    ## Count the completely observed rows in each monitor's CSV file.
+    ## 'directory' is a character vector of length 1 indicating
+    ## the location of the CSV files
+    ## 'id' is an integer vector of the monitor ID numbers to be used
+    ## Return a data frame with one row per element of 'id', in order:
+    ## id nobs
+    ## 1 117
+    ## 2 1041
+    ## where 'id' is the monitor ID number and 'nobs' is the number of
+    ## rows in that monitor's file with no missing value in any column
+
+    ## corr() and pollutantmean() load this helper themselves; do the
+    ## same here so that complete() also works when sourced on its own.
+    if (!exists("obsFileName", mode = "function")) source("obsFileName.R")
+
+    ## Collect every count first and build the data frame once, instead
+    ## of growing it with rbind() on each pass through a loop.
+    nobs <- vapply(id, function(i) {
+        sum(complete.cases(read.csv(obsFileName(directory, i))))
+    }, integer(1))
+    data.frame(id = id, nobs = nobs)
+}
diff --git a/corr.R b/corr.R
@@ -0,0 +1,32 @@
+## Write a function that takes a directory of data files and a threshold
+## for complete cases and calculates the correlation between sulfate and
+## nitrate for monitor locations where the number of completely observed
+## cases (on all variables) is greater than the threshold. The function
+## should return a vector of correlations for the monitors that meet the
+## threshold requirement. If no monitors meet the threshold requirement,
+## then the function should return a numeric vector of length 0.
+
+corr <- function(directory, threshold = 0, id = 1:332) {
+    ## Correlation between sulfate and nitrate for each monitor that has
+    ## more than 'threshold' completely observed cases.
+    ## 'directory' is a character vector of length 1 indicating
+    ## the location of the CSV files
+    ## 'threshold' is a numeric vector of length 1: a monitor is used only
+    ## if its number of completely observed rows (on all variables) is
+    ## strictly greater than this value; the default is 0
+    ## 'id' is an integer vector of monitor ID numbers to consider; the
+    ## default keeps the original behaviour of scanning monitors 1 to 332
+    ## Return a numeric vector of correlations, in 'id' order, of length 0
+    ## if no monitor meets the threshold
+
+    if (!exists("obsFileName", mode = "function")) source("obsFileName.R")
+
+    ## Read each file once: the same data frame supplies both the
+    ## complete-case count and the correlation.
+    per_monitor <- lapply(id, function(i) {
+        data <- read.csv(obsFileName(directory, i))
+        if (sum(complete.cases(data)) <= threshold) return(numeric())
+        cor(data$sulfate, data$nitrate, use = "complete.obs")
+    })
+    as.numeric(unlist(per_monitor))
+}
diff --git a/obsFileName.R b/obsFileName.R
@@ -0,0 +1,11 @@
+## Return relative path to csv file by detector number
+
+obsFileName <- function(directory, obs) {
+    ## 'directory': character vector of length 1, folder holding the CSV files.
+    ## 'obs': monitor ID number(s); must be whole numbers.
+    ## Returns the path "<directory>/<ID>.csv" with the ID zero-padded to at
+    ## least three digits (1 -> "001", 42 -> "042", 332 -> "332"). Vectorised
+    ## over 'obs', and the value is returned visibly rather than as the
+    ## invisible result of an assignment.
+    file.path(directory, sprintf("%03d.csv", obs))
+}
diff --git a/pollutantmean.R b/pollutantmean.R
@@ -0,0 +1,36 @@
+## Write a function named 'pollutantmean' that calculates the mean of a pollutant
+## (sulfate or nitrate) across a specified list of monitors. The function
+## 'pollutantmean' takes three arguments: 'directory', 'pollutant', and 'id'.
+## Given a vector of monitor ID numbers, 'pollutantmean' reads those monitors'
+## particulate matter data from the directory specified in the 'directory' argument
+## and returns the mean of the pollutant across all of the monitors,
+## ignoring any missing values coded as NA
+
+pollutantmean <- function(directory, pollutant, id = 1:332) {
+    ## Mean of one pollutant pooled over all readings of the chosen monitors.
+    ## 'directory' is a character vector of length 1 indicating
+    ## the location of the CSV files
+    ## 'pollutant' is a character vector of length 1 indicating
+    ## the name of the pollutant for which we will calculate the
+    ## mean; either "sulfate" or "nitrate".
+    ## 'id' is an integer vector indicating the monitor ID numbers
+    ## to be used
+    ## Return the mean of the pollutant across all monitors listed
+    ## in the 'id' vector (ignoring NA values)
+
+    ## Any other pollutant name used to fall through both branches and
+    ## give NaN without a word; keep that value but say why.
+    if (!pollutant %in% c("sulfate", "nitrate")) {
+        warning("unsupported pollutant: ", pollutant, call. = FALSE)
+        return(NaN)
+    }
+    if (!exists("obsFileName", mode = "function")) source("obsFileName.R")
+
+    ## Pull the requested column by name from every file, then pool the
+    ## readings once rather than growing a vector inside the loop.
+    readings <- lapply(id, function(i) {
+        read.csv(obsFileName(directory, i))[[pollutant]]
+    })
+
+    mean(as.numeric(unlist(readings)), na.rm = TRUE)
+}
No results found