# Kevin Gilbert
# CMU CS '14
# Formulas in R are hard to manipulate, and cannot have string functions
# applied to them. This fixes that.

library(datasets)

# ---------- ---------- ---------- ---------- ----------

# Internal helper: the response (left-hand side) of a terms object as a single
# string, or "" when the formula is one-sided.
.formula.response <- function(tt) {
  if (attr(tt, "response") == 0) {
    return("")
  }
  # "variables" is the call list(<response>, <var1>, ...); element 1 is `list`,
  # so the response sits at index response + 1.
  resp <- attr(tt, "variables")[[attr(tt, "response") + 1L]]
  # deparse() may split a long expression over several strings; rejoin them.
  paste(deparse(resp, width.cutoff = 500L), collapse = " ")
}

# Internal helper: assemble `resp ~ pred[1] + pred[2] + ...` as a formula
# living in `env`. An empty `pred` gives an intercept-only right-hand side.
.formula.build <- function(resp, pred, env) {
  rhs <- if (length(pred) > 0) paste(pred, collapse = " + ") else "1"
  if (is.null(env)) {
    env <- parent.frame()
  }
  as.formula(paste(resp, "~", rhs), env = env)
}

# given two formulas, merge their predictors
# X ~ m + n and X ~ y + z => X ~ m + n + y + z
#
# A, B: formulas (or anything terms() accepts, e.g. a fitted lm).
# Returns a formula whose predictors are the term labels of A followed by
# those of B. The response is taken from A (A and B are assumed to share the
# same response); if A is one-sided, B's response is used instead. The result
# keeps A's environment.
# Only term labels are carried over: an offset() or a removed intercept
# (`- 1`) in A or B is not reproduced in the result.
formula.merge <- function(A, B) {
  terms_a <- terms(A)
  terms_b <- terms(B)

  resp <- .formula.response(terms_a)
  if (!nzchar(resp)) {
    resp <- .formula.response(terms_b)
  }

  pred <- c(attr(terms_a, "term.labels"), attr(terms_b, "term.labels"))
  .formula.build(resp, pred, environment(terms_a))
}

# by default, wrap all predictors X with s(X), typically for mgcv
# X ~ y + z => X ~ s(y) + s(z)
#
# model: a formula (or anything terms() accepts, e.g. a fitted lm).
# f:     function applied to each term label (a single string); its result is
#        used as the replacement term.
# Returns a formula with the same response and environment as `model`.
# A one-sided formula stays one-sided, and a formula without predictors is
# returned as `response ~ 1`.
# Only term labels are wrapped: an offset() or a removed intercept (`- 1`) in
# `model` is not reproduced in the result.
formula.wrap <- function(model,
                         f = function(x) paste(c("s(", x, ")"), collapse = "")) {
  tt <- terms(model)
  resp <- .formula.response(tt)
  # lapply + unlist keeps the result type-stable even for zero terms.
  wrapped <- unlist(lapply(attr(tt, "term.labels"), f), use.names = FALSE)
  .formula.build(resp, wrapped, environment(tt))
}

# ---------- ---------- ---------- ---------- ----------

# Please refer to 36-402 lecture on Additive Models
# ...you can now do MedianHouseValue ~ s(.)!
# The demo needs the `calif` data frame and gam() (e.g. library(mgcv)); it is
# skipped when either is missing so that sourcing this file still defines the
# helpers above.
if (exists("calif") && exists("gam", mode = "function")) {
  look_at_that_formula <- formula.wrap(lm(log(MedianHouseValue) ~ ., data = calif))
  # log(MedianHouseValue) ~ s(MedianIncome) + s(MedianHouseAge) +
  #   s(TotalRooms) + s(TotalBedrooms) + s(Population) + s(Households) +
  #   s(Latitude) + s(Longitude)

  addfit <- gam(look_at_that_formula, data = calif)
  addfit2 <- update(addfit, ~ . - s(Latitude) - s(Longitude) + s(Latitude, Longitude))
  # log(MedianHouseValue) ~ s(MedianIncome) + s(MedianHouseAge) +
  #   s(TotalRooms) + s(TotalBedrooms) + s(Population) + s(Households) +
  #   s(Latitude, Longitude)
} else {
  message("Skipping additive-model demo: needs `calif` and gam() in scope.")
}

# ---------- ---------- ---------- ---------- ----------

# formula.merge is useful for keeping main effects and interactions separate
main_effects <- formula(lm(mpg ~ ., data = mtcars))
interactions <- formula(mpg ~ hp:cyl + disp:wt)
formula.merge(main_effects, interactions)
# mpg ~ cyl + disp + hp + drat + wt + qsec + vs + am + gear + carb +
#   hp:cyl + disp:wt