Skip to content

Instantly share code, notes, and snippets.

@eqbalz
Last active February 8, 2017 19:03
Show Gist options
  • Save eqbalz/032b8698b8b3dd7e76fca5fd4a0da9fb to your computer and use it in GitHub Desktop.
Save eqbalz/032b8698b8b3dd7e76fca5fd4a0da9fb to your computer and use it in GitHub Desktop.
# RxHiveData only works with a Spark compute context, so switch to one first.
computeContext <- RxSpark(consoleOutput = TRUE, persistentRun = TRUE)
rxSetComputeContext(computeContext)

# Column metadata applied to the Hive query result: arrdelay is typed as
# integer and dayofweek as a factor with the explicit levels listed below.
airColInfo <- list(
  arrdelay = list(type = "integer"),
  # crsdeptime = list(type = "numeric"),
  dayofweek = list(
    type = "factor",
    levels = c(
      "Monday", "Tuesday", "Wednesday", "Thursday",
      "Friday", "Saturday", "Sunday"
    )
  )
)

# Hive data source selecting every column of AirlineDemoSmallHive.
hive_data <- RxHiveData(
  query = "select * from AirlineDemoSmallHive",
  colInfo = airColInfo
)

# Keep only rows with 240 < arrdelay <= 300 and only the two listed columns,
# then show the first rows of the result.
myData <- rxDataStep(
  inData = hive_data,
  rowSelection = arrdelay > 240 & arrdelay <= 300,
  varsToKeep = c("arrdelay", "dayofweek")
)
head(myData)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment