# RxHiveData only works with a Spark compute context, so one is created and
# made the active compute context before the Hive data source is defined.
computeContext <- RxSpark(consoleOutput = TRUE, persistentRun = TRUE)
rxSetComputeContext(computeContext)

# Column overrides applied to the Hive query result: arrdelay is typed as an
# integer, dayofweek as a factor whose levels are the seven day names in the
# order listed below.
airColInfo <- list(
  arrdelay = list(type = "integer"),
  # crsdeptime override is left disabled:
  # crsdeptime = list(type = "numeric"),
  dayofweek = list(
    type = "factor",
    levels = c(
      "Monday",
      "Tuesday",
      "Wednesday",
      "Thursday",
      "Friday",
      "Saturday",
      "Sunday"
    )
  )
)

# Hive data source backed by a query over the AirlineDemoSmallHive table.
hive_data <- RxHiveData(
  query = "select * from AirlineDemoSmallHive",
  colInfo = airColInfo
)

# Keep only rows with 240 < arrdelay <= 300, and only the two columns that
# were given explicit types above.
myData <- rxDataStep(
  inData = hive_data,
  rowSelection = arrdelay > 240 & arrdelay <= 300,
  varsToKeep = c("arrdelay", "dayofweek")
)

# Preview the first rows of the filtered result.
head(myData)