// Sample data: two string grouping keys followed by four integer value columns.
val df = {
  val rows = Seq(
    ("col1", "col2", 4, 5, 7, 5),
    ("col1", "col2", 2, 0, 2, 2),
    ("col1", "col2", 2, 0, 2, 2),
    ("col1", "col1", 2, 0, 2, 2),
    ("col1", "col1", 5, 10, 3, 4)
  )
  spark
    .createDataFrame(rows)
    .toDF("first_group", "second_group", "col1", "col2", "col3", "col4")
}

// Stage 1: minimum of every numeric column per (first_group, second_group) pair.
val minPerGroupPair = df.groupBy("first_group", "second_group").min()

// Stage 2: average those per-pair minima within each first_group.
minPerGroupPair.groupBy("first_group").avg()

/*
 * Resulting schema:
 *   first_group:string
 *   avg(min(col1)):double
 *   avg(min(col2)):double
 *   avg(min(col3)):double
 *   avg(min(col4)):double
 */