Skip to main content

spark SQL 排序

数据准备

import spark.implicits._

val simpleData = Seq(("James", "Sales", "NY", 90000, 34, 10000),
("Michael", "Sales", "NY", 86000, 56, 20000),
("Robert", "Sales", "CA", 81000, 30, 23000),
("Maria", "Finance", "CA", 90000, 24, 23000),
("Raman", "Finance", "CA", 99000, 40, 24000),
("Scott", "Finance", "NY", 83000, 36, 19000),
("Jen", "Finance", "NY", 79000, 53, 15000),
("Jeff", "Marketing", "CA", 80000, 25, 18000),
("Kumar", "Marketing", "NY", 91000, 50, 21000)
)
val df = simpleData.toDF("employee_name", "department", "state", "salary", "age", "bonus")
df.printSchema()
df.show()

root
|-- employee_name: string (nullable = true)
|-- department: string (nullable = true)
|-- state: string (nullable = true)
|-- salary: integer (nullable = false)
|-- age: integer (nullable = false)
|-- bonus: integer (nullable = false)

+-------------+----------+-----+------+---+-----+
|employee_name|department|state|salary|age|bonus|
+-------------+----------+-----+------+---+-----+
| James| Sales| NY| 90000| 34|10000|
| Michael| Sales| NY| 86000| 56|20000|
| Robert| Sales| CA| 81000| 30|23000|
| Maria| Finance| CA| 90000| 24|23000|
| Raman| Finance| CA| 99000| 40|24000|
| Scott| Finance| NY| 83000| 36|19000|
| Jen| Finance| NY| 79000| 53|15000|
| Jeff| Marketing| CA| 80000| 25|18000|
| Kumar| Marketing| NY| 91000| 50|21000|
+-------------+----------+-----+------+---+-----+

sort

df.sort("department", "state").show(false)
df.sort(col("department"), col("state")).show(false)
+-------------+----------+-----+------+---+-----+
|employee_name|department|state|salary|age|bonus|
+-------------+----------+-----+------+---+-----+
|Raman |Finance |CA |99000 |40 |24000|
|Maria |Finance |CA |90000 |24 |23000|
|Jen |Finance |NY |79000 |53 |15000|
|Scott |Finance |NY |83000 |36 |19000|
|Jeff |Marketing |CA |80000 |25 |18000|
|Kumar |Marketing |NY |91000 |50 |21000|
|Robert |Sales |CA |81000 |30 |23000|
|James |Sales |NY |90000 |34 |10000|
|Michael |Sales |NY |86000 |56 |20000|
+-------------+----------+-----+------+---+-----+

orderBy

df.orderBy("department", "state").show(false)
df.orderBy(col("department"), col("state")).show(false)
+-------------+----------+-----+------+---+-----+
|employee_name|department|state|salary|age|bonus|
+-------------+----------+-----+------+---+-----+
|Raman |Finance |CA |99000 |40 |24000|
|Maria |Finance |CA |90000 |24 |23000|
|Jen |Finance |NY |79000 |53 |15000|
|Scott |Finance |NY |83000 |36 |19000|
|Jeff |Marketing |CA |80000 |25 |18000|
|Kumar |Marketing |NY |91000 |50 |21000|
|Robert |Sales |CA |81000 |30 |23000|
|Michael |Sales |NY |86000 |56 |20000|
|James |Sales |NY |90000 |34 |10000|
+-------------+----------+-----+------+---+-----+

ASC

df.sort(col("department").asc, col("state").asc).show(false)
df.orderBy(col("department").asc, col("state").asc).show(false)
+-------------+----------+-----+------+---+-----+
|employee_name|department|state|salary|age|bonus|
+-------------+----------+-----+------+---+-----+
|Maria |Finance |CA |90000 |24 |23000|
|Raman |Finance |CA |99000 |40 |24000|
|Scott |Finance |NY |83000 |36 |19000|
|Jen |Finance |NY |79000 |53 |15000|
|Jeff |Marketing |CA |80000 |25 |18000|
|Kumar |Marketing |NY |91000 |50 |21000|
|Robert |Sales |CA |81000 |30 |23000|
|James |Sales |NY |90000 |34 |10000|
|Michael |Sales |NY |86000 |56 |20000|
+-------------+----------+-----+------+---+-----+

DESC

df.sort(col("department").asc, col("state").desc).show(false)
df.orderBy(col("department").asc, col("state").desc).show(false)
+-------------+----------+-----+------+---+-----+
|employee_name|department|state|salary|age|bonus|
+-------------+----------+-----+------+---+-----+
|Jen |Finance |NY |79000 |53 |15000|
|Scott |Finance |NY |83000 |36 |19000|
|Raman |Finance |CA |99000 |40 |24000|
|Maria |Finance |CA |90000 |24 |23000|
|Kumar |Marketing |NY |91000 |50 |21000|
|Jeff |Marketing |CA |80000 |25 |18000|
|Michael |Sales |NY |86000 |56 |20000|
|James |Sales |NY |90000 |34 |10000|
|Robert |Sales |CA |81000 |30 |23000|
+-------------+----------+-----+------+---+-----+