窗口函数经常会在leetCode的题目中使用到
窗口函数的格式
Function() over(partition by query_patition_clause
order by order_by_clause Window_clause )
rows | range between start_expr and end_expr
create table test(id int,name varchar(10),sale int);
insert into test values(1,'aaa',100);
insert into test values(1,'bbb',200);
insert into test values(1,'ccc',200);
insert into test values(1,'ddd',300);
insert into test values(2,'eee',400);
insert into test values(2,'fff',200);
表中的数据为:
mysql> select * from test;
+------+------+------+
| id | name | sale |
+------+------+------+
| 1 | aaa | 100 |
| 1 | bbb | 200 |
| 1 | ccc | 200 |
| 1 | ddd | 300 |
| 2 | eee | 400 |
| 2 | fff | 200 |
+------+------+------+
ROW_NUMBER()
row_number() over(partition by col1 order by col2)
mysql> #对id进行分组,同一个组内的数据再根据sale进行排序,这个排序序号是唯一并且连续的
mysql> select t.*,row_number() over(partition by id order by sale) as rank1
-> from test as t;
+------+------+------+-------+
| id | name | sale | rank1 |
+------+------+------+-------+
| 1 | aaa | 100 | 1 |
| 1 | bbb | 200 | 2 |
| 1 | ccc | 200 | 3 |
| 1 | ddd | 300 | 4 |
| 2 | fff | 200 | 1 |
| 2 | eee | 400 | 2 |
+------+------+------+-------+
6 rows in set (0.00 sec)
mysql> #当没有partition by分组从句时,将视全部记录为一个分组
mysql> select t.*,row_number() over(order by sale) as rank1 from test as t;
+------+------+------+-------+
| id | name | sale | rank1 |
+------+------+------+-------+
| 1 | aaa | 100 | 1 |
| 1 | bbb | 200 | 2 |
| 1 | ccc | 200 | 3 |
| 2 | fff | 200 | 4 |
| 1 | ddd | 300 | 5 |
| 2 | eee | 400 | 6 |
+------+------+------+-------+
6 rows in set (0.00 sec)
RANK()与DENSE_RANK()
rank() over(partition by col1 order by col2)
dense_rank() over(partition by col1 order by col2)
mysql> #对id进行分组,分组后根据sale排序
mysql> #可以发现sale相同时有相同的序号,并且由于id=1的分组中没有排名第3的序号造成排序不连续
mysql> select t.*,rank() over(partition by id order by sale) as rank1
-> from test as t;
+------+------+------+-------+
| id | name | sale | rank1 |
+------+------+------+-------+
| 1 | aaa | 100 | 1 |
| 1 | bbb | 200 | 2 |
| 1 | ccc | 200 | 2 |
| 1 | ddd | 300 | 4 |
| 2 | fff | 200 | 1 |
| 2 | eee | 400 | 2 |
+------+------+------+-------+
6 rows in set (0.00 sec)
mysql> #没有分组,只根据sale排序,sale相同时有相同的序号,没有排名3和4造成排序不连续
mysql> select t.*,rank() over(order by sale) as rank1 from test as t;
+------+------+------+-------+
| id | name | sale | rank1 |
+------+------+------+-------+
| 1 | aaa | 100 | 1 |
| 1 | bbb | 200 | 2 |
| 1 | ccc | 200 | 2 |
| 2 | fff | 200 | 2 |
| 1 | ddd | 300 | 5 |
| 2 | eee | 400 | 6 |
+------+------+------+-------+
6 rows in set (0.00 sec)
mysql> #对id进行分组,分组后根据sale排序
mysql> #可以发现sale相同时有相同的序号,但是整个排序序号是连续的
mysql> select t.*,dense_rank() over(partition by id order by sale) as rank1
-> from test as t;
+------+------+------+-------+
| id | name | sale | rank1 |
+------+------+------+-------+
| 1 | aaa | 100 | 1 |
| 1 | bbb | 200 | 2 |
| 1 | ccc | 200 | 2 |
| 1 | ddd | 300 | 3 |
| 2 | fff | 200 | 1 |
| 2 | eee | 400 | 2 |
+------+------+------+-------+
6 rows in set (0.00 sec)
mysql> #没有分组,只根据sale排序,sale相同时有相同的序号,整个排序序号是连续的
mysql> select t.*,dense_rank() over(order by sale) as rank1 from test as t;
+------+------+------+-------+
| id | name | sale | rank1 |
+------+------+------+-------+
| 1 | aaa | 100 | 1 |
| 1 | bbb | 200 | 2 |
| 1 | ccc | 200 | 2 |
| 2 | fff | 200 | 2 |
| 1 | ddd | 300 | 3 |
| 2 | eee | 400 | 4 |
+------+------+------+-------+
6 rows in set (0.00 sec)
lead(EXPR,<OFFSET>,<DEFAULT>) over(partition by col1 order by col2)
lag(EXPR,<OFFSET>,<DEFAULT>) over(partition by col1 order by col2)
其中:
mysql> #为每一行数据的下一行数据进行开窗,如果该行没有下一行数据,则显示为NULL
mysql> select t.*,lead(sale) over(partition by id order by sale) as rank1
-> from test as t;
+------+------+------+-------+
| id | name | sale | rank1 |
+------+------+------+-------+
| 1 | aaa | 100 | 200 | <--下一行的sale值为200,开窗结果为200
| 1 | bbb | 200 | 200 | <--下一行的sale值为200,开窗结果为200
| 1 | ccc | 200 | 300 | <--下一行的sale值为300,开窗结果为300
| 1 | ddd | 300 | NULL | <--已经是最后一行,没有下一行数据,开窗结果为NULL
| 2 | fff | 200 | 400 |
| 2 | eee | 400 | NULL |
+------+------+------+-------+
6 rows in set (0.00 sec)
mysql> #为每一行数据的上一行数据进行开窗,如果该行没有上一行数据,则显示为NULL
mysql> select t.*,lag(sale) over(partition by id order by sale) as rank1
-> from test as t;
+------+------+------+-------+
| id | name | sale | rank1 |
+------+------+------+-------+
| 1 | aaa | 100 | NULL | <--当前行为第一行,没有上一行数据,开窗结果为NULL
| 1 | bbb | 200 | 100 | <--上一行的sale值为100,开窗结果为100
| 1 | ccc | 200 | 200 | <--上一行的sale值为200,开窗结果为200
| 1 | ddd | 300 | 200 | <--上一行的sale值为200,开窗结果为200
| 2 | fff | 200 | NULL |
| 2 | eee | 400 | 200 |
+------+------+------+-------+
6 rows in set (0.00 sec)
mysql> select t.*,lead(sale,2) over(partition by id order by sale) as rank1
-> from test as t;
+------+------+------+-------+
| id | name | sale | rank1 |
+------+------+------+-------+
| 1 | aaa | 100 | 200 | <--下2行的sale值为200,开窗结果为200
| 1 | bbb | 200 | 300 | <--下2行的sale值为300,开窗结果为300
| 1 | ccc | 200 | NULL | <--已经是倒数第2行,没有下2行的数据,开窗结果为NULL
| 1 | ddd | 300 | NULL | <--已经是最后一行,没有下2行的数据,开窗结果为NULL
| 2 | fff | 200 | NULL |
| 2 | eee | 400 | NULL |
+------+------+------+-------+
6 rows in set (0.00 sec)
mysql> select t.*,lead(sale,2,"Empty") over(partition by id order by sale) as rank1
-> from test as t;
+------+------+------+-------+
| id | name | sale | rank1 |
+------+------+------+-------+
| 1 | aaa | 100 | 200 |
| 1 | bbb | 200 | 300 |
| 1 | ccc | 200 | Empty | <--已经是倒数第2行,没有下2行的数据,开窗结果为"Empty"
| 1 | ddd | 300 | Empty | <--已经是最后一行,没有下2行的数据,开窗结果为"Empty"
| 2 | fff | 200 | Empty |
| 2 | eee | 400 | Empty |
+------+------+------+-------+
6 rows in set (0.00 sec)
mysql> #存在下一行数据显示为Exist,不存在下一行数据则显示NULL,这个NULL是默认的
mysql> select t.*,lead("Exist") over(partition by id order by sale) as rank1
-> from test as t;
+------+------+------+-------+
| id | name | sale | rank1 |
+------+------+------+-------+
| 1 | aaa | 100 | Exist | <--下一行的数据存在,开窗结果为"Exist"
| 1 | bbb | 200 | Exist | <--下一行的数据存在,开窗结果为"Exist"
| 1 | ccc | 200 | Exist | <--下一行的数据存在,开窗结果为"Exist"
| 1 | ddd | 300 | NULL | <--已经是最后一行,没有下一行数据,开窗结果为NULL
| 2 | fff | 200 | Exist |
| 2 | eee | 400 | NULL |
+------+------+------+-------+
6 rows in set (0.00 sec)
mysql> #存在下一行数据显示为Exist,不存在下一行数据则显示Empty
mysql> select t.*,lead("Exist",1,"Empty") over(partition by id order by sale)
-> as rank1 from test as t;
+------+------+------+-------+
| id | name | sale | rank1 |
+------+------+------+-------+
| 1 | aaa | 100 | Exist | <--下一行的数据存在,开窗结果为"Exist"
| 1 | bbb | 200 | Exist | <--下一行的数据存在,开窗结果为"Exist"
| 1 | ccc | 200 | Exist | <--下一行的数据存在,开窗结果为"Exist"
| 1 | ddd | 300 | Empty | <--已经是最后一行,没有下一行数据,开窗结果为"Empty"
| 2 | fff | 200 | Exist |
| 2 | eee | 400 | Empty |
+------+------+------+-------+
6 rows in set (0.00 sec)
first_value( EXPR ) over( partition by col1 order by col2 )
last_value( EXPR ) over( partition by col1 order by col2 )
mysql> #first_value函数查看每一个分组的第一个值
mysql> select t.*,first_value(sale) over(partition by id) as rank1 from test as t;
+------+------+------+-------+
| id | name | sale | rank1 |
+------+------+------+-------+
| 1 | aaa | 100 | 100 | <--分组的第一个值为100,开窗结果100
| 1 | bbb | 200 | 100 | <--分组的第一个值为100,开窗结果100
| 1 | ccc | 200 | 100 | <--分组的第一个值为100,开窗结果100
| 1 | ddd | 300 | 100 | <--分组的第一个值为100,开窗结果100
| 2 | eee | 400 | 400 |
| 2 | fff | 200 | 400 |
+------+------+------+-------+
6 rows in set (0.00 sec)
mysql> #对id进行分组,同一个组内的数据再根据sale进行排序,查看每一个分组的第一个值
mysql> select t.*,first_value(sale) over(partition by id order by sale)
-> as rank1 from test as t;
+------+------+------+-------+
| id | name | sale | rank1 |
+------+------+------+-------+
| 1 | aaa | 100 | 100 | <--分组排序之后的第一个值为100,开窗结果100
| 1 | bbb | 200 | 100 | <--分组排序之后的第一个值为100,开窗结果100
| 1 | ccc | 200 | 100 | <--分组排序之后的第一个值为100,开窗结果100
| 1 | ddd | 300 | 100 | <--分组排序之后的第一个值为100,开窗结果100
| 2 | fff | 200 | 200 |
| 2 | eee | 400 | 200 |
+------+------+------+-------+
6 rows in set (0.00 sec)
mysql> #last_value函数查看每一个分组的最后一个值
mysql> select t.*,last_value(sale) over(partition by id) as rank1
-> from test as t;
+------+------+------+-------+
| id | name | sale | rank1 |
+------+------+------+-------+
| 1 | aaa | 100 | 300 | <--分组排序之后的最后一个值为300,开窗结果300
| 1 | bbb | 200 | 300 | <--分组排序之后的最后一个值为300,开窗结果300
| 1 | ccc | 200 | 300 | <--分组排序之后的最后一个值为300,开窗结果300
| 1 | ddd | 300 | 300 | <--分组排序之后的最后一个值为300,开窗结果300
| 2 | eee | 400 | 200 |
| 2 | fff | 200 | 200 |
+------+------+------+-------+
6 rows in set (0.00 sec)
mysql> #对id进行分组,同一个组内的数据再根据sale进行排序,查看每一个分组的最后一个值
mysql> #但是你发现id=1的组每一行显示的不是300,id=2的分组每一行显示的不是400
mysql> select t.*,last_value(sale) over(partition by id order by sale) as rank1
-> from test as t;
+------+------+------+-------+
| id | name | sale | rank1 |
+------+------+------+-------+
| 1 | aaa | 100 | 100 |
| 1 | bbb | 200 | 200 |
| 1 | ccc | 200 | 200 |
| 1 | ddd | 300 | 300 |
| 2 | fff | 200 | 200 |
| 2 | eee | 400 | 400 |
+------+------+------+-------+
6 rows in set (0.00 sec)
ntile(ntile_num) OVER ( partition by col1 order by col2 )
mysql> 给所有数据分配四个桶
mysql> select t.*,ntile(4) over(partition by id order by sale) as rank1 from test as t;
+------+------+------+-------+
| id | name | sale | rank1 |
+------+------+------+-------+
| 1 | aaa | 100 | 1 |
| 1 | bbb | 200 | 2 |
| 1 | ccc | 200 | 3 |
| 1 | ddd | 300 | 4 |
| 2 | fff | 200 | 1 |
| 2 | eee | 400 | 2 |
+------+------+------+-------+
6 rows in set (0.00 sec)
max(EXPR) over(partition by col1 order by col2)
min(EXPR) over(partition by col1 order by col2)
avg(EXPR) over(partition by col1 order by col2)
sum(EXPR) over(partition by col1 order by col2)
count(EXPR) over(partition by col1 order by col2)
mysql> create table test( id int, val int );
mysql> insert into test values(1,1),(1,2),(1,3),(1,4),(1,5),(2,6),
-> (2,7),(2,8),(2,9),(1,3),(1,5);
mysql> select * from test;
+------+------+
| id | val |
+------+------+
| 1 | 1 |
| 1 | 2 |
| 1 | 3 |
| 1 | 4 |
| 1 | 5 |
| 2 | 6 |
| 2 | 7 |
| 2 | 8 |
| 2 | 9 |
| 1 | 3 |
| 1 | 5 |
+------+------+
11 rows in set (0.00 sec)
只有分组,没有排序,显示分组的最大值
mysql> select t.*,max(val) over(partition by id) as MAX from test as t;
+------+------+------+
| id | val | MAX |
+------+------+------+
| 1 | 1 | 5 |
| 1 | 2 | 5 |
| 1 | 3 | 5 |
| 1 | 4 | 5 |
| 1 | 5 | 5 |
| 1 | 3 | 5 |
| 1 | 5 | 5 |
| 2 | 6 | 9 |
| 2 | 7 | 9 |
| 2 | 8 | 9 |
| 2 | 9 | 9 |
+------+------+------+
11 rows in set (0.00 sec)
mysql> select t.*,max(val) over(partition by id order by val) as MAX
-> from test as t;
+------+------+------+
| id | val | MAX |
+------+------+------+
| 1 | 1 | 1 | <--第1行的最大值是1,所以显示1
| 1 | 2 | 2 | <--前面2行的最大值是2,所以显示2
| 1 | 3 | 3 | <--前面3行的最大值是3,所以显示3
| 1 | 3 | 3 | <--前面4行的最大值是3,所以显示3
| 1 | 4 | 4 | <--前面5行的最大值是4,所以显示4
| 1 | 5 | 5 | <--前面6行的最大值是5,所以显示5
| 1 | 5 | 5 | <--前面7行的最大值是5,所以显示5
| 2 | 6 | 6 |
| 2 | 7 | 7 |
| 2 | 8 | 8 |
| 2 | 9 | 9 |
+------+------+------+
11 rows in set (0.00 sec)
其实,在上面这个代码中,完整的显示是这样的:
mysql> select t.*,max(val) over(partition by id order by val range between unbounded preceding and current row)
-> as MAX from test as t;
+------+------+------+
| id | val | MAX |
+------+------+------+
| 1 | 1 | 1 |
| 1 | 2 | 2 |
| 1 | 3 | 3 |
| 1 | 3 | 3 |
| 1 | 4 | 4 |
| 1 | 5 | 5 |
| 1 | 5 | 5 |
| 2 | 6 | 6 |
| 2 | 7 | 7 |
| 2 | 8 | 8 |
| 2 | 9 | 9 |
+------+------+------+
11 rows in set (0.00 sec)
其中代码
range between unbounded preceding and current row
rows between unbounded preceding and current row
mysql> select t.*,max(val) over(partition by id order by val rows between unbounded preceding and unbounded following) as MAX
-> from test as t;
+------+------+------+
| id | val | MAX |
+------+------+------+
| 1 | 1 | 5 |
| 1 | 2 | 5 |
| 1 | 3 | 5 |
| 1 | 3 | 5 |
| 1 | 4 | 5 |
| 1 | 5 | 5 |
| 1 | 5 | 5 |
| 2 | 6 | 9 |
| 2 | 7 | 9 |
| 2 | 8 | 9 |
| 2 | 9 | 9 |
+------+------+------+
11 rows in set (0.00 sec)
rows between unbounded preceding and unbounded following
mysql> #分组之后没有排序,就没有默认的窗口子句,得到的结果是每一组的最大值
mysql> select t.*,sum(val) over(partition by id) as SUM from test as t;
+------+------+------+
| id | val | SUM |
+------+------+------+
| 1 | 1 | 23 |
| 1 | 2 | 23 |
| 1 | 3 | 23 |
| 1 | 4 | 23 |
| 1 | 5 | 23 |
| 1 | 3 | 23 |
| 1 | 5 | 23 |
| 2 | 6 | 30 |
| 2 | 7 | 30 |
| 2 | 8 | 30 |
| 2 | 9 | 30 |
+------+------+------+
11 rows in set (0.00 sec)
示例二,同时使用分组和排序:
mysql> #分组并且排序
mysql> #排序如果没有窗口子句会有一个默认的规则,即range between unbounded preceding and current row
mysql> select t.*,sum(val) over(partition by id order by val)
-> as SUM from test as t;
+------+------+------+
| id | val | SUM |
+------+------+------+
| 1 | 1 | 1 | <--计算前1行的和,开窗结果为1
| 1 | 2 | 3 | <--计算前2行的和,开窗结果为3
| 1 | 3 | 9 | <--计算前3行的和,由于是range逻辑范围,相同的val看作同一行,所以和为1+2+3+3=9
| 1 | 3 | 9 | <--计算前4行的和,该行和第三行同属于一行,所以和为9,开窗结果为9
| 1 | 4 | 13 | <--计算前5行的和,开窗结果为13
| 1 | 5 | 23 | <--计算前6行的和,由于是range逻辑范围,相同的val看作同一行,所以和为23
| 1 | 5 | 23 | <--计算前7行的和,该行和第6行同属于一行,所以和为23,开窗结果为23
| 2 | 6 | 6 |
| 2 | 7 | 13 |
| 2 | 8 | 21 |
| 2 | 9 | 30 |
+------+------+------+
11 rows in set (0.00 sec)
有兴趣的同学可以证明示例二的正确性,在排序之后手动添加窗口子句,一定会得到相同的结果:
mysql> #得到和上面一样的结果Orz
mysql> select t.*,sum(val) over(partition by id order by val range between unbounded preceding and current row)
-> as SUM from test as t;
+------+------+------+
| id | val | SUM |
+------+------+------+
| 1 | 1 | 1 |
| 1 | 2 | 3 |
| 1 | 3 | 9 |
| 1 | 3 | 9 |
| 1 | 4 | 13 |
| 1 | 5 | 23 |
| 1 | 5 | 23 |
| 2 | 6 | 6 |
| 2 | 7 | 13 |
| 2 | 8 | 21 |
| 2 | 9 | 30 |
+------+------+------+
11 rows in set (0.00 sec)
示例三,同时使用了分组和排序,但是窗口从句使用物理范围rows:
mysql> select t.*,sum(val) over(partition by id order by val rows between unbounded preceding and current row)
-> as SUM from test as t;
+------+------+------+
| id | val | SUM |
+------+------+------+
| 1 | 1 | 1 | <--计算前1行的和,开窗结果为1
| 1 | 2 | 3 | <--计算前2行的和,开窗结果为3
| 1 | 3 | 6 | <--计算前3行的和,开窗结果为1+2+3=6
| 1 | 3 | 9 | <--计算前4行的和,开窗结果为1+2+3+3=9
| 1 | 4 | 13 | <--计算前5行的和,开窗结果为1+2+3+3+4=13
| 1 | 5 | 18 | <--计算前6行的和,开窗结果为1+2+3+3+4+5=18
| 1 | 5 | 23 | <--计算前7行的和,开窗结果为1+2+3+3+4+5+5=23
| 2 | 6 | 6 |
| 2 | 7 | 13 |
| 2 | 8 | 21 |
| 2 | 9 | 30 |
+------+------+------+
11 rows in set (0.00 sec)
mysql> #使用rows物理范围
mysql> #使用1 preceding表示当前行的前一行作为起点
mysql> #使用1 following表示当前行的后一行作为终点
mysql> select t.*,max(val) over(partition by id order by val rows between 1 preceding and 1 following)
-> as MAX from test as t;
+------+------+------+
| id | val | MAX |
+------+------+------+
| 1 | 1 | 2 | <--前一行NULL、当前行1、后一行2,比较而得的最大值,开窗结果为2
| 1 | 2 | 3 | <--前一行1、当前行2、后一行3,比较而得的最大值,开窗结果为3
| 1 | 3 | 3 | <--前一行2、当前行3、后一行3,比较而得的最大值,开窗结果为3
| 1 | 3 | 4 | <--前一行3、当前行3、后一行4,比较而得的最大值,开窗结果为4
| 1 | 4 | 5 | <--前一行3、当前行4、后一行5,比较而得的最大值,开窗结果为5
| 1 | 5 | 5 | <--前一行4、当前行5、后一行5,比较而得的最大值,开窗结果为5
| 1 | 5 | 5 | <--前一行5、当前行5、后一行NULL,比较而得的最大值,开窗结果为5
| 2 | 6 | 7 |
| 2 | 7 | 8 |
| 2 | 8 | 9 |
| 2 | 9 | 9 |
+------+------+------+
11 rows in set (0.00 sec)
再来试试使用range逻辑范围,会产生什么奇妙的结果,这次我们使用sum函数
mysql> #使用range逻辑范围
mysql> #使用1 preceding表示当前行的前一行作为起点
mysql> #使用1 following表示当前行的后一行作为终点
mysql> select t.*,sum(val) over(partition by id order by val range between 1 preceding and 1 following)
-> as SUM from test as t;
+------+------+------+
| id | val | SUM |
+------+------+------+
| 1 | 1 | 3 | <--前一行NULL、当前行1、后一行2,1+2=3
| 1 | 2 | 9 | <--前一行1、当前行2、后一行有2个相同的值,逻辑上规定为同一行的3,1+2+3+3=9
| 1 | 3 | 12 | <--前一行2、当前行有2个相同的值,逻辑上规定为同一行的3、后一行4,2+3+3+4=12
| 1 | 3 | 12 | <--前一行2、当前行有2个相同的值,逻辑上规定为同一行的3、后一行4,2+3+3+4=12
| 1 | 4 | 20 | <--前一行有2个相同的值,逻辑上规定为同一行的3、当前行4、后一行有2个相同的值,逻辑上规定为同一行的5,3+3+4+5+5=20
| 1 | 5 | 14 | <--前一行4、当前行有2个相同的值,逻辑上规定为同一行的5、后一行NULL,4+5+5=14
| 1 | 5 | 14 | <--前一行4、当前行有2个相同的值,逻辑上规定为同一行的5、后一行NULL,4+5+5=14
| 2 | 6 | 13 |
| 2 | 7 | 21 |
| 2 | 8 | 24 |
| 2 | 9 | 17 |
+------+------+------+
11 rows in set (0.00 sec)
作者:五四青年
来源:https://zhuanlan.zhihu.com/p/514345120