这篇讲些聚合的手段
1.OVER子句
我们所了解的OVER字句不仅仅是用于排序上,其实它还可以用于标量的聚合函数--为每一行计算聚合,不需要你去分组.(分组这个点我觉得很舒服)
我在上个笔记中其实已经用到了这个用法 我再换个例子说说:
create table #(id int,a int)
insert # select
1,2 union all select
1,3 union all select
1,4 union all select
1,5 union all select
2,2 union all select
2,4 union all select
2,8 union all select
2,8
select id,a,
SUM(a) over(partition by id ),--分组总和
AVG(a) over(partition by id ),--分组平均
COUNT(a) over(partition by id ),--分组计数
MAX(a) over (partition by id )--分组最大数
from #
/*
id a
----------- ----------- ----------- ----------- ----------- -----------
1 2 14 3 4 5
1 3 14 3 4 5
1 4 14 3 4 5
1 5 14 3 4 5
2 2 22 5 4 8
2 4 22 5 4 8
2 8 22 5 4 8
2 8 22 5 4 8
*/
-------------
上面的SUM(a) over(partition by id)等价于(select sum(a) from # group by id)
我在上篇学习笔记也提过了,用OVER()聚合的效率比子查询 高非常多.
2.关于多属性的比较
create table #test (id int,a int,b int,c int)
insert #test select
1,2,3,5,7,8 union all select
2,1,9,9
--这个题目我们要求出根据ID分组,求出最大的一条记录..这个所谓的最大就是先比A 再比B 再比C 要求结果:
/*
1,8
2,3
*/
--如果有这么个写法 sql 可以支持就好了-- || 可惜没有
select ID,MAX(a,b,C)
from #test
group by ID
--普通做法,依次比较
select *
from #test k
where not exists(select * from #test
where id=k.id and (a>k.a or a=k.a and b>k.b or a=k.a and b=k.b and c>k.c))
--把每个字符拼接起来,一次比完大小,这里注意将字段要转化成相同的长度,这个的优点是无论是否有好的索引,因为它只扫描一次
select
ID,
a=SUBSTRING(COL,5),
b=SUBSTRING(COL,6,
c=SUBSTRING(COL,11,5)
from (
select ID,
MAX(CAST(a as char(5))+CAST(b as char(5))+CAST(c as char(5))) as col
from #test
group by ID
) l
3.PIVOT
这个基本语法大家都知道,我只说一点吧,你不能旋转多列属性,除非你提前在CTE 或者派生表里面处理好 ,如下:
create table #p(empid int,name varchar(10),val int)
insert #p select
1,'a','b',2 --drop table #p
;with cte as
(
select RTRIM(empid)+'_'+name as e_name,val from #p
)
select *
from cte pivot
(sum(val) for e_name in ([1_a],[1_b],[2_a],[2_b])) l
/*
1_a 1_b 2_a 2_b
----------- ----------- ----------- -----------
9 4 7 2
*/
-------------
至于Unpivot 简单理解就是Pivot的反向操作.当然注意一点:被旋转的数据必须具有相同的数据类型.
4。聚合问题
a.连接字符串问题:这个论坛里已经写烂了,2000用函数,2005用XML PATH 我就不写
这里提供个新的方法:
create table #p(empid int,name varchar(10))
insert #p select
1,'a'union all select
1,'b' union all select
1,'c'union all select
1,'d'union all select
2,'a' union all select
2,'t'union all select
2,'v' --drop table #p
--前提是name不能在同一组里出现重复,且每组内最大记录数不是很大
select
empid,
name=MAX(case when rn=1 then name else '' end)+MAX(case when rn=2 then ','+name else '' end)
+MAX(case when rn=3 then ','+name else '' end)+MAX(case when rn=4 then ','+name else '' end)
from(
select empid,name,
(select COUNT(*) from #p where k.empid=empid and k.name>=name) as rn
from #p k )z
group by empid
/*
empid name
----------- -------------------------------------------
1 a,c,d
2 a,t,v
*/
b.组内数字连乘
create table #p(empid int,val varchar(10))
insert #p select
1,2union all select
1,6union all select
1,3union all select
1,4union all select
2,2union all select
2,3union all select
2,6 --drop table #p
--loga(b)=x 等价于 power(a,x)=b
--loga(v1*......*vn)=loga(v1)+.....+loga(vn)
--v1*v2*....*vn=power(10,log10(v1*v2*....*vn))==POWER(10,sum(LOG10(val)))
select empid,
乘积=POWER(10,sum(LOG10(val)))
from #p
group by empid
/*
empid 乘积
----------- -----------
1 144
2 36
*/
c.取中值(昨天正好有个这个问题的贴)
--按rp分组、ddsj排序,选择出每组摆在正中间的记录行(若为偶数行的话,选择摆在正中间的两条记录行)
create table a(rq varchar(8),ddsj int)
insert into a
select
'200805',30 union all select
'200805',40 union all select
'200805',50 union all select
'200805',20 union all select
'200806',250 union all select
'200806',200 union all select
'200806',310 union all select
'200806',100 union all select
'200806',130;
--方法1:利用位置的收尾呼应~
select rq,ddsj=AVG(ddsj)
from
(select ROW_NUMBER() over(partition by rq order by ddsj ) as rn1,
ROW_NUMBER() over(partition by rq order by ddsj desc ) as rn2
,* from a ) k
where abs(rn1-rn2)<=1
--方法2:利用位置的收尾呼应~
select rq,ddsj=AVG(ddsj)
from
(select ROW_NUMBER() over(partition by rq order by ddsj) as rn1,
COUNT(*) over(partition by rq) as rn
,* from a ) k
where abs(2*rn1-rn-1)<=1
/*
rq ddsj
-------- -----------
200805 35
200806 200
*/