改进此查询(按行过去的销售总和)
我正试图改进此查询。它来自一个更大的桌子,但是我已经把基本要素放在了下面的问题上。此表格为我们提供了特定月份,商店和产品组的销售情况。对于每一行(即月份/商店/产品组合),我需要在此之前的两个月和四个月之前的销售额总和。改进此查询(按行过去的销售总和)
我在下面给出的正确值的工作很好,但它是一个大表上的性能猪。我使用PRECEDING/FOLLOWING约束考察了OVER子句,但我使用的SQL Server 2008不支持这些。你看到一个更好的方法来重写这个并给出相同的期望结果吗?谢谢。
create table #sales_by_month
(
period int, --YYYYMM
store varchar(8), --store number
product_group varchar(8),
sales int
)
insert into #sales_by_month values (201701, 51, 'shoes', 12)
insert into #sales_by_month values (201701, 51, 'clothes', 15)
insert into #sales_by_month values (201701, 12, 'shoes', 10)
insert into #sales_by_month values (201701, 12, 'clothes', 9)
insert into #sales_by_month values (201702, 51, 'shoes', 0)
insert into #sales_by_month values (201702, 51, 'clothes', 20)
insert into #sales_by_month values (201702, 12, 'shoes', 30)
insert into #sales_by_month values (201702, 12, 'clothes', 8)
insert into #sales_by_month values (201703, 51, 'shoes', 7)
insert into #sales_by_month values (201703, 51, 'clothes', 4)
insert into #sales_by_month values (201703, 12, 'shoes', 21)
insert into #sales_by_month values (201703, 12, 'clothes', 0)
insert into #sales_by_month values (201704, 51, 'shoes', 50)
insert into #sales_by_month values (201704, 51, 'clothes', 4)
insert into #sales_by_month values (201704, 12, 'shoes', 16)
insert into #sales_by_month values (201704, 12, 'clothes', 20)
insert into #sales_by_month values (201705, 51, 'shoes', 21)
insert into #sales_by_month values (201705, 51, 'clothes', 17)
insert into #sales_by_month values (201705, 12, 'shoes', 0)
insert into #sales_by_month values (201705, 12, 'clothes', 5)
select
period,
store,
product_group,
(select sum(sales)
from #sales_by_month x2
where x2.store = #sales_by_month.store
and x2.product_group = #sales_by_month.product_group
and left(x2.period, 4) * 12 + right(x2.period, 2)
between left(#sales_by_month.period, 4) * 12 + right(#sales_by_month.period, 2) - 1
and left(#sales_by_month.period, 4) * 12 + right(#sales_by_month.period, 2)) sales_to_date_last_2_months,
(select sum(sales)
from #sales_by_month x4
where x4.store = #sales_by_month.store
and x4.product_group = #sales_by_month.product_group
and left(x4.period, 4) * 12 + right(x4.period, 2)
between left(#sales_by_month.period, 4) * 12 + right(#sales_by_month.period, 2) - 3
and left(#sales_by_month.period, 4) * 12 + right(#sales_by_month.period, 2)) sales_to_date_last_4_months
from
#sales_by_month
--drop table #sales_by_month
您可以使用窗口子句与sum()
开始在SQL Server 2012中假设你有每个月的数据和存储
select sbm.*,
sum(sales) over (partition by store, productgroup
order by period
rows between 4 preceding and 2 preceding
) as sales_2_4
from #sales_by_month sbm;
这并不在SQL Server 2008的工作,我会建议apply
:
with sbm as (
select sbm.*,
row_number() over (partition by store, productgroup order by period) as seqnum
from #sales_by_month sbm
)
select sbm.*, sbm2.sales_2_4
from sbm outer apply
(select sum(sbm2.sales) as sales_2_4
from sbm sbm2
where sbm2.store = sbm.store and sbm2.productgroup = sbm2.productgroup and
sbm2.seqnum between sbm.seqnum - 4 and sbm.seqnum - 2
) sbm2
这很有趣,我会尝试一下......但是有没有什么内在的建议它会比我的查询更快? – bvy
@bvy。 。 。对于单个色谱柱来说可能不会更快。但是,您可以使用'apply'在单个子查询中计算两个销售数量,并且对于两列而言,只有一次通过该表,而不是每列一次。 –
试试这个:
SELECT CONVERT(DATE,SUBSTRING(CONVERT(NVARCHAR,period),1,4) +'-'+ SUBSTRING(CONVERT(NVARCHAR,period),5,2) + N'-01')periodDt
,DATEADD(MONTH,2,CONVERT(DATE,SUBSTRING(CONVERT(NVARCHAR,period),1,4) +'-'+ SUBSTRING(CONVERT(NVARCHAR,period),5,2) + N'-01'))periodDt2
,DATEADD(MONTH,4,CONVERT(DATE,SUBSTRING(CONVERT(NVARCHAR,period),1,4) +'-'+ SUBSTRING(CONVERT(NVARCHAR,period),5,2) + N'-01'))periodDt4
,* INTO #t1 FROM #sales_by_month
SELECT periodDt,periodDt2,periodDt4,period,store,product_group, SUM(sales)sales INTO #t2 FROM #t1 GROUP BY periodDt,periodDt2,periodDt4,period,store,product_group
SELECT ISNULL([2m].store,[4m].store),ISNULL([2m].product_group,[4m].product_group),ISNULL(sales2month,0),ISNULL(sales4month,0) FROM
(SELECT store,product_group,sales sales2month FROM #t2 WHERE perioddt2 >= GETDATE())[2m]
FULL JOIN
(SELECT store,product_group,sales sales4month FROM #t2 WHERE perioddt4 >= GETDATE())[4m]
ON [2m].store = [4m].store AND [2m].product_group = [4m].product_group
谢谢。临时表并不是我工作的环境中的一个选项。我可以使用CTE,但不知道这是否会给我带来任何性能提升。我会更仔细地看看你的答案。 – bvy
我在这里看到的最大问题是子查询中的所有非SARGable谓词。当将这些列包装到一个像这样的函数中时,意味着不能使用索引。这就是我们为什么喜欢使用适当的数据类型的原因。使用数据类型的日期添加计算列可能会有助于解决很多情况。但对于这个查询,我认为你可以使这个更简单一些。 –
@SeanLange - 你的意思是一段适当的数据类型吗?然后使用本地日期计算函数(DATEADD(),DATEDIFF())而不是所有数学来使BETWEEN子句工作? – bvy
这正是我的意思。然后,所有的日期计算都会针对GETDATE和存储的日期值。 –