改进此查询(按行过去的销售总和)

问题描述:

我正试图改进此查询。它来自一个更大的桌子,但是我已经把基本要素放在了下面的问题上。此表格为我们提供了特定月份,商店和产品组的销售情况。对于每一行(即月份/商店/产品组合),我需要在此之前的两个月和四个月之前的销售额总和。改进此查询(按行过去的销售总和)

我在下面给出的正确值的工作很好,但它是一个大表上的性能猪。我使用PRECEDING/FOLLOWING约束考察了OVER子句,但我使用的SQL Server 2008不支持这些。你看到一个更好的方法来重写这个并给出相同的期望结果吗?谢谢。

create table #sales_by_month 
(
    period int, --YYYYMM 
    store varchar(8), --store number 
    product_group varchar(8), 
    sales int 
) 

insert into #sales_by_month values (201701, 51, 'shoes', 12) 
insert into #sales_by_month values (201701, 51, 'clothes', 15) 
insert into #sales_by_month values (201701, 12, 'shoes', 10) 
insert into #sales_by_month values (201701, 12, 'clothes', 9) 
insert into #sales_by_month values (201702, 51, 'shoes', 0) 
insert into #sales_by_month values (201702, 51, 'clothes', 20) 
insert into #sales_by_month values (201702, 12, 'shoes', 30) 
insert into #sales_by_month values (201702, 12, 'clothes', 8) 
insert into #sales_by_month values (201703, 51, 'shoes', 7) 
insert into #sales_by_month values (201703, 51, 'clothes', 4) 
insert into #sales_by_month values (201703, 12, 'shoes', 21) 
insert into #sales_by_month values (201703, 12, 'clothes', 0) 
insert into #sales_by_month values (201704, 51, 'shoes', 50) 
insert into #sales_by_month values (201704, 51, 'clothes', 4) 
insert into #sales_by_month values (201704, 12, 'shoes', 16) 
insert into #sales_by_month values (201704, 12, 'clothes', 20) 
insert into #sales_by_month values (201705, 51, 'shoes', 21) 
insert into #sales_by_month values (201705, 51, 'clothes', 17) 
insert into #sales_by_month values (201705, 12, 'shoes', 0) 
insert into #sales_by_month values (201705, 12, 'clothes', 5) 

select 
    period, 
    store, 
    product_group, 
    (select sum(sales) 
    from #sales_by_month x2 
    where x2.store = #sales_by_month.store 
     and x2.product_group = #sales_by_month.product_group 
     and left(x2.period, 4) * 12 + right(x2.period, 2) 
       between left(#sales_by_month.period, 4) * 12 + right(#sales_by_month.period, 2) - 1 
        and left(#sales_by_month.period, 4) * 12 + right(#sales_by_month.period, 2)) sales_to_date_last_2_months, 
    (select sum(sales) 
    from #sales_by_month x4 
    where x4.store = #sales_by_month.store 
     and x4.product_group = #sales_by_month.product_group 
     and left(x4.period, 4) * 12 + right(x4.period, 2) 
       between left(#sales_by_month.period, 4) * 12 + right(#sales_by_month.period, 2) - 3 
        and left(#sales_by_month.period, 4) * 12 + right(#sales_by_month.period, 2)) sales_to_date_last_4_months 
from 
    #sales_by_month 

--drop table #sales_by_month 
+6

我在这里看到的最大问题是子查询中的所有非SARGable谓词。当将这些列包装到一个像这样的函数中时,意味着不能使用索引。这就是我们为什么喜欢使用适当的数据类型的原因。使用数据类型的日期添加计算列可能会有助于解决很多情况。但对于这个查询,我认为你可以使这个更简单一些。 –

+0

@SeanLange - 你的意思是一段适当的数据类型吗?然后使用本地日期计算函数(DATEADD(),DATEDIFF())而不是所有数学来使BETWEEN子句工作? – bvy

+0

这正是我的意思。然后,所有的日期计算都会针对GETDATE和存储的日期值。 –

您可以使用窗口子句与sum()开始在SQL Server 2012中假设你有每个月的数据和存储

select sbm.*, 
     sum(sales) over (partition by store, productgroup 
         order by period 
         rows between 4 preceding and 2 preceding 
         ) as sales_2_4 
from #sales_by_month sbm; 

这并不在SQL Server 2008的工作,我会建议apply

with sbm as (
     select sbm.*, 
       row_number() over (partition by store, productgroup order by period) as seqnum 
     from #sales_by_month sbm 
    ) 
select sbm.*, sbm2.sales_2_4 
from sbm outer apply 
    (select sum(sbm2.sales) as sales_2_4 
     from sbm sbm2 
     where sbm2.store = sbm.store and sbm2.productgroup = sbm2.productgroup and 
      sbm2.seqnum between sbm.seqnum - 4 and sbm.seqnum - 2 
    ) sbm2 
+0

这很有趣,我会尝试一下......但是有没有什么内在的建议它会比我的查询更快? – bvy

+0

@bvy。 。 。对于单个色谱柱来说可能不会更快。但是,您可以使用'apply'在单个子查询中计算两个销售数量,并且对于两列而言,只有一次通过该表,而不是每列一次。 –

试试这个:

SELECT CONVERT(DATE,SUBSTRING(CONVERT(NVARCHAR,period),1,4) +'-'+ SUBSTRING(CONVERT(NVARCHAR,period),5,2) + N'-01')periodDt 
,DATEADD(MONTH,2,CONVERT(DATE,SUBSTRING(CONVERT(NVARCHAR,period),1,4) +'-'+ SUBSTRING(CONVERT(NVARCHAR,period),5,2) + N'-01'))periodDt2 
,DATEADD(MONTH,4,CONVERT(DATE,SUBSTRING(CONVERT(NVARCHAR,period),1,4) +'-'+ SUBSTRING(CONVERT(NVARCHAR,period),5,2) + N'-01'))periodDt4 
,* INTO #t1 FROM #sales_by_month 

SELECT periodDt,periodDt2,periodDt4,period,store,product_group, SUM(sales)sales INTO #t2 FROM #t1 GROUP BY periodDt,periodDt2,periodDt4,period,store,product_group 

SELECT ISNULL([2m].store,[4m].store),ISNULL([2m].product_group,[4m].product_group),ISNULL(sales2month,0),ISNULL(sales4month,0) FROM 
(SELECT store,product_group,sales sales2month FROM #t2 WHERE perioddt2 >= GETDATE())[2m] 
FULL JOIN 
(SELECT store,product_group,sales sales4month FROM #t2 WHERE perioddt4 >= GETDATE())[4m] 
ON [2m].store = [4m].store AND [2m].product_group = [4m].product_group 
+0

谢谢。临时表并不是我工作的环境中的一个选项。我可以使用CTE,但不知道这是否会给我带来任何性能提升。我会更仔细地看看你的答案。 – bvy