Saturday, October 10, 2009

Optimize SQL Like Comparison for Retrieving Specific or All Result

My colleague found a strange case with his SQL. The SQL took more than two minutes to execute.
declare @Num int;
declare @Date datetime;
declare @CategoryId varchar(255);
declare @ProductionId varchar(50);
declare @UserId varchar(50);
select @Num=-1,@Date=getdate(),@CategoryId='All',@ProductionId='All',@UserId='ALEX';
select distinct (ROW_NUMBER() OVER (ORDER BY v.ProductVariantId ASC)+1)*-1 as FGLongtermPlanDetailNum,@Num as LongTermPlanNum,v.Warehouse_Dep,v.ProductVariantId,v.ProductVariantName,u.UnitId,isnull(SOQty,0) as TotalQtySO,isnull(fg.FGStock,0) as TotalFGStock,0 as FGRequirementQty,p.Weight,v.WeightPerBatch,@UserId as UpdateUserId,GETDATE() as UpdateTime,@UserId as CreateUserId,GETDATE() as CreateTime
from productvariant as v inner join productunit as pu on v.productid=pu.productid and pu.Warehouse_Dep=v.Warehouse_Dep inner join product p on p.productid=pu.productid and pu.Warehouse_Dep=p.Warehouse_Dep and (CategoryId like case when @CategoryId='All' then '%' else @CategoryId end) and (ProductionId like case when @ProductionId='All' then '%' else @ProductionId end)
inner join unit as u on pu.unitid=u.unitid
left join (select UnitId,VariantId,ProductId,Warehouse_Dep,sum(LeftQuantity) SOQty from soproduct where LeftQuantity<>0 and SODate<>'GR')
group by ProductVariantId,UnitId,ProductId) fg on fg.ProductId=v.ProductId and fg.ProductVariantId=v.ProductVariantId and v.Warehouse_Dep='FG' and u.UnitId=fg.UnitId
where v.Warehouse_Dep='FG' and u.IsExtended=0 and v.IsDisable=0;
But when he hard coded @ProductionId and @CategoryId into 'All' (in bold):
declare @Num int;
declare @Date datetime;
declare @CategoryId varchar(255);
declare @ProductionId varchar(50);
declare @UserId varchar(50);
select @Num=-1,@Date=getdate(),@CategoryId='All',@ProductionId='All',@UserId='ALEX';
select distinct (ROW_NUMBER() OVER (ORDER BY v.ProductVariantId ASC)+1)*-1 as FGLongtermPlanDetailNum,@Num as LongTermPlanNum,v.Warehouse_Dep,v.ProductVariantId,v.ProductVariantName,u.UnitId,isnull(SOQty,0) as TotalQtySO,isnull(fg.FGStock,0) as TotalFGStock,0 as FGRequirementQty,p.Weight,v.WeightPerBatch,@UserId as UpdateUserId,GETDATE() as UpdateTime,@UserId as CreateUserId,GETDATE() as CreateTime
from productvariant as v inner join productunit as pu on v.productid=pu.productid and pu.Warehouse_Dep=v.Warehouse_Dep inner join product p on p.productid=pu.productid and pu.Warehouse_Dep=p.Warehouse_Dep and (CategoryId like case when 'All'='All' then '%' else 'All' end) and (ProductionId like case when 'All'='All' then '%' else 'All' end)
inner join unit as u on pu.unitid=u.unitid
left join (select UnitId,VariantId,ProductId,Warehouse_Dep,sum(LeftQuantity) SOQty from soproduct where LeftQuantity<>0 and SODate<>'GR')
group by ProductVariantId,UnitId,ProductId) fg on fg.ProductId=v.ProductId and fg.ProductVariantId=v.ProductVariantId and v.Warehouse_Dep='FG' and u.UnitId=fg.UnitId
where v.Warehouse_Dep='FG' and u.IsExtended=0 and v.IsDisable=0;
The query only took two seconds to execute. After several trial and error efforts, we finally found the root cause. SQL Server seems to hate our like expression. Like expression used for giving freedom to user whether to retrieve specific or all result. So we change the like operator into equal operator (=) like this (changes are in bold):
declare @Num int;
declare @Date datetime;
declare @CategoryId varchar(255);
declare @ProductionId varchar(50);
declare @UserId varchar(50);
select @Num=-1,@Date=getdate(),@CategoryId='All',@ProductionId='All',@UserId='ALEX';
declare @MyCategoryId varchar(255);
declare @MyProductionId varchar(50);
set @MyCategoryId=case when @CategoryId='All' then '%' else @CategoryId end;
set @MyProductionId=case when @ProductionId='All' then '%' else @ProductionId end;

select distinct (ROW_NUMBER() OVER (ORDER BY v.ProductVariantId ASC)+1)*-1 as FGLongtermPlanDetailNum,@Num as LongTermPlanNum,v.Warehouse_Dep,v.ProductVariantId,v.ProductVariantName,u.UnitId,isnull(SOQty,0) as TotalQtySO,isnull(fg.FGStock,0) as TotalFGStock,0 as FGRequirementQty,p.Weight,v.WeightPerBatch,@UserId as UpdateUserId,GETDATE() as UpdateTime,@UserId as CreateUserId,GETDATE() as CreateTime
from productvariant as v inner join productunit as pu on v.productid=pu.productid and pu.Warehouse_Dep=v.Warehouse_Dep inner join product p on p.productid=pu.productid and pu.Warehouse_Dep=p.Warehouse_Dep and (CategoryId=@MyCategoryId or @MyCategoryId='%') and (ProductionId=@MyProductionId or @MyProductionId='%')
inner join unit as u on pu.unitid=u.unitid
left join (select UnitId,VariantId,ProductId,Warehouse_Dep,sum(LeftQuantity) SOQty from soproduct where LeftQuantity<>0 and SODate<>'GR')
group by ProductVariantId,UnitId,ProductId) fg on fg.ProductId=v.ProductId and fg.ProductVariantId=v.ProductVariantId and v.Warehouse_Dep='FG' and u.UnitId=fg.UnitId
where v.Warehouse_Dep='FG' and u.IsExtended=0 and v.IsDisable=0;
And it took only one second to execute, EUREKA. The only changes are:
CategoryId like case when @CategoryId='All' then '%' else @CategoryId end
Changed into:
CategoryId=@MyCategoryId or @MyCategoryId='%'
May be you're wondering why should I redeclare @ProductionId and @CategoryId into @MyProductionId and @MyCategoryId. I need to redeclare the variable for avoiding SQL Server's parameter sniffing disease in stored procedure. Parameter sniffing will cause your stored procedure execute way slower than the bare query. Just follow this rule of thumb for avoiding parameter sniffing: Do not assign values to parameters. Redeclare your parameters into local variables and assign values to local variables instead of parameters.
...Read more