select @list = @list + cast(foo as varchar(10)) + ',' from #test2
But I noticed that this technique was used in many pieces of code in our system, and it bothered me, because I was pretty sure that creating and joining to temp tables would be faster than assembling long strings and then using them with "in" clauses. I wasn't positive, however, so I assembled this test script:
SET NOCOUNT ON
if OBJECT_ID('tempdb..#timing') is not null
drop table #timing
if OBJECT_ID('tempdb..#test1') is not null
drop table #test1
if OBJECT_ID('tempdb..#test2') is not null
drop table #test2
declare
@i int
, @items int
set @items = 1022
create table #timing
(
testrun varchar(100)
, setupStartTime datetime
, setupEndTime datetime
, setupElapsed as datediff(ms, setupStartTime, setupEndTime)
, queryStartTime datetime
, queryEndTime datetime
, queryElapsed as datediff(ms, queryStartTime, queryEndTime)
, totalElapsed as datediff(ms, setupStartTime, setupEndTime) + datediff(ms, queryStartTime, queryEndTime)
)
select
identity(int, 10403, 96) as foo
, cast('some text' as varchar(20)) as bar
into
#test1
set @i = 0
while @i < @items begin insert into #test1 select 'cowabunga' set @i = @i + 1 end insert into #timing (testRun, setupStartTime) values ('Temp Table', getDate()) select foo into #test2 from #test1 create clustered index ix on #test2 (foo) update #timing set setupEndTime = getDate() where testRun = 'Temp Table' create clustered index ix on #test1 (foo) insert into #timing (testRun, setupStartTime) values ('List', getDate()) declare @list varchar(7000), @curID int set @list = '(' select @curID = min(foo) from #test2 --while @curID is not null --begin -- select @list = @list + cast(@curId as varchar(10)) + ',' -- select @curId = min(foo) from #test2 where foo > @curID
--end
select @list = @list + cast(foo as varchar(10)) + ',' from #test2
set @list = substring(@list, 0, len(@list) - 1) + ')'
declare @sql varchar(8000)
set @sql = 'select * from #test1 where foo in ' + @list
update #timing
set setupEndTime = getDate()
where testRun = 'List'
print(@sql)
update #timing
set queryStartTime = getDate()
where testRun = 'List'
exec (@sql)
update #timing
set queryEndTime = getDate()
where testRun = 'List'
update #timing
set queryStartTime = getDate()
where testRun = 'Temp Table'
select t1.* from #test1 t1
inner join #test2 t2
on t1.foo = t2.foo
update #timing
set queryEndTime = getDate()
where testRun = 'Temp Table'
select * from #timing
The results of this test vary somewhat, so it would be best to run the whole thing in a loop to assemble a statistically valid set of data, but the difference between the two methods is so pronounced that the SD doesn't really matter. Ready for the (approximate) difference?
Temp tables are 3x faster then strings.
Here's a typical result of this script running on our QA server, an 8-CPU 20-GB box running SQL 2000 build 2171:
Running it repeatedly yielded similar results with some higher outliers, but very few lower ones. The next problem will be how to propose changing our coding practices and re-writing a lot of stored procs. Stay tuned for that one, and possibly an entry about my subsequent de-hiring.
No comments:
Post a Comment