Chunking of Data From Oracle to SQL Server

  • We currently use stored procedures for the majority of our ETL process. We have a table that is truncated and loaded on a nightly basis; it has over 3 million records. We are running into performance issues in regard to bulk loading all of that data from Oracle to SQL Server at once, so I am looking for a way to load the data into SQL Server from Oracle in chunks. I am starting to think SSIS is going to be the solution. Thanks.

  • If you are bulk loading (using BULK INSERT or BCP), you can specify the batch size, and even the network packet size if it applies in your situation.
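
    For example, a minimal sketch with the bcp command line (the server, database, table, and file names here are placeholders): -c loads character data, -t sets the field terminator, -b sets the number of rows committed per batch, and -a sets the network packet size in bytes.

    bcp StagingDB.dbo.MyTable in C:\exports\MyTable.csv -S MySqlServer -T -c -t"," -b 50000 -a 32768

    The rough BULK INSERT equivalent, again with placeholder names:

    BULK INSERT StagingDB.dbo.MyTable
    FROM 'C:\exports\MyTable.csv'
    WITH (FIELDTERMINATOR = ',', ROWTERMINATOR = '\n', BATCHSIZE = 50000);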

    The probability of survival is inversely proportional to the angle of arrival.

  • Below is the stored procedure I am using to pull the data. You will see that I am currently using Oracle's ROWNUM to do the chunking. Could I use BCP to do this?

    BEGIN
        SET NOCOUNT ON;

        IF @piStartRowNumber IS NOT NULL AND @piStartRowNumber < 1 SET @piStartRowNumber = 1

        DECLARE @procedure_name varchar(200)
        DECLARE @dtStart datetime
        DECLARE @dtEnd datetime

        BEGIN TRY
            --log start to buildlog
            SET @procedure_name = 'FullLoadTable: ' + @pTablename
            SET @dtStart = GETDATE()
            EXECUTE psp_BI_Build_Log @@SPID, @procedure_name, 'Start', @dtStart, @dtEnd, 'V2'

            SET @pTablename = UPPER(@pTablename)

            DECLARE @tSQL varchar(max), @tsql2 varchar(max)
            DECLARE @tTmp varchar(max)
            DECLARE @tSchema varchar(100), @tTable varchar(100), @tDotIndex int, @tLoadTable varchar(100)
            DECLARE @tColumnName varchar(max), @tColumnType varchar(20), @tColumnLength int, @tColumnScale int
            DECLARE @tNewColumnLength int, @tNewColumnPrecision int, @tNewColumnScale int
            DECLARE @tRowCount int
            DECLARE @tLogID bigint

            --split 'schema.table' into its parts
            SET @tDotIndex = CHARINDEX('.', @pTablename)
            SET @tSchema = LEFT(@pTablename, @tDotIndex - 1)
            SET @tTable = SUBSTRING(@pTablename, @tDotIndex + 1, 100)

            --validate the table exists in staging
            EXEC BIBuild.ValidateSchemaAndTable
                @schemaName = @tSchema, -- varchar
                @tableName = @tTable    -- varchar(100)

            SET @tLoadTable = @tTable

            IF @piStartRowNumber > 1
            BEGIN
                EXEC ('truncate table ' + @tSchema + '.' + @tLoadTable)
            END

            --disable all indexes ahead of the load
            EXEC ('alter index all on ' + @tSchema + '.' + @tLoadTable + ' disable')

            --check if there's a clustered index and re-enable it if so
            SELECT @tTmp = si.name
            FROM sys.indexes si
            JOIN sys.all_objects sao ON si.object_id = sao.object_id AND sao.name = @tLoadTable
            JOIN sys.schemas ss ON ss.schema_id = sao.schema_id AND ss.name = @tSchema
            WHERE si.type = 1

            IF (@tTmp IS NOT NULL)
                EXEC ('alter index ' + @tTmp + ' on ' + @tSchema + '.' + @tLoadTable + ' rebuild')

            --make sure local column definitions match remote
            EXEC BIBuild.MatchToOracle @tSchema, @tLoadTable, 'F'

            --build the SQL statement to copy the data
            DECLARE cColumnList CURSOR FOR
                SELECT lc.name, st.name, lc.scale
                FROM sys.columns lc
                JOIN sys.all_objects sao ON lc.object_id = sao.object_id AND sao.name = @tLoadTable
                JOIN sys.schemas ss ON ss.schema_id = sao.schema_id AND ss.name = @tSchema
                JOIN sys.types st ON st.system_type_id = lc.system_type_id
                JOIN (SELECT COLUMN_NAME, DATA_TYPE FROM PMI..SYS.DBA_TAB_COLS
                      WHERE OWNER = @tSchema AND TABLE_NAME = @tTable) rc
                  ON rc.COLUMN_NAME = lc.name
                WHERE st.name <> 'sysname'
                ORDER BY column_id

            SET @tSQL = ''
            SET @tsql2 = ''

            OPEN cColumnList
            FETCH NEXT FROM cColumnList INTO @tColumnName, @tColumnType, @tColumnScale

            WHILE @@FETCH_STATUS = 0
            BEGIN
                SET @tSQL = @tSQL + '[' + @tColumnName + ']'
                SET @tsql2 = @tsql2 + (SELECT BIBuild.OpenQueryColumnFix(@tColumnType, @tColumnName))

                FETCH NEXT FROM cColumnList INTO @tColumnName, @tColumnType, @tColumnScale
                IF @@FETCH_STATUS = 0
                BEGIN
                    SET @tSQL = @tSQL + ','
                    SET @tsql2 = @tsql2 + ','
                END
            END

            CLOSE cColumnList;
            DEALLOCATE cColumnList;

            --TODO: Add error handling

            --copy data
            DECLARE @insertQuery VARCHAR(MAX)
            SET @insertQuery = 'insert into ' + @tSchema + '.' + @tLoadTable + ' (' + @tSQL + ') select ' + @tSQL
                + ' from OpenQuery(PMI, ''select b.* from (select a.*, rownum rnum from (select ' + @tSQL2
                + ' from ' + @tSchema + '.' + @tTable

            DECLARE @insertQueryFinal VARCHAR(MAX)
            DECLARE @startRowNumber INT = @piStartRowNumber
            DECLARE @endRowNumber INT = @startRowNumber + @piChunkInterval

            /* Added to do chunking. Orders the result set by the primary keys and
               uses row number between a range. If no primary key or no chunk
               interval is entered, then it just loads it all at once.
               Q 7.12.2011 */
            IF @piPrimaryKey IS NOT NULL AND @piChunkInterval IS NOT NULL
            BEGIN
                --build the ORDER BY clause from the primary key column list
                DECLARE @orderByClause VARCHAR(MAX)
                DECLARE @column_name VARCHAR(50)

                DECLARE primaryColumnCursor CURSOR FOR
                    SELECT column_name FROM BiBuild.ParsePrimaryColumns(@piPrimaryKey)

                OPEN primaryColumnCursor
                FETCH NEXT FROM primaryColumnCursor INTO @column_name
                SET @orderByClause = ' order by '

                WHILE @@FETCH_STATUS = 0
                BEGIN
                    SET @orderByClause = @orderByClause + @column_name
                    FETCH NEXT FROM primaryColumnCursor INTO @column_name
                    IF @@FETCH_STATUS = 0 SET @orderByClause = @orderByClause + ','
                END

                CLOSE primaryColumnCursor
                DEALLOCATE primaryColumnCursor

                --get the total row count from the Oracle side
                DECLARE @totalRowNumber INT
                CREATE TABLE #row_count (total int)
                DECLARE @countQuery VARCHAR(MAX) = 'select total from openquery(PMI, ''select count(*) as total from ' + @tSchema + '.' + @tTable + ''')'
                INSERT INTO #row_count EXEC (@countQuery)
                SET @totalRowNumber = (SELECT total FROM #row_count)

                --loop over the table one chunk at a time
                WHILE @startRowNumber <= @totalRowNumber
                BEGIN
                    DECLARE @text VARCHAR(2000) = 'ROWNUM: ' + CAST(@startRowNumber AS VARCHAR) + ' : ' + CAST(@endRowNumber AS VARCHAR)
                    RAISERROR(@text, 0, 1) WITH NOWAIT
                    SET @text = CONVERT(VARCHAR, GETDATE(), 109)
                    RAISERROR(@text, 0, 1) WITH NOWAIT

                    SET @insertQueryFinal = @insertQuery + @orderByClause + ') a) b '
                    SET @insertQueryFinal = @insertQueryFinal + ' where b.rnum >= ' + CAST(@startRowNumber AS VARCHAR) + ' and b.rnum <= ' + CAST(@endRowNumber AS VARCHAR)
                    SET @insertQueryFinal = @insertQueryFinal + ''')'
                    RAISERROR(@insertQueryFinal, 0, 1) WITH NOWAIT

                    EXEC (@insertQueryFinal)

                    SET @startRowNumber = @endRowNumber + 1
                    SET @endRowNumber = @endRowNumber + @piChunkInterval
                    IF @endRowNumber > @totalRowNumber SET @endRowNumber = @totalRowNumber
                    IF @startRowNumber > @totalRowNumber BREAK
                END

                SET @text = CONVERT(VARCHAR, GETDATE(), 109)
                RAISERROR(@text, 0, 1) WITH NOWAIT
            END
            ELSE
            BEGIN
                --no primary key or chunk interval given: load everything at once
                SET @insertQuery = @insertQuery + ') a ) b'')'
                EXEC (@insertQuery)
            END
        END TRY
        BEGIN CATCH
            --log error to buildlog
            SET @Procedure_Name = 'FullLoadTable Error: ' + @pTablename
            SET @dtStart = GETDATE()
            EXECUTE psp_BI_Build_Log @@SPID, @Procedure_Name, 'FAIL', @dtStart, @dtEnd, 'V2'

            --log error to job history
            DECLARE @ErrMsg nvarchar(4000), @ErrSeverity int
            SELECT @ErrMsg = ERROR_MESSAGE(),
                   @ErrSeverity = ERROR_SEVERITY()
            RAISERROR(@ErrMsg, @ErrSeverity, 1)
        END CATCH

        --log finish to buildlog
        SET @Procedure_Name = 'FullLoadTable END: ' + @pTablename
        SET @dtStart = GETDATE()
        EXECUTE psp_BI_Build_Log @@SPID, @Procedure_Name, 'END', @dtStart, @dtEnd, 'V2'
    END
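
    For reference, the dynamic SQL above ultimately executes one OPENQUERY insert of roughly this shape per chunk (illustrative only; the schema, table, column, and key names are placeholders):

    INSERT INTO STG.MY_TABLE ([COL1],[COL2])
    SELECT [COL1],[COL2]
    FROM OPENQUERY(PMI, 'select b.* from
        (select a.*, rownum rnum from
            (select COL1, COL2 from STG.MY_TABLE order by COL1) a) b
        where b.rnum >= 1 and b.rnum <= 100000')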

  • The way you are doing it is probably the slowest way you could do it. Your procedure also has a lot of code for checking the columns and the schema, which should not be necessary if you already know the format and content of the source data.

    If it were me, I would dump the data from the source server into a CSV file and then BULK INSERT (or BCP) it into SQL Server; there is no faster way to load 3 million rows that I know of. I would also BCP into a staging database first, and only insert into the destination database once you know the load has been successful and, perhaps, the data has been validated. Then you can truncate the destination table and do the insert, using multiple batches to keep the transaction log from getting too big; see the sketch below.
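
    A rough sketch of that batched insert from staging into the destination (all object names are placeholders, and it assumes an ascending integer key Id). Each iteration commits as its own transaction, so the log can clear between batches under the simple or bulk-logged recovery model:

    DECLARE @lastId INT = 0, @rows INT = 1;

    WHILE @rows > 0
    BEGIN
        -- copy the next batch of validated rows, walking the key upward
        INSERT INTO DestDB.dbo.MyTable (Id, Col1, Col2)
        SELECT TOP (500000) Id, Col1, Col2
        FROM StagingDB.dbo.MyTable
        WHERE Id > @lastId
        ORDER BY Id;

        SET @rows = @@ROWCOUNT;

        -- advance the high-water mark to the last key copied
        IF @rows > 0
            SELECT @lastId = MAX(Id) FROM DestDB.dbo.MyTable;
    END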

    The probability of survival is inversely proportional to the angle of arrival.
