Technical Article

Table-Column Data Profiler

,

Simply run the script with the DBName.SchName.TblName specified for the @Cat_Sch_TableName variable, e.g., 'Sales.dbo.Invoices'.

Returns a "Results" table with one row per Table's column, attributed with:

ColumnName

CondensedDataType

SequenceNumber (aka ORDINAL_POSITION)

NullCount

ModalValue

ModalCount

MinValue

MaxValue

MaxLength

Be warned: tall, wide tables may take up to 10 minutes or more to run...

DECLARE @Cat_Sch_TableName varchar(128) = 'Sales.dbo.Invoices'
-- Initialize fully qualified object name; script can be used like a stored procedure
DECLARE @sqlstring varchar(8000), @ColumnName varchar(128), @ColumnID smallint,
        @SequenceNumber smallint

-- cleanup temp tables        
IF OBJECT_ID('Tempdb..#RawColumns')      IS NOT NULL DROP TABLE #RawColumns
IF OBJECT_ID('Tempdb..#ProfiledColumns') IS NOT NULL DROP TABLE #ProfiledColumns

-- Populate working list of columns
SELECT ColumnID = IDENTITY(int, 1,1),
       COLUMN_NAME       AS ColumnName,
       ORDINAL_POSITION  AS SequenceNumber
      INTO #RawColumns 
     FROM information_schema.COLUMNS  
WHERE TABLE_CATALOG+'.'+TABLE_SCHEMA+'.'+TABLE_NAME=@Cat_Sch_TableName 

-- Initialize output table
CREATE TABLE #ProfiledColumns(
             ColumnName  varchar(128) NOT NULL,
             SequenceNumber varchar(10),
             NullCount int ,
             ModalValue varchar(255),
             ModalCount int,
             MinValue varchar(255),
             MaxValue varchar(255) ,
             MaxLength smallint )

-- Loop through columns
WHILE (EXISTS(
    SELECT  * 
    FROM #RawColumns 
    WHERE ColumnName IS NOT NULL ))

BEGIN

SELECT @ColumnID       = ColumnID, 
       @ColumnName     = ColumnName,  
       @SequenceNumber = SequenceNumber
FROM #RawColumns -- assign column data to variables

SET @sqlstring=
   'INSERT INTO #ProfiledColumns ' + 
   'SELECT '''+@ColumnName+''',' +
    CAST(@SequenceNumber AS varchar(10)) + ' , ' +
   'SUM(CASE WHEN ['+@ColumnName+'] IS NULL THEN 1 ELSE 0 END) ,'+ 
   '(SELECT TOP 1 ['+@ColumnName+'] FROM '+@Cat_Sch_TableName+' GROUP BY ['+@ColumnName+'] ORDER BY COUNT(*) DESC),' +
   '(SELECT TOP 1 COUNT(*) FROM '+@Cat_Sch_TableName+' GROUP BY ['+@ColumnName+'] ORDER BY COUNT(*) DESC),' +
   'CAST(MIN(['+@ColumnName+']) AS VARCHAR(255)),'+
   'CAST(MAX(['+@ColumnName+']) AS VARCHAR(255)),'+
   'MAX(LEN(CAST(['+@ColumnName+'] AS VARCHAR(255)))) '+   
   'FROM ' + @Cat_Sch_TableName    -- build query string
                  
DELETE FROM #RawColumns WHERE ColumnID=@ColumnID  -- one column down, next...
EXECUTE (@sqlstring)                              -- execute built query   
END                                               -- End loop block

-- Display data profile with condensed data type
SELECT 
ColumnName,
DATA_TYPE  + 
    CASE WHEN DATA_TYPE LIKE '%char'         THEN '('+LTRIM(CHARACTER_MAXIMUM_LENGTH)+')'
         WHEN COALESCE(NUMERIC_SCALE,0) <> 0 THEN '('+LTRIM(NUMERIC_PRECISION)+','+LTRIM(NUMERIC_SCALE)+')' ELSE ''
END AS DataTypeName,
NullCount,
ModalValue,
ModalCount,
MinValue,
MaxValue,
MaxLength 
FROM #ProfiledColumns  
JOIN information_schema.COLUMNS
  ON SequenceNumber=ORDINAL_POSITION  
WHERE TABLE_CATALOG+'.'+TABLE_SCHEMA+'.'+TABLE_NAME=@Cat_Sch_TableName 
ORDER BY CAST(SequenceNumber AS SMALLINT)

Rate

4.71 (7)

You rated this post out of 5. Change rating

Share

Share

Rate

4.71 (7)

You rated this post out of 5. Change rating