Comparing Character Fields in SQL T-SQL

In Uncategorised by Jayden MacRae

There are times when you may be inserting data from one table to another and you get the message:

String or binary data would be truncated

This is often because the data in the source table is too large for the data in the destination table. I’ve created the below script to quickly identify what field may be the culprit. Change the from and to table variables to represent the names of the tables you want to compare.


/* Dynamic script to compare two tables' character fields. Useful when getting message that string or binary data will be truncated
   or if you set the from and to tables to be the same, can show how much data is actually being used in text fields compared
   to their size.
 */

/* dynamic unpivot adapted from: https://dba.stackexchange.com/questions/48393/passing-column-names-dynamically-to-unpivot
 * thanks to https://stackoverflow.com/questions/19055902/unpivot-with-column-name for identifying how to get the column name into the result (duh)
 */

/* Setup variables below:
 * from tables should be where data is coming from - the table must contain data for this query to work
 * to tables should be where data is being inserted to
 */

declare @schema_name_from   nvarchar(max) = 'dbo'
declare @table_name_from    nvarchar(max) = 'FromTable'   /* this table must have data to be able to perform the comparison */
declare @schema_name_to     nvarchar(max) = 'dbo'
declare @table_name_to      nvarchar(max) = 'ToTable'

DECLARE     @colsUnpivot AS NVARCHAR(MAX),
            @colsMaxLength as nvarchar(max),
            @sql  AS NVARCHAR(MAX)

/* We want to form a list of columns names, one separated purely by commas, and the second applying the function we want to identify the maximum size of the data contained with each column */
select
    @colsUnpivot = string_agg(quotename(column_name), ',')
from
    (
        select
            distinct column_name
        from
            INFORMATION_SCHEMA.COLUMNS 
        where
        (
            table_name = @table_name_from
            and
            TABLE_SCHEMA = @schema_name_from and DATA_TYPE in ('nchar','nvarchar','varchar','char')
        )
        OR
        (
            table_name = @table_name_to
            and
            TABLE_SCHEMA = @schema_name_to and DATA_TYPE in ('nchar','nvarchar','varchar','char')
        )
    ) x

/* In this query, we must caset the result for each column to integer, otherwise the result comes out as the 
 * base datatype. It is then not possible to combine varchar(max) with varchar(n). By casting to integer
 * we get the counts coming through as integer, and we don't suffer the same issue.
 */
select
    @colsMaxLength = string_agg(quotename(column_name)+'=cast(isnull(max(len('+quotename(column_name)+')),0) as integer)', ',')     
from
    INFORMATION_SCHEMA.COLUMNS
where
    table_name = @table_name_from 
    and
    table_schema = @schema_name_from
    and
    data_type In ('varchar','nvarchar','char','nchar')

/* This SQL gets the data size in the 'from' table and unpivots it so it can be compared
 * to the column definitions returned from INFORMATION_SCHEMA.COLUMNS table. */

select @sql = '
/* calculate the data sizes within the from table */
with source_wide as (
SELECT
'+@colsMaxLength+'
from '+quotename(@schema_name_from)+'.'+quotename(@table_name_from)+'
),
/* unpivot the data from wide to long format */
source_long as (
    select SourceColumnName = d, SourceSize = data
    from source_wide
    unpivot(data for d in ('+@colsUnpivot+')) as unpvt
),
/* get the column definitions in the to table, in long format */
target_long as (
    select TargetColumnName = column_name, TargetSize = CHARACTER_MAXIMUM_LENGTH 
    from INFORMATION_SCHEMA.COLUMNS 
    where table_name = '''+@table_name_to+''' and table_schema = ''' + @schema_name_to + ''' and data_type In (''varchar'',''nvarchar'',''char'',''nchar'')
),
/* generate calculated fields */
output as (
select
    *,
    Status = case when SourceSize > TargetSize then ''Source too large'' else ''OK'' end,
    SizeDifference = TargetSize - SourceSize
from 
    source_long S
    full outer join
    target_long T on S.SourceColumnName = T.TargetColumnName
)
/* final output */
select * from output order by Status desc
'
print @sql
exec sp_executesql @sql