locked
Query help to find out which attribute/column is out of sync with the target table RRS feed

  • Question

  • Hello,

    We've source (OLTP) system and target (DW) system where we're validating attributes (HASH) comparison with the composite keys, we store hash value and store and unmatched hash value as a discrepancies report, here is an small example where we can validating using SSIS.

    USE [tempdb]
    GO
    
    IF OBJECT_ID('tempdb..#SOURCETABLE') IS NOT NULL
    	DROP TABLE #SOURCETABLE
    IF OBJECT_ID('tempdb..#SOURCEHASHTABLE') IS NOT NULL
    	DROP TABLE #SOURCEHASHTABLE
    IF OBJECT_ID('tempdb..#TARGETTABLE') IS NOT NULL
    	DROP TABLE #TARGETTABLE
    IF OBJECT_ID('tempdb..#TARGETHASHTABLE') IS NOT NULL
    	DROP TABLE #TARGETHASHTABLE
    
    CREATE TABLE #SOURCETABLE
    (
    	ID		VARCHAR(10),
    	EFF_DT	DATETIME,
    	TERM_DT	DATETIME,
    	FNAME	VARCHAR(10),
    	LNAME	VARCHAR(10)
    )
    
    CREATE TABLE #SOURCEHASHTABLE
    (
    	ID		VARCHAR(10),
    	EFF_DT	DATETIME,
    	TERM_DT	DATETIME,
    	HASH_VALUE BINARY(20)
    )
    
    CREATE TABLE #TARGETTABLE
    (
    	ID		VARCHAR(10),
    	EFF_DT	DATETIME,
    	TERM_DT	DATETIME,
    	FNAME	VARCHAR(10),
    	LNAME	VARCHAR(10)
    )
    
    CREATE TABLE #TARGETHASHTABLE
    (
    	ID		VARCHAR(10),
    	EFF_DT	DATETIME,
    	TERM_DT	DATETIME,
    	HASH_VALUE BINARY(20)
    )
    
    INSERT INTO #SOURCETABLE VALUES ( '123', '2000-10-12', '2005-12-31', 'John', 'Smith' )
    INSERT INTO #SOURCETABLE VALUES ( '124', '2010-01-01', '2017-12-31', 'Micheal', 'Park' )
    INSERT INTO #SOURCETABLE VALUES ( '125', '2015-01-01', '2016-12-31', 'Tim', 'Hasting' )
    
    INSERT INTO #TARGETTABLE VALUES ( '123', '2000-10-12', '2005-12-31', 'John', 'Smith' )
    INSERT INTO #TARGETTABLE VALUES ( '124', '2010-01-01', '2017-12-31', 'Micheal', 'Parker' )
    INSERT INTO #TARGETTABLE VALUES ( '125', '2015-01-01', '2016-12-31', 'Tim1', 'Hasting1' )
    
    INSERT INTO #SOURCEHASHTABLE ( ID, EFF_DT, TERM_DT, HASH_VALUE )
    SELECT	ID, EFF_DT, TERM_DT,
    		
    		CONVERT(BINARY(20), HASHBYTES('SHA1', 
    		ISNULL(CONVERT(VARCHAR(50), FNAME), '') + 
    		ISNULL(CONVERT(VARCHAR(50), LNAME), '') + 
    		'' )) AS HASH_VALUE	
    
    FROM	#SOURCETABLE
    
    INSERT INTO #TARGETHASHTABLE ( ID, EFF_DT, TERM_DT, HASH_VALUE )
    SELECT	ID, EFF_DT, TERM_DT,
    		
    		CONVERT(BINARY(20), HASHBYTES('SHA1', 
    		ISNULL(CONVERT(VARCHAR(50), FNAME), '') + 
    		ISNULL(CONVERT(VARCHAR(50), LNAME), '') + 
    		'' )) AS HASH_VALUE	
    
    FROM	#TARGETTABLE
    
    SELECT	a.*
    FROM	#SOURCEHASHTABLE a
    WHERE	NOT EXISTS
    		(
    			SELECT	1
    			FROM	#TARGETHASHTABLE b
    			WHERE	a.ID = b.ID
    						AND a.EFF_DT = b.EFF_DT
    						AND a.TERM_DT = b.TERM_DT
    						AND a.HASH_VALUE = b.HASH_VALUE
    		)
    ORDER BY	a.ID
    
    IF OBJECT_ID('tempdb..#SOURCETABLE') IS NOT NULL
    	DROP TABLE #SOURCETABLE
    IF OBJECT_ID('tempdb..#SOURCEHASHTABLE') IS NOT NULL
    	DROP TABLE #SOURCEHASHTABLE
    IF OBJECT_ID('tempdb..#TARGETTABLE') IS NOT NULL
    	DROP TABLE #TARGETTABLE
    IF OBJECT_ID('tempdb..#TARGETHASHTABLE') IS NOT NULL
    	DROP TABLE #TARGETHASHTABLE

    Up to here everything is working fine as we've expected, now our attributes comparison are going with huge list of columns (above example I'm comparing only 2 columns first & last name) so we want to know which field/fields where out of sync.

    Expected result:-

    SELECT	'124' AS ID, '2010-01-01 00:00:00.000' AS EFF_DT, '2017-12-31 00:00:00.000' AS TERM_DT, '0x0775E112B5D5ED83CF90719B43D17438259763E8' AS HASH_VALUE, 'LNAME' AS OutOfSyncFields
    UNION ALL
    SELECT	'125' AS ID, '2015-01-01 00:00:00.000' AS EFF_DT, '2016-12-31 00:00:00.000' AS TERM_DT, '0x48D69E2331A8B465B8892ED2AB25977A4E8382FE' AS HASH_VALUE, 'FNAME,LNAME' AS OutOfSyncFields

    Can anybody help me out or throw some suggestion(s) how to get it?

    Thank You

    Regards,

    gk03


    Please do let us know your feedback. Thank You - KG, MCTS

    Wednesday, February 22, 2017 6:01 PM

All replies

  • I'm not sure what you are asking.

    If you want to avoid typing all the column names, try using CHECKSUM(). With CHECKSUM() you can use an asterisk. Ie

    SELECT ID, EFF_DT, TERM_DT, CHECKSUM(*) FROM ...

    The asterisk will include the key columns, but they are equal by definition, so that should be OK. CHECKSUM will see NULL as different to an empty string.

    You can probably combine the two source tables into one query as a FULL OUTER JOIN. That will save having to materialise the first two temp tables.

    Hope that helps,


    Richard

    Thursday, February 23, 2017 3:27 AM
  • Hi gk03,

    You could have a look at following code that could return your expected result.

    USE [tempdb]
    GO
    
    IF OBJECT_ID('tempdb..#SOURCETABLE') IS NOT NULL
    	DROP TABLE #SOURCETABLE
    IF OBJECT_ID('tempdb..#SOURCEHASHTABLE') IS NOT NULL
    	DROP TABLE #SOURCEHASHTABLE
    IF OBJECT_ID('tempdb..#TARGETTABLE') IS NOT NULL
    	DROP TABLE #TARGETTABLE
    IF OBJECT_ID('tempdb..#TARGETHASHTABLE') IS NOT NULL
    	DROP TABLE #TARGETHASHTABLE
    
    CREATE TABLE #SOURCETABLE
    (
    	ID		VARCHAR(10),
    	EFF_DT	DATETIME,
    	TERM_DT	DATETIME,
    	FNAME	VARCHAR(10),
    	LNAME	VARCHAR(10)
    )
    
    CREATE TABLE #SOURCEHASHTABLE
    (
    	ID		VARCHAR(10),
    	EFF_DT	DATETIME,
    	TERM_DT	DATETIME,
    	HASH_VALUE BINARY(20)
    )
    
    CREATE TABLE #TARGETTABLE
    (
    	ID		VARCHAR(10),
    	EFF_DT	DATETIME,
    	TERM_DT	DATETIME,
    	FNAME	VARCHAR(10),
    	LNAME	VARCHAR(10)
    )
    
    CREATE TABLE #TARGETHASHTABLE
    (
    	ID		VARCHAR(10),
    	EFF_DT	DATETIME,
    	TERM_DT	DATETIME,
    	HASH_VALUE BINARY(20)
    )
    
    INSERT INTO #SOURCETABLE VALUES ( '123', '2000-10-12', '2005-12-31', 'John', 'Smith' )
    INSERT INTO #SOURCETABLE VALUES ( '124', '2010-01-01', '2017-12-31', 'Micheal', 'Park' )
    INSERT INTO #SOURCETABLE VALUES ( '125', '2015-01-01', '2016-12-31', 'Tim', 'Hasting' )
    
    INSERT INTO #TARGETTABLE VALUES ( '123', '2000-10-12', '2005-12-31', 'John', 'Smith' )
    INSERT INTO #TARGETTABLE VALUES ( '124', '2010-01-01', '2017-12-31', 'Micheal', 'Parker' )
    INSERT INTO #TARGETTABLE VALUES ( '125', '2015-01-01', '2016-12-31', 'Tim1', 'Hasting1' )
    
    INSERT INTO #SOURCEHASHTABLE ( ID, EFF_DT, TERM_DT, HASH_VALUE )
    SELECT	ID, EFF_DT, TERM_DT,
    		
    		CONVERT(BINARY(20), HASHBYTES('SHA1', 
    		ISNULL(CONVERT(VARCHAR(50), FNAME), '') + 
    		ISNULL(CONVERT(VARCHAR(50), LNAME), '') + 
    		'' )) AS HASH_VALUE	
    
    FROM	#SOURCETABLE
    
    INSERT INTO #TARGETHASHTABLE ( ID, EFF_DT, TERM_DT, HASH_VALUE )
    SELECT	ID, EFF_DT, TERM_DT,
    		
    		CONVERT(BINARY(20), HASHBYTES('SHA1', 
    		ISNULL(CONVERT(VARCHAR(50), FNAME), '') + 
    		ISNULL(CONVERT(VARCHAR(50), LNAME), '') + 
    		'' )) AS HASH_VALUE	
    
    FROM	#TARGETTABLE
    
    
    select * from #SOURCETABLE  
    select * from #TARGETTABLE  
    
    ;WITH cteSource as
    (
    	select S.*,P.COL,P.VAL 
    	from #SOURCETABLE S
    	CROSS APPLY
    	(
    		VALUES 
    		(CAST(S.EFF_DT AS varchar(2000)),'EFF_DT'),
    		(CAST(S.TERM_DT AS varchar(2000)),'TERM_DT'),
    		(CAST(S.FNAME AS varchar(2000)) ,'FNAME'),
    		(CAST(S.LNAME AS varchar(2000)),'LNAME')
    	)P(VAL,COL)
    )
    ,cteTarget as
    (
    	select S.*,P.COL,P.VAL 
    	from #TARGETTABLE S
    	CROSS APPLY
    	(
    		VALUES 
    		(CAST(S.EFF_DT AS varchar(2000)),'EFF_DT'),
    		(CAST(S.TERM_DT AS varchar(2000)),'TERM_DT'),
    		(CAST(S.FNAME AS varchar(2000)) ,'FNAME'),
    		(CAST(S.LNAME AS varchar(2000)),'LNAME')
    	)P(VAL,COL)
    )
    ,cteResult as
    (
    	select 
    		cs.ID
    		, cs.EFF_DT
    		,cs.TERM_DT
    		,CONVERT(BINARY(20), HASHBYTES('SHA1', 
    		ISNULL(CONVERT(VARCHAR(50), cs.FNAME), '') + 
    		ISNULL(CONVERT(VARCHAR(50), cs.LNAME), '') + 
    		'' )) AS HASH_VALUE	
    		,cs.col
    	from cteSource  cs
    	inner join cteTarget ct on cs.ID = ct.ID and cs.COL = ct.COL 
    	where cs.VAL <> ct.VAL 
    )
    select
    	ID,EFF_DT,TERM_DT,HASH_VALUE,
    	STUFF(
    		cast((
    		
    			select ','+c2.COL
    			from cteResult c2
    			where c2.ID = c1.ID 
    			for xml path('')
    		)as varchar(2000)) 
    	,1,1,'')as OutOfSyncFields
    from cteResult c1
    group by ID,EFF_DT,TERM_DT,HASH_VALUE

    Best Regards,

    Albert Zhang


    MSDN Community Support
    Please remember to click "Mark as Answer" the responses that resolved your issue, and to click "Unmark as Answer" if not. This can be beneficial to other community members reading this thread. If you have any compliments or complaints to MSDN Support, feel free to contact MSDNFSF@microsoft.com.

    • Proposed as answer by Albert_ Zhang Monday, March 6, 2017 11:58 AM
    Thursday, February 23, 2017 8:26 AM