none
Issue with Merge statement in Azure SQL DW RRS feed

  • Question

  • Hi All,

    I am trying to create a merge statement in Azure SQL DW, but I am getting an error that I am unable to identify.

    Below is my code.

    -- NOTE(review): MERGE was not supported in Azure SQL DW at the time of
    -- this thread (see the accepted CTAS answer below). The corrected syntax
    -- here is for engines that do support MERGE (SQL Server / Azure SQL DB).
    MERGE dbo.DimStatus AS TARGET
    USING (
        SELECT  CAST([Status] AS NVARCHAR(50)) AS [Status]
               ,CAST([Statuscategory] AS NVARCHAR(50)) AS [Statuscategory]
               ,CAST([Loadid] AS INT) AS [Loadid]
        FROM    [ext].[Dimstatus]
    ) AS SOURCE
    -- Join on the business key only; the change-detection test belongs in
    -- the WHEN MATCHED clause, not in ON. The original left this paren
    -- unclosed and omitted the WHEN MATCHED keyword entirely.
    ON (TARGET.[Status] = SOURCE.[Status])
    WHEN MATCHED AND
         HASHBYTES('MD5', ISNULL(UPPER(TARGET.Statuscategory), ''))
      <> HASHBYTES('MD5', ISNULL(UPPER(SOURCE.Statuscategory), ''))
    THEN UPDATE SET
         TARGET.[Statuscategory] = SOURCE.[Statuscategory]
        ,TARGET.[ValidFrom]      = GETDATE()
        ,TARGET.[ValidTo]        = GETDATE() - 1
    WHEN NOT MATCHED BY TARGET THEN
        INSERT (
             [Status]
            ,[Statuscategory]
            ,LoadId
            ,IsActive
            ,[ValidFrom]
            ,[ValidTo]
        )
        VALUES (
             SOURCE.[Status]
            ,SOURCE.Statuscategory
            ,SOURCE.LoadId
            ,1
            ,GETDATE()
            ,GETDATE()
        );  -- MERGE must be terminated with a semicolon

    Can someone let me know what the exact error with my code is?


    Thanks & regards, Vipin Jha MCP

    Sunday, September 30, 2018 4:18 PM

Answers

  • To add to Casey's recommendation, here is one way to perform a CTAS implementation of an SCD2 dimension. You'll find it very close to what you want to achieve.

    Note that when the dimension table is created, a zero-value SK is automatically added for a new row. That isn't clear from the script.


    -- -------------------------------------------------------------------
    -- Script:          dim_cat_transform
    -- Target:          Azure SQL Data Warehouse
    -- Purpose:         SCD2 maintenance of dw.dim_cat via CTAS (MERGE is
    --                  not available in SQL DW). Two passes: pass 1 builds
    --                  the full history with new versions appended; pass 2
    --                  adds brand-new members and switches the result in.
    -- -------------------------------------------------------------------

    -- Delete temporary tables if present (makes the script re-runnable).

    if object_id ('dw.dim_cat_TEMP1','U') is not null drop table dw.dim_cat_TEMP1;
    if object_id ('dw.dim_cat_TEMP2','U') is not null drop table dw.dim_cat_TEMP2;

    -- -------------------------------------------------------------------
    -- First pass creates new versions of changing rows.
    -- -------------------------------------------------------------------

    -- Append new versions of changing rows. New SKs start above the
    -- current max; row_number() makes them unique within this batch.
    create  table dw.dim_cat_TEMP1
            with (clustered columnstore index, distribution = hash(cat_code)) as
    select  cast((select max(cat_sk) from dw.dim_cat) + row_number() over (order by getdate()) as bigint) as cat_sk
            ,dim.cat_code as cat_code
            ,cast(stage.cat_name as nvarchar(256)) as cat_name
            ,cast(current_timestamp as datetime) as scd_from
            ,cast('2999-12-31T00:00:00' as datetime) as scd_to
            ,cast('Y' as char(1)) as scd_current
    from    dw.dim_cat dim
            join load.load_cat stage
            on stage.cat_code = dim.cat_code
    -- Only the current version can spawn a new version.
    where   dim.scd_current = 'Y'
            and (
                -- NULL-safe inequality: changed when values differ, or when
                -- exactly one side is NULL (both-NULL is "no change").
                ((dim.cat_name <> stage.cat_name or dim.cat_name is null or stage.cat_name is null) and not (dim.cat_name is null and stage.cat_name is null))
                )

    union all

    -- Append all previous rows, turning off current flag for previous versions of changed rows
    select  dim.cat_sk as cat_sk
            ,dim.cat_code as cat_code
            ,dim.cat_name as cat_name
            ,dim.scd_from as scd_from
            ,case when stage.cat_code is not null and dim.scd_current = 'Y'
             then cast(current_timestamp as datetime)
             else dim.scd_to
             end as scd_to
            ,case when stage.cat_code is not null
                  and dim.scd_current = 'Y'
             then cast('N' as char(1))
             else dim.scd_current
             end as scd_current
    from    dw.dim_cat dim
            left outer join load.load_cat stage
            on stage.cat_code = dim.cat_code
            and dim.scd_current = 'Y'
            and (
                ((dim.cat_name <> stage.cat_name or dim.cat_name is null or stage.cat_name is null) and not (dim.cat_name is null and stage.cat_name is null))
                )
    ;

    -- -------------------------------------------------------------------
    -- Second pass appends new rows to table
    -- -------------------------------------------------------------------

    -- Copy current table .. note redistribution to REPLICATE.
    -- (Fix: the original carried an unused left join to load.load_cat here,
    -- which would duplicate rows if a cat_code repeated in staging.)
    create  table dw.dim_cat_TEMP2
            with (clustered columnstore index, distribution = replicate) as
    select  dim.cat_sk
            ,dim.cat_code
            ,dim.cat_name
            ,dim.scd_from
            ,dim.scd_to
            ,dim.scd_current
    from    dw.dim_cat_TEMP1 dim

    union all

    -- Add new rows (cat_codes seen in staging but not yet in the dimension).
    -- (Fix: base new SKs on TEMP1's max, not dw.dim_cat's, so they cannot
    -- collide with SKs assigned to new versions in the first pass.)
    select  cast((select max(cat_sk) from dw.dim_cat_TEMP1) + row_number() over (order by getdate()) as bigint) as cat_sk
            ,cast(stage.cat_code as nvarchar(256)) as cat_code
            ,cast(stage.cat_name as nvarchar(256)) as cat_name
            ,cast('1900-01-01' as datetime) as scd_from
            ,cast('2999-12-31' as datetime) as scd_to
            ,cast('Y' as char(1)) as scd_current
    from    load.load_cat stage
            left outer join dw.dim_cat_TEMP1 dim
            on dim.cat_code = stage.cat_code
    where   dim.cat_code is null
    ;

    -- Switch table contents replacing target with temp.

    alter table dw.dim_cat_TEMP2 switch to dw.dim_cat with (truncate_target=on);
    drop table dw.dim_cat_TEMP1;
    drop table dw.dim_cat_TEMP2;

    -- Force replication of table (a read triggers the replicated-table rebuild).

    select top 1 * from dw.dim_cat;
    


    • Marked as answer by Vipin jha Wednesday, October 3, 2018 9:47 AM
    Tuesday, October 2, 2018 11:40 AM
  • To make that comment about zero-value row a little more clear, here's a DDL script for the table:

    -- Create table.
    
    -- Dimension table: replicated columnstore, SCD2 history columns.
    create table dw.dim_cat (
        cat_sk       bigint,
        cat_code     nvarchar(256),
        cat_name     nvarchar(256),
        scd_from     datetime,
        scd_to       datetime,
        scd_current  char(1)
        )
    with (clustered columnstore index, distribution = replicate)
    ;

    -- Seed the zero-value ("unknown member") row so facts with no matching
    -- business key can still join to the dimension.
    insert into dw.dim_cat (
        cat_sk,
        cat_code,
        cat_name,
        scd_from,
        scd_to,
        scd_current
        )
    values (
        0,
        '*',
        '*',
        '1900-01-01T00:00:00',
        '2999-12-31T00:00:00',
        'Y'
        )
    ;
    


    • Marked as answer by Vipin jha Wednesday, October 3, 2018 9:47 AM
    Tuesday, October 2, 2018 11:42 AM

All replies

  • Hi Vipin,

    Merge is not supported at this time. You can use Create Table as Select (CTAS) in SQL DW to implement a more performant version of MERGE in SQL DW. 

    Monday, October 1, 2018 9:19 PM
  • To add to Casey's recommendation, here is one way to perform a CTAS implementation of an SCD2 dimension. You'll find it very close to what you want to achieve.

    Note that when the dimension table is created, a zero-value SK is automatically added for a new row. That isn't clear from the script.


    -- -------------------------------------------------------------------
    -- Script:          dim_cat_transform
    -- Target:          Azure SQL Data Warehouse
    -- Purpose:         SCD2 maintenance of dw.dim_cat via CTAS (MERGE is
    --                  not available in SQL DW). Two passes: pass 1 builds
    --                  the full history with new versions appended; pass 2
    --                  adds brand-new members and switches the result in.
    -- -------------------------------------------------------------------

    -- Delete temporary tables if present (makes the script re-runnable).

    if object_id ('dw.dim_cat_TEMP1','U') is not null drop table dw.dim_cat_TEMP1;
    if object_id ('dw.dim_cat_TEMP2','U') is not null drop table dw.dim_cat_TEMP2;

    -- -------------------------------------------------------------------
    -- First pass creates new versions of changing rows.
    -- -------------------------------------------------------------------

    -- Append new versions of changing rows. New SKs start above the
    -- current max; row_number() makes them unique within this batch.
    create  table dw.dim_cat_TEMP1
            with (clustered columnstore index, distribution = hash(cat_code)) as
    select  cast((select max(cat_sk) from dw.dim_cat) + row_number() over (order by getdate()) as bigint) as cat_sk
            ,dim.cat_code as cat_code
            ,cast(stage.cat_name as nvarchar(256)) as cat_name
            ,cast(current_timestamp as datetime) as scd_from
            ,cast('2999-12-31T00:00:00' as datetime) as scd_to
            ,cast('Y' as char(1)) as scd_current
    from    dw.dim_cat dim
            join load.load_cat stage
            on stage.cat_code = dim.cat_code
    -- Only the current version can spawn a new version.
    where   dim.scd_current = 'Y'
            and (
                -- NULL-safe inequality: changed when values differ, or when
                -- exactly one side is NULL (both-NULL is "no change").
                ((dim.cat_name <> stage.cat_name or dim.cat_name is null or stage.cat_name is null) and not (dim.cat_name is null and stage.cat_name is null))
                )

    union all

    -- Append all previous rows, turning off current flag for previous versions of changed rows
    select  dim.cat_sk as cat_sk
            ,dim.cat_code as cat_code
            ,dim.cat_name as cat_name
            ,dim.scd_from as scd_from
            ,case when stage.cat_code is not null and dim.scd_current = 'Y'
             then cast(current_timestamp as datetime)
             else dim.scd_to
             end as scd_to
            ,case when stage.cat_code is not null
                  and dim.scd_current = 'Y'
             then cast('N' as char(1))
             else dim.scd_current
             end as scd_current
    from    dw.dim_cat dim
            left outer join load.load_cat stage
            on stage.cat_code = dim.cat_code
            and dim.scd_current = 'Y'
            and (
                ((dim.cat_name <> stage.cat_name or dim.cat_name is null or stage.cat_name is null) and not (dim.cat_name is null and stage.cat_name is null))
                )
    ;

    -- -------------------------------------------------------------------
    -- Second pass appends new rows to table
    -- -------------------------------------------------------------------

    -- Copy current table .. note redistribution to REPLICATE.
    -- (Fix: the original carried an unused left join to load.load_cat here,
    -- which would duplicate rows if a cat_code repeated in staging.)
    create  table dw.dim_cat_TEMP2
            with (clustered columnstore index, distribution = replicate) as
    select  dim.cat_sk
            ,dim.cat_code
            ,dim.cat_name
            ,dim.scd_from
            ,dim.scd_to
            ,dim.scd_current
    from    dw.dim_cat_TEMP1 dim

    union all

    -- Add new rows (cat_codes seen in staging but not yet in the dimension).
    -- (Fix: base new SKs on TEMP1's max, not dw.dim_cat's, so they cannot
    -- collide with SKs assigned to new versions in the first pass.)
    select  cast((select max(cat_sk) from dw.dim_cat_TEMP1) + row_number() over (order by getdate()) as bigint) as cat_sk
            ,cast(stage.cat_code as nvarchar(256)) as cat_code
            ,cast(stage.cat_name as nvarchar(256)) as cat_name
            ,cast('1900-01-01' as datetime) as scd_from
            ,cast('2999-12-31' as datetime) as scd_to
            ,cast('Y' as char(1)) as scd_current
    from    load.load_cat stage
            left outer join dw.dim_cat_TEMP1 dim
            on dim.cat_code = stage.cat_code
    where   dim.cat_code is null
    ;

    -- Switch table contents replacing target with temp.

    alter table dw.dim_cat_TEMP2 switch to dw.dim_cat with (truncate_target=on);
    drop table dw.dim_cat_TEMP1;
    drop table dw.dim_cat_TEMP2;

    -- Force replication of table (a read triggers the replicated-table rebuild).

    select top 1 * from dw.dim_cat;
    


    • Marked as answer by Vipin jha Wednesday, October 3, 2018 9:47 AM
    Tuesday, October 2, 2018 11:40 AM
  • To make that comment about zero-value row a little more clear, here's a DDL script for the table:

    -- Create table.
    
    -- Dimension table: replicated columnstore, SCD2 history columns.
    create table dw.dim_cat (
        cat_sk       bigint,
        cat_code     nvarchar(256),
        cat_name     nvarchar(256),
        scd_from     datetime,
        scd_to       datetime,
        scd_current  char(1)
        )
    with (clustered columnstore index, distribution = replicate)
    ;

    -- Seed the zero-value ("unknown member") row so facts with no matching
    -- business key can still join to the dimension.
    insert into dw.dim_cat (
        cat_sk,
        cat_code,
        cat_name,
        scd_from,
        scd_to,
        scd_current
        )
    values (
        0,
        '*',
        '*',
        '1900-01-01T00:00:00',
        '2999-12-31T00:00:00',
        'Y'
        )
    ;
    


    • Marked as answer by Vipin jha Wednesday, October 3, 2018 9:47 AM
    Tuesday, October 2, 2018 11:42 AM