Answered Summing Hierarchical Data

  • Monday, February 18, 2013 7:27 PM
     
      Has Code

    Hello, I can't seem to figure this out - never really ran across this before.  Bascially I have 3 tables where table 1 is the parent, table 2 is a child, and table 3 is a child of table 2.  (Simple hierarchy).  I'm trying to sum across 2 tables, however in my total, the record from table 2 is counted 2 times because there are 2 records in table 3 that reference table 2.

    Here is a simple SQL script:

    CREATE TABLE #TmpTbl1 ([Record_ID] INT PRIMARY KEY NOT NULL)
    INSERT INTO #TmpTbl1 ([Record_ID]) VALUES (1)
    INSERT INTO #TmpTbl1 ([Record_ID]) VALUES (2)
    
    CREATE TABLE #TmpTbl2 ([Record_ID] INT PRIMARY KEY NOT NULL, [Parent_ID] INT NOT NULL, [Value1] DECIMAL(10,2))
    INSERT INTO #TmpTbl2 ([Record_ID], [Parent_ID], [Value1]) VALUES (4,1,10.0)
    INSERT INTO #TmpTbl2 ([Record_ID], [Parent_ID], [Value1]) VALUES (5,2,20.0)
    
    CREATE TABLE #TmpTbl3 ([Record_ID] INT PRIMARY KEY NOT NULL, [Parent_ID] INT NOT NULL, [Value1] DECIMAL(10,2))
    INSERT INTO #TmpTbl3 ([Record_ID], [Parent_ID], [Value1]) VALUES (7,4,40.0)
    INSERT INTO #TmpTbl3 ([Record_ID], [Parent_ID], [Value1]) VALUES (8,4,50.0)
    INSERT INTO #TmpTbl3 ([Record_ID], [Parent_ID], [Value1]) VALUES (9,5,60.0)
    
    SELECT * FROM #TmpTbl1
    SELECT * FROM #TmpTbl2
    SELECT * FROM #TmpTbl3
    
    SELECT		SUM(#TmpTbl2.Value1) AS Tbl2Total, 
    			SUM(#TmpTbl3.Value1) AS Tbl3Total
    FROM		#TmpTbl1 INNER JOIN
    			#TmpTbl2 ON #TmpTbl1.Record_ID = #TmpTbl2.Parent_ID INNER JOIN
    			#TmpTbl3 ON #TmpTbl2.Record_ID = #TmpTbl3.Parent_ID
    WHERE		#TmpTbl1.Record_ID = 1
    
    DROP TABLE #TmpTbl1
    DROP TABLE #TmpTbl2
    DROP TABLE #TmpTbl3

    Here is the result sets:

    Table 1:
    Record_ID
    =========
    1
    2

    Table 2:
    Record_ID       Parent_ID        Value1
    ========= ========= ======
    4                     1                   10.00
    5                     2                   20.00

    Table 3:
    Record_ID       Parent_ID        Value1
    ========= ========= ======
    7                     4                    40.00
    8                     4                    50.00
    9                     5                    60.00

    Result:
    Tbl2Total         Tbl3Total
    ========= =========
    20.00              90.00

    What We Need Is:
    Tbl2Total         Tbl3Total
    ========= =========
    10.00              90.00

    I understand since table 3 is joined to table 2 and has 2 records is the reason why, but is there any other way to make this 10 and 90 without doing subqueries?

    Thanks

All Replies

  • Monday, February 18, 2013 7:34 PM
     
      Has Code

    Try with SUM(DISTINCT col) -

    SELECT	SUM(DISTINCT #TmpTbl2.Value1) AS Tbl2Total, 
    	SUM(DISTINCT #TmpTbl3.Value1) AS Tbl3Total
    


    Narsimha

    • Marked As Answer by Ryan_Ha Monday, February 18, 2013 7:38 PM
    • Unmarked As Answer by Ryan_Ha Monday, February 18, 2013 7:52 PM
    •  
  • Monday, February 18, 2013 7:38 PM
     
     
    That was too simple - thank you so much.
  • Monday, February 18, 2013 7:54 PM
     
      Has Code

    Actually, that works so long as 10 doesn't repeat again in table #2:

    CREATE TABLE #TmpTbl1 ([Record_ID] INT PRIMARY KEY NOT NULL)
    INSERT INTO #TmpTbl1 ([Record_ID]) VALUES (1)
    INSERT INTO #TmpTbl1 ([Record_ID]) VALUES (2)
    
    CREATE TABLE #TmpTbl2 ([Record_ID] INT PRIMARY KEY NOT NULL, [Parent_ID] INT NOT NULL, [Value1] DECIMAL(10,2))
    INSERT INTO #TmpTbl2 ([Record_ID], [Parent_ID], [Value1]) VALUES (4,1,10.0)
    INSERT INTO #TmpTbl2 ([Record_ID], [Parent_ID], [Value1]) VALUES (5,1,10.0)
    
    CREATE TABLE #TmpTbl3 ([Record_ID] INT PRIMARY KEY NOT NULL, [Parent_ID] INT NOT NULL, [Value1] DECIMAL(10,2))
    INSERT INTO #TmpTbl3 ([Record_ID], [Parent_ID], [Value1]) VALUES (7,4,40.0)
    INSERT INTO #TmpTbl3 ([Record_ID], [Parent_ID], [Value1]) VALUES (8,4,50.0)
    INSERT INTO #TmpTbl3 ([Record_ID], [Parent_ID], [Value1]) VALUES (9,5,60.0)
    
    SELECT * FROM #TmpTbl1
    SELECT * FROM #TmpTbl2
    SELECT * FROM #TmpTbl3
    
    SELECT		SUM(DISTINCT #TmpTbl2.Value1) AS Tbl2Total, 
    			SUM(#TmpTbl3.Value1) AS Tbl3Total
    FROM		#TmpTbl1 INNER JOIN
    			#TmpTbl2 ON #TmpTbl1.Record_ID = #TmpTbl2.Parent_ID INNER JOIN
    			#TmpTbl3 ON #TmpTbl2.Record_ID = #TmpTbl3.Parent_ID
    WHERE		#TmpTbl1.Record_ID = 1
    
    
    DROP TABLE #TmpTbl1
    DROP TABLE #TmpTbl2
    DROP TABLE #TmpTbl3
    

    Result:
    Tbl2Total         Tbl3Total
    ========= =========
    10.00              150.00

    What We Need Is:
    Tbl2Total         Tbl3Total
    ========= =========
    20.00              150.00

    Do you basically have to replace the SUM(xxxx) with a subquery? Is that the only way to do this?

    Thanks again.

  • Monday, February 18, 2013 8:06 PM
    Moderator
     
      Has Code
    CREATE TABLE #TmpTbl1 ([Record_ID] INT PRIMARY KEY NOT NULL)
    INSERT INTO #TmpTbl1 ([Record_ID]) VALUES (1)
    INSERT INTO #TmpTbl1 ([Record_ID]) VALUES (2)
    
    CREATE TABLE #TmpTbl2 ([Record_ID] INT PRIMARY KEY NOT NULL, [Parent_ID] INT NOT NULL, [Value1] DECIMAL(10,2))
    INSERT INTO #TmpTbl2 ([Record_ID], [Parent_ID], [Value1]) VALUES (4,1,10.0)
    INSERT INTO #TmpTbl2 ([Record_ID], [Parent_ID], [Value1]) VALUES (5,1,10.0)
    
    CREATE TABLE #TmpTbl3 ([Record_ID] INT PRIMARY KEY NOT NULL, [Parent_ID] INT NOT NULL, [Value1] DECIMAL(10,2))
    INSERT INTO #TmpTbl3 ([Record_ID], [Parent_ID], [Value1]) VALUES (7,4,40.0)
    INSERT INTO #TmpTbl3 ([Record_ID], [Parent_ID], [Value1]) VALUES (8,4,50.0)
    INSERT INTO #TmpTbl3 ([Record_ID], [Parent_ID], [Value1]) VALUES (9,5,60.0)
    
    SELECT * FROM #TmpTbl1
    SELECT * FROM #TmpTbl2
    SELECT * FROM #TmpTbl3
    
    SELECT	 SUM( t2.Value1)  AS Tbl2Total, 
    			Max(Tbl3Total) as Tbl3Total
    FROM		#TmpTbl1 t1 INNER JOIN  #TmpTbl2 t2 ON t1.Record_ID = t2.Parent_ID INNER JOIN
    			(SELECT SUM(Value1)  AS Tbl3Total, Parent_ID  FROM #TmpTbl3 GROUP BY Parent_ID) t3 ON t2.Record_ID = t3.Parent_ID
    WHERE		t1.Record_ID = 1
    
    
    
    
    DROP TABLE #TmpTbl1
    DROP TABLE #TmpTbl2
    DROP TABLE #TmpTbl3
    

  • Monday, February 18, 2013 8:32 PM
     
      Has Code
    SELECT		SUM(Temp.SUM1) AS Tbl2Total, 
    			SUM(Temp2.sum2) AS Tbl3Total
    FROM		#TmpTbl1 INNER JOIN
    			(Select Parent_ID,Record_ID,SUM(Value1) as SUM1 from #TmpTbl2 group by Parent_Id,REcord_id) Temp 
    			 ON #TmpTbl1.Record_ID = Temp.Parent_ID  INNER JOIN
    			(select #TmpTbl2.Record_ID,sum(#TmpTbl3.value1) as SUm2 from #TmpTbl3 INNER JOin #TmpTbl2 on #TmpTbl3.Parent_ID=#TmpTbl2.Record_ID
    			Group by #TmpTbl2.Record_ID) Temp2 ON Temp2.Record_ID = Temp.Record_ID
    WHERE		#TmpTbl1.Record_ID = 1

    output 

    ------

    20 150


    Hope it Helps!!


    • Edited by Stan210 Monday, February 18, 2013 8:36 PM
    •  
  • Monday, February 18, 2013 9:10 PM
     
      Has Code
    ;WITH CTE1 AS
    (
    SELECT #TmpTbl1.Record_ID,
              COALESCE(SUM(DISTINCT #TmpTbl2.Value1),0) AS Sum_t2
    FROM	#TmpTbl1 LEFT JOIN
    			#TmpTbl2 
    ON #TmpTbl1.Record_ID = #TmpTbl2.Parent_ID
    GROUP BY #TmpTbl1.Record_ID
    ),cte2 AS 
    (
    SELECT  #TmpTbl1.Record_ID,COALESCE(SUM(DISTINCT #TmpTbl3.Value1),0) AS Sum_t3
    FROM #TmpTbl3
    	LEFT JOIN #TmpTbl2 ON #TmpTbl3.Parent_ID = #TmpTbl2.Record_ID
    	LEFT JOIN #TmpTbl1 ON #TmpTbl2.Parent_ID = #TmpTbl1.Record_ID
    GROUP BY #TmpTbl1.Record_ID
    )
    SELECT Sum_t2,Sum_t3
    FROM CTE1
    INNER JOIN cte2 ON 
    CTE1.Record_ID = cte2.Record_ID


    Narsimha

  • Monday, February 18, 2013 9:26 PM
     
     Answered Has Code

    Thanks everyone, yeah I think everyone's solutions would work, here is what we came up with as well if anyone else needs it:

    CREATE TABLE #TmpTbl1 ([Record_ID] INT PRIMARY KEY NOT NULL)
    INSERT INTO #TmpTbl1 ([Record_ID]) VALUES (1)
    INSERT INTO #TmpTbl1 ([Record_ID]) VALUES (2)
    
    CREATE TABLE #TmpTbl2 ([Record_ID] INT PRIMARY KEY NOT NULL, [Parent_ID] INT NOT NULL, [Value1] DECIMAL(10,2))
    INSERT INTO #TmpTbl2 ([Record_ID], [Parent_ID], [Value1]) VALUES (4,1,10.0)
    INSERT INTO #TmpTbl2 ([Record_ID], [Parent_ID], [Value1]) VALUES (5,2,20.0)
    
    CREATE TABLE #TmpTbl3 ([Record_ID] INT PRIMARY KEY NOT NULL, [Parent_ID] INT NOT NULL, [Value1] DECIMAL(10,2))
    INSERT INTO #TmpTbl3 ([Record_ID], [Parent_ID], [Value1]) VALUES (7,4,40.0)
    INSERT INTO #TmpTbl3 ([Record_ID], [Parent_ID], [Value1]) VALUES (8,4,50.0)
    INSERT INTO #TmpTbl3 ([Record_ID], [Parent_ID], [Value1]) VALUES (9,5,60.0)
    
    SELECT * FROM #TmpTbl1
    SELECT * FROM #TmpTbl2
    SELECT * FROM #TmpTbl3
    
    SELECT            SUM(#TmpTbl2.Value1) AS Tbl2Total, 
                      SUM(T2.Value1) AS Tbl3Total
    FROM        #TmpTbl1 INNER JOIN
                      #TmpTbl2 ON #TmpTbl1.Record_ID = #TmpTbl2.Parent_ID INNER JOIN
                      (SELECT PARENT_ID, SUM(#TMPTBL3.VALUE1) AS VALUE1 FROM #TMPTBL3 GROUP BY PARENT_ID) T2 ON #TmpTbl2.Record_ID = T2.Parent_ID
    WHERE       #TmpTbl1.Record_ID = 1
    
    DROP TABLE #TmpTbl1
    DROP TABLE #TmpTbl2
    DROP TABLE #TmpTbl3
    

    The reason we had to do it this way was because we had custom aggregrate functions that the execution plan wasn't working if we used subqueries. So this changes the execution plan and fixes it.

    Thanks again for everyone's help.

    • Marked As Answer by Ryan_Ha Monday, February 18, 2013 9:26 PM
    •