Slowly Changing Dimensions

While designing Data Warehouse, it is extremely important to confirm dimension strategy and the way of data analysis with customer. Let’s discuss the most common SCD methodologies  (Type 1, 2).

SCD Type 1: Overwrite

Overwrite old record in dimension table, and therefore doesnt track historical data

SCD Type 2: Add New Row

Tracks historical data by adding new record to dimension table, managing “Valid From”, “Valid To” and “Is Active” flag.

Stored Procedure SCD1
 --CREATE PROCEDURE
 CREATE OR ALTER PROCEDURE [dwh].[LoadCustomer_SCD1] as
 Begin

 --DECLARE VARIABLES
 declare @NewRows int
 declare @UpdatedRows int
 declare @today datetime = getdate()

 --INSERT NEW EMAIL ADDRESSES
 INSERT INTO [dwh].[Customer_SCD1] (
 [EmailAddress]
 ,[FirstName]
 ,[LastName]
 ,[CompanyName]
 ,[Phone]
 ,[InsertDate]
 ,[UpdateDate])
 SELECT s.[EmailAddress]
 ,s.[FirstName]
 ,s.[LastName]
 ,s.[CompanyName]
 ,s.[Phone]
 ,@today as [InsertDate]
 ,null as [UpdateDate]
 FROM [SalesLT].[Customer_new] s
 --FOLLOWING JOIN STATEMENT ALLOWS TO FILTER MISSING EMAIL ADDRESSES IN TARGET TABLE 
 LEFT JOIN [dwh].[Customer_SCD1] t
  on s.[EmailAddress]=t.[EmailAddress] and t.EmailAddress is null
 --GET NUMBER OF NEW RECORDS
 select @NewRows =@@ROWCOUNT

 --UPDATE DESCRIPTION OF EXISTING EMAIL ADDRESSES
 UPDATE t SET
 t.[FirstName]=s.[FirstName]
 ,t.[LastName]=s.[LastName]
 ,t.[CompanyName]=s.[CompanyName]
 ,t.[Phone]=s.[Phone]
 ,t.UpdateDate=@today
 FROM [SalesLT].[Customer_new] s
 INNER JOIN [dwh].[Customer_SCD1] t
  on s.[EmailAddress]=t.[EmailAddress]
 --FOLLOWING EXISTS STATEMENT FILTERS RECORDS WHERE DESCRIPTION OF EMAIL ADDRESS IS DIFFERENT BETWEEN SOURCE AND TARGET TABLE
 where exists (
 select HASHBYTES('md5', Concat(isnull(s.[FirstName],''),isnull(s.[LastName],''),isnull(s.[CompanyName],''),isnull(s.[Phone],'n/a')))
 except
 select HASHBYTES('md5', Concat(isnull(t.[FirstName],''),isnull(t.[LastName],''),isnull(t.[CompanyName],''),isnull(t.[Phone],'n/a')))
 )
 --GET NUMBER OF UPDATED RECORDS
 select @UpdatedRows =@@ROWCOUNT

 if @NewRows is null
  set @NewRows = 0
 if @UpdatedRows is null
  set @UpdatedRows = 0

 --DISPLAY AUDIT INFORMATION
 print '-------- Inserted rows:' + cast (@NewRows as varchar(10))
 print '-------- Updated rows:' + cast (@UpdatedRows as varchar(10))

 END
 GO
Stored Procedure SCD2
 --CREATE PROCEDURE
 CREATE OR ALTER PROCEDURE [dwh].[LoadCustomer_SCD2] as
 Begin

 --DECLARE VARIABLES
declare @NewRows int
declare @UpdatedRows int
declare @today datetime = getdate()

 --ADD NEW RECORDS IF EMAIL ADDRESS DOESN'T EXISTS IN TARGET TABLE OR DESCRIPTION OF EMAIL ADDRESS WAS CHANGED
 INSERT INTO [dwh].[Customer_SCD2] (
 [EmailAddress]
 ,[FirstName]
 ,[LastName]
 ,[CompanyName]
 ,[Phone]
 ,[ValidFrom]
 ,[ValidTo]
 ,[Active]
 ,[hash_diff])
 SELECT s.[EmailAddress]
 ,s.[FirstName]
 ,s.[LastName]
 ,s.[CompanyName]
 ,s.[Phone]
 ,@today as [ValidFrom]
 ,null as [ValidTo]
 ,1 as [Active]
 ,CONVERT(varchar(32),HASHBYTES('md5', Concat(isnull(s.[FirstName],''),isnull(s.[LastName],''),isnull(s.[CompanyName],''),isnull(s.[Phone],'n/a'))),(2)) AS [hash_diff]
 FROM [SalesLT].[Customer_new] s
 --FOLLOWING JOIN STATEMENTS FILTER EITHER NEW EMAIL_ADDRESSES OR DIFFERENT DESCRIPTION BETWEEN SOURCE AND TARGET TABLE
 LEFT JOIN [dwh].[Customer_SCD2] t
  on s.[EmailAddress]=t.[EmailAddress] and t.ValidTo is null
 where exists (
 select CONVERT(varchar(32),HASHBYTES('md5', Concat(isnull(s.[FirstName],''),isnull(s.[LastName],''),isnull(s.[CompanyName],''),isnull(s.[Phone],'n/a'))),(2))
 except
 select t.[hash_diff]
 )
 --GET NUMBER OF INSERTED RECORDS (BOTH NEW AND UPDATED)
 select @NewRows =@@ROWCOUNT

 --UPDATE VALID_TO AND ACTIVE FLAG OF HISTORICAL RECORD BECAUSE THIS RECORD IS NOT ACTIVE ANYMORE
 UPDATE t SET
 t.ValidTo=s.ValidFrom
 ,t.Active=0
 from [dwh].[Customer_SCD2] as t
 --FOLLOWING JOIN AND WHERE STATEMENTS FILTER PREVIOUS ACTIVE RECORDS
 inner join [dwh].[Customer_SCD2] s
  on t.[EmailAddress]=s.[EmailAddress] and t.ValidFrom<s.ValidFrom
 where t.ValidTo is null
 --GET NUMBER OF UPDATED RECORDS
 select @UpdatedRows =@@ROWCOUNT

 if @NewRows is null
  set @NewRows = 0
 if @UpdatedRows is null
  set @UpdatedRows = 0

 --DISPLAY AUDIT INFORMATION
 print '-------- Inserted rows:' + cast (@NewRows as varchar(10))
 print '-------- Updated rows:' + cast (@UpdatedRows as varchar(10))

 END
 GO