Hello,
We have come across a scenario where we need to update previous records in a target table wherever a value is NULL, going back only as far as that column's last non-NULL occurrence. Here is the test scenario we have mocked up:
-- Target table definition: one row per ID / update_time, carrying five
-- nullable change-log attributes plus a start_date / end_date pair.
CREATE TABLE test_target (
    ID            INTEGER,
    update_time   TIMESTAMP(6),
    change_log_1  VARCHAR(10),
    change_log_2  VARCHAR(10),
    change_log_3  VARCHAR(10),
    change_log_4  VARCHAR(10),
    change_log_5  VARCHAR(10),
    start_date    TIMESTAMP(6),
    end_date      TIMESTAMP(6)
)
PRIMARY INDEX (ID);
-- Test values: the existing target table contains many NULLs in the change_log_* columns.
-- Every timestamp is written as a full TIMESTAMP literal (YYYY-MM-DD HH:MI:SS.FFFFFF);
-- the open-ended current row ends at 3999-12-31 23:59:59.999999.
-- Columns are listed explicitly so the inserts do not depend on column order.
INSERT INTO test_target (ID, update_time, change_log_1, change_log_2, change_log_3, change_log_4, change_log_5, start_date, end_date)
VALUES (1234, TIMESTAMP '2013-01-01 00:00:00.000000', 'abc', 'def', NULL, NULL, NULL, TIMESTAMP '2013-01-01 00:00:00.000000', TIMESTAMP '2013-02-01 02:00:00.000000');
INSERT INTO test_target (ID, update_time, change_log_1, change_log_2, change_log_3, change_log_4, change_log_5, start_date, end_date)
VALUES (1234, TIMESTAMP '2013-02-01 02:00:00.000000', NULL, NULL, 'cdf', 'fgd', 'ref', TIMESTAMP '2013-02-01 02:00:00.000000', TIMESTAMP '2013-03-01 03:00:00.000000');
INSERT INTO test_target (ID, update_time, change_log_1, change_log_2, change_log_3, change_log_4, change_log_5, start_date, end_date)
VALUES (1234, TIMESTAMP '2013-03-01 03:00:00.000000', NULL, NULL, 'fgr', 'lkj', NULL, TIMESTAMP '2013-03-01 03:00:00.000000', TIMESTAMP '2013-04-01 04:00:00.000000');
INSERT INTO test_target (ID, update_time, change_log_1, change_log_2, change_log_3, change_log_4, change_log_5, start_date, end_date)
VALUES (1234, TIMESTAMP '2013-04-01 04:00:00.000000', 'cfg', NULL, 'tyh', 'poi', 'fdt', TIMESTAMP '2013-04-01 04:00:00.000000', TIMESTAMP '2013-05-01 05:00:00.000000');
INSERT INTO test_target (ID, update_time, change_log_1, change_log_2, change_log_3, change_log_4, change_log_5, start_date, end_date)
VALUES (1234, TIMESTAMP '2013-05-01 05:00:00.000000', NULL, NULL, 'trd', NULL, NULL, TIMESTAMP '2013-05-01 05:00:00.000000', TIMESTAMP '2013-06-01 06:00:00.000000');
INSERT INTO test_target (ID, update_time, change_log_1, change_log_2, change_log_3, change_log_4, change_log_5, start_date, end_date)
VALUES (1234, TIMESTAMP '2013-06-01 06:00:00.000000', 'erf', NULL, 'hjk', NULL, 'iuf', TIMESTAMP '2013-06-01 06:00:00.000000', TIMESTAMP '2013-07-01 07:00:00.000000');
INSERT INTO test_target (ID, update_time, change_log_1, change_log_2, change_log_3, change_log_4, change_log_5, start_date, end_date)
VALUES (1234, TIMESTAMP '2013-07-01 07:00:00.000000', NULL, NULL, NULL, 'gdf', NULL, TIMESTAMP '2013-07-01 07:00:00.000000', TIMESTAMP '3999-12-31 23:59:59.999999');
-- Load (delta) table definition: one row per changed column, holding the
-- column's name together with its previous (change_from) and new (change_to) value.
CREATE TABLE test_load (
    ID           INTEGER,
    update_time  TIMESTAMP(6),
    columnname   VARCHAR(30),
    change_from  VARCHAR(10),
    change_to    VARCHAR(10)
)
PRIMARY INDEX (ID);
-- The incoming delta overrides the information that was sent previously.
-- Each row says a column changed FROM a non-NULL value, which means the history
-- sent earlier was incomplete and must now be corrected. For example, change_col_2
-- changed from 'lks' to 'fgc', yet the latest target rows hold NULL for it.
-- NOTE(review): columnname uses 'change_col_N' while the target table's columns are
-- named change_log_N; presumably change_col_N maps to change_log_N -- confirm.
-- Timestamps are written as full TIMESTAMP literals (YYYY-MM-DD HH:MI:SS.FFFFFF).
INSERT INTO test_load (ID, update_time, columnname, change_from, change_to)
VALUES (1234, TIMESTAMP '2013-08-01 08:00:00.000000', 'change_col_1', 'frs', 'txh');
INSERT INTO test_load (ID, update_time, columnname, change_from, change_to)
VALUES (1234, TIMESTAMP '2013-08-01 08:00:00.000000', 'change_col_2', 'lks', 'fgc');
INSERT INTO test_load (ID, update_time, columnname, change_from, change_to)
VALUES (1234, TIMESTAMP '2013-08-01 08:00:00.000000', 'change_col_4', 'gdf', 'fdr');
INSERT INTO test_load (ID, update_time, columnname, change_from, change_to)
VALUES (1234, TIMESTAMP '2013-08-01 08:00:00.000000', 'change_col_5', 'ytf', 'tjb');
-- Expected-result table. In practice this is the same table as test_target; a
-- separate copy exists only so the desired end state can be shown side by side.
-- Rule illustrated by its rows: NULLs in the history are replaced by the delta's
-- change_from value, but only back to the point where the column last held a
-- non-NULL value (compare change_col_1 with change_col_2).
CREATE TABLE test_target_expected_res (
    ID            INTEGER,
    update_time   TIMESTAMP(6),
    change_log_1  VARCHAR(10),
    change_log_2  VARCHAR(10),
    change_log_3  VARCHAR(10),
    change_log_4  VARCHAR(10),
    change_log_5  VARCHAR(10),
    start_date    TIMESTAMP(6),
    end_date      TIMESTAMP(6)
)
PRIMARY INDEX (ID);
-- Expected contents after the delta is applied. These rows belong in
-- test_target_expected_res (not test_target), so the source data stays untouched.
-- Timestamps are full TIMESTAMP literals (YYYY-MM-DD HH:MI:SS.FFFFFF); the previously
-- open-ended row is closed at the delta's update_time and a new open-ended row
-- ending at 3999-12-31 23:59:59.999999 carries the change_to values.
INSERT INTO test_target_expected_res (ID, update_time, change_log_1, change_log_2, change_log_3, change_log_4, change_log_5, start_date, end_date)
VALUES (1234, TIMESTAMP '2013-01-01 00:00:00.000000', 'abc', 'def', NULL, NULL, NULL, TIMESTAMP '2013-01-01 00:00:00.000000', TIMESTAMP '2013-02-01 02:00:00.000000');
INSERT INTO test_target_expected_res (ID, update_time, change_log_1, change_log_2, change_log_3, change_log_4, change_log_5, start_date, end_date)
VALUES (1234, TIMESTAMP '2013-02-01 02:00:00.000000', NULL, 'lks', 'cdf', 'fgd', 'ref', TIMESTAMP '2013-02-01 02:00:00.000000', TIMESTAMP '2013-03-01 03:00:00.000000');
INSERT INTO test_target_expected_res (ID, update_time, change_log_1, change_log_2, change_log_3, change_log_4, change_log_5, start_date, end_date)
VALUES (1234, TIMESTAMP '2013-03-01 03:00:00.000000', NULL, 'lks', 'fgr', 'lkj', NULL, TIMESTAMP '2013-03-01 03:00:00.000000', TIMESTAMP '2013-04-01 04:00:00.000000');
INSERT INTO test_target_expected_res (ID, update_time, change_log_1, change_log_2, change_log_3, change_log_4, change_log_5, start_date, end_date)
VALUES (1234, TIMESTAMP '2013-04-01 04:00:00.000000', 'cfg', 'lks', 'tyh', 'poi', 'fdt', TIMESTAMP '2013-04-01 04:00:00.000000', TIMESTAMP '2013-05-01 05:00:00.000000');
INSERT INTO test_target_expected_res (ID, update_time, change_log_1, change_log_2, change_log_3, change_log_4, change_log_5, start_date, end_date)
VALUES (1234, TIMESTAMP '2013-05-01 05:00:00.000000', NULL, 'lks', 'trd', NULL, NULL, TIMESTAMP '2013-05-01 05:00:00.000000', TIMESTAMP '2013-06-01 06:00:00.000000');
INSERT INTO test_target_expected_res (ID, update_time, change_log_1, change_log_2, change_log_3, change_log_4, change_log_5, start_date, end_date)
VALUES (1234, TIMESTAMP '2013-06-01 06:00:00.000000', 'erf', 'lks', 'hjk', NULL, 'iuf', TIMESTAMP '2013-06-01 06:00:00.000000', TIMESTAMP '2013-07-01 07:00:00.000000');
INSERT INTO test_target_expected_res (ID, update_time, change_log_1, change_log_2, change_log_3, change_log_4, change_log_5, start_date, end_date)
VALUES (1234, TIMESTAMP '2013-07-01 07:00:00.000000', 'frs', 'lks', NULL, 'gdf', 'ytf', TIMESTAMP '2013-07-01 07:00:00.000000', TIMESTAMP '2013-08-01 08:00:00.000000');
INSERT INTO test_target_expected_res (ID, update_time, change_log_1, change_log_2, change_log_3, change_log_4, change_log_5, start_date, end_date)
VALUES (1234, TIMESTAMP '2013-08-01 08:00:00.000000', 'txh', 'fgc', NULL, 'fdr', 'tjb', TIMESTAMP '2013-08-01 08:00:00.000000', TIMESTAMP '3999-12-31 23:59:59.999999');
We are trying to produce the contents of test_target_expected_res from test_target and test_load, using either a single query or multiple passes.
Thank You,
Indrajit