I ended up using a suggestion from the folks over at Stack Overflow.
First create a table that has the group id and the upper and lower limits of each bucket.
-- Seed row that the recursive query below starts from.
-- NOTE(review): four values are supplied here, while the following INSERT names
-- only three columns (GRP_NBR, LOWER_LIMIT, UPPER_LIMIT) -- confirm against the table DDL.
INSERT INTO TMP_WORK_DB.GRP_NBRS
VALUES (0, 1, 0, 2000000);
-- Generate the bucket rows: whatever is already in GRP_NBRS becomes group 1,
-- and each recursive step adds the next group with both limits shifted up
-- by one interval.
INSERT INTO TMP_WORK_DB.GRP_NBRS (GRP_NBR, LOWER_LIMIT, UPPER_LIMIT)
WITH RECURSIVE GRP_RECRSV (GRP_NBR, LOWER_LIMIT, UPPER_LIMIT) AS (
    -- Anchor: relabel the existing row(s) as group 1.
    -- NOTE(review): there is no filter here, so this presumably expects the
    -- table to hold only the seed row at this point -- confirm.
    SELECT
        1 AS GRP_NBR
      , LOWER_LIMIT
      , UPPER_LIMIT
    FROM TMP_WORK_DB.GRP_NBRS
    UNION ALL
    -- Recursive step: next group number, limits moved up by the interval.
    SELECT
        GRP_NBR + 1
      , LOWER_LIMIT + 2000000   -- interval of 2 million; adjust as needed
      , UPPER_LIMIT + 2000000   -- must stay in step with the line above
    FROM GRP_RECRSV
    WHERE GRP_NBR < 120         -- stop condition so the recursion ends at group 120
)
SELECT
    GRP_NBR
  , LOWER_LIMIT
  , UPPER_LIMIT
FROM GRP_RECRSV
;
Then use a simple WHERE ... BETWEEN on the data to determine which GROUP ID to assign.
-- Remove the seed entry (GRP_NBR = 0); the original author notes it is a
-- duplicate once the generated rows have been inserted.
DELETE FROM TMP_WORK_DB.GRP_NBRS
WHERE GRP_NBR = 0
;
-- Assign a group number to each product name by looking up its running total
-- of distinct product ids in the limits table.
INSERT INTO TMP_WORK_DB.PROD_LIST_GRP
WITH NUMOFPRODS (PROD_NAME, QTY, RUNNING) AS (
    -- One row per PROD_NAME: its distinct PROD_ID count and a running total
    -- of those counts, accumulated from the smallest count upwards.
    SELECT
        PROD_NAME
      , COUNT(DISTINCT PROD_ID) AS QTY
        -- QTY below refers to the select-list alias defined on the line above.
        -- PROD_NAME is added as a tiebreaker: ordering by QTY alone leaves the
        -- order of equal-QTY rows undefined, so RUNNING (and therefore the
        -- assigned group) could change from one run to the next.
      , SUM(QTY) OVER (ORDER BY QTY, PROD_NAME ROWS UNBOUNDED PRECEDING) AS RUNNING
    FROM TMP_WORK_DB.PROD_LIST
    GROUP BY PROD_NAME
)
SELECT
    a.PROD_NAME
  , a.QTY
  , a.RUNNING
  , b.GRP_NBR
FROM NUMOFPRODS a
INNER JOIN TMP_WORK_DB.GRP_NBRS b
    -- BETWEEN is inclusive at both ends.
    -- NOTE(review): if one bucket's UPPER_LIMIT equals the next bucket's
    -- LOWER_LIMIT, a RUNNING value exactly on that boundary matches both rows
    -- and the product is inserted twice -- confirm the limits do not overlap.
    ON a.RUNNING BETWEEN b.LOWER_LIMIT AND b.UPPER_LIMIT
;
I ended up using a suggestion from the folks over at Stack Overflow.
First create a table that has the group id and the upper and lower limits of each bucket.
Then use a simple WHERE ... BETWEEN on the data to determine which GROUP ID to assign.