Thursday, February 03, 2022

How To: Generate Dates from a CHRON Pattern in Snowflake

I created this logic to generate dates based on a Chron Pattern.  Useful for when you have specific date patterns you want to pull events from to process on a continual basis in a pipeline.

CREATE OR REPLACE FUNCTION GENERATE_CHRON_PATTERNED_TIMESTAMPS (CHRON_PATTERN  STRING, DURATION NUMBER)
 RETURNS TABLE (  CHRON_START_TIMESTAMP TIMESTAMP
                , CHRON_END_TIMESTAMP TIMESTAMP
                , CHRON_PATTERN STRING
                , MIN_PATTERN STRING
                , HOUR_PATTERN STRING
                , DAY_PATTERN STRING
                , MON_PATTERN STRING
                , WDAY_PATTERN STRING
                , YEAR_PATTERN STRING
                , MIN STRING
                , HOUR STRING
                , DAY STRING
                , MON STRING
                , WDAY STRING
                , YEAR STRING
               )
  AS
$$
WITH PARSE_CHRON_PATTERN AS (SELECT CHRON_PATTERN as chron_pattern
                       , '[0-9*-\.\/]+' as parse_rule
                       ,  REGEXP_SUBSTR(chron_pattern,parse_rule,1,1) min_pattern
                       ,  REGEXP_SUBSTR(chron_pattern,parse_rule,1,2) hour_pattern
                       ,  REGEXP_SUBSTR(chron_pattern,parse_rule,1,3) day_pattern
                       ,  REGEXP_SUBSTR(chron_pattern,parse_rule,1,4) mon_pattern
                       ,  REGEXP_SUBSTR(chron_pattern,parse_rule,1,5) wday_pattern
                       ,  COALESCE(REGEXP_SUBSTR(chron_pattern,parse_rule,1,6), '*') year_pattern

                 )
,
EXPLODE_LIST AS (
            SELECT P.*
                   , MIN_LIST.VALUE AS MIN
                   , HOUR_LIST.VALUE AS HOUR
                   , DAY_LIST.VALUE AS DAY
                   , MON_LIST.VALUE AS MON
                   , WDAY_LIST.VALUE AS WDAY
                   , YEAR_LIST.VALUE AS YEAR
                    
              FROM PARSE_CHRON_PATTERN P,
              lateral split_to_table(P.min_pattern, ',') MIN_LIST,
              lateral split_to_table(P.hour_pattern, ',') HOUR_LIST,
              lateral split_to_table(P.day_pattern, ',') DAY_LIST,
              lateral split_to_table(P.mon_pattern, ',') MON_LIST,
              lateral split_to_table(P.wday_pattern, ',') WDAY_LIST,
              lateral split_to_table(P.year_pattern, ',') YEAR_LIST
)
, WILD_CARD_RANGE AS (
                         SELECT CHRON_PATTERN
                              , PARSE_RULE
                              , MIN_PATTERN
                              , HOUR_PATTERN
                              , DAY_PATTERN
                              , MON_PATTERN
                              , WDAY_PATTERN
                              , YEAR_PATTERN
                              , IFF(MIN = '*', '0-59', MIN) AS MIN
                              , IFF(HOUR = '*', '0-23', HOUR) AS HOUR
                              , IFF(DAY = '*', '1-31', DAY) AS DAY
                              , IFF(MON = '*', '1-12', MON) AS MON
                              , IFF(WDAY = '*', '0-6', WDAY) AS WDAY
                              , IFF(YEAR = '*' or YEAR IS NULL, '2019-' || DATE_PART(YEAR, CURRENT_DATE())::STRING, YEAR) AS YEAR
                          FROM EXPLODE_LIST
 )
, EXPLODE_RANGE AS (SELECT  E.CHRON_PATTERN
                          --, E.PARSE_RULE
                          , E.MIN_PATTERN
                          , E.HOUR_PATTERN
                          , E.DAY_PATTERN
                          , E.MON_PATTERN
                          , E.WDAY_PATTERN
                          , E.YEAR_PATTERN
                          , COALESCE(MIN_LIST.NUMBER::STRING, E.MIN) AS MIN
                          , COALESCE(HOUR_LIST.NUMBER::STRING, E.HOUR) AS HOUR
                          , COALESCE(DAY_LIST.NUMBER::STRING, E.DAY) AS DAY
                          , COALESCE(MON_LIST.NUMBER::STRING, E.MON) AS MON
                          , COALESCE(WDAY_LIST.NUMBER::STRING, E.WDAY) AS WDAY
                          , COALESCE(YEAR_LIST.NUMBER::STRING, E.YEAR) AS YEAR
                      FROM WILD_CARD_RANGE E
                      LEFT OUTER JOIN MEDIA_CONFIG.CHRON_PATTERN_SEQ_NUMBER MIN_LIST
                        ON MIN_LIST.NUMBER BETWEEN TRY_TO_NUMBER(SPLIT_PART(E.MIN,'-',1)) AND TRY_TO_NUMBER(SPLIT_PART(E.MIN,'-',2))
                      LEFT OUTER JOIN MEDIA_CONFIG.CHRON_PATTERN_SEQ_NUMBER HOUR_LIST
                        ON HOUR_LIST.NUMBER BETWEEN TRY_TO_NUMBER(SPLIT_PART(E.HOUR,'-',1)) AND TRY_TO_NUMBER(SPLIT_PART(E.HOUR,'-',2))
                      LEFT OUTER JOIN MEDIA_CONFIG.CHRON_PATTERN_SEQ_NUMBER  DAY_LIST
                        ON DAY_LIST.NUMBER BETWEEN TRY_TO_NUMBER(SPLIT_PART(E.DAY,'-',1)) AND TRY_TO_NUMBER(SPLIT_PART(E.DAY,'-',2))
                      LEFT OUTER JOIN MEDIA_CONFIG.CHRON_PATTERN_SEQ_NUMBER MON_LIST
                        ON MON_LIST.NUMBER BETWEEN TRY_TO_NUMBER(SPLIT_PART(E.MON,'-',1)) AND TRY_TO_NUMBER(SPLIT_PART(E.MON,'-',2))
                      LEFT OUTER JOIN MEDIA_CONFIG.CHRON_PATTERN_SEQ_NUMBER WDAY_LIST
                        ON WDAY_LIST.NUMBER BETWEEN TRY_TO_NUMBER(SPLIT_PART(E.WDAY,'-',1)) AND TRY_TO_NUMBER(SPLIT_PART(E.WDAY,'-',2))
                      LEFT OUTER JOIN MEDIA_CONFIG.CHRON_PATTERN_SEQ_NUMBER YEAR_LIST
                        ON YEAR_LIST.NUMBER BETWEEN TRY_TO_NUMBER(SPLIT_PART(E.YEAR,'-',1)) AND TRY_TO_NUMBER(SPLIT_PART(E.YEAR,'-',2))
                     
                    )
 
 SELECT   try_to_timestamp(YEAR || '-' || MON || '-' || DAY || ' ' || HOUR || ':' || MIN || ':00', 'yyyy-mm-dd hh24:mi:ss') AS CHRON_START_TIMESTAMP
        , DATEADD(SECONDS, DURATION, CHRON_START_TIMESTAMP) AS CHRON_END_TIMSTAMP
        , *
   FROM EXPLODE_RANGE
 HAVING CHRON_START_TIMESTAMP IS NOT NULL
    AND DATE_PART(WEEKDAY, CHRON_START_TIMESTAMP) = WDAY
    AND CHRON_START_TIMESTAMP <= CURRENT_TIMESTAMP()
  ORDER BY CHRON_START_TIMESTAMP DESC               
 $$
 ;
 
 
SELECT * FROM TABLE (GENERATE_CHRON_PATTERNED_TIMESTAMPS('0 17,19-23 1,15 * 1,2,4-6', 3600));


No comments: