Below are two examples.  One that can be ran in any DB that supports CTEs and the other can be ran in Snowflake.  I have also projected out the trend line beyond the sample data to show you how to do simple projections.  Also I throw in R2 so you can evaluate the model fit. 
Sample Data
CREATE OR REPLACE TEMPORARY TABLE DATA_SAMPLE (X INTEGER,Y INTEGER);
INSERT INTO DATA_SAMPLE (X,Y)
VALUES (1,41),
       (2,963),
       (3,4506),
       (4,13557),
       (5,40422),
       (6,61484),
       (7,85075),
       (8,118436),
       (9,134457),
       (10,180253);
Generic SQL Logic
WITH X_Y_AVG AS (
    SELECT   X
           , Y
           , AVG(X) OVER() AS X_AVG
           , AVG(Y) OVER() AS Y_AVG
      FROM DATA_SAMPLE
)
, SLOPE AS (
    SELECT   SUM((X - X_AVG) * (Y - Y_AVG)) / SUM((X - X_AVG) * (X - X_AVG)) AS SLOPE
           , MAX(X_AVG) AS X_AVG_MAX
           , MAX(Y_AVG) AS Y_AVG_MAX
      FROM X_Y_AVG
)
, CALC AS (
    SELECT   SLOPE
           , Y_AVG_MAX - X_AVG_MAX * SLOPE AS INTERCEPT
      FROM SLOPE
)
, TREND_LINE AS (
        SELECT   X_Y_AVG.X
               , X_Y_AVG.Y
               , X_Y_AVG.X_AVG
               , X_Y_AVG.Y_AVG
               , CALC.SLOPE
               , CALC.INTERCEPT
               , (X_Y_AVG.X * CALC.SLOPE) + CALC.INTERCEPT AS TREND_LINE_Y
          FROM X_Y_AVG
          CROSS JOIN CALC
         UNION ALL
        SELECT   PROJECTION_X.X
               , NULL
               , PROJECTION_X.X
               , NULL
               , CALC.SLOPE
               , CALC.INTERCEPT
               , (PROJECTION_X.X * CALC.SLOPE) + CALC.INTERCEPT AS TREND_LINE_Y
          FROM (SELECT COLUMN1 AS X
                  FROM VALUES  (11),
                               (12),
                               (13),
                               (14),
                               (15),
                               (16)) AS PROJECTION_X 
          CROSS JOIN CALC
         ORDER BY X
  )
, RSQUARED AS (
    SELECT 1-(SUM(POWER(Y - TREND_LINE_Y,2)))/(SUM(POWER(Y - Y_AVG,2))) AS R2
      FROM TREND_LINE
)
SELECT   TREND_LINE.X
       , TREND_LINE.Y
       , TREND_LINE.SLOPE
       , TREND_LINE.INTERCEPT
       , TREND_LINE.TREND_LINE_Y
       , IFF(TREND_LINE.Y IS NOT NULL, (SELECT R2 FROM RSQUARED), NULL) AS R2
  FROM TREND_LINE
;    
Snowflake Specific SQL Logic Using REGR_SLOPE & REGR_INTERCEPT Functions
WITH CALC AS (
    SELECT   REGR_SLOPE(Y, X) AS SLOPE
           , REGR_INTERCEPT(Y, X) AS INTERCEPT
     FROM DATA_SAMPLE
)
SELECT   DATA_SAMPLE.X
       , DATA_SAMPLE.Y
       , CALC.SLOPE
       , CALC.INTERCEPT
       , (DATA_SAMPLE.X * CALC.SLOPE) + CALC.INTERCEPT AS TREND_LINE_Y
       , REGR_R2(DATA_SAMPLE.Y, "TREND_LINE_Y") OVER() AS R2
  FROM DATA_SAMPLE
  CROSS JOIN CALC
 UNION ALL
SELECT   PROJECTION_X.X
       , NULL
       , CALC.SLOPE
       , CALC.INTERCEPT
       , (PROJECTION_X.X * CALC.SLOPE) + CALC.INTERCEPT AS TREND_LINE_Y
       , NULL
  FROM (SELECT COLUMN1 AS X
          FROM VALUES  (11),
                       (12),
                       (13),
                       (14),
                       (15),
                       (16)) AS PROJECTION_X 
  CROSS JOIN CALC
  ORDER BY X;  
 
No comments:
Post a Comment