Below are two examples. One that can be ran in any DB that supports CTEs and the other can be ran in Snowflake. I have also projected out the trend line beyond the sample data to show you how to do simple projections. Also I throw in R2 so you can evaluate the model fit.
Sample Data
CREATE OR REPLACE TEMPORARY TABLE DATA_SAMPLE (X INTEGER,Y INTEGER);
INSERT INTO DATA_SAMPLE (X,Y)
VALUES (1,41),
(2,963),
(3,4506),
(4,13557),
(5,40422),
(6,61484),
(7,85075),
(8,118436),
(9,134457),
(10,180253);
Generic SQL Logic
WITH X_Y_AVG AS (
SELECT X
, Y
, AVG(X) OVER() AS X_AVG
, AVG(Y) OVER() AS Y_AVG
FROM DATA_SAMPLE
)
, SLOPE AS (
SELECT SUM((X - X_AVG) * (Y - Y_AVG)) / SUM((X - X_AVG) * (X - X_AVG)) AS SLOPE
, MAX(X_AVG) AS X_AVG_MAX
, MAX(Y_AVG) AS Y_AVG_MAX
FROM X_Y_AVG
)
, CALC AS (
SELECT SLOPE
, Y_AVG_MAX - X_AVG_MAX * SLOPE AS INTERCEPT
FROM SLOPE
)
, TREND_LINE AS (
SELECT X_Y_AVG.X
, X_Y_AVG.Y
, X_Y_AVG.X_AVG
, X_Y_AVG.Y_AVG
, CALC.SLOPE
, CALC.INTERCEPT
, (X_Y_AVG.X * CALC.SLOPE) + CALC.INTERCEPT AS TREND_LINE_Y
FROM X_Y_AVG
CROSS JOIN CALC
UNION ALL
SELECT PROJECTION_X.X
, NULL
, PROJECTION_X.X
, NULL
, CALC.SLOPE
, CALC.INTERCEPT
, (PROJECTION_X.X * CALC.SLOPE) + CALC.INTERCEPT AS TREND_LINE_Y
FROM (SELECT COLUMN1 AS X
FROM VALUES (11),
(12),
(13),
(14),
(15),
(16)) AS PROJECTION_X
CROSS JOIN CALC
ORDER BY X
)
, RSQUARED AS (
SELECT 1-(SUM(POWER(Y - TREND_LINE_Y,2)))/(SUM(POWER(Y - Y_AVG,2))) AS R2
FROM TREND_LINE
)
SELECT TREND_LINE.X
, TREND_LINE.Y
, TREND_LINE.SLOPE
, TREND_LINE.INTERCEPT
, TREND_LINE.TREND_LINE_Y
, IFF(TREND_LINE.Y IS NOT NULL, (SELECT R2 FROM RSQUARED), NULL) AS R2
FROM TREND_LINE
;
Snowflake Specific SQL Logic Using REGR_SLOPE & REGR_INTERCEPT Functions
WITH CALC AS (
SELECT REGR_SLOPE(Y, X) AS SLOPE
, REGR_INTERCEPT(Y, X) AS INTERCEPT
FROM DATA_SAMPLE
)
SELECT DATA_SAMPLE.X
, DATA_SAMPLE.Y
, CALC.SLOPE
, CALC.INTERCEPT
, (DATA_SAMPLE.X * CALC.SLOPE) + CALC.INTERCEPT AS TREND_LINE_Y
, REGR_R2(DATA_SAMPLE.Y, "TREND_LINE_Y") OVER() AS R2
FROM DATA_SAMPLE
CROSS JOIN CALC
UNION ALL
SELECT PROJECTION_X.X
, NULL
, CALC.SLOPE
, CALC.INTERCEPT
, (PROJECTION_X.X * CALC.SLOPE) + CALC.INTERCEPT AS TREND_LINE_Y
, NULL
FROM (SELECT COLUMN1 AS X
FROM VALUES (11),
(12),
(13),
(14),
(15),
(16)) AS PROJECTION_X
CROSS JOIN CALC
ORDER BY X;
No comments:
Post a Comment