Monday, December 18, 2006

How To: Sessionize Events

This is a how to do sessionizing using SQL with just a set of events. We use alot of self-joins to accomplish this task.

Steps:
  • Determine your session timeout length.
  • Order events by date.
  • Identify the Exit Point Event and the duration between events. The Exit Point is the event that has a duration greater than the session timeout length.
  • Finally you can identify the sessionID and assign a sequence number for each event within a session.
-- HOW TO: SESSIONIZING EVENTS
-- This is an example of how to implicitly determine sessions with just a set of event date/times.

--------------------------------------------------------------------
-- Setup test scenerio.
--------------------------------------------------------------------
-- DROP TABLE WebLog
CREATE TABLE WebLog(UserID INT NOT NULL, EventDate DATETIME NOT NULL)
DECLARE @SessionTimeOut INT
SET @SessionTimeOut = 1800 -- 60 seconds * 30 minutes

INSERT INTO WebLog VALUES (123,'01-jan-2005 00:01:00');
INSERT INTO WebLog VALUES (123,'01-jan-2005 00:02:00');
INSERT INTO WebLog VALUES (123,'01-jan-2005 00:03:00');
INSERT INTO WebLog VALUES (123,'01-jan-2005 00:06:00');
INSERT INTO WebLog VALUES (123,'01-jan-2005 00:10:00');
INSERT INTO WebLog VALUES (123,'01-jan-2005 00:20:00');
INSERT INTO WebLog VALUES (123,'01-jan-2005 01:00:00');
INSERT INTO WebLog VALUES (123,'01-jan-2005 00:30:00');
INSERT INTO WebLog VALUES (123,'01-jan-2005 00:31:00');
INSERT INTO WebLog VALUES (123,'01-jan-2005 03:00:00');
INSERT INTO WebLog VALUES (123,'01-jan-2005 03:32:00');
INSERT INTO WebLog VALUES (123,'01-jan-2005 03:33:00');
INSERT INTO WebLog VALUES (123,'01-jan-2005 03:34:00');
INSERT INTO WebLog VALUES (123,'01-jan-2005 03:34:10');
INSERT INTO WebLog VALUES (123,'01-jan-2005 03:34:30');
INSERT INTO WebLog VALUES (123,'01-jan-2005 04:00:00');
INSERT INTO WebLog VALUES (123,'01-jan-2005 05:01:00');
INSERT INTO WebLog VALUES (123,'01-jan-2005 05:02:00');
INSERT INTO WebLog VALUES (123,'01-jan-2005 05:03:00');
INSERT INTO WebLog VALUES (123,'01-jan-2005 05:04:00');
INSERT INTO WebLog VALUES (123,'01-jan-2005 05:05:00');

INSERT INTO WebLog VALUES (121,'01-jan-2005 00:01:00');
INSERT INTO WebLog VALUES (121,'01-jan-2005 00:02:00');
INSERT INTO WebLog VALUES (121,'01-jan-2005 00:03:00');
INSERT INTO WebLog VALUES (121,'01-jan-2005 00:06:00');
INSERT INTO WebLog VALUES (121,'01-jan-2005 00:10:00');
INSERT INTO WebLog VALUES (121,'01-jan-2005 00:20:00');
INSERT INTO WebLog VALUES (121,'01-jan-2005 01:00:00');
INSERT INTO WebLog VALUES (121,'01-jan-2005 00:30:00');
INSERT INTO WebLog VALUES (121,'01-jan-2005 00:31:00');
INSERT INTO WebLog VALUES (121,'01-jan-2005 03:00:00');
INSERT INTO WebLog VALUES (121,'01-jan-2005 03:32:00');
INSERT INTO WebLog VALUES (121,'01-jan-2005 03:33:00');
INSERT INTO WebLog VALUES (121,'01-jan-2005 03:34:00');
INSERT INTO WebLog VALUES (121,'01-jan-2005 03:34:10');
INSERT INTO WebLog VALUES (121,'01-jan-2005 03:34:30');
INSERT INTO WebLog VALUES (121,'01-jan-2005 04:00:00');
INSERT INTO WebLog VALUES (121,'01-jan-2005 05:01:00');
INSERT INTO WebLog VALUES (121,'01-jan-2005 05:02:00');
INSERT INTO WebLog VALUES (121,'01-jan-2005 05:03:00');
INSERT INTO WebLog VALUES (121,'01-jan-2005 05:04:00');
INSERT INTO WebLog VALUES (121,'01-jan-2005 05:05:00');

DECLARE @WebLogWithRowID TABLE (UserID INT NOT NULL,
EventDate DATETIME NOT NULL,
RowID BIGINT NOT NULL)

DECLARE @WebLogExitPoint TABLE (UserID INT NOT NULL,
EventDate DATETIME NOT NULL,
RowID BIGINT NOT NULL,
Duration BIGINT NOT NULL,
ExitPointFlag BIT NOT NULL)
--------------------------------------------------------------------
-- Assign Record ID
--------------------------------------------------------------------
INSERT INTO @WebLogWithRowID(UserID, EventDate, RowID)
SELECT UserID, EventDate, ROW_NUMBER() OVER (PARTITION BY UserID ORDER BY EventDate) RowID
FROM WebLog
--------------------------------------------------------------------
-- Determine Duration and ExitPoint
--------------------------------------------------------------------
INSERT INTO @WebLogExitPoint(UserID, EventDate, RowID, Duration, ExitPointFlag)
SELECT a.UserID, a.EventDate, a.RowID,
ISNULL(DATEDIFF(SECOND, a.EventDate, b.EventDate), @SessionTimeOut) AS Duration,
CASE WHEN b.UserID IS NULL THEN 1 -- Last record
WHEN DATEDIFF(SECOND, a.EventDate, b.EventDate) > @SessionTimeOut THEN 1
ELSE 0
END AS ExitPointFlag
FROM @WebLogWithRowID a
LEFT OUTER JOIN @WebLogWithRowID b
ON b.RowID = a.RowID + 1
AND b.UserID = a.UserID

--------------------------------------------------------------------
-- Determine SeqNumber within Session and SessionID
--------------------------------------------------------------------
SELECT a.UserID, a.EventDate, a.RowID,
a.RowID - IsNULL(b.RowID, 0) AS SeqNumber,
a.Duration,
a.ExitPointFlag,
ISNULL(b.RowID, 1) AS SessionID
FROM @WebLogExitPoint a
LEFT OUTER JOIN @WebLogExitPoint b
ON b.RowID = (SELECT MAX(c.RowID)
FROM @WebLogExitPoint c
WHERE c.RowID < a.RowID
AND c.ExitPointFlag = 1 )
AND b.UserID = a.UserID