forked from johnny-chivers/aws-data-engineering
-
Notifications
You must be signed in to change notification settings - Fork 0
/
anomaly_detection.sql
57 lines (48 loc) · 1.47 KB
/
anomaly_detection.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
CREATE OR REPLACE STREAM "CLICKSTREAM" (
"CLICKCOUNT" DOUBLE
);
CREATE OR REPLACE PUMP "CLICKPUMP" AS
INSERT INTO "CLICKSTREAM" ("CLICKCOUNT")
SELECT STREAM COUNT(*)
FROM "SOURCE_SQL_STREAM_001"
WHERE "browseraction" = 'Click'
GROUP BY FLOOR(
("SOURCE_SQL_STREAM_001".ROWTIME - TIMESTAMP '1970-01-01 00:00:00')
SECOND / 10 TO SECOND
);
CREATE OR REPLACE STREAM "IMPRESSIONSTREAM" (
"IMPRESSIONCOUNT" DOUBLE
);
CREATE OR REPLACE PUMP "IMPRESSIONPUMP" AS
INSERT INTO "IMPRESSIONSTREAM" ("IMPRESSIONCOUNT")
SELECT STREAM COUNT(*)
FROM "SOURCE_SQL_STREAM_001"
WHERE "browseraction" = 'Impression'
GROUP BY FLOOR(
("SOURCE_SQL_STREAM_001".ROWTIME - TIMESTAMP '1970-01-01 00:00:00')
SECOND / 10 TO SECOND
);
CREATE OR REPLACE STREAM "CTRSTREAM" (
"CTR" DOUBLE
);
CREATE OR REPLACE PUMP "CTRPUMP" AS
INSERT INTO "CTRSTREAM" ("CTR")
SELECT STREAM "CLICKCOUNT" / "IMPRESSIONCOUNT" * 100.000 as "CTR"
FROM "IMPRESSIONSTREAM",
"CLICKSTREAM"
WHERE "IMPRESSIONSTREAM".ROWTIME = "CLICKSTREAM".ROWTIME;
CREATE OR REPLACE STREAM "DESTINATION_SQL_STREAM" (
"CTRPERCENT" DOUBLE,
"ANOMALY_SCORE" DOUBLE
);
CREATE OR REPLACE PUMP "OUTPUT_PUMP" AS
INSERT INTO "DESTINATION_SQL_STREAM"
SELECT STREAM * FROM
TABLE (RANDOM_CUT_FOREST(
CURSOR(SELECT STREAM "CTR" FROM "CTRSTREAM"), --inputStream
100, --numberOfTrees (default)
12, --subSampleSize
100000, --timeDecay (default)
1) --shingleSize (default)
)
WHERE ANOMALY_SCORE > 2;