-
Notifications
You must be signed in to change notification settings - Fork 5
/
SASCI360_SQL_Extraction.sas
314 lines (273 loc) · 10.6 KB
/
SASCI360_SQL_Extraction.sas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
/****************************************************************************************************************
PROGRAM: SAS Customer Intelligence 360 Direct Agent log's SQL(s) extraction utility
DESCRIPTION: This utility is intended to extract sql statements from the
onprem_direct.log.<date>. Extracts SQL for a single task or segment-map for a given time range
If multiple executions are found in the time range, then all the SQLs for those executions are
retrieved.
VERSION: 0.0
DATE MODIFIED: 03-SEPTEMBER-2024
AUTHOR: GLOBAL CUSTOMER INTELLIGENCE ENABLEMENT TEAM
#Copyright 2023, SAS Institute Inc., Cary, NC, USA. All Rights Reserved.
#SPDX-License-Identifier: Apache-2.0
******************************************************************************************************************/
/* Global Variables */
%global _CLIENTAPP;
%global returnKode;
%global p_infileDir p_objName p_timestamp_min p_timestamp_max;
%global g_logfilename g_max_ts g_min_ts;
options errabend fullstimer;
options nosource nosource2;
options nomlogic nomprint nosymbolgen;
/* Custom Initialization */
%let returnKode=0;
/*******Customize -- begin***********************/
%let p_infileDir=D:\Temp\SQL Extraction;
/*******Customize -- end*************************/
%let g_logfilename=%str(onprem_direct.log);
%let MAXSQLRECLEN=32767;
%let MAXRECLEN=500;
%let MAXSQLLINES=1500;
%let debug=N;
%let syscc=0;
/*
Function Name: init
Description: Initialize variables used further in program.
Parameters: None
*/
%macro init;
%if %index(%bquote(&_CLIENTAPP), %str(Enterprise Guide))=0 and %index(%bquote(&_CLIENTAPP), %str(Studio))=0 %then %do;
%stpbegin;
%end;
%else %do;
%let p_objname=Test Munvo Segment;
%let p_objname=Test_Task3;
%let p_timestamp_min=28Aug2024 00:00:00;
%let p_timestamp_max=28Aug2024 9:00:00;
%end;
%let datetimenow=%sysfunc(datetime(), 15.);
%let datenow=%sysfunc(datepart(&datetimenow.));
%let report_date=%sysfunc(putn(&datenow.,weekdate17.)) %sysfunc(time(),time8.) %sysfunc(tzonename());
%let currdate = %sysfunc(putn(&datenow., yymmddn8.),8.);
%let currtime = %sysfunc(compress(%sysfunc(putn(&datetimenow.,tod8.)),":"));
%let currdttm = &currdate.&currtime.;
%mend init;
/*
Function Name: validate_input
Description: Verify the input parameters:
- Only one log file can be processed, and since each log file contains data for a particular
day, both the min/max timestamps must be for the same day
- The log file named is derived based on the time range to be searched
- If the log file is not found the process terminates with error: 8
Parameters: None
*/
%macro validate_input;
%let l_min_date=%substr(&p_timestamp_min, 1, 9);
%let l_max_date=%substr(&p_timestamp_max, 1, 9);
%if "&l_min_date." ne "&l_max_date." %then %do;
%put &=l_min_date. &=l_max_date.;
%put %str(Both first and last timestamps must be on the same day. Processing aborted);
%let returnKode=16;
%goto exit_validate_input;
%end;
%let l_date = %sysfunc(putn("&l_min_date."d, yymmdd10.));
%let l_time = %substr(&p_timestamp_min, 11);
%if %length(&l_time) < 8 %then %let l_time=0&l_time.;
%let g_min_ts = &l_date. &l_time.;
%let l_date = %sysfunc(putn("&l_max_date."d, yymmdd10.));
%let l_time = %substr(&p_timestamp_max, 11);
%if %length(&l_time) < 8 %then %let l_time=0&l_time.;
%let g_max_ts = &l_date. &l_time.;
%let l_date_today = %sysfunc(putn(%sysfunc(today(), 15.), yymmdd10.));
%if "&l_date." eq "&l_date_today." %then %do;
%let g_logfilename=&g_logfilename.;
%end;
%else %do;
%let g_logfilename=&g_logfilename..&l_date.;
%end;
%put NOTE: Input log file name is &p_infileDir./&g_logfilename.;
%if %sysfunc(fileexist(&p_infileDir./&g_logfilename.)) ne 1 %then %do;
%put Input file does not exist. Processing aborted.;
%let returnKode=8;
%goto exit_validate_input;
%end;
%exit_validate_input:
%mend validate_input;
/*
Function Name: extract_sql
Description: We process the onprem log file here, after excluding skipping any log records prior to the
lower bound or after the higher bound on timestamp
1. Log an error and ignore record if the input record length exceeds MAXSQLRECLEN
2. If a timestamp is found at the beginning of the record, ensure that the timestamp is within range
3. Ignore one line SQLs that are often internal to DM Agent function rather than user queries. Note,
that one line SQLs still have at least 3 lines. First record, SQL, Terminator record
4. Find the end of SQL text is sometimes tricky, as the line with hyphens is missing. So we are
checking for either "quit;" or hyphen line for termination
In order to improve performance in reading the large log file, we avoid reading all the
record in one go. Multiple input statements are used to read the full record only when needed.
Parameters: None
*/
%macro extract_sql;
filename inlog "&p_infileDir./&g_logfilename";
data sqlext(keep=logTimestamp logRecordnumber sqlText nodeType nodeName sessionId threadId execId nodeId objName);
attrib logTimestamp length=8 format=datetime21.;
attrib sqlText length=$32767;
attrib logRecordnumber length=8;
attrib nodeName length=$50;
attrib nodeType length=$20;
attrib objName length=$100;
attrib threadId length=$25;
attrib sessionId length=$50;
attrib execId length=$50;
attrib nodeId length=$50;
retain logTimestamp sqlText nodeType nodeName sessionId threadId execId nodeId objName;
retain capture_sql_flag error_count lineStart_expr
logRecordnumber skip_min_ts_flag skip_max_ts_flag
sqlRecordNumber sqlStart_expr ;
infile inlog truncover obs=max end=eof length=input_line_length;
input @1 logts $25. @;
logRecordnumber = _N_;
if _N_=1 then do;
sqlStart_pat="/^([\d\-: ]+),[\d]+ INFO \[([\S]+)\] SID\[([\S]+)\] .* CC\[([^:]+):([\S)]+)\] .{1,50} com.sas.analytics.crm.custdata.sql[\s]+- TID\[([^\/]+)\/([^\/]+)\/([^\]]+)\]/";
sqlStart_expr=prxparse(sqlStart_pat);
lineStart_pat="/^%substr(%superq(g_max_ts),1,10) [\d\:]{8},[\d]{3} /";
lineStart_expr=prxparse(lineStart_pat);
skip_min_ts_flag = 'Y';
skip_max_ts_flag = 'N';
end;
if input_line_length gt &MAXSQLRECLEN. then do;
putlog 'ERROR: Maximum line length exceeded. Record skipped. ' _N_= input_line_length=;
error_count+1;
delete;
end;
if capture_sql_flag='Y' then do;
input @1 logtxt $varying&MAXSQLRECLEN.. input_line_length;
sqlRecordNumber + 1;
if sqlRecordNumber eq 1 then do;
sqlText='';
delete;
end;
if substr(logtxt, 1, 20) eq repeat('-', 19) then do;
capture_sql_flag='N';
end;
if substr(logtxt,1,5) eq 'quit;' then do;
sqlText = catx('^n', sqlText, logtxt);
capture_sql_flag='N';
end;
if sqlRecordNumber gt &MAXSQLLINES. and capture_sql_flag = 'Y' then do;
putlog "WARNING: SQL spans too many log record. Captured &MAXSQLLINES. records. Rest of sql is skipped";
capture_sql_flag='N';
end;
if capture_sql_flag='N' then do;
if sqlRecordNumber gt 3 then
output;
end;
else do;
sqlText = catx('^n', sqlText, logtxt);
end;
delete;
end;
if input_line_length lt 25 then delete;
if substr(logts, 1, 3) ne "202" then delete;
if prxmatch(lineStart_expr, logts) then do;
if skip_min_ts_flag = 'Y' then do;
if "&g_min_ts." le substr(logts, 1, 19) then do;
skip_min_ts_flag = 'N';
putlog "NOTE: Processing started at log record number: " _N_ " since timestamp range begins at &g_min_ts.";
end;
end;
if skip_max_ts_flag = 'N' then do;
if "&g_max_ts." lt substr(logts, 1, 19) then do;
skip_max_ts_flag = 'Y';
putlog "NOTE: Processing stopped at log record number: " _N_ " since timestamp range ends at &g_max_ts." ;
stop;
end;
end;
end;
if skip_min_ts_flag = 'Y' then delete;
input_line_length = &MAXRECLEN.;
input @1 logtxt $varying&MAXSQLRECLEN.. input_line_length;
if prxmatch(sqlStart_expr, trim(logtxt)) then do;
sqlRecordNumber = 0;
do i=1 to prxparen(sqlStart_expr);
call prxposn(sqlStart_expr, i, start, length);
if start ne 0 then do;
select (i);
when (1) logTimestamp = input(substr(logtxt, start, length), ymddttm19.);
when (2) threadId = substr(logtxt, start, length);
when (3) sessionId = substr(logtxt, start, length);
when (4) objName = substr(logtxt, start, length);
when (5) execId = substr(logtxt, start, length);
when (6) nodeType = substr(logtxt, start, length);
when (7) nodeName = substr(logtxt, start, length);
when (8) nodeId = substr(logtxt, start, length);
otherwise do;
putlog "WARNING: Additional unhandled match in the regex pattern ignored";
error_count+1;
if error_count le 25 then
putlog 'NOTE: ' logtxt=;
end; /* end of select-otherwise */
end; /* end of select statement */
end; /* end of matches found */
end; /* do loop for each match */
if upcase(objName) eq "%upcase(&p_objname.)" then
capture_sql_flag = 'Y';
else capture_sql_flag = 'N';
end; /* sql line found in log */
run;
filename inlog clear;
%mend extract_sql;
/*
Function Name: display_report
Description: In order to ensure that the resultant report properly renders the SQL, the ods option
is required. Otherwise, all the SQL will be shown on one line.
Further, "node name" is set to the object name for some nodes.
Parameters: None
*/
%macro display_report;
ods escapechar='^';
Title1 "SQL Extracted between &g_min_ts. and &g_max_ts.";
Title2 "Object name is &p_objname.";
Title3 "Input file is &p_infileDir./&g_logfilename on &systcpiphostname.";
%let nobs=0;
data _null_;
set sqlext nobs=nobs;
if _N_ eq 1 then do;
call symputx('nobs', nobs);
stop;
end;
run;
%if &nobs. eq 0 %then %do;
data no_results;
msg = 'There was no SQL extracted with this name and timestamp range';
run;
proc report data=no_results;
run;
%end;
%else %do;
proc report data=sqlext(keep=logTimestamp nodeName nodeType sqlText) nowindows;
columns logTimestamp nodeName nodeType sqlText;
define logTimestamp / display 'Log timestamp' width=20;
define sqlText / display 'SQL text' width=80;
define nodeName / display 'Node name' width=25;
define nodeType / display 'Node type' width=12;
run;
%end;
%mend display_report;
/*
Function Name: finish
Description: Processing completed with a return code.
Parameters: None
*/
%macro finish;
%put NOTE: Processing completed with return code &returnKode.;
%let syscc=&returnKode.;
%if %index(%bquote(&_CLIENTAPP), %str(Enterprise Guide))=0 and %index(%bquote(&_CLIENTAPP), %str(Studio))=0 %then %do;
%stpend;
%end;
%mend finish;
options nomprint nomlogic nosymbolgen;
%init;
%validate_input;
%extract_sql;
%display_report;
%finish;