utl_onpremdiext.sas
/* Copyright © 2023, SAS Institute Inc., Cary, NC, USA. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0
*/
/******************************************************************************
This utility extracts data items and/or SQL statements from the
onprem_direct.log.<date> files. Note that the current-day file (the one
without a date suffix) is never accessed, to avoid processing an incomplete
file.
MAXFILES: This number determines the maximum number of files processed in a
single execution of this process.
Modification History
04/22/2022 Force TASK dataset to have a fixed set of columns. Each new release
           may add columns, and this can cause issues when combining current
           data with history.
           Updated default maxreclen to 18000 (previously 12000) due to an
           issue while processing a sample log provided by the Denmark team.
           Avoid capturing the same data item multiple times in a queryTask,
           as this could overflow dataitem_list and truncate dataitem_name.
           Fix handling of CountOnly segment maps.
01/15/2024 Expand data item width to accommodate larger names.
           Provide option to extract data item values specified in filter
           predicates.
           Capture calc data items.
******************************************************************************/
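/* For illustration (hypothetical dates): with the defaults below, a single
   run might read
     onprem_direct.log.2024-01-15
     onprem_direct.log.2024-01-16
   from c_infile_dir, while onprem_direct.log (no date suffix, still being
   written today) is always skipped. */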
options errabend fullstimer;
options nosource nosource2;
options nomlogic nomprint nosymbolgen;
%global _CLIENTAPPABREV;
/*
The following variables will need to be customized:
c_lastUpdateFile:
  This file stores the last "date" processed.
  It should contain one record with the date formatted as yyyy-mm-dd.
  If the file does not exist, one will be created for you. In that case,
  only yesterday's log file will be processed.
c_outputDir:
  Specify the location in which to generate the output SAS datasets.
c_infile_dir:
  This is the directory containing the onprem_direct.log files.
  It is assumed that the files are suffixed by a date: .yyyy-mm-dd
*/
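/* A minimal sketch of the expected c_lastUpdateFile content: a single record
   holding the last date processed (the value shown is hypothetical):
     2024-01-14
   On the next run, files dated 2024-01-15 onward, excluding today, would be
   picked up. */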
%let c_lastUpdateFile=D:\mydocs\w2k_race_01\data\onpremdiext_lastupdate.txt;
%let c_infile_dir=D:\mydocs\w2k_race_01\data\onprem;
%let c_outputDir=D:\Temp;
%let c_extract_dataitem_values=Y;
libname outlib "&c_outputDir." compress=yes;
/*
This macro removes all files from a directory and, when delete_dir=Y,
removes the (now empty) directory as well.
*/
%macro cleanDir(dir=, delete_dir=N);
data _null_;
dirname = "&dir.";
dref = 'thisdir';
filref = 'thisfile';
length fname $200;
rc = filename(dref,dirname);
did = dopen(dref);
do i = 1 to dnum(did);
fname = dread(did,i);
rc = filename(filref,catx('/',dirname,fname));
rc = fdelete(filref);
msg = catx(' ', 'NOTE: File', fname, 'deleted from work directory');
putlog msg;
end;
rc = dclose(did);
%if &delete_dir. eq Y %then %do;
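/* FDELETE can also remove the directory itself once it is empty;
   a nonzero rc means the delete failed (for example, files remain) */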
rc = fdelete(dref);
msg = catx(' ', 'NOTE: Directory', dirname, 'deleted rc=', rc);
putlog msg;
%end;
run;
%mend cleanDir;
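/* Example invocations (directory path hypothetical):
     %cleanDir(dir=D:\Temp\scratch)               - clears files, keeps the directory
     %cleanDir(dir=D:\Temp\scratch, delete_dir=Y) - also removes the directory
*/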
%let MAXRECLEN=18000;
%let MAXSQLLEN=30000;
%let MAXSQLLINES=1500;
%let MAXFILES=31;
%symdel g_last_date_processed g_file_list workLocation /nowarn;
%global g_last_date_processed g_file_list g_counts_only;
%global workLocation;
%let debug=N;
%let syscc=0;
%let datetimenow=%sysfunc(datetime(), 15.);
%let LOGFILENAME=onprem_direct.log;
/*
*options mlogic mprint symbolgen;
*%let debug=Y;
%let LOGFILENAME=onprem_direct_small.log;
%let datetimenow=%sysfunc(putn('28MAR2022 20:45:00'dt, 15.));
%let datetimenow=%sysfunc(putn('04DEC2021 20:45:00'dt, 15.));
%let datetimenow=%sysfunc(putn('07APR2022 20:45:00'dt, 15.));
%cleanDir(dir=&c_outputDir);
*/
%let datenow=%sysfunc(datepart(&datetimenow.));
%let report_date=%sysfunc(putn(&datenow.,weekdate17.)) %sysfunc(time(),time8.) %sysfunc(tzonename());
%let currdate = %sysfunc(putn(&datenow., yymmddn8.),8.);
%let currtime = %sysfunc(compress(%sysfunc(putn(&datetimenow.,tod8.)),":"));
%let currdttm = &currdate.&currtime.;
/*
1. Read the configuration file and get the last date already processed
2. Get each date from the date in #1 up to, but excluding, today
   (at most MAXFILES files)
3. Generate the list of input files and assign the filename.
*/
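/* A sketch of the macro variables %init produces (dates hypothetical):
     g_last_date_processed=2024-01-16
     g_file_list="D:\mydocs\w2k_race_01\data\onprem/onprem_direct.log.2024-01-15"
                 "D:\mydocs\w2k_race_01\data\onprem/onprem_direct.log.2024-01-16"
   The quoted, space-separated list feeds the concatenated FILENAME statement
   "filename inlog (&g_file_list.)" assigned at the end of %init. */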
%macro init;
options dlcreatedir;
%let workLocation=%sysfunc(getoption(work))/onpremext;
%if &debug eq Y %then %do;
%let workLocation=d:\temp\onpremext;
%end;
libname appdir "&workLocation.";
options nodlcreatedir;
options user=appdir;
%put &=workLocation.;
%cleanDir(dir=&workLocation.);
%if %sysfunc(fileexist(&c_lastUpdateFile.)) %then %do;
data _null_;
attrib file_list length=$8192.;
attrib file_suffix length=$10;
infile "&c_lastUpdateFile." dsd truncover;
input last_date_processed yymmdd10.;
today_date = datepart(&datetimenow.);
if (today_date - last_date_processed) gt 100 or
(today_date - last_date_processed) le 1 then do;
put "Invalid configuration file: Today is: " today_date date9. " Last processed date: " last_date_processed date9.;
put "There are too many (over 100) or no unprocessed days between last_date_processed and today";
abort cancel;
end;
file_list='';
do i=last_date_processed+1 to today_date-1 by 1;
if file_count lt &MAXFILES. then do;
file_suffix=put(i, yymmdd10.);
this_filename=quote(cats("&c_infile_dir./&LOGFILENAME..", file_suffix));
if fileexist(this_filename) then do;
file_count+1;
file_list=catx(' ', file_list, this_filename);
call symputx('g_last_date_processed', file_suffix, 'g');
call symputx('g_file_list', file_list, 'g');
end;
end;
end;
run;
%end;
%else %do;
%let yesterday=%eval(%sysfunc(datepart(&datetimenow.)) - 1);
%let g_last_date_processed=%sysfunc(putn(&yesterday., yymmdd10.));
%let g_file_list="&c_infile_dir./&LOGFILENAME..&g_last_date_processed";
data _null_;
if fileexist(&g_file_list.) ne 1 then do;
put "ERROR: The input file " &g_file_list. " does not exist. Process terminated";
abort cancel;
end;
run;
%end;
%put &=g_last_date_processed.;
%put &=g_file_list.;
%if &g_file_list. eq %then %do;
%put "There are no files to process. Processing aborted";
data _null_;
abort cancel;
run;
%end;
%else %do;
filename inlog (&g_file_list.);
%end;
%mend init;
%macro update_cfg_file;
/* %let g_last_date_processed=%str(2022-04-05); */
data _null_;
file "&c_lastUpdateFile." dsd;
put "&g_last_date_processed.";
run;
%mend update_cfg_file;
/*
The "clientData" lines in the runTasks JSON can be extremely long and are of
no value for the current project, so we choose to drop those records.
For each "runTasks" log record, we capture the entire JSON and write it to a
flat file.
file_record_nbr:
  Since we are processing multiple input files in one go, we need this counter
  to track the record number within the specific file being processed.
*/
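/* A hypothetical log line matched by the taskStart pattern below (timestamp,
   thread id, and package prefix are illustrative only):
     2024-01-15 10:23:45,123 INFO  [pool-2-thread-7] com.sas.ci360.agent.service.DirectMarketingTaskService - runTasks ...
   Everything from the line after the match down to a lone "}" record is
   copied verbatim into a per-task JSON file under &workLocation. */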
%macro extract_runtask_data;
data filesList(keep= task_filename task_runtime file_record_nbr);
attrib capture_taskinfo_flag length=$1;
attrib task_filename length=$512;
attrib fname length=$512;
attrib task_runtime length=$23;
attrib input_filename length=$512;
attrib prev_input_filename length=$512;
retain capture_taskinfo_flag file_record_nbr fname prev_input_filename
rownum taskStart_expr task_runtime;
infile inlog lrecl=&MAXRECLEN. truncover obs=max end=eof filename=input_filename length=input_line_length;
input @1 logtxt $&MAXRECLEN..;
if prev_input_filename ne input_filename then do;
prev_input_filename = input_filename;
file_record_nbr=0;
end;
rownum+1;
file_record_nbr+1;
if _N_=1 then do;
taskStart_pat="/^.{23} INFO {2}\[([\S]+)].{10,75}?agent.service.DirectMarketingTaskService - runTasks /";
taskStart_expr=prxparse(taskStart_pat);
end;
if capture_taskinfo_flag='Y' then do;
if input_line_length gt 20 then do;
if index(substr(logtxt,1,20), """clientData"":") then
delete;
end;
if input_line_length ge &MAXRECLEN. then do;
putlog 'ERROR: Line length exceeded. Record skipped. ' input_filename= file_record_nbr= input_line_length=;
error_count+1;
if error_count le 5 then
putlog logtxt=;
delete;
end;
file taskinfo filevar=fname;
put logtxt;
if logtxt = '}' and length(_infile_) = 1 then do;
capture_taskinfo_flag='N';
task_filename = fname;
output filesList;
end;
delete;
end;
*if length(logtxt) lt 50 or length(logtxt) gt 200 then delete;
if prxmatch(taskStart_expr, trim(logtxt)) then do;
runtask_cnt+1;
capture_taskinfo_flag='Y';
task_runtime=substr(logtxt,1,23);
fname=cats("&workLocation.", '/', scan(input_filename,-1,'/\'), '_', file_record_nbr);
putlog "Write to file: " fname=;
end;
run;
filename inlog clear;
%mend extract_runtask_data;
%macro printall(libname,worklib=work);
%local num i;
proc datasets library=&libname memtype=data nodetails;
contents out=&worklib..temp1(keep=memname) data=_all_ noprint;
run;
data _null_;
set &worklib..temp1 end=final;
by memname notsorted;
if last.memname;
n+1;
call symput('ds'||left(put(n,8.)),trim(memname));
if final then call symput('num',put(n,8.));
run;
%do i=1 %to #
proc print data=&libname..&&ds&i noobs;
title "Data Set &libname..&&ds&i";
run;
%end;
proc sql noprint;
drop table &worklib..temp1;
quit;
%mend printall;
/*
The top-level information about the runTask is captured here.
The macro variables g_task_id and g_task_name are saved and written to the
other datasets to enable common-key joins between the different output tables.
*/
%macro create_runtask(inds=);
%local col_names_list;
proc sql noprint;
select name into :col_names_list separated by ' '
from dictionary.columns
where libname = upcase(scan("&inds.", 1, '.'))
and memname = upcase(scan("&inds.", 2, '.'))
;
quit;
data stg_runtask(keep=task_name runtask_id type externalCode modifiedByUserName task_runtime businessContextUUID countsOnly log_filename file_record_nbr );
attrib name length=$100;
attrib type length=$15;
attrib externalCode length=$15;
attrib modifiedByUserName length=$50;
attrib id length=$40;
attrib log_filename length=$100;
attrib task_runtime length=8 format=e8601dt.;
set &inds.;
log_filename = scan("&task_filename.", -1, "/\");
task_runtime = &task_runtime. ;
file_record_nbr = &file_record_nbr.;
rename name=task_name;
rename id=runtask_id;
call symputx('g_task_id', id, 'g');
call symputx('g_task_name', name, 'g');
call symputx('g_counts_only', countsOnly, 'g');
run;
%mend create_runtask;
/*
Some of the export header information appears after the rest of the export
definition information, which is inconvenient when writing export data items
one at a time. This view collects the header fields up front.
*/
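/* vw_exp_header is later loaded into a hash object keyed on outputName (see
   %capture_nodeInfo), so that outputSubjectId can be recovered when an
   exportDataTasks "outputName" row is encountered in the flattened JSON. */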
%macro setup_exportHeader_view;
%if %sysfunc(exist(injson.exportdatatasks_exportdefiniti)) and "&g_counts_only." ne "1" %then %do;
proc sql noprint;
create table vw_exp_header as
select et.segmentNodeId, et.outputSubjectId, et.id length=50,
ed.outputName, ed.outputPath, ed.outputType, ed.removeDups,
ed.quoteOption
from injson.exportdatatasks et
inner join injson.exportdatatasks_exportdefiniti ed
on et.ordinal_exportDataTasks = ed.ordinal_exportDataTasks
;
quit;
%end;
%else %do;
data vw_exp_header;
attrib outputName length=$60;
attrib outputSubjectId length=$50;
run;
%end;
%mend setup_exportHeader_view;
/*
Mapping the segment map information for each node executed by a task
requires a lookup on the following view.
If we are processing a segment map (instead of a task), these tables may
not exist.
*/
%macro setup_queryHeader_view;
%if %sysfunc(exist(injson.segmentsinfo)) %then %do;
proc sql noprint;
create table vw_qry_header as
select qt.id length=50, qt.segmentNodeId, qt.outputSubjectId, qt.name,
sf.segmentMapName, sf.segmentName, sf.segmentMapCode, sf.segmentCode
from injson.segmentsinfo sf
inner join injson.querytasks qt
on sf.segmentNodeId = qt.segmentNodeId
;
quit;
%end;
%else %do;
data vw_qry_header;
attrib id length=$50;
attrib segmentMapName length=$50;
run;
%end;
%mend setup_queryHeader_view;
/*
If a data item contains a full stop, then it is from the Information Map.
It is debatable whether auto data items from a Segment/task in CDM should be
included; here we have chosen to include them.
The "AllData" dataset is dynamic and can contain more or fewer columns based
on the query complexity. So, we first retrieve the list of columns and
dynamically generate the code to loop through all the columns.
*/
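/* For orientation: the JSON engine's ALLDATA member flattens the document to
   one row per element, with columns P (path depth), P1..Pn (path element
   names), V (1 when the row carries a value), and Value. A hypothetical
   filter-predicate row might look like:
     P=7 P1=queryTasks ... P7=varRefId Value=Customer.Cust_ID
   The %do loop below generates one WHEN branch per available Pn column. */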
%macro capture_nodeInfo(inds=);
%local col_names_list i p_col_count;
%let p_col_count=0;
proc sql noprint;
select name into :col_names_list separated by ' '
from dictionary.columns
where libname = upcase(scan("&inds.", 1, '.'))
and memname = upcase(scan("&inds.", 2, '.'))
;
quit;
%do i=1 %to %sysfunc(countw(&col_names_list.));
%if %index(&col_names_list., %str( P&i. )) %then
%let p_col_count=%eval(&p_col_count.+1);;
%end;
%setup_exportHeader_view;
/* %setup_queryHeader_view; */
data stg_node (keep=externalCode runtask_id task_runtime node_type segmentMapName node_name outputSubjectId dataitem_name
%if &c_extract_dataitem_values eq %str(Y) %then %do;
dataitem_value
operator_name
%end;
)
stg_export (keep=externalCode runtask_id task_runtime outputName outputSubjectId dataitem_name)
stg_calc_item (keep=ci_id ci_type ci_name ci_expression ci_related_items)
;
attrib externalCode length=$15;
attrib runtask_id length=$40;
attrib segmentMapName length=$50;
attrib dataitem_name length=$100;
attrib outputName length=$60;
attrib node_name length=$60;
attrib prev_dataitem_name length=$100;
attrib dataitem_name_list length=$2048;
attrib outputSubjectId length=$50;
attrib query_id length=$50;
attrib task_runtime length=8 format=e8601dt.;
attrib dataitem_value length=$200;
attrib dataitem_value_list length=$32000;
attrib operator_list length=$256;
attrib operator_name length=$20;
attrib dataitem_name_switch length=$1;
attrib dateType length=$100;
attrib capture_calc_columns length=$1;
attrib ci_id length=$36;
attrib ci_name length=$100;
attrib ci_type length=$20;
attrib ci_expression length=$2048;
attrib ci_related_items length=$100;
retain capture_calc_columns;
retain ci_id ci_type ci_name ci_expression ci_related_items;
retain dataitem_name dataitem_name_list externalCode node_name segmentMapName
outputName outputSubjectId prev_dataitem_name query_id dataitem_value dataitem_value_list
operator_list dateType dataitem_name_switch;
if 0 then set &inds. vw_exp_header;
set &inds.;
runtask_id = "&g_task_id";
task_runtime = &task_runtime. ;
rename p1=node_type;
where p1 in ("exportDataTasks", "queryTasks", "splitTasks", "segmentsInfo", "calculatedDataItems") or (p=1);
if _n_ = 1 then do;
declare hash explookup (dataset: 'vw_exp_header');
explookup.definekey('outputName');
explookup.definedata('outputSubjectId');
explookup.definedone();
end;
if p1 eq "externalCode" then do;
externalCode = Value;
if substr(externalCode, 1, 4) eq 'MAP_' then
segmentMapName = "%superq(g_task_name)";
delete;
end;
if p1 eq "exportDataTasks" and p3 eq "outputName" then do;
outputName = Value;
rc = explookup.find(key:outputName);
if rc then call missing(outputSubjectId);
delete;
end;
if p1 eq "exportDataTasks" and p4 eq "columnValue" and p5 eq "id" then do;
dataitem_name = Value;
if index(Value, '.') then
output stg_export;
delete;
end;
if p1 eq "splitTasks" and p2 eq "name" then do;
node_name = Value;
prev_dataitem_name='';
end;
if p1 eq "splitTasks" and p5 eq "varRefId" then do;
dataitem_name = Value;
if index(Value, '.') then do;
if prev_dataitem_name ne dataitem_name then do;
output stg_node;
prev_dataitem_name = dataitem_name;
end;
delete;
end;
end;
if p1 eq "calculatedDataItems" then do;
capture_calc_columns='Y';
end;
if capture_calc_columns eq 'Y' then do;
if p2 eq "type" then ci_type = Value;
else if p2 eq "expression" then ci_expression = Value;
else if p2 eq "name" then ci_name = Value;
else if p2 eq "relatedCalculatedItems" then ci_related_items = Value;
else if p2 eq "id" then do;
ci_id = Value;
capture_calc_columns='N';
output stg_calc_item;
end;
delete;
end;
if p = 1 and p1 = 'queryTasks' then do;
prev_dataitem_name = '';
dateType = '';
dataitem_name_switch = 'N';
end;
%if &p_col_count. >= 6 %then %do;
if p >= 6 then do;
select (p);
%do i=6 %to &p_col_count;
when (&i.) do;
if p1 eq "queryTasks" and p&i. eq "varRefId" then do;
if prev_dataitem_name ne Value or dataitem_name_switch = 'Y' then do;
dataitem_name_list = catx(',', dataitem_name_list, Value);
if prev_dataitem_name ne '' or dataitem_name_switch = 'Y' then do;
dataitem_value_list = cats(dataitem_value_list, '|');
operator_list = cats(operator_list, ',');
end;
prev_dataitem_name=Value;
dataitem_name_switch = 'N';
end;
end;
if p1 eq "queryTasks" and substr(p&i.,1,6) eq "values" and length(p&i.) gt 6 then do;
if index(Value, '%%') and index(Value, '.') then do;
dataitem_name_list = catx(',', dataitem_name_list, compress(Value, '%'));
end;
else do;
if Value = "" then do;
dateType = "";
Value = ".";
end;
if dataitem_value_list = '' then
dataitem_value_list = cats(dateType,Value);
else if substr(dataitem_value_list, length(dataitem_value_list)) eq '|' then
dataitem_value_list = cats(dataitem_value_list, dateType, Value);
else
dataitem_value_list = catx(',', dataitem_value_list, cats(dateType,Value));
dateType = '';
end;
end;
if p1 eq "queryTasks" and p&i eq "operator" then do;
if Value ne "equals" then do;
dataitem_name_switch = 'Y';
end;
if operator_list = '' then
operator_list = Value;
else
operator_list = cats(operator_list, Value);
end;
if p1 eq "queryTasks" and p&i eq "dateType" then do;
dateType = Value;
end;
end;
%end;
otherwise do;
end;
end;
end;
%end;
if p2 eq "outputSubjectId" then do;
outputSubjectId = Value;
delete;
end;
if p1 eq "queryTasks" and p2 eq "id" then do;
query_id = Value;
end;
/*
Here we create one row per data item from the comma-separated
dataitem_name_list. However, dataitem_value_list can contain multiple values
per data item, separated by a pipe symbol; those multiple values end up in
dataitem_value separated by commas.
*/
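/* A small worked example (names, values, and operators hypothetical):
     dataitem_name_list  = Customer.State,Customer.Age
     dataitem_value_list = NC,TX|21
     operator_list       = equals,greaterThan
   yields two stg_node rows:
     dataitem_name=Customer.State  dataitem_value=NC,TX  operator_name=equals
     dataitem_name=Customer.Age    dataitem_value=21     operator_name=greaterThan
*/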
if p1 eq "queryTasks" and p2 eq "name" then do;
node_name = Value;
prev_dataitem_name='';
j=1;
if externalCode = "MAP_138" then do;
putlog dataitem_name_list=;
putlog dataitem_value_list=;
putlog operator_list=;
end;
do i=1 to countw(dataitem_name_list, ',');
dataitem_name = scan(dataitem_name_list, i, ',');
k = index(dataitem_value_list, '|') - 1;
if k > 0 then do;
dataitem_value = substr(dataitem_value_list, j, k);
dataitem_value_list = substr(dataitem_value_list, k+2);
end;
else do;
dataitem_value = substr(dataitem_value_list, j);
dataitem_value_list = '';
end;
operator_name = scan(operator_list, i, ',');
if substr(operator_name,1,6) = "equals" then
operator_name = "equals";
output stg_node;
end;
dataitem_name_list = '';
dataitem_value_list = '';
operator_list = '';
delete;
end;
run;
%mend capture_nodeInfo;
/*
The data for each runTask is appended to the combined tables (one set per
execution of this program) for reference.
*/
%macro save_runtask_data;
%if "&g_task_id." eq "" %then %goto exit_save_runtask_data;
proc append base=runtask data=stg_runtask;
run;
proc append base=export data=stg_export;
run;
proc append base=node data=stg_node;
run;
proc append base=calc_item data=stg_calc_item;
run;
%exit_save_runtask_data:
%mend save_runtask_data;
/*
This process considers each file (each corresponding to one runTask) in
sequence. It creates three outputs:
- Task/Map header
- Node
- Export
which are linked together by common keys.
*/
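/* A sketch of the common keys (column names as created above): runtask,
   node, and export all carry runtask_id, so an ad-hoc join such as the
   hypothetical query below reconstructs which data items each task touched:
     proc sql;
       select r.task_name, n.node_name, n.dataitem_name
         from outlib.runtask r
         inner join outlib.node n
           on r.runtask_id = n.runtask_id;
     quit;
*/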
%macro process_each_taskinfo(ids=fileslist);
%local i start_file_no;
data _null_;
if 0 then set &ids. nobs=nobs;
call symputx('filesCount', nobs);
stop;
run;
%put &=filesCount.;
%let start_file_no=1;
%if &debug eq Y %then %do;
%let start_file_no=103;
%let filesCount=103;
proc delete data=outlib.task outlib.node outlib.export;
run;
%end;
%do i=&start_file_no. %to &filesCount.;
data _null_;
obsNum=&i.;
set &ids. point=obsNum;
call symputx('task_filename', task_filename);
call symputx('task_runtime', input(substr(task_runtime,1,19), ymddttm19.));
call symputx('file_record_nbr', file_record_nbr);
stop;
run;
filename injson "&task_filename.";
libname injson json;
%if &debug eq Y %then %do;
%put &=task_filename. &=task_runtime. &=i. &=file_record_nbr.;
options mlogic mprint symbolgen noerrabend;
%if &i. eq 103 %then
%printall(injson);;
%end;
%create_runtask(inds=injson.root);
%if "&g_task_id." ne "" %then %do;
%capture_nodeInfo(inds=injson.alldata);
%save_runtask_data;
%end;
libname injson clear;
%end;
%mend process_each_taskinfo;
/*
The data for each execution of this process is saved to a permanent data store
*/
%macro save_data;
%if %sysfunc(exist(runtask)) %then %do;
proc append base=outlib.runtask data=runtask force;
run;
%end;
%if %sysfunc(exist(export)) %then %do;
proc append base=outlib.export data=export force;
run;
%end;
%if %sysfunc(exist(node)) %then %do;
proc append base=outlib.node data=node force;
run;
%end;
%exit_save_data:
%mend save_data;
/*
1. The required data is extracted from the log files into multiple JSON files
2. Each JSON file is processed and saved as SAS datasets in the work library
3. The SAS datasets from the work library are copied to a permanent library for future use
4. The "last date processed" is saved to an external file. This file will be read/updated
   on the next execution of the program to avoid re-processing the same log files.
*/
%init;
%extract_runtask_data;
%process_each_taskinfo;
%save_data;
%update_cfg_file;
libname outlib clear;