-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathharvestableGet.json
450 lines (449 loc) · 17.3 KB
/
harvestableGet.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
{
"description": "A harvest job definition",
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Unique, numeric ID for the job definition. Will be assigned if not provided.",
"pattern": "^[0-9]*$"
},
"name": {
"type": "string",
"description": "The name assigned to the harvest configuration."
},
"description": {
"type": "string",
"description": "Free form description of the configuration to support the administration."
},
"openAccess": {
"type": "string",
"description": "tbd",
"enum": ["true", "false"]
},
"storage": {
"type": "object",
"description": "The storage definition used when storing the harvested records.",
"properties": {
"entityType": {
"type": "string",
"description": "Type of storage engine.",
"enum": ["inventoryStorageEntity", "solrStorageEntity"]
},
"bulkSize": {
"type": "string",
"description": "Obsolete constant."
},
"currentStatus": {
"type": "string",
"description": "Obsolete constant."
},
"customClass": {
"type": "string",
"description": "Obsolete."
},
"enabled": {
"type": "string",
"description": "Defines if this storage configuration can by used by a harvest job.",
"enum": ["true", "false"]
},
"idAsString": {
"type": "string",
"description": "Readonly for internal use in Harvester. Here same value as 'id'."
},
"name": {
"type": "string",
"description": "The name assigned to the storage configuration."
},
"url": {
"type": "string",
"description": "The HTTP address of the storage engine."
}
}
},
"transformation": {
"type": "object",
"description": "The transformation pipeline used for the harvest job.",
"properties": {
"entityType": {
"type": "string",
"description": "The type of transformation (the only existing type is basic transformation).",
"enum": ["basicTransformation"]
},
"acl": {
"type": "string",
"description": "The name of the tenant that created the transformation pipeline."
},
"description": {
"type": "string",
"description": "Free text description of the transformation pipeline."
},
"enabled": {
"type": "string",
"description": "Indicates if the transformation pipeline can be used by harvest jobs",
"enum": ["true"]
},
"name": {
"type": "string",
"description": "The name assigned to the transformation pipeline."
},
"parallel": {
"type": "string",
"description": "Indicates if steps should be run concurrently (each in its own thread).",
"enum": ["true", "false"],
"default": "false"
},
"stepAssociations": {
"type": "array",
"description": "The list of transformation steps that makes up the pipeline.",
"items": {
"type": "object",
"description": "Definition of an associated transformation step.",
"properties": {
"id": {
"type": "string",
"description": "The id for the association.",
"pattern": "^[0-9]*$"
},
"position": {
"type": "string",
"description": "The sequence number of the step in the ordering of steps in the transformation pipeline.",
"pattern": "^[0-9]*$"
},
"step": {
"type": "object",
"description": "Details or the transformation step configuration.",
"properties": {
"entityType": {
"type": "string",
"description": "Type of step, XSLT or custom",
"enum": [
"xmlTransformationStep",
"customTransformationStep"
]
},
"acl": {
"type": "string",
"description": "Then tenant that created this step."
},
"description": {
"type": "string",
"description": "Free text; administrators notes about the step."
},
"inputFormat": {
"type": "string",
"description": "Free text; informational; the format of the record before this step, id 'XML'"
},
"name": {
"type": "string",
"description": "The name assigned to the step."
},
"outputFormat": {
"type": "string",
"description": "Free text; informational; the format of the record after this step."
},
"script": {
"type": "string",
"description": "Would by the XSLT script, if any, but omitted in the context of a harvestable.",
"enum": [
"<'script' omitted from nested displays>",
""
]
},
"id": {
"type": "string",
"description": "Unique ID for the step.",
"pattern": "^[0-9]*$"
},
"testData": {
"type": "string",
"description": "Would be source XML for testing XSLT script but omitted in context of a harvestable.",
"enum": ["<'testData' omitted from nested displays>",""]
},
"testOutput": {
"type": "string",
"description": "Would be the resulting XML from running the script on `testData` but omitted in context of a harvestable.",
"enum": ["<'testOutput' omitted from nested displays>",""]
}
}
},
"transformation": {
"type": "string",
"description": "The ID of the transformation that the step is part of.",
"pattern": "^[0-9]*$"
}
}
}
},
"id": {
"type": "string",
"description": "The ID of the transformation pipeline.",
"pattern": "^[0-9]*$"
}
}
},
"enabled": {
"type": "string",
"enum": ["true", "false"],
"description": "Indicates if the job is scheduled for running"
},
"harvestImmediately": {
"type": "string",
"enum": ["true", "false"],
"description": "Whether to harvest when the config is persisted."
},
"scheduleString": {
"type": "string",
"description": "Crontab style schedule string (simplified): minute(0-59) hour(0-24) day of month(* or 1-31) month (* or 1-12) day of week (* or 0-6)."
},
"dateFormat": {
"type": "string",
"description": "For example yyyy-MM-dd'T'hh:mm:ss'Z'."
},
"url": {
"type": "string",
"description": "The URL to access the data from."
},
"timeout": {
"type": "string",
"description": "Connection/read timeout in seconds; application depending on the specific protocol used for fetching data.",
"pattern": "^[0-9]*$",
"default": "300"
},
"cacheEnabled": {
"type": "string",
"enum": ["true", "false"],
"description": "Whether or not to store incoming records in Harvester's file system."
},
"diskRun": {
"type": "string",
"enum": ["true", "false"],
"description": "Whether or not to run harvest job from records cached in a previous job run."
},
"recordLimit": {
"type": "string",
"description": "Maximum number of records to harvest.",
"pattern": "^[0-9]*$"
},
"laxParsing": {
"type": "string",
"description": "When enabled, Harvester will attempt to parse malformed XML (missing closing tags, entities)",
"enum": ["true", "false"],
"default": "false"
},
"constantFields": {
"type": "string",
"description": "Values related to target handling in MasterKey. Otherwise obsolete."
},
"storeOriginal": {
"type": "string",
"description": "Indicates whether to store incoming original record, if supported by the job type and the storage configuration.",
"enum": ["true", "false"],
"default": "false"
},
"currentStatus": {
"type": "string",
"enum": [
"NEW",
"OK",
"WARN",
"ERROR",
"RUNNING",
"FINISHED",
"KILLED"
],
"readOnly": true,
"description": "Indicates the state of the job. Assigned by API."
},
"managedBy": {
"type": "string",
"description": "Free-text field for tagging a job with the producer or manager of the resource. Multiple tags may be separated by commas. The tags can be used for filtering status reports by job administrators for example."
},
"usedBy": {
"type": "string",
"description": "Free form administrative information; could be tags for the clients using this harvestable."
},
"serviceProvider": {
"type": "string",
"description": "Free-text field for administrative information about the harvest job."
},
"contactNotes": {
"type": "string",
"description": "Free form text field for administrator's notes."
},
"technicalNotes": {
"type": "string",
"description": "Free-text field for administrative information."
},
"logLevel": {
"type": "string",
"description": "Specifies the logging level for the job with TRACE being the most (extremely) verbose. INFO is the recommended log level in most cases.",
"enum": ["ERROR","WARN","INFO","DEBUG","TRACE"]
},
"failedRecordsLogging": {
"type": "string",
"description": "Specify whether or not failed records should be saved as XML files in a designated log directory. Also specifies retention policy for the directory, that is, whether to retain files that were saved in previous runs (CLEAN_DIRECTORY = don't retain) and, if so, whether to overwrite any existing files if the same record fails again (CREATE_OVERWRITE) or rather add a sequence number to the new file name in order not to overwrite (ADD_ALL).",
"enum": ["NO_STORE", "CLEAN_DIRECTORY", "CREATE_OVERWRITE", "ADD_ALL"],
"default": "CLEAN_DIRECTORY"
},
"maxSavedFailedRecordsPerRun": {
"type": "string",
"description": "Sets a maximum number of files to save in the failed records directory per run. The job log will tell when the limit is reached.",
"pattern": "^[0-9]*$",
"default": "100"
},
"maxSavedFailedRecordsTotal": {
"type": "string",
"description": "Sets a maximum number of files to be saved in the failed records directory at any given time - as the sum of previously saved records (that were not cleaned up before this run) plus any new records added during the run.The job log will tell when the limit is reached.",
"pattern": "^[0-9]*$",
"default": "1000"
},
"mailAddress": {
"type": "string",
"description": "Comma separated list of e-mail addresses that should receive notification on job completion."
},
"mailLevel": {
"type": "string",
"description": "The minimum severity of a job's completion status that will trigger email notification.",
"enum": ["OK","WARN","ERROR"]
},
"lastHarvestFinished": {
"type": "string",
"description": "Assigned by API. The date and time when the most recent harvest job with this configuration completed."
},
"lastHarvestStarted": {
"type": "string",
"description": "Assigned by API. The date and time when the most recent harvest job with this configuration began."
},
"lastUpdated": {
"type": "string",
"description": "Assigned by API. The date and time when this definition was last modified."
},
"nextHarvestSchedule": {
"type": "string",
"description": "The date and time when a job with this definition should be run (if job is enabled)."
},
"amountHarvested": {
"type": "string",
"description": "Assigned by API. Number of records harvested in last run. It seems this should really be an integer, but string is what the WSAPI gives us.",
"pattern": "^[0-9]*$"
},
"message": {
"type": "string",
"description": "Assigned by API. Message summarising results of last run."
},
"acl": {
"type": "string",
"description": "Assigned by API. Name of the tenant that created the configuration record.",
"readOnly": true
}
},
"oneOf": [
{
"properties": {
"type": {
"type": "string",
"enum": ["oaiPmh"],
"description": "Indicates OAI-PMH job."
},
"metadataPrefix": {
"type": "string",
"description": "OAI-PMH only. The metadata prefix supported by the OAI-PMH service to harvest from."
},
"oaiSetName": {
"type": "string",
"description": "OAI-PMH only. The name of a record set offered by the OAI-PMH service to harvest from."
},
"resumptionToken": {
"type": "string",
"description": "OAI-PMH only. PMH identifier for fetching the next batch of records."
},
"retryCount": {
"type": "string",
"description": "Indicates how many times Harvester should retry a failed OAI-PMH request.",
"pattern": "^[0-9]*$",
"default": "2"
},
"retryWait": {
"type": "string",
"description": "Indicates how many seconds Harvester should wait before retrying a failed OAI-PMH request.",
"pattern": "^[0-9]*$",
"default": "60"
}
},
"required": ["metadataPrefix", "oaiSetName"]
},
{
"properties": {
"type": {
"type": "string",
"enum": ["xmlBulk"],
"description": "Indicates bulk XML job."
},
"allowErrors": {
"type": "string",
"description": "Whether or not to continue despite harvest record errors.",
"default": "false",
"enum": ["true", "false"]
},
"overwrite": {
"type": "string",
"description": "Applies to storages that supports this (Solr, not FOLIO Inventory). Will delete all previously harvested data before beginning the next scheduled (or manually triggered) run, if set to true.",
"enum": ["true", "false"]
},
"allowCondReq": {
"type": "string",
"description": "Whether or not to filter on file date to only harvest new XML files",
"enum": ["true", "false"],
"default": "false"
},
"fromDate": {
"type": "string",
"description": "Initial start date (yyyy-MM-dd) for incremental updates (when allowConfReq is 'true')"
},
"csvConfiguration": {
"type": "string",
"description": "Semicolon-separated key-value pairs that specifies parsing of a CSV file into XML for further processing (see Harvester documentation for details)."
},
"excludeFilePattern": {
"type": "string",
"description": "Regular expression; setting to skip harvesting of files with names matching the given regular expression (see Harvester documentation for details)."
},
"expectedSchema": {
"type": "string",
"description": "Mime-type override (e.g: application/marc; charset=MARC-8)."
},
"includeFilePattern": {
"type": "string",
"description": "Regular expression; setting to request harvesting of files with names matching the given regular expression unless those file names are simultaneously excluded by the excludeFilePattern. .zip, .gz, .tar included by default unless explicitly excluded by excludeFilePattern (see Harvester documentation for details)."
},
"outputSchema": {
"type": "string",
"description": "MARC XML transformation format (application/marc or application/tmarc)."
},
"passiveMode": {
"type": "string",
"description": "Whether or not to use passive mode for FTP transfers.",
"enum": ["true","false"],
"default": "false"
},
"recurse": {
"type": "string",
"description": "Whether or not to recurse into sub-folders in the source directory tree.",
"enum": ["true","false"],
"default": "false"
},
"splitAt": {
"type": "string",
"description": "Level/depth to split XML files at to extract records. Zero/empty disables split.",
"pattern": "^[0-9]*$"
},
"splitSize": {
"type": "string",
"description": "Setting to split large XML files into chunks of `splitSize' number of records; to preserve memory during XSLT transformations.",
"pattern": "^[0-9]*$"
}
}
}
],
"required": ["name", "type", "enabled", "harvestImmediately", "storage", "transformation", "url"]
}