src/main/resources/openapi/schemas/harvestableGet.json

{
  "description": "A harvest job definition",
  "type": "object",
  "properties": {
    "id": {
      "type": "string",
      "description": "Unique, numeric ID for the job definition. Will be assigned if not provided.",
      "pattern": "^[0-9]*$"
    },
    "name": {
      "type": "string",
      "description": "The name assigned to the harvest configuration."
    },
    "description": {
      "type": "string",
      "description": "Free form description of the configuration to support the administration."
    },
    "openAccess": {
      "type": "string",
      "description": "tbd",
      "enum": ["true", "false"]
    },
    "storage": {
      "type": "object",
      "description": "The storage definition used when storing the harvested records.",
      "properties": {
        "entityType": {
          "type": "string",
          "description": "Type of storage engine.",
          "enum": ["inventoryStorageEntity", "solrStorageEntity"]
        },
        "bulkSize": {
          "type": "string",
          "description": "Obsolete constant."
        },
        "currentStatus": {
          "type": "string",
          "description": "Obsolete constant."
        },
        "customClass": {
          "type": "string",
          "description": "Obsolete."
        },
        "enabled": {
          "type": "string",
          "description": "Defines if this storage configuration can by used by a harvest job.",
          "enum": ["true", "false"]
        },
        "idAsString": {
          "type": "string",
          "description": "Readonly for internal use in Harvester. Here same value as 'id'."
        },
        "name": {
          "type": "string",
          "description": "The name assigned to the storage configuration."
        },
        "url": {
          "type": "string",
          "description": "The HTTP address of the storage engine."
        }
      }
    },
    "transformation": {
      "type": "object",
      "description": "The transformation pipeline used for the harvest job.",
      "properties": {
        "entityType": {
          "type": "string",
          "description": "The type of transformation (the only existing type is basic transformation).",
          "enum": ["basicTransformation"]
        },
        "acl": {
          "type": "string",
          "description": "The name of the tenant that created the transformation pipeline."
        },
        "description": {
          "type": "string",
          "description": "Free text description of the transformation pipeline."
        },
        "enabled": {
          "type": "string",
          "description": "Indicates if the transformation pipeline can be used by harvest jobs",
          "enum": ["true"]
        },
        "name": {
          "type": "string",
          "description": "The name assigned to the transformation pipeline."
        },
        "parallel": {
          "type": "string",
          "description": "Indicates if steps should be run concurrently (each in its own thread).",
          "enum": ["true", "false"],
          "default": "false"
        },
        "stepAssociations": {
          "type": "array",
          "description": "The list of transformation steps that makes up the pipeline.",
          "items": {
            "type": "object",
            "description": "Definition of an associated transformation step.",
            "properties": {
              "id": {
                "type": "string",
                "description": "The id for the association.",
                "pattern": "^[0-9]*$"
              },
              "position": {
                "type": "string",
                "description": "The sequence number of the step in the ordering of steps in the transformation pipeline.",
                "pattern": "^[0-9]*$"
              },
              "step": {
                "type": "object",
                "description": "Details or the transformation step configuration.",
                "properties": {
                  "entityType": {
                    "type": "string",
                    "description": "Type of step, XSLT or custom",
                    "enum": [
                      "xmlTransformationStep",
                      "customTransformationStep"
                    ]
                  },
                  "acl": {
                    "type": "string",
                    "description": "Then tenant that created this step."
                  },
                  "description": {
                    "type": "string",
                    "description": "Free text; administrators notes about the step."
                  },
                  "inputFormat": {
                    "type": "string",
                    "description": "Free text; informational; the format of the record before this step, id 'XML'"
                  },
                  "name": {
                    "type": "string",
                    "description": "The name assigned to the step."
                  },
                  "outputFormat": {
                    "type": "string",
                    "description": "Free text; informational; the format of the record after this step."
                  },
                  "script": {
                    "type": "string",
                    "description": "Would by the XSLT script, if any, but omitted in the context of a harvestable.",
                    "enum": [
                      "<'script' omitted from nested displays>",
                      ""
                    ]
                  },
                  "id": {
                    "type": "string",
                    "description": "Unique ID for the step.",
                    "pattern": "^[0-9]*$"
                  },
                  "testData": {
                    "type": "string",
                    "description": "Would be source XML for testing XSLT script but omitted in context of a harvestable.",
                    "enum": ["<'testData' omitted from nested displays>",""]
                  },
                  "testOutput": {
                    "type": "string",
                    "description": "Would be the resulting XML from running the script on `testData` but omitted in context of a harvestable.",
                    "enum": ["<'testOutput' omitted from nested displays>",""]
                  }
                }
              },
              "transformation": {
                "type": "string",
                "description": "The ID of the transformation that the step is part of.",
                "pattern": "^[0-9]*$"
              }
            }
          }
        },
        "id": {
          "type": "string",
          "description": "The ID of the transformation pipeline.",
          "pattern": "^[0-9]*$"
        }
      }
    },
    "enabled": {
      "type": "string",
      "enum": ["true", "false"],
      "description": "Indicates if the job is scheduled for running"
    },
    "harvestImmediately": {
      "type": "string",
      "enum": ["true", "false"],
      "description": "Whether to harvest when the config is persisted."
    },
    "scheduleString": {
      "type": "string",
      "description": "Crontab style schedule string (simplified): minute(0-59) hour(0-24)  day of month(* or 1-31)  month (* or 1-12)  day of week (* or 0-6)."
    },
    "dateFormat": {
      "type": "string",
      "description": "For example yyyy-MM-dd'T'hh:mm:ss'Z'."
    },
    "url": {
      "type": "string",
      "description": "The URL to access the data from."
    },
    "timeout": {
      "type": "string",
      "description": "Connection/read timeout in seconds; application depending on the specific protocol used for fetching data.",
      "pattern": "^[0-9]*$",
      "default": "300"
    },
    "cacheEnabled": {
      "type": "string",
      "enum": ["true", "false"],
      "description": "Whether or not to store incoming records in Harvester's file system."
    },
    "diskRun": {
      "type": "string",
      "enum": ["true", "false"],
      "description": "Whether or not to run harvest job from records cached in a previous job run."
    },
    "recordLimit": {
      "type": "string",
      "description": "Maximum number of records to harvest.",
      "pattern": "^[0-9]*$"
    },
    "laxParsing": {
      "type": "string",
      "description": "When enabled, Harvester will attempt to parse malformed XML (missing closing tags, entities)",
      "enum": ["true", "false"],
      "default": "false"
    },
    "constantFields": {
      "type": "string",
      "description": "Values related to target handling in MasterKey. Otherwise obsolete."
    },
    "storeOriginal": {
      "type": "string",
      "description": "Indicates whether to store incoming original record, if supported by the job type and the storage configuration.",
      "enum": ["true", "false"],
      "default": "false"
    },
    "currentStatus": {
      "type": "string",
      "enum": [
        "NEW",
        "OK",
        "WARN",
        "ERROR",
        "RUNNING",
        "FINISHED",
        "KILLED"
      ],
      "readOnly": true,
      "description": "Indicates the state of the job. Assigned by API."
    },
    "managedBy": {
      "type": "string",
      "description": "Free-text field for tagging a job with the producer or manager of the resource. Multiple tags may be separated by commas. The tags can be used for filtering status reports by job administrators for example."
    },
    "usedBy": {
      "type": "string",
      "description": "Free form administrative information; could be tags for the clients using this harvestable."
    },
    "serviceProvider": {
      "type": "string",
      "description": "Free-text field for administrative information about the harvest job."
    },
    "contactNotes": {
      "type": "string",
      "description": "Free form text field for administrator's notes."
    },
    "technicalNotes": {
      "type": "string",
      "description": "Free-text field for administrative information."
    },
    "logLevel": {
      "type": "string",
      "description": "Specifies the logging level for the job with TRACE being the most (extremely) verbose. INFO is the recommended log level in most cases.",
      "enum": ["ERROR","WARN","INFO","DEBUG","TRACE"]
    },
    "failedRecordsLogging": {
      "type": "string",
      "description": "Specify whether or not failed records should be saved as XML files in a designated log directory. Also specifies retention policy for the directory, that is, whether to retain files that were saved in previous runs (CLEAN_DIRECTORY = don't retain) and, if so, whether to overwrite any existing files if the same record fails again (CREATE_OVERWRITE) or rather add a sequence number to the new file name in order not to overwrite (ADD_ALL).",
      "enum": ["NO_STORE", "CLEAN_DIRECTORY", "CREATE_OVERWRITE", "ADD_ALL"],
      "default": "CLEAN_DIRECTORY"
    },
    "maxSavedFailedRecordsPerRun": {
      "type": "string",
      "description": "Sets a maximum number of files to save in the failed records directory per run. The job log will tell when the limit is reached.",
      "pattern": "^[0-9]*$",
      "default": "100"
    },
    "maxSavedFailedRecordsTotal": {
      "type": "string",
      "description": "Sets a maximum number of files to be saved in the failed records directory at any given time - as the sum of previously saved records (that were not cleaned up before this run) plus any new records added during the run.The job log will tell when the limit is reached.",
      "pattern": "^[0-9]*$",
      "default": "1000"
    },
    "mailAddress": {
      "type": "string",
      "description": "Comma separated list of e-mail addresses that should receive notification on job completion."
    },
    "mailLevel": {
      "type": "string",
      "description": "The minimum severity of a job's completion status that will trigger email notification.",
      "enum": ["OK","WARN","ERROR"]
    },
    "lastHarvestFinished": {
      "type": "string",
      "description": "Assigned by API. The date and time when the most recent harvest job with this configuration completed."
    },
    "lastHarvestStarted": {
      "type": "string",
      "description": "Assigned by API. The date and time when the most recent harvest job with this configuration began."
    },
    "lastUpdated": {
      "type": "string",
      "description": "Assigned by API. The date and time when this definition was last modified."
    },
    "nextHarvestSchedule": {
      "type": "string",
      "description": "The date and time when a job with this definition should be run (if job is enabled)."
    },
    "amountHarvested": {
      "type": "string",
      "description": "Assigned by API. Number of records harvested in last run. It seems this should really be an integer, but string is what the WSAPI gives us.",
      "pattern": "^[0-9]*$"
    },
    "message": {
      "type": "string",
      "description": "Assigned by API. Message summarising results of last run."
    },
    "acl": {
      "type": "string",
      "description": "Assigned by API. Name of the tenant that created the configuration record.",
      "readOnly": true
    }
  },
  "oneOf": [
    {
      "properties": {
        "type": {
          "type": "string",
          "enum": ["oaiPmh"],
          "description": "Indicates OAI-PMH job."
        },
        "metadataPrefix": {
          "type": "string",
          "description": "OAI-PMH only. The metadata prefix supported by the OAI-PMH service to harvest from."
        },
        "oaiSetName": {
          "type": "string",
          "description": "OAI-PMH only. The name of a record set offered by the OAI-PMH service to harvest from."
        },
        "resumptionToken": {
          "type": "string",
          "description": "OAI-PMH only. PMH identifier for fetching the next batch of records."
        },
        "retryCount": {
          "type": "string",
          "description": "Indicates how many times Harvester should retry a failed OAI-PMH request.",
          "pattern": "^[0-9]*$",
          "default": "2"
        },
        "retryWait": {
          "type": "string",
          "description": "Indicates how many seconds Harvester should wait before retrying a failed OAI-PMH request.",
          "pattern": "^[0-9]*$",
          "default": "60"
        }

      },
      "required": ["metadataPrefix", "oaiSetName"]
    },
    {
      "properties": {
        "type": {
          "type": "string",
          "enum": ["xmlBulk"],
          "description": "Indicates bulk XML job."
        },
        "allowErrors": {
          "type": "string",
          "description": "Whether or not to continue despite harvest record errors.",
          "default": "false",
          "enum": ["true", "false"]
        },
        "overwrite": {
          "type": "string",
          "description": "Applies to storages that supports this (Solr, not FOLIO Inventory). Will delete all previously harvested data before beginning the next scheduled (or manually triggered) run, if set to true.",
          "enum": ["true", "false"]
        },
        "allowCondReq": {
          "type": "string",
          "description": "Whether or not to filter on file date to only harvest new XML files",
          "enum": ["true", "false"],
          "default": "false"
        },
        "fromDate": {
          "type": "string",
          "description": "Initial start date (yyyy-MM-dd) for incremental updates (when allowConfReq is 'true')"
        },
        "csvConfiguration": {
          "type": "string",
          "description": "Semicolon-separated key-value pairs that specifies parsing of a CSV file into XML for further processing (see Harvester documentation for details)."
        },
        "excludeFilePattern": {
          "type": "string",
          "description": "Regular expression; setting to skip harvesting of files with names matching the given regular expression (see Harvester documentation for details)."
        },
        "expectedSchema": {
          "type": "string",
          "description": "Mime-type override (e.g: application/marc; charset=MARC-8)."
        },
        "includeFilePattern": {
          "type": "string",
          "description": "Regular expression; setting to request harvesting of files with names matching the given regular expression unless those file names are simultaneously excluded by the excludeFilePattern. .zip, .gz, .tar included by default unless explicitly excluded by excludeFilePattern (see Harvester documentation for details)."
        },
        "outputSchema": {
          "type": "string",
          "description": "MARC XML transformation format (application/marc or application/tmarc)."
        },
        "passiveMode": {
          "type": "string",
          "description": "Whether or not to use passive mode for FTP transfers.",
          "enum": ["true","false"],
          "default": "false"
        },
        "recurse": {
          "type": "string",
          "description": "Whether or not to recurse into sub-folders in the source directory tree.",
          "enum": ["true","false"],
          "default": "false"
        },
        "splitAt": {
          "type": "string",
          "description": "Level/depth to split XML files at to extract records. Zero/empty disables split.",
          "pattern": "^[0-9]*$"
        },
        "splitSize": {
          "type": "string",
          "description": "Setting to split large XML files into chunks of `splitSize' number of records; to preserve memory during XSLT transformations.",
          "pattern": "^[0-9]*$"
        }
      }
    }
  ],
  "required": ["name", "type", "enabled", "harvestImmediately", "storage", "transformation", "url"]
}