Widen · freimer · Dec 2, 2021 · Dec 2, 2021 · Dec 2, 2021 · Dec 2, 2021
diff --git a/.gitignore b/.gitignore
@@ -1,5 +1,6 @@
 # Secrets and internal config files
 **/.secrets/*
+*.env
 
 # Ignore meltano internal cache and sqlite systemdb
 

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
@@ -0,0 +1,28 @@
+---
+variables:
+  PYTHON_IMAGE_TAG: "3.9.7"  # quoted so version-like tags always stay strings
+stages:
+  - build
+  - deploy
+
+build_package:  # builds the package with Poetry; dist/ is kept as an artifact
+  stage: build
+  image: ${CI_DEPENDENCY_PROXY_GROUP_IMAGE_PREFIX}/python:$PYTHON_IMAGE_TAG
+  artifacts:
+    paths:
+      - dist
+  script:
+    - curl -sSL https://install.python-poetry.org | python3 -  # official installer
+    - export PATH=$HOME/.local/bin:$PATH  # installer puts poetry in ~/.local/bin
+    - poetry build
+
+deploy_package:  # publishes to this project's GitLab PyPI package registry
+  stage: deploy
+  image: ${CI_DEPENDENCY_PROXY_GROUP_IMAGE_PREFIX}/python:$PYTHON_IMAGE_TAG
+  rules:
+    - if: '$CI_COMMIT_TAG =~ /^v\d+\.\d+\.\d+$/'  # only for vX.Y.Z tags
+  script:
+    - curl -sSL https://install.python-poetry.org | python3 -  # official installer
+    - export PATH=$HOME/.local/bin:$PATH
+    - poetry config repositories.privatepypi ${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/pypi
+    - poetry publish --repository privatepypi --username gitlab-ci-token --password ${CI_JOB_TOKEN}
diff --git a/README.md b/README.md
@@ -40,6 +40,10 @@ plugins:
         - name: replication_key
         - name: except_keys
         - name: num_inference_records
+        - name: pagination_request_style
+        - name: pagination_response_style
+        - name: pagination_page_size
+        - name: next_page_token_path
 ```
 
 ```bash
@@ -65,6 +69,9 @@ Config Options:
 - `params`: optional: an object of objects that provide the `params` in a `requests.get` method.
 - `headers`: optional: an object of headers to pass into the api calls.
 - `records_path`: optional: a jsonpath string representing the path in the requests response that contains the records to process. Defaults to `$[*]`.
+- `pagination_request_style`: optional: style for requesting pagination, defaults to `default`, see Pagination below.
+- `pagination_response_style`: optional: style of pagination responses, defaults to `default`, see Pagination below.
+- `pagination_page_size`: optional: limit for size of page, defaults to None.
 - `next_page_token_path`: optional: a jsonpath string representing the path to the "next page" token. Defaults to `$.next_page`.
 - `primary_keys`: required: a list of the json keys of the primary key for the stream.
 - `replication_key`: optional: the json key of the replication key. Note that this should be an incrementing integer or datetime object.
@@ -74,6 +81,36 @@ Config Options:
   records are not duplicated for each item in lists.
 - `num_inference_keys`: optional: number of records used to infer the stream's schema. Defaults to 50.
 
+## Pagination
+
+Pagination is a complex topic: there is no single standard, and there are many different implementations. Unless options are provided, both the request and response styles default to `default`, which is the pagination style originally implemented.
+### Default Request Style
+The default request style for pagination is described below:
+- Use `next_page_token_path`, if provided, to extract the token from the response; otherwise
+- use the `X-Next-Page` header from the response.
+
+### Default Response Style
+The default response style for pagination is described below:
+- If there is a token, add that as a `page` URL parameter.
+
+### Additional Request Styles
+There are additional request styles supported as follows.
+- `style1` - This style uses URL parameters named offset and limit
+  - `offset` is calculated from the previous response, or not set if there is no previous response
+  - `limit` is set to the `pagination_page_size` value, if specified, or not set
+
+### Additional Response Styles
+There are additional response styles supported as follows.
+- `style1` - This style retrieves pagination information from the `pagination` top-level element in the response.  Expected format is as follows:
+    ```json
+    "pagination": {
+        "total": 136,
+        "limit": 2,
+        "offset": 2
+    }
+    ```
+  The next page token, which in this case is really the next starting record number, is calculated from the limit and the current offset; None is returned to indicate that there is no more data. For this style, the response _must_ include the limit, even if none is specified in the request, as well as the total and offset needed to calculate the next token value.
+
 ## Usage
 
 You can easily run `tap-rest-api-msdk` by itself or in a pipeline using [Meltano](www.meltano.com).

diff --git a/meltano.yml b/meltano.yml
@@ -4,37 +4,87 @@ plugins:
   extractors:
     - name: tap-rest-api-msdk
       namespace: tap_rest_api_msdk
-      executable: ./tap-rest-api-msdk.sh
+      # executable: ./tap-rest-api-msdk.sh  (wrapper script replaced by pip_url below)
+      pip_url: .  # pip install target: the current directory
       capabilities:
         - state
         - catalog
         - discover
       settings:
-        - name: api_url
         - name: name
+          kind: string
+        - name: api_url
+          kind: string
         - name: path
+          kind: string
         - name: params
+          kind: object
         - name: headers
+          kind: object
         - name: records_path
+          kind: string
         - name: primary_keys
-        - name: replication_key
+          kind: array
         - name: except_keys
-        - name: num_inference_records
+          kind: array
+        - name: pagination_request_style  # NOTE(review): no `kind` declared, unlike its siblings
+        - name: pagination_response_style  # NOTE(review): no `kind` declared, unlike its siblings
+        - name: pagination_page_size
+          kind: integer
+        - name: streams
+          kind: array
+        - name: streams_auth
+          kind: array
+      # config:  (former single-stream layout, kept commented out for reference)
+      #   name: us_earthquakes
+      #   api_url: https://earthquake.usgs.gov/fdsnws
+      #   path: /event/1/query
+      #   params:
+      #     format: geojson
+      #     starttime: "2014-01-01"
+      #     endtime: "2014-01-02"
+      #     minmagnitude: 1
+      #   primary_keys:
+      #     - id
+      #   records_path: "$.features[*]"
+      #   num_inference_records: 100
+      #   select:
+      #     - "*.*"
       config:
-        name: us_earthquakes
-        api_url: https://earthquake.usgs.gov/fdsnws
-        path: /event/1/query
-        params:
-          format: geojson
-          starttime: "2014-01-01"
-          endtime: "2014-01-02"
-          minmagnitude: 1
-        primary_keys:
-          - id
-        records_path: "$.features[*]"
-        num_inference_records: 100
-      select:
-        - '*.*'
+        streams:  # list of per-stream configs; the old top-level keys now live in each entry
+          - name: us_earthquakes
+            api_url: https://earthquake.usgs.gov/fdsnws
+            path: /event/1/query
+            params:
+              format: geojson
+              starttime: "2014-01-01"
+              endtime: "2014-01-02"
+              minmagnitude: 1
+            primary_keys:
+              - id
+            records_path: "$.features[*]"
+            num_inference_records: 100
+            select:
+              - "*.*"
+      # If you need auth, put it in a list of objects
+      # that are matched against the stream names
+      # streams_auth:
+      #   - match_type: exact
+      #     name: us_earthquakes
+      #     headers:
+      #       "X-ApiKeys": "my secret api keys"
+      #   - match_type: prefix
+      #     name: us_
+      #     headers:
+      #       "X-ApiKeys": "my secret api keys"
+      #   - match_type: suffix
+      #     name: earthquakes
+      #     headers:
+      #       "X-ApiKeys": "my secret api keys"
+      #   - match_type: regex
+      #     name: "^.*earth.*$"
+      #     headers:
+      #       "X-ApiKeys": "my secret api keys"
   loaders:
     - name: target-jsonl
       variant: andyh1203