diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
new file mode 100644
index 0000000000..2a94d68b05
--- /dev/null
+++ b/.github/workflows/publish.yml
@@ -0,0 +1,36 @@
+name: Publish
+
+on:
+  push:
+    branches:
+      - devel
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v1
+    - name: Install dependencies
+      run: |
+        export PATH="$HOME/.local/bin:$PATH"
+        sudo apt-get install -y python3-setuptools
+        pip3 install --user -r docs/requirements.txt
+    - name: Build the docs
+      run: |
+        export PATH="$HOME/.local/bin:$PATH"
+        make html
+    - name: Deploy the docs
+      run: |
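+        # Assemble the generated HTML under $HOME/output and force-push it to the
+        # gh-pages branch; this requires an ACCESS_TOKEN repository secret with push access.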
+        mkdir $HOME/output
+        mv _output/html $HOME/output/latest
+        touch $HOME/output/.nojekyll
+        mv docs/html/index2.html $HOME/output/index.html
+        cd $HOME/output
+        git init
+        git config --global user.name "${GITHUB_ACTOR}"
+        git config --global user.email "${GITHUB_ACTOR}@github.com"
+        git add .
+        git commit -m "latest html output"
+        git push -f https://${GITHUB_ACTOR}:${{secrets.ACCESS_TOKEN}}@github.com/intelkevinputnam/pmem-csi.git HEAD:gh-pages
diff --git a/.gitignore b/.gitignore
index 2d434329cf..404a50f9c9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,7 @@
/vendor
/_output
/_work
+/.tox
+Manifest
+/_build
+
diff --git a/Jenkinsfile b/Jenkinsfile
index 5c46aa4027..b5f68b3804 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -171,7 +171,8 @@ pipeline {
// Install additional tools:
// - ssh client for govm
- sh "docker exec ${env.BUILD_CONTAINER} swupd bundle-add openssh-client"
+ // - python3 for Sphinx (i.e. make html)
+ sh "docker exec ${env.BUILD_CONTAINER} swupd bundle-add openssh-client python3-basic"
// Now commit those changes to ensure that the result of "swupd bundle add" gets cached.
sh "docker commit ${env.BUILD_CONTAINER} ${env.BUILD_IMAGE}"
@@ -213,6 +214,13 @@ pipeline {
}
}
+ stage('docsite') {
+ steps {
+ sh "${RunInBuilder()} ${env.BUILD_CONTAINER} make vhtml"
+ publishHTML([allowMissing: false, alwaysLinkToLastBuild: false, keepAll: false, reportDir: '_output/html', reportFiles: 'index.html', reportName: 'Doc Site', reportTitles: ''])
+ }
+ }
+
stage('make test') {
options {
timeout(time: 20, unit: "MINUTES")
diff --git a/Makefile b/Makefile
index 4852533db5..5df4dcd4f2 100644
--- a/Makefile
+++ b/Makefile
@@ -209,3 +209,28 @@ $(addprefix test-kustomize-,$(KUSTOMIZE_OUTPUT)): test-kustomize-%: _work/kustom
.PHONY: check-go-version-%
check-go-version-%:
@ hack/verify-go-version.sh "$*"
+
+SPHINXOPTS =
+SPHINXBUILD = sphinx-build
+SOURCEDIR = .
+BUILDDIR = _output
+
+# Generate doc site under _output/html with Sphinx.
+vhtml: _work/venv/.stamp
+	. _work/venv/bin/activate && \
+	$(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) && \
+	cp docs/html/index.html $(BUILDDIR)/html/index.html
+
+html:
+	$(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) && \
+	cp docs/html/index.html $(BUILDDIR)/html/index.html
+
+clean-html:
+	rm -rf _output/html
+
+# Set up a Python3 environment with the necessary tools for document creation.
+_work/venv/.stamp: docs/requirements.txt
+	rm -rf ${@D}
+	python3 -m venv ${@D}
+	. ${@D}/bin/activate && pip install -r $<
+	touch $@
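For reference, a local documentation build with these new targets could look roughly like this (a sketch only; it assumes `docs/requirements.txt` pulls in Sphinx, recommonmark, sphinx_markdown_tables, and the Read the Docs theme named in `conf.json` below):

```sh
# Let make create the virtualenv and build through it (one-time setup included).
make vhtml                # creates _work/venv and runs sphinx-build -M html . _output

# Rebuild with an already prepared environment on the PATH.
. _work/venv/bin/activate
make html                 # assumes sphinx-build is available in the current environment

# Remove the generated site before a full rebuild.
make clean-html
```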
diff --git a/README.md b/README.md
index 72f25f0d72..1ce47a2834 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# PMEM-CSI for Kubernetes
+# Introduction to PMEM-CSI for Kubernetes
**Note: This is Alpha code and not production ready.**
@@ -12,7 +12,7 @@ library](https://github.com/pmem/ndctl). In this readme, we use
module (NVDIMM).
The [v0.6.0 release](https://github.com/intel/pmem-csi/releases/tag/v0.6.0)
-is the latest feature release and is [regularly updated](./DEVELOPMENT.md#release-management) with newer base images
+is the latest feature release and is [regularly updated](docs/DEVELOPMENT.md#release-management) with newer base images
and bug fixes. Older versions are no longer supported.
The PMEM-CSI driver follows the [CSI
@@ -20,576 +20,15 @@ specification](https://github.com/container-storage-interface/spec) by
listening for API requests and provisioning volumes accordingly.
- [PMEM-CSI for Kubernetes](#pmem-csi-for-kubernetes)
- - [Design](#design)
- - [Architecture and Operation](#architecture-and-operation)
- - [LVM device mode](#lvm-device-mode)
- - [Direct device mode](#direct-device-mode)
- - [Driver modes](#driver-modes)
- - [Driver Components](#driver-components)
- - [Communication between components](#communication-between-components)
- - [Security](#security)
- - [Volume Persistency](#volume-persistency)
- - [Capacity-aware pod scheduling](#capacity-aware-pod-scheduling)
- - [Prerequisites](#prerequisites)
- - [Software required](#software-required)
- - [Hardware required](#hardware-required)
- - [Persistent memory pre-provisioning](#persistent-memory-pre-provisioning)
- [Supported Kubernetes versions](#supported-kubernetes-versions)
- - [Setup](#setup)
- - [Get source code](#get-source-code)
- - [Run PMEM-CSI on Kubernetes](#run-pmem-csi-on-kubernetes)
- - [Automated testing](#automated-testing)
- - [Unit testing and code quality](#unit-testing-and-code-quality)
- - [QEMU and Kubernetes](#qemu-and-kubernetes)
- - [Starting and stopping a test cluster](#starting-and-stopping-a-test-cluster)
- - [Running commands on test cluster nodes over ssh](#running-commands-on-test-cluster-nodes-over-ssh)
- - [Configuration options](#configuration-options)
- - [Running E2E tests](#running-e2e-tests)
- - [Application examples](#application-examples)
- - [Communication and contribution](#communication-and-contribution)
-
-## Design
-
-### Architecture and Operation
-
-The PMEM-CSI driver can operate in two different device modes: *LVM* and
-*direct*. This table contains an overview and comparison of those modes.
-There is a more detailed explanation in the following paragraphs.
-
-| |`LVM` |`direct` |
-|:-- |:-- |:-- |
-|Main advantage |avoids free space fragmentation<sup>1</sup> |simpler, somewhat faster, but free space may get fragmented<sup>1</sup> |
-|What is served |LVM logical volume |pmem block device |
-|Region affinity<sup>2</sup> |yes: one LVM volume group is created per region, and a volume has to be in one volume group |yes: namespace can belong to one region only |
-|Startup |two extra stages: pmem-ns-init (creates namespaces), vgm (creates volume groups) |no extra steps at startup |
-|Namespace modes |`fsdax` mode<sup>3</sup> namespaces pre-created as pools |namespace in `fsdax` mode created directly, no need to pre-create pools |
-|Limiting space usage | can leave part of device unused during pools creation |no limits, creates namespaces on device until runs out of space |
-| *Name* field in namespace | *Name* gets set to 'pmem-csi' to achieve own vs. foreign marking | *Name* gets set to VolumeID, without attempting own vs. foreign marking |
-|Minimum volume size| 4 MB | 1 GB (see also alignment adjustment below) |
-|Alignment requirements |LVM creation aligns size up to next 4MB boundary |driver aligns size up to next alignment boundary. The default alignment step is 1 GB. Device(s) in interleaved mode will require larger minimum as size has to be at least one alignment step. The possibly bigger alignment step is calculated as interleave-set-size multiplied by 1 GB |
-
-<sup>1</sup> **Free space fragmentation** is a problem when there appears to
-be enough free capacity for a new namespace, but there isn't a contiguous
-region big enough to allocate it. The PMEM-CSI driver is only capable of
-allocating contiguous memory to a namespace and cannot de-fragment or combine
-smaller blocks. For example, this could happen when you create a 63 GB
-namespace, followed by a 1 GB namespace, and then delete the 63 GB namespace.
-Even though there is 127 GB available, the driver cannot create a namespace
-larger than 64 GB.
-
-```
----------------------------------------------------------------------
-| 63 GB free | 1GB used | 64 GB free |
----------------------------------------------------------------------
-```
-
-<sup>2</sup> **Region affinity** means that all parts of a provisioned file
-system are physically located on device(s) that belong to same PMEM region.
-This is important on multi-socket systems where media access time may vary
-based on where the storage device(s) are physically attached.
-
-<sup>3</sup> **fsdax mode** is required for NVDIMM
-namespaces. See [Persistent Memory
-Programming](https://pmem.io/ndctl/ndctl-create-namespace.html) for
-details. `devdax` mode is not supported. Though a
-raw block volume would be useful when a filesystem isn't needed, Kubernetes
-cannot handle [binding a character device to a loop device](https://github.com/kubernetes/kubernetes/blob/7c87b5fb55ca096c007c8739d4657a5a4e29fb09/pkg/volume/util/util.go#L531-L534).
-
-### LVM device mode
-
-In Logical Volume Management (LVM) mode the PMEM-CSI driver
-uses LVM for logical volume Management to avoid the risk of fragmentation. The
-LVM logical volumes are served to satisfy API requests. There is one volume
-group created per region, ensuring the region-affinity of served volumes.
-
-![devicemode-lvm diagram](/docs/images/devicemodes/pmem-csi-lvm.png)
-
-The driver consists of three separate binaries that form two
-initialization stages and a third API-serving stage.
-
-During startup, the driver scans persistent memory for regions and
-namespaces, and tries to create more namespaces using all or part
-(selectable via option) of the remaining available space. This first
-stage is performed by a separate entity `pmem-ns-init`.
-
-The second stage of initialization arranges physical volumes provided
-by namespaces into LVM volume groups. This is performed by a separate
-binary `pmem-vgm`.
-
-After two initialization stages, the third binary `pmem-csi-driver`
-starts serving CSI API requests.
-
-#### Namespace modes in LVM device mode
-
-The PMEM-CSI driver pre-creates namespaces in `fsdax` mode forming
-the corresponding LVM volume group. The amount of space to be
-used is determined using the option `-useforfsdax` given to `pmem-ns-init`.
-This option specifies the limit as an integer percentage.
-The default value is `useforfsdax=100`.
-
-#### Using limited amount of total space in LVM device mode
-
-The PMEM-CSI driver can leave space on devices for others, and
-recognize "own" namespaces. Leaving space for others can be achieved
-by specifying a lower-than-100 value to the `-useforfsdax` option.
-The distinction "own" vs. "foreign" is
-implemented by setting the _Name_ field in namespace to a static
-string "pmem-csi" during namespace creation. When adding physical
-volumes to volume groups, only those physical volumes that are based on
-namespaces with the name "pmem-csi" are considered.
-
-### Direct device mode
-
-The following diagram illustrates the operation in Direct device mode:
-![devicemode-direct diagram](/docs/images/devicemodes/pmem-csi-direct.png)
-
-In direct device mode PMEM-CSI driver allocates namespaces directly
-from the storage device. This creates device space fragmentation risk,
-but reduces complexity and run-time overhead by avoiding additional
-device mapping layer. Direct mode also ensures the region-affinity of
-served volumes, because provisioned volume can belong to one region
-only.
-
-In Direct mode, the two preparation stages used in LVM mode, are not
-needed.
-
-#### Namespace modes in direct device mode
-
-The PMEM-CSI driver creates a namespace directly in the mode which is
-asked by volume creation request, thus bypassing the complexity of
-pre-allocated pools that are used in LVM device mode.
-
-#### Using limited amount of total space in direct device mode
-
-In direct device mode, the driver does not attempt to limit space
-use. It also does not mark "own" namespaces. The _Name_ field of a
-namespace gets value of the VolumeID.
-
-### Driver modes
-
-The PMEM-CSI driver supports running in different modes, which can be
-controlled by passing one of the below options to the driver's
-'_-mode_' command line option. In each mode, it starts a different set
-of open source Remote Procedure Call (gRPC)
-[servers](#driver-components) on given driver endpoint(s).
-
-* **_Controller_** should run as a single instance in cluster level. When the
- driver is running in _Controller_ mode, it forwards the pmem volume
- create/delete requests to the registered node controller servers
- running on the worker node. In this mode, the driver starts the
- following gRPC servers:
-
- * [IdentityServer](#identity-server)
- * [NodeRegistryServer](#node-registry-server)
- * [MasterControllerServer](#master-controller-server)
-
-* One **_Node_** instance should run on each
- worker node that has persistent memory devices installed. When the
- driver starts in such mode, it registers with the _Controller_
- driver running on a given _-registryEndpoint_. In this mode, the
- driver starts the following servers:
-
- * [IdentityServer](#identity-server)
- * [NodeControllerServer](#node-controller-server)
- * [NodeServer](#node-server)
-
-### Driver Components
-
-#### Identity Server
-
-This gRPC server operates on a given endpoint in all driver modes and
-implements the CSI [Identity
-interface](https://github.com/container-storage-interface/spec/blob/master/spec.md#identity-service-rpc).
-
-#### Node Registry Server
-
-When the PMEM-CSI driver runs in _Controller_ mode, it starts a gRPC
-server on a given endpoint(_-registryEndpoint_) and serves the
-[RegistryServer](pkg/pmem-registry/pmem-registry.proto) interface. The
-driver(s) running in _Node_ mode can register themselves with node
-specific information such as node id,
-[NodeControllerServer](#node-controller-server) endpoint, and their
-available persistent memory capacity.
-
-#### Master Controller Server
-
-This gRPC server is started by the PMEM-CSI driver running in
-_Controller_ mode and serves the
-[Controller](https://github.com/container-storage-interface/spec/blob/master/spec.md#controller-service-rpc)
-interface defined by the CSI specification. The server responds to
-CreateVolume(), DeleteVolume(), ControllerPublishVolume(),
-ControllerUnpublishVolume(), and ListVolumes() calls coming from
-[external-provisioner]() and [external-attacher]() sidecars. It
-forwards the publish and unpublish volume requests to the appropriate
-[Node controller server](#node-controller-server) running on a worker
-node that was registered with the driver.
-
-#### Node Controller Server
-
-This gRPC server is started by the PMEM-CSI driver running in _Node_
-mode and implements the
-[ControllerPublishVolume](https://github.com/container-storage-interface/spec/blob/master/spec.md#controllerpublishvolume)
-and
-[ControllerUnpublishVolume](https://github.com/container-storage-interface/spec/blob/master/spec.md#controllerunpublishvolume)
-methods of the [Controller
-service](https://github.com/container-storage-interface/spec/blob/master/spec.md#controller-service-rpc)
-interface defined by the CSI specification. It serves the
-ControllerPublishVolume() and ControllerUnpublishVolume() requests coming
-from the [Master controller server](#master-controller-server) and
-creates/deletes persistent memory devices.
-
-#### Node Server
-
-This gRPC server is started by the driver running in _Node_ mode and
-implements the [Node
-service](https://github.com/container-storage-interface/spec/blob/master/spec.md#node-service-rpc)
-interface defined in the CSI specification. It serves the
-NodeStageVolume(), NodeUnstageVolume(), NodePublishVolume(), and
-NodeUnpublishVolume() requests coming from the Container Orchestrator
-(CO).
-
-### Communication between components
-
-The following diagram illustrates the communication channels between driver components:
-![communication diagram](/docs/images/communication/pmem-csi-communication-diagram.png)
-
-### Security
-
-All PMEM-CSI specific communication [shown in above
-section](#communication-channels) between Master
-Controller([RegistryServer](#node-registry-server),
-[MasterControllerServer](#master-controller-server)) and
-NodeControllers([NodeControllerServer](#node-controller-server)) is
-protected by mutual TLS. Both client and server must identify
-themselves and the certificate they present must be trusted. The
-common name in each certificate is used to identify the different
-components. The following common names have a special meaning:
-
-- `pmem-registry` is used by the [RegistryServer](#node-registry-server).
-- `pmem-node-controller` is used by [NodeControllerServers](#node-controller-server)
-
-The [`test/setup-ca.sh`](test/setup-ca.sh)
-script shows how to generate self-signed certificates. The test cluster is set
-up using certificates created by that script, with secrets prepared by
-[`test/setup-deployment.sh`](test/setup-deployment.sh) before
-deploying the driver using the provided [deployment files](deploy/).
-
-Beware that these are just examples. Administrators of a cluster must
-ensure that they choose key lengths and algorithms of sufficient
-strength for their purposes and manage certificate distribution.
-
-A production deployment can improve upon that by using some other key
-delivery mechanism, like for example
-[Vault](https://www.vaultproject.io/).
-
-
-
-### Volume Persistency
-
-In a typical CSI deployment, volumes are provided by a storage backend
-that is independent of a particular node. When a node goes offline,
-the volume can be mounted elsewhere. But PMEM volumes are *local* to
-node and thus can only be used on the node where they were
-created. This means the applications using PMEM volume cannot freely
-move between nodes. This limitation needs to be considered when
-designing and deploying applications that are to use *local storage*.
-
-These are the volume persistency models considered for implementation
-in PMEM-CSI to serve different application use cases:
-
-* **Persistent volumes**
-A volume gets created independently of the application, on some node
-where there is enough free space. Applications using such a volume are
-then forced to run on that node and cannot run when the node is
-down. Data is retained until the volume gets deleted.
-
-* **Ephemeral volumes**
-Each time an application starts to run on a node, a new volume is
-created for it on that node. When the application stops, the volume is
-deleted. The volume cannot be shared with other applications. Data on
-this volume is retained only while the application runs.
-
-* **Cache volumes**
-Volumes are pre-created on a certain set of nodes, each with its own
-local data. Applications are started on those nodes and then get to
-use the volume on their node. Data persists across application
-restarts. This is useful when the data is only cached information that
-can be discarded and reconstructed at any time *and* the application
-can reuse existing local data when restarting.
-
-Volume | Kubernetes | PMEM-CSI | Limitations
---- | --- | --- | ---
-Persistent | supported | supported | topology aware scheduling<sup>1</sup>
-Ephemeral | supported<sup>2</sup> | supported | resource constraints<sup>3</sup>
-Cache | supported | supported | topology aware scheduling<sup>1</sup>
-
-<sup>1</sup> [Topology aware
-scheduling](https://github.com/kubernetes/enhancements/issues/490)
-ensures that an application runs on a node where the volume was
-created. For CSI-based drivers like PMEM-CSI, Kubernetes >= 1.13 is
-needed. On older Kubernetes releases, pods must be scheduled manually
-onto the right node(s).
-
-<sup>2</sup> [CSI ephemeral volumes](https://kubernetes.io/docs/concepts/storage/volumes/#csi-ephemeral-volumes)
-feature support is alpha in Kubernetes v1.15, and beta in v1.16.
-
-<sup>3</sup> The upstream design for ephemeral volumes currently does
-not take [resource
-constraints](https://github.com/kubernetes/enhancements/pull/716#discussion_r250536632)
-into account. If an application gets scheduled onto a node and then
-creating the ephemeral volume on that node fails, the application on
-the node cannot start until resources become available.
-
-#### Usage on Kubernetes
-
-Kubernetes cluster administrators can expose above mentioned persistent and cache volumes
-to applications using
-[`StorageClass
-Parameters`](https://kubernetes.io/docs/concepts/storage/storage-classes/#parameters). An
-optional `persistencyModel` parameter differentiates how the
-provisioned volume can be used:
-
-* no `persistencyModel` parameter or `persistencyModel: normal` in `StorageClass`
- A normal Kubernetes persistent volume. In this case
- PMEM-CSI creates PMEM volume on a node and the application that
- claims to use this volume is supposed to be scheduled onto this node
- by Kubernetes. The choice of node depends on the StorageClass
- `volumeBindingMode`. In case of `volumeBindingMode: Immediate`
- PMEM-CSI chooses a node randomly, and in case of `volumeBindingMode:
- WaitForFirstConsumer` (also known as late binding) Kubernetes first chooses a node for scheduling
- the application, and PMEM-CSI creates the volume on that
- node. Applications which claim a normal persistent volume have to use
- the `ReadWriteOnce` access mode in their `accessModes` list. This
- [diagram](/docs/images/sequence/pmem-csi-persistent-sequence-diagram.png)
- illustrates how a normal persistent volume gets provisioned in
- Kubernetes using PMEM-CSI driver.
-
-* `persistencyModel: cache`
-Volumes of this type shall be used in combination with
-`volumeBindingMode: Immediate`. In this case, PMEM-CSI creates a set
-of PMEM volumes, each on a different node. The number of PMEM
-volumes to create can be specified by the `cacheSize` StorageClass
-parameter. Applications which claim a `cache` volume can use
-`ReadWriteMany` in their `accessModes` list. Check with provided [cache
-StorageClass](deploy/common/pmem-storageclass-cache.yaml)
-example. This
-[diagram](/docs/images/sequence/pmem-csi-cache-sequence-diagram.png)
-illustrates how a cache volume gets provisioned in Kubernetes using
-PMEM-CSI driver.
-
-**NOTE**: Cache volumes are associated with a node, not a pod. Multiple
-pods using the same cache volume on the same node will not get their
-own instance but will end up sharing the same PMEM volume instead.
-Application deployment has to consider this and use available Kubernetes
-mechanisms like [node
-anti-affinity](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity).
-Check with the provided [cache
-application](deploy/common/pmem-app-cache.yaml) example.
-
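For illustration only, a cache storage class along the lines described above could be created roughly like this (a sketch; the linked `pmem-storageclass-cache.yaml` is the canonical example, and the `cacheSize` value here is arbitrary):

```sh
kubectl create -f - <<EOF
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: pmem-csi-sc-cache
provisioner: pmem-csi.intel.com
volumeBindingMode: Immediate
parameters:
  persistencyModel: cache
  cacheSize: "2"   # create one PMEM volume on each of two nodes
EOF
```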
-**WARNING**: late binding (`volumeBindingMode:WaitForFirstConsumer`) has some caveats:
-* Pod creation may get stuck when there isn't enough capacity left for
- the volumes; see the next section for details.
-* A node is only chosen the first time a pod starts. After that it will always restart
- on that node, because that is where the persistent volume was created.
-
-Volume requests embedded in Pod spec are provisioned as ephemeral volumes. The volume request could use below fields as [`volumeAttributes`](https://kubernetes.io/docs/concepts/storage/volumes/#csi):
-
-|key|meaning|optional|values|
-|---|-------|--------|-------------|
-|`size`|Size of the requested ephemeral volume as [Kubernetes memory string](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-memory) ("1Mi" = 1024*1024 bytes, "1e3K" = 1000000 bytes)|No||
-|`eraseAfter`|Clear all data after use and before deleting the volume|Yes|`true` (default), `false`|
-
-Check with provided [example application](deploy/kubernetes-1.15/pmem-app-ephemeral.yaml) for
-ephemeral volume usage.
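As a sketch of how these attributes fit into a pod spec (the linked example application is authoritative; the image, names, and size used here are placeholders):

```sh
kubectl create -f - <<EOF
kind: Pod
apiVersion: v1
metadata:
  name: my-csi-app-inline
spec:
  containers:
    - name: my-app
      image: busybox
      command: ["sleep", "infinity"]
      volumeMounts:
        - mountPath: /data
          name: my-csi-volume
  volumes:
    - name: my-csi-volume
      csi:
        driver: pmem-csi.intel.com
        fsType: ext4
        volumeAttributes:
          size: "2Gi"
          eraseAfter: "true"
EOF
```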
-
-### Capacity-aware pod scheduling
-
-PMEM-CSI implements the CSI `GetCapacity` call, but Kubernetes
-currently doesn't call that and schedules pods onto nodes without
-being aware of available storage capacity on the nodes. The effect is
-that pods using volumes with late binding may get tentatively assigned
-to a node and then get stuck because that decision is not reconsidered
-when the volume cannot be created there ([a
-bug](https://github.com/kubernetes/kubernetes/issues/72031)). Even if
-that decision is reconsidered, the same node may get selected again
-because Kubernetes does not get informed about the insufficient
-storage. Pods with ephemeral inline volumes always get stuck because
-the decision to use the node [is final](https://github.com/kubernetes-sigs/descheduler/issues/62).
-
-Work is [under
-way](https://github.com/kubernetes/enhancements/pull/1353) to enhance
-scheduling in Kubernetes. In the meantime, PMEM-CSI provides two components
-that help with pod scheduling:
-
-#### Scheduler extender
-
-When a pod requests the special [extended
-resource](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#extended-resources)
-called `pmem-csi.intel.com/scheduler`, the Kubernetes scheduler calls
-a [scheduler
-extender](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/scheduling/scheduler_extender.md)
-provided by PMEM-CSI with a list of nodes that a pod might run
-on. This extender is implemented in the master controller and thus can
-connect to the controller on each of these nodes to check for
-capacity. PMEM-CSI then filters out all nodes which currently do not
-have enough storage left for the volumes that still need to be
-created. This considers inline ephemeral volumes and all unbound
-volumes, regardless whether they use late binding or immediate
-binding.
-
-This special scheduling can be requested manually by adding this snippet
-to one container in the pod spec:
-```
-containers:
-- name: some-container
- ...
- resources:
- limits:
- pmem-csi.intel.com/scheduler: "1"
- requests:
- pmem-csi.intel.com/scheduler: "1"
-```
-
-This scheduler extender is optional and not necessarily installed in
-all clusters that have PMEM-CSI. Don't add this extended resource
-unless the scheduler extender is installed, otherwise the pod won't
-start!
-
-#### Pod admission webhook
-
-Having to add `pmem-csi.intel.com/scheduler` manually is not
-user-friendly. To simplify this, PMEM-CSI provides a [mutating
-admission
-webhook](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/)
-which intercepts the creation of all pods. If that pod uses inline
-ephemeral volumes or volumes with late binding that are provided by
-PMEM-CSI, the webhook transparently adds the extended resource
-request. PMEM-CSI volumes with immediate binding are ignored because
-for those the normal topology support ensures that unsuitable nodes
-are filtered out.
-
-The webhook can only do that if the persistent volume claim (PVC) and
-its storage class have been created already. This is normally not
-required: it's okay to create the pod first, then later add the
-PVC. The pod simply won't start in the meantime.
-
-The webhook deals with this uncertainty by allowing the creation of
-the pod without adding the extended resource when it lacks the
-necessary information. The alternative would be to reject the pod, but
-that would be a change of behavior of the cluster that may affect also pods
-that don't use PMEM-CSI at all.
-
-Users must take care to create PVCs first, then the pods if they want
-to use the webhook. In practice, that is often already done because it
-is more natural, so it is not a big limitation.
-
-## Prerequisites
-
-### Software required
-
-The recommended minimum Linux kernel version for running the PMEM-CSI driver is 4.15. See [Persistent Memory Programming](https://pmem.io/2018/05/15/using_persistent_memory_devices_with_the_linux_device_mapper.html) for more details about supported kernel versions.
-
-### Hardware required
-
-Persistent memory device(s) are required for operation. However, some
-development and testing can be done using QEMU-emulated persistent
-memory devices. See the ["QEMU and Kubernetes"](#qemu-and-kubernetes)
-section for the commands that create such a virtual test cluster.
-
-### Persistent memory pre-provisioning
-
-The PMEM-CSI driver needs pre-provisioned regions on the NVDIMM
-device(s). The PMEM-CSI driver itself intentionally leaves that to the
-administrator who then can decide how much and how PMEM is to be used
-for PMEM-CSI.
-
-Beware that the PMEM-CSI driver will run without errors on a node
-where PMEM was not prepared for it. It will then report zero local
-storage for that node, something that currently is only visible in the
-log files.
-
-When running the Kubernetes cluster and PMEM-CSI on bare metal,
-the [ipmctl](https://github.com/intel/ipmctl) utility can be used to create regions.
-App Direct Mode has two configuration options - interleaved or non-interleaved.
-One region per each NVDIMM is created in non-interleaved configuration.
-In such a configuration, a PMEM-CSI volume cannot be larger than one NVDIMM.
-
-Example of creating regions without interleaving, using all NVDIMMs:
-```sh
-# ipmctl create -goal PersistentMemoryType=AppDirectNotInterleaved
-```
-
-Alternatively, multiple NVDIMMs can be combined to form an interleaved set.
-This causes the data to be striped over multiple NVDIMM devices
-for improved read/write performance and allowing one region (also, PMEM-CSI volume)
-to be larger than single NVDIMM.
-
-Example of creating regions in interleaved mode, using all NVDIMMs:
-```sh
-# ipmctl create -goal PersistentMemoryType=AppDirect
-```
-
-When running inside virtual machines, each virtual machine typically
-already gets access to one region and `ipmctl` is not needed inside
-the virtual machine. Instead, that region must be made available for
-use with PMEM-CSI because when the virtual machine comes up for the
-first time, the entire region is already allocated for use as a single
-block device:
-``` sh
-# ndctl list -RN
-{
- "regions":[
- {
- "dev":"region0",
- "size":34357641216,
- "available_size":0,
- "max_available_extent":0,
- "type":"pmem",
- "persistence_domain":"unknown",
- "namespaces":[
- {
- "dev":"namespace0.0",
- "mode":"raw",
- "size":34357641216,
- "sector_size":512,
- "blockdev":"pmem0"
- }
- ]
- }
- ]
-}
-# ls -l /dev/pmem*
-brw-rw---- 1 root disk 259, 0 Jun 4 16:41 /dev/pmem0
-```
-
-Labels must be initialized in such a region, which must be performed
-once after the first boot:
-``` sh
-# ndctl disable-region region0
-disabled 1 region
-# ndctl init-labels nmem0
-initialized 1 nmem
-# ndctl enable-region region0
-enabled 1 region
-# ndctl list -RN
-[
- {
- "dev":"region0",
- "size":34357641216,
- "available_size":34357641216,
- "max_available_extent":34357641216,
- "type":"pmem",
- "iset_id":10248187106440278,
- "persistence_domain":"unknown"
- }
-]
-# ls -l /dev/pmem*
-ls: cannot access '/dev/pmem*': No such file or directory
-```
+ - [Design and architecture](docs/design.md)
+ - [Instructions for Admins and Users](docs/install.md)
+     - [Prerequisites](docs/install.md#prerequisites)
+     - [Installation and setup](docs/install.md#installation-and-setup)
+     - [Filing issues and contributing](docs/install.md#filing-issues-and-contributing)
+ - [Develop and contribute](docs/DEVELOPMENT.md)
+ - [Automated testing](docs/autotest.md)
+ - [Application examples](examples/readme.rst)
## Supported Kubernetes versions
@@ -613,634 +52,3 @@ available in later versions. The external-provisioner v1.0.1 for
Kubernetes 1.13 lacks the `--strict-topology` flag and therefore late
binding is unreliable. It's also a release that is not supported
officially by upstream anymore.
-
-
-## Setup
-
-### Get source code
-
-PMEM-CSI uses Go modules and thus can be checked out and (if that should be desired)
-built anywhere in the filesystem. Pre-built container images are available and thus
-users don't need to build from source, but they will still need some additional files.
-To get the source code, use:
-
-```
-git clone https://github.com/intel/pmem-csi
-```
-
-### Run PMEM-CSI on Kubernetes
-
-This section assumes that a Kubernetes cluster is already available
-with at least one node that has persistent memory device(s). For development or
-testing, it is also possible to use a cluster that runs on QEMU virtual
-machines, see the ["QEMU and Kubernetes"](#qemu-and-kubernetes) section below.
-
-- **Make sure that the alpha feature gates CSINodeInfo and CSIDriverRegistry are enabled**
-
-The method to configure alpha feature gates may vary, depending on the Kubernetes deployment.
-It may not be necessary anymore when the feature has reached beta state, which depends
-on the Kubernetes version.
-
-- **Label the cluster nodes that provide persistent memory device(s)**
-
-```sh
- $ kubectl label node <node-name> storage=pmem
-```
-
-- **Set up certificates**
-
-Certificates are required as explained in [Security](#security).
-If you are not using the test cluster described in
-[Starting and stopping a test cluster](#starting-and-stopping-a-test-cluster)
-where certificates are created automatically, you must set up certificates manually.
-This can be done by running the `./test/setup-ca-kubernetes.sh` script for your cluster.
-This script requires "cfssl" tools which can be downloaded.
-These are the steps for manual set-up of certificates:
-
-- Download cfssl tools
-
-```sh
- $ curl -L https://pkg.cfssl.org/R1.2/cfssl_linux-amd64 -o _work/bin/cfssl --create-dirs
- $ curl -L https://pkg.cfssl.org/R1.2/cfssljson_linux-amd64 -o _work/bin/cfssljson --create-dirs
- $ chmod a+x _work/bin/cfssl _work/bin/cfssljson
-```
-
-- Run certificates set-up script
-
-```sh
- $ KUBCONFIG="<path to kubeconfig file>" PATH="$PATH:$PWD/_work/bin" ./test/setup-ca-kubernetes.sh
-```
-
-- **Deploy the driver to Kubernetes**
-
-The `deploy/kubernetes-<kubernetes version>` directory contains
-`pmem-csi*.yaml` files which can be used to deploy the driver on that
-Kubernetes version. The files in the directory with the highest
-Kubernetes version might also work for more recent Kubernetes
-releases. All of these deployments use images published by Intel on
-[Docker Hub](https://hub.docker.com/u/intel).
-
-For each Kubernetes version, four different deployment variants are provided:
-
- - `direct` or `lvm`: one uses direct device mode, the other LVM device mode.
- - `testing`: the variants with `testing` in the name enable debugging
- features and shouldn't be used in production.
-
-For example, to deploy for production with LVM device mode onto Kubernetes 1.14, use:
-
-```sh
- $ kubectl create -f deploy/kubernetes-1.14/pmem-csi-lvm.yaml
-```
-
-The PMEM-CSI [scheduler extender](#scheduler-extender) and
-[webhook](#pod-admission-webhook) are not enabled in this basic
-installation. See [below](#enable-scheduler-extensions) for
-instructions about that.
-
-These variants were generated with
-[`kustomize`](https://github.com/kubernetes-sigs/kustomize).
-`kubectl` >= 1.14 includes some support for that. The sub-directories
-of `deploy/kubernetes-<kubernetes version>` can be used as bases
-for `kubectl kustomize`. For example:
-
- - Change namespace:
- ```
- $ mkdir -p my-pmem-csi-deployment
- $ cat >my-pmem-csi-deployment/kustomization.yaml <my-pmem-csi-deployment/kustomization.yaml <my-pmem-csi-deployment/lvm-parameters-patch.yaml <,storage=pmem
-```
-
-If **storage=pmem** is missing, label manually as described above. If
-**pmem-csi.intel.com/node** is missing, then double-check that the
-alpha feature gates are enabled, that the CSI driver is running on the node,
-and that the driver's log output doesn't contain errors.
-
-- **Define two storage classes using the driver**
-
-```sh
- $ kubectl create -f deploy/kubernetes-<kubernetes version>/pmem-storageclass-ext4.yaml
- $ kubectl create -f deploy/kubernetes-<kubernetes version>/pmem-storageclass-xfs.yaml
-```
-
-- **Provision two pmem-csi volumes**
-
-```sh
- $ kubectl create -f deploy/kubernetes-<kubernetes version>/pmem-pvc.yaml
-```
-
-- **Verify two Persistent Volume Claims have 'Bound' status**
-
-```sh
- $ kubectl get pvc
- NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE
- pmem-csi-pvc-ext4 Bound pvc-f70f7b36-6b36-11e9-bf09-deadbeef0100 4Gi RWO pmem-csi-sc-ext4 16s
- pmem-csi-pvc-xfs Bound pvc-f7101fd2-6b36-11e9-bf09-deadbeef0100 4Gi RWO pmem-csi-sc-xfs 16s
-```
-
-- **Start two applications requesting one provisioned volume each**
-
-```sh
- $ kubectl create -f deploy/kubernetes-<kubernetes version>/pmem-app.yaml
-```
-
-These applications use **storage: pmem** in the nodeSelector
-list to ensure scheduling to a node supporting pmem device, and each requests a mount of a volume,
-one with ext4-format and another with xfs-format file system.
-
-- **Verify two application pods reach 'Running' status**
-
-```sh
- $ kubectl get po my-csi-app-1 my-csi-app-2
- NAME READY STATUS RESTARTS AGE
- my-csi-app-1 1/1 Running 0 6m5s
- NAME READY STATUS RESTARTS AGE
- my-csi-app-2 1/1 Running 0 6m1s
-```
-
-- **Check that applications have a pmem volume mounted with added dax option**
-
-```sh
- $ kubectl exec my-csi-app-1 -- df /data
- Filesystem 1K-blocks Used Available Use% Mounted on
- /dev/ndbus0region0fsdax/5ccaa889-551d-11e9-a584-928299ac4b17
- 4062912 16376 3820440 0% /data
- $ kubectl exec my-csi-app-2 -- df /data
- Filesystem 1K-blocks Used Available Use% Mounted on
- /dev/ndbus0region0fsdax/5cc9b19e-551d-11e9-a584-928299ac4b17
- 4184064 37264 4146800 1% /data
-
- $ kubectl exec my-csi-app-1 -- mount |grep /data
- /dev/ndbus0region0fsdax/5ccaa889-551d-11e9-a584-928299ac4b17 on /data type ext4 (rw,relatime,dax)
- $ kubectl exec my-csi-app-2 -- mount |grep /data
- /dev/ndbus0region0fsdax/5cc9b19e-551d-11e9-a584-928299ac4b17 on /data type xfs (rw,relatime,attr2,dax,inode64,noquota)
-```
-
-#### Note about raw block volumes
-
-Applications can use volumes provisioned by PMEM-CSI as [raw block
-devices](https://kubernetes.io/blog/2019/03/07/raw-block-volume-support-to-beta/). Such
-volumes use the same "fsdax" namespace mode as filesystem volumes
-and therefore are block devices. That mode only supports dax (=
-`mmap(MAP_SYNC)`) through a filesystem. Pages mapped on the raw block
-device go through the Linux page cache. Applications have to format
-and mount the raw block volume themselves if they want dax. The
-advantage then is that they have full control over that part.
-
-For provisioning a PMEM volume as raw block device, one has to create a
-`PersistentVolumeClaim` with `volumeMode: Block`. See example [PVC](
-deploy/common/pmem-pvc-block-volume.yaml) and
-[application](deploy/common/pmem-app-block-volume.yaml) for usage reference.
-
-That example demonstrates how to handle some details:
-- `mkfs.ext4` needs `-b 4096` to produce volumes that support dax;
- without it, the automatic block size detection may end up choosing
- an unsuitable value depending on the volume size.
-- [Kubernetes bug #85624](https://github.com/kubernetes/kubernetes/issues/85624)
- must be worked around to format and mount the raw block device.
-
-#### Enable scheduler extensions
-
-The PMEM-CSI scheduler extender and admission webhook are provided by
-the PMEM-CSI controller. They need to be enabled during deployment via
-the `--schedulerListen=[<listen address>]:<port>` parameter. The
-listen address is optional and can be left out. The port is where a
-HTTPS server will run. It uses the same certificates as the internal
-gRPC service. When using the CA creation script described above, they
-will contain alternative names for the URLs described in this section
-(service names, `127.0.0.1` IP address).
-
-This parameter can be added to one of the existing deployment files
-with `kustomize`. All of the following examples assume that the
-current directory contains the `deploy` directory from the PMEM-CSI
-repository. It is also possible to reference the base via a
-[URL](https://github.com/kubernetes-sigs/kustomize/blob/master/examples/remoteBuild.md).
-
-``` sh
-mkdir my-pmem-csi-deployment
-
-cat >my-pmem-csi-deployment/kustomization.yaml <my-pmem-csi-deployment/scheduler-patch.yaml <my-scheduler/kustomization.yaml <my-scheduler/node-port-patch.yaml </var/lib/scheduler/scheduler-policy.cfg' <:",
- "filterVerb": "filter",
- "prioritizeVerb": "prioritize",
- "nodeCacheCapable": false,
- "weight": 1,
- "managedResources":
- [{
- "name": "pmem-csi.intel.com/scheduler",
- "ignoredByScheduler": true
- }]
- }]
-}
-EOF
-
-cat >kubeadm.config <=
-1.15, it can also be used to let individual pods bypass the webhook by
-adding that label. The CA gets configured explicitly, which is
-supported for webhooks.
-
-``` sh
-mkdir my-webhook
-
-cat >my-webhook/kustomization.yaml <my-webhook/webhook-patch.yaml <
-
-## Automated testing
-
-### Unit testing and code quality
-
-Use the `make test` command.
-
-### QEMU and Kubernetes
-
-E2E testing relies on a cluster running inside multiple QEMU virtual
-machines deployed by [GoVM](https://github.com/govm-project/govm). The
-same cluster can also be used interactively when real hardware is not
-available.
-
-E2E testing is known to work on a Linux development host system. The user
-must be allowed to use Docker.
-
-KVM must be enabled. Usually this is the case when `/dev/kvm` exists.
-The current user does not need the privileges to use KVM and QEMU
-doesn't have to be installed because GoVM will run QEMU inside a
-container with root privileges.
-
-Note that cloud providers often don't offer KVM support on their
-regular machines. Search for "nested virtualization" for your provider
-to determine whether and how it supports KVM.
-
-Nested virtualization is also needed when using Kata Containers inside
-the cluster. On Intel-based machines it can be enabled by loading the
-`kvm_intel` module with `nested=1` (see
-https://wiki.archlinux.org/index.php/KVM#Nested_virtualization). At
-this time, Kata Containers up to and including 1.9.1 is [not
-compatible with
-PMEM-CSI](https://github.com/intel/pmem-csi/issues/303) because
-volumes are not passed in as PMEM, but Kata Containers [can be
-installed](https://github.com/kata-containers/packaging/tree/master/kata-deploy#kubernetes-quick-start)
-and used for applications that are not using PMEM.
-
-The `clear-cloud` image is downloaded automatically. By default,
-four different virtual machines are prepared. Each image is pre-configured
-with its own hostname and with network.
-
-The images will contain the latest
-[Clear Linux OS](https://clearlinux.org/) and have the Kubernetes
-version supported by Clear Linux installed.
-
-PMEM-CSI images must have been created and published in some Docker
-registry, as described earlier in [build PMEM-CSI](#build-pmem-csi).
-In addition, that registry must be accessible from inside the
-cluster. That works for the default (a local registry in the build
-host) but may require setting additional [configuration
-options](#configuration-options) for other scenarios.
-
-### Starting and stopping a test cluster
-
-`make start` will bring up a Kubernetes test cluster inside four QEMU
-virtual machines.
-The first node is the Kubernetes master without
-persistent memory.
-The other three nodes are worker nodes with one emulated 32GB NVDIMM each.
-After the cluster has been formed, `make start` adds `storage=pmem` label
-to the worker nodes and deploys the PMEM-CSI driver.
-Once `make start` completes, the cluster is ready for interactive use via
-`kubectl` inside the virtual machine. Alternatively, you can also
-set `KUBECONFIG` as shown at the end of the `make start` output
-and use `kubectl` binary on the host running VMs.
-
-Use `make stop` to stop and remove the virtual machines.
-
-`make restart` can be used to cleanly reboot all virtual
-machines. This is useful during development after a `make push-images`
-to ensure that the cluster runs those rebuilt images.
-
-### Running commands on test cluster nodes over ssh
-
-`make start` generates ssh wrapper scripts `_work/pmem-govm/ssh.N` for each
-test cluster node which are handy for running a single command or to
-start an interactive shell. Examples:
-
-`_work/pmem-govm/ssh.0 kubectl get pods` runs a kubectl command on
-the master node.
-
-`_work/pmem-govm/ssh.1` starts a shell on the first worker node.
-
-### Deploying PMEM-CSI on a test cluster
-
-After `make start`, PMEM-CSI is *not* installed yet. Either install
-manually as [described for a normal
-cluster](#run-pmem-csi-on-kubernetes) or use the
-[setup-deployment.sh](./test/setup-deployment.sh) script.
-
-### Configuration options
-
-Several aspects of the cluster and build setup can be configured by overriding
-the settings in the [test-config.sh](test/test-config.sh) file. See
-that file for a description of all options. Options can be set as
-environment variables of `make start` on a case-by-case basis or
-permanently by creating a file like `test/test-config.d/my-config.sh`.
-
-Multiple different clusters can be brought up in parallel by changing
-the default `pmem-govm` cluster name via the `CLUSTER` env variable.
-
-For example, this invocation sets up a cluster using the non-default
-Fedora distro:
-
-``` sh
-TEST_DISTRO=fedora CLUSTER=fedora-govm make start
-```
-
-### Running E2E tests
-
-`make test_e2e` will run [csi-test
-sanity](https://github.com/kubernetes-csi/csi-test/tree/master/pkg/sanity)
-tests and some [Kubernetes storage
-tests](https://github.com/kubernetes/kubernetes/tree/master/test/e2e/storage/testsuites)
-against the PMEM-CSI driver.
-
-When [ginkgo](https://onsi.github.io/ginkgo/) is installed, then it
-can be used to run individual tests and to control additional aspects
-of the test run. For example, to run just the E2E provisioning test
-(create PVC, write data in one pod, read it in another) in verbose mode:
-
-``` sh
-$ KUBECONFIG=$(pwd)/_work/pmem-govm/kube.config REPO_ROOT=$(pwd) ginkgo -v -focus=pmem-csi.*should.provision.storage.with.defaults ./test/e2e/
-Nov 26 11:21:28.805: INFO: The --provider flag is not set. Treating as a conformance test. Some tests may not be run.
-Running Suite: PMEM E2E suite
-=============================
-Random Seed: 1543227683 - Will randomize all specs
-Will run 1 of 61 specs
-
-Nov 26 11:21:28.812: INFO: checking config
-Nov 26 11:21:28.812: INFO: >>> kubeConfig: /nvme/gopath/src/github.com/intel/pmem-csi/_work/pmem-govm/kube.config
-Nov 26 11:21:28.817: INFO: Waiting up to 30m0s for all (but 0) nodes to be schedulable
-...
-Ran 1 of 61 Specs in 58.465 seconds
-SUCCESS! -- 1 Passed | 0 Failed | 0 Pending | 60 Skipped
-PASS
-
-Ginkgo ran 1 suite in 1m3.850672246s
-Test Suite Passed
-```
-
-It is also possible to run just the sanity tests until one of them fails:
-
-``` sh
-$ REPO_ROOT=`pwd` ginkgo '-focus=sanity' -failFast ./test/e2e/
-...
-```
-
-## Application examples
-
-Information about specific usages of PMEM-CSI are described in separate documents:
-
-* Deploying a Redis cluster through the redis-operator using QEMU-emulated persistent memory devices ([examples/redis-operator.md](examples/redis-operator.md)).
-* Installing Kubernetes and PMEM-CSI on Google Cloud machines. ([examples/gce.md](examples/gce.md)).
-
-## Communication and contribution
-
-Report a bug by [filing a new issue](https://github.com/intel/pmem-csi/issues).
-
-Before making your first contribution, be sure to read the [development documentation](DEVELOPMENT.md)
-for guidance on code quality and branches.
-
-Contribute by [opening a pull request](https://github.com/intel/pmem-csi/pulls).
-
-Learn [about pull requests](https://help.github.com/articles/using-pull-requests/).
-
-**Reporting a Potential Security Vulnerability:** If you have discovered potential security vulnerability in PMEM-CSI, please send an e-mail to secure@intel.com. For issues related to Intel Products, please visit [Intel Security Center](https://security-center.intel.com).
-
-It is important to include the following details:
-
-- The projects and versions affected
-- Detailed description of the vulnerability
-- Information on known exploits
-
-Vulnerability information is extremely sensitive. Please encrypt all security vulnerability reports using our [PGP key](https://www.intel.com/content/www/us/en/security-center/pgp-public-key.html).
-
-A member of the Intel Product Security Team will review your e-mail and contact you to collaborate on resolving the issue. For more information on how Intel works to resolve security issues, see: [vulnerability handling guidelines](https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html).
-
-
diff --git a/conf.json b/conf.json
new file mode 100644
index 0000000000..0d922c9e15
--- /dev/null
+++ b/conf.json
@@ -0,0 +1,32 @@
+{
+ "author": "",
+ "copyright": "2019,",
+ "exclude_patterns": [
+ "_output",
+ "Thumbs.db",
+ ".DS_Store",
+ ".tox",
+ "_work",
+ "deploy/kustomize",
+ "test/test-config.d",
+ "pkg/scheduler"
+ ],
+ "extensions": [
+ "recommonmark",
+ "sphinx_markdown_tables"
+ ],
+ "html_static_path": [
+ "_static"
+ ],
+ "html_theme": "sphinx_rtd_theme",
+ "project": "PMEM-CSI",
+ "templates_path": [
+ "_templates"
+ ],
+ "html_copy_source": false,
+ "rst_epilog": ".. include:: /docs/substitutions.txt",
+ "source_suffix": {
+ ".rst": "restructuredtext",
+ ".md": "markdown"
+ }
+}
diff --git a/conf.py b/conf.py
new file mode 100644
index 0000000000..d0e82f59a8
--- /dev/null
+++ b/conf.py
@@ -0,0 +1,169 @@
+
+import json
+from docutils import nodes
+from os.path import isdir, isfile, join, basename, dirname
+from os import makedirs, getenv
+from shutil import copyfile
+
+##############################################################################
+#
+# This section determines the behavior of links to local items in .md files.
+#
+# if useGitHubURL == True:
+#
+# links to local files and directories will be turned into github URLs
+# using either the baseBranch defined here or using the commit SHA.
+#
+# if useGitHubURL == False:
+#
+# local files will be moved to the website directory structure when built
+# local directories will still be links to github URLs
+#
+# if built with GitHub workflows:
+#
+# the GitHub URLs will use the commit SHA (GITHUB_SHA environment variable
+# is defined by GitHub workflows) to link to the specific commit.
+#
+##############################################################################
+
+baseBranch = "devel"
+useGitHubURL = True
+commitSHA = getenv('GITHUB_SHA')
+githubBaseURL = "https://github.com/intelkevinputnam/pmem-csi/"
+githubFileURL = githubBaseURL + "blob/"
+githubDirURL = githubBaseURL + "tree/"
+if commitSHA:
+    githubFileURL = githubFileURL + commitSHA + "/"
+    githubDirURL = githubDirURL + commitSHA + "/"
+else:
+    githubFileURL = githubFileURL + baseBranch + "/"
+    githubDirURL = githubDirURL + baseBranch + "/"
+
+# End GitHub URL section
+
+with open('conf.json') as jsonFile:
+    conf = json.load(jsonFile)
+
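+# Expose every setting from conf.json as a module-level variable so Sphinx
+# picks it up (project, extensions, html_theme, ...).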
+for item in conf:
+    globals()[item] = (conf[item])
+
+def setup(app):
+    app.connect('doctree-resolved',fixLocalMDAnchors)
+    app.connect('missing-reference',fixRSTLinkInMD)
+
+##############################################################################
+#
+# This section defines callbacks that make markdown specific tweaks to
+# either:
+#
+# 1. Fix something that recommonmark does wrong.
+# 2. Provide support for .md files that are written as READMEs in a GitHub
+# repo.
+#
+# Only use these changes if using the extension ``recommonmark``.
+#
+##############################################################################
+
+
+# Callback registered with 'missing-reference'.
+def fixRSTLinkInMD(app, env, node, contnode):
+    refTarget = node.get('reftarget')
+    filePath = refTarget.lstrip("/")
+    if '.rst' in refTarget and "://" not in refTarget:
+        # This occurs when a .rst file is referenced from a .md file
+        # Currently unable to check if file exists as no file
+        # context is provided and links are relative.
+        #
+        # Example: [Application examples](examples/readme.rst)
+        #
+        contnode['refuri'] = contnode['refuri'].replace('.rst','.html')
+        contnode['internal'] = "True"
+        return contnode
+    else:
+        # This occurs when a file is referenced for download from an .md file.
+        # Construct a list of them and short-circuit the warning. The files
+        # are moved later (need file location context). To avoid warnings
+        # when writing .md files, make the links absolute. This only marks them
+        # fixed if it can verify that they exist.
+        #
+        # Example: [Makefile](/Makefile)
+        #
+        if isfile(filePath) or isdir(filePath):
+            return contnode
+
+
+def normalizePath(docPath,uriPath):
+    if uriPath == "":
+        return uriPath
+    if "#" in uriPath:
+        # Strip out anchors
+        uriPath = uriPath.split("#")[0]
+    if uriPath.startswith("/"):
+        # It's an absolute path
+        return uriPath.lstrip("/") #path to file from project directory
+    else:
+        # It's a relative path
+        docDir = dirname(docPath)
+        return join(docDir,uriPath) #path to file from referencing file
+
+
+# Callback registered with 'doctree-resolved'.
+def fixLocalMDAnchors(app, doctree, docname):
+    for node in doctree.traverse(nodes.reference):
+        uri = node.get('refuri')
+        filePath = normalizePath(docname,uri)
+        if isfile(filePath):
+            # Only do this if the file exists.
+            #
+            # TODO: Pop a warning if the file doesn't exist.
+            #
+            if '.md' in uri and '://' not in uri:
+                # Make sure .md file links that weren't caught are converted.
+                # These occur when creating an explicit link to an .md file
+                # from an .rst file. By default these are not validated by Sphinx
+                # or recommonmark. Only toctree references are validated. recommonmark
+                # also fails to convert links to local Markdown files that include
+                # anchors. This fixes that as well.
+                #
+                # Only include this code if .md files are being converted to html
+                #
+                # Example: `Google Cloud Engine `__
+                # [configuration options](autotest.md#configuration-options)
+                #
+                node['refuri'] = node['refuri'].replace('.md','.html')
+            else:
+                # Handle the case where markdown is referencing local files in the repo
+                #
+                # Example: [Makefile](/Makefile)
+                #
+                if useGitHubURL:
+                    # Replace references to local files with links to the GitHub repo
+                    #
+                    newURI = githubFileURL + filePath
+                    print("new url: ", newURI)
+                    node['refuri']=newURI
+                else:
+                    # If there are links to local files other than .md (.rst files are caught
+                    # when warnings are fired), move the files into the Sphinx project, so
+                    # they can be accessed.
+                    newFileDir = join(app.outdir,dirname(filePath)) # where to move the file in Sphinx output.
+                    newFilePath = join(app.outdir,filePath)
+                    newURI = uri # if the path is relative no need to change it.
+                    if uri.startswith("/"):
+                        # It's an absolute path. Need to make it relative.
+                        uri = uri.lstrip("/")
+                        docDirDepth = len(docname.split("/")) - 1
+                        newURI = "../"*docDirDepth + uri
+                    if not isdir(newFileDir):
+                        makedirs(newFileDir)
+                    copyfile(filePath,newFilePath)
+                    node['refuri'] = newURI
+        elif "#" not in uri: # ignore anchors
+            # turn links to directories into links to the repo
+            if isdir(filePath):
+                newURI = githubDirURL + filePath
+                node['refuri']=newURI
+
+
+
+
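To make the effect of these callbacks concrete, here is a small standalone sketch (not part of the build) of the GitHub-URL rewrite that `fixLocalMDAnchors` performs for repository-local references, under the same assumptions as above (`baseBranch` is "devel", `GITHUB_SHA` unset); `to_github_url` is just an illustrative helper, not part of conf.py:

```python
from os import getenv

githubBaseURL = "https://github.com/intelkevinputnam/pmem-csi/"
ref = (getenv('GITHUB_SHA') or "devel") + "/"

def to_github_url(local_path, is_dir=False):
    """Map a repo-local reference such as '/Makefile' to the GitHub URL used on the doc site."""
    kind = "tree/" if is_dir else "blob/"
    return githubBaseURL + kind + ref + local_path.lstrip("/")

print(to_github_url("/Makefile"))       # .../blob/devel/Makefile
print(to_github_url("/deploy", True))   # .../tree/devel/deploy
```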
diff --git a/DEVELOPMENT.md b/docs/DEVELOPMENT.md
similarity index 77%
rename from DEVELOPMENT.md
rename to docs/DEVELOPMENT.md
index 51734b0c3a..4488507c75 100644
--- a/DEVELOPMENT.md
+++ b/docs/DEVELOPMENT.md
@@ -1,4 +1,4 @@
-# Develop and Contribute
+# Develop and contribute
- [Setup](#setup)
- [Build PMEM-CSI](#build-pmem-csi)
@@ -20,10 +20,10 @@
- [Specific arguments to pmem-csi-driver](#specific-arguments-to-pmem-csi-driver)
- [Environment variables](#environment-variables)
- [Logging](#logging)
-- [Notes about switching device mode](#notes-about-switching-device-mode)
+- [Switching device mode](#switching-device-mode)
- [Going from LVM device mode to direct device mode](#going-from-lvm-device-mode-to-direct-device-mode)
- [Going from direct device mode to LVM device mode](#going-from-direct-device-mode-to-lvm-device-mode)
-- [Notes about accessing system directories in a container](#notes-about-accessing-system-directories-in-a-container)
+- [Accessing system directories in a container](#accessing-system-directories-in-a-container)
- [Read-only access to /sys](#read-only-access-to-sys)
- [Access to /dev of host](#access-to-dev-of-host)
- [Repository elements which are generated or created separately](#repository-elements-which-are-generated-or-created-separately)
@@ -32,6 +32,7 @@
- [Diagrams describing provisioning sequence](#diagrams-describing-provisioning-sequence)
- [RegistryServer spec](#registryserver-spec)
- [Table of Contents in README and DEVELOPMENT](#table-of-contents-in-readme-and-development)
+- [Build, edit, and deploy the Read the Docs site](#build-edit-and-deploy-the-read-the-docs-site)
## Setup
@@ -42,14 +43,14 @@
2. Use `make push-images` to push Docker container images to a Docker image registry. The
default is to push to a local [Docker registry](https://docs.docker.com/registry/deploying/).
Some other registry can be configured by setting the variables described in
- in the [test-config.sh](test/test-config.sh) file, see the [configuration options](#configuration-options)
+ in the [test-config.sh](/test/test-config.sh) file, see the [configuration options](autotest.md#configuration-options)
section below. Alternatively, the registry can also be set with a make variable:
`make push-images REGISTRY_NAME=my-registry:5000`
-See the [Makefile](Makefile) for additional make targets and possible make variables.
+See the [Makefile](/Makefile) for additional make targets and possible make variables.
The source code gets developed and tested using the version of Go that
-is set with `GO_VERSION` in the [Dockerfile](Dockerfile). Some other
+is set with `GO_VERSION` in the [Dockerfile](/Dockerfile). Some other
version may or may not work. In particular, `test_fmt` and
`test_vendor` are known to be sensitive to the version of Go.
@@ -174,7 +175,7 @@ Network ports are opened as configured in manifest files:
- registry endpoint: typical port value 10000, used for PMEM-CSI internal communication
- controller endpoint: typical port value 10001, used for serving CSI API
-- webhook endpoint: disabled by default, port chosen when [enabling the scheduler extensions](./README.md#enable-scheduler-extensions)
+- webhook endpoint: disabled by default, port chosen when [enabling the scheduler extensions](../README.md#enable-scheduler-extensions)
### Local sockets
@@ -250,7 +251,7 @@ The klog.Info statements are used via the verbosity checker using the following
There are also messages using klog.Warning, klog.Error and klog.Fatal, and their formatted counterparts.
-## Notes about switching device mode
+## Switching device mode
If device mode is switched between LVM and direct(aka ndctl), please keep
in mind that PMEM-CSI driver does not clean up or reclaim namespaces,
@@ -264,7 +265,7 @@ will create trouble in another device mode.
- examine LV physical volumes state on a node: `pvs`
- delete LV groups before deleting namespaces to avoid orphaned volume groups: `vgremove VGNAME`
-NOTE: The next **WILL DELETE ALL NAMESPACES** so be careful!
+NOTE: The following **WILL DELETE ALL NAMESPACES** so be careful!
- Delete namespaces on a node using CLI: `ndctl destroy-namespace all --force`
@@ -279,7 +280,7 @@ those (LVM device mode does honor "foreign" namespaces and leaves those
alone) if you have enough space, or you can choose to delete those
using `ndctl` on node.
-## Notes about accessing system directories in a container
+## Accessing system directories in a container
The PMEM-CSI driver will run as container, but it needs access to
system directories /sys and /dev. Two related potential problems have
@@ -345,7 +346,8 @@ $ git clone https://github.com/golang/protobuf.git && cd protobuf
$ make # installs needed binary in $GOPATH/bin/protoc-gen-go
```
-- generate by running in ~/go/src/github.com/intel/pmem-csi/pkg/pmem-registry:
+- generate by running in \~/go/src/github.com/intel/pmem-csi/pkg/pmem-registry:
+
```sh
protoc --plugin=protoc-gen-go=$GOPATH/bin/protoc-gen-go --go_out=plugins=grpc:./ pmem-registry.proto
```
@@ -364,3 +366,117 @@ Note that pandoc is known to produce incorrect TOC entries if headers contain sp
means TOC generation will be more reliable if we avoid non-letter-or-number characters in the headers.
- Another method is to use emacs command markdown-toc-generate-toc and manually check and edit the generated part: we do not show generated 3rd-level headings in README.md.
+
+## Build, edit, and deploy the Read the Docs site
+
+The PMEM-CSI documentation is available as in-repo READMEs and as a GitHub\*
+hosted [website](https://intel.github.io/pmem-csi). The website is created
+using the [Sphinx](https://www.sphinx-doc.org/) documentation generator and
+the well-known [Read the Docs](https://sphinx-rtd-theme.readthedocs.io/)
+theme.
+
+### Build
+
+Building the documentation requires Python 3.x and venv.
+
+```bash
+make vhtml
+```
+
+### Edit
+
+Sphinx uses [reStructuredText](https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html) (reST) as the primary document source type but can be
+extended to use Markdown by adding the ``recommonmark`` and
+``sphinx_markdown_tables`` extensions (see [conf.py](/conf.py)).
+
+Change the navigation tree or add documents by updating the ``toctree``. The
+main ``toctree`` is in ``index.rst``:
+
+``` rst
+.. toctree::
+ :maxdepth: 2
+
+ README.md
+ docs/design.md
+ docs/install.md
+ docs/DEVELOPMENT.md
+ docs/autotest.md
+ examples/readme.rst
+ Project GitHub repository <https://github.com/intel/pmem-csi>
+```
+
+reST files, Markdown files, and URLs can be added to a ``toctree``. The
+``:maxdepth:`` argument dictates the number of header levels that will be
+displayed on that page. This website replaces the ``index.html`` output of
+this project with a redirect to ``README.html`` (the conversion of the top
+level README) to more closely match the in-repo documentation.
+
+Any reST or Markdown file not referenced by a ``toctree`` will generate a
+warning in the build. This document has a ``toctree`` in:
+
+1. ``index.rst``
+2. ``examples/readme.rst``
+
+NOTE: Though GitHub can parse reST files, the ``toctree`` directive is Sphinx
+specific, so it is not understood by GitHub. ``examples/readme.rst`` is a good
+example. Adding the ``:hidden:`` argument to the ``toctree`` directive means
+that the ``toctree`` is not displayed in the Sphinx built version of the page.
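+
+For illustration only, a hidden ``toctree`` in the same style as
+``examples/readme.rst`` might look like this (the file names are made up):
+
+``` rst
+.. toctree::
+   :hidden:
+
+   some-overview.md
+   some-tutorial.md
+```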
+
+### Custom link handling
+
+This project has some custom capabilities added to the [conf.py](/conf.py) to
+fix or improve how Sphinx generates the HTML site.
+
+1. Markdown files: Converts references to Markdown files that include anchors.
+ ``` md
+ [configuration options](autotest.md#configuration-options)
+ ```
+2. reST files: Fixes explicit links to Markdown files.
+ ``` rst
+ `Google Cloud Engine <gce.md>`__
+ ```
+3. Markdown files: Fixes references to reST files.
+ ``` md
+ [Application examples](examples/readme.rst)
+ ```
+4. Markdown files: Fixes links to files and directories within the GitHub repo.
+ ``` md
+ [Makefile](/Makefile)
+ [deploy/kustomize](/deploy/kustomize)
+ ```
+ Links to files can be fixed one of two ways, which can be set in the
+ [conf.py](/conf.py).
+
+ ``` python
+ baseBranch = "devel"
+ useGitHubURL = True
+ commitSHA = getenv('GITHUB_SHA')
+ githubBaseURL = "https://github.com/intelkevinputnam/pmem-csi/"
+ ```
+
+ If ``useGitHubURL`` is set to True, it will try to create links based on
+ your ``githubBaseURL`` and the SHA for the commit to the GitHub repo
+ (determined by the GitHub workflow on merge). If there is no SHA available,
+ it will use the value of ``baseBranch``.
+
+ If ``useGitHubURL`` is set to False, it will copy the files to the HTML
+ output directory and provide links to that location.
+
+ NOTE: Links to files and directories should use absolute paths relative to
+ the repo (see Makefile and deploy/kustomize above). This will work both for
+ the Sphinx build and when viewing in the GitHub repo.
+
+ Links to directories are always converted to links to the GitHub repository.
+
+### Deploying with GitHub actions
+
+The publish [workflow](/.github/workflows/publish.yml) is run each time a commit is made to the designated branch and pushes the rendered HTML to the gh-pages branch. Other rules can be created for other branches.
+
+``` yaml
+on:
+ push:
+ branches:
+ - devel
+```
+
+NOTE: Create a secret called ``ACCESS_TOKEN`` in repo>settings>secrets with a [token](https://help.github.com/en/articles/creating-a-personal-access-token-for-the-command-line) generated by a user with write privileges to enable the automated push to the gh-pages branch.
\ No newline at end of file
diff --git a/docs/autotest.md b/docs/autotest.md
new file mode 100644
index 0000000000..e75097c562
--- /dev/null
+++ b/docs/autotest.md
@@ -0,0 +1,156 @@
+# Automated testing
+
+- [Automated testing](#automated-testing)
+ - [Unit testing and code quality](#unit-testing-and-code-quality)
+ - [QEMU and Kubernetes](#qemu-and-kubernetes)
+ - [Starting and stopping a test cluster](#starting-and-stopping-a-test-cluster)
+ - [Running commands on test cluster nodes over ssh](#running-commands-on-test-cluster-nodes-over-ssh)
+ - [Configuration options](#configuration-options)
+ - [Running E2E tests](#running-e2e-tests)
+
+## Unit testing and code quality
+
+Use the `make test` command.
+
+## QEMU and Kubernetes
+
+E2E testing relies on a cluster running inside multiple QEMU virtual
+machines deployed by [GoVM](https://github.com/govm-project/govm). The
+same cluster can also be used interactively when real hardware is not
+available.
+
+E2E testing is known to work on a Linux development host system. The user
+must be allowed to use Docker.
+
+KVM must be enabled. Usually this is the case when `/dev/kvm` exists.
+The current user does not need the privileges to use KVM and QEMU
+doesn't have to be installed because GoVM will run QEMU inside a
+container with root privileges.
+
+Note that cloud providers often don't offer KVM support on their
+regular machines. Search for "nested virtualization" for your provider
+to determine whether and how it supports KVM.
+
+Nested virtualization is also needed when using Kata Containers inside
+the cluster. On Intel-based machines it can be enabled by loading the
+`kvm_intel` module with `nested=1` (see
+https://wiki.archlinux.org/index.php/KVM#Nested_virtualization). At
+this time, Kata Containers up to and including 1.9.1 is [not
+compatible with
+PMEM-CSI](https://github.com/intel/pmem-csi/issues/303) because
+volumes are not passed in as PMEM, but Kata Containers [can be
+installed](https://github.com/kata-containers/packaging/tree/master/kata-deploy#kubernetes-quick-start)
+and used for applications that are not using PMEM.
+
+The `clear-cloud` image is downloaded automatically. By default,
+four different virtual machines are prepared. Each image is pre-configured
+with its own hostname and with network.
+
+The images will contain the latest
+[Clear Linux OS](https://clearlinux.org/) and have the Kubernetes
+version supported by Clear Linux installed.
+
+PMEM-CSI images must have been created and published in some Docker
+registry, as described earlier in [build PMEM-CSI](DEVELOPMENT.md#build-pmem-csi).
+In addition, that registry must be accessible from inside the
+cluster. That works for the default (a local registry in the build
+host) but may require setting additional [configuration
+options](#configuration-options) for other scenarios.
+
+## Starting and stopping a test cluster
+
+`make start` will bring up a Kubernetes test cluster inside four QEMU
+virtual machines.
+The first node is the Kubernetes master without
+persistent memory.
+The other three nodes are worker nodes with one emulated 32GB NVDIMM each.
+After the cluster has been formed, `make start` adds `storage=pmem` label
+to the worker nodes and deploys the PMEM-CSI driver.
+Once `make start` completes, the cluster is ready for interactive use via
+`kubectl` inside the virtual machine. Alternatively, you can also
+set `KUBECONFIG` as shown at the end of the `make start` output
+and use `kubectl` binary on the host running VMs.
+
+Use `make stop` to stop and remove the virtual machines.
+
+`make restart` can be used to cleanly reboot all virtual
+machines. This is useful during development after a `make push-images`
+to ensure that the cluster runs those rebuilt images.
+
+## Running commands on test cluster nodes over ssh
+
+`make start` generates ssh wrapper scripts `_work/pmem-govm/ssh.N` for each
+test cluster node which are handy for running a single command or to
+start an interactive shell. Examples:
+
+`_work/pmem-govm/ssh.0 kubectl get pods` runs a kubectl command on
+the master node.
+
+`_work/pmem-govm/ssh.1` starts a shell on the first worker node.
+
+## Deploying PMEM-CSI on a test cluster
+
+After `make start`, PMEM-CSI is *not* installed yet. Either install
+manually as [described for a normal
+cluster](#run-pmem-csi-on-kubernetes) or use the
+[setup-deployment.sh](/test/setup-deployment.sh) script.
+
+## Configuration options
+
+Several aspects of the cluster and build setup can be configured by overriding
+the settings in the [test-config.sh](/test/test-config.sh) file. See
+that file for a description of all options. Options can be set as
+environment variables of `make start` on a case-by-case basis or
+permanently by creating a file like `test/test-config.d/my-config.sh`.
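+
+For example, a file with the following content (hypothetical values, shown
+only as a sketch) makes a distro override permanent:
+
+``` sh
+# test/test-config.d/my-config.sh
+TEST_DISTRO=fedora
+```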
+
+Multiple different clusters can be brought up in parallel by changing
+the default `pmem-govm` cluster name via the `CLUSTER` env variable.
+
+For example, this invocation sets up a cluster using the non-default
+Fedora distro:
+
+``` sh
+TEST_DISTRO=fedora CLUSTER=fedora-govm make start
+```
+
+See additional details in [test/test-config.d](/test/test-config.d).
+
+## Running E2E tests
+
+`make test_e2e` will run [csi-test
+sanity](https://github.com/kubernetes-csi/csi-test/tree/master/pkg/sanity)
+tests and some [Kubernetes storage
+tests](https://github.com/kubernetes/kubernetes/tree/master/test/e2e/storage/testsuites)
+against the PMEM-CSI driver.
+
+When [ginkgo](https://onsi.github.io/ginkgo/) is installed, then it
+can be used to run individual tests and to control additional aspects
+of the test run. For example, to run just the E2E provisioning test
+(create PVC, write data in one pod, read it in another) in verbose mode:
+
+``` sh
+$ KUBECONFIG=$(pwd)/_work/pmem-govm/kube.config REPO_ROOT=$(pwd) ginkgo -v -focus=pmem-csi.*should.provision.storage.with.defaults ./test/e2e/
+Nov 26 11:21:28.805: INFO: The --provider flag is not set. Treating as a conformance test. Some tests may not be run.
+Running Suite: PMEM E2E suite
+=============================
+Random Seed: 1543227683 - Will randomize all specs
+Will run 1 of 61 specs
+
+Nov 26 11:21:28.812: INFO: checking config
+Nov 26 11:21:28.812: INFO: >>> kubeConfig: /nvme/gopath/src/github.com/intel/pmem-csi/_work/pmem-govm/kube.config
+Nov 26 11:21:28.817: INFO: Waiting up to 30m0s for all (but 0) nodes to be schedulable
+...
+Ran 1 of 61 Specs in 58.465 seconds
+SUCCESS! -- 1 Passed | 0 Failed | 0 Pending | 60 Skipped
+PASS
+
+Ginkgo ran 1 suite in 1m3.850672246s
+Test Suite Passed
+```
+
+It is also possible to run just the sanity tests until one of them fails:
+
+``` sh
+$ REPO_ROOT=`pwd` ginkgo '-focus=sanity' -failFast ./test/e2e/
+...
+```
\ No newline at end of file
diff --git a/docs/design.md b/docs/design.md
new file mode 100644
index 0000000000..25225b5263
--- /dev/null
+++ b/docs/design.md
@@ -0,0 +1,391 @@
+# Design and architecture
+
+- [Design](#design)
+ - [Architecture and Operation](#architecture-and-operation)
+ - [LVM device mode](#lvm-device-mode)
+ - [Direct device mode](#direct-device-mode)
+ - [Driver modes](#driver-modes)
+ - [Driver Components](#driver-components)
+ - [Communication between components](#communication-between-components)
+ - [Security](#security)
+ - [Volume Persistency](#volume-persistency)
+ - [Capacity-aware pod scheduling](#capacity-aware-pod-scheduling)
+
+## Architecture and Operation
+
+The PMEM-CSI driver can operate in two different device modes: *LVM* and
+*direct*. This table contains an overview and comparison of those modes.
+There is a more detailed explanation in the following paragraphs.
+
+| |`LVM` |`direct` |
+|:-- |:-- |:-- |
+|Main advantage |avoids free space fragmentation<sup>1</sup> |simpler, somewhat faster, but free space may get fragmented<sup>1</sup> |
+|What is served |LVM logical volume |pmem block device |
+|Region affinity<sup>2</sup> |yes: one LVM volume group is created per region, and a volume has to be in one volume group |yes: namespace can belong to one region only |
+|Startup |two extra stages: pmem-ns-init (creates namespaces), vgm (creates volume groups) |no extra steps at startup |
+|Namespace modes |`fsdax` mode<sup>3</sup> namespaces pre-created as pools |namespace in `fsdax` mode created directly, no need to pre-create pools |
+|Limiting space usage | can leave part of device unused during pools creation |no limits, creates namespaces on device until it runs out of space |
+| *Name* field in namespace | *Name* gets set to 'pmem-csi' to achieve own vs. foreign marking | *Name* gets set to VolumeID, without attempting own vs. foreign marking |
+|Minimum volume size| 4 MB | 1 GB (see also alignment adjustment below) |
+|Alignment requirements |LVM creation aligns size up to next 4MB boundary |driver aligns size up to next alignment boundary. The default alignment step is 1 GB. Device(s) in interleaved mode will require larger minimum as size has to be at least one alignment step. The possibly bigger alignment step is calculated as interleave-set-size multiplied by 1 GB |
+
+<sup>1</sup> **Free space fragmentation** is a problem when there appears to
+be enough free capacity for a new namespace, but there isn't a contiguous
+region big enough to allocate it. The PMEM-CSI driver is only capable of
+allocating contiguous memory to a namespace and cannot de-fragment or combine
+smaller blocks. For example, this could happen when you create a 63 GB
+namespace, followed by a 1 GB namespace, and then delete the 63 GB namespace.
+Even though there is 127 GB available, the driver cannot create a namespace
+larger than 64 GB.
+
+```
+---------------------------------------------------------------------
+| 63 GB free | 1GB used | 64 GB free |
+---------------------------------------------------------------------
+```
+
+<sup>2</sup> **Region affinity** means that all parts of a provisioned file
+system are physically located on device(s) that belong to same PMEM region.
+This is important on multi-socket systems where media access time may vary
+based on where the storage device(s) are physically attached.
+
+<sup>3</sup> **fsdax mode** is required for NVDIMM
+namespaces. See [Persistent Memory
+Programming](https://pmem.io/ndctl/ndctl-create-namespace.html) for
+details. `devdax` mode is not supported. Though a
+raw block volume would be useful when a filesystem isn't needed, Kubernetes
+cannot handle [binding a character device to a loop device](https://github.com/kubernetes/kubernetes/blob/7c87b5fb55ca096c007c8739d4657a5a4e29fb09/pkg/volume/util/util.go#L531-L534).
+
+## LVM device mode
+
+In Logical Volume Management (LVM) mode the PMEM-CSI driver
+uses LVM to avoid the risk of fragmentation. The
+LVM logical volumes are served to satisfy API requests. There is one volume
+group created per region, ensuring the region-affinity of served volumes.
+
+![devicemode-lvm diagram](/docs/images/devicemodes/pmem-csi-lvm.png)
+
+The driver consists of three separate binaries that form two
+initialization stages and a third API-serving stage.
+
+During startup, the driver scans persistent memory for regions and
+namespaces, and tries to create more namespaces using all or part
+(selectable via option) of the remaining available space. This first
+stage is performed by a separate entity `pmem-ns-init`.
+
+The second stage of initialization arranges physical volumes provided
+by namespaces into LVM volume groups. This is performed by a separate
+binary `pmem-vgm`.
+
+After two initialization stages, the third binary `pmem-csi-driver`
+starts serving CSI API requests.
+
+### Namespace modes in LVM device mode
+
+The PMEM-CSI driver pre-creates namespaces in `fsdax` mode forming
+the corresponding LVM volume group. The amount of space to be
+used is determined using the option `-useforfsdax` given to `pmem-ns-init`.
+This option specifies an integer percentage limit.
+The default value is `-useforfsdax=100`.
+
+### Using limited amount of total space in LVM device mode
+
+The PMEM-CSI driver can leave space on devices for others, and
+recognize "own" namespaces. Leaving space for others can be achieved
+by specifying a lower-than-100 value for the `-useforfsdax` option.
+The distinction "own" vs. "foreign" is
+implemented by setting the _Name_ field in namespace to a static
+string "pmem-csi" during namespace creation. When adding physical
+volumes to volume groups, only those physical volumes that are based on
+namespaces with the name "pmem-csi" are considered.
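+
+As an illustration (the value here is made up; this stage normally runs
+automatically during driver startup rather than by hand), limiting the fsdax
+pools to half of the available space would look like:
+
+``` sh
+# Illustrative only: use 50% of each region for fsdax pools, leave the rest alone
+pmem-ns-init -useforfsdax=50
+```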
+
+## Direct device mode
+
+The following diagram illustrates the operation in Direct device mode:
+![devicemode-direct diagram](/docs/images/devicemodes/pmem-csi-direct.png)
+
+In direct device mode the PMEM-CSI driver allocates namespaces directly
+from the storage device. This creates a risk of device space fragmentation,
+but reduces complexity and run-time overhead by avoiding an additional
+device mapping layer. Direct mode also ensures the region-affinity of
+served volumes, because a provisioned volume can belong to one region
+only.
+
+In direct mode, the two preparation stages used in LVM mode are not
+needed.
+
+### Namespace modes in direct device mode
+
+The PMEM-CSI driver creates a namespace directly in the mode that is
+requested by the volume creation request, thus bypassing the complexity of
+pre-allocated pools that are used in LVM device mode.
+
+### Using limited amount of total space in direct device mode
+
+In direct device mode, the driver does not attempt to limit space
+use. It also does not mark "own" namespaces. The _Name_ field of a
+namespace gets the value of the VolumeID.
+
+## Driver modes
+
+The PMEM-CSI driver supports running in different modes, which can be
+controlled by passing one of the below options to the driver's
+'_-mode_' command line option. In each mode, it starts a different set
+of open source Remote Procedure Call (gRPC)
+[servers](#driver-components) on given driver endpoint(s).
+
+* **_Controller_** should run as a single instance at cluster level. When the
+ driver is running in _Controller_ mode, it forwards the pmem volume
+ create/delete requests to the registered node controller servers
+ running on the worker node. In this mode, the driver starts the
+ following gRPC servers:
+
+ * [IdentityServer](#identity-server)
+ * [NodeRegistryServer](#node-registry-server)
+ * [MasterControllerServer](#master-controller-server)
+
+* One **_Node_** instance should run on each
+ worker node that has persistent memory devices installed. When the
+ driver starts in such mode, it registers with the _Controller_
+ driver running on a given _-registryEndpoint_. In this mode, the
+ driver starts the following servers:
+
+ * [IdentityServer](#identity-server)
+ * [NodeControllerServer](#node-controller-server)
+ * [NodeServer](#node-server)
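+
+As a rough sketch (the endpoint values below are made up for illustration;
+the deployment files contain the real arguments), the two modes are selected
+like this:
+
+``` sh
+# Cluster-level controller (hypothetical endpoint value)
+pmem-csi-driver -mode=controller -registryEndpoint=tcp://0.0.0.0:10000
+
+# Per-node driver, registering itself with the controller (hypothetical endpoint value)
+pmem-csi-driver -mode=node -registryEndpoint=tcp://pmem-csi-controller:10000
+```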
+
+## Driver Components
+
+### Identity Server
+
+This gRPC server operates on a given endpoint in all driver modes and
+implements the CSI [Identity
+interface](https://github.com/container-storage-interface/spec/blob/master/spec.md#identity-service-rpc).
+
+### Node Registry Server
+
+When the PMEM-CSI driver runs in _Controller_ mode, it starts a gRPC
+server on a given endpoint(_-registryEndpoint_) and serves the
+[RegistryServer](/pkg/pmem-registry/pmem-registry.proto) interface. The
+driver(s) running in _Node_ mode can register themselves with node
+specific information such as node id,
+[NodeControllerServer](#node-controller-server) endpoint, and their
+available persistent memory capacity.
+
+### Master Controller Server
+
+This gRPC server is started by the PMEM-CSI driver running in
+_Controller_ mode and serves the
+[Controller](https://github.com/container-storage-interface/spec/blob/master/spec.md#controller-service-rpc)
+interface defined by the CSI specification. The server responds to
+CreateVolume(), DeleteVolume(), ControllerPublishVolume(),
+ControllerUnpublishVolume(), and ListVolumes() calls coming from
+the external-provisioner and external-attacher sidecars. It
+forwards the publish and unpublish volume requests to the appropriate
+[Node controller server](#node-controller-server) running on a worker
+node that was registered with the driver.
+
+### Node Controller Server
+
+This gRPC server is started by the PMEM-CSI driver running in _Node_
+mode and implements the
+[ControllerPublishVolume](https://github.com/container-storage-interface/spec/blob/master/spec.md#controllerpublishvolume)
+and
+[ControllerUnpublishVolume](https://github.com/container-storage-interface/spec/blob/master/spec.md#controllerunpublishvolume)
+methods of the [Controller
+service](https://github.com/container-storage-interface/spec/blob/master/spec.md#controller-service-rpc)
+interface defined by the CSI specification. It serves the
+ControllerPublishVolume() and ControllerUnpublishVolume() requests coming
+from the [Master controller server](#master-controller-server) and
+creates/deletes persistent memory devices.
+
+### Node Server
+
+This gRPC server is started by the driver running in _Node_ mode and
+implements the [Node
+service](https://github.com/container-storage-interface/spec/blob/master/spec.md#node-service-rpc)
+interface defined in the CSI specification. It serves the
+NodeStageVolume(), NodeUnstageVolume(), NodePublishVolume(), and
+NodeUnpublishVolume() requests coming from the Container Orchestrator
+(CO).
+
+## Communication between components
+
+The following diagram illustrates the communication channels between driver components:
+![communication diagram](/docs/images/communication/pmem-csi-communication-diagram.png)
+
+## Security
+
+All PMEM-CSI specific communication [shown in above
+section](#communication-between-components) between Master
+Controller([RegistryServer](#node-registry-server),
+[MasterControllerServer](#master-controller-server)) and
+NodeControllers([NodeControllerServer](#node-controller-server)) is
+protected by mutual TLS. Both client and server must identify
+themselves and the certificate they present must be trusted. The
+common name in each certificate is used to identify the different
+components. The following common names have a special meaning:
+
+- `pmem-registry` is used by the [RegistryServer](#node-registry-server).
+- `pmem-node-controller` is used by [NodeControllerServers](#node-controller-server)
+
+The [`test/setup-ca.sh`](/test/setup-ca.sh)
+script shows how to generate self-signed certificates. The test cluster is set
+up using certificates created by that script, with secrets prepared by
+[`test/setup-deployment.sh`](/test/setup-deployment.sh) before
+deploying the driver using the provided [deployment files](/deploy/).
+
+Beware that these are just examples. Administrators of a cluster must
+ensure that they choose key lengths and algorithms of sufficient
+strength for their purposes and manage certificate distribution.
+
+A production deployment can improve upon that by using some other key
+delivery mechanism, like for example
+[Vault](https://www.vaultproject.io/).
+
+
+
+## Volume Persistency
+
+In a typical CSI deployment, volumes are provided by a storage backend
+that is independent of a particular node. When a node goes offline,
+the volume can be mounted elsewhere. But PMEM volumes are *local* to
+node and thus can only be used on the node where they were
+created. This means the applications using PMEM volume cannot freely
+move between nodes. This limitation needs to be considered when
+designing and deploying applications that are to use *local storage*.
+
+These are the volume persistency models considered for implementation
+in PMEM-CSI to serve different application use cases:
+
+* **Persistent volumes**
+A volume gets created independently of the application, on some node
+where there is enough free space. Applications using such a volume are
+then forced to run on that node and cannot run when the node is
+down. Data is retained until the volume gets deleted.
+
+* **Ephemeral volumes**
+Each time an application starts to run on a node, a new volume is
+created for it on that node. When the application stops, the volume is
+deleted. The volume cannot be shared with other applications. Data on
+this volume is retained only while the application runs.
+
+* **Cache volumes**
+Volumes are pre-created on a certain set of nodes, each with its own
+local data. Applications are started on those nodes and then get to
+use the volume on their node. Data persists across application
+restarts. This is useful when the data is only cached information that
+can be discarded and reconstructed at any time *and* the application
+can reuse existing local data when restarting.
+
+Volume | Kubernetes | PMEM-CSI | Limitations
+--- | --- | --- | ---
+Persistent | supported | supported | topology aware scheduling<sup>1</sup>
+Ephemeral | supported<sup>2</sup> | supported | resource constraints<sup>3</sup>
+Cache | supported | supported | topology aware scheduling<sup>1</sup>
+
+<sup>1</sup> [Topology aware
+scheduling](https://github.com/kubernetes/enhancements/issues/490)
+ensures that an application runs on a node where the volume was
+created. For CSI-based drivers like PMEM-CSI, Kubernetes >= 1.13 is
+needed. On older Kubernetes releases, pods must be scheduled manually
+onto the right node(s).
+
+<sup>2</sup> [CSI ephemeral volumes](https://kubernetes.io/docs/concepts/storage/volumes/#csi-ephemeral-volumes)
+feature support is alpha in Kubernetes v1.15, and beta in v1.16.
+
+<sup>3</sup> The upstream design for ephemeral volumes currently does
+not take [resource
+constraints](https://github.com/kubernetes/enhancements/pull/716#discussion_r250536632)
+into account. If an application gets scheduled onto a node and then
+creating the ephemeral volume on that node fails, the application on
+the node cannot start until resources become available.
+
+See [exposing persistent and cache volumes](install.md#expose-persistent-and-cache-volumes-to-applications) for configuration information.
+
+## Capacity-aware pod scheduling
+
+PMEM-CSI implements the CSI `GetCapacity` call, but Kubernetes
+currently doesn't call that and schedules pods onto nodes without
+being aware of available storage capacity on the nodes. The effect is
+that pods using volumes with late binding may get tentatively assigned
+to a node and then get stuck because that decision is not reconsidered
+when the volume cannot be created there ([a
+bug](https://github.com/kubernetes/kubernetes/issues/72031)). Even if
+that decision is reconsidered, the same node may get selected again
+because Kubernetes does not get informed about the insufficient
+storage. Pods with ephemeral inline volumes always get stuck because
+the decision to use the node [is final](https://github.com/kubernetes-sigs/descheduler/issues/62).
+
+Work is [under
+way](https://github.com/kubernetes/enhancements/pull/1353) to enhance
+scheduling in Kubernetes. In the meantime, PMEM-CSI provides two components
+that help with pod scheduling:
+
+### Scheduler extender
+
+When a pod requests the special [extended
+resource](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#extended-resources)
+called `pmem-csi.intel.com/scheduler`, the Kubernetes scheduler calls
+a [scheduler
+extender](https://github.com/kubernetes/community/blob/master/contributors/design-proposals/scheduling/scheduler_extender.md)
+provided by PMEM-CSI with a list of nodes that a pod might run
+on. This extender is implemented in the master controller and thus can
+connect to the controller on each of these nodes to check for
+capacity. PMEM-CSI then filters out all nodes which currently do not
+have enough storage left for the volumes that still need to be
+created. This considers inline ephemeral volumes and all unbound
+volumes, regardless whether they use late binding or immediate
+binding.
+
+This special scheduling can be requested manually by adding this snippet
+to one container in the pod spec:
+```
+containers:
+- name: some-container
+ ...
+ resources:
+ limits:
+ pmem-csi.intel.com/scheduler: "1"
+ requests:
+ pmem-csi.intel.com/scheduler: "1"
+```
+
+This scheduler extender is optional and not necessarily installed in
+all clusters that have PMEM-CSI. Don't add this extended resource
+unless the scheduler extender is installed, otherwise the pod won't
+start!
+
+See our [implementation](http://github.com/intel/pmem-csi/tree/devel/pkg/scheduler) of a scheduler extender.
+
+### Pod admission webhook
+
+Having to add `pmem-csi.intel.com/scheduler` manually is not
+user-friendly. To simplify this, PMEM-CSI provides a [mutating
+admission
+webhook](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/)
+which intercepts the creation of all pods. If that pod uses inline
+ephemeral volumes or volumes with late binding that are provided by
+PMEM-CSI, the webhook transparently adds the extended resource
+request. PMEM-CSI volumes with immediate binding are ignored because
+for those the normal topology support ensures that unsuitable nodes
+are filtered out.
+
+The webhook can only do that if the persistent volume claim (PVC) and
+its storage class have been created already. This is normally not
+required: it's okay to create the pod first, then later add the
+PVC. The pod simply won't start in the meantime.
+
+The webhook deals with this uncertainty by allowing the creation of
+the pod without adding the extended resource when it lacks the
+necessary information. The alternative would be to reject the pod, but
+that would be a change of behavior of the cluster that may affect also pods
+that don't use PMEM-CSI at all.
+
+Users must take care to create PVCs first, then the pods if they want
+to use the webhook. In practice, that is often already done because it
+is more natural, so it is not a big limitation.
\ No newline at end of file
diff --git a/docs/html/index.html b/docs/html/index.html
new file mode 100644
index 0000000000..5f62e3d9be
--- /dev/null
+++ b/docs/html/index.html
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/html/index2.html b/docs/html/index2.html
new file mode 100644
index 0000000000..c19cf06e84
--- /dev/null
+++ b/docs/html/index2.html
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/docs/install.md b/docs/install.md
new file mode 100644
index 0000000000..783ac37e4d
--- /dev/null
+++ b/docs/install.md
@@ -0,0 +1,655 @@
+# Instructions for Admins and Users
+
+- [Prerequisites](#prerequisites)
+ - [Software required](#software-required)
+ - [Hardware required](#hardware-required)
+ - [Persistent memory pre-provisioning](#persistent-memory-pre-provisioning)
+- [Installation and setup](#installation-and-setup)
+ - [Get source code](#get-source-code)
+ - [Run PMEM-CSI on Kubernetes](#run-pmem-csi-on-kubernetes)
+ - [Expose persistent and cache volumes to applications](#expose-persistent-and-cache-volumes-to-applications)
+ - [Raw block volumes](#raw-block-volumes)
+ - [Enable scheduler extensions](#enable-scheduler-extensions)
+- [Filing issues and contributing](#filing-issues-and-contributing)
+
+## Prerequisites
+
+### Software required
+
+The recommended minimum Linux kernel version for running the PMEM-CSI driver is 4.15. See [Persistent Memory Programming](https://pmem.io/2018/05/15/using_persistent_memory_devices_with_the_linux_device_mapper.html) for more details about supported kernel versions.
+
+### Hardware required
+
+Persistent memory device(s) are required for operation. However, some
+development and testing can be done using QEMU-emulated persistent
+memory devices. See the ["QEMU and Kubernetes"](autotest.md#qemu-and-kubernetes)
+section for the commands that create such a virtual test cluster.
+
+### Persistent memory pre-provisioning
+
+The PMEM-CSI driver needs pre-provisioned regions on the NVDIMM
+device(s). The PMEM-CSI driver itself intentionally leaves that to the
+administrator who then can decide how much and how PMEM is to be used
+for PMEM-CSI.
+
+Beware that the PMEM-CSI driver will run without errors on a node
+where PMEM was not prepared for it. It will then report zero local
+storage for that node, something that currently is only visible in the
+log files.
+
+When running the Kubernetes cluster and PMEM-CSI on bare metal,
+the [ipmctl](https://github.com/intel/ipmctl) utility can be used to create regions.
+App Direct Mode has two configuration options - interleaved or non-interleaved.
+One region per NVDIMM is created in the non-interleaved configuration.
+In such a configuration, a PMEM-CSI volume cannot be larger than one NVDIMM.
+
+Example of creating regions without interleaving, using all NVDIMMs:
+```sh
+# ipmctl create -goal PersistentMemoryType=AppDirectNotInterleaved
+```
+
+Alternatively, multiple NVDIMMs can be combined to form an interleaved set.
+This causes the data to be striped over multiple NVDIMM devices
+for improved read/write performance, allowing one region (and thus one PMEM-CSI volume)
+to be larger than a single NVDIMM.
+
+Example of creating regions in interleaved mode, using all NVDIMMs:
+```sh
+# ipmctl create -goal PersistentMemoryType=AppDirect
+```
+
+When running inside virtual machines, each virtual machine typically
+already gets access to one region and `ipmctl` is not needed inside
+the virtual machine. Instead, that region must be made available for
+use with PMEM-CSI because when the virtual machine comes up for the
+first time, the entire region is already allocated for use as a single
+block device:
+``` sh
+# ndctl list -RN
+{
+ "regions":[
+ {
+ "dev":"region0",
+ "size":34357641216,
+ "available_size":0,
+ "max_available_extent":0,
+ "type":"pmem",
+ "persistence_domain":"unknown",
+ "namespaces":[
+ {
+ "dev":"namespace0.0",
+ "mode":"raw",
+ "size":34357641216,
+ "sector_size":512,
+ "blockdev":"pmem0"
+ }
+ ]
+ }
+ ]
+}
+# ls -l /dev/pmem*
+brw-rw---- 1 root disk 259, 0 Jun 4 16:41 /dev/pmem0
+```
+
+Labels must be initialized in such a region, which must be performed
+once after the first boot:
+``` sh
+# ndctl disable-region region0
+disabled 1 region
+# ndctl init-labels nmem0
+initialized 1 nmem
+# ndctl enable-region region0
+enabled 1 region
+# ndctl list -RN
+[
+ {
+ "dev":"region0",
+ "size":34357641216,
+ "available_size":34357641216,
+ "max_available_extent":34357641216,
+ "type":"pmem",
+ "iset_id":10248187106440278,
+ "persistence_domain":"unknown"
+ }
+]
+# ls -l /dev/pmem*
+ls: cannot access '/dev/pmem*': No such file or directory
+```
+
+## Installation and setup
+
+### Get source code
+
+PMEM-CSI uses Go modules and thus can be checked out and (if that should be desired)
+built anywhere in the filesystem. Pre-built container images are available and thus
+users don't need to build from source, but they will still need some additional files.
+To get the source code, use:
+
+```
+git clone https://github.com/intel/pmem-csi
+```
+
+### Run PMEM-CSI on Kubernetes
+
+This section assumes that a Kubernetes cluster is already available
+with at least one node that has persistent memory device(s). For development or
+testing, it is also possible to use a cluster that runs on QEMU virtual
+machines, see the ["QEMU and Kubernetes"](autotest.md#qemu-and-kubernetes) section.
+
+- **Make sure that the alpha feature gates CSINodeInfo and CSIDriverRegistry are enabled**
+
+The method to configure alpha feature gates may vary, depending on the Kubernetes deployment.
+It may not be necessary anymore when the feature has reached beta state, which depends
+on the Kubernetes version.
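+
+As one hedged example (the exact mechanism depends on how the cluster was
+deployed), the gates can be passed to the relevant components via the
+standard Kubernetes flag:
+
+```sh
+# Illustrative only: enable the gates on kubelet and kube-apiserver
+--feature-gates=CSINodeInfo=true,CSIDriverRegistry=true
+```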
+
+- **Label the cluster nodes that provide persistent memory device(s)**
+
+```sh
+ $ kubectl label node <node name> storage=pmem
+```
+
+- **Set up certificates**
+
+Certificates are required as explained in [Security](design.md#security).
+If you are not using the test cluster described in
+[Starting and stopping a test cluster](autotest.md#starting-and-stopping-a-test-cluster)
+where certificates are created automatically, you must set up certificates manually.
+This can be done by running the `./test/setup-ca-kubernetes.sh` script for your cluster.
+This script requires "cfssl" tools which can be downloaded.
+These are the steps for manual set-up of certificates:
+
+- Download cfssl tools
+
+```sh
+ $ curl -L https://pkg.cfssl.org/R1.2/cfssl_linux-amd64 -o _work/bin/cfssl --create-dirs
+ $ curl -L https://pkg.cfssl.org/R1.2/cfssljson_linux-amd64 -o _work/bin/cfssljson --create-dirs
+ $ chmod a+x _work/bin/cfssl _work/bin/cfssljson
+```
+
+- Run certificates set-up script
+
+```sh
+ $ KUBECONFIG="<path to your kubeconfig>" PATH="$PATH:$PWD/_work/bin" ./test/setup-ca-kubernetes.sh
+```
+
+- **Deploy the driver to Kubernetes**
+
+The `deploy/kubernetes-<kubernetes version>` directory contains
+`pmem-csi*.yaml` files which can be used to deploy the driver on that
+Kubernetes version. The files in the directory with the highest
+Kubernetes version might also work for more recent Kubernetes
+releases. All of these deployments use images published by Intel on
+[Docker Hub](https://hub.docker.com/u/intel).
+
+For each Kubernetes version, four different deployment variants are provided:
+
+ - `direct` or `lvm`: one uses direct device mode, the other LVM device mode.
+ - `testing`: the variants with `testing` in the name enable debugging
+ features and shouldn't be used in production.
+
+For example, to deploy for production with LVM device mode onto Kubernetes 1.14, use:
+
+```sh
+ $ kubectl create -f deploy/kubernetes-1.14/pmem-csi-lvm.yaml
+```
+
+The PMEM-CSI [scheduler extender](design.md#scheduler-extender) and
+[webhook](design.md#pod-admission-webhook) are not enabled in this basic
+installation. See [below](#enable-scheduler-extensions) for
+instructions about that.
+
+These variants were generated with
+[`kustomize`](https://github.com/kubernetes-sigs/kustomize).
+`kubectl` >= 1.14 includes some support for that. The sub-directories
+of [deploy/kustomize](/deploy/kustomize)`-` can be used as bases
+for `kubectl kustomize`. For example:
+
+ - Change namespace:
+ ```
+ $ mkdir -p my-pmem-csi-deployment
+ $ cat >my-pmem-csi-deployment/kustomization.yaml <<EOF
+ ...
+ EOF
+ $ cat >my-pmem-csi-deployment/lvm-parameters-patch.yaml <<EOF
+ ...
+ EOF
+ $ kubectl get nodes --show-labels
+ ...,pmem-csi.intel.com/node=<node name>,storage=pmem
+```
+
+If **storage=pmem** is missing, label manually as described above. If
+**pmem-csi.intel.com/node** is missing, then double-check that the
+alpha feature gates are enabled, that the CSI driver is running on the node,
+and that the driver's log output doesn't contain errors.
+
+- **Define two storage classes using the driver**
+
+```sh
+ $ kubectl create -f deploy/kubernetes-<version>/pmem-storageclass-ext4.yaml
+ $ kubectl create -f deploy/kubernetes-<version>/pmem-storageclass-xfs.yaml
+```
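+
+The referenced files define storage classes roughly of the following shape
+(a sketch only; the name and the provisioner/parameter values are assumptions,
+see the deployment files for the authoritative definitions):
+
+```yaml
+apiVersion: storage.k8s.io/v1
+kind: StorageClass
+metadata:
+  name: pmem-csi-sc-ext4
+provisioner: pmem-csi.intel.com        # assumed CSI driver name
+parameters:
+  csi.storage.k8s.io/fstype: ext4      # assumed parameter for the filesystem type
+```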
+
+- **Provision two pmem-csi volumes**
+
+```sh
+ $ kubectl create -f deploy/kubernetes-<version>/pmem-pvc.yaml
+```
+
+- **Verify two Persistent Volume Claims have 'Bound' status**
+
+```sh
+ $ kubectl get pvc
+ NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE
+ pmem-csi-pvc-ext4 Bound pvc-f70f7b36-6b36-11e9-bf09-deadbeef0100 4Gi RWO pmem-csi-sc-ext4 16s
+ pmem-csi-pvc-xfs Bound pvc-f7101fd2-6b36-11e9-bf09-deadbeef0100 4Gi RWO pmem-csi-sc-xfs 16s
+```
+
+- **Start two applications requesting one provisioned volume each**
+
+```sh
+ $ kubectl create -f deploy/kubernetes-<version>/pmem-app.yaml
+```
+
+These applications use **storage: pmem** in the nodeSelector
+list to ensure scheduling to a node supporting pmem device, and each requests a mount of a volume,
+one with ext4-format and another with xfs-format file system.
+
+- **Verify two application pods reach 'Running' status**
+
+```sh
+ $ kubectl get po my-csi-app-1 my-csi-app-2
+ NAME READY STATUS RESTARTS AGE
+ my-csi-app-1 1/1 Running 0 6m5s
+ NAME READY STATUS RESTARTS AGE
+ my-csi-app-2 1/1 Running 0 6m1s
+```
+
+- **Check that applications have a pmem volume mounted with added dax option**
+
+```sh
+ $ kubectl exec my-csi-app-1 -- df /data
+ Filesystem 1K-blocks Used Available Use% Mounted on
+ /dev/ndbus0region0fsdax/5ccaa889-551d-11e9-a584-928299ac4b17
+ 4062912 16376 3820440 0% /data
+ $ kubectl exec my-csi-app-2 -- df /data
+ Filesystem 1K-blocks Used Available Use% Mounted on
+ /dev/ndbus0region0fsdax/5cc9b19e-551d-11e9-a584-928299ac4b17
+ 4184064 37264 4146800 1% /data
+
+ $ kubectl exec my-csi-app-1 -- mount |grep /data
+ /dev/ndbus0region0fsdax/5ccaa889-551d-11e9-a584-928299ac4b17 on /data type ext4 (rw,relatime,dax)
+ $ kubectl exec my-csi-app-2 -- mount |grep /data
+ /dev/ndbus0region0fsdax/5cc9b19e-551d-11e9-a584-928299ac4b17 on /data type xfs (rw,relatime,attr2,dax,inode64,noquota)
+```
+
+#### Expose persistent and cache volumes to applications
+
+Kubernetes cluster administrators can expose persistent and cache volumes
+to applications using
+[`StorageClass
+Parameters`](https://kubernetes.io/docs/concepts/storage/storage-classes/#parameters). An
+optional `persistencyModel` parameter differentiates how the
+provisioned volume can be used:
+
+* no `persistencyModel` parameter or `persistencyModel: normal` in `StorageClass`
+
+ A normal Kubernetes persistent volume. In this case
+ PMEM-CSI creates the PMEM volume on a node and the application that
+ claims to use this volume is supposed to be scheduled onto this node
+ by Kubernetes. The choice of node depends on the StorageClass
+ `volumeBindingMode`. With `volumeBindingMode: Immediate`,
+ PMEM-CSI chooses a node randomly; with `volumeBindingMode:
+ WaitForFirstConsumer` (also known as late binding), Kubernetes first chooses a node for scheduling
+ the application, and PMEM-CSI creates the volume on that
+ node. Applications which claim a normal persistent volume have to use
+ the `ReadWriteOnce` access mode in their `accessModes` list. This
+ [diagram](/docs/images/sequence/pmem-csi-persistent-sequence-diagram.png)
+ illustrates how a normal persistent volume gets provisioned in
+ Kubernetes using PMEM-CSI driver.
+
+* `persistencyModel: cache`
+
+ Volumes of this type shall be used in combination with
+ `volumeBindingMode: Immediate`. In this case, PMEM-CSI creates a set
+ of PMEM volumes, each on a different node. The number of PMEM
+ volumes to create can be specified with the `cacheSize` StorageClass
+ parameter. Applications which claim a `cache` volume can use
+ `ReadWriteMany` in their `accessModes` list. See the provided
+ [cache StorageClass](/deploy/common/pmem-storageclass-cache.yaml)
+ example. This
+ [diagram](/docs/images/sequence/pmem-csi-cache-sequence-diagram.png)
+ illustrates how a cache volume gets provisioned in Kubernetes using
+ PMEM-CSI driver.
+
+**NOTE**: Cache volumes are associated with a node, not a pod. Multiple
+pods using the same cache volume on the same node will not get their
+own instance but will end up sharing the same PMEM volume instead.
+Application deployment has to consider this and use available Kubernetes
+mechanisms like [node
+anti-affinity](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity).
+See the provided
+[cache application](/deploy/common/pmem-app-cache.yaml) example.
+
+**WARNING**: late binding (`volumeBindingMode:WaitForFirstConsumer`) has some caveats:
+* Pod creation may get stuck when there isn't enough capacity left for
+ the volumes; see the next section for details.
+* A node is only chosen the first time a pod starts. After that it will always restart
+ on that node, because that is where the persistent volume was created.
+
+Volume requests embedded in the Pod spec are provisioned as ephemeral volumes. The volume request can use the following fields as [`volumeAttributes`](https://kubernetes.io/docs/concepts/storage/volumes/#csi):
+
+|key|meaning|optional|values|
+|---|-------|--------|-------------|
+|`size`|Size of the requested ephemeral volume as a [Kubernetes memory string](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-memory) ("1Mi" = 1024*1024 bytes, "1e3K" = 1000000 bytes)|No||
+|`eraseAfter`|Clear all data after use and before deleting the volume|Yes|`true` (default), `false`|
+
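+As a rough sketch (the volume name and size are made up; the example linked
+below is authoritative), such a request inside a pod spec looks like:
+
+```yaml
+volumes:
+- name: my-csi-volume              # hypothetical name
+  csi:
+    driver: pmem-csi.intel.com     # assumed driver name
+    fsType: ext4
+    volumeAttributes:
+      size: 2Gi
+      eraseAfter: "true"
+```
+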
+See the provided [example application](/deploy/kubernetes-1.15/pmem-app-ephemeral.yaml) for
+ephemeral volume usage.
+
+#### Raw block volumes
+
+Applications can use volumes provisioned by PMEM-CSI as [raw block
+devices](https://kubernetes.io/blog/2019/03/07/raw-block-volume-support-to-beta/). Such
+volumes use the same "fsdax" namespace mode as filesystem volumes
+and therefore are block devices. That mode only supports dax (=
+`mmap(MAP_SYNC)`) through a filesystem. Pages mapped on the raw block
+device go through the Linux page cache. Applications have to format
+and mount the raw block volume themselves if they want dax. The
+advantage then is that they have full control over that part.
+
+For provisioning a PMEM volume as raw block device, one has to create a
+`PersistentVolumeClaim` with `volumeMode: Block`. See example [PVC](
+/deploy/common/pmem-pvc-block-volume.yaml) and
+[application](/deploy/common/pmem-app-block-volume.yaml) for usage reference.
+
+That example demonstrates how to handle some details:
+- `mkfs.ext4` needs `-b 4096` to produce volumes that support dax;
+ without it, the automatic block size detection may end up choosing
+ an unsuitable value depending on the volume size.
+- [Kubernetes bug #85624](https://github.com/kubernetes/kubernetes/issues/85624)
+ must be worked around to format and mount the raw block device.
+
+#### Enable scheduler extensions
+
+The PMEM-CSI scheduler extender and admission webhook are provided by
+the PMEM-CSI controller. They need to be enabled during deployment via
+the `--schedulerListen=[<listen address>]:<port>` parameter. The
+listen address is optional and can be left out. The port is where a
+HTTPS server will run. It uses the same certificates as the internal
+gRPC service. When using the CA creation script described above, they
+will contain alternative names for the URLs described in this section
+(service names, `127.0.0.1` IP address).
+
+This parameter can be added to one of the existing deployment files
+with `kustomize`. All of the following examples assume that the
+current directory contains the `deploy` directory from the PMEM-CSI
+repository. It is also possible to reference the base via a
+[URL](https://github.com/kubernetes-sigs/kustomize/blob/master/examples/remoteBuild.md).
+
+``` sh
+mkdir my-pmem-csi-deployment
+
+cat >my-pmem-csi-deployment/kustomization.yaml <<EOF
+...
+EOF
+
+cat >my-pmem-csi-deployment/scheduler-patch.yaml <<EOF
+...
+EOF
+
+cat >my-scheduler/kustomization.yaml <<EOF
+...
+EOF
+
+cat >my-scheduler/node-port-patch.yaml <<EOF
+...
+EOF
+
+sudo sh -c 'cat >/var/lib/scheduler/scheduler-policy.cfg' <<EOF
+{
+    "kind": "Policy",
+    "apiVersion": "v1",
+    "extenders": [{
+        "urlPrefix": "https://<listen address>:<port>",
+        "filterVerb": "filter",
+        "prioritizeVerb": "prioritize",
+        "nodeCacheCapable": false,
+        "weight": 1,
+        "managedResources":
+        [{
+            "name": "pmem-csi.intel.com/scheduler",
+            "ignoredByScheduler": true
+        }]
+    }]
+}
+EOF
+
+cat >kubeadm.config <<EOF
+...
+EOF
+```
+
+...
+
+For Kubernetes >=
+1.15, it can also be used to let individual pods bypass the webhook by
+adding that label. The CA gets configured explicitly, which is
+supported for webhooks.
+
+``` sh
+mkdir my-webhook
+
+cat >my-webhook/kustomization.yaml <<EOF
+...
+EOF
+
+cat >my-webhook/webhook-patch.yaml <<EOF
+...
+EOF
+```
+
+## Filing issues and contributing
+
+Report a bug by [filing a new issue](https://github.com/intel/pmem-csi/issues).
+
+Before making your first contribution, be sure to read the [development documentation](DEVELOPMENT.md)
+for guidance on code quality and branches.
+
+Contribute by [opening a pull request](https://github.com/intel/pmem-csi/pulls).
+
+Learn [about pull requests](https://help.github.com/articles/using-pull-requests/).
+
+**Reporting a Potential Security Vulnerability:** If you have discovered potential security vulnerability in PMEM-CSI, please send an e-mail to secure@intel.com. For issues related to Intel Products, please visit [Intel Security Center](https://security-center.intel.com).
+
+It is important to include the following details:
+
+- The projects and versions affected
+- Detailed description of the vulnerability
+- Information on known exploits
+
+Vulnerability information is extremely sensitive. Please encrypt all security vulnerability reports using our [PGP key](https://www.intel.com/content/www/us/en/security-center/pgp-public-key.html).
+
+A member of the Intel Product Security Team will review your e-mail and contact you to collaborate on resolving the issue. For more information on how Intel works to resolve security issues, see: [vulnerability handling guidelines](https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html).
+
+
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 0000000000..b6f6a3a327
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,4 @@
+sphinx
+sphinx_rtd_theme
+recommonmark
+sphinx-markdown-tables
\ No newline at end of file
diff --git a/docs/substitutions.txt b/docs/substitutions.txt
new file mode 100644
index 0000000000..3eef4529a3
--- /dev/null
+++ b/docs/substitutions.txt
@@ -0,0 +1 @@
+.. |PR| replace:: Project Name
\ No newline at end of file
diff --git a/examples/gce.md b/examples/gce.md
index d26ea7da25..03cbb43f85 100644
--- a/examples/gce.md
+++ b/examples/gce.md
@@ -206,7 +206,7 @@ To stop the cluster, use the same env variables for the
After the previous step, `kubectl` works and is configured to use the
new cluster. What follows next are the steps explained in more details
in the top-level README's [Run PMEM-CSI on
-Kubernetes](../run-pmem-csi-on-kubernetes) section.
+Kubernetes](../docs/install.md#run-pmem-csi-on-kubernetes) section.
First the worker nodes need to be labeled:
diff --git a/examples/readme.rst b/examples/readme.rst
new file mode 100644
index 0000000000..72db86de6c
--- /dev/null
+++ b/examples/readme.rst
@@ -0,0 +1,15 @@
+Application examples
+####################
+
+`Redis-pmem operator <redis-operator.md>`__
+ Deploy a Redis cluster through the redis-operator using QEMU-emulated persistent memory devices
+
+`Google Cloud Engine <gce.md>`__
+ Install Kubernetes and PMEM-CSI on Google Cloud machines.
+
+.. toctree::
+ :hidden:
+
+ redis-operator.md
+ gce.md
+
\ No newline at end of file
diff --git a/examples/redis-operator.md b/examples/redis-operator.md
index 92e2cab3d4..87b5554cd8 100644
--- a/examples/redis-operator.md
+++ b/examples/redis-operator.md
@@ -1,4 +1,5 @@
# Redis-pmem operator
+
This readme describes a complete example to deploy a Redis cluster through the [redis-operator](https://github.com/spotahome/redis-operator) using QEMU-emulated persistent memory devices.
## Prerequisites
diff --git a/index.rst b/index.rst
new file mode 100644
index 0000000000..6109596696
--- /dev/null
+++ b/index.rst
@@ -0,0 +1,18 @@
+.. Project Name documentation master file, created by
+ sphinx-quickstart on Tue Nov 5 14:52:28 2019.
+ You can adapt this file completely to your liking, but it should at least
+ contain the root `toctree` directive.
+
+PMEM-CSI
+=========
+
+.. toctree::
+ :maxdepth: 2
+
+ README.md
+ docs/design.md
+ docs/install.md
+ docs/DEVELOPMENT.md
+ docs/autotest.md
+ examples/readme.rst
+ Project GitHub repository <https://github.com/intel/pmem-csi>
diff --git a/make.bat b/make.bat
new file mode 100644
index 0000000000..455cbe3888
--- /dev/null
+++ b/make.bat
@@ -0,0 +1,43 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
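+REM The "html" target below additionally runs fix-refs.py and copies
+REM index.html into the generated HTML output; other targets invoke
+REM sphinx-build directly.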
+
+if "%SPHINXBUILD%" == "" (
+ set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_output
+
+if "%1" == "" goto help
+
+if "%1" == "html" goto html
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+ echo.
+ echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+ echo.installed, then set the SPHINXBUILD environment variable to point
+ echo.to the full path of the 'sphinx-build' executable. Alternatively you
+ echo.may add the Sphinx directory to PATH.
+ echo.
+ echo.If you don't have Sphinx installed, grab it from
+ echo.http://sphinx-doc.org/
+ exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
+goto end
+
+:html
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
+python.exe .\fix-refs.py
+copy index.html %BUILDDIR%\html\index.html
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS%
+
+:end
+popd
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000000..e15c9ea6f3
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,11 @@
+#!/usr/bin/env python
+
+from distutils.core import setup
+
+setup(name='Sphinx GUI Utility',
+ version='0.1',
+ description='Build Sphinx docs from a GUI',
+ author='Kevin Putnam',
+ author_email='kevin.putnam@intel.com',
+ url='https://github.com/intel/pmem-csi',
+ )
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000000..ffeb174d91
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,13 @@
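+# Convenience wrapper for the Sphinx doc build: runs "make" on Linux and
+# "make.bat" on Windows, passing any extra arguments through as {posargs}.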
+[tox]
+envlist = py3-{mylinux,mywindows}
+
+[testenv]
+platform = mylinux: linux
+ mywindows: win32
+whitelist_externals = make.bat
+ /usr/bin/make
+deps = -rrequirements.txt
+commands =
+ mylinux: make {posargs}
+ mywindows: make.bat {posargs}
+