Skip to content

Commit

Permalink
Update container handling with charliecloud (#5300)
Browse files Browse the repository at this point in the history
Update CharliecloudCache and make `writeFake` the default. With `writeFake` enabled, `CH_IMAGE_STORAGE` is preferred as the image location; in all other cases the order of precedence is `charliecloud.cacheDir`, then `NXF_CHARLIECLOUD_CACHEDIR`, then `work/charliecloud`. Prevent the use of `CH_IMAGE_STORAGE` when `writeFake` is not used.


Signed-off-by: Niklas Schandry <[email protected]>
  • Loading branch information
nschan authored Sep 16, 2024
1 parent fe3e3ac commit 8e6068d
Show file tree
Hide file tree
Showing 5 changed files with 191 additions and 90 deletions.
16 changes: 6 additions & 10 deletions docs/config.md
Original file line number Diff line number Diff line change
Expand Up @@ -488,16 +488,18 @@ The following settings are available:
### Scope `charliecloud`

The `charliecloud` scope controls how [Charliecloud](https://hpc.github.io/charliecloud/) containers are executed by Nextflow.
If `charliecloud.writeFake` is unset / `false`, charliecloud will create a copy of the container in the process working directory.

The following settings are available:

`charliecloud.cacheDir`
: The directory where remote Charliecloud images are stored. When using a computing cluster it must be a shared folder accessible to all compute nodes.

`charliecloud.enabled`
: Enable Charliecloud execution (default: `false`).

`charliecloud.writeFake`
: Enable `writeFake` with Charliecloud (default: `true`). This allows containers to be run from storage in writeable mode, using overlayfs. `writeFake` requires unprivileged `overlayfs` (Linux kernel >= 5.11). For full support, tmpfs with xattrs in the user namespace (Linux kernel >= 6.6) is required; see the [Charliecloud documentation](https://hpc.github.io/charliecloud/ch-run.html#ch-run-overlay) for details.

`charliecloud.cacheDir`
: The directory where remote Charliecloud images are stored. When using a computing cluster it must be a shared folder accessible to all compute nodes.

`charliecloud.envWhitelist`
: Comma separated list of environment variable names to be included in the container environment.

Expand All @@ -513,12 +515,6 @@ The following settings are available:
`charliecloud.registry`
: The registry from where images are pulled. It should be only used to specify a private registry server. It should NOT include the protocol prefix i.e. `http://`.

`charliecloud.writeFake`
: Enable `writeFake` with Charliecloud. This allows containers to be run from storage in writeable mode, using overlayfs; see the [Charliecloud documentation](https://hpc.github.io/charliecloud/ch-run.html#ch-run-overlay) for details.

`charliecloud.useSquash`
: Create a temporary squashFS container image in the process work directory instead of a folder.

Read the {ref}`container-charliecloud` page to learn more about how to use Charliecloud containers with Nextflow.

(config-conda)=
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import java.nio.file.Path
import java.nio.file.Paths
import groovy.transform.CompileStatic
import groovy.util.logging.Slf4j
import nextflow.Global
/**
* Implements a builder for Charliecloud containerisation
*
Expand All @@ -33,10 +34,8 @@ import groovy.util.logging.Slf4j
@Slf4j
class CharliecloudBuilder extends ContainerBuilder<CharliecloudBuilder> {

protected boolean useSquash
private boolean writeFake = true

protected boolean writeFake

CharliecloudBuilder(String name) {
this.image = name
}
Expand All @@ -52,16 +51,13 @@ class CharliecloudBuilder extends ContainerBuilder<CharliecloudBuilder> {

if( params.containsKey('runOptions') )
addRunOptions(params.runOptions.toString())

if ( params.containsKey('useSquash') )
this.useSquash = params.useSquash?.toString() == 'true'


if ( params.containsKey('writeFake') )
this.writeFake = params.writeFake?.toString() == 'true'
this.writeFake = params.writeFake?.toString() != 'false'

if( params.containsKey('readOnlyInputs') )
this.readOnlyInputs = params.readOnlyInputs?.toString() == 'true'

return this
}

Expand All @@ -72,40 +68,22 @@ class CharliecloudBuilder extends ContainerBuilder<CharliecloudBuilder> {

@Override
CharliecloudBuilder build(StringBuilder result) {

assert image
def imageStorage = Paths.get(image).parent.parent
def imageToRun = String

if (!writeFake) {
// define image to run, if --write-fake is not used this is a copy of the image in the current workDir
imageToRun = '"$NXF_TASK_WORKDIR"/container_' + image.split('/')[-1]

// optional squash
if (useSquash) {
imageToRun = imageToRun + '.squashfs'
}

result << 'ch-convert -i ch-image --storage '
// handle storage to deal with cases where CH_IMAGE_STORAGE is not set
result << imageStorage
result << ' '
result << image.split('/')[-1]
result << ' '
result << imageToRun
result << ' && '
}
def imageName = image.split('/')[-1]

result << 'ch-run --unset-env="*" -c "$NXF_TASK_WORKDIR" --set-env '

if (writeFake) {
result << '--write-fake '
// if we are using writeFake we do not need to create a temporary imagae
// image is run by name from the storage directory
imageToRun = image.split('/')[-1]
}

if (!readOnlyInputs)
if ( writeFake )
result << '--write-fake '

if ( !writeFake && !readOnlyInputs ) {
// -w and CH_IMAGE_STORAGE are incompatible.
if(System.getenv('CH_IMAGE_STORAGE') == imageStorage)
throw new Exception('It is not possible to run writeable images from `$CH_IMAGE_STORAGE`')
result << '-w '
}

appendEnv(result)

Expand All @@ -116,8 +94,15 @@ class CharliecloudBuilder extends ContainerBuilder<CharliecloudBuilder> {

if( runOptions )
result << runOptions.join(' ') << ' '

if( writeFake && System.getenv('CH_IMAGE_STORAGE') ) {
// Run by name if writeFake is true and CH_IMAGE_STORAGE is set
result << imageName
} else {
// Otherwise run by path
result << image
}

result << imageToRun
result << ' --'

runCommand = result.toString()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,10 @@ class CharliecloudCache {
/**
* Retrieve the directory where to store the charliecloud images once downloaded.
* It tries these settings in the following order:
* 1) {@code charliecloud.cacheDir} setting in the nextflow config file
* 2) the {@code NXF_CHARLIECLOUD_CACHEDIR} environment variable
* 3) the {@code $workDir/charliecloud} path
* 1) If writeFake is enabled, the {@code CH_IMAGE_STORAGE} environment variable.
* 2) {@code charliecloud.cacheDir} setting in the nextflow config file
* 3) the {@code NXF_CHARLIECLOUD_CACHEDIR} environment variable
* 4) the {@code $workDir/charliecloud} path
*
* @return
* the {@code Path} where to store the charliecloud images as flattened directories
Expand All @@ -130,19 +131,38 @@ class CharliecloudCache {
if( config.pullTimeout )
pullTimeout = config.pullTimeout as Duration

def writeFake = true

if( config.writeFake )
writeFake = config.writeFake?.toString() == 'true'

def str = config.cacheDir as String
if( str )

def charliecloudImageStorage = env.get('CH_IMAGE_STORAGE')

if( charliecloudImageStorage && writeFake) {
return checkDir(charliecloudImageStorage)
}

if( str ) {
// If charliecloudImageStorage exists and writeFake is true, we never get here
if( str.equals( charliecloudImageStorage ) ) {
throw new Exception("`charliecloud.cacheDir` configuration parameter must be different from env variable `CH_IMAGE_STORAGE`")
}
return checkDir(str)
}

str = env.get('NXF_CHARLIECLOUD_CACHEDIR')
if( str )
return checkDir(str)

str = env.get('CH_IMAGE_STORAGE')
if( str )
if( str ) {
if( str.equals( charliecloudImageStorage ) ) {
throw new Exception("`NXF_CHARLIECLOUD_CACHEDIR` env variable must be different from env variable `CH_IMAGE_STORAGE`")
}
return checkDir(str)
}

def workDir = Global.session.workDir

if( workDir.fileSystem != FileSystems.default ) {
throw new IOException("Charliecloud cannot store image in a remote work directory -- Use a POSIX compatible work directory or specify an alternative path with the `NXF_CHARLIECLOUD_CACHEDIR` env variable")
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,110 +36,132 @@ class CharliecloudBuilderTest extends Specification {
def path2 = Paths.get('/bar/data/file2')

expect:
new CharliecloudBuilder('/cacheDir/img/busybox')
new CharliecloudBuilder('/cacheDir/busybox')
.build()
.runCommand == 'ch-convert -i ch-image --storage /cacheDir busybox "$NXF_TASK_WORKDIR"/container_busybox && ch-run --unset-env="*" -c "$NXF_TASK_WORKDIR" --set-env -w -b "$NXF_TASK_WORKDIR" "$NXF_TASK_WORKDIR"/container_busybox --'
.runCommand == 'ch-run --unset-env="*" -c "$NXF_TASK_WORKDIR" --set-env --write-fake -b "$NXF_TASK_WORKDIR" /cacheDir/busybox --'

new CharliecloudBuilder('/cacheDir/img/busybox')
new CharliecloudBuilder('/cacheDir/busybox')
.params(writeFake: false)
.build()
.runCommand == 'ch-run --unset-env="*" -c "$NXF_TASK_WORKDIR" --set-env -w -b "$NXF_TASK_WORKDIR" /cacheDir/busybox --'

new CharliecloudBuilder('/cacheDir/busybox')
.params(writeFake: false)
.params(readOnlyInputs: true)
.build()
.runCommand == 'ch-run --unset-env="*" -c "$NXF_TASK_WORKDIR" --set-env -b "$NXF_TASK_WORKDIR" /cacheDir/busybox --'

new CharliecloudBuilder('/cacheDir/busybox')
.params(runOptions: '-j --no-home')
.build()
.runCommand == 'ch-convert -i ch-image --storage /cacheDir busybox "$NXF_TASK_WORKDIR"/container_busybox && ch-run --unset-env="*" -c "$NXF_TASK_WORKDIR" --set-env -w -b "$NXF_TASK_WORKDIR" -j --no-home "$NXF_TASK_WORKDIR"/container_busybox --'
.runCommand == 'ch-run --unset-env="*" -c "$NXF_TASK_WORKDIR" --set-env --write-fake -b "$NXF_TASK_WORKDIR" -j --no-home /cacheDir/busybox --'

new CharliecloudBuilder('/cacheDir/img/busybox')
new CharliecloudBuilder('/cacheDir/busybox')
.params(temp: '/foo')
.build()
.runCommand == 'ch-convert -i ch-image --storage /cacheDir busybox "$NXF_TASK_WORKDIR"/container_busybox && ch-run --unset-env="*" -c "$NXF_TASK_WORKDIR" --set-env -w -b /foo:/tmp -b "$NXF_TASK_WORKDIR" "$NXF_TASK_WORKDIR"/container_busybox --'
.runCommand == 'ch-run --unset-env="*" -c "$NXF_TASK_WORKDIR" --set-env --write-fake -b /foo:/tmp -b "$NXF_TASK_WORKDIR" /cacheDir/busybox --'

new CharliecloudBuilder('/cacheDir/img/busybox')
new CharliecloudBuilder('/cacheDir/busybox')
.addEnv('X=1')
.addEnv(ALPHA:'aaa', BETA: 'bbb')
.build()
.runCommand == 'ch-convert -i ch-image --storage /cacheDir busybox "$NXF_TASK_WORKDIR"/container_busybox && ch-run --unset-env="*" -c "$NXF_TASK_WORKDIR" --set-env -w --set-env=X=1 --set-env=ALPHA=aaa --set-env=BETA=bbb -b "$NXF_TASK_WORKDIR" "$NXF_TASK_WORKDIR"/container_busybox --'
.runCommand == 'ch-run --unset-env="*" -c "$NXF_TASK_WORKDIR" --set-env --write-fake --set-env=X=1 --set-env=ALPHA=aaa --set-env=BETA=bbb -b "$NXF_TASK_WORKDIR" /cacheDir/busybox --'

new CharliecloudBuilder('/cacheDir/img/ubuntu')
new CharliecloudBuilder('/cacheDir/ubuntu')
.addMount(path1)
.build()
.runCommand == 'ch-convert -i ch-image --storage /cacheDir ubuntu "$NXF_TASK_WORKDIR"/container_ubuntu && ch-run --unset-env="*" -c "$NXF_TASK_WORKDIR" --set-env -w -b /foo/data/file1 -b "$NXF_TASK_WORKDIR" "$NXF_TASK_WORKDIR"/container_ubuntu --'
.runCommand == 'ch-run --unset-env="*" -c "$NXF_TASK_WORKDIR" --set-env --write-fake -b /foo/data/file1 -b "$NXF_TASK_WORKDIR" /cacheDir/ubuntu --'

new CharliecloudBuilder('/cacheDir/img/ubuntu')
new CharliecloudBuilder('/cacheDir/ubuntu')
.addMount(path1)
.addMount(path2)
.build()
.runCommand == 'ch-convert -i ch-image --storage /cacheDir ubuntu "$NXF_TASK_WORKDIR"/container_ubuntu && ch-run --unset-env="*" -c "$NXF_TASK_WORKDIR" --set-env -w -b /foo/data/file1 -b /bar/data/file2 -b "$NXF_TASK_WORKDIR" "$NXF_TASK_WORKDIR"/container_ubuntu --'
.runCommand == 'ch-run --unset-env="*" -c "$NXF_TASK_WORKDIR" --set-env --write-fake -b /foo/data/file1 -b /bar/data/file2 -b "$NXF_TASK_WORKDIR" /cacheDir/ubuntu --'
}

def db_file = Paths.get('/home/db')
def 'should get run command' () {

when:
def cmd = new CharliecloudBuilder('/cacheDir/img/ubuntu')
def cmd = new CharliecloudBuilder('/cacheDir/ubuntu')
.build()
.getRunCommand()
then:
cmd == 'ch-convert -i ch-image --storage /cacheDir ubuntu "$NXF_TASK_WORKDIR"/container_ubuntu && ch-run --unset-env="*" -c "$NXF_TASK_WORKDIR" --set-env -w -b "$NXF_TASK_WORKDIR" "$NXF_TASK_WORKDIR"/container_ubuntu --'
cmd == 'ch-run --unset-env="*" -c "$NXF_TASK_WORKDIR" --set-env --write-fake -b "$NXF_TASK_WORKDIR" /cacheDir/ubuntu --'

when:
cmd = new CharliecloudBuilder('/cacheDir/img/ubuntu')
.params(useSquash: 'true')
cmd = new CharliecloudBuilder('/cacheDir/ubuntu')
.params(writeFake: 'true')
.build()
.getRunCommand()
then:
cmd == 'ch-convert -i ch-image --storage /cacheDir ubuntu "$NXF_TASK_WORKDIR"/container_ubuntu.squashfs && ch-run --unset-env="*" -c "$NXF_TASK_WORKDIR" --set-env -w -b "$NXF_TASK_WORKDIR" "$NXF_TASK_WORKDIR"/container_ubuntu.squashfs --'
cmd == 'ch-run --unset-env="*" -c "$NXF_TASK_WORKDIR" --set-env --write-fake -b "$NXF_TASK_WORKDIR" /cacheDir/ubuntu --'

when:
cmd = new CharliecloudBuilder('/cacheDir/img/ubuntu')
.params(writeFake: 'true')
cmd = new CharliecloudBuilder('/cacheDir/ubuntu')
.params(writeFake: 'false')
.build()
.getRunCommand()
then:
cmd == 'ch-run --unset-env="*" -c "$NXF_TASK_WORKDIR" --set-env --write-fake -w -b "$NXF_TASK_WORKDIR" ubuntu --'
cmd == 'ch-run --unset-env="*" -c "$NXF_TASK_WORKDIR" --set-env -w -b "$NXF_TASK_WORKDIR" /cacheDir/ubuntu --'

when:
cmd = new CharliecloudBuilder('/cacheDir/img/ubuntu')
cmd = new CharliecloudBuilder('/cacheDir/ubuntu')
.params(entry:'/bin/sh')
.build()
.getRunCommand('bwa --this --that file.fastq')
then:
cmd == 'ch-convert -i ch-image --storage /cacheDir ubuntu "$NXF_TASK_WORKDIR"/container_ubuntu && ch-run --unset-env="*" -c "$NXF_TASK_WORKDIR" --set-env -w -b "$NXF_TASK_WORKDIR" "$NXF_TASK_WORKDIR"/container_ubuntu -- /bin/sh -c "bwa --this --that file.fastq"'
cmd == 'ch-run --unset-env="*" -c "$NXF_TASK_WORKDIR" --set-env --write-fake -b "$NXF_TASK_WORKDIR" /cacheDir/ubuntu -- /bin/sh -c "bwa --this --that file.fastq"'

when:
cmd = new CharliecloudBuilder('/cacheDir/img/ubuntu')
cmd = new CharliecloudBuilder('/cacheDir/ubuntu')
.params(entry:'/bin/sh')
.params(readOnlyInputs: 'true')
.build()
.getRunCommand('bwa --this --that file.fastq')
then:
cmd == 'ch-convert -i ch-image --storage /cacheDir ubuntu "$NXF_TASK_WORKDIR"/container_ubuntu && ch-run --unset-env="*" -c "$NXF_TASK_WORKDIR" --set-env -b "$NXF_TASK_WORKDIR" "$NXF_TASK_WORKDIR"/container_ubuntu -- /bin/sh -c "bwa --this --that file.fastq"'
cmd == 'ch-run --unset-env="*" -c "$NXF_TASK_WORKDIR" --set-env --write-fake -b "$NXF_TASK_WORKDIR" /cacheDir/ubuntu -- /bin/sh -c "bwa --this --that file.fastq"'

when:
cmd = new CharliecloudBuilder('/cacheDir/img/ubuntu')
cmd = new CharliecloudBuilder('/cacheDir/ubuntu')
.params(entry:'/bin/sh')
.params(readOnlyInputs: 'false')
.build()
.getRunCommand('bwa --this --that file.fastq')
then:
cmd == 'ch-convert -i ch-image --storage /cacheDir ubuntu "$NXF_TASK_WORKDIR"/container_ubuntu && ch-run --unset-env="*" -c "$NXF_TASK_WORKDIR" --set-env -w -b "$NXF_TASK_WORKDIR" "$NXF_TASK_WORKDIR"/container_ubuntu -- /bin/sh -c "bwa --this --that file.fastq"'
cmd == 'ch-run --unset-env="*" -c "$NXF_TASK_WORKDIR" --set-env --write-fake -b "$NXF_TASK_WORKDIR" /cacheDir/ubuntu -- /bin/sh -c "bwa --this --that file.fastq"'

when:
cmd = new CharliecloudBuilder('/cacheDir/img/ubuntu')
cmd = new CharliecloudBuilder('/cacheDir/ubuntu')
.params(entry:'/bin/sh')
.params(readOnlyInputs: 'false')
.params(writeFake: 'false')
.build()
.getRunCommand('bwa --this --that file.fastq')
then:
cmd == 'ch-run --unset-env="*" -c "$NXF_TASK_WORKDIR" --set-env -w -b "$NXF_TASK_WORKDIR" /cacheDir/ubuntu -- /bin/sh -c "bwa --this --that file.fastq"'

when:
cmd = new CharliecloudBuilder('/cacheDir/ubuntu')
.params(entry:'/bin/sh')
.params(readOnlyInputs: 'true')
.params(writeFake: 'false')
.addMount(db_file)
.addMount(db_file)
.params(readOnlyInputs: 'true')
.build().getRunCommand('bwa --this --that file.fastq')
then:
cmd == 'ch-convert -i ch-image --storage /cacheDir ubuntu "$NXF_TASK_WORKDIR"/container_ubuntu && ch-run --unset-env="*" -c "$NXF_TASK_WORKDIR" --set-env -b /home -b "$NXF_TASK_WORKDIR" "$NXF_TASK_WORKDIR"/container_ubuntu -- /bin/sh -c "bwa --this --that file.fastq"'
cmd == 'ch-run --unset-env="*" -c "$NXF_TASK_WORKDIR" --set-env -b /home -b "$NXF_TASK_WORKDIR" /cacheDir/ubuntu -- /bin/sh -c "bwa --this --that file.fastq"'

when:
cmd = new CharliecloudBuilder('/cacheDir/img/ubuntu')
cmd = new CharliecloudBuilder('/cacheDir/ubuntu')
.params(entry:'/bin/sh')
.addMount(db_file)
.addMount(db_file)
.params(readOnlyInputs: 'false')
.build()
.getRunCommand('bwa --this --that file.fastq')
then:
cmd == 'ch-convert -i ch-image --storage /cacheDir ubuntu "$NXF_TASK_WORKDIR"/container_ubuntu && ch-run --unset-env="*" -c "$NXF_TASK_WORKDIR" --set-env -w -b /home/db -b "$NXF_TASK_WORKDIR" "$NXF_TASK_WORKDIR"/container_ubuntu -- /bin/sh -c "bwa --this --that file.fastq"'
cmd == 'ch-run --unset-env="*" -c "$NXF_TASK_WORKDIR" --set-env --write-fake -b /home/db -b "$NXF_TASK_WORKDIR" /cacheDir/ubuntu -- /bin/sh -c "bwa --this --that file.fastq"'
}

@Unroll
Expand Down
Loading

0 comments on commit 8e6068d

Please sign in to comment.