Skip to content

Commit

Permalink
Stream docker
Browse files Browse the repository at this point in the history
Fix streaming for tasks that use docker
  • Loading branch information
orodeh authored Aug 22, 2017
1 parent acd350b commit 8c6f394
Show file tree
Hide file tree
Showing 8 changed files with 156 additions and 67 deletions.
3 changes: 3 additions & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Release Notes

## 0.39
- Streaming works with tasks that use docker

## 0.38
- Minor fixes, and better testing, for file streaming

Expand Down
2 changes: 1 addition & 1 deletion reference_stanza.conf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
dxWDL {
version = "0.38"
version = "0.39"
asset_ids = []
}
172 changes: 110 additions & 62 deletions src/main/scala/dxWDL/RunnerTask.scala
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,8 @@ case class RunnerTask(task:Task,
Utils.writeFileContent(jobOutputPath, ast_pp)
}

private def writeSubmitBashScript(env: Map[String, WdlValue]) : Unit = {
// Figure out if a docker image is specified. If so, return it as a string.
private def dockerImage(env: Map[String, WdlValue]) : Option[String] = {
def lookup(varName : String) : WdlValue = {
env.get(varName) match {
case Some(x) => x
Expand All @@ -235,37 +236,16 @@ case class RunnerTask(task:Task,
}
// Figure out if docker is used. If so, it is specified by an
// expression that requires evaluation.
val docker: Option[String] =
task.runtimeAttributes.attrs.get("docker") match {
case None => None
case Some(expr) => Some(evalStringExpr(expr))
}
docker match {
case None => ()
case Some(imgName) =>
// The user wants to use a docker container with the
// image [imgName]. We implement this with dx-docker.
// There may be corner cases where the image will run
// into permission limitations due to security.
//
// Map the home directory into the container, so that
// we can reach the result files, and upload them to
// the platform.
val DX_HOME = Utils.DX_HOME
val dockerRunPath = getMetaDir().resolve("script.submit")
val dockerRunScript =
s"""|#!/bin/bash -ex
|dx-docker run --entrypoint /bin/bash -v ${DX_HOME}:${DX_HOME} ${imgName} $${HOME}/execution/meta/script""".stripMargin.trim
System.err.println(s"writing docker run script to ${dockerRunPath}")
Utils.writeFileContent(dockerRunPath, dockerRunScript)
dockerRunPath.toFile.setExecutable(true)
task.runtimeAttributes.attrs.get("docker") match {
case None => None
case Some(expr) => Some(evalStringExpr(expr))
}
}

// Each file marked "stream", is converted into a special fifo
// file on the instance.
private def handleStreamingFiles(inputs: Map[String, BValue])
: (String, String, Map[Declaration, WdlValue]) = {
: (Option[(String, String)], Map[String, BValue]) = {
// A file that needs to be stream-downloaded.
// Make a named pipe, and stream the file from the platform to the pipe.
// Keep track of the download process. We need to ensure pipes have
Expand All @@ -281,55 +261,76 @@ case class RunnerTask(task:Task,
val bashSnippet:String =
s"""|mkfifo ${fifo.toString}
|dx cat ${dxFileId} > ${fifo.toString} &
|download_stream_pids+=($$!)
|""".stripMargin
(WdlSingleFile(fifo.toString), bashSnippet)
}

val m:Map[Declaration, (WdlValue, String)] = inputs.map{
case (_, BValue(_, _, None)) => throw new Exception("Sanity")
case (_, BValue(wvl, wdlValue, Some(decl))) =>
wdlValue match {
val m:Map[String, (String, BValue)] = inputs.map{
case (varName, BValue(wvl, wdlValue, declOpt)) =>
val (wdlValueRewrite,bashSnippet) = wdlValue match {
case WdlSingleFile(path) if wvl.attrs.stream =>
decl -> mkfifo(wvl, path)
mkfifo(wvl, path)
case WdlOptionalValue(_,Some(WdlSingleFile(path))) if wvl.attrs.stream =>
decl -> mkfifo(wvl, path)
mkfifo(wvl, path)
case _ =>
// everything else
decl -> (wdlValue, "")
(wdlValue,"")
}
}
val bVal:BValue = BValue(wvl, wdlValueRewrite, declOpt)
varName -> (bashSnippet, bVal)
}.toMap

// set up all the named pipes
val snippets = m.collect{
case (_, (_, bashSnippet)) if !bashSnippet.isEmpty => bashSnippet
case (_, (bashSnippet,_)) if !bashSnippet.isEmpty => bashSnippet
}.toVector
val bashProlog = ("download_stream_pids=()" +:
val bashProlog = ("background_pids=()" +:
snippets).mkString("\n")

// Wait for all download processes to complete. It is legal
// Wait for all background processes to complete. It is legal
// for the user job to read only the beginning of the
// file. This causes the download streams to close
// prematurely, which can be show up as an error. We need to tolerate this
// case.
val bashEpilog:String = "wait ${download_stream_pids[@]}"
val inputsWithPipes = m.map{ case (decl, (wdlValue, _)) => decl -> wdlValue }.toMap
(bashProlog, bashEpilog, inputsWithPipes)
// prematurely, which can be show up as an error. We need to
// tolerate this case.
val bashEpilog = ""
// "wait ${background_pids[@]}"
/* """|echo "robust wait for ${background_pids[@]}"
|for pid in ${background_pids[@]}; do
| while [[ ( -d /proc/$pid ) && ( -z `grep zombie /proc/$pid/status` ) ]]; do
| sleep 10
| echo "waiting for $pid"
| done
|done
|""".stripMargin.trim + "\n" */
val inputsWithPipes = m.map{ case (varName, (_,bValue)) => varName -> bValue }.toMap
val bashPrologEpilog =
if (fifoCount == 0) {
// No streaming files
None
} else {
// There are some streaming files
Some((bashProlog, bashEpilog))
}
(bashPrologEpilog, inputsWithPipes)
}

private def writeBashScript(inputs: Map[String, BValue]) : Unit = {
// Write the core bash script into a file. In some cases, we
// need to run some shell setup statements before and after this
// script. Returns these as two strings (prolog, epilog).
private def writeBashScript(inputs: Map[String, BValue],
bashPrologEpilog: Option[(String, String)]) : Unit = {
val metaDir = getMetaDir()
val scriptPath = metaDir.resolve("script")
val stdoutPath = metaDir.resolve("stdout")
val stderrPath = metaDir.resolve("stderr")
val rcPath = metaDir.resolve("rc")

// deal with files
val (bashProlog, bashEpilog, inputsWithPipes) = handleStreamingFiles(inputs)

// instantiate the command
val taskCmd : String = task.instantiateCommand(inputsWithPipes, DxFunctions).get
val shellCmd = List(bashProlog, taskCmd, bashEpilog).mkString("\n")
val env: Map[Declaration, WdlValue] = inputs.map {
case (_, BValue(_,wdlValue,Some(decl))) => decl -> wdlValue
case (_, BValue(varName,_,None)) => throw new Exception("missing declaration")
}.toMap
val shellCmd : String = task.instantiateCommand(env, DxFunctions).get

// This is based on Cromwell code from
// [BackgroundAsyncJobExecutionActor.scala]. Generate a bash
Expand All @@ -349,12 +350,17 @@ case class RunnerTask(task:Task,
|echo 0 > ${rcPath}
|""".stripMargin.trim + "\n"
} else {
val cdHome = s"cd ${Utils.DX_HOME}"
var cmdLines: List[String] = bashPrologEpilog match {
case None =>
List(cdHome, shellCmd)
case Some((bashProlog, bashEpilog)) =>
List(cdHome, bashProlog, shellCmd, bashEpilog)
}
val cmd = cmdLines.mkString("\n")
s"""|#!/bin/bash
|(
|if [ -d ${Utils.DX_HOME} ]; then
| cd ${Utils.DX_HOME}
|fi
|${shellCmd}
|${cmd}
|) \\
| > >( tee ${stdoutPath} ) \\
| 2> >( tee ${stderrPath} >&2 )
Expand All @@ -365,6 +371,40 @@ case class RunnerTask(task:Task,
Utils.writeFileContent(scriptPath, script)
}

private def writeDockerSubmitBashScript(env: Map[String, WdlValue],
imgName: String,
bashPrologEpilog: Option[(String, String)]) : Unit = {
// The user wants to use a docker container with the
// image [imgName]. We implement this with dx-docker.
// There may be corner cases where the image will run
// into permission limitations due to security.
//
// Map the home directory into the container, so that
// we can reach the result files, and upload them to
// the platform.
val DX_HOME = Utils.DX_HOME
val dockerCmd = s"""|dx-docker run --entrypoint /bin/bash
|-v ${DX_HOME}:${DX_HOME}
|${imgName}
|$${HOME}/execution/meta/script""".stripMargin.replaceAll("\n", " ")
val dockerRunPath = getMetaDir().resolve("script.submit")
val dockerRunScript = bashPrologEpilog match {
case None =>
s"""|#!/bin/bash -ex
|${dockerCmd}""".stripMargin
case Some((bashProlog, bashEpilog)) =>
List("#!/bin/bash -ex",
bashProlog,
dockerCmd,
bashEpilog
).mkString("\n")
}
System.err.println(s"writing docker run script to ${dockerRunPath}")
Utils.writeFileContent(dockerRunPath,
dockerRunScript)
dockerRunPath.toFile.setExecutable(true)
}

// Calculate the input variables for the task, download the input files,
// and build a shell script to run the command.
def prolog(jobInputPath : Path,
Expand All @@ -385,17 +425,25 @@ case class RunnerTask(task:Task,
}.toMap

// evaluate the top declarations
val decls: Map[String, BValue] = evalDeclarations(task.declarations, inputWvls)

// Write shell script to a file. It will be executed by the dx-applet code
writeBashScript(decls)

// write the script that launches the shell script. It could be a docker
// image.
val env:Map[String, WdlValue] = decls.map{
val inputs: Map[String, BValue] = evalDeclarations(task.declarations, inputWvls)
val env:Map[String, WdlValue] = inputs.map{
case (varName, BValue(_,wdlValue,_)) => varName -> wdlValue
}.toMap
writeSubmitBashScript(env)
val docker = dockerImage(env)

// deal with files that need streaming
val (bashPrologEpilog, inputsWithPipes) = handleStreamingFiles(inputs)

// Write shell script to a file. It will be executed by the dx-applet code
docker match {
case None =>
writeBashScript(inputsWithPipes, bashPrologEpilog)
case Some(img) =>
// write a script that launches the actual command inside a docker image.
// Streamed files are set up before launching docker.
writeBashScript(inputsWithPipes, None)
writeDockerSubmitBashScript(env, img, bashPrologEpilog)
}

// serialize the environment, so we don't have to calculate it again in
// the epilog
Expand Down
8 changes: 4 additions & 4 deletions test/files.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -121,10 +121,6 @@ workflow files {
File f1
File f2

call lib.Colocation as colocation {
input : A=f1, B=f2
}
# Try an applet that streams two files
call lib.diff as diff1 {
input: a=f, b=f
Expand All @@ -133,6 +129,10 @@ workflow files {
input: a=f, b=f2
}

call lib.Colocation as colocation {
input : A=f1, B=f2
}
call z_Copy as Copy { input : src=f, basename="tearFrog" }
call z_Copy as Copy2 { input : src=Copy.outf, basename="mixing" }
call z_FindFiles as FindFiles
Expand Down
3 changes: 3 additions & 0 deletions test/library_sys_call.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ task diff {
a : "stream"
b : "stream"
}
runtime {
docker: "ubuntu:16.04"
}
command {
diff ${a} ${b} | wc -l
}
Expand Down
30 changes: 30 additions & 0 deletions test/quick_diff.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
task diff {
File a
File b

parameter_meta {
a : "stream"
b : "stream"
}
runtime {
docker: "ubuntu:16.04"
}
command {
diff ${a} ${b} | wc -l
}
output {
Int result = read_int(stdout())
}
}

workflow quick_diff {
File x
File y

call diff {
input: a=x, b=y
}
output {
diff.result
}
}
4 changes: 4 additions & 0 deletions test/quick_diff_input.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"quick_diff.x" : "dx://dxWDL_playground:/test_data/fileA",
"quick_diff.y" : "dx://dxWDL_playground:/test_data/fileB"
}
1 change: 1 addition & 0 deletions test/quick_diff_results.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{}

0 comments on commit 8c6f394

Please sign in to comment.