Skip to content

Commit

Permalink
Type lookup and evaluation improvements
Browse files Browse the repository at this point in the history
* count.wdl pipeline is working
* Specializing maps with types of type string
* Fixes for type lookup and evaluation
  • Loading branch information
orodeh authored Oct 31, 2017
1 parent 622561b commit 76ffbf7
Show file tree
Hide file tree
Showing 24 changed files with 3,958 additions and 181 deletions.
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ added according to user interest:

- Nested workflows (sub-workflows)
- Nested scatters
- Objects


*Use at your own risk:* for the time being, dxWDL is an exploratory
Expand Down
6 changes: 6 additions & 0 deletions RELEASE_NOTES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Release Notes

## 0.49
- Removed limitation on maximal hourly price per instance. Some bioinformatics pipelines
regularly require heavy weight instances.
- Fixed several WDL typing issues
- Improving the internal representation of JSON objects given as input.

## 0.48
- Support string interpolation in workflow expressions
- Handle scatters where the called tasks return file arrays, and non-native
Expand Down
32 changes: 29 additions & 3 deletions src/main/scala/dxWDL/CompilerErrorFormatter.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package dxWDL
import wdl4s.wdl.AstTools
import wdl4s.parser.WdlParser.{Ast, Terminal}
import wdl4s.wdl._
import wdl4s.wdl.types._

case class CompilerErrorFormatter(terminalMap: Map[Terminal, WorkflowSource]) {
private def pointToSource(t: Terminal): String = s"${line(t)}\n${" " * (t.getColumn - 1)}^"
Expand All @@ -16,9 +17,9 @@ case class CompilerErrorFormatter(terminalMap: Map[Terminal, WorkflowSource]) {
|""".stripMargin
}

def couldNotEvaluateType(ast: Ast) : String = {
val t: Terminal = AstTools.findTerminals(ast).head
s"""|Could not evaluate the WDL type for this expression
def couldNotEvaluateType(expr: WdlExpression) : String = {
val t: Terminal = AstTools.findTerminals(expr.ast).head
s"""|Could not evaluate the WDL type for expression
|
|${pointToSource(t)}
|""".stripMargin
Expand Down Expand Up @@ -94,6 +95,31 @@ case class CompilerErrorFormatter(terminalMap: Map[Terminal, WorkflowSource]) {
|""".stripMargin
}

// debugging
def traceExpression(ast: Ast) : String = {
val t: Terminal = AstTools.findTerminals(ast).head
s"""|
|${pointToSource(t)}
|""".stripMargin
}

def typeConversionRequired(expr: WdlExpression,
call: WdlCall,
srcType: WdlType,
trgType: WdlType) : String = {
val termList: Seq[Terminal] = AstTools.findTerminals(expr.ast)
val t:Terminal = termList match {
case Nil => AstTools.findTerminals(call.ast).head
case _ => AstTools.findTerminals(expr.ast).head
}
s"""|Warning: expression <${expr.toWdlString}> is coerced from type ${srcType.toWdlString}
|to ${trgType.toWdlString}.
|
|${pointToSource(t)}
|""".stripMargin
}


def undefinedMemberAccess(ast: Ast): String = {
val lhsAst = ast.getAttribute("lhs").asInstanceOf[Terminal]
val fqn = WdlExpression.toString(ast)
Expand Down
39 changes: 10 additions & 29 deletions src/main/scala/dxWDL/CompilerNative.scala
Original file line number Diff line number Diff line change
Expand Up @@ -578,32 +578,17 @@ case class CompilerNative(dxWDLrtId: String,

// Link source values to targets. This is the same as
// WdlVarLinks.genFields, but overcomes certain cases where the
// source and target WDL types do not match. For example, if the
// source is a File, and the target is an Array[File], we can
// modify the JSON to get around this.
// source and target WDL types do not match.
def genFieldsCastIfRequired(wvl: WdlVarLinks,
rawSrcType: WdlType,
bindName: String) : List[(String, JsValue)] = {
// System.err.println(s"genFieldsCastIfRequired(${bindName}) trgType=${wvl.wdlType.toWdlString} srcType=${srcType.toWdlString}")
cVar: IR.CVar) : List[(String, JsValue)] = {
val srcType = Utils.stripOptional(rawSrcType)
val trgType = Utils.stripOptional(wvl.wdlType)

val l:List[(String, JsValue)] =
if (trgType == srcType) {
WdlVarLinks.genFields(wvl, bindName).map { case(key, jsv) =>
(key, jsv)
}
} else if (trgType == WdlArrayType(srcType)) {
// Cast from T to Array[T]
WdlVarLinks.genFields(wvl, bindName).map{ case(key, jsv) =>
(key, JsArray(jsv))
}
} else {
throw new Exception(s"""|Linking error: source type=${rawSrcType.toWdlString}
|target type=${wvl.wdlType.toWdlString}, bindName=${bindName}"""
.stripMargin.replaceAll("\n", " "))
}
l.map{ case (key, json) => key -> json }
if (srcType != trgType)
throw new Exception(s"""|Link time casting from ${srcType.toWdlString}
|to ${trgType.toWdlString} not supported."""
.stripMargin.replaceAll("\n", " "))
WdlVarLinks.genFields(wvl, cVar.dxVarName)
}

// Calculate the stage inputs from the call closure
Expand All @@ -620,19 +605,15 @@ case class CompilerNative(dxWDLrtId: String,
// in a value at runtime.
m
case IR.SArgConst(wValue) =>
val wvl = WdlVarLinks.apply(cVar.wdlType, cVar.attrs, wValue)
val fields = genFieldsCastIfRequired(wvl,
wValue.wdlType,
cVar.dxVarName)
val wvl = WdlVarLinks.importFromWDL(cVar.wdlType, cVar.attrs, wValue)
val fields = genFieldsCastIfRequired(wvl, wValue.wdlType, cVar)
m ++ fields.toMap
case IR.SArgLink(stageName, argName) =>
val dxStage = stageDict(stageName)
val wvl = WdlVarLinks(cVar.wdlType,
cVar.attrs,
DxlStage(dxStage, IORef.Output, argName.dxVarName))
val fields = genFieldsCastIfRequired(wvl,
argName.wdlType,
cVar.dxVarName)
val fields = genFieldsCastIfRequired(wvl, argName.wdlType, cVar)
m ++ fields.toMap
}
}
Expand Down
179 changes: 134 additions & 45 deletions src/main/scala/dxWDL/CompilerSimplifyExpr.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,30 +7,124 @@
package dxWDL

import scala.collection.mutable.Queue
import scala.util.{Success}
import Utils.{genTmpVarName, Verbose}
import scala.util.{Failure, Success}
import Utils.{genTmpVarName, stripOptional, trace, Verbose}
import wdl4s.wdl._
import wdl4s.wdl.expression._
import wdl4s.parser.WdlParser.{Ast, Terminal}
import wdl4s.wdl.types._
//import wdl4s.wdl.WdlExpression.AstForExpressions

case class CompilerSimplifyExpr(wf: WdlWorkflow,
cef: CompilerErrorFormatter,
verbose: Verbose) {
val verbose2:Boolean = verbose.keywords contains "simplify"

private def isMemberAccess(a: Ast) = {
wdl4s.wdl.WdlExpression.AstForExpressions(a).isMemberAccess
}

// A member access expression such as [A.x]. Check if
// A is a call.
private def isCallOutputAccess(expr: WdlExpression,
ast: Ast,
call: WdlCall) : Boolean = {
val lhs:String = WdlExpression.toString(ast.getAttribute("lhs"))
try {
val wdlType = WdlNamespace.lookupType(wf)(lhs)
wdlType.isInstanceOf[WdlCallOutputsObjectType]
} catch {
case e:Throwable=> false
private def isCallOutputAccess(ast: Ast) : Boolean = {
if (!isMemberAccess(ast)) {
false
} else {
val lhs:String = WdlExpression.toString(ast.getAttribute("lhs"))
try {
val wdlType = Utils.lookupType(wf)(lhs)
wdlType.isInstanceOf[WdlCallOutputsObjectType]
} catch {
case e:Throwable=> false
}
}
}

// Convert the source to a singleton array. It is used in
// the GATK best practices pipeline. This conversion may not be a
// good idea, but it is used.
//
// https://github.com/openwdl/wdl/blob/develop/scripts/broad_pipelines/germline-short-variant-discovery/gvcf-generation-per-sample/0.2.0/PublicPairedSingleSampleWf_170412.wdl#L1256
// input_bams = SortAndFixSampleBam.output_bam
// input_bams is of type Array[File]
// output_bam is of type File
private def typesArrayDiff(expr: WdlExpression,
callerType:WdlType,
calleeType:WdlType) : Boolean = {
val srcType: WdlType = stripOptional(callerType)
val trgType: WdlType = stripOptional(calleeType)
(srcType, trgType) match {
case (WdlArrayType(_), WdlArrayType(_)) => false
case (_, WdlArrayType(_)) =>
System.err.println(s"Warning: converting ${expr.toWdlString} from ${srcType.toWdlString} to ${trgType.toWdlString}")
//System.err.println(cef.traceExpression(expr.ast.asInstanceOf[Ast]))
true
case (_, _) => false
}
}

// Convert a T to Array[T]
// 1 -> [1]
// "a" -> ["a"]
private def augmentExprToArray(expr: WdlExpression) : WdlExpression = {
// Convert the source to a singleton array. It is used in
// the GATK best practices pipeline. This conversion may not be a
// good idea, but it is used.
WdlExpression.fromString(s"[ ${expr.toWdlString} ]")
}

// Figure out the type of an expression
private def evalType(expr: WdlExpression, parent: Scope) : WdlType = {
/* expr.evaluateType(Utils.lookupType(parent),
new WdlStandardLibraryFunctionsType,
Some(parent)) match {*/
dxWDL.TypeEvaluator(Utils.lookupType(parent),
new WdlStandardLibraryFunctionsType,
Some(parent)).evaluate(expr.ast) match {
case Success(wdlType) => wdlType
case Failure(f) =>
System.err.println(cef.couldNotEvaluateType(expr))
throw f
}
}

// Make sure [srcType] can be coerced into [trgType]. wdl4s allows
// certain conversions that we do not. We assume that wdl4s type checking
// has already taken place; we don't need to repeat it.
//
// For example, in wdl4s it is legal to coerce a String into a
// File in workflow context. This is not possible in dxWDL.
private def typesMatch(srcType: WdlType, trgType: WdlType) : Boolean = {
(srcType, trgType) match {
// base cases
case (WdlBooleanType, WdlBooleanType) => true
case (WdlIntegerType, WdlIntegerType) => true
case (WdlFloatType, WdlFloatType) => true
case (WdlStringType, WdlStringType) => true
case (WdlFileType, WdlFileType) => true

// Files: it is legal to convert a file to a string, but not the other
// way around.
//case (WdlFileType, WdlStringType) => true

// array
case (WdlArrayType(WdlNothingType), WdlArrayType(_)) => true
case (WdlArrayType(s), WdlArrayType(t)) => typesMatch(s,t)

// strip optionals
case (WdlOptionalType(s), WdlOptionalType(t)) => typesMatch(s,t)
case (WdlOptionalType(s), t) => typesMatch(s,t)
case (s, WdlOptionalType(t)) => typesMatch(s,t)

// map
case (WdlMapType(sk, sv), WdlMapType(tk, tv)) =>
typesMatch(sk, tv) && typesMatch(sv, tv)

case (WdlPairType(sLeft, sRight), WdlPairType(tLeft, tRight)) =>
typesMatch(sLeft, tLeft) && typesMatch(sRight, tRight)

// objects
case (WdlObjectType, WdlObjectType) => true
case (_,_) => false
}
}

Expand All @@ -52,10 +146,6 @@ case class CompilerSimplifyExpr(wf: WdlWorkflow,
}
}

def isMemberAccess(a: Ast) = {
wdl4s.wdl.WdlExpression.AstForExpressions(a).isMemberAccess
}

// Transform a call by lifting its non trivial expressions,
// and converting them into declarations. For example:
//
Expand All @@ -72,26 +162,40 @@ case class CompilerSimplifyExpr(wf: WdlWorkflow,
// Inside a scatter we can deal with field accesses,
private def simplifyCall(call: WdlCall) : Vector[Scope] = {
val tmpDecls = Queue[Scope]()

// replace an expression with a temporary variable
def replaceWithTempVar(t: WdlType, expr: WdlExpression) : WdlExpression = {
val tmpVarName = genTmpVarName()
tmpDecls += WdlRewrite.declaration(t, tmpVarName, Some(expr))
WdlExpression.fromString(tmpVarName)
}

val inputs: Map[String, WdlExpression] = call.inputMappings.map { case (key, expr) =>
val rhs = expr.ast match {
val calleeDecl: Declaration =
call.declarations.find(decl => decl.unqualifiedName == key).get
val calleeType = calleeDecl.wdlType
val callerType = evalType(expr, call)
val rhs:WdlExpression = expr.ast match {
case _ if typesArrayDiff(expr, callerType, calleeType) =>
// A coercion is required to convert the expression to the expected
// type
val augExpr = augmentExprToArray(expr)
replaceWithTempVar(calleeType, augExpr)
case _ if (!typesMatch(callerType, calleeType)) =>
System.err.println(cef.typeConversionRequired(expr, call,
callerType, calleeType))
replaceWithTempVar(calleeType, expr)
case t: Terminal if nonInterpolation(t) => expr
case a: Ast if (isMemberAccess(a) && isCallOutputAccess(expr, a, call)) =>
case a: Ast if isCallOutputAccess(a) =>
// Accessing an expression like A.B.C
// The expression could be:
// 1) Result from a previous call
// 2) Access to a field in a pair, or an object (pair.left, obj.name)
// Only the first case can be handled inline, the other requires
// a temporary variable
// a temporary variable.
expr
case _ =>
// replace an expression with a temporary variable
val tmpVarName = genTmpVarName()
val calleeDecl: Declaration =
call.declarations.find(decl => decl.unqualifiedName == key).get
val wdlType = calleeDecl.wdlType
//tmpDecls += Declaration(wdlType, tmpVarName, Some(expr), call.parent, a)
tmpDecls += WdlRewrite.declaration(wdlType, tmpVarName, Some(expr))
WdlExpression.fromString(tmpVarName)
replaceWithTempVar(calleeType, expr)
}
(key -> rhs)
}
Expand Down Expand Up @@ -125,16 +229,8 @@ case class CompilerSimplifyExpr(wf: WdlWorkflow,
.map(x => simplify(x))
.toVector
.flatten

// Figure out the expression type for a collection we loop over in a scatter
val collType : WdlType =
ssc.collection.evaluateType(WdlNamespace.lookupType(ssc),
new WdlStandardLibraryFunctionsType,
Some(ssc)) match {
case Success(wdlType) => wdlType
case _ => throw new Exception(cef.couldNotEvaluateType(ssc.ast))
}

val collType : WdlType = evalType(ssc.collection, ssc)
if (isConstOrVar(ssc.collection)) {
// The collection is a simple variable, there is no need
// to create an additional declaration
Expand All @@ -159,14 +255,7 @@ case class CompilerSimplifyExpr(wf: WdlWorkflow,
.flatten

// Figure out the expression type for a collection we loop over in a scatter
val exprType : WdlType =
cond.condition.evaluateType(WdlNamespace.lookupType(cond),
new WdlStandardLibraryFunctionsType,
Some(cond)) match {
case Success(wdlType) => wdlType
case _ => throw new Exception(cef.couldNotEvaluateType(cond.ast))
}

val exprType : WdlType = evalType(cond.condition, cond)
if (isConstOrVar(cond.condition)) {
// The condition is a simple variable, there is no need
// to create an additional declaration
Expand Down Expand Up @@ -234,7 +323,7 @@ object CompilerSimplifyExpr {
}

def apply(ns: WdlNamespace, verbose: Verbose) : WdlNamespace = {
Utils.trace(verbose.on, "simplifying workflow expressions")
trace(verbose.on, "simplifying workflow expressions")
val cef = new CompilerErrorFormatter(ns.terminalMap)
checkReservedWords(ns, cef)

Expand Down
Loading

0 comments on commit 76ffbf7

Please sign in to comment.