Skip to content

Commit

Permalink
Merge pull request #13982 from aschackmull/dataflow/typeflow-calledge…
Browse files Browse the repository at this point in the history
…-pruning

Dataflow: Add type-based call-edge pruning.
  • Loading branch information
aschackmull authored Sep 21, 2023
2 parents 3d8231b + 4205453 commit 13f7daf
Show file tree
Hide file tree
Showing 19 changed files with 919 additions and 195 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,8 @@ predicate expectsContent(Node n, ContentSet c) { none() }

predicate typeStrongerThan(DataFlowType t1, DataFlowType t2) { none() }

predicate localMustFlowStep(Node node1, Node node2) { none() }

/** Gets the type of `n` used for type pruning. */
Type getNodeType(Node n) {
suppressUnusedNode(n) and
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -804,6 +804,8 @@ predicate expectsContent(Node n, ContentSet c) { none() }

predicate typeStrongerThan(DataFlowType t1, DataFlowType t2) { none() }

predicate localMustFlowStep(Node node1, Node node2) { none() }

/** Gets the type of `n` used for type pruning. */
DataFlowType getNodeType(Node n) {
suppressUnusedNode(n) and
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -533,8 +533,40 @@ module LocalFlow {
) and
not exists(getALastEvalNode(result))
}

/**
* Holds if the value of `node2` is given by `node1`.
*/
predicate localMustFlowStep(Node node1, Node node2) {
exists(Callable c, Expr e |
node1.(InstanceParameterNode).getCallable() = c and
node2.asExpr() = e and
(e instanceof ThisAccess or e instanceof BaseAccess) and
c = e.getEnclosingCallable()
)
or
hasNodePath(any(LocalExprStepConfiguration x), node1, node2) and
(
node2 instanceof SsaDefinitionExtNode or
node2.asExpr() instanceof Cast or
node2.asExpr() instanceof AssignExpr
)
or
exists(SsaImpl::Definition def |
def = getSsaDefinitionExt(node1) and
exists(SsaImpl::getAReadAtNode(def, node2.(ExprNode).getControlFlowNode()))
)
or
node1 =
unique(FlowSummaryNode n1 |
FlowSummaryImpl::Private::Steps::summaryLocalStep(n1.getSummaryNode(),
node2.(FlowSummaryNode).getSummaryNode(), true)
)
}
}

predicate localMustFlowStep = LocalFlow::localMustFlowStep/2;

/**
* This is the local flow predicate that is used as a building block in global
* data flow. It excludes SSA flow through instance fields, as flow through fields
Expand Down
2 changes: 2 additions & 0 deletions go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,8 @@ predicate expectsContent(Node n, ContentSet c) {

predicate typeStrongerThan(DataFlowType t1, DataFlowType t2) { none() }

predicate localMustFlowStep(Node node1, Node node2) { none() }

/** Gets the type of `n` used for type pruning. */
DataFlowType getNodeType(Node n) { result = TTodoDataFlowType() and exists(n) }

Expand Down
16 changes: 14 additions & 2 deletions java/ql/lib/semmle/code/java/Expr.qll
Original file line number Diff line number Diff line change
Expand Up @@ -1676,13 +1676,25 @@ abstract class InstanceAccess extends Expr {
/** Holds if this instance access is to an enclosing instance of type `t`. */
predicate isEnclosingInstanceAccess(RefType t) {
t = this.getQualifier().getType().(RefType).getSourceDeclaration() and
t != this.getEnclosingCallable().getDeclaringType()
t != this.getEnclosingCallable().getDeclaringType() and
not this.isSuperInterfaceAccess()
or
not exists(this.getQualifier()) and
(not exists(this.getQualifier()) or this.isSuperInterfaceAccess()) and
exists(LambdaExpr lam | lam.asMethod() = this.getEnclosingCallable() |
t = lam.getAnonymousClass().getEnclosingType()
)
}

// A default method on an interface, `I`, may be invoked using `I.super.m()`.
// This always refers to the implemented interfaces of `this`. This form of
// qualified `super` cannot be combined with accessing an enclosing instance.
// JLS 15.11.2. "Accessing Superclass Members using super"
// JLS 15.12. "Method Invocation Expressions"
// JLS 15.12.1. "Compile-Time Step 1: Determine Type to Search"
private predicate isSuperInterfaceAccess() {
this instanceof SuperAccess and
this.getQualifier().getType().(RefType).getSourceDeclaration() instanceof Interface
}
}

/**
Expand Down
15 changes: 14 additions & 1 deletion java/ql/lib/semmle/code/java/dataflow/TypeFlow.qll
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,18 @@ predicate arrayInstanceOfGuarded(ArrayAccess aa, RefType t) {
)
}

/**
* Holds if `t` is the type of the `this` value corresponding to the the
* `SuperAccess`. As the `SuperAccess` expression has the type of the supertype,
* the type `t` is a stronger type bound.
*/
private predicate superAccess(SuperAccess sup, RefType t) {
sup.isEnclosingInstanceAccess(t)
or
sup.isOwnInstanceAccess() and
t = sup.getEnclosingCallable().getDeclaringType()
}

/**
* Holds if `n` has type `t` and this information is discarded, such that `t`
* might be a better type bound for nodes where `n` flows to. This might include
Expand All @@ -452,7 +464,8 @@ private predicate typeFlowBaseCand(TypeFlowNode n, RefType t) {
downcastSuccessor(n.asExpr(), srctype) or
instanceOfGuarded(n.asExpr(), srctype) or
arrayInstanceOfGuarded(n.asExpr(), srctype) or
n.asExpr().(FunctionalExpr).getConstructedType() = srctype
n.asExpr().(FunctionalExpr).getConstructedType() = srctype or
superAccess(n.asExpr(), srctype)
|
t = srctype.(BoundedType).getAnUltimateUpperBoundType()
or
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -326,13 +326,18 @@ string ppReprType(DataFlowType t) {
else result = t.toString()
}

pragma[nomagic]
private predicate compatibleTypes0(DataFlowType t1, DataFlowType t2) {
erasedHaveIntersection(t1, t2)
}

/**
* Holds if `t1` and `t2` are compatible, that is, whether data can flow from
* a node of type `t1` to a node of type `t2`.
*/
bindingset[t1, t2]
pragma[inline_late]
predicate compatibleTypes(DataFlowType t1, DataFlowType t2) { erasedHaveIntersection(t1, t2) }
predicate compatibleTypes(DataFlowType t1, DataFlowType t2) { compatibleTypes0(t1, t2) }

/** A node that performs a type cast. */
class CastNode extends ExprNode {
Expand Down
33 changes: 33 additions & 0 deletions java/ql/lib/semmle/code/java/dataflow/internal/DataFlowUtil.qll
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,39 @@ private module Cached {
}
}

/**
* Holds if the value of `node2` is given by `node1`.
*/
predicate localMustFlowStep(Node node1, Node node2) {
exists(Callable c | node1.(InstanceParameterNode).getCallable() = c |
exists(InstanceAccess ia |
ia = node2.asExpr() and ia.getEnclosingCallable() = c and ia.isOwnInstanceAccess()
)
or
c =
node2.(ImplicitInstanceAccess).getInstanceAccess().(OwnInstanceAccess).getEnclosingCallable()
)
or
exists(SsaImplicitInit init |
init.isParameterDefinition(node1.asParameter()) and init.getAUse() = node2.asExpr()
)
or
exists(SsaExplicitUpdate upd |
upd.getDefiningExpr().(VariableAssign).getSource() = node1.asExpr() and
upd.getAUse() = node2.asExpr()
)
or
node2.asExpr().(CastingExpr).getExpr() = node1.asExpr()
or
node2.asExpr().(AssignExpr).getSource() = node1.asExpr()
or
node1 =
unique(FlowSummaryNode n1 |
FlowSummaryImpl::Private::Steps::summaryLocalStep(n1.getSummaryNode(),
node2.(FlowSummaryNode).getSummaryNode(), true)
)
}

import Cached

private predicate capturedVariableRead(Node n) {
Expand Down
1 change: 1 addition & 0 deletions java/ql/lib/semmle/code/java/frameworks/Thrift.qll
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import java
* A file detected as generated by the Apache Thrift Compiler.
*/
class ThriftGeneratedFile extends GeneratedFile {
cached
ThriftGeneratedFile() {
exists(JavadocElement t | t.getFile() = this |
exists(string msg | msg = t.getText() | msg.regexpMatch("(?i).*\\bAutogenerated by Thrift.*"))
Expand Down
60 changes: 60 additions & 0 deletions java/ql/test/library-tests/dataflow/typeflow-dispatch/A.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import java.util.*;
import java.util.function.*;

public class A {
static String source(String tag) { return null; }

static void sink(Object o) { }

interface MyConsumer {
void run(Object o);
}

void apply(MyConsumer f, Object x) {
f.run(x);
}

void apply_wrap(MyConsumer f, Object x) {
apply(f, x);
}

void testLambdaDispatch1() {
apply_wrap(x -> { sink(x); }, source("A")); // $ hasValueFlow=A
apply_wrap(x -> { sink(x); }, null); // no flow
apply_wrap(x -> { }, source("B"));
apply_wrap(x -> { }, null);
}

void forEach_wrap(List<Object> l, Consumer<Object> f) {
l.forEach(f);
}

void testLambdaDispatch2() {
List<Object> tainted = new ArrayList<>();
tainted.add(source("L"));
List<Object> safe = new ArrayList<>();
forEach_wrap(safe, x -> { sink(x); }); // no flow
forEach_wrap(tainted, x -> { sink(x); }); // $ hasValueFlow=L
}

static class TaintedClass {
public String toString() { return source("TaintedClass"); }
}

static class SafeClass {
public String toString() { return "safe"; }
}

String convertToString(Object o) {
return o.toString();
}

String convertToString_wrap(Object o) {
return convertToString(o);
}

void testToString1() {
String unused = convertToString_wrap(new TaintedClass());
sink(convertToString_wrap(new SafeClass())); // no flow
}
}
Empty file.
2 changes: 2 additions & 0 deletions java/ql/test/library-tests/dataflow/typeflow-dispatch/test.ql
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
import TestUtilities.InlineFlowTest
import DefaultFlowTest
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,8 @@ predicate compatibleTypes(DataFlowType t1, DataFlowType t2) { any() }

predicate typeStrongerThan(DataFlowType t1, DataFlowType t2) { none() }

predicate localMustFlowStep(Node node1, Node node2) { none() }

/**
* Gets the type of `node`.
*/
Expand Down
2 changes: 2 additions & 0 deletions ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll
Original file line number Diff line number Diff line change
Expand Up @@ -1697,6 +1697,8 @@ private predicate mustHaveLambdaType(CfgNodes::ExprCfgNode e, Callable c) {
)
}

predicate localMustFlowStep(Node node1, Node node2) { none() }

/** Gets the type of `n` used for type pruning. */
DataFlowType getNodeType(Node n) {
result = TLambdaDataFlowType(n.(LambdaSelfReferenceNode).getCallable())
Expand Down
4 changes: 4 additions & 0 deletions shared/dataflow/change-notes/2023-09-12-typeflow.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
---
category: majorAnalysis
---
* Added support for type-based call edge pruning. This removes data flow call edges that are incompatible with the set of flow paths that reach it based on type information. This improves dispatch precision for constructs like lambdas, `Object.toString()` calls, and the visitor pattern. For now this is only enabled for Java and C#.
5 changes: 5 additions & 0 deletions shared/dataflow/codeql/dataflow/DataFlow.qll
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,11 @@ signature module InputSig {
*/
predicate allowParameterReturnInSelf(ParameterNode p);

/**
* Holds if the value of `node2` is given by `node1`.
*/
predicate localMustFlowStep(Node node1, Node node2);

class LambdaCallKind;

/** Holds if `creation` is an expression that creates a lambda of kind `kind` for `c`. */
Expand Down
Loading

0 comments on commit 13f7daf

Please sign in to comment.