From 4fa83f5532df22e3aac842b32f6cbe1be3956206 Mon Sep 17 00:00:00 2001 From: Tom Hvitved Date: Fri, 22 Nov 2024 14:54:39 +0100 Subject: [PATCH] Rust: Flow through enum constructors --- rust/ql/lib/codeql/rust/dataflow/DataFlow.qll | 4 + .../rust/dataflow/internal/DataFlowImpl.qll | 236 ++++++++++++------ .../dataflow/local/DataFlowStep.expected | 18 ++ .../dataflow/local/inline-flow.expected | 20 ++ .../test/library-tests/dataflow/local/main.rs | 4 +- 5 files changed, 204 insertions(+), 78 deletions(-) diff --git a/rust/ql/lib/codeql/rust/dataflow/DataFlow.qll b/rust/ql/lib/codeql/rust/dataflow/DataFlow.qll index 3a09df2c45d1..2b00342ef6ef 100644 --- a/rust/ql/lib/codeql/rust/dataflow/DataFlow.qll +++ b/rust/ql/lib/codeql/rust/dataflow/DataFlow.qll @@ -19,6 +19,10 @@ module DataFlow { final class PostUpdateNode = Node::PostUpdateNode; + final class Content = DataFlowImpl::Content; + + final class ContentSet = DataFlowImpl::ContentSet; + /** * Holds if data flows from `nodeFrom` to `nodeTo` in exactly one local * (intra-procedural) step. diff --git a/rust/ql/lib/codeql/rust/dataflow/internal/DataFlowImpl.qll b/rust/ql/lib/codeql/rust/dataflow/internal/DataFlowImpl.qll index 4e892e8d19c7..21c72c5996cb 100644 --- a/rust/ql/lib/codeql/rust/dataflow/internal/DataFlowImpl.qll +++ b/rust/ql/lib/codeql/rust/dataflow/internal/DataFlowImpl.qll @@ -115,6 +115,11 @@ module Node { */ ExprCfgNode asExpr() { none() } + /** + * Gets the pattern that corresponds to this node, if any. + */ + PatCfgNode asPat() { none() } + /** Gets the enclosing callable. */ DataFlowCallable getEnclosingCallable() { result = TCfgScope(this.getCfgScope()) } @@ -177,8 +182,7 @@ module Node { PatNode() { this = TPatNode(n) } - /** Gets the `PatCfgNode` in the CFG that this node corresponds to. */ - PatCfgNode getPat() { result = n } + override PatCfgNode asPat() { result = n } } abstract class ParameterNode extends AstCfgFlowNode { } @@ -333,8 +337,7 @@ module LocalFlow { nodeFrom.(Node::AstCfgFlowNode).getCfgNode() = nodeTo.(Node::SsaNode).getDefinitionExt().(Ssa::WriteDefinition).getControlFlowNode() or - nodeFrom.(Node::PositionalParameterNode).getParameter().getPat() = - nodeTo.(Node::PatNode).getPat() + nodeFrom.(Node::PositionalParameterNode).getParameter().getPat() = nodeTo.asPat() or SsaFlow::localFlowStep(_, nodeFrom, nodeTo, _) or @@ -342,9 +345,118 @@ module LocalFlow { a.getRhs() = nodeFrom.getCfgNode() and a.getLhs() = nodeTo.getCfgNode() ) + or + exists(MatchExprCfgNode match | + nodeFrom.asExpr() = match.getExpr() and + nodeTo.asPat() = match.getArmPat(_) + ) } } +abstract class Content extends TContent { + abstract string toString(); +} + +private class VariantContent extends Content, TVariantContent { + private CrateOriginOption crate; + private string path; + private string name; + + VariantContent() { this = TVariantContent(crate, path, name) } + + override string toString() { result = name } +} + +abstract class ContentSet extends TContentSet { + /** Gets a textual representation of this element. */ + abstract string toString(); + + /** Gets a content that may be stored into when storing into this set. */ + abstract Content getAStoreContent(); + + /** Gets a content that may be read from when reading from this set. */ + abstract Content getAReadContent(); +} + +private class SingletonContentSet extends ContentSet, TSingletonContentSet { + private Content c; + + SingletonContentSet() { this = TSingletonContentSet(c) } + + Content getContent() { result = c } + + override string toString() { result = c.toString() } + + override Content getAStoreContent() { result = c } + + override Content getAReadContent() { result = c } +} + +private import codeql.util.Option + +private class CrateOrigin extends string { + CrateOrigin() { + this = [any(Item i).getCrateOrigin(), any(Resolvable r).getResolvedCrateOrigin()] + } +} + +private class CrateOriginOption = Option::Option; + +pragma[nomagic] +private predicate hasExtendedCanonicalPath(Item i, CrateOriginOption crate, string path) { + path = i.getExtendedCanonicalPath() and + ( + crate.asSome() = i.getCrateOrigin() + or + crate.isNone() and + not i.hasCrateOrigin() + ) +} + +pragma[nomagic] +private predicate resolvesExtendedCanonicalPath(Resolvable r, CrateOriginOption crate, string path) { + path = r.getResolvedPath() and + ( + crate.asSome() = r.getResolvedCrateOrigin() + or + crate.isNone() and + not r.hasResolvedCrateOrigin() + ) +} + +pragma[nomagic] +private predicate callResolvesExtendedCanonicalPath( + CallExprBase call, CrateOriginOption crate, string path +) { + exists(Resolvable r | resolvesExtendedCanonicalPath(r, crate, path) | + r = call.(MethodCallExpr) + or + r = call.(CallExpr).getExpr().(PathExpr).getPath() + ) +} + +/** Holds if qualified path `p` resolves to variant `c`. */ +private predicate pathResolvesToVariant(Path p, VariantContent c) { + exists(CrateOriginOption crate, string path | + resolvesExtendedCanonicalPath(p.getQualifier(), crate, path) and + c = TVariantContent(crate, path, p.getPart().getNameRef().getText()) + ) + or + // TODO: Remove once library types are extracted + not p.hasQualifier() and + c = TVariantContent(_, "crate::std::option::Option", p.getPart().getNameRef().getText()) +} + +/** Holds if `ce` constructs an enum value of type `c`. */ +private predicate variantConstructor(CallExpr ce, VariantContent c) { + pathResolvesToVariant(ce.getExpr().(PathExpr).getPath(), c) +} + +/** Holds if `p` destructs an enum value of type `c`. */ +private predicate variantDestructor(TupleStructPat p, VariantContent c) { + pathResolvesToVariant(p.getPath(), c) +} + private class DataFlowCallableAlias = DataFlowCallable; private class ReturnKindAlias = ReturnKind; @@ -353,6 +465,10 @@ private class DataFlowCallAlias = DataFlowCall; private class ParameterPositionAlias = ParameterPosition; +private class ContentAlias = Content; + +private class ContentSetAlias = ContentSet; + module RustDataFlow implements InputSig { /** * An element, viewed as a node in a data flow graph. Either an expression @@ -399,55 +515,11 @@ module RustDataFlow implements InputSig { final class ReturnKind = ReturnKindAlias; - private import codeql.util.Option - - private class CrateOrigin extends string { - CrateOrigin() { - this = [any(Item i).getCrateOrigin(), any(Resolvable r).getResolvedCrateOrigin()] - } - } - - private class CrateOriginOption = Option::Option; - - pragma[nomagic] - private predicate hasExtendedCanonicalPath( - DataFlowCallable c, CrateOriginOption crate, string path - ) { - exists(Item i | - i = c.asCfgScope() and - path = i.getExtendedCanonicalPath() - | - crate.asSome() = i.getCrateOrigin() - or - crate.isNone() and - not i.hasCrateOrigin() - ) - } - - pragma[nomagic] - private predicate resolvesExtendedCanonicalPath( - DataFlowCall c, CrateOriginOption crate, string path - ) { - exists(Resolvable r | - path = r.getResolvedPath() and - ( - r = c.asMethodCallExprCfgNode().getExpr() - or - r = c.asCallExprCfgNode().getExpr().(PathExprCfgNode).getPath() - ) - | - crate.asSome() = r.getResolvedCrateOrigin() - or - crate.isNone() and - not r.hasResolvedCrateOrigin() - ) - } - /** Gets a viable implementation of the target of the given `Call`. */ DataFlowCallable viableCallable(DataFlowCall call) { exists(string path, CrateOriginOption crate | - hasExtendedCanonicalPath(result, crate, path) and - resolvesExtendedCanonicalPath(call, crate, path) + hasExtendedCanonicalPath(result.asCfgScope(), crate, path) and + callResolvesExtendedCanonicalPath(call.asCallBaseExprCfgNode().getExpr(), crate, path) ) } @@ -469,24 +541,15 @@ module RustDataFlow implements InputSig { predicate typeStrongerThan(DataFlowType t1, DataFlowType t2) { none() } - final class Content = Void; - - predicate forceHighPrecision(Content c) { none() } - - class ContentSet extends TContentSet { - /** Gets a textual representation of this element. */ - string toString() { result = "ContentSet" } + class Content = ContentAlias; - /** Gets a content that may be stored into when storing into this set. */ - Content getAStoreContent() { none() } + class ContentSet = ContentSetAlias; - /** Gets a content that may be read from when reading from this set. */ - Content getAReadContent() { none() } - } + predicate forceHighPrecision(Content c) { none() } - final class ContentApprox = Void; + final class ContentApprox = Content; // todo - ContentApprox getContentApprox(Content c) { any() } + ContentApprox getContentApprox(Content c) { result = c } class ParameterPosition = ParameterPositionAlias; @@ -519,14 +582,27 @@ module RustDataFlow implements InputSig { * `node1` references an object with a content `c.getAReadContent()` whose * value ends up in `node2`. */ - predicate readStep(Node node1, ContentSet c, Node node2) { none() } + predicate readStep(Node node1, ContentSet c, Node node2) { + node1.asPat() = + any(TupleStructPatCfgNode pat | + variantDestructor(pat.getPat(), c.(SingletonContentSet).getContent()) and + node2.asPat() = pat.getField(0) + ) + } /** * Holds if data can flow from `node1` to `node2` via a store into `c`. Thus, * `node2` references an object with a content `c.getAStoreContent()` that * contains the value of `node1`. */ - predicate storeStep(Node node1, ContentSet c, Node node2) { none() } + predicate storeStep(Node node1, ContentSet c, Node node2) { + // todo: use post-update + node2.asExpr() = + any(CallExprCfgNode call | + variantConstructor(call.getCallExpr(), c.(SingletonContentSet).getContent()) and + node1.asExpr() = call.getArgument(0) + ) + } /** * Holds if values stored inside content `c` are cleared at node `n`. For example, @@ -593,8 +669,6 @@ module RustDataFlow implements InputSig { class DataFlowSecondLevelScope = Void; } -final class ContentSet = RustDataFlow::ContentSet; - import MakeImpl /** A collection of cached types and predicates to be evaluated in the same stage. */ @@ -612,14 +686,6 @@ private module Cached { cached newtype TDataFlowCall = TCall(CallExprBaseCfgNode c) - cached - newtype TOptionalContentSet = - TAnyElementContent() or - TAnyContent() - - cached - class TContentSet = TAnyElementContent or TAnyContent; - cached newtype TDataFlowCallable = TCfgScope(CfgScope scope) @@ -635,6 +701,24 @@ private module Cached { i in [0 .. max([any(ParamList l).getNumberOfParams(), any(ArgList l).getNumberOfArgs()]) - 1] } or TSelfParameterPosition() + + cached + newtype TContent = + TVariantContent(CrateOriginOption crate, string path, string name) { + exists(Enum e | + hasExtendedCanonicalPath(e, crate, path) and + // s = e.getExtendedCanonicalPath() and + name = e.getVariantList().getAVariant().getName().getText() + ) + or + // TODO: Remove once library types are extracted + crate.isNone() and + path = "crate::std::option::Option" and + name = ["None", "Some"] + } + + cached + newtype TContentSet = TSingletonContentSet(Content c) } import Cached diff --git a/rust/ql/test/library-tests/dataflow/local/DataFlowStep.expected b/rust/ql/test/library-tests/dataflow/local/DataFlowStep.expected index 433f38706457..dd5926064bb9 100644 --- a/rust/ql/test/library-tests/dataflow/local/DataFlowStep.expected +++ b/rust/ql/test/library-tests/dataflow/local/DataFlowStep.expected @@ -33,6 +33,8 @@ | main.rs:32:9:32:9 | [SSA] b | main.rs:36:10:36:10 | b | | main.rs:32:9:32:9 | b | main.rs:32:9:32:9 | [SSA] b | | main.rs:32:13:35:5 | MatchExpr | main.rs:32:9:32:9 | b | +| main.rs:32:19:32:19 | m | main.rs:33:9:33:15 | TupleStructPat | +| main.rs:32:19:32:19 | m | main.rs:34:9:34:12 | None | | main.rs:33:20:33:20 | a | main.rs:32:13:35:5 | MatchExpr | | main.rs:34:17:34:17 | 0 | main.rs:32:13:35:5 | MatchExpr | | main.rs:40:9:40:9 | [SSA] a | main.rs:43:10:43:10 | a | @@ -79,11 +81,15 @@ | main.rs:105:9:105:10 | [SSA] s2 | main.rs:110:11:110:12 | s2 | | main.rs:105:9:105:10 | s2 | main.rs:105:9:105:10 | [SSA] s2 | | main.rs:105:14:105:28 | CallExpr | main.rs:105:9:105:10 | s2 | +| main.rs:106:11:106:12 | s1 | main.rs:107:9:107:23 | TupleStructPat | +| main.rs:106:11:106:12 | s1 | main.rs:108:9:108:20 | PathPat | | main.rs:107:22:107:22 | [SSA] n | main.rs:107:33:107:33 | n | | main.rs:107:22:107:22 | n | main.rs:107:22:107:22 | [SSA] n | | main.rs:107:28:107:34 | CallExpr | main.rs:106:5:109:5 | MatchExpr | | main.rs:108:25:108:31 | CallExpr | main.rs:106:5:109:5 | MatchExpr | | main.rs:110:5:113:5 | MatchExpr | main.rs:103:37:114:1 | BlockExpr | +| main.rs:110:11:110:12 | s2 | main.rs:111:9:111:23 | TupleStructPat | +| main.rs:110:11:110:12 | s2 | main.rs:112:9:112:20 | PathPat | | main.rs:111:22:111:22 | [SSA] n | main.rs:111:33:111:33 | n | | main.rs:111:22:111:22 | n | main.rs:111:22:111:22 | [SSA] n | | main.rs:111:28:111:34 | CallExpr | main.rs:110:5:113:5 | MatchExpr | @@ -94,11 +100,15 @@ | main.rs:118:9:118:10 | [SSA] s2 | main.rs:123:11:123:12 | s2 | | main.rs:118:9:118:10 | s2 | main.rs:118:9:118:10 | [SSA] s2 | | main.rs:118:14:118:20 | CallExpr | main.rs:118:9:118:10 | s2 | +| main.rs:119:11:119:12 | s1 | main.rs:120:9:120:15 | TupleStructPat | +| main.rs:119:11:119:12 | s1 | main.rs:121:9:121:12 | None | | main.rs:120:14:120:14 | [SSA] n | main.rs:120:25:120:25 | n | | main.rs:120:14:120:14 | n | main.rs:120:14:120:14 | [SSA] n | | main.rs:120:20:120:26 | CallExpr | main.rs:119:5:122:5 | MatchExpr | | main.rs:121:17:121:23 | CallExpr | main.rs:119:5:122:5 | MatchExpr | | main.rs:123:5:126:5 | MatchExpr | main.rs:116:39:127:1 | BlockExpr | +| main.rs:123:11:123:12 | s2 | main.rs:124:9:124:15 | TupleStructPat | +| main.rs:123:11:123:12 | s2 | main.rs:125:9:125:12 | None | | main.rs:124:14:124:14 | [SSA] n | main.rs:124:25:124:25 | n | | main.rs:124:14:124:14 | n | main.rs:124:14:124:14 | [SSA] n | | main.rs:124:20:124:26 | CallExpr | main.rs:123:5:126:5 | MatchExpr | @@ -109,6 +119,8 @@ | main.rs:136:9:136:10 | [SSA] s2 | main.rs:141:11:141:12 | s2 | | main.rs:136:9:136:10 | s2 | main.rs:136:9:136:10 | [SSA] s2 | | main.rs:136:14:136:25 | CallExpr | main.rs:136:9:136:10 | s2 | +| main.rs:137:11:137:12 | s1 | main.rs:138:9:138:20 | TupleStructPat | +| main.rs:137:11:137:12 | s1 | main.rs:139:9:139:20 | TupleStructPat | | main.rs:138:19:138:19 | [SSA] n | main.rs:138:30:138:30 | n | | main.rs:138:19:138:19 | n | main.rs:138:19:138:19 | [SSA] n | | main.rs:138:25:138:31 | CallExpr | main.rs:137:5:140:5 | MatchExpr | @@ -116,6 +128,8 @@ | main.rs:139:19:139:19 | n | main.rs:139:19:139:19 | [SSA] n | | main.rs:139:25:139:31 | CallExpr | main.rs:137:5:140:5 | MatchExpr | | main.rs:141:5:144:5 | MatchExpr | main.rs:134:42:145:1 | BlockExpr | +| main.rs:141:11:141:12 | s2 | main.rs:142:9:142:20 | TupleStructPat | +| main.rs:141:11:141:12 | s2 | main.rs:143:9:143:20 | TupleStructPat | | main.rs:142:19:142:19 | [SSA] n | main.rs:142:30:142:30 | n | | main.rs:142:19:142:19 | n | main.rs:142:19:142:19 | [SSA] n | | main.rs:142:25:142:31 | CallExpr | main.rs:141:5:144:5 | MatchExpr | @@ -128,6 +142,8 @@ | main.rs:151:9:151:10 | [SSA] s2 | main.rs:156:11:156:12 | s2 | | main.rs:151:9:151:10 | s2 | main.rs:151:9:151:10 | [SSA] s2 | | main.rs:151:14:151:17 | CallExpr | main.rs:151:9:151:10 | s2 | +| main.rs:152:11:152:12 | s1 | main.rs:153:9:153:12 | TupleStructPat | +| main.rs:152:11:152:12 | s1 | main.rs:154:9:154:12 | TupleStructPat | | main.rs:153:11:153:11 | [SSA] n | main.rs:153:22:153:22 | n | | main.rs:153:11:153:11 | n | main.rs:153:11:153:11 | [SSA] n | | main.rs:153:17:153:23 | CallExpr | main.rs:152:5:155:5 | MatchExpr | @@ -135,6 +151,8 @@ | main.rs:154:11:154:11 | n | main.rs:154:11:154:11 | [SSA] n | | main.rs:154:17:154:23 | CallExpr | main.rs:152:5:155:5 | MatchExpr | | main.rs:156:5:159:5 | MatchExpr | main.rs:149:44:160:1 | BlockExpr | +| main.rs:156:11:156:12 | s2 | main.rs:157:9:157:12 | TupleStructPat | +| main.rs:156:11:156:12 | s2 | main.rs:158:9:158:12 | TupleStructPat | | main.rs:157:11:157:11 | [SSA] n | main.rs:157:22:157:22 | n | | main.rs:157:11:157:11 | n | main.rs:157:11:157:11 | [SSA] n | | main.rs:157:17:157:23 | CallExpr | main.rs:156:5:159:5 | MatchExpr | diff --git a/rust/ql/test/library-tests/dataflow/local/inline-flow.expected b/rust/ql/test/library-tests/dataflow/local/inline-flow.expected index 98289b67da64..b8641e064f12 100644 --- a/rust/ql/test/library-tests/dataflow/local/inline-flow.expected +++ b/rust/ql/test/library-tests/dataflow/local/inline-flow.expected @@ -5,6 +5,14 @@ edges | main.rs:31:13:31:21 | CallExpr | main.rs:36:10:36:10 | b | provenance | | | main.rs:45:15:45:23 | CallExpr | main.rs:47:10:47:10 | b | provenance | | | main.rs:53:9:53:17 | CallExpr | main.rs:54:10:54:10 | i | provenance | | +| main.rs:117:14:117:29 | CallExpr [Some] | main.rs:120:9:120:15 | TupleStructPat [Some] | provenance | | +| main.rs:117:19:117:28 | CallExpr | main.rs:117:14:117:29 | CallExpr [Some] | provenance | | +| main.rs:120:9:120:15 | TupleStructPat [Some] | main.rs:120:14:120:14 | n | provenance | | +| main.rs:120:14:120:14 | n | main.rs:120:25:120:25 | n | provenance | | +| main.rs:135:14:135:34 | CallExpr [A] | main.rs:138:9:138:20 | TupleStructPat [A] | provenance | | +| main.rs:135:24:135:33 | CallExpr | main.rs:135:14:135:34 | CallExpr [A] | provenance | | +| main.rs:138:9:138:20 | TupleStructPat [A] | main.rs:138:19:138:19 | n | provenance | | +| main.rs:138:19:138:19 | n | main.rs:138:30:138:30 | n | provenance | | nodes | main.rs:15:10:15:18 | CallExpr | semmle.label | CallExpr | | main.rs:19:13:19:21 | CallExpr | semmle.label | CallExpr | @@ -17,6 +25,16 @@ nodes | main.rs:47:10:47:10 | b | semmle.label | b | | main.rs:53:9:53:17 | CallExpr | semmle.label | CallExpr | | main.rs:54:10:54:10 | i | semmle.label | i | +| main.rs:117:14:117:29 | CallExpr [Some] | semmle.label | CallExpr [Some] | +| main.rs:117:19:117:28 | CallExpr | semmle.label | CallExpr | +| main.rs:120:9:120:15 | TupleStructPat [Some] | semmle.label | TupleStructPat [Some] | +| main.rs:120:14:120:14 | n | semmle.label | n | +| main.rs:120:25:120:25 | n | semmle.label | n | +| main.rs:135:14:135:34 | CallExpr [A] | semmle.label | CallExpr [A] | +| main.rs:135:24:135:33 | CallExpr | semmle.label | CallExpr | +| main.rs:138:9:138:20 | TupleStructPat [A] | semmle.label | TupleStructPat [A] | +| main.rs:138:19:138:19 | n | semmle.label | n | +| main.rs:138:30:138:30 | n | semmle.label | n | subpaths testFailures #select @@ -26,3 +44,5 @@ testFailures | main.rs:36:10:36:10 | b | main.rs:31:13:31:21 | CallExpr | main.rs:36:10:36:10 | b | $@ | main.rs:31:13:31:21 | CallExpr | CallExpr | | main.rs:47:10:47:10 | b | main.rs:45:15:45:23 | CallExpr | main.rs:47:10:47:10 | b | $@ | main.rs:45:15:45:23 | CallExpr | CallExpr | | main.rs:54:10:54:10 | i | main.rs:53:9:53:17 | CallExpr | main.rs:54:10:54:10 | i | $@ | main.rs:53:9:53:17 | CallExpr | CallExpr | +| main.rs:120:25:120:25 | n | main.rs:117:19:117:28 | CallExpr | main.rs:120:25:120:25 | n | $@ | main.rs:117:19:117:28 | CallExpr | CallExpr | +| main.rs:138:30:138:30 | n | main.rs:135:24:135:33 | CallExpr | main.rs:138:30:138:30 | n | $@ | main.rs:135:24:135:33 | CallExpr | CallExpr | diff --git a/rust/ql/test/library-tests/dataflow/local/main.rs b/rust/ql/test/library-tests/dataflow/local/main.rs index 9f31b08f680f..2666339ad59a 100644 --- a/rust/ql/test/library-tests/dataflow/local/main.rs +++ b/rust/ql/test/library-tests/dataflow/local/main.rs @@ -117,7 +117,7 @@ fn option_pattern_match_unqualified() { let s1 = Some(source(14)); let s2 = Some(2); match s1 { - Some(n) => sink(n), // $ MISSING: hasValueFlow=14 + Some(n) => sink(n), // $ hasValueFlow=14 None => sink(0), } match s2 { @@ -135,7 +135,7 @@ fn custom_enum_pattern_match_qualified() { let s1 = MyEnum::A(source(15)); let s2 = MyEnum::B(2); match s1 { - MyEnum::A(n) => sink(n), // $ MISSING: hasValueFlow=15 + MyEnum::A(n) => sink(n), // $ hasValueFlow=15 MyEnum::B(n) => sink(n), } match s2 {