diff --git a/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll b/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll index 2b765765e992..2e598711fccf 100644 --- a/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll +++ b/javascript/ql/lib/semmle/javascript/frameworks/data/internal/ApiGraphModels.qll @@ -643,6 +643,15 @@ module ModelOutput { baseNode = getInvocationFromPath(type, path) } + /** + * Holds if a `baseNode` is a callable identified by the `type,path` part of a summary row. + */ + cached + predicate resolvedSummaryRefBase(string type, string path, API::Node baseNode) { + summaryModel(type, path, _, _, _) and + baseNode = getNodeFromPath(type, path) + } + /** * Holds if `node` is seen as an instance of `type` due to a type definition * contributed by a CSV model. diff --git a/python/ql/lib/change-notes/2023-06-20-summaries-from-models.md b/python/ql/lib/change-notes/2023-06-20-summaries-from-models.md new file mode 100644 index 000000000000..feded1bb6c5f --- /dev/null +++ b/python/ql/lib/change-notes/2023-06-20-summaries-from-models.md @@ -0,0 +1,4 @@ +--- +category: feature +--- +* Flow summaries can now be specified as model rows, for example `MyPkg;Member[list_map];Argument[1].ListElement;Argument[0].Parameter[0];value`. diff --git a/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll b/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll index 5e82700bd0e3..8b80e13d06de 100644 --- a/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll +++ b/python/ql/lib/semmle/python/dataflow/new/FlowSummary.qll @@ -90,39 +90,32 @@ abstract class SummarizedCallable extends LibraryCallable, Impl::Public::Summari } class RequiredSummaryComponentStack = Impl::Public::RequiredSummaryComponentStack; -// // This gives access to getNodeFromPath, which is not constrained to `CallNode`s -// // as `resolvedSummaryBase` is. 
-// private import semmle.python.frameworks.data.internal.ApiGraphModels as AGM -// -// private class SummarizedCallableFromModel extends SummarizedCallable { -// string package; -// string type; -// string path; -// SummarizedCallableFromModel() { -// ModelOutput::relevantSummaryModel(package, type, path, _, _, _) and -// this = package + ";" + type + ";" + path -// } -// override CallCfgNode getACall() { -// exists(API::CallNode base | -// ModelOutput::resolvedSummaryBase(package, type, path, base) and -// result = base.getACall() -// ) -// } -// override ArgumentNode getACallback() { -// exists(API::Node base | -// base = AGM::getNodeFromPath(package, type, path) and -// result = base.getAValueReachableFromSource() -// ) -// } -// override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { -// exists(string kind | -// ModelOutput::relevantSummaryModel(package, type, path, input, output, kind) -// | -// kind = "value" and -// preservesValue = true -// or -// kind = "taint" and -// preservesValue = false -// ) -// } -// } + +private class SummarizedCallableFromModel extends SummarizedCallable { + string type; + string path; + + SummarizedCallableFromModel() { + ModelOutput::relevantSummaryModel(type, path, _, _, _) and + this = type + ";" + path + } + + override CallCfgNode getACall() { ModelOutput::resolvedSummaryBase(type, path, result) } + + override ArgumentNode getACallback() { + exists(API::Node base | + ModelOutput::resolvedSummaryRefBase(type, path, base) and + result = base.getAValueReachableFromSource() + ) + } + + override predicate propagatesFlowExt(string input, string output, boolean preservesValue) { + exists(string kind | ModelOutput::relevantSummaryModel(type, path, input, output, kind) | + kind = "value" and + preservesValue = true + or + kind = "taint" and + preservesValue = false + ) + } +} diff --git a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll 
b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll index 2b765765e992..2e598711fccf 100644 --- a/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll +++ b/python/ql/lib/semmle/python/frameworks/data/internal/ApiGraphModels.qll @@ -643,6 +643,15 @@ module ModelOutput { baseNode = getInvocationFromPath(type, path) } + /** + * Holds if a `baseNode` is a callable identified by the `type,path` part of a summary row. + */ + cached + predicate resolvedSummaryRefBase(string type, string path, API::Node baseNode) { + summaryModel(type, path, _, _, _) and + baseNode = getNodeFromPath(type, path) + } + /** * Holds if `node` is seen as an instance of `type` due to a type definition * contributed by a CSV model. diff --git a/python/ql/test/experimental/dataflow/model-summaries/InlineTaintTest.expected b/python/ql/test/experimental/dataflow/model-summaries/InlineTaintTest.expected new file mode 100644 index 000000000000..4a72c551661a --- /dev/null +++ b/python/ql/test/experimental/dataflow/model-summaries/InlineTaintTest.expected @@ -0,0 +1,4 @@ +failures +argumentToEnsureNotTaintedNotMarkedAsSpurious +untaintedArgumentToEnsureTaintedNotMarkedAsMissing +testFailures diff --git a/python/ql/test/experimental/dataflow/model-summaries/InlineTaintTest.ql b/python/ql/test/experimental/dataflow/model-summaries/InlineTaintTest.ql new file mode 100644 index 000000000000..551266d74556 --- /dev/null +++ b/python/ql/test/experimental/dataflow/model-summaries/InlineTaintTest.ql @@ -0,0 +1,4 @@ +import python +private import TestSummaries +import experimental.meta.InlineTaintTest +import MakeInlineTaintTest diff --git a/python/ql/test/experimental/dataflow/model-summaries/NormalDataflowTest.expected b/python/ql/test/experimental/dataflow/model-summaries/NormalDataflowTest.expected new file mode 100644 index 000000000000..04431311999c --- /dev/null +++ b/python/ql/test/experimental/dataflow/model-summaries/NormalDataflowTest.expected @@ -0,0 +1,3 @@ 
+missingAnnotationOnSink +failures +testFailures diff --git a/python/ql/test/experimental/dataflow/model-summaries/NormalDataflowTest.ql b/python/ql/test/experimental/dataflow/model-summaries/NormalDataflowTest.ql new file mode 100644 index 000000000000..3e311335e14d --- /dev/null +++ b/python/ql/test/experimental/dataflow/model-summaries/NormalDataflowTest.ql @@ -0,0 +1,3 @@ +import python +private import TestSummaries +import experimental.dataflow.TestUtil.NormalDataflowTest diff --git a/python/ql/test/experimental/dataflow/model-summaries/TestSummaries.qll b/python/ql/test/experimental/dataflow/model-summaries/TestSummaries.qll new file mode 100644 index 000000000000..5f1e0a1f90b9 --- /dev/null +++ b/python/ql/test/experimental/dataflow/model-summaries/TestSummaries.qll @@ -0,0 +1,25 @@ +private import python +private import semmle.python.dataflow.new.FlowSummary +private import semmle.python.frameworks.data.ModelsAsData +private import semmle.python.ApiGraphs + +private class StepsFromModel extends ModelInput::SummaryModelCsv { + override predicate row(string row) { + row = + [ + "foo;Member[MS_identity];Argument[0];ReturnValue;value", + "foo;Member[MS_apply_lambda];Argument[1];Argument[0].Parameter[0];value", + "foo;Member[MS_apply_lambda];Argument[0].ReturnValue;ReturnValue;value", + "foo;Member[MS_reversed];Argument[0].ListElement;ReturnValue.ListElement;value", + "foo;Member[MS_reversed];Argument[0];ReturnValue;taint", + "foo;Member[MS_list_map];Argument[1].ListElement;Argument[0].Parameter[0];value", + "foo;Member[MS_list_map];Argument[0].ReturnValue;ReturnValue.ListElement;value", + "foo;Member[MS_list_map];Argument[1];ReturnValue;taint", + "foo;Member[MS_append_to_list];Argument[0].ListElement;ReturnValue.ListElement;value", + "foo;Member[MS_append_to_list];Argument[1];ReturnValue.ListElement;value", + "foo;Member[MS_append_to_list];Argument[0];ReturnValue;taint", + "foo;Member[MS_append_to_list];Argument[1];ReturnValue;taint", + 
"json;Member[MS_loads];Argument[0];ReturnValue;taint" + ] + } +} diff --git a/python/ql/test/experimental/dataflow/model-summaries/model_summaries.py b/python/ql/test/experimental/dataflow/model-summaries/model_summaries.py new file mode 100644 index 000000000000..ee02918b0798 --- /dev/null +++ b/python/ql/test/experimental/dataflow/model-summaries/model_summaries.py @@ -0,0 +1,122 @@ + +import sys +import os + +sys.path.append(os.path.dirname(os.path.dirname((__file__)))) +from testlib import expects + +# These are defined so that we can evaluate the test code. +NONSOURCE = "not a source" +SOURCE = "source" + + +def is_source(x): + return x == "source" or x == b"source" or x == 42 or x == 42.0 or x == 42j + + +def SINK(x): + if is_source(x): + print("OK") + else: + print("Unexpected flow", x) + + +def SINK_F(x): + if is_source(x): + print("Unexpected flow", x) + else: + print("OK") + +ensure_tainted = ensure_not_tainted = print +TAINTED_STRING = "TAINTED_STRING" + +from foo import MS_identity, MS_apply_lambda, MS_reversed, MS_list_map, MS_append_to_list + +# Simple summary +via_identity = MS_identity(SOURCE) +SINK(via_identity) # $ flow="SOURCE, l:-1 -> via_identity" + +# Lambda summary +via_lambda = MS_apply_lambda(lambda x: [x], SOURCE) +SINK(via_lambda[0]) # $ flow="SOURCE, l:-1 -> via_lambda[0]" + +# A lambda that breaks the flow +not_via_lambda = MS_apply_lambda(lambda x: 1, SOURCE) +SINK_F(not_via_lambda) + + +# Collection summaries +via_reversed = MS_reversed([SOURCE]) +SINK(via_reversed[0]) # $ flow="SOURCE, l:-1 -> via_reversed[0]" + +tainted_list = MS_reversed(TAINTED_LIST) +ensure_tainted( + tainted_list, # $ tainted + tainted_list[0], # $ tainted +) + +# Complex summaries +def box(x): + return [x] + +via_map = MS_list_map(box, [SOURCE]) +SINK(via_map[0][0]) # $ flow="SOURCE, l:-1 -> via_map[0][0]" + +tainted_mapped = MS_list_map(box, TAINTED_LIST) +ensure_tainted( + tainted_mapped, # $ tainted + tainted_mapped[0][0], # $ tainted +) + +def 
explicit_identity(x): + return x + +via_map_explicit = MS_list_map(explicit_identity, [SOURCE]) +SINK(via_map_explicit[0]) # $ flow="SOURCE, l:-1 -> via_map_explicit[0]" + +tainted_mapped_explicit = MS_list_map(explicit_identity, TAINTED_LIST) +ensure_tainted( + tainted_mapped_explicit, # $ tainted + tainted_mapped_explicit[0], # $ tainted +) + +via_map_summary = MS_list_map(MS_identity, [SOURCE]) +SINK(via_map_summary[0]) # $ flow="SOURCE, l:-1 -> via_map_summary[0]" + +tainted_mapped_summary = MS_list_map(MS_identity, TAINTED_LIST) +ensure_tainted( + tainted_mapped_summary, # $ tainted + tainted_mapped_summary[0], # $ tainted +) + +via_append_el = MS_append_to_list([], SOURCE) +SINK(via_append_el[0]) # $ flow="SOURCE, l:-1 -> via_append_el[0]" + +tainted_list_el = MS_append_to_list([], TAINTED_STRING) +ensure_tainted( + tainted_list_el, # $ tainted + tainted_list_el[0], # $ tainted +) + +via_append = MS_append_to_list([SOURCE], NONSOURCE) +SINK(via_append[0]) # $ flow="SOURCE, l:-1 -> via_append[0]" + +tainted_list_implicit = MS_append_to_list(TAINTED_LIST, NONSOURCE) +ensure_tainted( + tainted_list_implicit, # $ tainted + tainted_list_implicit[0], # $ tainted +) + +# Modeled flow-summary is not value preserving +from json import MS_loads as json_loads + +# so no data-flow +SINK_F(json_loads(SOURCE)) +SINK_F(json_loads(SOURCE)[0]) + +# but has taint-flow +tainted_resultlist = json_loads(TAINTED_STRING) +ensure_tainted( + tainted_resultlist, # $ tainted + tainted_resultlist[0], # $ tainted +) diff --git a/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll b/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll index 2b765765e992..2e598711fccf 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/data/internal/ApiGraphModels.qll @@ -643,6 +643,15 @@ module ModelOutput { baseNode = getInvocationFromPath(type, path) } + /** + * Holds if a `baseNode` is a callable identified by the 
`type,path` part of a summary row. + */ + cached + predicate resolvedSummaryRefBase(string type, string path, API::Node baseNode) { + summaryModel(type, path, _, _, _) and + baseNode = getNodeFromPath(type, path) + } + /** * Holds if `node` is seen as an instance of `type` due to a type definition * contributed by a CSV model.