diff --git a/python/ql/lib/semmle/python/ApiGraphs.qll b/python/ql/lib/semmle/python/ApiGraphs.qll index 1ce04852f3ad..4385259ca9b3 100644 --- a/python/ql/lib/semmle/python/ApiGraphs.qll +++ b/python/ql/lib/semmle/python/ApiGraphs.qll @@ -843,6 +843,13 @@ module API { ref = pred.getSubscript(_) and ref.asCfgNode().isLoad() or + // Subscript via comprehension + lbl = Label::subscript() and + exists(PY::Comp comp | + pred.asExpr() = comp.getIterable() and + ref.asExpr() = comp.getNthInnerLoop(0).getTarget() + ) + or // Subclassing a node lbl = Label::subclass() and exists(PY::ClassExpr clsExpr, DataFlow::Node superclass | pred.flowsTo(superclass) | diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll index f3e4ff40800b..415028ad8277 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/TypeTrackingImpl.qll @@ -304,7 +304,7 @@ module TypeTrackingInput implements Shared::TypeTrackingInput { var.hasDefiningNode(def) | nodeTo.(DataFlowPublic::ScopeEntryDefinitionNode).getDefinition() = e and - nodeFrom.asCfgNode() = def.getValue() and + nodeFrom.asCfgNode() = def and var.getScope().getScope*() = nodeFrom.getScope() ) } diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml b/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml index 683b0aa9b3df..63d6cf003cd8 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml +++ b/python/ql/lib/semmle/python/frameworks/Stdlib.model.yml @@ -180,4 +180,4 @@ extensions: - addsTo: pack: codeql/python-all extensible: typeVariableModel - data: [] \ No newline at end of file + data: [] diff --git a/python/ql/lib/semmle/python/frameworks/Stdlib.qll b/python/ql/lib/semmle/python/frameworks/Stdlib.qll index c7179dbd46c0..ea2f863789a9 100644 --- a/python/ql/lib/semmle/python/frameworks/Stdlib.qll +++ 
b/python/ql/lib/semmle/python/frameworks/Stdlib.qll @@ -3284,6 +3284,14 @@ module StdlibPrivate { } } + private API::Node re(string name) { + name = "re.Match" and + result = API::moduleImport("re") + or + name = "compiled re.Match" and + result = any(RePatternSummary c).getACall().(API::CallNode).getReturn() + } + /** * A flow summary for methods returning a `re.Match` object * @@ -3293,17 +3301,9 @@ module StdlibPrivate { ReMatchSummary() { this = ["re.Match", "compiled re.Match"] } override DataFlow::CallCfgNode getACall() { - this = "re.Match" and - result = API::moduleImport("re").getMember(["match", "search", "fullmatch"]).getACall() - or - this = "compiled re.Match" and - result = - any(RePatternSummary c) - .getACall() - .(API::CallNode) - .getReturn() - .getMember(["match", "search", "fullmatch"]) - .getACall() + exists(API::Node re | re = re(this) | + result = re.getMember(["match", "search", "fullmatch"]).getACall() + ) } override DataFlow::ArgumentNode getACallback() { none() } @@ -3340,6 +3340,12 @@ module StdlibPrivate { } } + private API::Node match() { + result = any(ReMatchSummary c).getACall().(API::CallNode).getReturn() + or + result = re(_).getMember("finditer").getReturn().getASubscript() + } + /** * A flow summary for methods on a `re.Match` object * @@ -3353,15 +3359,7 @@ module StdlibPrivate { methodName in ["expand", "group", "groups", "groupdict"] } - override DataFlow::CallCfgNode getACall() { - result = - any(ReMatchSummary c) - .getACall() - .(API::CallNode) - .getReturn() - .getMember(methodName) - .getACall() - } + override DataFlow::CallCfgNode getACall() { result = match().getMember(methodName).getACall() } override DataFlow::ArgumentNode getACallback() { none() } @@ -3447,6 +3445,9 @@ module StdlibPrivate { or methodName = "subn" and output = "ReturnValue.TupleElement[0]" + or + methodName = "finditer" and + output = "ReturnValue.ListElement.Attribute[string]" ) ) or diff --git 
a/python/ql/test/library-tests/dataflow/tainttracking/DSVW/InlineTaintTest.expected b/python/ql/test/library-tests/dataflow/tainttracking/DSVW/InlineTaintTest.expected new file mode 100644 index 000000000000..366de37b8677 --- /dev/null +++ b/python/ql/test/library-tests/dataflow/tainttracking/DSVW/InlineTaintTest.expected @@ -0,0 +1,4 @@ +argumentToEnsureNotTaintedNotMarkedAsSpurious +untaintedArgumentToEnsureTaintedNotMarkedAsMissing +testFailures +failures diff --git a/python/ql/test/library-tests/dataflow/tainttracking/DSVW/InlineTaintTest.ql b/python/ql/test/library-tests/dataflow/tainttracking/DSVW/InlineTaintTest.ql new file mode 100644 index 000000000000..8524da5fe7db --- /dev/null +++ b/python/ql/test/library-tests/dataflow/tainttracking/DSVW/InlineTaintTest.ql @@ -0,0 +1,2 @@ +import experimental.meta.InlineTaintTest +import MakeInlineTaintTest diff --git a/python/ql/test/library-tests/dataflow/tainttracking/DSVW/repro.py b/python/ql/test/library-tests/dataflow/tainttracking/DSVW/repro.py new file mode 100644 index 000000000000..ae9761cefbfd --- /dev/null +++ b/python/ql/test/library-tests/dataflow/tainttracking/DSVW/repro.py @@ -0,0 +1,45 @@ +import re +import urllib.parse +import sys +import http.client + +def generator_dict_re_combo(): + query = TAINTED_STRING + + params = dict( + ( + match.group("parameter"), + urllib.parse.unquote( + ",".join( + re.findall( + r"(?:\A|[?&])%s=([^&]+)" % match.group("parameter"), query + ) + ) + ), + ) + for match in re.finditer( + r"((\A|[?&])(?P<parameter>[\w\[\]]+)=)([^&]+)", query + ) + ) + + ensure_tainted(params) # $ tainted + +def parse_qs(): + query = TAINTED_STRING + + params = urllib.parse.parse_qs(query) + + ensure_tainted(params) # $ tainted + +HTML_PREFIX = """""" + +def flat(): + self_path = TAINTED_STRING + + path, query = self_path.split('?', 1) if '?' 
in self_path else (self_path, "") + code, content, params, cursor = http.client.OK, HTML_PREFIX, dict((match.group("parameter"), urllib.parse.unquote(','.join(re.findall(r"(?:\A|[?&])%s=([^&]+)" % match.group("parameter"), query)))) for match in re.finditer(r"((\A|[?&])(?P<parameter>[\w\[\]]+)=)([^&]+)", query)), "Cursor" + + print(code) + print(content) + ensure_tainted(params) # $ tainted + print(cursor)