From 5feb4016079ef80c4da80e402f5eec4b28761fe6 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Wed, 27 Nov 2024 23:49:46 +0100 Subject: [PATCH 01/12] ruby: Add query for hoisting Rails ActiveRecord calls This does not take assicoations into account. It uses ActiveRecordModelFinderCall to identify relevant calls. This class has therefor been made public. --- .../codeql/ruby/frameworks/ActiveRecord.qll | 5 +- .../queries/performance/CouldBeHoisted.qhelp | 22 +++++ .../src/queries/performance/CouldBeHoisted.ql | 90 +++++++++++++++++++ .../queries/performance/examples/preload.rb | 14 +++ .../performance/examples/straight_loop.rb | 8 ++ 5 files changed, 136 insertions(+), 3 deletions(-) create mode 100644 ruby/ql/src/queries/performance/CouldBeHoisted.qhelp create mode 100644 ruby/ql/src/queries/performance/CouldBeHoisted.ql create mode 100644 ruby/ql/src/queries/performance/examples/preload.rb create mode 100644 ruby/ql/src/queries/performance/examples/straight_loop.rb diff --git a/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll b/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll index deaa0a6427a0..42c6286e4ea0 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll @@ -254,9 +254,8 @@ private Expr getUltimateReceiver(MethodCall call) { ) } -// A call to `find`, `where`, etc. that may return active record model object(s) -private class ActiveRecordModelFinderCall extends ActiveRecordModelInstantiation, DataFlow::CallNode -{ +/** A call to `find`, `where`, etc. that may return active record model object(s) */ +class ActiveRecordModelFinderCall extends ActiveRecordModelInstantiation, DataFlow::CallNode { private ActiveRecordModelClass cls; ActiveRecordModelFinderCall() { diff --git a/ruby/ql/src/queries/performance/CouldBeHoisted.qhelp b/ruby/ql/src/queries/performance/CouldBeHoisted.qhelp new file mode 100644 index 000000000000..b476a2c7a00c --- /dev/null +++ b/ruby/ql/src/queries/performance/CouldBeHoisted.qhelp @@ -0,0 +1,22 @@ + + + + +

+When a Rails ActiveRecord query is executed in a loop, it is potentially an n+1 problem. +This query identifies situations where an ActiveRecord query execution could be pulled out of a loop. +

+
+ +

If possible, pull the query out of the loop, thus replacing the many calls with a single one. +

+
+ +

The following (suboptimal) example code queries the User object in each iteration of the loop:

+ +

To improve the performance, we instead query the User object once outside the loop, gathereing all necessary information:

+ +
+
\ No newline at end of file diff --git a/ruby/ql/src/queries/performance/CouldBeHoisted.ql b/ruby/ql/src/queries/performance/CouldBeHoisted.ql new file mode 100644 index 000000000000..4f5653a8e458 --- /dev/null +++ b/ruby/ql/src/queries/performance/CouldBeHoisted.ql @@ -0,0 +1,90 @@ +/** + * @name Could be hoisted + * @description Hoist Rails `ActiveRecord::Relation` query calls out of loops. + * @kind problem + * @problem.severity info + * @precision high + * @id rb/could-be-hoisted + * @tags performance + */ + +// Possible Improvements; +// - Consider also Associations. +// Associations are lazy-loading by default, so something like +// in a loop over `article` do +// `article.book` +// if you have 1000 articles it will do a 1000 calls to `book`. +// If you already did `article includes book`, there should be no problem. +// - Consider instances of ActiveRecordInstanceMethodCall, for instance +// calls to `pluck`. +import ruby +private import codeql.ruby.AST +import codeql.ruby.ast.internal.Constant +import codeql.ruby.Concepts +import codeql.ruby.frameworks.ActiveRecord +private import codeql.ruby.TaintTracking + +string loopMethodName() { + result in [ + "each", "reverse_each", "map", "map!", "foreach", "flat_map", "in_batches", "one?", "all?", + "collect", "collect!", "select", "select!", "reject", "reject!" + ] +} + +class LoopingCall extends DataFlow::CallNode { + DataFlow::CallableNode loopBlock; + + LoopingCall() { + this.getMethodName() = loopMethodName() and loopBlock = this.getBlock().asCallable() + } + + DataFlow::CallableNode getLoopBlock() { result = loopBlock } +} + +predicate happensInLoop(LoopingCall loop, DataFlow::CallNode e) { + loop.getLoopBlock().asCallableAstNode() = e.asExpr().getScope() +} + +predicate happensInOuterLoop(LoopingCall outerLoop, DataFlow::CallNode e) { + exists(LoopingCall innerLoop | + happensInLoop(outerLoop, innerLoop) and + happensInLoop(innerLoop, e) + ) +} + +predicate happensInInnermostLoop(LoopingCall loop, DataFlow::CallNode e) { + happensInLoop(loop, e) and + not happensInOuterLoop(loop, e) +} + +// The ActiveRecord instance is used to potentially control the loop +predicate usedInLoopControlGuard(ActiveRecordInstance ar, DataFlow::Node guard) { + TaintTracking::localTaint(ar, guard) and + guard = guardForLoopControl(_, _) +} + +// A guard for controlling the loop +DataFlow::Node guardForLoopControl(ConditionalExpr cond, Stmt control) { + result.asExpr().getAstNode() = cond.getCondition().getAChild*() and + ( + control.(MethodCall).getMethodName() = "raise" + or + control instanceof NextStmt + ) and + control = cond.getBranch(_).getAChild() +} + +from LoopingCall loop, DataFlow::CallNode call +where + // Disregard loops over constants + not isArrayConstant(loop.getReceiver().asExpr(), _) and + // Disregard tests + not call.getLocation().getFile().getAbsolutePath().matches("%test%") and + // Disregard cases where the looping is influenced by the query result + not usedInLoopControlGuard(call, _) and + // Only report the inner most loop + happensInInnermostLoop(loop, call) and + // Only report calls that are likely to be expensive + call instanceof ActiveRecordModelFinderCall and + not call.getMethodName() in ["new", "create"] +select call, "This call happens inside $@, and could be hoisted.", loop, "this loop" diff --git a/ruby/ql/src/queries/performance/examples/preload.rb b/ruby/ql/src/queries/performance/examples/preload.rb new file mode 100644 index 000000000000..27a313a0f65b --- /dev/null +++ b/ruby/ql/src/queries/performance/examples/preload.rb @@ -0,0 +1,14 @@ +# Preload User data +user_data = User.where(login: repo_names_by_owner.keys).pluck(:login, :id, :type).to_h do |login, id, type| + [login, { id: id, type: type == "User" ? "USER" : "ORGANIZATION" }] +end + +repo_names_by_owner.each do |owner_slug, repo_names| + owner_info = user_data[owner_slug] + owner_id = owner_info[:id] + owner_type = owner_info[:type] + rel_conditions = { owner_id: owner_id, name: repo_names } + + nwo_rel = nwo_rel.or(RepositorySecurityCenterConfig.where(rel_conditions)) unless neg + nwo_rel = nwo_rel.and(RepositorySecurityCenterConfig.where.not(rel_conditions)) if neg +end \ No newline at end of file diff --git a/ruby/ql/src/queries/performance/examples/straight_loop.rb b/ruby/ql/src/queries/performance/examples/straight_loop.rb new file mode 100644 index 000000000000..4c64d3bb84f2 --- /dev/null +++ b/ruby/ql/src/queries/performance/examples/straight_loop.rb @@ -0,0 +1,8 @@ +repo_names_by_owner.map do |owner_slug, repo_names| + owner_id, owner_type = User.where(login: owner_slug).pluck(:id, :type).first + owner_type = owner_type == "User" ? "USER" : "ORGANIZATION" + rel_conditions = { owner_id: owner_id, name: repo_names } + + nwo_rel = nwo_rel.or(RepositorySecurityCenterConfig.where(rel_conditions)) unless neg + nwo_rel = nwo_rel.and(RepositorySecurityCenterConfig.where.not(rel_conditions)) if neg + end \ No newline at end of file From 7af8fa75e6c14beedf0444ef316dc28a874d52df Mon Sep 17 00:00:00 2001 From: yoff Date: Thu, 6 Feb 2025 15:45:28 +0100 Subject: [PATCH 02/12] Apply suggestions from code review Co-authored-by: Aditya Sharad <6874315+adityasharad@users.noreply.github.com> --- ruby/ql/src/queries/performance/CouldBeHoisted.qhelp | 9 +++++---- ruby/ql/src/queries/performance/CouldBeHoisted.ql | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/ruby/ql/src/queries/performance/CouldBeHoisted.qhelp b/ruby/ql/src/queries/performance/CouldBeHoisted.qhelp index b476a2c7a00c..a473fc9ce1f9 100644 --- a/ruby/ql/src/queries/performance/CouldBeHoisted.qhelp +++ b/ruby/ql/src/queries/performance/CouldBeHoisted.qhelp @@ -5,18 +5,19 @@

-When a Rails ActiveRecord query is executed in a loop, it is potentially an n+1 problem. -This query identifies situations where an ActiveRecord query execution could be pulled out of a loop. +When a database query operation, for example a call to a query method in the Rails `ActiveRecord::Relation` class, is executed in a loop, this can lead to a performance issue known as an "n+1 query problem". +The database query will be executed in each iteration of the loop. +Performance can usually be improved by performing a single database query outside of a loop, which retrieves all the required objects in a single operation.

-

If possible, pull the query out of the loop, thus replacing the many calls with a single one. +

If possible, pull the database query out of the loop and rewrite it to retrieve all the required objects. This replaces multiple database operations with a single one.

The following (suboptimal) example code queries the User object in each iteration of the loop:

-

To improve the performance, we instead query the User object once outside the loop, gathereing all necessary information:

+

To improve the performance, we instead query the User object once outside the loop, gathering all necessary information in a single query:

\ No newline at end of file diff --git a/ruby/ql/src/queries/performance/CouldBeHoisted.ql b/ruby/ql/src/queries/performance/CouldBeHoisted.ql index 4f5653a8e458..92ea9eac302b 100644 --- a/ruby/ql/src/queries/performance/CouldBeHoisted.ql +++ b/ruby/ql/src/queries/performance/CouldBeHoisted.ql @@ -87,4 +87,4 @@ where // Only report calls that are likely to be expensive call instanceof ActiveRecordModelFinderCall and not call.getMethodName() in ["new", "create"] -select call, "This call happens inside $@, and could be hoisted.", loop, "this loop" +select call, "This call to a database query operation happens inside $@, and could be hoisted to a single call outside the loop.", loop, "this loop" From 8aa195d8383d33e895bf93427f437214cf38da6e Mon Sep 17 00:00:00 2001 From: yoff Date: Thu, 6 Feb 2025 16:59:08 +0100 Subject: [PATCH 03/12] ruby: remove comment (we can create issues) --- ruby/ql/src/queries/performance/CouldBeHoisted.ql | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/ruby/ql/src/queries/performance/CouldBeHoisted.ql b/ruby/ql/src/queries/performance/CouldBeHoisted.ql index 92ea9eac302b..137faa3c659f 100644 --- a/ruby/ql/src/queries/performance/CouldBeHoisted.ql +++ b/ruby/ql/src/queries/performance/CouldBeHoisted.ql @@ -8,15 +8,6 @@ * @tags performance */ -// Possible Improvements; -// - Consider also Associations. -// Associations are lazy-loading by default, so something like -// in a loop over `article` do -// `article.book` -// if you have 1000 articles it will do a 1000 calls to `book`. -// If you already did `article includes book`, there should be no problem. -// - Consider instances of ActiveRecordInstanceMethodCall, for instance -// calls to `pluck`. import ruby private import codeql.ruby.AST import codeql.ruby.ast.internal.Constant @@ -87,4 +78,6 @@ where // Only report calls that are likely to be expensive call instanceof ActiveRecordModelFinderCall and not call.getMethodName() in ["new", "create"] -select call, "This call to a database query operation happens inside $@, and could be hoisted to a single call outside the loop.", loop, "this loop" +select call, + "This call to a database query operation happens inside $@, and could be hoisted to a single call outside the loop.", + loop, "this loop" From d9d0d3c18b372e158bb8cdfc91b65933889d9b53 Mon Sep 17 00:00:00 2001 From: yoff Date: Thu, 6 Feb 2025 16:59:23 +0100 Subject: [PATCH 04/12] ruby: add code block --- ruby/ql/src/queries/performance/CouldBeHoisted.qhelp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ruby/ql/src/queries/performance/CouldBeHoisted.qhelp b/ruby/ql/src/queries/performance/CouldBeHoisted.qhelp index a473fc9ce1f9..493597ee0c96 100644 --- a/ruby/ql/src/queries/performance/CouldBeHoisted.qhelp +++ b/ruby/ql/src/queries/performance/CouldBeHoisted.qhelp @@ -15,7 +15,7 @@ Performance can usually be improved by performing a single database query outsid

-

The following (suboptimal) example code queries the User object in each iteration of the loop:

+

The following (suboptimal) example code queries the User object in each iteration of the loop:

To improve the performance, we instead query the User object once outside the loop, gathering all necessary information in a single query:

From 51a2d8c72f76c6a22f5e5ab06042326cc1f1fc93 Mon Sep 17 00:00:00 2001 From: yoff Date: Thu, 6 Feb 2025 17:07:12 +0100 Subject: [PATCH 05/12] ruby: rename query --- .../{CouldBeHoisted.qhelp => DatabaseQueryInLoop.qhelp} | 0 .../{CouldBeHoisted.ql => DatabaseQueryInLoop.ql} | 6 +++--- 2 files changed, 3 insertions(+), 3 deletions(-) rename ruby/ql/src/queries/performance/{CouldBeHoisted.qhelp => DatabaseQueryInLoop.qhelp} (100%) rename ruby/ql/src/queries/performance/{CouldBeHoisted.ql => DatabaseQueryInLoop.ql} (93%) diff --git a/ruby/ql/src/queries/performance/CouldBeHoisted.qhelp b/ruby/ql/src/queries/performance/DatabaseQueryInLoop.qhelp similarity index 100% rename from ruby/ql/src/queries/performance/CouldBeHoisted.qhelp rename to ruby/ql/src/queries/performance/DatabaseQueryInLoop.qhelp diff --git a/ruby/ql/src/queries/performance/CouldBeHoisted.ql b/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql similarity index 93% rename from ruby/ql/src/queries/performance/CouldBeHoisted.ql rename to ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql index 137faa3c659f..e34ff491656e 100644 --- a/ruby/ql/src/queries/performance/CouldBeHoisted.ql +++ b/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql @@ -1,10 +1,10 @@ /** - * @name Could be hoisted - * @description Hoist Rails `ActiveRecord::Relation` query calls out of loops. + * @name Database query in a loop + * @description Database queries in a loop can lead to an unnecessary amount of database calls and poor performance. * @kind problem * @problem.severity info * @precision high - * @id rb/could-be-hoisted + * @id rb/database-query-in-loop * @tags performance */ From 74155a0214657e265a6fa90a545f211c180b2062 Mon Sep 17 00:00:00 2001 From: yoff Date: Thu, 6 Feb 2025 18:09:38 +0100 Subject: [PATCH 06/12] ruby: start adding comments I apuse here, because the code may be simplified --- ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql b/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql index e34ff491656e..8ca2f8600986 100644 --- a/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql +++ b/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql @@ -15,18 +15,20 @@ import codeql.ruby.Concepts import codeql.ruby.frameworks.ActiveRecord private import codeql.ruby.TaintTracking -string loopMethodName() { +/** Gets the name of a built-in method that involves a loop operation. */ +string getALoopMethodName() { result in [ "each", "reverse_each", "map", "map!", "foreach", "flat_map", "in_batches", "one?", "all?", "collect", "collect!", "select", "select!", "reject", "reject!" ] } +/** A call to a loop operation. */ class LoopingCall extends DataFlow::CallNode { DataFlow::CallableNode loopBlock; LoopingCall() { - this.getMethodName() = loopMethodName() and loopBlock = this.getBlock().asCallable() + this.getMethodName() = getALoopMethodName() and loopBlock = this.getBlock().asCallable() } DataFlow::CallableNode getLoopBlock() { result = loopBlock } From d7ffc3fc772770016150a6bf8dd2e0ee82e2dcac Mon Sep 17 00:00:00 2001 From: yoff Date: Thu, 6 Feb 2025 18:10:06 +0100 Subject: [PATCH 07/12] Ruby: remove test code filtering --- ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql | 2 -- 1 file changed, 2 deletions(-) diff --git a/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql b/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql index 8ca2f8600986..6d74443e12ae 100644 --- a/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql +++ b/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql @@ -71,8 +71,6 @@ from LoopingCall loop, DataFlow::CallNode call where // Disregard loops over constants not isArrayConstant(loop.getReceiver().asExpr(), _) and - // Disregard tests - not call.getLocation().getFile().getAbsolutePath().matches("%test%") and // Disregard cases where the looping is influenced by the query result not usedInLoopControlGuard(call, _) and // Only report the inner most loop From 58fb592822c4744979351a7cf87e7a04ee2d674c Mon Sep 17 00:00:00 2001 From: yoff Date: Fri, 7 Feb 2025 13:50:27 +0100 Subject: [PATCH 08/12] ruby: add tests --- .../DatabaseQueryInLoop.expected | 3 ++ .../DatabaseQueryInLoop.qlref | 1 + .../DatabaseQueryInLoop.rb | 53 +++++++++++++++++++ 3 files changed, 57 insertions(+) create mode 100644 ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.expected create mode 100644 ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.qlref create mode 100644 ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.rb diff --git a/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.expected b/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.expected new file mode 100644 index 000000000000..b4cbf2773802 --- /dev/null +++ b/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.expected @@ -0,0 +1,3 @@ +| DatabaseQueryInLoop.rb:11:13:11:52 | call to first | This call to a database query operation happens inside $@, and could be hoisted to a single call outside the loop. | DatabaseQueryInLoop.rb:10:9:12:11 | call to map | this loop | +| DatabaseQueryInLoop.rb:16:20:16:59 | call to first | This call to a database query operation happens inside $@, and could be hoisted to a single call outside the loop. | DatabaseQueryInLoop.rb:15:9:21:11 | call to map | this loop | +| DatabaseQueryInLoop.rb:19:17:19:56 | call to first | This call to a database query operation happens inside $@, and could be hoisted to a single call outside the loop. | DatabaseQueryInLoop.rb:18:13:20:15 | call to map | this loop | diff --git a/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.qlref b/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.qlref new file mode 100644 index 000000000000..f1302ca1ff4a --- /dev/null +++ b/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.qlref @@ -0,0 +1 @@ +queries/performance/DatabaseQueryInLoop.ql diff --git a/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.rb b/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.rb new file mode 100644 index 000000000000..57c27217a2e5 --- /dev/null +++ b/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.rb @@ -0,0 +1,53 @@ + +class User < ActiveRecord::Base +end + +class DatabaseQueryInLoopTest + def test + ### These are bad + + # simple query in loop + names.map do |name| + User.where(login: name).pluck(:id).first + end + + # nested loop + names.map do |name| + user = User.where(login: name).pluck(:id).first + + ids.map do |user_id| + User.where(id: user_id).pluck(:id).first + end + end + + ### These are OK + + # Not in loop + User.where(login: owner_slug).pluck(:id).first + + # Loops over constant array + %w(first-name second-name).map { |name| User.where(login: name).pluck(:id).first } + + constant_names = [first-name, second-name] + constant_names.each do |name| + User.where(login: name).pluck(:id).first + end + + # Loop traversal is influenced by query result + # raising an exception if the user is not found + names.map do |name| + user = User.where(login: name).pluck(:id).first + unless user + raise Error.new("User '#{name}' not found") + end + end + + # skipping through the loop when users are not relevant + names.map do |name| + user = User.where(login: name).pluck(:id).first + if not isRelevant(user) + next + end + end + end +end From b3eaac0ab7d6f00db214e7d4121c6b6382906d3b Mon Sep 17 00:00:00 2001 From: yoff Date: Fri, 7 Feb 2025 14:03:57 +0100 Subject: [PATCH 09/12] ruby: remove superflous logic --- .../queries/performance/DatabaseQueryInLoop.ql | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql b/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql index 6d74443e12ae..3fbf0e07b126 100644 --- a/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql +++ b/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql @@ -38,18 +38,6 @@ predicate happensInLoop(LoopingCall loop, DataFlow::CallNode e) { loop.getLoopBlock().asCallableAstNode() = e.asExpr().getScope() } -predicate happensInOuterLoop(LoopingCall outerLoop, DataFlow::CallNode e) { - exists(LoopingCall innerLoop | - happensInLoop(outerLoop, innerLoop) and - happensInLoop(innerLoop, e) - ) -} - -predicate happensInInnermostLoop(LoopingCall loop, DataFlow::CallNode e) { - happensInLoop(loop, e) and - not happensInOuterLoop(loop, e) -} - // The ActiveRecord instance is used to potentially control the loop predicate usedInLoopControlGuard(ActiveRecordInstance ar, DataFlow::Node guard) { TaintTracking::localTaint(ar, guard) and @@ -73,8 +61,7 @@ where not isArrayConstant(loop.getReceiver().asExpr(), _) and // Disregard cases where the looping is influenced by the query result not usedInLoopControlGuard(call, _) and - // Only report the inner most loop - happensInInnermostLoop(loop, call) and + happensInLoop(loop, call) and // Only report calls that are likely to be expensive call instanceof ActiveRecordModelFinderCall and not call.getMethodName() in ["new", "create"] From 9d810130e151fc6930769260cca4f784d3f813ff Mon Sep 17 00:00:00 2001 From: yoff Date: Fri, 7 Feb 2025 16:33:28 +0100 Subject: [PATCH 10/12] ruby: simplify and document --- .../performance/DatabaseQueryInLoop.ql | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql b/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql index 3fbf0e07b126..f294359a97df 100644 --- a/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql +++ b/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql @@ -23,28 +23,26 @@ string getALoopMethodName() { ] } -/** A call to a loop operation. */ +/** A loop, represented by a call to a loop operation. */ class LoopingCall extends DataFlow::CallNode { - DataFlow::CallableNode loopBlock; + Callable loopScope; LoopingCall() { - this.getMethodName() = getALoopMethodName() and loopBlock = this.getBlock().asCallable() + this.getMethodName() = getALoopMethodName() and + loopScope = this.getBlock().asCallable().asCallableAstNode() } - DataFlow::CallableNode getLoopBlock() { result = loopBlock } + /** Holds if `c` is executed as part of the body of this loop. */ + predicate executesCall(DataFlow::CallNode c) { c.asExpr().getScope() = loopScope } } -predicate happensInLoop(LoopingCall loop, DataFlow::CallNode e) { - loop.getLoopBlock().asCallableAstNode() = e.asExpr().getScope() -} - -// The ActiveRecord instance is used to potentially control the loop +/** Holds if `ar` influences `guard`, which may control the execution of a loop. */ predicate usedInLoopControlGuard(ActiveRecordInstance ar, DataFlow::Node guard) { TaintTracking::localTaint(ar, guard) and guard = guardForLoopControl(_, _) } -// A guard for controlling the loop +/** Gets a dataflow node that is used to decide whether to break a loop. */ DataFlow::Node guardForLoopControl(ConditionalExpr cond, Stmt control) { result.asExpr().getAstNode() = cond.getCondition().getAChild*() and ( @@ -55,15 +53,14 @@ DataFlow::Node guardForLoopControl(ConditionalExpr cond, Stmt control) { control = cond.getBranch(_).getAChild() } -from LoopingCall loop, DataFlow::CallNode call +from LoopingCall loop, ActiveRecordModelFinderCall call where + loop.executesCall(call) and // Disregard loops over constants not isArrayConstant(loop.getReceiver().asExpr(), _) and // Disregard cases where the looping is influenced by the query result not usedInLoopControlGuard(call, _) and - happensInLoop(loop, call) and // Only report calls that are likely to be expensive - call instanceof ActiveRecordModelFinderCall and not call.getMethodName() in ["new", "create"] select call, "This call to a database query operation happens inside $@, and could be hoisted to a single call outside the loop.", From 921104306a1751da98b3c3ddf9fd8f82309ddee9 Mon Sep 17 00:00:00 2001 From: yoff Date: Fri, 7 Feb 2025 23:43:27 +0100 Subject: [PATCH 11/12] ruby: clean up logic and add test use the CFG more than the AST --- .../performance/DatabaseQueryInLoop.ql | 31 ++++++++++++------- .../DatabaseQueryInLoop.rb | 8 +++++ 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql b/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql index f294359a97df..b17c5ecd9ba3 100644 --- a/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql +++ b/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql @@ -14,6 +14,8 @@ import codeql.ruby.ast.internal.Constant import codeql.ruby.Concepts import codeql.ruby.frameworks.ActiveRecord private import codeql.ruby.TaintTracking +private import codeql.ruby.CFG +private import codeql.ruby.controlflow.internal.Guards as Guards /** Gets the name of a built-in method that involves a loop operation. */ string getALoopMethodName() { @@ -36,21 +38,26 @@ class LoopingCall extends DataFlow::CallNode { predicate executesCall(DataFlow::CallNode c) { c.asExpr().getScope() = loopScope } } -/** Holds if `ar` influences `guard`, which may control the execution of a loop. */ -predicate usedInLoopControlGuard(ActiveRecordInstance ar, DataFlow::Node guard) { - TaintTracking::localTaint(ar, guard) and - guard = guardForLoopControl(_, _) +/** Holds if `ar` influences a guard that may control the execution of a loop. */ +predicate usedInLoopControlGuard(ActiveRecordInstance ar) { + exists(DataFlow::Node insideGuard, CfgNodes::ExprCfgNode guard | + // For a guard like `cond && ar`, the whole guard will not be tainted + // so we need to look at the taint of the individual parts. + insideGuard.asExpr().getExpr() = guard.getExpr().getAChild*() + | + TaintTracking::localTaint(ar, insideGuard) and + guardForLoopControl(guard, _) + ) } -/** Gets a dataflow node that is used to decide whether to break a loop. */ -DataFlow::Node guardForLoopControl(ConditionalExpr cond, Stmt control) { - result.asExpr().getAstNode() = cond.getCondition().getAChild*() and +/** Holds if `guard` controls `break` and `break` would break out of a loop. */ +predicate guardForLoopControl(CfgNodes::ExprCfgNode guard, CfgNodes::AstCfgNode break) { + Guards::guardControlsBlock(guard, break.getBasicBlock(), _) and ( - control.(MethodCall).getMethodName() = "raise" + break.(CfgNodes::ExprNodes::MethodCallCfgNode).getMethodName() = "raise" or - control instanceof NextStmt - ) and - control = cond.getBranch(_).getAChild() + break instanceof CfgNodes::ReturningCfgNode + ) } from LoopingCall loop, ActiveRecordModelFinderCall call @@ -59,7 +66,7 @@ where // Disregard loops over constants not isArrayConstant(loop.getReceiver().asExpr(), _) and // Disregard cases where the looping is influenced by the query result - not usedInLoopControlGuard(call, _) and + not usedInLoopControlGuard(call) and // Only report calls that are likely to be expensive not call.getMethodName() in ["new", "create"] select call, diff --git a/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.rb b/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.rb index 57c27217a2e5..ca17666fd0c7 100644 --- a/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.rb +++ b/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.rb @@ -42,6 +42,14 @@ def test end end + # more complicated condition + names.map do |name| + user = User.where(login: name).pluck(:id).first + unless cond && user + raise Error.new("User '#{name}' not found") + end + end + # skipping through the loop when users are not relevant names.map do |name| user = User.where(login: name).pluck(:id).first From 0912e3b024b76f21c6802524c4b1d83afc7dcebb Mon Sep 17 00:00:00 2001 From: yoff Date: Tue, 11 Feb 2025 12:51:25 +0100 Subject: [PATCH 12/12] ruby: use inline expectation tests --- .../DatabaseQueryInLoop/DatabaseQueryInLoop.qlref | 3 ++- .../performance/DatabaseQueryInLoop/DatabaseQueryInLoop.rb | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.qlref b/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.qlref index f1302ca1ff4a..42412050efdd 100644 --- a/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.qlref +++ b/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.qlref @@ -1 +1,2 @@ -queries/performance/DatabaseQueryInLoop.ql +query: queries/performance/DatabaseQueryInLoop.ql +postprocess: utils/test/InlineExpectationsTestQuery.ql \ No newline at end of file diff --git a/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.rb b/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.rb index ca17666fd0c7..31029ece5e68 100644 --- a/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.rb +++ b/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.rb @@ -8,15 +8,15 @@ def test # simple query in loop names.map do |name| - User.where(login: name).pluck(:id).first + User.where(login: name).pluck(:id).first # $ Alert end # nested loop names.map do |name| - user = User.where(login: name).pluck(:id).first + user = User.where(login: name).pluck(:id).first # $ Alert ids.map do |user_id| - User.where(id: user_id).pluck(:id).first + User.where(id: user_id).pluck(:id).first # $ Alert end end