From 5feb4016079ef80c4da80e402f5eec4b28761fe6 Mon Sep 17 00:00:00 2001
From: Rasmus Lerchedahl Petersen
Date: Wed, 27 Nov 2024 23:49:46 +0100
Subject: [PATCH 01/12] ruby: Add query for hoisting Rails ActiveRecord calls
This does not take assicoations into account. It uses
ActiveRecordModelFinderCall to identify relevant calls. This class has
therefor been made public.
---
.../codeql/ruby/frameworks/ActiveRecord.qll | 5 +-
.../queries/performance/CouldBeHoisted.qhelp | 22 +++++
.../src/queries/performance/CouldBeHoisted.ql | 90 +++++++++++++++++++
.../queries/performance/examples/preload.rb | 14 +++
.../performance/examples/straight_loop.rb | 8 ++
5 files changed, 136 insertions(+), 3 deletions(-)
create mode 100644 ruby/ql/src/queries/performance/CouldBeHoisted.qhelp
create mode 100644 ruby/ql/src/queries/performance/CouldBeHoisted.ql
create mode 100644 ruby/ql/src/queries/performance/examples/preload.rb
create mode 100644 ruby/ql/src/queries/performance/examples/straight_loop.rb
diff --git a/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll b/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll
index deaa0a6427a0..42c6286e4ea0 100644
--- a/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll
+++ b/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll
@@ -254,9 +254,8 @@ private Expr getUltimateReceiver(MethodCall call) {
)
}
-// A call to `find`, `where`, etc. that may return active record model object(s)
-private class ActiveRecordModelFinderCall extends ActiveRecordModelInstantiation, DataFlow::CallNode
-{
+/** A call to `find`, `where`, etc. that may return active record model object(s) */
+class ActiveRecordModelFinderCall extends ActiveRecordModelInstantiation, DataFlow::CallNode {
private ActiveRecordModelClass cls;
ActiveRecordModelFinderCall() {
diff --git a/ruby/ql/src/queries/performance/CouldBeHoisted.qhelp b/ruby/ql/src/queries/performance/CouldBeHoisted.qhelp
new file mode 100644
index 000000000000..b476a2c7a00c
--- /dev/null
+++ b/ruby/ql/src/queries/performance/CouldBeHoisted.qhelp
@@ -0,0 +1,22 @@
+
+
+
+
+
+When a Rails ActiveRecord query is executed in a loop, it is potentially an n+1 problem.
+This query identifies situations where an ActiveRecord query execution could be pulled out of a loop.
+
+
+
+If possible, pull the query out of the loop, thus replacing the many calls with a single one.
+
+
+
+The following (suboptimal) example code queries the User object in each iteration of the loop:
+
+To improve the performance, we instead query the User object once outside the loop, gathereing all necessary information:
+
+
+
\ No newline at end of file
diff --git a/ruby/ql/src/queries/performance/CouldBeHoisted.ql b/ruby/ql/src/queries/performance/CouldBeHoisted.ql
new file mode 100644
index 000000000000..4f5653a8e458
--- /dev/null
+++ b/ruby/ql/src/queries/performance/CouldBeHoisted.ql
@@ -0,0 +1,90 @@
+/**
+ * @name Could be hoisted
+ * @description Hoist Rails `ActiveRecord::Relation` query calls out of loops.
+ * @kind problem
+ * @problem.severity info
+ * @precision high
+ * @id rb/could-be-hoisted
+ * @tags performance
+ */
+
+// Possible Improvements;
+// - Consider also Associations.
+// Associations are lazy-loading by default, so something like
+// in a loop over `article` do
+// `article.book`
+// if you have 1000 articles it will do a 1000 calls to `book`.
+// If you already did `article includes book`, there should be no problem.
+// - Consider instances of ActiveRecordInstanceMethodCall, for instance
+// calls to `pluck`.
+import ruby
+private import codeql.ruby.AST
+import codeql.ruby.ast.internal.Constant
+import codeql.ruby.Concepts
+import codeql.ruby.frameworks.ActiveRecord
+private import codeql.ruby.TaintTracking
+
+string loopMethodName() {
+ result in [
+ "each", "reverse_each", "map", "map!", "foreach", "flat_map", "in_batches", "one?", "all?",
+ "collect", "collect!", "select", "select!", "reject", "reject!"
+ ]
+}
+
+class LoopingCall extends DataFlow::CallNode {
+ DataFlow::CallableNode loopBlock;
+
+ LoopingCall() {
+ this.getMethodName() = loopMethodName() and loopBlock = this.getBlock().asCallable()
+ }
+
+ DataFlow::CallableNode getLoopBlock() { result = loopBlock }
+}
+
+predicate happensInLoop(LoopingCall loop, DataFlow::CallNode e) {
+ loop.getLoopBlock().asCallableAstNode() = e.asExpr().getScope()
+}
+
+predicate happensInOuterLoop(LoopingCall outerLoop, DataFlow::CallNode e) {
+ exists(LoopingCall innerLoop |
+ happensInLoop(outerLoop, innerLoop) and
+ happensInLoop(innerLoop, e)
+ )
+}
+
+predicate happensInInnermostLoop(LoopingCall loop, DataFlow::CallNode e) {
+ happensInLoop(loop, e) and
+ not happensInOuterLoop(loop, e)
+}
+
+// The ActiveRecord instance is used to potentially control the loop
+predicate usedInLoopControlGuard(ActiveRecordInstance ar, DataFlow::Node guard) {
+ TaintTracking::localTaint(ar, guard) and
+ guard = guardForLoopControl(_, _)
+}
+
+// A guard for controlling the loop
+DataFlow::Node guardForLoopControl(ConditionalExpr cond, Stmt control) {
+ result.asExpr().getAstNode() = cond.getCondition().getAChild*() and
+ (
+ control.(MethodCall).getMethodName() = "raise"
+ or
+ control instanceof NextStmt
+ ) and
+ control = cond.getBranch(_).getAChild()
+}
+
+from LoopingCall loop, DataFlow::CallNode call
+where
+ // Disregard loops over constants
+ not isArrayConstant(loop.getReceiver().asExpr(), _) and
+ // Disregard tests
+ not call.getLocation().getFile().getAbsolutePath().matches("%test%") and
+ // Disregard cases where the looping is influenced by the query result
+ not usedInLoopControlGuard(call, _) and
+ // Only report the inner most loop
+ happensInInnermostLoop(loop, call) and
+ // Only report calls that are likely to be expensive
+ call instanceof ActiveRecordModelFinderCall and
+ not call.getMethodName() in ["new", "create"]
+select call, "This call happens inside $@, and could be hoisted.", loop, "this loop"
diff --git a/ruby/ql/src/queries/performance/examples/preload.rb b/ruby/ql/src/queries/performance/examples/preload.rb
new file mode 100644
index 000000000000..27a313a0f65b
--- /dev/null
+++ b/ruby/ql/src/queries/performance/examples/preload.rb
@@ -0,0 +1,14 @@
+# Preload User data
+user_data = User.where(login: repo_names_by_owner.keys).pluck(:login, :id, :type).to_h do |login, id, type|
+ [login, { id: id, type: type == "User" ? "USER" : "ORGANIZATION" }]
+end
+
+repo_names_by_owner.each do |owner_slug, repo_names|
+ owner_info = user_data[owner_slug]
+ owner_id = owner_info[:id]
+ owner_type = owner_info[:type]
+ rel_conditions = { owner_id: owner_id, name: repo_names }
+
+ nwo_rel = nwo_rel.or(RepositorySecurityCenterConfig.where(rel_conditions)) unless neg
+ nwo_rel = nwo_rel.and(RepositorySecurityCenterConfig.where.not(rel_conditions)) if neg
+end
\ No newline at end of file
diff --git a/ruby/ql/src/queries/performance/examples/straight_loop.rb b/ruby/ql/src/queries/performance/examples/straight_loop.rb
new file mode 100644
index 000000000000..4c64d3bb84f2
--- /dev/null
+++ b/ruby/ql/src/queries/performance/examples/straight_loop.rb
@@ -0,0 +1,8 @@
+repo_names_by_owner.map do |owner_slug, repo_names|
+ owner_id, owner_type = User.where(login: owner_slug).pluck(:id, :type).first
+ owner_type = owner_type == "User" ? "USER" : "ORGANIZATION"
+ rel_conditions = { owner_id: owner_id, name: repo_names }
+
+ nwo_rel = nwo_rel.or(RepositorySecurityCenterConfig.where(rel_conditions)) unless neg
+ nwo_rel = nwo_rel.and(RepositorySecurityCenterConfig.where.not(rel_conditions)) if neg
+ end
\ No newline at end of file
From 7af8fa75e6c14beedf0444ef316dc28a874d52df Mon Sep 17 00:00:00 2001
From: yoff
Date: Thu, 6 Feb 2025 15:45:28 +0100
Subject: [PATCH 02/12] Apply suggestions from code review
Co-authored-by: Aditya Sharad <6874315+adityasharad@users.noreply.github.com>
---
ruby/ql/src/queries/performance/CouldBeHoisted.qhelp | 9 +++++----
ruby/ql/src/queries/performance/CouldBeHoisted.ql | 2 +-
2 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/ruby/ql/src/queries/performance/CouldBeHoisted.qhelp b/ruby/ql/src/queries/performance/CouldBeHoisted.qhelp
index b476a2c7a00c..a473fc9ce1f9 100644
--- a/ruby/ql/src/queries/performance/CouldBeHoisted.qhelp
+++ b/ruby/ql/src/queries/performance/CouldBeHoisted.qhelp
@@ -5,18 +5,19 @@
-When a Rails ActiveRecord query is executed in a loop, it is potentially an n+1 problem.
-This query identifies situations where an ActiveRecord query execution could be pulled out of a loop.
+When a database query operation, for example a call to a query method in the Rails `ActiveRecord::Relation` class, is executed in a loop, this can lead to a performance issue known as an "n+1 query problem".
+The database query will be executed in each iteration of the loop.
+Performance can usually be improved by performing a single database query outside of a loop, which retrieves all the required objects in a single operation.
-If possible, pull the query out of the loop, thus replacing the many calls with a single one.
+
If possible, pull the database query out of the loop and rewrite it to retrieve all the required objects. This replaces multiple database operations with a single one.
The following (suboptimal) example code queries the User object in each iteration of the loop:
-To improve the performance, we instead query the User object once outside the loop, gathereing all necessary information:
+To improve the performance, we instead query the User
object once outside the loop, gathering all necessary information in a single query:
\ No newline at end of file
diff --git a/ruby/ql/src/queries/performance/CouldBeHoisted.ql b/ruby/ql/src/queries/performance/CouldBeHoisted.ql
index 4f5653a8e458..92ea9eac302b 100644
--- a/ruby/ql/src/queries/performance/CouldBeHoisted.ql
+++ b/ruby/ql/src/queries/performance/CouldBeHoisted.ql
@@ -87,4 +87,4 @@ where
// Only report calls that are likely to be expensive
call instanceof ActiveRecordModelFinderCall and
not call.getMethodName() in ["new", "create"]
-select call, "This call happens inside $@, and could be hoisted.", loop, "this loop"
+select call, "This call to a database query operation happens inside $@, and could be hoisted to a single call outside the loop.", loop, "this loop"
From 8aa195d8383d33e895bf93427f437214cf38da6e Mon Sep 17 00:00:00 2001
From: yoff
Date: Thu, 6 Feb 2025 16:59:08 +0100
Subject: [PATCH 03/12] ruby: remove comment (we can create issues)
---
ruby/ql/src/queries/performance/CouldBeHoisted.ql | 13 +++----------
1 file changed, 3 insertions(+), 10 deletions(-)
diff --git a/ruby/ql/src/queries/performance/CouldBeHoisted.ql b/ruby/ql/src/queries/performance/CouldBeHoisted.ql
index 92ea9eac302b..137faa3c659f 100644
--- a/ruby/ql/src/queries/performance/CouldBeHoisted.ql
+++ b/ruby/ql/src/queries/performance/CouldBeHoisted.ql
@@ -8,15 +8,6 @@
* @tags performance
*/
-// Possible Improvements;
-// - Consider also Associations.
-// Associations are lazy-loading by default, so something like
-// in a loop over `article` do
-// `article.book`
-// if you have 1000 articles it will do a 1000 calls to `book`.
-// If you already did `article includes book`, there should be no problem.
-// - Consider instances of ActiveRecordInstanceMethodCall, for instance
-// calls to `pluck`.
import ruby
private import codeql.ruby.AST
import codeql.ruby.ast.internal.Constant
@@ -87,4 +78,6 @@ where
// Only report calls that are likely to be expensive
call instanceof ActiveRecordModelFinderCall and
not call.getMethodName() in ["new", "create"]
-select call, "This call to a database query operation happens inside $@, and could be hoisted to a single call outside the loop.", loop, "this loop"
+select call,
+ "This call to a database query operation happens inside $@, and could be hoisted to a single call outside the loop.",
+ loop, "this loop"
From d9d0d3c18b372e158bb8cdfc91b65933889d9b53 Mon Sep 17 00:00:00 2001
From: yoff
Date: Thu, 6 Feb 2025 16:59:23 +0100
Subject: [PATCH 04/12] ruby: add code block
---
ruby/ql/src/queries/performance/CouldBeHoisted.qhelp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/ruby/ql/src/queries/performance/CouldBeHoisted.qhelp b/ruby/ql/src/queries/performance/CouldBeHoisted.qhelp
index a473fc9ce1f9..493597ee0c96 100644
--- a/ruby/ql/src/queries/performance/CouldBeHoisted.qhelp
+++ b/ruby/ql/src/queries/performance/CouldBeHoisted.qhelp
@@ -15,7 +15,7 @@ Performance can usually be improved by performing a single database query outsid
-The following (suboptimal) example code queries the User object in each iteration of the loop:
+The following (suboptimal) example code queries the User
object in each iteration of the loop:
To improve the performance, we instead query the User
object once outside the loop, gathering all necessary information in a single query:
From 51a2d8c72f76c6a22f5e5ab06042326cc1f1fc93 Mon Sep 17 00:00:00 2001
From: yoff
Date: Thu, 6 Feb 2025 17:07:12 +0100
Subject: [PATCH 05/12] ruby: rename query
---
.../{CouldBeHoisted.qhelp => DatabaseQueryInLoop.qhelp} | 0
.../{CouldBeHoisted.ql => DatabaseQueryInLoop.ql} | 6 +++---
2 files changed, 3 insertions(+), 3 deletions(-)
rename ruby/ql/src/queries/performance/{CouldBeHoisted.qhelp => DatabaseQueryInLoop.qhelp} (100%)
rename ruby/ql/src/queries/performance/{CouldBeHoisted.ql => DatabaseQueryInLoop.ql} (93%)
diff --git a/ruby/ql/src/queries/performance/CouldBeHoisted.qhelp b/ruby/ql/src/queries/performance/DatabaseQueryInLoop.qhelp
similarity index 100%
rename from ruby/ql/src/queries/performance/CouldBeHoisted.qhelp
rename to ruby/ql/src/queries/performance/DatabaseQueryInLoop.qhelp
diff --git a/ruby/ql/src/queries/performance/CouldBeHoisted.ql b/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql
similarity index 93%
rename from ruby/ql/src/queries/performance/CouldBeHoisted.ql
rename to ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql
index 137faa3c659f..e34ff491656e 100644
--- a/ruby/ql/src/queries/performance/CouldBeHoisted.ql
+++ b/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql
@@ -1,10 +1,10 @@
/**
- * @name Could be hoisted
- * @description Hoist Rails `ActiveRecord::Relation` query calls out of loops.
+ * @name Database query in a loop
+ * @description Database queries in a loop can lead to an unnecessary amount of database calls and poor performance.
* @kind problem
* @problem.severity info
* @precision high
- * @id rb/could-be-hoisted
+ * @id rb/database-query-in-loop
* @tags performance
*/
From 74155a0214657e265a6fa90a545f211c180b2062 Mon Sep 17 00:00:00 2001
From: yoff
Date: Thu, 6 Feb 2025 18:09:38 +0100
Subject: [PATCH 06/12] ruby: start adding comments I apuse here, because the
code may be simplified
---
ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql b/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql
index e34ff491656e..8ca2f8600986 100644
--- a/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql
+++ b/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql
@@ -15,18 +15,20 @@ import codeql.ruby.Concepts
import codeql.ruby.frameworks.ActiveRecord
private import codeql.ruby.TaintTracking
-string loopMethodName() {
+/** Gets the name of a built-in method that involves a loop operation. */
+string getALoopMethodName() {
result in [
"each", "reverse_each", "map", "map!", "foreach", "flat_map", "in_batches", "one?", "all?",
"collect", "collect!", "select", "select!", "reject", "reject!"
]
}
+/** A call to a loop operation. */
class LoopingCall extends DataFlow::CallNode {
DataFlow::CallableNode loopBlock;
LoopingCall() {
- this.getMethodName() = loopMethodName() and loopBlock = this.getBlock().asCallable()
+ this.getMethodName() = getALoopMethodName() and loopBlock = this.getBlock().asCallable()
}
DataFlow::CallableNode getLoopBlock() { result = loopBlock }
From d7ffc3fc772770016150a6bf8dd2e0ee82e2dcac Mon Sep 17 00:00:00 2001
From: yoff
Date: Thu, 6 Feb 2025 18:10:06 +0100
Subject: [PATCH 07/12] Ruby: remove test code filtering
---
ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql | 2 --
1 file changed, 2 deletions(-)
diff --git a/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql b/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql
index 8ca2f8600986..6d74443e12ae 100644
--- a/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql
+++ b/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql
@@ -71,8 +71,6 @@ from LoopingCall loop, DataFlow::CallNode call
where
// Disregard loops over constants
not isArrayConstant(loop.getReceiver().asExpr(), _) and
- // Disregard tests
- not call.getLocation().getFile().getAbsolutePath().matches("%test%") and
// Disregard cases where the looping is influenced by the query result
not usedInLoopControlGuard(call, _) and
// Only report the inner most loop
From 58fb592822c4744979351a7cf87e7a04ee2d674c Mon Sep 17 00:00:00 2001
From: yoff
Date: Fri, 7 Feb 2025 13:50:27 +0100
Subject: [PATCH 08/12] ruby: add tests
---
.../DatabaseQueryInLoop.expected | 3 ++
.../DatabaseQueryInLoop.qlref | 1 +
.../DatabaseQueryInLoop.rb | 53 +++++++++++++++++++
3 files changed, 57 insertions(+)
create mode 100644 ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.expected
create mode 100644 ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.qlref
create mode 100644 ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.rb
diff --git a/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.expected b/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.expected
new file mode 100644
index 000000000000..b4cbf2773802
--- /dev/null
+++ b/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.expected
@@ -0,0 +1,3 @@
+| DatabaseQueryInLoop.rb:11:13:11:52 | call to first | This call to a database query operation happens inside $@, and could be hoisted to a single call outside the loop. | DatabaseQueryInLoop.rb:10:9:12:11 | call to map | this loop |
+| DatabaseQueryInLoop.rb:16:20:16:59 | call to first | This call to a database query operation happens inside $@, and could be hoisted to a single call outside the loop. | DatabaseQueryInLoop.rb:15:9:21:11 | call to map | this loop |
+| DatabaseQueryInLoop.rb:19:17:19:56 | call to first | This call to a database query operation happens inside $@, and could be hoisted to a single call outside the loop. | DatabaseQueryInLoop.rb:18:13:20:15 | call to map | this loop |
diff --git a/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.qlref b/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.qlref
new file mode 100644
index 000000000000..f1302ca1ff4a
--- /dev/null
+++ b/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.qlref
@@ -0,0 +1 @@
+queries/performance/DatabaseQueryInLoop.ql
diff --git a/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.rb b/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.rb
new file mode 100644
index 000000000000..57c27217a2e5
--- /dev/null
+++ b/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.rb
@@ -0,0 +1,53 @@
+
+class User < ActiveRecord::Base
+end
+
+class DatabaseQueryInLoopTest
+ def test
+ ### These are bad
+
+ # simple query in loop
+ names.map do |name|
+ User.where(login: name).pluck(:id).first
+ end
+
+ # nested loop
+ names.map do |name|
+ user = User.where(login: name).pluck(:id).first
+
+ ids.map do |user_id|
+ User.where(id: user_id).pluck(:id).first
+ end
+ end
+
+ ### These are OK
+
+ # Not in loop
+ User.where(login: owner_slug).pluck(:id).first
+
+ # Loops over constant array
+ %w(first-name second-name).map { |name| User.where(login: name).pluck(:id).first }
+
+ constant_names = [first-name, second-name]
+ constant_names.each do |name|
+ User.where(login: name).pluck(:id).first
+ end
+
+ # Loop traversal is influenced by query result
+ # raising an exception if the user is not found
+ names.map do |name|
+ user = User.where(login: name).pluck(:id).first
+ unless user
+ raise Error.new("User '#{name}' not found")
+ end
+ end
+
+ # skipping through the loop when users are not relevant
+ names.map do |name|
+ user = User.where(login: name).pluck(:id).first
+ if not isRelevant(user)
+ next
+ end
+ end
+ end
+end
From b3eaac0ab7d6f00db214e7d4121c6b6382906d3b Mon Sep 17 00:00:00 2001
From: yoff
Date: Fri, 7 Feb 2025 14:03:57 +0100
Subject: [PATCH 09/12] ruby: remove superflous logic
---
.../queries/performance/DatabaseQueryInLoop.ql | 15 +--------------
1 file changed, 1 insertion(+), 14 deletions(-)
diff --git a/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql b/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql
index 6d74443e12ae..3fbf0e07b126 100644
--- a/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql
+++ b/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql
@@ -38,18 +38,6 @@ predicate happensInLoop(LoopingCall loop, DataFlow::CallNode e) {
loop.getLoopBlock().asCallableAstNode() = e.asExpr().getScope()
}
-predicate happensInOuterLoop(LoopingCall outerLoop, DataFlow::CallNode e) {
- exists(LoopingCall innerLoop |
- happensInLoop(outerLoop, innerLoop) and
- happensInLoop(innerLoop, e)
- )
-}
-
-predicate happensInInnermostLoop(LoopingCall loop, DataFlow::CallNode e) {
- happensInLoop(loop, e) and
- not happensInOuterLoop(loop, e)
-}
-
// The ActiveRecord instance is used to potentially control the loop
predicate usedInLoopControlGuard(ActiveRecordInstance ar, DataFlow::Node guard) {
TaintTracking::localTaint(ar, guard) and
@@ -73,8 +61,7 @@ where
not isArrayConstant(loop.getReceiver().asExpr(), _) and
// Disregard cases where the looping is influenced by the query result
not usedInLoopControlGuard(call, _) and
- // Only report the inner most loop
- happensInInnermostLoop(loop, call) and
+ happensInLoop(loop, call) and
// Only report calls that are likely to be expensive
call instanceof ActiveRecordModelFinderCall and
not call.getMethodName() in ["new", "create"]
From 9d810130e151fc6930769260cca4f784d3f813ff Mon Sep 17 00:00:00 2001
From: yoff
Date: Fri, 7 Feb 2025 16:33:28 +0100
Subject: [PATCH 10/12] ruby: simplify and document
---
.../performance/DatabaseQueryInLoop.ql | 23 ++++++++-----------
1 file changed, 10 insertions(+), 13 deletions(-)
diff --git a/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql b/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql
index 3fbf0e07b126..f294359a97df 100644
--- a/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql
+++ b/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql
@@ -23,28 +23,26 @@ string getALoopMethodName() {
]
}
-/** A call to a loop operation. */
+/** A loop, represented by a call to a loop operation. */
class LoopingCall extends DataFlow::CallNode {
- DataFlow::CallableNode loopBlock;
+ Callable loopScope;
LoopingCall() {
- this.getMethodName() = getALoopMethodName() and loopBlock = this.getBlock().asCallable()
+ this.getMethodName() = getALoopMethodName() and
+ loopScope = this.getBlock().asCallable().asCallableAstNode()
}
- DataFlow::CallableNode getLoopBlock() { result = loopBlock }
+ /** Holds if `c` is executed as part of the body of this loop. */
+ predicate executesCall(DataFlow::CallNode c) { c.asExpr().getScope() = loopScope }
}
-predicate happensInLoop(LoopingCall loop, DataFlow::CallNode e) {
- loop.getLoopBlock().asCallableAstNode() = e.asExpr().getScope()
-}
-
-// The ActiveRecord instance is used to potentially control the loop
+/** Holds if `ar` influences `guard`, which may control the execution of a loop. */
predicate usedInLoopControlGuard(ActiveRecordInstance ar, DataFlow::Node guard) {
TaintTracking::localTaint(ar, guard) and
guard = guardForLoopControl(_, _)
}
-// A guard for controlling the loop
+/** Gets a dataflow node that is used to decide whether to break a loop. */
DataFlow::Node guardForLoopControl(ConditionalExpr cond, Stmt control) {
result.asExpr().getAstNode() = cond.getCondition().getAChild*() and
(
@@ -55,15 +53,14 @@ DataFlow::Node guardForLoopControl(ConditionalExpr cond, Stmt control) {
control = cond.getBranch(_).getAChild()
}
-from LoopingCall loop, DataFlow::CallNode call
+from LoopingCall loop, ActiveRecordModelFinderCall call
where
+ loop.executesCall(call) and
// Disregard loops over constants
not isArrayConstant(loop.getReceiver().asExpr(), _) and
// Disregard cases where the looping is influenced by the query result
not usedInLoopControlGuard(call, _) and
- happensInLoop(loop, call) and
// Only report calls that are likely to be expensive
- call instanceof ActiveRecordModelFinderCall and
not call.getMethodName() in ["new", "create"]
select call,
"This call to a database query operation happens inside $@, and could be hoisted to a single call outside the loop.",
From 921104306a1751da98b3c3ddf9fd8f82309ddee9 Mon Sep 17 00:00:00 2001
From: yoff
Date: Fri, 7 Feb 2025 23:43:27 +0100
Subject: [PATCH 11/12] ruby: clean up logic and add test use the CFG more than
the AST
---
.../performance/DatabaseQueryInLoop.ql | 31 ++++++++++++-------
.../DatabaseQueryInLoop.rb | 8 +++++
2 files changed, 27 insertions(+), 12 deletions(-)
diff --git a/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql b/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql
index f294359a97df..b17c5ecd9ba3 100644
--- a/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql
+++ b/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql
@@ -14,6 +14,8 @@ import codeql.ruby.ast.internal.Constant
import codeql.ruby.Concepts
import codeql.ruby.frameworks.ActiveRecord
private import codeql.ruby.TaintTracking
+private import codeql.ruby.CFG
+private import codeql.ruby.controlflow.internal.Guards as Guards
/** Gets the name of a built-in method that involves a loop operation. */
string getALoopMethodName() {
@@ -36,21 +38,26 @@ class LoopingCall extends DataFlow::CallNode {
predicate executesCall(DataFlow::CallNode c) { c.asExpr().getScope() = loopScope }
}
-/** Holds if `ar` influences `guard`, which may control the execution of a loop. */
-predicate usedInLoopControlGuard(ActiveRecordInstance ar, DataFlow::Node guard) {
- TaintTracking::localTaint(ar, guard) and
- guard = guardForLoopControl(_, _)
+/** Holds if `ar` influences a guard that may control the execution of a loop. */
+predicate usedInLoopControlGuard(ActiveRecordInstance ar) {
+ exists(DataFlow::Node insideGuard, CfgNodes::ExprCfgNode guard |
+ // For a guard like `cond && ar`, the whole guard will not be tainted
+ // so we need to look at the taint of the individual parts.
+ insideGuard.asExpr().getExpr() = guard.getExpr().getAChild*()
+ |
+ TaintTracking::localTaint(ar, insideGuard) and
+ guardForLoopControl(guard, _)
+ )
}
-/** Gets a dataflow node that is used to decide whether to break a loop. */
-DataFlow::Node guardForLoopControl(ConditionalExpr cond, Stmt control) {
- result.asExpr().getAstNode() = cond.getCondition().getAChild*() and
+/** Holds if `guard` controls `break` and `break` would break out of a loop. */
+predicate guardForLoopControl(CfgNodes::ExprCfgNode guard, CfgNodes::AstCfgNode break) {
+ Guards::guardControlsBlock(guard, break.getBasicBlock(), _) and
(
- control.(MethodCall).getMethodName() = "raise"
+ break.(CfgNodes::ExprNodes::MethodCallCfgNode).getMethodName() = "raise"
or
- control instanceof NextStmt
- ) and
- control = cond.getBranch(_).getAChild()
+ break instanceof CfgNodes::ReturningCfgNode
+ )
}
from LoopingCall loop, ActiveRecordModelFinderCall call
@@ -59,7 +66,7 @@ where
// Disregard loops over constants
not isArrayConstant(loop.getReceiver().asExpr(), _) and
// Disregard cases where the looping is influenced by the query result
- not usedInLoopControlGuard(call, _) and
+ not usedInLoopControlGuard(call) and
// Only report calls that are likely to be expensive
not call.getMethodName() in ["new", "create"]
select call,
diff --git a/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.rb b/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.rb
index 57c27217a2e5..ca17666fd0c7 100644
--- a/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.rb
+++ b/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.rb
@@ -42,6 +42,14 @@ def test
end
end
+ # more complicated condition
+ names.map do |name|
+ user = User.where(login: name).pluck(:id).first
+ unless cond && user
+ raise Error.new("User '#{name}' not found")
+ end
+ end
+
# skipping through the loop when users are not relevant
names.map do |name|
user = User.where(login: name).pluck(:id).first
From 0912e3b024b76f21c6802524c4b1d83afc7dcebb Mon Sep 17 00:00:00 2001
From: yoff
Date: Tue, 11 Feb 2025 12:51:25 +0100
Subject: [PATCH 12/12] ruby: use inline expectation tests
---
.../DatabaseQueryInLoop/DatabaseQueryInLoop.qlref | 3 ++-
.../performance/DatabaseQueryInLoop/DatabaseQueryInLoop.rb | 6 +++---
2 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.qlref b/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.qlref
index f1302ca1ff4a..42412050efdd 100644
--- a/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.qlref
+++ b/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.qlref
@@ -1 +1,2 @@
-queries/performance/DatabaseQueryInLoop.ql
+query: queries/performance/DatabaseQueryInLoop.ql
+postprocess: utils/test/InlineExpectationsTestQuery.ql
\ No newline at end of file
diff --git a/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.rb b/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.rb
index ca17666fd0c7..31029ece5e68 100644
--- a/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.rb
+++ b/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.rb
@@ -8,15 +8,15 @@ def test
# simple query in loop
names.map do |name|
- User.where(login: name).pluck(:id).first
+ User.where(login: name).pluck(:id).first # $ Alert
end
# nested loop
names.map do |name|
- user = User.where(login: name).pluck(:id).first
+ user = User.where(login: name).pluck(:id).first # $ Alert
ids.map do |user_id|
- User.where(id: user_id).pluck(:id).first
+ User.where(id: user_id).pluck(:id).first # $ Alert
end
end