diff --git a/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll b/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll index deaa0a6427a0..42c6286e4ea0 100644 --- a/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll +++ b/ruby/ql/lib/codeql/ruby/frameworks/ActiveRecord.qll @@ -254,9 +254,8 @@ private Expr getUltimateReceiver(MethodCall call) { ) } -// A call to `find`, `where`, etc. that may return active record model object(s) -private class ActiveRecordModelFinderCall extends ActiveRecordModelInstantiation, DataFlow::CallNode -{ +/** A call to `find`, `where`, etc. that may return active record model object(s) */ +class ActiveRecordModelFinderCall extends ActiveRecordModelInstantiation, DataFlow::CallNode { private ActiveRecordModelClass cls; ActiveRecordModelFinderCall() { diff --git a/ruby/ql/src/queries/performance/DatabaseQueryInLoop.qhelp b/ruby/ql/src/queries/performance/DatabaseQueryInLoop.qhelp new file mode 100644 index 000000000000..493597ee0c96 --- /dev/null +++ b/ruby/ql/src/queries/performance/DatabaseQueryInLoop.qhelp @@ -0,0 +1,23 @@ + + + + +

+When a database query operation, for example a call to a query method in the Rails `ActiveRecord::Relation` class, is executed in a loop, this can lead to a performance issue known as an "n+1 query problem". +The database query will be executed in each iteration of the loop. +Performance can usually be improved by performing a single database query outside of a loop, which retrieves all the required objects in a single operation. +

+
+ +

If possible, pull the database query out of the loop and rewrite it to retrieve all the required objects. This replaces multiple database operations with a single one. +

+
+ +

The following (suboptimal) example code queries the User object in each iteration of the loop:

+ +

To improve the performance, we instead query the User object once outside the loop, gathering all necessary information in a single query:

+ +
+
\ No newline at end of file diff --git a/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql b/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql new file mode 100644 index 000000000000..b17c5ecd9ba3 --- /dev/null +++ b/ruby/ql/src/queries/performance/DatabaseQueryInLoop.ql @@ -0,0 +1,74 @@ +/** + * @name Database query in a loop + * @description Database queries in a loop can lead to an unnecessary amount of database calls and poor performance. + * @kind problem + * @problem.severity info + * @precision high + * @id rb/database-query-in-loop + * @tags performance + */ + +import ruby +private import codeql.ruby.AST +import codeql.ruby.ast.internal.Constant +import codeql.ruby.Concepts +import codeql.ruby.frameworks.ActiveRecord +private import codeql.ruby.TaintTracking +private import codeql.ruby.CFG +private import codeql.ruby.controlflow.internal.Guards as Guards + +/** Gets the name of a built-in method that involves a loop operation. */ +string getALoopMethodName() { + result in [ + "each", "reverse_each", "map", "map!", "foreach", "flat_map", "in_batches", "one?", "all?", + "collect", "collect!", "select", "select!", "reject", "reject!" + ] +} + +/** A loop, represented by a call to a loop operation. */ +class LoopingCall extends DataFlow::CallNode { + Callable loopScope; + + LoopingCall() { + this.getMethodName() = getALoopMethodName() and + loopScope = this.getBlock().asCallable().asCallableAstNode() + } + + /** Holds if `c` is executed as part of the body of this loop. */ + predicate executesCall(DataFlow::CallNode c) { c.asExpr().getScope() = loopScope } +} + +/** Holds if `ar` influences a guard that may control the execution of a loop. */ +predicate usedInLoopControlGuard(ActiveRecordInstance ar) { + exists(DataFlow::Node insideGuard, CfgNodes::ExprCfgNode guard | + // For a guard like `cond && ar`, the whole guard will not be tainted + // so we need to look at the taint of the individual parts. + insideGuard.asExpr().getExpr() = guard.getExpr().getAChild*() + | + TaintTracking::localTaint(ar, insideGuard) and + guardForLoopControl(guard, _) + ) +} + +/** Holds if `guard` controls `break` and `break` would break out of a loop. */ +predicate guardForLoopControl(CfgNodes::ExprCfgNode guard, CfgNodes::AstCfgNode break) { + Guards::guardControlsBlock(guard, break.getBasicBlock(), _) and + ( + break.(CfgNodes::ExprNodes::MethodCallCfgNode).getMethodName() = "raise" + or + break instanceof CfgNodes::ReturningCfgNode + ) +} + +from LoopingCall loop, ActiveRecordModelFinderCall call +where + loop.executesCall(call) and + // Disregard loops over constants + not isArrayConstant(loop.getReceiver().asExpr(), _) and + // Disregard cases where the looping is influenced by the query result + not usedInLoopControlGuard(call) and + // Only report calls that are likely to be expensive + not call.getMethodName() in ["new", "create"] +select call, + "This call to a database query operation happens inside $@, and could be hoisted to a single call outside the loop.", + loop, "this loop" diff --git a/ruby/ql/src/queries/performance/examples/preload.rb b/ruby/ql/src/queries/performance/examples/preload.rb new file mode 100644 index 000000000000..27a313a0f65b --- /dev/null +++ b/ruby/ql/src/queries/performance/examples/preload.rb @@ -0,0 +1,14 @@ +# Preload User data +user_data = User.where(login: repo_names_by_owner.keys).pluck(:login, :id, :type).to_h do |login, id, type| + [login, { id: id, type: type == "User" ? "USER" : "ORGANIZATION" }] +end + +repo_names_by_owner.each do |owner_slug, repo_names| + owner_info = user_data[owner_slug] + owner_id = owner_info[:id] + owner_type = owner_info[:type] + rel_conditions = { owner_id: owner_id, name: repo_names } + + nwo_rel = nwo_rel.or(RepositorySecurityCenterConfig.where(rel_conditions)) unless neg + nwo_rel = nwo_rel.and(RepositorySecurityCenterConfig.where.not(rel_conditions)) if neg +end \ No newline at end of file diff --git a/ruby/ql/src/queries/performance/examples/straight_loop.rb b/ruby/ql/src/queries/performance/examples/straight_loop.rb new file mode 100644 index 000000000000..4c64d3bb84f2 --- /dev/null +++ b/ruby/ql/src/queries/performance/examples/straight_loop.rb @@ -0,0 +1,8 @@ +repo_names_by_owner.map do |owner_slug, repo_names| + owner_id, owner_type = User.where(login: owner_slug).pluck(:id, :type).first + owner_type = owner_type == "User" ? "USER" : "ORGANIZATION" + rel_conditions = { owner_id: owner_id, name: repo_names } + + nwo_rel = nwo_rel.or(RepositorySecurityCenterConfig.where(rel_conditions)) unless neg + nwo_rel = nwo_rel.and(RepositorySecurityCenterConfig.where.not(rel_conditions)) if neg + end \ No newline at end of file diff --git a/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.expected b/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.expected new file mode 100644 index 000000000000..b4cbf2773802 --- /dev/null +++ b/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.expected @@ -0,0 +1,3 @@ +| DatabaseQueryInLoop.rb:11:13:11:52 | call to first | This call to a database query operation happens inside $@, and could be hoisted to a single call outside the loop. | DatabaseQueryInLoop.rb:10:9:12:11 | call to map | this loop | +| DatabaseQueryInLoop.rb:16:20:16:59 | call to first | This call to a database query operation happens inside $@, and could be hoisted to a single call outside the loop. | DatabaseQueryInLoop.rb:15:9:21:11 | call to map | this loop | +| DatabaseQueryInLoop.rb:19:17:19:56 | call to first | This call to a database query operation happens inside $@, and could be hoisted to a single call outside the loop. | DatabaseQueryInLoop.rb:18:13:20:15 | call to map | this loop | diff --git a/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.qlref b/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.qlref new file mode 100644 index 000000000000..42412050efdd --- /dev/null +++ b/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.qlref @@ -0,0 +1,2 @@ +query: queries/performance/DatabaseQueryInLoop.ql +postprocess: utils/test/InlineExpectationsTestQuery.ql \ No newline at end of file diff --git a/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.rb b/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.rb new file mode 100644 index 000000000000..31029ece5e68 --- /dev/null +++ b/ruby/ql/test/query-tests/performance/DatabaseQueryInLoop/DatabaseQueryInLoop.rb @@ -0,0 +1,61 @@ + +class User < ActiveRecord::Base +end + +class DatabaseQueryInLoopTest + def test + ### These are bad + + # simple query in loop + names.map do |name| + User.where(login: name).pluck(:id).first # $ Alert + end + + # nested loop + names.map do |name| + user = User.where(login: name).pluck(:id).first # $ Alert + + ids.map do |user_id| + User.where(id: user_id).pluck(:id).first # $ Alert + end + end + + ### These are OK + + # Not in loop + User.where(login: owner_slug).pluck(:id).first + + # Loops over constant array + %w(first-name second-name).map { |name| User.where(login: name).pluck(:id).first } + + constant_names = [first-name, second-name] + constant_names.each do |name| + User.where(login: name).pluck(:id).first + end + + # Loop traversal is influenced by query result + # raising an exception if the user is not found + names.map do |name| + user = User.where(login: name).pluck(:id).first + unless user + raise Error.new("User '#{name}' not found") + end + end + + # more complicated condition + names.map do |name| + user = User.where(login: name).pluck(:id).first + unless cond && user + raise Error.new("User '#{name}' not found") + end + end + + # skipping through the loop when users are not relevant + names.map do |name| + user = User.where(login: name).pluck(:id).first + if not isRelevant(user) + next + end + end + end +end