diff --git a/activerecord/lib/active_record/relation/batches.rb b/activerecord/lib/active_record/relation/batches.rb index f9c26770b7e08..2443834225d76 100644 --- a/activerecord/lib/active_record/relation/batches.rb +++ b/activerecord/lib/active_record/relation/batches.rb @@ -373,32 +373,44 @@ def batch_on_unloaded_relation(relation:, start:, finish:, load:, order:, use_ra if load records = batch_relation.records ids = records.map(&:id) + ids_size = ids.size + ids_last = ids.last yielded_relation = where(primary_key => ids) yielded_relation.load_records(records) elsif (empty_scope && use_ranges != false) || use_ranges - ids = batch_relation.ids - finish = ids.last - if finish - yielded_relation = apply_finish_limit(batch_relation, finish, batch_orders) + # Efficiently peak at the last id for the next batch using offset and limit. + ids_size = batch_limit + ids_last = batch_relation.offset(batch_limit - 1).limit(1).ids.first + # If the last id is not found using offset, there is at most one more batch of size < batch_limit. + # We retry getting the list of ids so that we have the exact size and last id. + unless ids_last + ids = batch_relation.ids + ids_size = ids.size + ids_last = ids.last + end + if ids_last + yielded_relation = apply_finish_limit(batch_relation, ids_last, batch_orders) yielded_relation = yielded_relation.except(:limit, :order) yielded_relation.skip_query_cache!(false) end else ids = batch_relation.ids + ids_size = ids.size + ids_last = ids.last yielded_relation = where(primary_key => ids) end - break if ids.empty? + break if ids_size == 0 - primary_key_offset = ids.last + primary_key_offset = ids_last raise ArgumentError.new("Primary key not included in the custom select clause") unless primary_key_offset yield yielded_relation - break if ids.length < batch_limit + break if ids_size < batch_limit if limit_value - remaining -= ids.length + remaining -= ids_size if remaining == 0 # Saves a useless iteration when the limit is a multiple of the diff --git a/activerecord/test/cases/batches_test.rb b/activerecord/test/cases/batches_test.rb index f8df51b9755c5..08f310877ce3e 100644 --- a/activerecord/test/cases/batches_test.rb +++ b/activerecord/test/cases/batches_test.rb @@ -327,7 +327,7 @@ def test_in_batches_has_attribute_readers end def test_in_batches_should_yield_relation_if_block_given - assert_queries_count(6) do + assert_queries_count(7) do Post.in_batches(of: 2) do |relation| assert_kind_of ActiveRecord::Relation, relation end @@ -335,7 +335,7 @@ def test_in_batches_should_yield_relation_if_block_given end def test_in_batches_should_be_enumerable_if_no_block_given - assert_queries_count(6) do + assert_queries_count(7) do Post.in_batches(of: 2).each do |relation| assert_kind_of ActiveRecord::Relation, relation end @@ -370,7 +370,7 @@ def test_in_batches_each_record_should_be_ordered_by_id end def test_in_batches_update_all_affect_all_records - assert_queries_count(6 + 6) do # 6 selects, 6 updates + assert_queries_count(7 + 6) do # 7 selects, 6 updates Post.in_batches(of: 2).update_all(title: "updated-title") end assert_equal Post.all.pluck(:title), ["updated-title"] * Post.count @@ -416,7 +416,7 @@ def test_in_batches_should_be_loaded end def test_in_batches_if_not_loaded_executes_more_queries - assert_queries_count(@total + 1) do + assert_queries_count(@total + 2) do Post.in_batches(of: 1, load: false) do |relation| assert_not_predicate relation, :loaded? end @@ -490,7 +490,7 @@ def test_in_batches_when_loaded_can_return_an_enum end def test_in_batches_should_return_relations - assert_queries_count(@total + 1) do + assert_queries_count(@total + 2) do Post.in_batches(of: 1) do |relation| assert_kind_of ActiveRecord::Relation, relation end @@ -507,7 +507,7 @@ def test_in_batches_should_start_from_the_start_option def test_in_batches_should_end_at_the_finish_option post = Post.order("id DESC").where("id <= ?", 5).first - assert_queries_count(7) do + assert_queries_count(8) do relation = Post.in_batches(of: 1, finish: 5, load: true).reverse_each.first assert_equal post, relation.last end @@ -554,11 +554,11 @@ def test_in_batches_no_subqueries_for_whole_tables_batching end def test_in_batches_shouldnt_execute_query_unless_needed - assert_queries_count(2) do + assert_queries_count(3) do Post.in_batches(of: @total) { |relation| assert_kind_of ActiveRecord::Relation, relation } end - assert_queries_count(1) do + assert_queries_count(2) do Post.in_batches(of: @total + 1) { |relation| assert_kind_of ActiveRecord::Relation, relation } end end @@ -819,7 +819,7 @@ def test_find_in_batches_should_return_a_sized_enumerator test ".in_batches bypasses the query cache for its own queries" do Post.cache do - assert_queries_count(2) do + assert_queries_count(4) do Post.in_batches { } Post.in_batches { } end