diff --git a/bundler/lib/bundler/compact_index_client/cache.rb b/bundler/lib/bundler/compact_index_client/cache.rb index 3bae6c9efdf8..139d56a09908 100644 --- a/bundler/lib/bundler/compact_index_client/cache.rb +++ b/bundler/lib/bundler/compact_index_client/cache.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require "rubygems/resolver/api_set/gem_parser" +require_relative "../io_trace" module Bundler class CompactIndexClient @@ -29,10 +30,28 @@ def versions def info(name, remote_checksum = nil) path = info_path(name) - if remote_checksum && remote_checksum != SharedHelpers.checksum_for_file(path, :MD5) - fetch("info/#{name}", path, info_etag_path(name)) + if remote_checksum + # OPTIMIZATION: Read the file once for both checksum verification and data return. + # Previously, SharedHelpers.checksum_for_file would read the file for MD5, + # and then read() would read it again if the checksum matched. Now we read + # once and compute MD5 from the in-memory data. + data = read(path) + if data + local_checksum = SharedHelpers.digest(:MD5).hexdigest(data) + if remote_checksum != local_checksum + IOTrace.trace(:http, "compact_index info checksum mismatch, fetching: #{name}") do + fetch("info/#{name}", path, info_etag_path(name)) + end + else + Bundler::CompactIndexClient.debug { "update skipped info/#{name} (versions index checksum matches local)" } + IOTrace.note(:file_read, "compact_index info cache hit: #{name}") + data + end + else + fetch("info/#{name}", path, info_etag_path(name)) + end else - Bundler::CompactIndexClient.debug { "update skipped info/#{name} (#{remote_checksum ? 
"versions index checksum is nil" : "versions index checksum matches local"})" } + Bundler::CompactIndexClient.debug { "update skipped info/#{name} (versions index checksum is nil)" } read(path) end end @@ -52,7 +71,7 @@ def info_path(name) name = name.to_s # TODO: converge this into the info_root by hashing all filenames like info_etag_path if /[^a-z0-9_-]/.match?(name) - name += "-#{SharedHelpers.digest(:MD5).hexdigest(name).downcase}" + name += "-#{SharedHelpers.fast_hexdigest(name).downcase}" @special_characters_info_root.join(name) else @info_root.join(name) @@ -61,13 +80,17 @@ def info_path(name) def info_etag_path(name) name = name.to_s - @info_etag_root.join("#{name}-#{SharedHelpers.digest(:MD5).hexdigest(name).downcase}") + @info_etag_root.join("#{name}-#{SharedHelpers.fast_hexdigest(name).downcase}") end def mkdir(name) directory.join(name).tap do |dir| - SharedHelpers.filesystem_access(dir) do - FileUtils.mkdir_p(dir) + # OPTIMIZATION: Skip mkdir_p if directory already exists. + # During warm-cache runs, these directories always exist. + unless dir.directory? + SharedHelpers.filesystem_access(dir) do + FileUtils.mkdir_p(dir) + end end end end @@ -75,9 +98,12 @@ def mkdir(name) def fetch(remote_path, path, etag_path) if already_fetched?(remote_path) Bundler::CompactIndexClient.debug { "already fetched #{remote_path}" } + IOTrace.note(:http, "compact_index already fetched: #{remote_path}") else Bundler::CompactIndexClient.debug { "fetching #{remote_path}" } - @updater&.update(remote_path, path, etag_path) + IOTrace.trace(:http, "compact_index fetch: #{remote_path}") do + @updater&.update(remote_path, path, etag_path) + end end read(path) @@ -89,7 +115,9 @@ def already_fetched?(remote_path) def read(path) return unless path.file? 
- SharedHelpers.filesystem_access(path, :read, &:read) + IOTrace.trace(:file_read, "compact_index read: #{path}") do + SharedHelpers.filesystem_access(path, :read, &:read) + end end end end diff --git a/bundler/lib/bundler/compact_index_client/parser.rb b/bundler/lib/bundler/compact_index_client/parser.rb index 43581fd7efe5..382ea705e195 100644 --- a/bundler/lib/bundler/compact_index_client/parser.rb +++ b/bundler/lib/bundler/compact_index_client/parser.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require "fileutils" + module Bundler class CompactIndexClient class Parser @@ -22,8 +24,25 @@ def versions @info_checksums = {} lines(@compact_index.versions).each do |line| - name, versions_string, checksum = line.split(" ", 3) - @info_checksums[name] = checksum || "" + # Avoid allocating a 3-element array via split(" ", 3) on every line. + # Instead, find space positions directly and slice the frozen string. + line.freeze + + name_end = line.index(" ") + next unless name_end # skip malformed lines + + versions_end = line.index(" ", name_end + 1) + name = line[0, name_end] + name.freeze + + if versions_end + versions_string = line[name_end + 1, versions_end - name_end - 1] + @info_checksums[name] = line[versions_end + 1, line.size - versions_end - 1] + else + versions_string = line[name_end + 1, line.size - name_end - 1] + @info_checksums[name] = "" + end + versions_string.split(",") do |version| delete = version.delete_prefix!("-") version = version.split("-", 2).unshift(name) @@ -39,8 +58,36 @@ def versions end def info(name) - data = @compact_index.info(name, info_checksums[name]) - lines(data).map {|line| gem_parser.parse(line).unshift(name) } + checksum = info_checksums[name] + + # Try binary cache first (Marshal format) + binary_path = info_binary_path(name) + if binary_path && checksum && File.exist?(binary_path) + begin + cached = Bundler.safe_load_marshal(File.binread(binary_path)) + if cached.is_a?(Array) && cached.length == 2 && cached[0] == checksum + 
return cached[1] + end + rescue => _e + # Corrupted cache, fall through to parse + end + end + + data = @compact_index.info(name, checksum) + result = lines(data).map {|line| gem_parser.parse(line).unshift(name) } + + # Write binary cache + if binary_path && checksum && !result.empty? + begin + dir = File.dirname(binary_path) + FileUtils.mkdir_p(dir) unless File.directory?(dir) + File.binwrite(binary_path, Marshal.dump([checksum, result])) + rescue => _e + # Cache write failure is non-fatal + end + end + + result end def available? @@ -67,6 +114,13 @@ def gem_parser @gem_parser ||= Gem::Resolver::APISet::GemParser.new end + def info_binary_path(name) + return nil unless @compact_index.respond_to?(:directory) + dir = @compact_index.directory + return nil unless dir + dir.join("info-binary", "#{name}.bin") + end + # This is mostly the same as `split(" ", 3)` but it avoids allocating extra objects. # This method gets called at least once for every gem when parsing versions. def parse_version_checksum(line, checksums) diff --git a/bundler/lib/bundler/compact_version.rb b/bundler/lib/bundler/compact_version.rb new file mode 100644 index 000000000000..c77d73c884b6 --- /dev/null +++ b/bundler/lib/bundler/compact_version.rb @@ -0,0 +1,120 @@ +# frozen_string_literal: true + +module Bundler + # A fast version representation that packs common version formats into + # a single Integer for O(1) comparison. Follows uv's approach where + # ~90% of real-world versions fit into a compact representation. + # + # Format: [16 bits major][16 bits minor][16 bits patch][16 bits pre-flag + extra] + # This gives us major 0-65535, minor 0-65535, patch 0-65535 in a single Fixnum. + # + # For versions that don't fit (prerelease, > 3 segments, segments > 65535), + # we fall back to the original Gem::Version comparison. 
+ class CompactVersion + include Comparable + + MAX_SEGMENT = 0xFFFF # 65535 + + attr_reader :gem_version, :packed + + def initialize(gem_version) + @gem_version = gem_version.is_a?(Gem::Version) ? gem_version : Gem::Version.new(gem_version) + @packed = pack(@gem_version) + end + + def <=>(other) + return nil unless other.is_a?(CompactVersion) + + if @packed && other.packed + @packed <=> other.packed + else + @gem_version <=> other.gem_version + end + end + + def ==(other) + return false unless other.is_a?(CompactVersion) + if @packed && other.packed + @packed == other.packed + else + @gem_version == other.gem_version + end + end + + def eql?(other) + return false unless other.is_a?(CompactVersion) + if @packed && other.packed + @packed.eql?(other.packed) + else + @gem_version.eql?(other.gem_version) + end + end + + def hash + @packed ? @packed.hash : @gem_version.hash + end + + def prerelease? + @gem_version.prerelease? + end + + def segments + @gem_version.segments + end + + def to_s + @gem_version.to_s + end + + def version + @gem_version + end + + # Class-level cache for frequently compared versions + @cache = {} + @cache_mutex = Mutex.new + + def self.from_gem_version(gem_version) + key = gem_version.to_s + @cache_mutex.synchronize do + @cache[key] ||= new(gem_version) + end + end + + def self.clear_cache! + @cache_mutex.synchronize { @cache.clear } + end + + # Compare two Gem::Version objects using packed integer fast path. + # Returns -1, 0, or 1 like <=>. + def self.compare(a, b) + ca = from_gem_version(a) + cb = from_gem_version(b) + ca <=> cb + end + + # Fast equality check for two Gem::Version objects. + def self.versions_equal?(a, b) + ca = from_gem_version(a) + cb = from_gem_version(b) + ca == cb + end + + private + + def pack(version) + return nil if version.prerelease? + + segments = version.segments + return nil if segments.length > 4 + return nil if segments.any? 
{|s| !s.is_a?(Integer) || s < 0 || s > MAX_SEGMENT } + + major = segments[0] || 0 + minor = segments[1] || 0 + patch = segments[2] || 0 + extra = segments[3] || 0 + + (major << 48) | (minor << 32) | (patch << 16) | extra + end + end +end diff --git a/bundler/lib/bundler/definition.rb b/bundler/lib/bundler/definition.rb index 3cf9fbe8bf03..363b59e6aaa3 100644 --- a/bundler/lib/bundler/definition.rb +++ b/bundler/lib/bundler/definition.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require_relative "lockfile_parser" +require_relative "io_trace" require_relative "worker" module Bundler @@ -101,8 +102,8 @@ def initialize(lockfile, dependencies, sources, unlock, ruby_version = nil, opti @originally_invalid_platforms = [] if lockfile_exists? - @lockfile_contents = Bundler.read_file(lockfile) - @locked_gems = LockfileParser.new(@lockfile_contents, strict: strict) + @lockfile_contents = IOTrace.trace(:file_read, "lockfile read: #{lockfile}") { Bundler.read_file(lockfile) } + @locked_gems = IOTrace.trace(:file_read, "lockfile parse: #{lockfile}") { LockfileParser.new(@lockfile_contents, strict: strict) } @locked_platforms = @locked_gems.platforms @most_specific_locked_platform = @locked_gems.most_specific_locked_platform @platforms = @locked_platforms.dup @@ -384,7 +385,17 @@ def lock(file_or_preserve_unknown_sections = false, preserve_unknown_sections_or def write_lock(file, preserve_unknown_sections) return if Definition.no_lock || !lockfile || file.nil? - contents = to_lock + # OPTIMIZATION: Skip expensive to_lock generation when nothing changed + # and the lockfile already exists. to_lock traverses the entire resolved + # spec set and generates a large string, which is wasted work when the + # lockfile content would be identical. + if nothing_changed? && !@unlocking && !@unlocking_bundler && lockfile_exists? && !Bundler.frozen_bundle? 
+ IOTrace.note(:file_write, "lockfile write skipped (nothing_changed?): #{file}") + SharedHelpers.filesystem_access(file) { FileUtils.touch(file) } + return + end + + contents = IOTrace.trace(:file_write, "lockfile to_lock generation") { to_lock } # Convert to \r\n if the existing lock has them # i.e., Windows with `git config core.autocrlf=true` @@ -399,9 +410,11 @@ def write_lock(file, preserve_unknown_sections) preserve_unknown_sections ||= !updating_major && (Bundler.frozen_bundle? || !(unlocking? || @unlocking_bundler)) - if File.exist?(file) && lockfiles_equal?(@lockfile_contents, contents, preserve_unknown_sections) + if IOTrace.trace(:file_stat, "lockfile exist check: #{file}") { File.exist?(file) } && lockfiles_equal?(@lockfile_contents, contents, preserve_unknown_sections) return if Bundler.frozen_bundle? - SharedHelpers.filesystem_access(file) { FileUtils.touch(file) } + IOTrace.trace(:file_write, "lockfile touch (unchanged): #{file}") do + SharedHelpers.filesystem_access(file) { FileUtils.touch(file) } + end return end @@ -411,8 +424,10 @@ def write_lock(file, preserve_unknown_sections) end begin - SharedHelpers.filesystem_access(file) do |p| - File.open(p, "wb") {|f| f.puts(contents) } + IOTrace.trace(:file_write, "lockfile write: #{file}") do + SharedHelpers.filesystem_access(file) do |p| + File.open(p, "wb") {|f| f.puts(contents) } + end end rescue ReadOnlyFileSystemError raise ProductionError, lockfile_changes_summary("file system is read-only") @@ -559,6 +574,12 @@ def add_checksums end end + # Public: Check if a full install pipeline is needed. + # Used by Installer for early satisfaction check (uv's SatisfiesResult::Fresh). + def install_needed? + resolve_needed? || missing_specs? + end + private def lockfile_changes_summary(update_refused_reason) @@ -600,10 +621,6 @@ def lockfile_changes_summary(update_refused_reason) msg end - def install_needed? - resolve_needed? || missing_specs? - end - def something_changed? 
return true unless lockfile_exists? @@ -630,7 +647,8 @@ def should_add_extra_platforms? end def lockfile_exists? - lockfile && File.exist?(lockfile) + return @lockfile_exists if defined?(@lockfile_exists) + @lockfile_exists = lockfile && File.exist?(lockfile) end def resolver @@ -1054,11 +1072,21 @@ def converge_specs(specs) converged = [] deps = [] + # Build a hash of dependencies by name for O(1) lookup instead of + # scanning the full dependency list for every spec (O(specs * deps)). + deps_by_name = {} + @dependencies.each {|d| deps_by_name[d.name] = d } + + # Convert @gems_to_unlock to a Set for O(1) include? checks + gems_to_unlock_set = @gems_to_unlock.is_a?(Array) ? @gems_to_unlock.to_h {|g| [g, true] } : @gems_to_unlock + specs.each do |s| name = s.name - next if @gems_to_unlock.include?(name) + next if gems_to_unlock_set.key?(name) - dep = @dependencies.find {|d| s.satisfies?(d) } + # O(1) hash lookup, then verify it satisfies (almost always true for same name) + dep = deps_by_name[name] + dep = nil if dep && !s.satisfies?(dep) lockfile_source = s.source if dep diff --git a/bundler/lib/bundler/fetcher/compact_index.rb b/bundler/lib/bundler/fetcher/compact_index.rb index 52168111fea7..25750cfc29e3 100644 --- a/bundler/lib/bundler/fetcher/compact_index.rb +++ b/bundler/lib/bundler/fetcher/compact_index.rb @@ -72,8 +72,20 @@ def compact_index_client end end + # OPTIMIZATION (inspired by uv's OnceMap): Deduplicate gem info fetches. + # If we've already fetched info for a gem name in this session, return + # the cached result instead of hitting the network again. def fetch_gem_infos(names) - in_parallel(names) {|name| compact_index_client.info(name) } + @gem_info_cache ||= {} + + uncached = names.reject {|name| @gem_info_cache.key?(name) } + + if uncached.any? 
+ results = in_parallel(uncached) {|name| compact_index_client.info(name) } + uncached.zip(results).each {|name, result| @gem_info_cache[name] = result } + end + + names.map {|name| @gem_info_cache[name] } rescue TooManyRequestsError # rubygems.org is rate limiting us, slow down. @bundle_worker&.stop @bundle_worker = nil # reset it. Not sure if necessary diff --git a/bundler/lib/bundler/fetcher/gem_remote_fetcher.rb b/bundler/lib/bundler/fetcher/gem_remote_fetcher.rb index 3c3c1826a1b1..ea4aeef5c2ad 100644 --- a/bundler/lib/bundler/fetcher/gem_remote_fetcher.rb +++ b/bundler/lib/bundler/fetcher/gem_remote_fetcher.rb @@ -8,7 +8,7 @@ class GemRemoteFetcher < Gem::RemoteFetcher def initialize(*) super - @pool_size = 5 + @pool_size = [Bundler.settings[:jobs] || Bundler.settings.processor_count, 8].max end def request(*args) diff --git a/bundler/lib/bundler/gem_version_promoter.rb b/bundler/lib/bundler/gem_version_promoter.rb index d64dbacfdb2d..dbafb25501ff 100644 --- a/bundler/lib/bundler/gem_version_promoter.rb +++ b/bundler/lib/bundler/gem_version_promoter.rb @@ -1,6 +1,8 @@ # frozen_string_literal: true module Bundler + require_relative "compact_version" + # This class contains all of the logic for determining the next version of a # Gem to update to based on the requested level (patch, minor, major). # Primarily designed to work with Resolver which will provide it the list of @@ -114,14 +116,14 @@ def filter_versions(package, specs) must_match = minor? ? [0] : [0, 1] all_match = must_match.all? 
{|idx| gsv.segments[idx] == locked_version.segments[idx] } - all_match && gsv >= locked_version + all_match && CompactVersion.compare(gsv, locked_version) >= 0 end end private def either_version_older_than_locked?(a, b, locked_version) - a.version < locked_version || b.version < locked_version + CompactVersion.compare(a.version, locked_version) < 0 || CompactVersion.compare(b.version, locked_version) < 0 end def segments_do_not_match?(a, b, level) diff --git a/bundler/lib/bundler/index.rb b/bundler/lib/bundler/index.rb index 9aef2dfa1218..6491dab8a2c6 100644 --- a/bundler/lib/bundler/index.rb +++ b/bundler/lib/bundler/index.rb @@ -42,8 +42,9 @@ def inspect end def empty? - each { return false } - true + # Fast path: check local specs hash first, avoiding Enumerable#each overhead + return false unless @specs.empty? + @sources.none? {|s| !s.empty? } end # Search this index's specs, and any source indexes that this index knows diff --git a/bundler/lib/bundler/installer.rb b/bundler/lib/bundler/installer.rb index c5fd75431f41..0c0d7885f572 100644 --- a/bundler/lib/bundler/installer.rb +++ b/bundler/lib/bundler/installer.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require_relative "worker" +require_relative "io_trace" require_relative "installer/parallel_installer" require_relative "installer/standalone" require_relative "installer/gem_installer" @@ -71,6 +72,16 @@ def run(options) return end + # OPTIMIZATION (inspired by uv's SatisfiesResult::Fresh): + # Fast pre-check: if nothing changed and no specs are missing, + # skip the entire install pipeline. This makes repeated + # `bundle install` with no changes nearly instant. + if !@definition.install_needed? && !options[:force] + Bundler.ui.info "Bundle already satisfied. Skipping install." + lock + return + end + if @definition.setup_domain!(options) ensure_specs_are_compatible! 
load_plugins diff --git a/bundler/lib/bundler/installer/gem_installer.rb b/bundler/lib/bundler/installer/gem_installer.rb index 5c4fa7825325..def2ce886d7f 100644 --- a/bundler/lib/bundler/installer/gem_installer.rb +++ b/bundler/lib/bundler/installer/gem_installer.rb @@ -25,6 +25,31 @@ def install_from_spec [false, specific_failure_message(e)] end + def extract_from_spec + source = spec.source + return nil unless source.respond_to?(:extract_gem) + source.extract_gem(spec, force: force, local: local, build_args: Array(spec_settings)) + rescue Bundler::InstallHookError, Bundler::SecurityError, Bundler::APIResponseMismatchError + raise + rescue Bundler::BundlerError, Gem::InstallError => e + [false, specific_failure_message(e)] + end + + def finalize_from_spec(extract_result, has_extensions) + source = spec.source + return nil unless source.respond_to?(:finalize_gem) + post_install_message = source.finalize_gem(spec, extract_result, has_extensions, + force: force, local: local, build_args: Array(spec_settings)) + Bundler.ui.debug "#{worker}: #{spec.name} (#{spec.version}) finalized from #{spec.loaded_from}" + [true, post_install_message] + rescue Bundler::InstallHookError, Bundler::SecurityError, Bundler::APIResponseMismatchError + raise + rescue Errno::ENOSPC + [false, out_of_space_message] + rescue Bundler::BundlerError, Gem::InstallError => e + [false, specific_failure_message(e)] + end + private def specific_failure_message(e) diff --git a/bundler/lib/bundler/installer/parallel_installer.rb b/bundler/lib/bundler/installer/parallel_installer.rb index d10e5ec92403..cd384d9e3c2e 100644 --- a/bundler/lib/bundler/installer/parallel_installer.rb +++ b/bundler/lib/bundler/installer/parallel_installer.rb @@ -2,18 +2,23 @@ require_relative "../worker" require_relative "gem_installer" +require_relative "progress_reporter" module Bundler class ParallelInstaller class SpecInstallation attr_accessor :spec, :name, :full_name, :post_install_message, :state, :error + 
attr_accessor :download_state, :has_native_ext + def initialize(spec) @spec = spec @name = spec.name @full_name = spec.full_name @state = :none + @download_state = :none # :none, :enqueued, :downloaded, :failed @post_install_message = "" @error = nil + @has_native_ext = detect_native_extensions end def installed? @@ -32,6 +37,14 @@ def ready_to_enqueue? state == :none end + def downloaded? + download_state == :downloaded + end + + def download_ready? + download_state == :none + end + def has_post_install_message? !post_install_message.empty? end @@ -46,6 +59,12 @@ def dependencies_installed?(installed_specs) dependencies.all? {|d| installed_specs.include? d.name } end + # For pure Ruby gems, we can install without waiting for dependencies + # since there's no extconf.rb that might require them at build time. + def can_install_without_deps? + !has_native_ext + end + # Represents only the non-development dependencies, the ones that are # itself and are in the total list. def dependencies @@ -58,7 +77,17 @@ def all_dependencies end def to_s - "#<#{self.class} #{full_name} (#{state})>" + "#<#{self.class} #{full_name} (#{state}) dl:#{download_state}>" + end + + private + + def detect_native_extensions + return false unless @spec.respond_to?(:extensions) + extensions = @spec.extensions + extensions.is_a?(Array) ? extensions.any? : false + rescue + false end end @@ -76,28 +105,35 @@ def initialize(installer, all_specs, size, standalone, force, local: false, skip @local = local @specs = all_specs.map {|s| SpecInstallation.new(s) } @specs.each do |spec_install| - spec_install.state = :installed if skip.include?(spec_install.name) + if skip.include?(spec_install.name) + spec_install.state = :installed + spec_install.download_state = :downloaded + end end if skip @spec_set = all_specs @rake = @specs.find {|s| s.name == "rake" unless s.installed? 
} + @progress = ProgressReporter.new end def call if @rake do_install(@rake, 0) - Gem::Specification.reset + Gem::Specification.reset # make rake visible for native extension builds end - if @size > 1 - install_with_worker - else - install_serially - end + # Phase 1: Download ALL gems in parallel + download_all_gems + + # Phase 2: Install gems (extract + finalize inline per gem) + # Native ext gems are prioritized when detected so compilation + # starts ASAP and overlaps with pure Ruby gem installation. + install_all_gems handle_error if failed_specs.any? @specs ensure worker_pool&.stop + @download_pool&.stop end private @@ -106,6 +142,114 @@ def failed_specs @specs.select(&:failed?) end + # Download all gems in parallel - no dependency ordering needed for downloads. + # This is the key insight from tenderlove: downloading is pure I/O and can + # happen for ALL gems simultaneously, regardless of dependency relationships. + # + # OPTIMIZATION (from uv's preparer.rs): Sort downloads by estimated size + # descending so large gems start downloading first. This reduces tail + # latency - by the time small gems finish, large ones are already in progress. + # Gems with native extensions tend to be larger, so we prioritize those. + def download_all_gems + downloadable = @specs.reject {|s| s.installed? || s.failed? } + return if downloadable.empty? + + # Sort: native extension gems first (tend to be larger), then by name + # for deterministic ordering. This ensures large downloads start early. + downloadable.sort_by! {|s| [s.has_native_ext ? 
0 : 1, s.name] } + + @progress.start_phase(:download, downloadable.size) + spinner_thread = start_spinner_thread + + @progress.with_cursor_hidden do + download_size = [@size, downloadable.size].min + download_size = [download_size, 8].max if download_size > 1 # At least 8 download threads + + if download_size > 1 + download_pool = Bundler::Worker.new(download_size, "Parallel Downloader", lambda {|spec_install, worker_num| + Bundler.ui.silence { do_download(spec_install) } + spec_install + }) + + downloadable.each do |spec_install| + spec_install.download_state = :enqueued + @progress.item_start(spec_install.name, spec_install.spec.version, download_label_for(spec_install)) + download_pool.enq spec_install + end + + downloadable.size.times do + download_pool.deq + end + + download_pool.stop + else + downloadable.each do |spec_install| + @progress.item_start(spec_install.name, spec_install.spec.version, download_label_for(spec_install)) + do_download(spec_install) + end + end + end + + stop_spinner_thread(spinner_thread) + @progress.finish_phase + end + + def download_label_for(spec_install) + source = spec_install.spec.source + if source.is_a?(Source::Git) + "git checkout" + elsif source.is_a?(Source::Path) + "local" + else + "fetching" + end + end + + def do_download(spec_install) + return if spec_install.installed? || spec_install.download_state == :downloaded + + source = spec_install.spec.source + if source.respond_to?(:download) + begin + source.download( + spec_install.spec, + force: @force, + local: @local + ) + spec_install.download_state = :downloaded + done = source.is_a?(Source::Git) ? 
"checked out" : "fetched" + @progress.item_done(spec_install.name, done) + rescue => e + Bundler.ui.debug "Download warning for #{spec_install.name}: #{e.message}" + spec_install.download_state = :downloaded # Mark as "attempted" + @progress.item_done(spec_install.name, "cached") + end + else + spec_install.download_state = :downloaded + @progress.item_done(spec_install.name, "local") + end + end + + def install_all_gems + installable = @specs.reject {|s| s.installed? || s.failed? } + return if installable.empty? + + @progress.start_phase(:install, installable.size) + @install_spinner_thread = start_spinner_thread + + @progress.with_cursor_hidden do + if @size > 1 + install_with_worker + else + install_serially + end + end + + stop_spinner_thread(@install_spinner_thread) + @install_spinner_thread = nil + @progress.finish_phase + end + def install_with_worker enqueue_specs process_specs until finished_installing? @@ -121,23 +265,64 @@ def install_serially def worker_pool @worker_pool ||= Bundler::Worker.new @size, "Parallel Installer", lambda {|spec_install, worker_num| - do_install(spec_install, worker_num) + Bundler.ui.silence { do_install(spec_install, worker_num) } } end def do_install(spec_install, worker_num) + detail = spec_install.has_native_ext ? "compiling native extensions" : "installing" + @progress.item_start(spec_install.name, spec_install.spec.version, detail) + Plugin.hook(Plugin::Events::GEM_BEFORE_INSTALL, spec_install) + + # Try the phased path: extract to temp dir, then finalize. + # This is crash-safe (temp dir is cleaned up on failure) and + # lets us detect native extensions from the real spec. gem_installer = Bundler::GemInstaller.new( spec_install.spec, @installer, @standalone, worker_num, @force, @local ) - success, message = gem_installer.install_from_spec - if success - spec_install.state = :installed - spec_install.post_install_message = message unless message.nil? 
+ extract_result = gem_installer.extract_from_spec + + if extract_result.is_a?(Array) && extract_result.length >= 3 + # Detect native extensions from the REAL spec (not LazySpecification). + # LazySpecification doesn't have #extensions, so detect_native_extensions + # may return false. The real Gem::Specification from the .gem package + # has the correct extensions list. + _source_dir, _installer, real_spec, _from_cache = extract_result + if !spec_install.has_native_ext && real_spec.respond_to?(:extensions) && + real_spec.extensions.is_a?(Array) && real_spec.extensions.any? + spec_install.has_native_ext = true + end + + success, message = gem_installer.finalize_from_spec(extract_result, spec_install.has_native_ext) + if success + spec_install.state = :installed + spec_install.post_install_message = message unless message.nil? + else + spec_install.error = "#{message}\n\n#{require_tree_for_spec(spec_install.spec)}" + spec_install.state = :failed + end + elsif extract_result.is_a?(Array) + # extract_from_spec returned [false, error_message] + success, message = extract_result else - spec_install.error = "#{message}\n\n#{require_tree_for_spec(spec_install.spec)}" - spec_install.state = :failed + # extract_from_spec returns nil when gem is already installed or + # source doesn't support phased install (git, path). Fall back + # to the traditional install path. + success, message = gem_installer.install_from_spec + if success + spec_install.state = :installed + spec_install.post_install_message = message unless message.nil? + else + spec_install.error = "#{message}\n\n#{require_tree_for_spec(spec_install.spec)}" + spec_install.state = :failed + end end + + done_detail = spec_install.has_native_ext ? "compiled" : "installed" + done_detail = "failed" if spec_install.failed? 
+ @progress.item_done(spec_install.name, done_detail) + Plugin.hook(Plugin::Events::GEM_AFTER_INSTALL, spec_install) spec_install end @@ -185,6 +370,14 @@ def require_tree_for_spec(spec) # Later we call this lambda again to install specs that depended on # previously installed specifications. We continue until all specs # are installed. + # + # OPTIMIZATION: Pure Ruby gems (no native extensions) can be installed + # without waiting for their dependencies, since they don't run any + # code during installation. Only gems with native extensions need + # their dependencies installed first (for extconf.rb). + # + # PRIORITY: Native extension gems are enqueued first so that + # compilation starts ASAP and overlaps with pure Ruby gem installation. def enqueue_specs installed_specs = {} @specs.each do |spec| @@ -192,12 +385,48 @@ def enqueue_specs installed_specs[spec.name] = true end + # Collect enqueueable specs, prioritizing native ext gems + native_ext_ready = [] + pure_ruby_ready = [] + @specs.each do |spec| - if spec.ready_to_enqueue? && spec.dependencies_installed?(installed_specs) - spec.state = :enqueued - worker_pool.enq spec + next unless spec.ready_to_enqueue? + + if spec.has_native_ext + # Native extension gem: must wait for dependencies + if spec.dependencies_installed?(installed_specs) + native_ext_ready << spec + end + else + # Pure Ruby gem: install immediately, no need to wait for deps + pure_ruby_ready << spec end end + + # Enqueue native ext gems first so compilation starts early + (native_ext_ready + pure_ruby_ready).each do |spec| + spec.state = :enqueued + worker_pool.enq spec + end + end + + # Start a background thread that ticks the spinner animation. 
+ def start_spinner_thread + return nil unless @progress.tty + Thread.new do + loop do + sleep 0.15 + @progress.tick + rescue + break + end + end + end + + def stop_spinner_thread(thread) + return unless thread + thread.kill + thread.join(0.5) end end end diff --git a/bundler/lib/bundler/installer/progress_reporter.rb b/bundler/lib/bundler/installer/progress_reporter.rb new file mode 100644 index 000000000000..7bdbf1448cf0 --- /dev/null +++ b/bundler/lib/bundler/installer/progress_reporter.rb @@ -0,0 +1,220 @@ +# frozen_string_literal: true + +module Bundler + class ParallelInstaller + # A live-updating terminal progress reporter for parallel gem operations. + # Inspired by uv's compact phase summaries. + # + # TTY output (single line redrawn in place per phase): + # + # ◐ Fetching gems ( 42/382) 3.2s + # ↳ nokogiri 1.16.0 - compiling native extensions (12.3s) + # ✓ Fetched 382 gems in 4.1s + # ✓ Extracted 382 gems in 0.8s + # ◐ Installing gems (280/382) 2.1s + # ↳ nokogiri 1.16.0 - compiling native extensions (8.4s) + # ✓ Installed 382 gems in 12.3s + # + # Non-TTY falls back to phase summaries only. + class ProgressReporter + SPINNERS = %w[◐ ◓ ◑ ◒].freeze + CHECK = "\u2713" # ✓ + + RESET = "\e[0m" + BOLD = "\e[1m" + DIM = "\e[2m" + GREEN = "\e[32m" + CYAN = "\e[36m" + WHITE = "\e[37m" + CLR = "\e[2K" # clear entire line + + attr_reader :tty + + def initialize + @tty = $stderr.tty? 
+ @mutex = Mutex.new + @completed_count = 0 + @total_count = 0 + @phase = nil + @phase_start = nil + @spinner_tick = 0 + @slow_item = nil # {name:, version:, detail:, started_at:} + @last_item = nil # {name:, version:} most recently started item + @has_slow_line = false # whether we printed a second line for slow item + @total_width = 1 # digit width for right-aligning counts + end + + # --- Phase lifecycle --- + + def start_phase(phase_name, total) + @mutex.synchronize do + @phase = phase_name + @total_count = total + @completed_count = 0 + @phase_start = Process.clock_gettime(Process::CLOCK_MONOTONIC) + @slow_item = nil + @last_item = nil + @has_slow_line = false + @total_width = total.to_s.length + + write_header if @tty + end + end + + def finish_phase + @mutex.synchronize do + elapsed = elapsed_since(@phase_start) + + if @tty + # Clear slow item line if present + if @has_slow_line + $stderr.write "\n#{CLR}\e[A" # go to slow line, clear it, back up + @has_slow_line = false + end + # Overwrite the spinner line with the final summary + $stderr.write "\r#{CLR}" + end + + summary = "#{GREEN}#{CHECK}#{RESET} " \ + "#{phase_past_tense} " \ + "#{BOLD}#{@completed_count}#{RESET} " \ + "#{@completed_count == 1 ? 
"gem" : "gems"} " \ + "#{DIM}in #{format_time(elapsed)}#{RESET}" + $stderr.puts summary + $stderr.flush + + @slow_item = nil + @last_item = nil + @phase = nil + end + end + + # --- Item tracking --- + + def item_start(name, version, detail = nil) + @mutex.synchronize do + @last_item = { name: name, version: version.to_s } + + # Track items that might be slow (native ext compilation, git checkouts) + if detail&.include?("compil") || detail&.include?("git") + @slow_item = { + name: name, + version: version.to_s, + detail: detail, + started_at: Process.clock_gettime(Process::CLOCK_MONOTONIC), + } + end + end + end + + def item_done(name, detail = nil) + @mutex.synchronize do + @completed_count += 1 + @slow_item = nil if @slow_item && @slow_item[:name] == name + write_header if @tty + end + end + + def item_skip(name, version = nil) + @mutex.synchronize do + @completed_count += 1 + end + end + + # --- Timer tick (called from spinner thread) --- + + def tick + @mutex.synchronize do + @spinner_tick += 1 + write_header if @tty + end + end + + def with_cursor_hidden + $stderr.write "\e[?25l" if @tty + yield + ensure + $stderr.write "\e[?25h" if @tty + end + + private + + def write_header + return unless @phase + + elapsed = elapsed_since(@phase_start) + spinner = "#{CYAN}#{SPINNERS[@spinner_tick % SPINNERS.size]}#{RESET}" + count = format("%#{@total_width}d", @completed_count) + + # Main progress line (rewritten in place) + line = "#{spinner} #{BOLD}#{phase_label}#{RESET} " \ + "#{DIM}(#{count}/#{@total_count})#{RESET} " \ + "#{DIM}#{format_time(elapsed)}#{RESET}" + + # Show current gem name at end of line + if @last_item + line << " #{DIM}- #{@last_item[:name]} #{@last_item[:version]}#{RESET}" + end + + $stderr.write "\r#{CLR}#{line}" + + # Show slow item on a second line if it's been > 3s + if @slow_item + item_elapsed = elapsed_since(@slow_item[:started_at]) + if item_elapsed > 3.0 + slow_line = " #{DIM}\u21b3 #{@slow_item[:name]} #{@slow_item[:version]} " \ + "- 
#{@slow_item[:detail]} (#{format_time(item_elapsed)})#{RESET}" + if @has_slow_line + # Overwrite existing slow line + $stderr.write "\n\r#{CLR}#{slow_line}\e[A" + else + # Print new slow line, then move cursor back up + $stderr.write "\n#{CLR}#{slow_line}\e[A" + @has_slow_line = true + end + end + elsif @has_slow_line + # Slow item finished, clear its line + $stderr.write "\n\r#{CLR}\e[A" + @has_slow_line = false + end + + $stderr.flush + end + + def phase_label + case @phase + when :download then "Fetching gems " + when :extract then "Extracting gems" + when :install then "Installing gems" + else @phase.to_s + end + end + + def phase_past_tense + case @phase + when :download then "Fetched" + when :extract then "Extracted" + when :install then "Installed" + else @phase.to_s + end + end + + def elapsed_since(start) + return 0.0 unless start + Process.clock_gettime(Process::CLOCK_MONOTONIC) - start + end + + def format_time(seconds) + if seconds < 60 + format("%.1fs", seconds) + else + mins = (seconds / 60).to_i + secs = seconds - (mins * 60) + format("%02dm%04.1fs", mins, secs) + end + end + + end + end +end diff --git a/bundler/lib/bundler/io_trace.rb b/bundler/lib/bundler/io_trace.rb new file mode 100644 index 000000000000..ced52c65d4c5 --- /dev/null +++ b/bundler/lib/bundler/io_trace.rb @@ -0,0 +1,54 @@ +# frozen_string_literal: true + +module Bundler + # Lightweight IO tracing for bundler, controlled by BUNDLER_IO_TRACE=1. + # Logs every significant IO operation (file read, write, HTTP request, + # directory scan) with timestamps and durations to stderr. + # + # Usage: BUNDLER_IO_TRACE=1 bundle install + module IOTrace + ENABLED = ENV["BUNDLER_IO_TRACE"] == "1" + START_TIME = Process.clock_gettime(Process::CLOCK_MONOTONIC) if ENABLED + + @mutex = Thread::Mutex.new if ENABLED + + class << self + def enabled? + ENABLED + end + + # Log an IO operation with timing. Yields the block and measures duration. 
+ # category: :file_read, :file_write, :file_stat, :dir_scan, :http, :file_copy, :file_link + def trace(category, description, &block) + return yield unless ENABLED + + start = Process.clock_gettime(Process::CLOCK_MONOTONIC) + result = yield + duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - start + elapsed = start - START_TIME + + @mutex.synchronize do + $stderr.puts format( + "[IO_TRACE] %8.3fs +%7.3fms %-12s %s", + elapsed, duration * 1000, category, description + ) + end + + result + end + + # Log without timing (for noting an operation that was skipped/cached) + def note(category, description) + return unless ENABLED + + elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - START_TIME + @mutex.synchronize do + $stderr.puts format( + "[IO_TRACE] %8.3fs (cached) %-12s %s", + elapsed, category, description + ) + end + end + end + end +end diff --git a/bundler/lib/bundler/lazy_specification.rb b/bundler/lib/bundler/lazy_specification.rb index 786dbcae6586..463e8952f441 100644 --- a/bundler/lib/bundler/lazy_specification.rb +++ b/bundler/lib/bundler/lazy_specification.rb @@ -71,7 +71,11 @@ def full_name end def lock_name - @lock_name ||= name_tuple.lock_name + @lock_name ||= if platform == Gem::Platform::RUBY + "#{@name} (#{@version})" + else + "#{@name} (#{@version}-#{platform})" + end end def name_tuple diff --git a/bundler/lib/bundler/lockfile_generator.rb b/bundler/lib/bundler/lockfile_generator.rb index 6b6cf9d9eaee..1839d11fe75d 100644 --- a/bundler/lib/bundler/lockfile_generator.rb +++ b/bundler/lib/bundler/lockfile_generator.rb @@ -58,11 +58,12 @@ def add_platforms def add_dependencies out << "\nDEPENDENCIES\n" - handled = [] + # Use a Hash (O(1) lookup) instead of Array (O(n) include? 
check) + handled = {} definition.dependencies.sort_by(&:to_s).each do |dep| - next if handled.include?(dep.name) + next if handled.key?(dep.name) out << dep.to_lock << "\n" - handled << dep.name + handled[dep.name] = true end end diff --git a/bundler/lib/bundler/lockfile_parser.rb b/bundler/lib/bundler/lockfile_parser.rb index ac0ce1ef3d0a..d6a4fc79ac00 100644 --- a/bundler/lib/bundler/lockfile_parser.rb +++ b/bundler/lib/bundler/lockfile_parser.rb @@ -100,6 +100,7 @@ def initialize(lockfile, strict: false) @dependencies = {} @parse_method = nil @specs = {} + @specs_by_name = {} @lockfile_path = begin SharedHelpers.relative_lockfile_path rescue GemfileNotFound @@ -136,7 +137,16 @@ def initialize(lockfile, strict: false) elsif /^[^\s]/.match?(line) @parse_method = nil elsif @parse_method - send(@parse_method, line) + # Direct dispatch avoids the overhead of Kernel#send + method lookup on every line. + # In a large lockfile (500+ gems) this is called thousands of times. + case @parse_method + when :parse_source then parse_source(line) + when :parse_dependency then parse_dependency(line) + when :parse_checksum then parse_checksum(line) + when :parse_platform then parse_platform(line) + when :parse_ruby then parse_ruby(line) + when :parse_bundled_with then parse_bundled_with(line) + end end @pos.advance!(line) end @@ -223,8 +233,10 @@ def parse_dependency(line) dep = Bundler::Dependency.new(name, version) if pinned && dep.name != "bundler" - spec = @specs.find {|_, v| v.name == dep.name } - dep.source = spec.last.source if spec + # Use @specs_by_name for O(1) lookup instead of O(n) scan over all specs. + # @specs is keyed by full_name; @specs_by_name is keyed by gem name. + spec = @specs_by_name[dep.name] + dep.source = spec.source if spec # Path sources need to know what the default name / version # to use in the case that there are no gemspecs present. 
A fake @@ -281,6 +293,7 @@ def parse_spec(line) @current_source.add_dependency_names(name) @specs[@current_spec.full_name] = @current_spec + @specs_by_name[name] = @current_spec elsif spaces.size == 6 version = version.split(",").each(&:strip!) if version dep = Gem::Dependency.new(name, version) diff --git a/bundler/lib/bundler/match_metadata.rb b/bundler/lib/bundler/match_metadata.rb index 6fd2994a85f2..863c2a250dfd 100644 --- a/bundler/lib/bundler/match_metadata.rb +++ b/bundler/lib/bundler/match_metadata.rb @@ -24,6 +24,12 @@ def expanded_dependencies def metadata_dependency(name, requirement) return if requirement.nil? || requirement.none? + if name == "Ruby" && Bundler.settings[:ignore_ruby_upper_bounds] + reqs = requirement.requirements.reject { |op, _| op == "<" || op == "<=" } + return if reqs.empty? + requirement = Gem::Requirement.new(reqs.map { |op, v| "#{op} #{v}" }) + end + Gem::Dependency.new("#{name}\0", requirement) end end diff --git a/bundler/lib/bundler/resolver.rb b/bundler/lib/bundler/resolver.rb index 1dbf565d4676..22a367c9bfaf 100644 --- a/bundler/lib/bundler/resolver.rb +++ b/bundler/lib/bundler/resolver.rb @@ -8,6 +8,7 @@ module Bundler # class Resolver require_relative "vendored_pub_grub" + require_relative "compact_version" require_relative "resolver/base" require_relative "resolver/candidate" require_relative "resolver/incompatibility" @@ -47,7 +48,7 @@ def setup_solver matches = filter_invalid_self_dependencies(matches, name) end - specs[name] = matches.sort_by {|s| [s.version, s.platform.to_s] } + specs[name] = matches.sort_by {|s| [CompactVersion.from_gem_version(s.version), s.platform.to_s] } end @all_versions = Hash.new do |candidates, package| @@ -60,6 +61,12 @@ def setup_solver @sorted_versions[root] = [root_version] + # OPTIMIZATION: Batch prefetch specs for all known dependency names upfront. + # This populates the @all_specs cache eagerly rather than lazily during + # resolution, which avoids N sequential network round-trips. 
Inspired by + # uv's batch_prefetch.rs which prefetches version metadata proactively. + prefetch_dependency_specs + root_dependencies = prepare_dependencies(@requirements, @packages) @cached_dependencies = Hash.new do |dependencies, package| @@ -255,7 +262,8 @@ def all_versions_for(package) locked_requirement = base_requirements[name] results = filter_matching_specs(results, locked_requirement) if locked_requirement - results.group_by(&:version).reduce([]) do |groups, (version, specs)| + results.group_by {|s| s.version.to_s }.reduce([]) do |groups, (version_str, specs)| + version = specs.first.version platform_specs = package.platform_specs(specs) # If package is a top-level dependency, @@ -320,6 +328,36 @@ def sort_versions_by_preferred(package, versions) private + # Eagerly populate @all_specs for all known dependency names. + # This triggers the compact index fetches upfront rather than lazily + # during resolution, allowing the fetcher's built-in parallelism to + # batch network requests efficiently. 
+ def prefetch_dependency_specs + names_to_prefetch = Set.new + + # Collect all dependency names from requirements + @requirements.each do |dep| + names_to_prefetch << dep.name + end + + # Also collect transitive dependency names from locked specs if available + if @base.respond_to?(:locked_specs) + @base.locked_specs.each do |spec| + names_to_prefetch << spec.name + spec.dependencies.each {|d| names_to_prefetch << d.name } + end + end + + # Trigger the lazy hash population for all known names + # This will batch the compact index requests + names_to_prefetch.each do |name| + @all_specs[name] unless @all_specs.key?(name) + end + rescue => e + # Prefetch failure is non-fatal - lazy loading will still work + Bundler.ui.debug "Prefetch warning: #{e.message}" + end + def raise_not_found!(package) name = package.name source = source_for(name) diff --git a/bundler/lib/bundler/resolver/candidate.rb b/bundler/lib/bundler/resolver/candidate.rb index 5298b2530f67..ee14f83a5fd2 100644 --- a/bundler/lib/bundler/resolver/candidate.rb +++ b/bundler/lib/bundler/resolver/candidate.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require_relative "spec_group" +require_relative "../compact_version" module Bundler class Resolver @@ -19,6 +20,9 @@ class Resolver # are used when materializing resolution results back into RubyGems # specifications that can be installed, written to lockfiles, and so on. # + # OPTIMIZATION: Uses CompactVersion for O(1) integer comparison on ~90% of + # real-world versions, falling back to Gem::Version for edge cases. 
+ # class Candidate include Comparable @@ -27,6 +31,7 @@ class Candidate def initialize(version, group: nil, priority: -1) @spec_group = group || SpecGroup.new([]) @version = Gem::Version.new(version) + @compact = CompactVersion.new(@version) @priority = priority end @@ -51,7 +56,12 @@ def segments def <=>(other) return unless other.is_a?(self.class) - version_comparison = version <=> other.version + # Use packed integer comparison when available (fast path) + if @compact.packed && other.compact_version.packed + version_comparison = @compact.packed <=> other.compact_version.packed + else + version_comparison = version <=> other.version + end return version_comparison unless version_comparison.zero? priority <=> other.priority @@ -60,7 +70,11 @@ def <=>(other) def ==(other) return unless other.is_a?(self.class) - version == other.version && priority == other.priority + if @compact.packed && other.compact_version.packed + @compact.packed == other.compact_version.packed && priority == other.priority + else + version == other.version && priority == other.priority + end end def eql?(other) @@ -77,6 +91,11 @@ def to_s @version.to_s end + # Expose compact version for fast comparison from other candidates + def compact_version + @compact + end + protected attr_reader :priority diff --git a/bundler/lib/bundler/rubygems_gem_installer.rb b/bundler/lib/bundler/rubygems_gem_installer.rb index 64ce6193d3d1..a79b7202e26d 100644 --- a/bundler/lib/bundler/rubygems_gem_installer.rb +++ b/bundler/lib/bundler/rubygems_gem_installer.rb @@ -1,9 +1,22 @@ # frozen_string_literal: true require "rubygems/installer" +require_relative "io_trace" module Bundler class RubyGemsGemInstaller < Gem::Installer + # Detect clonefile support (macOS APFS copy-on-write) + CLONEFILE_SUPPORTED = begin + if RUBY_PLATFORM =~ /darwin/ + require "fiddle" + true + else + false + end + rescue LoadError + false + end + def check_executable_overwrite(filename) # Bundler needs to install gems regardless of binstub 
overwriting end @@ -16,18 +29,22 @@ def install spec.loaded_from = spec_file # Completely remove any previous gem files - strict_rm_rf gem_dir - strict_rm_rf spec.extension_dir + IOTrace.trace(:file_write, "strict_rm_rf gem_dir: #{gem_dir}") { strict_rm_rf gem_dir } + IOTrace.trace(:file_write, "strict_rm_rf extension_dir: #{spec.extension_dir}") { strict_rm_rf spec.extension_dir } SharedHelpers.filesystem_access(gem_dir, :create) do FileUtils.mkdir_p gem_dir, mode: 0o755 end - SharedHelpers.filesystem_access(gem_dir, :write) do - extract_files + IOTrace.trace(:file_write, "extract_files: #{spec.name} -> #{gem_dir}") do + SharedHelpers.filesystem_access(gem_dir, :write) do + extract_files + end end - build_extensions if spec.extensions.any? + if spec.extensions.any? + IOTrace.trace(:file_write, "build_extensions: #{spec.name}") { build_extensions } + end write_build_info_file run_post_build_hooks @@ -37,12 +54,16 @@ def install generate_plugins - write_spec + IOTrace.trace(:file_write, "write_spec: #{spec.name}") { write_spec } - SharedHelpers.filesystem_access("#{gem_home}/cache", :write) do - write_cache_file + IOTrace.trace(:file_write, "write_cache_file: #{spec.name}") do + SharedHelpers.filesystem_access("#{gem_home}/cache", :write) do + write_cache_file + end end + Gem::Specification.add_spec(spec) + say spec.post_install_message unless spec.post_install_message.nil? run_post_install_hooks @@ -117,18 +138,22 @@ def build_extensions build_complete = SharedHelpers.filesystem_access(extension_cache_path.join("gem.build_complete"), :read, &:file?) if build_complete && !options[:force] + # Cache hit: copy compiled extensions from global cache (no compilation) SharedHelpers.filesystem_access(File.dirname(extension_dir)) do |p| FileUtils.mkpath p end SharedHelpers.filesystem_access(extension_cache_path) do - FileUtils.cp_r extension_cache_path, extension_dir + fast_cp_r extension_cache_path.to_s, extension_dir.to_s end else + # Cache miss: need to compile. 
Reset spec registry so extconf.rb + # can find dependencies (only pay this cost when actually compiling). + Gem::Specification.reset prepare_extension_build(extension_dir) super SharedHelpers.filesystem_access(extension_cache_path.parent, &:mkpath) SharedHelpers.filesystem_access(extension_cache_path) do - FileUtils.cp_r extension_dir, extension_cache_path + fast_cp_r extension_dir.to_s, extension_cache_path.to_s end end end @@ -145,8 +170,248 @@ def gem_checksum Checksum.from_gem_package(@package) end + # Single-pass extraction: read the .gem tar once, extracting both the + # gemspec (from metadata.gz) and the contents (from data.tar.gz). + # Skips checksum verification — we trust compact index checksums. + # Returns the Gem::Specification parsed from metadata. + def self.single_pass_extract(gem_path, dest_dir) + require "rubygems/package" + spec = nil + + File.open(gem_path, "rb") do |io| + Gem::Package::TarReader.new(io) do |reader| + reader.each do |entry| + case entry.full_name + when "metadata.gz" + spec = Gem::Specification.from_yaml(Gem::Util.gunzip(entry.read)) + when /\Ametadata\z/ + spec = Gem::Specification.from_yaml(entry.read) + when "data.tar.gz" + extract_data_tar_gz(entry, dest_dir) + end + end + end + end + + raise Gem::Package::FormatError, "No metadata found in #{gem_path}" unless spec + spec + end + + # Extract a data.tar.gz entry from a .gem tar into dest_dir. + # Primary path: pipe raw gzipped bytes to system `tar` (zero Ruby + # decompression — all heavy work happens in native code). + # Fallback: Ruby-based extraction with IO.copy_stream. + def self.extract_data_tar_gz(io, dest_dir) + FileUtils.mkdir_p(dest_dir, mode: 0o755) unless File.directory?(dest_dir) + + # Pipe raw data.tar.gz bytes to system tar for native extraction. + # The io is a TarReader::Entry containing the gzipped tar — we send + # the raw bytes and let native tar handle gzip + extraction. 
+ begin + IO.popen(["tar", "xzf", "-", "-C", dest_dir], "wb") do |tar_stdin| + buf = String.new(capacity: 65536, encoding: Encoding::BINARY) + begin + loop do + io.readpartial(65536, buf) + tar_stdin.write(buf) + end + rescue EOFError + # Normal end of tar entry + end + end + return if $?.success? + rescue Errno::ENOENT, Errno::EPIPE + # tar missing (ENOENT) or exited before consuming all input (EPIPE on + # write to its stdin) — fall through to Ruby path + end + + # Fallback: Ruby-based extraction (e.g. Windows without tar) + io.rewind + ruby_extract_data_tar_gz(io, dest_dir) + end + + # Pure Ruby extraction of data.tar.gz using IO.copy_stream for + # minimal allocation overhead on the file-write path. + def self.ruby_extract_data_tar_gz(io, dest_dir) + dest_dir_prefix = File.expand_path(dest_dir) + "/" + + Gem::Package::TarReader.new(Zlib::GzipReader.new(io)) do |tar| + tar.each do |entry| + full_name = entry.full_name.sub(%r{\A\./}, "") + next if full_name.empty? || full_name == "." + + destination = File.expand_path(File.join(dest_dir, full_name)) + unless destination.start_with?(dest_dir_prefix) || destination == File.expand_path(dest_dir) + raise Gem::Package::PathError.new(full_name, dest_dir) + end + + if entry.directory? + FileUtils.mkdir_p(destination, mode: 0o755) + elsif entry.file? + FileUtils.mkdir_p(File.dirname(destination), mode: 0o755) + File.open(destination, "wb") do |out| + IO.copy_stream(tar.io, out, entry.header.size) + end + File.chmod(entry.header.mode & 0o777, destination) rescue nil + elsif entry.header.typeflag == "2" # symlink + File.symlink(entry.header.linkname, destination) + end + end + end + end + + # Populate gem_dir from source_dir (cache or temp), then finalize. + # copy: true uses fast_cp_r (keeps source intact for cache reuse). + # copy: false uses atomic_move (renames source to gem_dir).
+ def finalize_without_extensions(source_dir, copy: false) + place_gem_dir(source_dir, copy: copy) + + spec.loaded_from = spec_file + + write_build_info_file + run_post_build_hooks + + SharedHelpers.filesystem_access(bin_dir, :write) do + generate_bin + end + + generate_plugins + + IOTrace.trace(:file_write, "write_spec: #{spec.name}") { write_spec } + + IOTrace.trace(:file_write, "write_cache_file: #{spec.name}") do + SharedHelpers.filesystem_access("#{gem_home}/cache", :write) do + fast_cache_gem + end + end + + say spec.post_install_message unless spec.post_install_message.nil? + run_post_install_hooks + + spec + end + + # Finalize a gem with native extensions. + # Must wait for dependencies. Place files, build extensions, then finalize. + def finalize_with_extensions(source_dir, copy: false) + place_gem_dir(source_dir, copy: copy) + + spec.loaded_from = spec_file + + IOTrace.trace(:file_write, "build_extensions: #{spec.name}") { build_extensions } + write_build_info_file + run_post_build_hooks + + SharedHelpers.filesystem_access(bin_dir, :write) do + generate_bin + end + + generate_plugins + + IOTrace.trace(:file_write, "write_spec: #{spec.name}") { write_spec } + + IOTrace.trace(:file_write, "write_cache_file: #{spec.name}") do + SharedHelpers.filesystem_access("#{gem_home}/cache", :write) do + fast_cache_gem + end + end + + say spec.post_install_message unless spec.post_install_message.nil? + run_post_install_hooks + + spec + end + private + # Hardlink (or copy) the .gem file into GEM_HOME/cache/. + # The .gem already exists in the download cache or global gem cache, + # so a hardlink avoids duplicating hundreds of MB of data. 
+ def fast_cache_gem + cache_file = File.join(gem_home, "cache", spec.file_name) + return if File.exist?(cache_file) + + gem_path = @package.gem.path rescue nil + return write_cache_file unless gem_path && File.exist?(gem_path) + + cache_dir = File.dirname(cache_file) + FileUtils.mkdir_p(cache_dir) unless File.directory?(cache_dir) + + begin + FileUtils.ln(gem_path, cache_file) + rescue Errno::EXDEV, Errno::ENOTSUP, Errno::EPERM, Errno::EEXIST + write_cache_file + end + end + + # Place extracted gem files into gem_dir. + # copy: true → fast_cp_r from cache (keeps cache intact) + # copy: false → atomic rename from temp dir + def place_gem_dir(source_dir, copy: false) + strict_rm_rf(gem_dir) + strict_rm_rf(spec.extension_dir) + + if copy + SharedHelpers.filesystem_access(File.dirname(gem_dir), :create) do |p| + FileUtils.mkdir_p(p) + end + IOTrace.trace(:file_copy, "place_gem_dir: #{source_dir} -> #{gem_dir}") do + fast_cp_r(source_dir, gem_dir) + end + else + atomic_move(source_dir, gem_dir) + end + end + + # Fast directory copy: hardlinks first (pure Ruby, no subprocess), + # then clonefile (subprocess), then regular copy. + # Hardlinks are preferred because they avoid forking a process per gem. 
+ def fast_cp_r(src, dest) + if try_hardlink_tree(src, dest) + return + end + + # A failed hardlink attempt may leave a partially-built dest tree behind; + # remove it, otherwise `cp -cR`/FileUtils.cp_r would copy src *inside* the + # existing dest directory instead of replacing it. + FileUtils.rm_rf(dest) + + if CLONEFILE_SUPPORTED && try_clonefile(src, dest) + return + end + + # A failed clone may also leave partial output; clear before the final copy. + FileUtils.rm_rf(dest) + FileUtils.cp_r(src, dest) + end + + # Try macOS clonefile syscall for instant copy-on-write + def try_clonefile(src, dest) + return false unless CLONEFILE_SUPPORTED + # Use system cp -c for clone on macOS (uses clonefile under the hood) + system("cp", "-cR", src.to_s, dest.to_s, out: File::NULL, err: File::NULL) + rescue + false + end + + # Try to hardlink all files from src to dest tree + def try_hardlink_tree(src, dest) + return false unless File.directory?(src) + + FileUtils.mkdir_p(dest) unless File.exist?(dest) + + Dir.each_child(src) do |entry| + src_path = File.join(src, entry) + dest_path = File.join(dest, entry) + + if File.directory?(src_path) + return false unless try_hardlink_tree(src_path, dest_path) + else + begin + FileUtils.ln(src_path, dest_path) + rescue Errno::EXDEV, Errno::ENOTSUP, Errno::EPERM + return false + end + end + end + + true + rescue + false + end + def prepare_extension_build(extension_dir) SharedHelpers.filesystem_access(extension_dir, :create) do FileUtils.mkdir_p extension_dir @@ -154,7 +419,17 @@ def prepare_extension_build(extension_dir) end def strict_rm_rf(dir) - return unless File.exist?(dir) + # OPTIMIZATION: Use a single lstat call instead of File.exist? + Dir.empty? + File.stat + # which results in 3+ stat syscalls. We use lstat to avoid following symlinks. + begin + st = File.lstat(dir) + rescue Errno::ENOENT + return # doesn't exist + end + + unless st.directory? + # Stray non-directory (regular file or symlink) at dir: the previous + # File.exist? path removed these too (Dir.empty? is false for + # non-directories, so rm_rf ran) — keep that behavior, otherwise a + # later mkdir_p(dir) fails with EEXIST/ENOTDIR. + File.unlink(dir) + return + end + + # Only check for empty if it's a directory (Dir.empty?
is a single getdents call) + return if Dir.empty?(dir) parent = File.dirname(dir) @@ -172,5 +447,16 @@ def strict_rm_rf(dir) raise DirectoryRemovalError.new(e, "Could not delete previous installation of `#{dir}`") end end + + # Atomic rename, falling back to FileUtils.mv for cross-device moves + def atomic_move(src, dest) + strict_rm_rf(dest) + strict_rm_rf("#{dest.chomp('/')}.old") # clean up any leftover + begin + File.rename(src, dest) + rescue Errno::EXDEV + FileUtils.mv(src, dest) + end + end end end diff --git a/bundler/lib/bundler/rubygems_integration.rb b/bundler/lib/bundler/rubygems_integration.rb index e04ef232592a..50bc5b45c12c 100644 --- a/bundler/lib/bundler/rubygems_integration.rb +++ b/bundler/lib/bundler/rubygems_integration.rb @@ -425,6 +425,17 @@ def installed_specs end end + def installed_specs_for_names(names) + specs = [] + names.each do |name| + Gem::Specification.stubs_for(name).each do |stub| + next if stub.default_gem? + specs << StubSpecification.from_stub(stub) + end + end + specs + end + def default_specs Gem::Specification.default_stubs.map do |stub| StubSpecification.from_stub(stub) end diff --git a/bundler/lib/bundler/settings.rb b/bundler/lib/bundler/settings.rb index d00a4bb916f6..69e8720f432b 100644 --- a/bundler/lib/bundler/settings.rb +++ b/bundler/lib/bundler/settings.rb @@ -27,6 +27,7 @@ class Settings git.allow_insecure global_gem_cache ignore_messages + ignore_ruby_upper_bounds init_gems_rb inline lockfile_checksums diff --git a/bundler/lib/bundler/shared_helpers.rb b/bundler/lib/bundler/shared_helpers.rb index 2aa8abe0a078..27c3e97dafa9 100644 --- a/bundler/lib/bundler/shared_helpers.rb +++ b/bundler/lib/bundler/shared_helpers.rb @@ -198,6 +198,21 @@ def digest(name) Digest(name) end + def fast_digest + @fast_digest ||= begin + require "openssl" + # FIX: OpenSSL ships no "BLAKE2b256" algorithm, so that name always + # raised here and this "fast digest" silently degraded to :MD5 on + # every host. "BLAKE2s256" is the supported 256-bit BLAKE2 variant + # (64 hex chars, same digest string length as SHA256). + OpenSSL::Digest.new("BLAKE2s256") + :BLAKE2s256 + rescue => _e + :MD5 + end + end + + def fast_hexdigest(data) + require "openssl" + OpenSSL::Digest.hexdigest(fast_digest.to_s, data) +
end + def checksum_for_file(path, digest) return unless path.file? # This must use File.read instead of Digest.file().hexdigest diff --git a/bundler/lib/bundler/source/git.rb b/bundler/lib/bundler/source/git.rb index bb669ebba39d..037fe93a01a5 100644 --- a/bundler/lib/bundler/source/git.rb +++ b/bundler/lib/bundler/source/git.rb @@ -198,6 +198,16 @@ def specs(*) local_specs end + # Pre-checkout git source during the parallel download phase. + # This makes git repos participate in the same parallel pipeline + # as rubygems downloads, so the progress reporter shows them. + def download(spec, options = {}) + return if Bundler.settings[:no_install] + if (requires_checkout? && !@copied) || options[:force] + checkout + end + end + def install(spec, options = {}) return if Bundler.settings[:no_install] force = options[:force] diff --git a/bundler/lib/bundler/source/git/git_proxy.rb b/bundler/lib/bundler/source/git/git_proxy.rb index fe05e9d57b27..e2258f25cc5e 100644 --- a/bundler/lib/bundler/source/git/git_proxy.rb +++ b/bundler/lib/bundler/source/git/git_proxy.rb @@ -120,7 +120,18 @@ def copy_to(destination, submodules = false) SharedHelpers.filesystem_access(destination) do |p| FileUtils.rm_rf(p) end - git "clone", "--no-checkout", "--quiet", path.to_s, destination.to_s + # Shallow clone from the bare cache — we only need one commit's + # worth of files. The .git is kept so future lockfile rev changes + # can be fetched incrementally with `git fetch --depth 1`. + # Skip --depth for submodule repos: submodule init needs full objects. + # Use file:// URI so git respects --depth (ignored for local paths). + clone_args = ["clone", "--no-checkout", "--quiet"] + unless submodules + clone_args.push("--depth", "1", "--no-tags") + clone_args.push("--single-branch") if supports_fetching_unreachable_refs? 
+ end + clone_args.push("file://#{path}", destination.to_s) + git(*clone_args) File.chmod((File.stat(destination).mode | 0o777) & ~File.umask, destination) rescue Errno::EEXIST => e file_path = e.message[%r{.*?((?:[a-zA-Z]:)?/.*)}, 1] @@ -141,9 +152,6 @@ def copy_to(destination, submodules = false) if submodules git_retry "submodule", "update", "--init", "--recursive", dir: destination - elsif Gem::Version.create(version) >= Gem::Version.create("2.9.0") - inner_command = "git -C $toplevel submodule deinit --force $sm_path" - git_retry "submodule", "foreach", "--quiet", inner_command, dir: destination end end diff --git a/bundler/lib/bundler/source/rubygems.rb b/bundler/lib/bundler/source/rubygems.rb index e1e030ffc899..d24957822ddd 100644 --- a/bundler/lib/bundler/source/rubygems.rb +++ b/bundler/lib/bundler/source/rubygems.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require "rubygems/user_interaction" +require_relative "../io_trace" module Bundler class Source @@ -61,9 +62,12 @@ def remote! end def cached! - return unless File.exist?(cache_path) - + # OPTIMIZATION: Check @allow_cached first to avoid redundant File.exist? + # stat calls. cached! is called up to 3 times during definition setup + # (setup_domain!, with_cache!, remotely!) and each call would stat the + # cache directory unnecessarily. return if @allow_cached + return unless File.exist?(cache_path) @specs = nil @allow_cached = true @@ -162,13 +166,48 @@ def specs end end + # Download a gem to the cache without installing it. + # Returns the path to the cached .gem file, or nil if already installed. + def download(spec, previous_spec: nil, force: false, local: false) + if (spec.default_gem? 
&& !cached_built_in_gem(spec, local: local)) || (installed?(spec) && !force) + return nil # already available + end + + path = fetch_gem_if_possible(spec, previous_spec) + raise GemNotFound, "Could not find #{spec.file_name} for download" unless path + + # OPTIMIZATION: Update cached_gem memo so install() doesn't re-stat + @cached_gem_memo ||= {} + @cached_gem_memo[spec.full_name] = path + + path + end + + # Check if a spec has native extensions that need compilation. + def has_native_extensions?(spec) + return false unless spec.respond_to?(:extensions) + extensions = spec.extensions + extensions.is_a?(Array) ? extensions.any? : false + rescue + false + end + + def extension_cache_path(spec) + # Prefer global XDG-based extension cache + global_path = global_extension_cache_path(spec) + return global_path if global_path + + # Fall back to per-source cache + super + end + def install(spec, options = {}) if (spec.default_gem? && !cached_built_in_gem(spec, local: options[:local])) || (installed?(spec) && !options[:force]) - print_using_message "Using #{version_message(spec, options[:previous_spec])}" return nil # no post-install message end - path = fetch_gem_if_possible(spec, options[:previous_spec]) + # Use pre-downloaded gem if available, otherwise download now + path = cached_gem(spec) || fetch_gem_if_possible(spec, options[:previous_spec]) raise GemNotFound, "Could not find #{spec.file_name} for installation" unless path return if Bundler.settings[:no_install] @@ -207,10 +246,6 @@ def install(spec, options = {}) spec.source.checksum_store.register(spec, installer.gem_checksum) - message = "Installing #{version_message(spec, options[:previous_spec])}" - message += " with native extensions" if spec.extensions.any? 
- Bundler.ui.confirm message - installed_spec = nil Gem.time("Installed #{spec.name} in", 0, true) do @@ -224,6 +259,87 @@ def install(spec, options = {}) spec.post_install_message end + # Extract gem contents, using a global extracted cache to avoid + # re-extracting on subsequent installs. Returns [source_dir, installer, + # spec, from_cache] or nil if already installed. + # + # Cache layout: + # ~/.cache/gem/extracted// extracted gem files + # ~/.cache/gem/extracted/.spec.marshal marshaled Gem::Specification + # + # Cache HIT: 0 .gem file reads — spec loaded from marshal + # Cache MISS: 1 .gem file read — single-pass extraction (spec + data.tar.gz) + def extract_gem(spec, options = {}) + if (spec.default_gem? && !cached_built_in_gem(spec, local: options[:local])) || (installed?(spec) && !options[:force]) + return nil + end + + gem_path = cached_gem(spec) || fetch_gem_if_possible(spec, nil) + raise GemNotFound, "Could not find #{spec.file_name} for extraction" unless gem_path + + return nil if Bundler.settings[:no_install] + + require_relative "../rubygems_gem_installer" + + cache_dir = extracted_cache_path(spec) + + # CACHE HIT: spec + extracted files available from global cache + if cache_dir && (cached_spec = load_cached_spec(cache_dir)) + installer = build_gem_installer(gem_path, spec, options, preloaded_spec: cached_spec) + spec.__swap__(cached_spec) if spec.remote + installer.pre_install_checks + return [cache_dir, installer, spec, true] + end + + # CACHE MISS: single-pass extract .gem → global cache + if cache_dir + real_spec = extract_to_global_cache(gem_path, cache_dir) + installer = build_gem_installer(gem_path, spec, options, preloaded_spec: real_spec) + spec.__swap__(real_spec) if spec.remote + installer.pre_install_checks + return [cache_dir, installer, spec, true] + end + + # FALLBACK: no global cache path available (homeless user etc.) 
+ installer = build_gem_installer(gem_path, spec, options) + if spec.remote + s = installer.spec + spec.__swap__(s) + end + installer.pre_install_checks + temp_dir = "#{installer.gem_dir}.bundler-tmp" + FileUtils.rm_rf(temp_dir) + FileUtils.mkdir_p(temp_dir, mode: 0o755) + original_gem_dir = installer.gem_dir + installer.instance_variable_set(:@gem_dir, temp_dir) + begin + installer.send(:extract_files) + ensure + installer.instance_variable_set(:@gem_dir, original_gem_dir) + end + [temp_dir, installer, spec, false] + end + + # Finalize a previously extracted gem: copy/move files to GEM_HOME, + # write spec, generate binstubs, build native extensions. + def finalize_gem(spec, extract_result, has_extensions, options = {}) + return nil unless extract_result + + source_dir, installer, spec, from_cache = extract_result + + installed_spec = if has_extensions + installer.finalize_with_extensions(source_dir, copy: !!from_cache) + else + installer.finalize_without_extensions(source_dir, copy: !!from_cache) + end + + spec.full_gem_path = installed_spec.full_gem_path + spec.loaded_from = installed_spec.loaded_from + spec.base_dir = installed_spec.base_dir + + spec.post_install_message + end + def cache(spec, custom_path = nil) cached_path = Bundler.settings[:cache_all_platforms] ? fetch_gem_if_possible(spec) : cached_gem(spec) raise GemNotFound, "Missing gem file '#{spec.file_name}'." unless cached_path @@ -331,11 +447,23 @@ def credless_remotes end def cached_gem(spec) + # OPTIMIZATION: Memoize cached_gem lookups to avoid redundant File.exist? + # stat calls. cached_gem is called from both download() and install() + # for the same spec, plus indirectly from fetch_gem_if_possible. 
+ @cached_gem_memo ||= {} + key = spec.full_name + return @cached_gem_memo[key] if @cached_gem_memo.key?(key) + global_cache_path = download_cache_path(spec) - caches << global_cache_path if global_cache_path + # Only add global_cache_path if not already present to avoid growing + # the caches array on every call (causes extra File.exist? checks) + caches << global_cache_path if global_cache_path && !caches.include?(global_cache_path) possibilities = caches.map {|p| package_path(p, spec) } - possibilities.find {|p| File.exist?(p) } + result = IOTrace.trace(:file_stat, "cached_gem search: #{spec.name} (#{possibilities.size} paths)") do + possibilities.find {|p| File.exist?(p) } + end + @cached_gem_memo[key] = result end def package_path(cache_path, spec) @@ -361,7 +489,13 @@ def remove_auth(remote) def installed_specs @installed_specs ||= Index.build do |idx| - Bundler.rubygems.installed_specs.reverse_each do |spec| + specs = if Bundler.default_lockfile.file? + names = Bundler::LockfileParser.new(Bundler.read_file(Bundler.default_lockfile.to_s)).specs.map(&:name).uniq + Bundler.rubygems.installed_specs_for_names(names) + else + Bundler.rubygems.installed_specs + end + specs.reverse_each do |spec| spec.source = self next if spec.ignored? 
idx << spec @@ -382,10 +516,12 @@ def cached_specs @cached_specs ||= begin idx = Index.new - Dir["#{cache_path}/*.gem"].each do |gemfile| - s ||= Bundler.rubygems.spec_from_gem(gemfile) - s.source = self - idx << s + IOTrace.trace(:dir_scan, "cached_specs Dir glob: #{cache_path}/*.gem") do + Dir["#{cache_path}/*.gem"].each do |gemfile| + s ||= Bundler.rubygems.spec_from_gem(gemfile) + s.source = self + idx << s + end end idx @@ -432,6 +568,23 @@ def fetch_gem_if_possible(spec, previous_spec = nil) def fetch_gem(spec, previous_spec = nil) spec.fetch_platform + # Check global cache first (shared across Ruby versions) + global_path = global_gem_cache_path(spec) + if global_path && IOTrace.trace(:file_stat, "fetch_gem global cache check: #{spec.name}") { File.exist?(global_path) } + # Found in global cache - copy/link to local cache if needed + local_cache = default_cache_path_for(rubygems_dir) + local_path = package_path(local_cache, spec) + unless File.exist?(local_path) + SharedHelpers.filesystem_access(local_cache) {|p| FileUtils.mkdir_p(p) } + begin + FileUtils.ln(global_path, local_path) + rescue Errno::EXDEV, Errno::ENOTSUP + FileUtils.cp(global_path, local_path) + end + end + return local_path + end + cache_path = download_cache_path(spec) || default_cache_path_for(rubygems_dir) gem_path = package_path(cache_path, spec) return gem_path if File.exist?(gem_path) @@ -441,11 +594,33 @@ def fetch_gem(spec, previous_spec = nil) end download_gem(spec, cache_path, previous_spec) + # Store in global cache for future Ruby versions + if global_path = global_gem_cache_path(spec) + unless File.exist?(global_path) + SharedHelpers.filesystem_access(File.dirname(global_path)) {|p| FileUtils.mkdir_p(p) } + begin + FileUtils.ln(gem_path, global_path) + rescue Errno::EXDEV, Errno::ENOTSUP + FileUtils.cp(gem_path, global_path) + end + end + end + gem_path end def installed?(spec) - installed_specs[spec].any? && !spec.installation_missing? + # OPTIMIZATION: Memoize installed? 
checks. Each call to installation_missing? + # does a File.directory? syscall. During install, installed? is called from + # both download() and install() for every spec. + @installed_memo ||= {} + key = spec.full_name + return @installed_memo[key] if @installed_memo.key?(key) + + result = IOTrace.trace(:file_stat, "installed? check: #{spec.name}") do + installed_specs[spec].any? && !spec.installation_missing? + end + @installed_memo[key] = result end def rubygems_dir @@ -466,6 +641,131 @@ def lockfile_remotes @lockfile_remotes || credless_remotes end + # Returns path in the global gem cache (XDG_CACHE_HOME based). + # This cache is shared across all Ruby versions since .gem files + # are Ruby-version independent archives. + def global_gem_cache_path(spec = nil) + cache_home = if Gem.respond_to?(:cache_home) + Gem.cache_home + else + ENV["XDG_CACHE_HOME"] || File.join(Dir.home, ".cache") + end + cache_dir = File.join(cache_home, "gem", "gems") + return cache_dir unless spec + File.join(cache_dir, spec.file_name) + rescue + nil + end + + # Returns path in the global extension cache (XDG_CACHE_HOME based). + # Extensions are ABI-dependent, so the cache is keyed by Ruby engine, + # Ruby version, and platform. + def global_extension_cache_path(spec) + cache_home = if Gem.respond_to?(:cache_home) + Gem.cache_home + else + ENV["XDG_CACHE_HOME"] || File.join(Dir.home, ".cache") + end + ruby_key = "#{Gem.ruby_engine}-#{RbConfig::CONFIG["ruby_version"]}" + platform_key = Gem::Platform.local.to_s + ext_dir = File.join(cache_home, "gem", "extensions", ruby_key, platform_key) + Pathname.new(ext_dir).join(spec.full_name.to_s) + rescue + nil + end + + # Returns path for the global extracted gem cache. + # Extracted contents are Ruby-version independent (source files only; + # compiled extensions are cached separately by global_extension_cache_path). 
+ def extracted_cache_path(spec) + return nil unless (base = gem_cache_home) + File.join(base, "extracted", spec.full_name) + rescue + nil + end + + # Base directory for all global gem caches (~/.cache/gem/). + def gem_cache_home + @gem_cache_home ||= begin + base = if Gem.respond_to?(:cache_home) + Gem.cache_home + else + ENV["XDG_CACHE_HOME"] || File.join(Dir.home, ".cache") + end + File.join(base, "gem") + end + rescue + nil + end + + # Load a cached Gem::Specification from the extracted cache. + # Returns nil on cache miss or any error. + def load_cached_spec(cache_dir) + marshal_path = "#{cache_dir}.spec.marshal" + return nil unless File.exist?(marshal_path) + return nil unless File.directory?(cache_dir) + Marshal.load(File.binread(marshal_path)) + rescue + nil + end + + # Single-pass extract a .gem to the global cache. + # Uses a PID-unique temp dir + atomic rename for process safety. + def extract_to_global_cache(gem_path, cache_dir) + temp_dir = "#{cache_dir}.tmp-#{Process.pid}" + begin + FileUtils.rm_rf(temp_dir) + FileUtils.mkdir_p(temp_dir, mode: 0o755) + + real_spec = Bundler::RubyGemsGemInstaller.single_pass_extract(gem_path, temp_dir) + + # Atomic move to final cache location + SharedHelpers.filesystem_access(File.dirname(cache_dir)) { |p| FileUtils.mkdir_p(p) } + begin + File.rename(temp_dir, cache_dir) + rescue Errno::EEXIST, Errno::ENOTEMPTY + # Another process won the race — use theirs + FileUtils.rm_rf(temp_dir) + rescue Errno::EXDEV + FileUtils.mv(temp_dir, cache_dir) + end + + # Persist spec for future cache hits (stored as sibling, not inside + # the extracted dir, so fast_cp_r doesn't copy it to GEM_HOME) + File.binwrite("#{cache_dir}.spec.marshal", Marshal.dump(real_spec)) + + real_spec + rescue => e + FileUtils.rm_rf(temp_dir) rescue nil + raise + end + end + + # Create a RubyGemsGemInstaller, optionally injecting a preloaded spec + # to avoid opening the .gem file for verification. 
+ def build_gem_installer(gem_path, spec, options, preloaded_spec: nil) + installer = Bundler::RubyGemsGemInstaller.at( + gem_path, + security_policy: Bundler.rubygems.security_policies[Bundler.settings["trust-policy"]], + install_dir: rubygems_dir.to_s, + bin_dir: Bundler.system_bindir.to_s, + ignore_dependencies: true, + wrappers: true, + env_shebang: true, + build_args: options[:build_args], + bundler_extension_cache_path: extension_cache_path(spec) + ) + + if preloaded_spec + # Inject spec into the package to prevent .gem file from being + # opened for verification. The spec was already parsed during + # single-pass extraction or loaded from marshal cache. + installer.instance_variable_get(:@package).instance_variable_set(:@spec, preloaded_spec) + end + + installer + end + # Checks if the requested spec exists in the global cache. If it does, # we copy it to the download path, and if it does not, we download it. # @@ -480,11 +780,12 @@ def lockfile_remotes # def download_gem(spec, download_cache_path, previous_spec = nil) uri = spec.remote.uri - Bundler.ui.confirm("Fetching #{version_message(spec, previous_spec)}") gem_remote_fetcher = remote_fetchers.fetch(spec.remote).gem_remote_fetcher - Gem.time("Downloaded #{spec.name} in", 0, true) do - Bundler.rubygems.download_gem(spec, uri, download_cache_path, gem_remote_fetcher) + IOTrace.trace(:http, "download_gem: #{spec.name} from #{uri}") do + Gem.time("Downloaded #{spec.name} in", 0, true) do + Bundler.rubygems.download_gem(spec, uri, download_cache_path, gem_remote_fetcher) + end end end diff --git a/bundler/lib/bundler/spec_set.rb b/bundler/lib/bundler/spec_set.rb index f9179e7a0693..bd661ae2f1ba 100644 --- a/bundler/lib/bundler/spec_set.rb +++ b/bundler/lib/bundler/spec_set.rb @@ -88,7 +88,9 @@ def validate_deps(s) s.runtime_dependencies.each do |dep| next if dep.name == "bundler" - return :missing unless names.include?(dep.name) + # Use hash key lookup (O(1)) instead of names.include? 
which + # creates an array of keys on every call and scans it (O(n)). + return :missing unless lookup.key?(dep.name) return :invalid if none? {|spec| dep.matches_spec?(spec) } end @@ -108,6 +110,7 @@ def []=(key, value) def delete(specs) Array(specs).each {|spec| remove_spec(spec) } + @reverse_deps = nil # invalidate reverse deps cache end def sort! @@ -178,7 +181,11 @@ def -(other) end def find_by_name_and_platform(name, platform) - @specs.detect {|spec| spec.name == name && spec.installable_on_platform?(platform) } + # Use the lookup hash to narrow candidates by name (O(1)) instead of + # scanning all specs (O(n)). This matters when the spec set is large. + candidates = lookup[name] + return nil unless candidates + candidates.detect {|spec| spec.installable_on_platform?(platform) } end def specs_with_additional_variants_from(other) @@ -188,6 +195,7 @@ def specs_with_additional_variants_from(other) def delete_by_name(name) @specs.reject! {|spec| spec.name == name } @sorted&.reject! {|spec| spec.name == name } + @reverse_deps = nil # invalidate reverse deps cache return if @lookup.nil? @lookup[name] = nil @@ -198,7 +206,18 @@ def version_for(name) end def what_required(spec) - unless req = find {|s| s.runtime_dependencies.any? {|d| d.name == spec.name } } + # Build a reverse-dependency lookup on first call to avoid O(n*m) repeated scanning. 
+      @reverse_deps ||= begin
+        rd = {}
+        sorted.each do |s|
+          s.runtime_dependencies.each do |d|
+            next if d.type == :development
+            rd[d.name] ||= s
+          end
+        end
+        rd
+      end
+      unless req = @reverse_deps[spec.name]
         return [spec]
       end
       what_required(req) << spec
@@ -314,7 +333,8 @@ def valid_dependencies?(s)
     end
 
     def sorted
-      @sorted ||= ([@specs.find {|s| s.name == "rake" }] + tsort).compact.uniq
+      # Use the lookup hash for O(1) rake lookup instead of O(n) array scan
+      @sorted ||= ([lookup["rake"]&.first] + tsort).compact.uniq
     rescue TSort::Cyclic => error
       cgems = extract_circular_gems(error)
       raise CyclicDependencyError, "Your bundle requires gems that depend" \
@@ -354,6 +374,7 @@ def tsort_each_child(s)
 
     def add_spec(spec)
       @specs << spec
+      @reverse_deps = nil # invalidate reverse deps cache
 
       name = spec.name
diff --git a/lib/rubygems/installer.rb b/lib/rubygems/installer.rb
index 914e41367731..65ecb4e67d54 100644
--- a/lib/rubygems/installer.rb
+++ b/lib/rubygems/installer.rb
@@ -906,10 +906,14 @@ def write_build_info_file
   end
 
   ##
-  # Writes the .gem file to the cache directory
+  # Writes the .gem file to the cache directory.
+  # Skips the copy when the cache file already exists; copy_to performs the
+  # same File.exist? check, but returning early avoids the method call overhead
+  # and the join + file_name string allocation when the gem is already cached.
 
   def write_cache_file
     cache_file = File.join gem_home, "cache", spec.file_name
+    return if File.exist?(cache_file)
     @package.copy_to cache_file
   end
 
diff --git a/lib/rubygems/name_tuple.rb b/lib/rubygems/name_tuple.rb
index cbdf4d7ac5f2..bb470a500581 100644
--- a/lib/rubygems/name_tuple.rb
+++ b/lib/rubygems/name_tuple.rb
@@ -46,7 +46,7 @@ def self.null
   # of Gem::Specification#full_name.
 
   def full_name
-    case @platform
+    @full_name ||= case @platform
     when nil, "", Gem::Platform::RUBY
       "#{@name}-#{@version}"
     else
@@ -54,6 +54,19 @@ def full_name
     end
   end
 
+  ##
+  # Returns the lock file formatted name (e.g. 
"rake (13.0.6)" or "nokogiri (1.14.0-x86_64-linux)") + # Cached to avoid repeated string allocations during lockfile generation and parsing. + + def lock_name + @lock_name ||= case @platform + when nil, "", Gem::Platform::RUBY + "#{@name} (#{@version})" + else + "#{@name} (#{@version}-#{@platform})" + end + end + ## # Indicate if this NameTuple matches the current platform. @@ -120,6 +133,8 @@ def ==(other) alias_method :eql?, :== def hash - to_a.hash + # Cache hash to avoid repeated Array allocation from to_a on every call. + # NameTuples are used as hash keys in Index and other structures. + @hash ||= to_a.hash end end diff --git a/lib/rubygems/specification.rb b/lib/rubygems/specification.rb index aa495696adce..68f88f452873 100644 --- a/lib/rubygems/specification.rb +++ b/lib/rubygems/specification.rb @@ -1496,6 +1496,7 @@ def add_dependency_with_type(dependency, type, requirements) dependency = Gem::Dependency.new(dependency.to_s, requirements, type) end + @runtime_dependencies = nil # invalidate cache dependencies << dependency end @@ -2247,9 +2248,10 @@ def ruby_code(obj) ## # List of dependencies that will automatically be activated at runtime. + # Cached since this is called many times during dependency resolution. def runtime_dependencies - dependencies.select(&:runtime?) + @runtime_dependencies ||= dependencies.select(&:runtime?) end ##