Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 37 additions & 9 deletions bundler/lib/bundler/compact_index_client/cache.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# frozen_string_literal: true

require "rubygems/resolver/api_set/gem_parser"
require_relative "../io_trace"

module Bundler
class CompactIndexClient
Expand Down Expand Up @@ -29,10 +30,28 @@ def versions
def info(name, remote_checksum = nil)
path = info_path(name)

if remote_checksum && remote_checksum != SharedHelpers.checksum_for_file(path, :MD5)
fetch("info/#{name}", path, info_etag_path(name))
if remote_checksum
# OPTIMIZATION: Read the file once for both checksum verification and data return.
# Previously, SharedHelpers.checksum_for_file would read the file for MD5,
# and then read() would read it again if the checksum matched. Now we read
# once and compute MD5 from the in-memory data.
data = read(path)
if data
local_checksum = SharedHelpers.digest(:MD5).hexdigest(data)
if remote_checksum != local_checksum
IOTrace.trace(:http, "compact_index info checksum mismatch, fetching: #{name}") do
fetch("info/#{name}", path, info_etag_path(name))
end
else
Bundler::CompactIndexClient.debug { "update skipped info/#{name} (versions index checksum matches local)" }
IOTrace.note(:file_read, "compact_index info cache hit: #{name}")
data
end
else
fetch("info/#{name}", path, info_etag_path(name))
end
else
Bundler::CompactIndexClient.debug { "update skipped info/#{name} (#{remote_checksum ? "versions index checksum is nil" : "versions index checksum matches local"})" }
Bundler::CompactIndexClient.debug { "update skipped info/#{name} (versions index checksum is nil)" }
read(path)
end
end
Expand All @@ -52,7 +71,7 @@ def info_path(name)
name = name.to_s
# TODO: converge this into the info_root by hashing all filenames like info_etag_path
if /[^a-z0-9_-]/.match?(name)
name += "-#{SharedHelpers.digest(:MD5).hexdigest(name).downcase}"
name += "-#{SharedHelpers.fast_hexdigest(name).downcase}"
@special_characters_info_root.join(name)
else
@info_root.join(name)
Expand All @@ -61,23 +80,30 @@ def info_path(name)

# Builds the path of the etag file that belongs to the info file of +name+.
#
# The file name is "<name>-<digest of name>", downcased digest, placed under
# @info_etag_root.
#
# NOTE(review): SharedHelpers.fast_hexdigest is defined outside this view;
# confirm it produces the same digest as the one used for etag files already
# on disk, otherwise existing etag files will no longer be found.
#
# @param name [#to_s] gem name
# @return the result of @info_etag_root.join for that file name
def info_etag_path(name)
  name = name.to_s
  # Compute the digest exactly once; the path is the method's return value.
  @info_etag_root.join("#{name}-#{SharedHelpers.fast_hexdigest(name).downcase}")
end

def mkdir(name)
directory.join(name).tap do |dir|
SharedHelpers.filesystem_access(dir) do
FileUtils.mkdir_p(dir)
# OPTIMIZATION: Skip mkdir_p if directory already exists.
# During warm-cache runs, these directories always exist.
unless dir.directory?
SharedHelpers.filesystem_access(dir) do
FileUtils.mkdir_p(dir)
end
end
end
end

def fetch(remote_path, path, etag_path)
if already_fetched?(remote_path)
Bundler::CompactIndexClient.debug { "already fetched #{remote_path}" }
IOTrace.note(:http, "compact_index already fetched: #{remote_path}")
else
Bundler::CompactIndexClient.debug { "fetching #{remote_path}" }
@updater&.update(remote_path, path, etag_path)
IOTrace.trace(:http, "compact_index fetch: #{remote_path}") do
@updater&.update(remote_path, path, etag_path)
end
end

read(path)
Expand All @@ -89,7 +115,9 @@ def already_fetched?(remote_path)

# Reads the cached file at +path+, recording the read through IOTrace.
#
# The file is read exactly once per call.
#
# @param path [Pathname] file to read (must respond to #file? and #read)
# @return [String, nil] nil when +path+ is not an existing regular file;
#   otherwise whatever IOTrace.trace returns for the block — presumably the
#   block's value, i.e. the file contents (IOTrace is defined outside this view).
def read(path)
  return unless path.file?

  IOTrace.trace(:file_read, "compact_index read: #{path}") do
    SharedHelpers.filesystem_access(path, :read, &:read)
  end
end
end
end
Expand Down
62 changes: 58 additions & 4 deletions bundler/lib/bundler/compact_index_client/parser.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# frozen_string_literal: true

require "fileutils"

module Bundler
class CompactIndexClient
class Parser
Expand All @@ -22,8 +24,25 @@ def versions
@info_checksums = {}

lines(@compact_index.versions).each do |line|
name, versions_string, checksum = line.split(" ", 3)
@info_checksums[name] = checksum || ""
# Avoid allocating a 3-element array via split(" ", 3) on every line.
# Instead, find space positions directly and slice the frozen string.
line.freeze

name_end = line.index(" ")
next unless name_end # skip malformed lines

versions_end = line.index(" ", name_end + 1)
name = line[0, name_end]
name.freeze

if versions_end
versions_string = line[name_end + 1, versions_end - name_end - 1]
@info_checksums[name] = line[versions_end + 1, line.size - versions_end - 1]
else
versions_string = line[name_end + 1, line.size - name_end - 1]
@info_checksums[name] = ""
end

versions_string.split(",") do |version|
delete = version.delete_prefix!("-")
version = version.split("-", 2).unshift(name)
Expand All @@ -39,8 +58,36 @@ def versions
end

# Returns the parsed info entries for the gem +name+.
#
# A Marshal-format binary cache, keyed by the checksum the versions index
# lists for +name+, is consulted first. On a hit the compact index is not
# touched at all. On a miss the info data is obtained from @compact_index,
# parsed line by line, and the parsed result is written back to the cache.
#
# The binary cache is only used when a non-empty checksum is known: an empty
# checksum cannot tell a fresh entry from a stale one, so caching under it
# would keep serving the first parsed result forever.
#
# @param name [String] gem name
# @return [Array<Array>] one array per info line, each prefixed with +name+
def info(name)
  checksum = info_checksums[name]
  binary_path = info_binary_path(name)
  cacheable = binary_path && checksum && !checksum.empty?

  if cacheable && File.exist?(binary_path)
    begin
      cached = Bundler.safe_load_marshal(File.binread(binary_path))
      # Entry layout is [checksum, parsed_result]; anything else is ignored.
      return cached[1] if cached.is_a?(Array) && cached.length == 2 && cached[0] == checksum
    rescue StandardError
      # Unreadable or corrupted cache entry: fall through and parse again.
    end
  end

  data = @compact_index.info(name, checksum)
  result = lines(data).map {|line| gem_parser.parse(line).unshift(name) }

  if cacheable && !result.empty?
    begin
      dir = File.dirname(binary_path)
      FileUtils.mkdir_p(dir) unless File.directory?(dir)
      File.binwrite(binary_path, Marshal.dump([checksum, result]))
    rescue StandardError
      # Best effort only: failing to write the cache must not fail the lookup.
    end
  end

  result
end

def available?
Expand All @@ -67,6 +114,13 @@ def gem_parser
@gem_parser ||= Gem::Resolver::APISet::GemParser.new
end

# Returns the location of the binary (Marshal) info cache file for +name+,
# i.e. "<directory>/info-binary/<name>.bin", or nil when no such cache
# should be used.
#
# nil is returned when @compact_index does not expose a directory, when that
# directory is nil, or when +name+ is empty or contains a path separator or
# NUL byte. Pathname#join resolves "../" components and lets an absolute
# component replace the whole path, so such a name would otherwise point
# outside the cache directory.
#
# @param name [#to_s] gem name
# @return [Pathname, nil]
def info_binary_path(name)
  return nil unless @compact_index.respond_to?(:directory)

  root = @compact_index.directory
  return nil unless root

  file_name = name.to_s
  return nil if file_name.empty? || file_name.match?(%r{[/\\\x00]})

  root.join("info-binary", "#{file_name}.bin")
end

# This is mostly the same as `split(" ", 3)` but it avoids allocating extra objects.
# This method gets called at least once for every gem when parsing versions.
def parse_version_checksum(line, checksums)
Expand Down
120 changes: 120 additions & 0 deletions bundler/lib/bundler/compact_version.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# frozen_string_literal: true

module Bundler
  # A fast version representation that packs common version formats into a
  # single Integer, so that ordering and equality of two such versions is a
  # single integer comparison.
  #
  # Layout, most significant bits first:
  #
  #   [16 bits major][16 bits minor][16 bits patch][16 bits extra]
  #
  # Up to four segments are packed, each in the range 0..65535; missing
  # segments count as 0.
  #
  # Versions that do not fit (prereleases, more than four segments, or any
  # segment above 65535) have +packed+ set to nil and are compared through the
  # wrapped Gem::Version instead.
  class CompactVersion
    include Comparable

    MAX_SEGMENT = 0xFFFF # 65535
    MAX_SEGMENTS = 4

    # Class-level cache of instances, keyed by version string and guarded by a
    # mutex. It is only emptied by .clear_cache!.
    @cache = {}
    @cache_mutex = Mutex.new

    # gem_version: the wrapped Gem::Version.
    # packed: the packed Integer, or nil when the version cannot be packed.
    attr_reader :gem_version, :packed

    # Returns the cached instance for +gem_version+, creating it on first use.
    def self.from_gem_version(gem_version)
      key = gem_version.to_s
      @cache_mutex.synchronize do
        @cache[key] ||= new(gem_version)
      end
    end

    # Empties the class-level instance cache.
    def self.clear_cache!
      @cache_mutex.synchronize { @cache.clear }
    end

    # Compares two Gem::Version objects, using the packed integers when both
    # versions can be packed. Returns -1, 0, or 1 like <=>.
    def self.compare(a, b)
      from_gem_version(a) <=> from_gem_version(b)
    end

    # Equality check for two Gem::Version objects with the same fast path.
    def self.versions_equal?(a, b)
      from_gem_version(a) == from_gem_version(b)
    end

    # @param gem_version [Gem::Version, Object] a Gem::Version, or anything
    #   Gem::Version.new accepts
    def initialize(gem_version)
      @gem_version = gem_version.is_a?(Gem::Version) ? gem_version : Gem::Version.new(gem_version)
      @packed = pack(@gem_version)
    end

    # Orders by packed integer when both sides are packed, otherwise by the
    # wrapped Gem::Version. Returns nil for anything that is not a
    # CompactVersion.
    def <=>(other)
      return nil unless other.is_a?(CompactVersion)

      if @packed && other.packed
        @packed <=> other.packed
      else
        @gem_version <=> other.gem_version
      end
    end

    # Value equality: missing trailing segments count as 0, so "1" == "1.0".
    def ==(other)
      return false unless other.is_a?(CompactVersion)

      if @packed && other.packed
        @packed == other.packed
      else
        @gem_version == other.gem_version
      end
    end

    # Strict equality, always decided by Gem::Version#eql? so that packed and
    # unpacked versions follow the same rule ("1" and "1.0" are == but not
    # eql?). Comparing +packed+ first is only a cheap early rejection: versions
    # whose packed values differ (including nil vs. non-nil) cannot be eql?.
    def eql?(other)
      return false unless other.is_a?(CompactVersion)

      @packed == other.packed && @gem_version.eql?(other.gem_version)
    end

    # Hash of the packed integer when available, otherwise of the wrapped
    # Gem::Version. Instances that are eql? always hash identically.
    def hash
      @packed ? @packed.hash : @gem_version.hash
    end

    def prerelease?
      @gem_version.prerelease?
    end

    def segments
      @gem_version.segments
    end

    def to_s
      @gem_version.to_s
    end

    # Same object as #gem_version.
    def version
      @gem_version
    end

    private

    # Packs +version+ into one Integer, or returns nil when it does not fit.
    def pack(version)
      return nil if version.prerelease?

      segments = version.segments
      return nil if segments.length > MAX_SEGMENTS
      return nil if segments.any? {|s| !s.is_a?(Integer) || s < 0 || s > MAX_SEGMENT }

      major = segments[0] || 0
      minor = segments[1] || 0
      patch = segments[2] || 0
      extra = segments[3] || 0

      (major << 48) | (minor << 32) | (patch << 16) | extra
    end
  end
end
Loading
Loading