Skip to content

Commit

Permalink
Add compact index support for private sources
Browse files Browse the repository at this point in the history
  • Loading branch information
segiddins committed Jul 19, 2024
1 parent 111d304 commit 8993ad0
Show file tree
Hide file tree
Showing 23 changed files with 733 additions and 29 deletions.
6 changes: 4 additions & 2 deletions .rubocop-bundler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ Lint/UnusedMethodArgument:
Lint/UriEscapeUnescape:
Enabled: true


# Style

Layout/EndAlignment:
Expand Down Expand Up @@ -92,7 +91,10 @@ Style/SpecialGlobalVars:
Enabled: false

Naming/VariableNumber:
EnforcedStyle: 'snake_case'
EnforcedStyle: "snake_case"
AllowedIdentifiers:
- sha256
- capture3

Naming/MemoizedInstanceVariableName:
Enabled: false
Expand Down
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,6 @@ group :linting do
end

group :test do
gem "gem_server_conformance", "~> 0.1.4"
gem "mock_redis"
end
16 changes: 8 additions & 8 deletions docs/gemstash-configuration.5.md
Original file line number Diff line number Diff line change
Expand Up @@ -221,10 +221,10 @@ Boolean values `true` or `false`
`:fetch_timeout`

This is the number of seconds to allow for fetching a gem from upstream.
It covers establishing the connection and receiving the response. Fetching
gems over a slow connection may cause timeout errors. If you experience
timeout errors, you may want to increase this value. The default is `20`
seconds.
It covers establishing the connection and receiving the response.
Fetching gems over a slow connection may cause timeout errors. If you
experience timeout errors, you may want to increase this value. The
default is `20` seconds.

## Default value

Expand All @@ -239,10 +239,10 @@ Integer value with a minimum of `1`
`:open_timeout`

The timeout setting for opening the connection to an upstream gem
server. On high-latency networks, even establishing the connection
to an upstream gem server can take a while. If you experience
connection failures instead of timeout errors, you may want to
increase this value. The default is `2` seconds.
server. On high-latency networks, even establishing the connection to an
upstream gem server can take a while. If you experience connection
failures instead of timeout errors, you may want to increase this value.
The default is `2` seconds.

## Default value

Expand Down
1 change: 1 addition & 0 deletions gemstash.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ you push your own private gems as well."
spec.required_ruby_version = ">= 3.1"

spec.add_runtime_dependency "activesupport", ">= 4.2", "< 8"
spec.add_runtime_dependency "compact_index", "~> 0.15.0"
spec.add_runtime_dependency "dalli", ">= 3.2.3", "< 4"
spec.add_runtime_dependency "faraday", ">= 1", "< 3"
spec.add_runtime_dependency "faraday_middleware", "~> 1.0"
Expand Down
1 change: 1 addition & 0 deletions lib/gemstash.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ module Gemstash
autoload :DB, "gemstash/db"
autoload :Cache, "gemstash/cache"
autoload :CLI, "gemstash/cli"
autoload :CompactIndexBuilder, "gemstash/compact_index_builder"
autoload :Configuration, "gemstash/configuration"
autoload :Dependencies, "gemstash/dependencies"
autoload :Env, "gemstash/env"
Expand Down
5 changes: 4 additions & 1 deletion lib/gemstash/cache.rb
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,10 @@ def set_dependency(scope, gem, value)

# Removes the cached dependency entry for +gem+ within +scope+.
#
# For the "private" scope the stored specs and the stored compact index data
# for +gem+ are invalidated too, each exactly once.
#
# @param scope [String] cache scope, used as part of the cache key
# @param gem [String] gem name, used as part of the cache key
def invalidate_gem(scope, gem)
  @client.delete("deps/v1/#{scope}/#{gem}")
  return unless scope == "private"

  Gemstash::SpecsBuilder.invalidate_stored
  Gemstash::CompactIndexBuilder.invalidate_stored(gem)
end
end

Expand Down
3 changes: 3 additions & 0 deletions lib/gemstash/cli/info.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ class Info < Gemstash::CLI::Base
# Entry point of the command: calls +prepare+, then +list_config+.
def run
prepare
list_config

# NOTE(review): the two lines below are commented-out debugging code (a schema
# dump); they do nothing at runtime and should be removed or made a real option.
# Gemstash::DB
# Gemstash::Env.current.db.dump_schema_migration(same_db: true)
end

private
Expand Down
257 changes: 257 additions & 0 deletions lib/gemstash/compact_index_builder.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,257 @@
# frozen_string_literal: true

require "active_support/core_ext/string/filters"
require "compact_index"
require "digest"
require "gemstash"
require "stringio"
require "tempfile"
require "time"
require "zlib"

module Gemstash
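# Base class for the private compact index responses: serves the stored body
# when one exists, otherwise builds it, stores it and serves the fresh result.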
class CompactIndexBuilder
include Gemstash::Env::Helper
attr_reader :result

# Produces the response body for +app+ and sets the response metadata on it.
#
# The content type is set first, then a builder is created from +app.auth+
# plus any forwarded arguments and asked to serve. The body's MD5 hex digest
# is passed to +app.etag+, and its base64 SHA-256 digest is sent in both the
# "Digest" and "Repr-Digest" headers next to "Accept-Ranges" and
# "Content-Length".
#
# @param app the web application/request context (must respond to
#   +content_type+, +auth+, +etag+ and +headers+)
# @return [String] the response body
def self.serve(app, ...)
  app.content_type "text/plain; charset=utf-8"

  body = new(app.auth, ...).serve

  # Keep the etag call ahead of the remaining headers, as in the original order.
  etag_value = Digest::MD5.hexdigest(body)
  app.etag etag_value

  encoded_digest = Digest::SHA256.base64digest(body)
  byte_length = body.bytesize.to_s
  app.headers "Accept-Ranges" => "bytes",
              "Digest" => "sha-256=#{encoded_digest}",
              "Repr-Digest" => "sha-256=:#{encoded_digest}:",
              "Content-Length" => byte_length
  body
end

# Deletes the stored compact index content affected by a change to gem +name+:
# the "names" body, the "versions" body and the "info/<name>" body.
#
# @param name [String] gem name used to locate its "info/<name>" resource
def self.invalidate_stored(name)
  compact_index = Gemstash::Storage.for("private").for("compact_index")
  stale = { "names" => :names, "versions" => :versions, "info/#{name}" => :info }
  stale.each do |resource_name, content_key|
    compact_index.resource(resource_name).delete(content_key)
  end
end

# @param auth the authorization object; #check_auth calls +check+ on it
def initialize(auth)
@auth = auth
end

# Returns the body for this builder.
#
# Auth is checked first when the +:protected_fetch+ config entry is truthy.
# A stored result is returned as-is; otherwise the result is built, stored
# and then returned.
def serve
  protected_fetch = gemstash_env.config[:protected_fetch]
  check_auth if protected_fetch

  fetch_from_storage
  unless result
    build_result
    store_result
  end

  result
end

private

# Memoized "compact_index" storage nested inside the "private" storage.
def storage
@storage ||= Gemstash::Storage.for("private").for("compact_index")
end

# Loads a previously stored body into @result when the resource has content
# for this builder's key; leaves @result untouched otherwise.
def fetch_from_storage
  stored = fetch_resource
  @result = stored.load(key).content(key) if stored.exist?(key)
rescue StandardError
  # Best effort: e.g. the content may vanish between the exist? check and the
  # load. Any failure is treated as "nothing stored" so the caller rebuilds.
  @result = nil
end

# Saves @result into this builder's resource under this builder's key.
def store_result
fetch_resource.save(key => @result)
end

# Asks the auth object for the "fetch" permission.
# NOTE(review): presumably raises when the permission is missing — confirm
# against the auth implementation.
def check_auth
@auth.check("fetch")
end

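# Builds the compact index "versions" body, either on top of a stored
# "versions.list" base file or from scratch.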
class Versions < CompactIndexBuilder
# The "versions" resource of the compact index storage.
def fetch_resource
storage.resource("versions")
end

# Builds the versions body into @result.
#
# When a "versions.list" base is stored and +force_rebuild+ is false, the base
# is written to a temporary file and the body is that base plus the versions
# selected by #compact_index_versions since the base's +updated_at+ time.
# Otherwise a new base is created from #compact_index_public_versions with the
# current time, used as the body and saved as the new "versions.list".
#
# The base is loaded before its content is read, matching how
# #fetch_from_storage reads stored content (+load(key).content(key)+).
#
# @param force_rebuild [Boolean] ignore any stored base and start over
def build_result(force_rebuild: false)
  resource = fetch_resource
  use_base = !force_rebuild && resource.exist?("versions.list")
  base = resource.load("versions.list").content("versions.list") if use_base

  Tempfile.create("versions.list") do |file|
    versions_file = CompactIndex::VersionsFile.new(file.path)
    if base
      file.write(base)
      # Close to flush the base to disk before VersionsFile reads the path.
      file.close
      @result = versions_file.contents(
        compact_index_versions(versions_file.updated_at.to_time)
      )
    else
      ts = Time.now.iso8601
      versions_file.create(
        compact_index_public_versions(ts), ts
      )
      # VersionsFile wrote through the path; read it back via the open handle.
      @result = file.read
      resource.save("versions.list" => @result)
    end
  end
end

private

# Selects versions created after +date+ plus non-indexed versions yanked after
# +date+ (their number prefixed with "-") and maps every row to its own
# single-version gem entry, ordered by date, number, platform and name.
#
# @param date lower bound bound to both "?" placeholders of the query
def compact_index_versions(date)
  sql = <<~SQL.squish
    SELECT r.name as name, v.created_at as date, v.info_checksum as info_checksum, v.number as number, v.platform as platform
    FROM rubygems AS r, versions AS v
    WHERE v.rubygem_id = r.id AND
    v.created_at > ?
    UNION ALL
    SELECT r.name as name, v.yanked_at as date, v.yanked_info_checksum as info_checksum, '-'||v.number as number, v.platform as platform
    FROM rubygems AS r, versions AS v
    WHERE v.rubygem_id = r.id AND
    v.indexed is false AND
    v.yanked_at > ?
    ORDER BY date, number, platform, name
  SQL
  rows = Sequel::Model.db[sql, date, date].to_a

  # The ORDER BY was observed not to hold on sqlite, so sort again in Ruby.
  ordered = rows.sort_by {|row| [row[:date], row[:number], row[:platform], row[:name]] }
  single_version_gems = ordered.map {|row| [row[:name], [row]] }
  map_gem_versions(single_version_gems)
end

# Selects every version created or yanked up to +date+, groups the rows by gem
# name and keeps only the indexed versions of each gem.
#
# @param date upper bound bound to both "?" placeholders of the query
def compact_index_public_versions(date)
  sql = <<~SQL.squish
    SELECT r.name, v.indexed, COALESCE(v.yanked_at, v.created_at) as stamp,
    COALESCE(v.yanked_info_checksum, v.info_checksum) as info_checksum, v.number, v.platform
    FROM rubygems AS r, versions AS v
    WHERE v.rubygem_id = r.id AND
    (v.created_at <= ? OR v.yanked_at <= ?)
    ORDER BY name, COALESCE(v.yanked_at, v.created_at), number, platform
  SQL
  rows = Sequel::Model.db[sql, date, date].to_a

  grouped = rows.group_by {|row| row[:name] }
  grouped.each_value do |gem_rows|
    # Take the checksum of the last row before yanked rows are filtered out.
    latest_checksum = gem_rows.last[:info_checksum]
    gem_rows.select! {|row| row[:indexed] == true }
    # Set all versions' info_checksum to work around https://github.com/bundler/compact_index/pull/20
    gem_rows.each {|row| row[:info_checksum] = latest_checksum }
  end

  map_gem_versions(grouped)
end

# Converts name/rows pairs into CompactIndex::Gem objects. Only the number,
# platform and info checksum of each row are carried over; the other
# GemVersion fields are left nil.
#
# @param versions_by_gem a Hash or an Array of [name, rows] pairs
# @return [Array<CompactIndex::Gem>]
def map_gem_versions(versions_by_gem)
  versions_by_gem.map do |gem_name, rows|
    gem_versions = rows.map do |row|
      # Positional arguments: number, platform, sha256, info_checksum,
      # dependencies, required_ruby_version, required_rubygems_version.
      CompactIndex::GemVersion.new(row[:number], row[:platform], nil, row[:info_checksum], nil, nil, nil)
    end
    CompactIndex::Gem.new(gem_name, gem_versions)
  end
end

# Content key of the versions body within its resource.
def key
:versions
end
end

# Builds the compact index "info" body for a single gem name.
class Info < CompactIndexBuilder
# @param auth forwarded unchanged to the superclass constructor
# @param name the gem name; used for the "info/<name>" resource and the SQL lookup
def initialize(auth, name)
super(auth)
@name = name
end

# The "info/<name>" resource of the compact index storage for this gem.
def fetch_resource
storage.resource("info/#{@name}")
end

# Renders the info body into @result from this gem's versions and dependencies.
def build_result
@result = CompactIndex.info(requirements_and_dependencies)
end

private

# Queries the indexed versions of the gem together with their aggregated
# dependency names and requirements, and maps each row to a
# CompactIndex::GemVersion (with info_checksum left nil).
#
# Requirements are aggregated with "@" and names with "," as separators; both
# lists must split to the same size or a RuntimeError is raised.
# NOTE(review): string_agg(... ORDER BY ...) is not available on every SQL
# backend — confirm it works on all supported databases.
#
# @return [Array<CompactIndex::GemVersion>]
def requirements_and_dependencies
  columns = "number, platform, sha256, info_checksum, required_ruby_version, required_rubygems_version, versions.created_at"
  requirements_agg = "string_agg(dependencies.requirements, '@' ORDER BY dependencies.rubygem_name, dependencies.id) as dep_req_agg"
  names_agg = "string_agg(dependencies.rubygem_name, ',' ORDER BY dependencies.rubygem_name) AS dep_name_agg"

  sql = <<~SQL.squish
    SELECT #{columns}, #{requirements_agg}, #{names_agg}
    FROM rubygems
    LEFT JOIN versions ON versions.rubygem_id = rubygems.id
    LEFT JOIN dependencies ON dependencies.version_id = versions.id
    WHERE rubygems.name = ? AND versions.indexed = true
    GROUP BY #{columns}
    ORDER BY versions.created_at, number, platform, dep_name_agg
  SQL

  DB::Rubygem.db[sql, @name].map do |row|
    requirements = row[:dep_req_agg]&.split("@")
    names = row[:dep_name_agg]&.split(",")

    if names&.size != requirements&.size
      raise "Dependencies and requirements are not the same size:\n reqs: #{requirements.inspect}\n dep_names: #{names.inspect}\n row: #{row.inspect}"
    end

    dependencies =
      if requirements
        names.zip(requirements).map {|dep_name, requirement| CompactIndex::Dependency.new(dep_name, requirement) }
      else
        []
      end

    CompactIndex::GemVersion.new(
      row[:number],
      row[:platform],
      row[:sha256],
      nil, # info_checksum
      dependencies,
      row[:required_ruby_version],
      row[:required_rubygems_version]
    )
  end
end

# Content key of the info body within its resource.
def key
:info
end
end

# Builds the compact index "names" body from the gems that have at least one
# indexed version.
class Names < CompactIndexBuilder
# The "names" resource of the compact index storage.
def fetch_resource
storage.resource("names")
end

# Renders the names body into @result: the names of all gems with at least
# one indexed version, ordered by name, encoded as UTF-8.
def build_result
  sql = <<~SQL.squish
    SELECT name
    FROM rubygems
    LEFT JOIN versions ON versions.rubygem_id = rubygems.id
    WHERE versions.indexed = true
    GROUP BY name
    HAVING COUNT(versions.id) > 0
    ORDER BY name
  SQL
  gem_names = DB::Rubygem.db[sql].map {|row| row[:name] }
  rendered = CompactIndex.names(gem_names)
  @result = rendered.encode("UTF-8")
end

private

# Content key of the names body within its resource.
def key
:names
end
end
end
end
1 change: 1 addition & 0 deletions lib/gemstash/db.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ module DB
Sequel::Model.db = Gemstash::Env.current.db
Sequel::Model.raise_on_save_failure = true
Sequel::Model.plugin :timestamps, update_on_create: true
Sequel::Model.db.extension :schema_dumper
autoload :Authorization, "gemstash/db/authorization"
autoload :CachedRubygem, "gemstash/db/cached_rubygem"
autoload :Dependency, "gemstash/db/dependency"
Expand Down
Loading

0 comments on commit 8993ad0

Please sign in to comment.