Skip to content

Commit f1abb08

Browse files
authored
Store gem name, version, platform, and surrogate-key in S3 metadata (#5568)
This way, we can reliably determine what gem was downloaded from only the request path + response headers. I added the `surrogate-key` so yanks/restores will properly purge the cache. This also adds a new maintenance task to backfill the metadata for all existing gems in S3, so going forward rubytogether/kirby can assume the headers are present. As a follow-up, the Fastly logging will need to be updated to include these three new headers. Signed-off-by: Samuel Giddins <segiddins@segiddins.me>
1 parent b827011 commit f1abb08

5 files changed

Lines changed: 227 additions & 2 deletions

File tree

app/models/pusher.rb

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -308,7 +308,11 @@ def write_gem(body, spec_contents)
308308
spec_path = "quick/Marshal.4.8/#{@version.full_name}.gemspec.rz"
309309

310310
# do all processing _before_ we upload anything to S3, so we lower the chances of orphaned files
311-
RubygemFs.instance.store(gem_path, gem_contents, checksum_sha256: version.sha256)
311+
RubygemFs.instance.store(gem_path, gem_contents, checksum_sha256: version.sha256,
312+
metadata: {
313+
"gem" => version.rubygem.name, "version" => version.number, "platform" => version.platform,
314+
"surrogate-key" => "gem/#{version.rubygem.name}", "sha256" => version.sha256
315+
})
312316
RubygemFs.instance.store(spec_path, spec_contents, checksum_sha256: version.spec_sha256)
313317

314318
Fastly.purge(path: gem_path)
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# frozen_string_literal: true
2+
3+
# Maintenance task that backfills the metadata stored alongside each gem file
# (gem name, version, platform, surrogate key and sha256) for gems that were
# uploaded before that metadata was written at push time.
class Maintenance::BackfillGemS3MetadataTask < MaintenanceTasks::Task
  include SemanticLogger::Loggable

  # Every indexed version, with its rubygem preloaded because #process reads
  # the rubygem name for each version.
  def collection
    Version.indexed.includes(:rubygem)
  end

  # Re-stores the gem file for +version+ with the expected metadata when the
  # stored object carries none, or only a subset, of it.
  #
  # Nothing is written (and an error is logged) when the gem file is missing,
  # when its contents do not hash to +version.sha256+, or when the stored
  # metadata contains keys or values that disagree with the expected metadata.
  def process(version)
    sha256 = version.sha256
    gem_path = "gems/#{version.gem_file_name}"
    gem_contents, response = RubygemFs.instance.get_object(gem_path)

    if gem_contents.nil?
      logger.error("Version #{version.full_name} has no gem contents")
      return
    end

    # Validate the stored content matches the expected checksum
    actual_sha256 = Digest::SHA256.base64digest(gem_contents)
    if actual_sha256 != sha256
      logger.error("Version #{version.full_name} has sha256 mismatch", expected: sha256, actual: actual_sha256)
      return
    end

    # Treat an object that reports no metadata at all as having empty metadata,
    # so it is backfilled instead of raising on the subset comparison below.
    existing_metadata = response[:metadata] || {}
    new_metadata = {
      "gem" => version.rubygem.name, "version" => version.number, "platform" => version.platform,
      "surrogate-key" => "gem/#{version.rubygem.name}", "sha256" => sha256
    }

    # No changes needed
    return if existing_metadata == new_metadata

    # Hash#<= is a subset test: only overwrite when everything already stored
    # agrees with what we are about to write.
    if existing_metadata <= new_metadata
      logger.info("Updating metadata for #{version.full_name}", existing_metadata:, new_metadata:)
      RubygemFs.instance.store(gem_path, gem_contents, checksum_sha256: sha256, metadata: new_metadata)
    else
      logger.error("Version #{version.full_name} has unexpected metadata", existing_metadata:, new_metadata:)
    end
  end
end

lib/rubygem_fs.rb

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,12 @@ def get(key)
8282
nil
8383
end
8484

85+
# Returns a two-element array of the body stored under +key+ and the result
# of +head(key)+, or nil when +get(key)+ yields no body.
def get_object(key)
  contents = get(key)
  [contents, head(key)] if contents
end
90+
8591
def each_key(prefix: nil, &)
8692
return enum_for(__method__, prefix:) unless block_given?
8793
base = dir_for(prefix)
@@ -186,6 +192,13 @@ def get(key)
186192
nil
187193
end
188194

195+
# Fetches +key+ from the bucket and returns a two-element array of the fully
# read body and the response converted to a hash. Returns nil when S3 reports
# that the key does not exist.
def get_object(key)
  object = s3.get_object(key: key, bucket: bucket)
  contents = object.body.read
  [contents, object.to_h]
rescue Aws::S3::Errors::NoSuchKey
  nil
end
201+
189202
def each_key(prefix: nil, &)
190203
return enum_for(__method__, prefix: prefix) unless block_given?
191204
s3.list_objects_v2(bucket: bucket, prefix: prefix).each do |response|

test/integration/push_test.rb

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,11 @@ class PushTest < ActionDispatch::IntegrationTest
106106

107107
refute_nil RubygemFs.instance.get("gems/sandworm-2.0.0.gem")
108108
refute_nil RubygemFs.instance.get("quick/Marshal.4.8/sandworm-2.0.0.gemspec.rz")
109-
assert_equal({ checksum_sha256: rubygem.versions.find_by!(full_name: "sandworm-2.0.0").sha256, key: "gems/sandworm-2.0.0.gem" },
109+
checksum_sha256 = rubygem.versions.find_by!(full_name: "sandworm-2.0.0").sha256
110+
111+
assert_equal({ checksum_sha256:, key: "gems/sandworm-2.0.0.gem",
112+
metadata: { "gem" => "sandworm", "version" => "2.0.0", "platform" => "ruby",
113+
"surrogate-key" => "gem/sandworm", "sha256" => checksum_sha256 } },
110114
RubygemFs.instance.head("gems/sandworm-2.0.0.gem"))
111115

112116
spec = Gem::Package.new("sandworm-2.0.0.gem").spec
Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
# frozen_string_literal: true
2+
3+
require "test_helper"
4+
5+
# test "#process performs a task iteration" do
6+
# Maintenance::BackfillGemS3MetadataTask.process(element)
7+
# end
8+
# Exercises Maintenance::BackfillGemS3MetadataTask against a freshly pushed
# bin_and_img-0.1.0 gem whose stored file is then left alone, removed,
# corrupted, or given altered metadata.
class Maintenance::BackfillGemS3MetadataTaskTest < ActiveSupport::TestCase
  include SemanticLogger::Test::Minitest

  make_my_diffs_pretty!

  # Storage key of the gem file every #process test pushes.
  GEM_KEY = "gems/bin_and_img-0.1.0.gem"

  test "#collection returns a collection of indexed versions" do
    create(:version, indexed: false)
    v = create(:version, indexed: true)

    assert_equal [v], Maintenance::BackfillGemS3MetadataTask.collection.to_a
  end

  test "#process makes no changes for newly pushed gems" do
    pusher = push_gem

    assert_process_leaves_gem_untouched(pusher)
  end

  test "#process logs missing gem contents" do
    logger = capture_task_logs
    pusher = push_gem

    # delete the gem file itself so the task finds nothing to read
    RubygemFs.instance.remove(GEM_KEY)

    assert_process_leaves_gem_untouched(pusher)
    assert_single_error_logged(logger, "Version bin_and_img-0.1.0 has no gem contents")
  end

  test "#process logs sha256 mismatch" do
    logger = capture_task_logs
    pusher = push_gem

    # replace the gem file with different bytes so its checksum no longer matches the version
    RubygemFs.instance.remove(GEM_KEY)
    RubygemFs.instance.store(GEM_KEY, "contents")

    assert_process_leaves_gem_untouched(pusher)
    assert_single_error_logged(logger, "Version bin_and_img-0.1.0 has sha256 mismatch")
  end

  test "#process logs unexpected metadata" do
    logger = capture_task_logs
    pusher = push_gem

    # re-store the gem with an extra metadata key the task does not expect
    body, response = RubygemFs.instance.get_object(GEM_KEY)
    RubygemFs.instance.store(GEM_KEY, body, metadata: response[:metadata].merge("unexpected" => "value"))

    assert_process_leaves_gem_untouched(pusher)
    assert_single_error_logged(logger, "Version bin_and_img-0.1.0 has unexpected metadata")
  end

  test "#process logs conflicting metadata" do
    logger = capture_task_logs
    pusher = push_gem

    # re-store the gem with a "gem" metadata value that disagrees with the version
    body, response = RubygemFs.instance.get_object(GEM_KEY)
    RubygemFs.instance.store(GEM_KEY, body, metadata: response[:metadata].merge("gem" => "not_bin_and_img"))

    assert_process_leaves_gem_untouched(pusher)
    assert_single_error_logged(logger, "Version bin_and_img-0.1.0 has unexpected metadata")
  end

  test "#process updates metadata" do
    pusher = push_gem

    # remove metadata from the fs by re-storing the same body with none
    body, = RubygemFs.instance.get_object(GEM_KEY)
    RubygemFs.instance.store(GEM_KEY, body, metadata: {})

    assert_no_changes -> { [pusher.version.reload.updated_at] } do
      Maintenance::BackfillGemS3MetadataTask.process(pusher.version)
    end

    expected_head = {
      checksum_sha256: pusher.version.sha256,
      metadata: { "gem" => "bin_and_img",
                  "version" => "0.1.0",
                  "platform" => "ruby",
                  "surrogate-key" => "gem/bin_and_img",
                  "sha256" => pusher.version.sha256 },
      key: GEM_KEY
    }

    assert_equal [@gem.tap(&:rewind).read, expected_head], RubygemFs.instance.get_object(GEM_KEY)
  end

  private

  # Pushes bin_and_img-0.1.0 as a new user and returns the pusher. The gem
  # file is kept in @gem so tests can compare against its bytes.
  def push_gem
    @gem = gem_file("bin_and_img-0.1.0.gem")
    @user = create(:user)
    pusher = Pusher.new(create(:api_key, owner: @user), @gem)

    assert pusher.process, "gem should be pushed successfully: #{pusher.code} #{pusher.message}"

    pusher
  end

  # Replaces the task's logger with a capturing logger and returns it.
  def capture_task_logs
    logger = SemanticLogger::Test::CaptureLogEvents.new
    Maintenance::BackfillGemS3MetadataTask.stubs(:logger).returns(logger)
    logger
  end

  # Runs the task for the pushed version and asserts that neither the version
  # record nor the stored gem object (body and head) changed.
  def assert_process_leaves_gem_untouched(pusher)
    assert_no_changes -> { [pusher.version.reload.updated_at, RubygemFs.instance.get_object(GEM_KEY)] } do
      Maintenance::BackfillGemS3MetadataTask.process(pusher.version)
    end
  end

  # Asserts the task logged exactly one event: an error containing +message+.
  def assert_single_error_logged(logger, message)
    assert_semantic_logger_event(
      logger.events[0],
      level: :error,
      message_includes: message
    )
    assert_equal 1, logger.events.size
  end
end

0 commit comments

Comments
 (0)