Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/bosh_aws_cpi/lib/cloud/aws.rb
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ module AwsCloud; end
require 'cloud/aws/instance_param_mapper'
require 'cloud/aws/security_group_mapper'
require 'cloud/aws/block_device_manager'
require 'cloud/aws/instance_type_info'
require 'cloud/aws/instance_type_mapper'
require 'cloud/aws/classic_lb'
require 'cloud/aws/lb_target_group'
Expand Down
88 changes: 38 additions & 50 deletions src/bosh_aws_cpi/lib/cloud/aws/block_device_manager.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,10 @@ class BlockDeviceManager
DEFAULT_INSTANCE_STORAGE_DISK_MAPPING = { device_name: '/dev/sdb', virtual_name: 'ephemeral0' }.freeze
NVME_EBS_BY_ID_DEVICE_PATH_PREFIX = '/dev/disk/by-id/nvme-Amazon_Elastic_Block_Store_'

# Newer, nitro-based instances use NVMe storage volumes.
# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instance-types.html#ec2-nitro-instances
NVME_INSTANCE_FAMILIES = %w[a1 c5 c5a c5ad c5d c5n c6a c6g c6gd c6gn c6i c6id c6in c7i c7a d3 d3en g4dn g4ad g5 g6 g6e i3en i4i inf1 m5 m5a m5ad m5d m5dn m5n m5zn m6a m6g m6gd m6i m6id m6idn m6in m7i m7a m7i-flex p3dn p4d p5 r5 r5a r5ad r5b r5d r5dn r5n r6a r6g r6gd r6i r6in r6id r6idn r7i r7a r7iz t3 t3a t4g z1d x2iezn].freeze

def initialize(logger, stemcell, vm_cloud_props)
def initialize(logger, stemcell, vm_cloud_props, instance_type_info)
@logger = logger
@vm_cloud_props = vm_cloud_props
@instance_type_info = instance_type_info
@virtualization_type = stemcell.ami.virtualization_type
@root_device_name = stemcell.ami.root_device_name
@ami_block_device_names = stemcell.ami.block_device_mappings.map { |blk| blk.device_name }
Expand All @@ -21,8 +18,8 @@ def mappings_and_info
return mappings(info), agent_info(info)
end

def self.device_path(device_name, instance_type, volume_id)
if BlockDeviceManager.requires_nvme_device(instance_type)
def self.device_path(device_name, instance_type, volume_id, instance_type_info)
if instance_type_info.ebs_requires_nvme_path?(instance_type)
NVME_EBS_BY_ID_DEVICE_PATH_PREFIX + volume_id.sub('-', '')
else
device_name
Expand All @@ -49,18 +46,16 @@ def self.block_device_ready?(device_path)
cloud_error('Cannot find EBS volume on current instance')
end

# Reports whether the family of the given instance type (the text before the
# first ".") is listed in NVME_INSTANCE_FAMILIES.
#
# @param instance_type [String, nil] e.g. "c5.large"; nil is treated as 'unspecified'
# @return [Boolean] true when the family is in NVME_INSTANCE_FAMILIES
def self.requires_nvme_device(instance_type)
  type_name = instance_type.nil? ? 'unspecified' : instance_type
  family = type_name.split(".").first
  NVME_INSTANCE_FAMILIES.include?(family)
end

private

def mappings(info)
instance_type = @vm_cloud_props.instance_type.nil? ? 'unspecified' : @vm_cloud_props.instance_type
if instance_type =~ /^i3\./ || instance_type =~ /^i3en\./
info = info.reject { |device| device[:bosh_type] == 'raw_ephemeral' }
# For NVMe instances with instance storage, AWS auto-attaches the disks,
# so we don't include them in the block device mappings
if info.any? { |device| device[:bosh_type] == 'raw_ephemeral' }
instance_type = @vm_cloud_props.instance_type.nil? ? 'unspecified' : @vm_cloud_props.instance_type
if @instance_type_info.instance_storage_nvme_naming?(instance_type)
info = info.reject { |device| device[:bosh_type] == 'raw_ephemeral' }
end
end
Comment thread
coderabbitai[bot] marked this conversation as resolved.

info.map { |entry| entry.reject { |k| k == :bosh_type } }
Expand Down Expand Up @@ -150,34 +145,37 @@ def ephemeral_disk_mapping(instance_type, disk_info)
result
end

# Picks the device name used for the first raw ephemeral disk, based on the
# stemcell's virtualization type and the requested instance type.
#
# @return [String] '/dev/nvme0n1' for hvm i3/i3en types, '/dev/xvdba' for other
#   hvm types, '/dev/sdc' for paravirtual
# @raise [Bosh::Clouds::CloudError] when the virtualization type is neither
#   'hvm' nor 'paravirtual'
def first_raw_ephemeral_device
  requested_type = @vm_cloud_props.instance_type
  instance_type = requested_type.nil? ? 'unspecified' : requested_type

  if @virtualization_type == 'hvm'
    # Only the i3 and i3en families get an NVMe device name here.
    nvme_family = instance_type =~ /^i3\./ || instance_type =~ /^i3en\./
    return nvme_family ? '/dev/nvme0n1' : '/dev/xvdba'
  end

  return '/dev/sdc' if @virtualization_type == 'paravirtual'

  raise Bosh::Clouds::CloudError, "unknown virtualization type #{@virtualization_type}"
end

def raw_instance_mappings(num_of_devices)
next_device = first_raw_ephemeral_device

# Device hints for raw ephemeral disks:
# - NVMe instances: Agent discovers instance storage at runtime (enumeration order varies)
# by excluding EBS volumes identified via /dev/disk/by-id/nvme-Amazon_Elastic_Block_Store_*
# These device_name hints are informational only - agent ignores them and uses discovery.
# - Paravirtual/HVM instances: Agent trusts these device paths (enumeration is deterministic)
#
# In all cases, the count (num_of_devices) must be correct to trigger setup.

instance_type = @vm_cloud_props.instance_type || 'unspecified'
requires_nvme = @instance_type_info.instance_storage_nvme_naming?(instance_type)

num_of_devices.times.map do |index|
result = {
{
virtual_name: "ephemeral#{index}",
device_name: next_device,
bosh_type: 'raw_ephemeral',
device_name: raw_ephemeral_device_name(index, requires_nvme),
bosh_type: 'raw_ephemeral'
}
next_device = next_raw_ephemeral_disk(next_device)
result
end
end

def raw_ephemeral_device_name(index, requires_nvme)
  # NVMe names are plain sequential hints: /dev/nvme0n1, /dev/nvme1n1, ...
  return "/dev/nvme#{index}n1" if requires_nvme

  # Non-NVMe names advance a single letter by `index`:
  # paravirtual starts at /dev/sdc, hvm starts at /dev/xvdba.
  case @virtualization_type
  when 'paravirtual' then "/dev/sd#{('c'.ord + index).chr}"
  when 'hvm' then "/dev/xvdb#{('a'.ord + index).chr}"
  else raise Bosh::Clouds::CloudError, "unknown virtualization type #{@virtualization_type}"
  end
end
Comment thread
coderabbitai[bot] marked this conversation as resolved.

Expand Down Expand Up @@ -225,16 +223,6 @@ def root_device_name
end
end

# Computes the device name that follows `current_disk`.
#
# For names starting with /dev/nvme, the number after "nvme" is advanced and the
# result always ends in "n1". Any other name is advanced with String#next.
#
# @param current_disk [String] e.g. "/dev/nvme0n1" or "/dev/xvdba"
# @return [String] the following device name
def next_raw_ephemeral_disk(current_disk)
  return current_disk.next unless current_disk =~ /^\/dev\/nvme/

  controller_index = /^\/dev\/nvme(\d+)n.*/.match(current_disk)[1]
  "/dev/nvme#{controller_index.next}n1"
end

class DiskInfo
INSTANCE_TYPE_DISK_MAPPING = {
# previous generation
Expand Down
3 changes: 3 additions & 0 deletions src/bosh_aws_cpi/lib/cloud/aws/cloud_core.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ class CloudCore

attr_reader :ec2_resource
attr_reader :logger
attr_reader :instance_type_info

##
# Initialize BOSH AWS CPI. The contents of sub-hashes are defined in the {file:README.md}
Expand All @@ -33,6 +34,7 @@ def initialize(config, logger, volume_manager, az_selector, stemcell_api_version

@instance_manager = InstanceManager.new(@ec2_resource, @logger)
@instance_type_mapper = InstanceTypeMapper.new
@instance_type_info = InstanceTypeInfo.new(@ec2_client, @logger)
Comment thread
neddp marked this conversation as resolved.

@props_factory = Bosh::AwsCloud::PropsFactory.new(@config)
end
Expand Down Expand Up @@ -75,6 +77,7 @@ def create_vm(_agent_id, stemcell_id, vm_type, network_props, settings, disk_loc
@logger,
stemcell,
vm_props,
@instance_type_info,
).mappings_and_info

settings.agent_disk_info = agent_disk_info
Expand Down
5 changes: 3 additions & 2 deletions src/bosh_aws_cpi/lib/cloud/aws/cloud_v1.rb
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ def attach_disk(instance_id, disk_id)
update_agent_settings(instance_id) do |settings|
settings['disks'] ||= {}
settings['disks']['persistent'] ||= {}
settings['disks']['persistent'][disk_id] = BlockDeviceManager.device_path(device_name, instance.instance_type, disk_id)
settings['disks']['persistent'][disk_id] = BlockDeviceManager.device_path(device_name, instance.instance_type, disk_id, @cloud_core.instance_type_info)
end
end
end
Expand Down Expand Up @@ -480,7 +480,8 @@ def create_ami_for_stemcell(image_path, stemcell_cloud_props)
expected_path = BlockDeviceManager.device_path(
requested_path,
instance.instance_type,
volume.id
volume.id,
@cloud_core.instance_type_info,
)

logger.debug("Expected block device: #{expected_path}")
Expand Down
2 changes: 1 addition & 1 deletion src/bosh_aws_cpi/lib/cloud/aws/cloud_v2.rb
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def attach_disk(vm_id, disk_id)
with_thread_name("attach_disk(#{vm_id}, #{disk_id}):v2") do
device_path = nil
@cloud_core.attach_disk(vm_id, disk_id) do |instance, device_name|
device_path = BlockDeviceManager.device_path(device_name, instance.instance_type, disk_id)
device_path = BlockDeviceManager.device_path(device_name, instance.instance_type, disk_id, @cloud_core.instance_type_info)
if @stemcell_api_version < 2
update_agent_settings(vm_id) do |settings|
settings['disks'] ||= {}
Expand Down
74 changes: 74 additions & 0 deletions src/bosh_aws_cpi/lib/cloud/aws/instance_type_info.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
module Bosh::AwsCloud
# Provides instance type metadata by querying the EC2 DescribeInstanceTypes API.
# Results are cached for the lifetime of the CPI process so each instance type
# is queried at most once.
class InstanceTypeInfo
def initialize(ec2_client, logger)
  # Starts empty; #fetch stores one entry per instance type it has looked up.
  @cache = {}
  @logger = logger
  @ec2_client = ec2_client
end

# Returns true if EBS volumes on this instance type are exposed exclusively via
# NVMe and must be located using the /dev/disk/by-id/nvme-Amazon_Elastic_Block_Store_*
# symlink. This is true only for Nitro instances (nvme_support = 'required').
# Xen instances with NVMe instance storage (e.g. i3, nvme_support = 'supported')
# still use traditional /dev/xvd* paths for EBS volumes.
def ebs_requires_nvme_path?(instance_type)
  # A nil lookup result (unknown type) or a missing ebs_info both leave this nil,
  # which compares unequal to 'required' and therefore yields false.
  ebs_nvme_support = fetch(instance_type)&.ebs_info&.nvme_support
  ebs_nvme_support == 'required'
end

# Returns true if instance storage (local NVMe SSDs) on this instance type uses
# /dev/nvme*n1 device naming. This is the case when the instance storage reports
# nvme_support = 'required' or nvme_support = 'supported'.
def instance_storage_nvme_naming?(instance_type)
  # nil when the type is unknown or has no instance_storage_info; nil matches
  # neither accepted value, so the answer is false in both cases.
  storage_nvme_support = fetch(instance_type)&.instance_storage_info&.nvme_support
  %w[required supported].any? { |level| level == storage_nvme_support }
end

private

# Fetches and caches the DescribeInstanceTypes response for the given instance type.
# Returns the instance type info struct, or nil if the type is unknown/invalid.
def fetch(instance_type)
  cache_key = instance_type.nil? ? 'unspecified' : instance_type

  # Hash#fetch only runs the block on a miss, so a cached nil (unknown type)
  # is returned as-is instead of triggering another query.
  @cache.fetch(cache_key) { @cache[cache_key] = query(cache_key) }
end

# Calls EC2 DescribeInstanceTypes for a single instance type.
#
# The call is wrapped in Bosh::Common.retryable with tries: 5 and sleep: 1 for
# RequestLimitExceeded, InternalError and ServiceUnavailable errors.
#
# @param instance_type [String] instance type name sent to the API
# @return [Object, nil] the first entry of the response's instance_types, or nil
#   when the response is empty or the API rejects the type with
#   InvalidInstanceType / InvalidParameterValue
# @raise [Bosh::Clouds::CloudError] for any other Aws::Errors::ServiceError
def query(instance_type)
  @logger.debug("DescribeInstanceTypes for '#{instance_type}'")

  response = nil
  errors = [Aws::EC2::Errors::RequestLimitExceeded, Aws::EC2::Errors::InternalError, Aws::EC2::Errors::ServiceUnavailable]
  Bosh::Common.retryable(tries: 5, sleep: 1, on: errors) do |_tries, error|
    # `error` is set when this attempt follows a failed one.
    @logger.warn("DescribeInstanceTypes retrying for '#{instance_type}': #{error.message}") if error
    response = @ec2_client.describe_instance_types(
      instance_types: [instance_type],
    )
    # NOTE(review): presumably a truthy block result tells retryable to stop — confirm.
    true
  end

  if response.instance_types.empty?
    @logger.warn("DescribeInstanceTypes returned no data for '#{instance_type}'")
    return nil
  end

  response.instance_types.first
rescue Aws::EC2::Errors::InvalidInstanceType, Aws::EC2::Errors::InvalidParameterValue => e
  # An unknown or invalid type is not fatal: log it and report "no info".
  @logger.warn("DescribeInstanceTypes failed for '#{instance_type}': #{e.message}")
  nil
rescue Aws::Errors::ServiceError => e
  raise Bosh::Clouds::CloudError, "DescribeInstanceTypes API error for '#{instance_type}': #{e.message}"
end
end
end
1 change: 1 addition & 0 deletions src/bosh_aws_cpi/spec/spec_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ def mock_cloud_v3(options = nil)
def mock_ec2
client = instance_double(Aws::EC2::Client)
allow(Aws::EC2::Client).to receive(:new).and_return(client)
allow(client).to receive(:describe_instance_types).and_return(double(instance_types: []))
ec2 = double(Aws::EC2::Resource, client: client)

yield ec2 if block_given?
Expand Down
12 changes: 12 additions & 0 deletions src/bosh_aws_cpi/spec/unit/attach_disk_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -125,12 +125,24 @@
context 'when attaching to c5 instance' do
let(:instance_type) { 'c5.large' }

before do
allow(cloud.ec2_resource.client).to receive(:describe_instance_types)
.with(instance_types: ['c5.large'])
.and_return(double(instance_types: [double(ebs_info: double(nvme_support: 'required'))]))
end

it_behaves_like 'NVMe required instance types'
end

context 'when attaching to m5 instance' do
let(:instance_type) { 'm5.xlarge' }

before do
allow(cloud.ec2_resource.client).to receive(:describe_instance_types)
.with(instance_types: ['m5.xlarge'])
.and_return(double(instance_types: [double(ebs_info: double(nvme_support: 'required'))]))
end

it_behaves_like 'NVMe required instance types'
end

Expand Down
Loading
Loading