From b21568f3190937ae3652d1ece10381c07978be04 Mon Sep 17 00:00:00 2001 From: Sebastiaan van Stijn Date: Fri, 8 Oct 2021 13:08:08 +0200 Subject: [PATCH] libct/capabilities: create capabilities map based on current environment Commit 5fb831a0fa68578e8020dc9c59b90336cfd95198 changed the behavior of runc to match the OCI runtime spec, which now describes that unknown or unavailable capabilities should be ignored. While this change addressed situations where a capability was requested that's not supported by the current kernel ("unknown capabilities"), it did not take into account situations where the kernel *supports* a capability, but that capability is not *available* in the current environment. This causes issues if, for example, runc is running in a docker-in-docker setup, and the outer container does not have all known capabilities enabled, either on purpose (for example, Talos version 0.13 drops two capabilities (kexec + module loading) from all processes but PID 1), or because the outer container was created by an older version of docker or runc, which did not yet support newer capabilities. This patch attempts to address this problem by limiting the list of "known" capabilities to the set of effective capabilities of the current process. This code is based on the code in containerd's "caps" package, with some modifications: - the full list of capabilities uses github.com/syndtr/gocapability, instead of a self-defined list. Containerd removed the use of github.com/syndtr/gocapability, but this dependency is still in use in runc, so this change makes it a closer match to the current code. - functions were un-exported, as we don't intend them to be used externally. - a sync.Once was added to the current() function, so that /proc/self/status is only parsed once. This assumes effective capabilities do not change during runc's lifecycle. There are some things left to be looked at: 1. 
current() may return an error when failing to parse /proc/self/status, but this error is currently ignored. If an error occurs in this code, it will mean that *no* capabilities are known. While this will be logged as a warning when attempting to apply capabilities, it's not a very desirable situation. We'll have to decide what to do in that situation, which could be "panic" (runc unable to run successfully), or "fall back to a safe/default list". 2. the current code applies the same list (effective caps) to every "type" (ambient, inheritable, bounding, ...). When applying capabilities, should each of those types in the container's spec be limited to the _corresponding_ type in the current process's capabilities? 3. integration test: we may want an integration test for this. 4. do we want to upstream this functionality to github.com/syndtr/gocapability ? Signed-off-by: Sebastiaan van Stijn --- libcontainer/capabilities/capabilities.go | 29 ++--- .../capabilities/capabilities_linux.go | 105 ++++++++++++++++++ 2 files changed, 113 insertions(+), 21 deletions(-) create mode 100644 libcontainer/capabilities/capabilities_linux.go diff --git a/libcontainer/capabilities/capabilities.go b/libcontainer/capabilities/capabilities.go index 7e938d3f505..fa1949ac48b 100644 --- a/libcontainer/capabilities/capabilities.go +++ b/libcontainer/capabilities/capabilities.go @@ -5,7 +5,6 @@ package capabilities import ( "sort" - "strings" "github.com/opencontainers/runc/libcontainer/configs" "github.com/sirupsen/logrus" @@ -14,25 +13,12 @@ import ( const allCapabilityTypes = capability.CAPS | capability.BOUNDING | capability.AMBIENT -var ( - capabilityMap map[string]capability.Cap - capTypes = []capability.CapType{ - capability.BOUNDING, - capability.PERMITTED, - capability.INHERITABLE, - capability.EFFECTIVE, - capability.AMBIENT, - } -) - -func init() { - capabilityMap = make(map[string]capability.Cap, capability.CAP_LAST_CAP+1) - for _, c := range capability.List() { - if c > 
capability.CAP_LAST_CAP { - continue - } - capabilityMap["CAP_"+strings.ToUpper(c.String())] = c - } +var capTypes = []capability.CapType{ + capability.BOUNDING, + capability.PERMITTED, + capability.INHERITABLE, + capability.EFFECTIVE, + capability.AMBIENT, } // New creates a new Caps from the given Capabilities config. Unknown Capabilities @@ -69,6 +55,7 @@ func New(capConfig *configs.Capabilities) (*Caps, error) { // are not returned, but appended to unknownCaps. func capSlice(caps []string, unknownCaps map[string]struct{}) []capability.Cap { var out []capability.Cap + capabilityMap, _ := current() for _, c := range caps { if v, ok := capabilityMap[c]; !ok { unknownCaps[c] = struct{}{} @@ -102,7 +89,7 @@ func (c *Caps) ApplyBoundingSet() error { return c.pid.Apply(capability.BOUNDING) } -// Apply sets all the capabilities for the current process in the config. +// ApplyCaps sets all the capabilities for the current process in the config. func (c *Caps) ApplyCaps() error { c.pid.Clear(allCapabilityTypes) for _, g := range capTypes { diff --git a/libcontainer/capabilities/capabilities_linux.go b/libcontainer/capabilities/capabilities_linux.go new file mode 100644 index 00000000000..3e7fd88cb80 --- /dev/null +++ b/libcontainer/capabilities/capabilities_linux.go @@ -0,0 +1,105 @@ +/* + Copyright The containerd Authors. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/
+
+package capabilities
+
+import (
+	"bufio"
+	"fmt"
+	"io"
+	"os"
+	"strconv"
+	"strings"
+	"sync"
+
+	"github.com/syndtr/gocapability/capability"
+)
+
+// fromBitmap converts a 64-bit capability bitmap into a map keyed by the
+// upper-cased, "CAP_"-prefixed capability name. Bit i of v corresponds to
+// capability.Cap(i). Set bits whose capability has no known name (String()
+// returns "unknown") are skipped.
+func fromBitmap(v uint64) map[string]capability.Cap {
+	// At most one entry per bit of the bitmap.
+	res := make(map[string]capability.Cap, 64)
+	for i := 0; i < 64; i++ {
+		if (v>>i)&0x1 == 0 {
+			continue
+		}
+		c := capability.Cap(i)
+		if s := c.String(); s != "unknown" {
+			res["CAP_"+strings.ToUpper(s)] = c
+		}
+	}
+	return res
+}
+
+// parseProcPIDStatus reads the contents of a /proc/<pid>/status file from r and
+// returns the raw bitmap of each capability set found in it (CapInh, CapPrm,
+// CapEff, CapBnd and CapAmb), keyed by capability type. Lines that are not
+// "key: value" pairs, and lines with any other key, are ignored. If a key
+// appears more than once, the last value wins.
+//
+// An error is returned if a capability value is not valid hexadecimal, or if
+// reading from r fails; in both cases the returned map is nil.
+func parseProcPIDStatus(r io.Reader) (map[capability.CapType]uint64, error) {
+	res := make(map[capability.CapType]uint64)
+	scanner := bufio.NewScanner(r)
+	for scanner.Scan() {
+		line := scanner.Text()
+		pair := strings.SplitN(line, ":", 2)
+		if len(pair) != 2 {
+			continue
+		}
+
+		var capType capability.CapType
+		switch strings.TrimSpace(pair[0]) {
+		case "CapInh":
+			capType = capability.INHERITABLE
+		case "CapPrm":
+			capType = capability.PERMITTED
+		case "CapEff":
+			capType = capability.EFFECTIVE
+		case "CapBnd":
+			capType = capability.BOUNDING
+		case "CapAmb":
+			capType = capability.AMBIENT
+		default:
+			// Not a capability field; skip to the next line.
+			continue
+		}
+
+		ui64, err := strconv.ParseUint(strings.TrimSpace(pair[1]), 16, 64)
+		if err != nil {
+			// Keep the underlying cause so callers can inspect it.
+			return nil, fmt.Errorf("failed to parse line %q: %w", line, err)
+		}
+		res[capType] = ui64
+	}
+	if err := scanner.Err(); err != nil {
+		return nil, err
+	}
+	return res, nil
+}
+
+// Cached result of current(); initialization is guarded by curCapsOnce.
+var (
+	curCaps     map[string]capability.Cap // map returned by current()
+	curCapsErr  error                     // error returned alongside curCaps by current()
+	curCapsOnce sync.Once
+)
+
+// current returns a map of the effective known caps of the current process.
+// The status file is read at most once: the first call parses
+// /proc/self/status and caches both the resulting map and any error, and every
+// later call returns those cached values. On failure the returned map is nil.
+func current() (map[string]capability.Cap, error) {
+	curCapsOnce.Do(func() {
+		// Use a closure-local err and assign curCapsErr explicitly: declaring
+		// curCapsErr with ":=" here would shadow the package-level variable,
+		// and the error would never reach the caller.
+		f, err := os.Open("/proc/self/status")
+		if err != nil {
+			curCapsErr = err
+			return
+		}
+		defer f.Close()
+		caps, err := parseProcPIDStatus(f)
+		if err != nil {
+			curCapsErr = err
+			return
+		}
+		curCaps = fromBitmap(caps[capability.EFFECTIVE])
+	})
+	return curCaps, curCapsErr
+}