mirror of
https://github.com/weaveworks/scope.git
synced 2026-03-02 17:50:39 +00:00
Update opencontainers/runc to v1.0.0-rc5
``` $ gvt delete github.com/opencontainers/runc/libcontainer/cgroups $ gvt delete github.com/opencontainers/runc/libcontainer/configs $ gvt delete github.com/opencontainers/runc/libcontainer/system $ gvt delete github.com/opencontainers/runc/libcontainer/user $ gvt delete github.com/opencontainers/runc/libcontainer/utils $ gvt fetch --tag v1.0.0-rc5 github.com/opencontainers/runc/libcontainer 2018/07/23 17:08:18 Fetching: github.com/opencontainers/runc/libcontainer 2018/07/23 17:08:24 · Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/vishvananda/netlink 2018/07/23 17:08:24 · Fetching recursive dependency: github.com/opencontainers/runc/vendor/golang.org/x/sys/unix 2018/07/23 17:08:24 · Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/cyphar/filepath-securejoin 2018/07/23 17:08:24 ·· Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/pkg/errors 2018/07/23 17:08:24 · Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/opencontainers/selinux/go-selinux/label 2018/07/23 17:08:25 ·· Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/opencontainers/selinux/go-selinux 2018/07/23 17:08:25 · Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/containerd/console 2018/07/23 17:08:25 ·· Fetching recursive dependency: github.com/opencontainers/runc/vendor/golang.org/x/sys/windows 2018/07/23 17:08:25 · Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/sirupsen/logrus 2018/07/23 17:08:25 · Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/godbus/dbus 2018/07/23 17:08:25 · Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/mrunalp/fileutils 2018/07/23 17:08:25 · Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/coreos/go-systemd/util 2018/07/23 17:08:25 ·· Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/coreos/pkg/dlopen 2018/07/23 17:08:25 · Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/golang/protobuf/proto 2018/07/23 17:08:25 · Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/syndtr/gocapability/capability 2018/07/23 17:08:25 · Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/coreos/go-systemd/dbus 2018/07/23 17:08:25 · Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/opencontainers/runtime-spec/specs-go 2018/07/23 17:08:25 · Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/seccomp/libseccomp-golang 2018/07/23 17:08:25 · Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/docker/go-units ```
This commit is contained in:
191
vendor/github.com/opencontainers/runc/libcontainer/LICENSE
generated
vendored
Normal file
191
vendor/github.com/opencontainers/runc/libcontainer/LICENSE
generated
vendored
Normal file
@@ -0,0 +1,191 @@
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
Copyright 2014 Docker, Inc.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
54
vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor.go
generated
vendored
Normal file
54
vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor.go
generated
vendored
Normal file
@@ -0,0 +1,54 @@
|
||||
// +build apparmor,linux
|
||||
|
||||
package apparmor
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
)
|
||||
|
||||
// IsEnabled returns true if apparmor is enabled for the host.
|
||||
func IsEnabled() bool {
|
||||
if _, err := os.Stat("/sys/kernel/security/apparmor"); err == nil && os.Getenv("container") == "" {
|
||||
if _, err = os.Stat("/sbin/apparmor_parser"); err == nil {
|
||||
buf, err := ioutil.ReadFile("/sys/module/apparmor/parameters/enabled")
|
||||
return err == nil && len(buf) > 1 && buf[0] == 'Y'
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func setprocattr(attr, value string) error {
|
||||
// Under AppArmor you can only change your own attr, so use /proc/self/
|
||||
// instead of /proc/<tid>/ like libapparmor does
|
||||
path := fmt.Sprintf("/proc/self/attr/%s", attr)
|
||||
|
||||
f, err := os.OpenFile(path, os.O_WRONLY, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
_, err = fmt.Fprintf(f, "%s", value)
|
||||
return err
|
||||
}
|
||||
|
||||
// changeOnExec reimplements aa_change_onexec from libapparmor in Go
|
||||
func changeOnExec(name string) error {
|
||||
value := "exec " + name
|
||||
if err := setprocattr("exec", value); err != nil {
|
||||
return fmt.Errorf("apparmor failed to apply profile: %s", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ApplyProfile will apply the profile with the specified name to the process after
|
||||
// the next exec.
|
||||
func ApplyProfile(name string) error {
|
||||
if name == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
return changeOnExec(name)
|
||||
}
|
||||
20
vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_disabled.go
generated
vendored
Normal file
20
vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_disabled.go
generated
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
// +build !apparmor !linux
|
||||
|
||||
package apparmor
|
||||
|
||||
import (
|
||||
"errors"
|
||||
)
|
||||
|
||||
var ErrApparmorNotEnabled = errors.New("apparmor: config provided but apparmor not supported")
|
||||
|
||||
func IsEnabled() bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func ApplyProfile(name string) error {
|
||||
if name != "" {
|
||||
return ErrApparmorNotEnabled
|
||||
}
|
||||
return nil
|
||||
}
|
||||
113
vendor/github.com/opencontainers/runc/libcontainer/capabilities_linux.go
generated
vendored
Normal file
113
vendor/github.com/opencontainers/runc/libcontainer/capabilities_linux.go
generated
vendored
Normal file
@@ -0,0 +1,113 @@
|
||||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/syndtr/gocapability/capability"
|
||||
)
|
||||
|
||||
const allCapabilityTypes = capability.CAPS | capability.BOUNDS | capability.AMBS
|
||||
|
||||
var capabilityMap map[string]capability.Cap
|
||||
|
||||
func init() {
|
||||
capabilityMap = make(map[string]capability.Cap)
|
||||
last := capability.CAP_LAST_CAP
|
||||
// workaround for RHEL6 which has no /proc/sys/kernel/cap_last_cap
|
||||
if last == capability.Cap(63) {
|
||||
last = capability.CAP_BLOCK_SUSPEND
|
||||
}
|
||||
for _, cap := range capability.List() {
|
||||
if cap > last {
|
||||
continue
|
||||
}
|
||||
capKey := fmt.Sprintf("CAP_%s", strings.ToUpper(cap.String()))
|
||||
capabilityMap[capKey] = cap
|
||||
}
|
||||
}
|
||||
|
||||
func newContainerCapList(capConfig *configs.Capabilities) (*containerCapabilities, error) {
|
||||
bounding := []capability.Cap{}
|
||||
for _, c := range capConfig.Bounding {
|
||||
v, ok := capabilityMap[c]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unknown capability %q", c)
|
||||
}
|
||||
bounding = append(bounding, v)
|
||||
}
|
||||
effective := []capability.Cap{}
|
||||
for _, c := range capConfig.Effective {
|
||||
v, ok := capabilityMap[c]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unknown capability %q", c)
|
||||
}
|
||||
effective = append(effective, v)
|
||||
}
|
||||
inheritable := []capability.Cap{}
|
||||
for _, c := range capConfig.Inheritable {
|
||||
v, ok := capabilityMap[c]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unknown capability %q", c)
|
||||
}
|
||||
inheritable = append(inheritable, v)
|
||||
}
|
||||
permitted := []capability.Cap{}
|
||||
for _, c := range capConfig.Permitted {
|
||||
v, ok := capabilityMap[c]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unknown capability %q", c)
|
||||
}
|
||||
permitted = append(permitted, v)
|
||||
}
|
||||
ambient := []capability.Cap{}
|
||||
for _, c := range capConfig.Ambient {
|
||||
v, ok := capabilityMap[c]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("unknown capability %q", c)
|
||||
}
|
||||
ambient = append(ambient, v)
|
||||
}
|
||||
pid, err := capability.NewPid(0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &containerCapabilities{
|
||||
bounding: bounding,
|
||||
effective: effective,
|
||||
inheritable: inheritable,
|
||||
permitted: permitted,
|
||||
ambient: ambient,
|
||||
pid: pid,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type containerCapabilities struct {
|
||||
pid capability.Capabilities
|
||||
bounding []capability.Cap
|
||||
effective []capability.Cap
|
||||
inheritable []capability.Cap
|
||||
permitted []capability.Cap
|
||||
ambient []capability.Cap
|
||||
}
|
||||
|
||||
// ApplyBoundingSet sets the capability bounding set to those specified in the whitelist.
|
||||
func (c *containerCapabilities) ApplyBoundingSet() error {
|
||||
c.pid.Clear(capability.BOUNDS)
|
||||
c.pid.Set(capability.BOUNDS, c.bounding...)
|
||||
return c.pid.Apply(capability.BOUNDS)
|
||||
}
|
||||
|
||||
// Apply sets all the capabilities for the current process in the config.
|
||||
func (c *containerCapabilities) ApplyCaps() error {
|
||||
c.pid.Clear(allCapabilityTypes)
|
||||
c.pid.Set(capability.BOUNDS, c.bounding...)
|
||||
c.pid.Set(capability.PERMITTED, c.permitted...)
|
||||
c.pid.Set(capability.INHERITABLE, c.inheritable...)
|
||||
c.pid.Set(capability.EFFECTIVE, c.effective...)
|
||||
c.pid.Set(capability.AMBIENT, c.ambient...)
|
||||
return c.pid.Apply(allCapabilityTypes)
|
||||
}
|
||||
8
vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go
generated
vendored
8
vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go
generated
vendored
@@ -9,7 +9,7 @@ import (
|
||||
)
|
||||
|
||||
type Manager interface {
|
||||
// Apply cgroup configuration to the process with the specified pid
|
||||
// Applies cgroup configuration to the process with the specified pid
|
||||
Apply(pid int) error
|
||||
|
||||
// Returns the PIDs inside the cgroup set
|
||||
@@ -27,9 +27,9 @@ type Manager interface {
|
||||
// Destroys the cgroup set
|
||||
Destroy() error
|
||||
|
||||
// NewCgroupManager() and LoadCgroupManager() require following attributes:
|
||||
// The option func SystemdCgroups() and Cgroupfs() require following attributes:
|
||||
// Paths map[string]string
|
||||
// Cgroups *cgroups.Cgroup
|
||||
// Cgroups *configs.Cgroup
|
||||
// Paths maps cgroup subsystem to path at which it is mounted.
|
||||
// Cgroups specifies specific cgroup settings for the various subsystems
|
||||
|
||||
@@ -37,7 +37,7 @@ type Manager interface {
|
||||
// restore the object later.
|
||||
GetPaths() map[string]string
|
||||
|
||||
// Set the cgroup as configured.
|
||||
// Sets the cgroup as configured.
|
||||
Set(container *configs.Config) error
|
||||
}
|
||||
|
||||
|
||||
18
vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_test.go
generated
vendored
18
vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_test.go
generated
vendored
@@ -1,18 +0,0 @@
|
||||
// +build linux
|
||||
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParseCgroups(t *testing.T) {
|
||||
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if _, ok := cgroups["cpu"]; !ok {
|
||||
t.Fail()
|
||||
}
|
||||
}
|
||||
146
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go
generated
vendored
146
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go
generated
vendored
@@ -9,11 +9,11 @@ import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"sync"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -30,8 +30,8 @@ var (
|
||||
&NetPrioGroup{},
|
||||
&PerfEventGroup{},
|
||||
&FreezerGroup{},
|
||||
&NameGroup{GroupName: "name=systemd", Join: true},
|
||||
}
|
||||
CgroupProcesses = "cgroup.procs"
|
||||
HugePageSizes, _ = cgroups.GetHugePageSize()
|
||||
)
|
||||
|
||||
@@ -104,6 +104,8 @@ func (m *Manager) Apply(pid int) (err error) {
|
||||
if m.Cgroups == nil {
|
||||
return nil
|
||||
}
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
var c = m.Cgroups
|
||||
|
||||
@@ -112,8 +114,8 @@ func (m *Manager) Apply(pid int) (err error) {
|
||||
return err
|
||||
}
|
||||
|
||||
m.Paths = make(map[string]string)
|
||||
if c.Paths != nil {
|
||||
paths := make(map[string]string)
|
||||
for name, path := range c.Paths {
|
||||
_, err := d.path(name)
|
||||
if err != nil {
|
||||
@@ -122,35 +124,39 @@ func (m *Manager) Apply(pid int) (err error) {
|
||||
}
|
||||
return err
|
||||
}
|
||||
paths[name] = path
|
||||
m.Paths[name] = path
|
||||
}
|
||||
m.Paths = paths
|
||||
return cgroups.EnterPid(m.Paths, pid)
|
||||
}
|
||||
|
||||
paths := make(map[string]string)
|
||||
defer func() {
|
||||
if err != nil {
|
||||
cgroups.RemovePaths(paths)
|
||||
}
|
||||
}()
|
||||
for _, sys := range subsystems {
|
||||
if err := sys.Apply(d); err != nil {
|
||||
return err
|
||||
}
|
||||
// TODO: Apply should, ideally, be reentrant or be broken up into a separate
|
||||
// create and join phase so that the cgroup hierarchy for a container can be
|
||||
// created then join consists of writing the process pids to cgroup.procs
|
||||
p, err := d.path(sys.Name())
|
||||
if err != nil {
|
||||
if cgroups.IsNotFound(err) {
|
||||
// The non-presence of the devices subsystem is
|
||||
// considered fatal for security reasons.
|
||||
if cgroups.IsNotFound(err) && sys.Name() != "devices" {
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
paths[sys.Name()] = p
|
||||
m.Paths[sys.Name()] = p
|
||||
|
||||
if err := sys.Apply(d); err != nil {
|
||||
if os.IsPermission(err) && m.Cgroups.Path == "" {
|
||||
// If we didn't set a cgroup path, then let's defer the error here
|
||||
// until we know whether we have set limits or not.
|
||||
// If we hadn't set limits, then it's ok that we couldn't join this cgroup, because
|
||||
// it will have the same limits as its parent.
|
||||
delete(m.Paths, sys.Name())
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
}
|
||||
m.Paths = paths
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -191,19 +197,20 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
||||
}
|
||||
|
||||
func (m *Manager) Set(container *configs.Config) error {
|
||||
for _, sys := range subsystems {
|
||||
// Generate fake cgroup data.
|
||||
d, err := getCgroupData(container.Cgroups, -1)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Get the path, but don't error out if the cgroup wasn't found.
|
||||
path, err := d.path(sys.Name())
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
// If Paths are set, then we are just joining cgroups paths
|
||||
// and there is no need to set any values.
|
||||
if m.Cgroups.Paths != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
paths := m.GetPaths()
|
||||
for _, sys := range subsystems {
|
||||
path := paths[sys.Name()]
|
||||
if err := sys.Set(path, container.Cgroups); err != nil {
|
||||
if path == "" {
|
||||
// cgroup never applied
|
||||
return fmt.Errorf("cannot set limits on the %s cgroup, as the container has not joined it", sys.Name())
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -219,14 +226,8 @@ func (m *Manager) Set(container *configs.Config) error {
|
||||
// Freeze toggles the container's freezer cgroup depending on the state
|
||||
// provided
|
||||
func (m *Manager) Freeze(state configs.FreezerState) error {
|
||||
d, err := getCgroupData(m.Cgroups, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
dir, err := d.path("freezer")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
paths := m.GetPaths()
|
||||
dir := paths["freezer"]
|
||||
prevState := m.Cgroups.Resources.Freezer
|
||||
m.Cgroups.Resources.Freezer = state
|
||||
freezer, err := subsystems.Get("freezer")
|
||||
@@ -242,28 +243,13 @@ func (m *Manager) Freeze(state configs.FreezerState) error {
|
||||
}
|
||||
|
||||
func (m *Manager) GetPids() ([]int, error) {
|
||||
dir, err := getCgroupPath(m.Cgroups)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetPids(dir)
|
||||
paths := m.GetPaths()
|
||||
return cgroups.GetPids(paths["devices"])
|
||||
}
|
||||
|
||||
func (m *Manager) GetAllPids() ([]int, error) {
|
||||
dir, err := getCgroupPath(m.Cgroups)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cgroups.GetAllPids(dir)
|
||||
}
|
||||
|
||||
func getCgroupPath(c *configs.Cgroup) (string, error) {
|
||||
d, err := getCgroupData(c, 0)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return d.path("devices")
|
||||
paths := m.GetPaths()
|
||||
return cgroups.GetAllPids(paths["devices"])
|
||||
}
|
||||
|
||||
func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
|
||||
@@ -276,38 +262,26 @@ func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
|
||||
return nil, fmt.Errorf("cgroup: either Path or Name and Parent should be used")
|
||||
}
|
||||
|
||||
innerPath := c.Path
|
||||
// XXX: Do not remove this code. Path safety is important! -- cyphar
|
||||
cgPath := libcontainerUtils.CleanPath(c.Path)
|
||||
cgParent := libcontainerUtils.CleanPath(c.Parent)
|
||||
cgName := libcontainerUtils.CleanPath(c.Name)
|
||||
|
||||
innerPath := cgPath
|
||||
if innerPath == "" {
|
||||
innerPath = filepath.Join(c.Parent, c.Name)
|
||||
innerPath = filepath.Join(cgParent, cgName)
|
||||
}
|
||||
|
||||
return &cgroupData{
|
||||
root: root,
|
||||
innerPath: c.Path,
|
||||
innerPath: innerPath,
|
||||
config: c,
|
||||
pid: pid,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (raw *cgroupData) parentPath(subsystem, mountpoint, root string) (string, error) {
|
||||
// Use GetThisCgroupDir instead of GetInitCgroupDir, because the creating
|
||||
// process could in container and shared pid namespace with host, and
|
||||
// /proc/1/cgroup could point to whole other world of cgroups.
|
||||
initPath, err := cgroups.GetThisCgroupDir(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
// This is needed for nested containers, because in /proc/self/cgroup we
|
||||
// see pathes from host, which don't exist in container.
|
||||
relDir, err := filepath.Rel(root, initPath)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return filepath.Join(mountpoint, relDir), nil
|
||||
}
|
||||
|
||||
func (raw *cgroupData) path(subsystem string) (string, error) {
|
||||
mnt, root, err := cgroups.FindCgroupMountpointAndRoot(subsystem)
|
||||
mnt, err := cgroups.FindCgroupMountpoint(subsystem)
|
||||
// If we didn't mount the subsystem, there is no point we make the path.
|
||||
if err != nil {
|
||||
return "", err
|
||||
@@ -315,11 +289,14 @@ func (raw *cgroupData) path(subsystem string) (string, error) {
|
||||
|
||||
// If the cgroup name/path is absolute do not look relative to the cgroup of the init process.
|
||||
if filepath.IsAbs(raw.innerPath) {
|
||||
// Sometimes subsystems can be mounted togethger as 'cpu,cpuacct'.
|
||||
// Sometimes subsystems can be mounted together as 'cpu,cpuacct'.
|
||||
return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil
|
||||
}
|
||||
|
||||
parentPath, err := raw.parentPath(subsystem, mnt, root)
|
||||
// Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating
|
||||
// process could in container and shared pid namespace with host, and
|
||||
// /proc/1/cgroup could point to whole other world of cgroups.
|
||||
parentPath, err := cgroups.GetOwnCgroupPath(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
@@ -335,7 +312,7 @@ func (raw *cgroupData) join(subsystem string) (string, error) {
|
||||
if err := os.MkdirAll(path, 0755); err != nil {
|
||||
return "", err
|
||||
}
|
||||
if err := writeFile(path, CgroupProcesses, strconv.Itoa(raw.pid)); err != nil {
|
||||
if err := cgroups.WriteCgroupProc(path, raw.pid); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return path, nil
|
||||
@@ -345,9 +322,12 @@ func writeFile(dir, file, data string) error {
|
||||
// Normally dir should not be empty, one case is that cgroup subsystem
|
||||
// is not mounted, we will get empty dir, and we want it fail here.
|
||||
if dir == "" {
|
||||
return fmt.Errorf("no such directory for %s.", file)
|
||||
return fmt.Errorf("no such directory for %s", file)
|
||||
}
|
||||
return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
|
||||
if err := ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700); err != nil {
|
||||
return fmt.Errorf("failed to write %v to %v: %v", data, file, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func readFile(dir, file string) (string, error) {
|
||||
@@ -365,8 +345,8 @@ func removePath(p string, err error) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func CheckCpushares(path string, c int64) error {
|
||||
var cpuShares int64
|
||||
func CheckCpushares(path string, c uint64) error {
|
||||
var cpuShares uint64
|
||||
|
||||
if c == 0 {
|
||||
return nil
|
||||
|
||||
636
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio_test.go
generated
vendored
636
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio_test.go
generated
vendored
@@ -1,636 +0,0 @@
|
||||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
const (
|
||||
sectorsRecursiveContents = `8:0 1024`
|
||||
serviceBytesRecursiveContents = `8:0 Read 100
|
||||
8:0 Write 200
|
||||
8:0 Sync 300
|
||||
8:0 Async 500
|
||||
8:0 Total 500
|
||||
Total 500`
|
||||
servicedRecursiveContents = `8:0 Read 10
|
||||
8:0 Write 40
|
||||
8:0 Sync 20
|
||||
8:0 Async 30
|
||||
8:0 Total 50
|
||||
Total 50`
|
||||
queuedRecursiveContents = `8:0 Read 1
|
||||
8:0 Write 4
|
||||
8:0 Sync 2
|
||||
8:0 Async 3
|
||||
8:0 Total 5
|
||||
Total 5`
|
||||
serviceTimeRecursiveContents = `8:0 Read 173959
|
||||
8:0 Write 0
|
||||
8:0 Sync 0
|
||||
8:0 Async 173959
|
||||
8:0 Total 17395
|
||||
Total 17395`
|
||||
waitTimeRecursiveContents = `8:0 Read 15571
|
||||
8:0 Write 0
|
||||
8:0 Sync 0
|
||||
8:0 Async 15571
|
||||
8:0 Total 15571`
|
||||
mergedRecursiveContents = `8:0 Read 5
|
||||
8:0 Write 10
|
||||
8:0 Sync 0
|
||||
8:0 Async 0
|
||||
8:0 Total 15
|
||||
Total 15`
|
||||
timeRecursiveContents = `8:0 8`
|
||||
throttleServiceBytes = `8:0 Read 11030528
|
||||
8:0 Write 23
|
||||
8:0 Sync 42
|
||||
8:0 Async 11030528
|
||||
8:0 Total 11030528
|
||||
252:0 Read 11030528
|
||||
252:0 Write 23
|
||||
252:0 Sync 42
|
||||
252:0 Async 11030528
|
||||
252:0 Total 11030528
|
||||
Total 22061056`
|
||||
throttleServiced = `8:0 Read 164
|
||||
8:0 Write 23
|
||||
8:0 Sync 42
|
||||
8:0 Async 164
|
||||
8:0 Total 164
|
||||
252:0 Read 164
|
||||
252:0 Write 23
|
||||
252:0 Sync 42
|
||||
252:0 Async 164
|
||||
252:0 Total 164
|
||||
Total 328`
|
||||
)
|
||||
|
||||
func appendBlkioStatEntry(blkioStatEntries *[]cgroups.BlkioStatEntry, major, minor, value uint64, op string) {
|
||||
*blkioStatEntries = append(*blkioStatEntries, cgroups.BlkioStatEntry{Major: major, Minor: minor, Value: value, Op: op})
|
||||
}
|
||||
|
||||
func TestBlkioSetWeight(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
weightBefore = 100
|
||||
weightAfter = 200
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.weight": strconv.Itoa(weightBefore),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.BlkioWeight = weightAfter
|
||||
blkio := &BlkioGroup{}
|
||||
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := getCgroupParamUint(helper.CgroupPath, "blkio.weight")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse blkio.weight - %s", err)
|
||||
}
|
||||
|
||||
if value != weightAfter {
|
||||
t.Fatal("Got the wrong value, set blkio.weight failed.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlkioSetWeightDevice(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
weightDeviceBefore = "8:0 400"
|
||||
)
|
||||
|
||||
wd := configs.NewWeightDevice(8, 0, 500, 0)
|
||||
weightDeviceAfter := wd.WeightString()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.weight_device": weightDeviceBefore,
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.BlkioWeightDevice = []*configs.WeightDevice{wd}
|
||||
blkio := &BlkioGroup{}
|
||||
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := getCgroupParamString(helper.CgroupPath, "blkio.weight_device")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse blkio.weight_device - %s", err)
|
||||
}
|
||||
|
||||
if value != weightDeviceAfter {
|
||||
t.Fatal("Got the wrong value, set blkio.weight_device failed.")
|
||||
}
|
||||
}
|
||||
|
||||
// regression #274
|
||||
func TestBlkioSetMultipleWeightDevice(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
weightDeviceBefore = "8:0 400"
|
||||
)
|
||||
|
||||
wd1 := configs.NewWeightDevice(8, 0, 500, 0)
|
||||
wd2 := configs.NewWeightDevice(8, 16, 500, 0)
|
||||
// we cannot actually set and check both because normal ioutil.WriteFile
|
||||
// when writing to cgroup file will overwrite the whole file content instead
|
||||
// of updating it as the kernel is doing. Just check the second device
|
||||
// is present will suffice for the test to ensure multiple writes are done.
|
||||
weightDeviceAfter := wd2.WeightString()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.weight_device": weightDeviceBefore,
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.BlkioWeightDevice = []*configs.WeightDevice{wd1, wd2}
|
||||
blkio := &BlkioGroup{}
|
||||
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := getCgroupParamString(helper.CgroupPath, "blkio.weight_device")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse blkio.weight_device - %s", err)
|
||||
}
|
||||
|
||||
if value != weightDeviceAfter {
|
||||
t.Fatal("Got the wrong value, set blkio.weight_device failed.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlkioStats(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
|
||||
"blkio.io_serviced_recursive": servicedRecursiveContents,
|
||||
"blkio.io_queued_recursive": queuedRecursiveContents,
|
||||
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
|
||||
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
|
||||
"blkio.io_merged_recursive": mergedRecursiveContents,
|
||||
"blkio.time_recursive": timeRecursiveContents,
|
||||
"blkio.sectors_recursive": sectorsRecursiveContents,
|
||||
})
|
||||
|
||||
blkio := &BlkioGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := blkio.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Verify expected stats.
|
||||
expectedStats := cgroups.BlkioStats{}
|
||||
appendBlkioStatEntry(&expectedStats.SectorsRecursive, 8, 0, 1024, "")
|
||||
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 100, "Read")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 200, "Write")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 300, "Sync")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 500, "Async")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 500, "Total")
|
||||
|
||||
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 10, "Read")
|
||||
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 40, "Write")
|
||||
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 20, "Sync")
|
||||
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 30, "Async")
|
||||
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 50, "Total")
|
||||
|
||||
appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 1, "Read")
|
||||
appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 4, "Write")
|
||||
appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 2, "Sync")
|
||||
appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 3, "Async")
|
||||
appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 5, "Total")
|
||||
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 173959, "Read")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 0, "Write")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 0, "Sync")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 173959, "Async")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 17395, "Total")
|
||||
|
||||
appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 15571, "Read")
|
||||
appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 0, "Write")
|
||||
appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 0, "Sync")
|
||||
appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 15571, "Async")
|
||||
appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 15571, "Total")
|
||||
|
||||
appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 5, "Read")
|
||||
appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 10, "Write")
|
||||
appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 0, "Sync")
|
||||
appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 0, "Async")
|
||||
appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 15, "Total")
|
||||
|
||||
appendBlkioStatEntry(&expectedStats.IoTimeRecursive, 8, 0, 8, "")
|
||||
|
||||
expectBlkioStatsEquals(t, expectedStats, actualStats.BlkioStats)
|
||||
}
|
||||
|
||||
func TestBlkioStatsNoSectorsFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
|
||||
"blkio.io_serviced_recursive": servicedRecursiveContents,
|
||||
"blkio.io_queued_recursive": queuedRecursiveContents,
|
||||
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
|
||||
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
|
||||
"blkio.io_merged_recursive": mergedRecursiveContents,
|
||||
"blkio.time_recursive": timeRecursiveContents,
|
||||
})
|
||||
|
||||
blkio := &BlkioGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := blkio.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed unexpectedly: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlkioStatsNoServiceBytesFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.io_serviced_recursive": servicedRecursiveContents,
|
||||
"blkio.io_queued_recursive": queuedRecursiveContents,
|
||||
"blkio.sectors_recursive": sectorsRecursiveContents,
|
||||
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
|
||||
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
|
||||
"blkio.io_merged_recursive": mergedRecursiveContents,
|
||||
"blkio.time_recursive": timeRecursiveContents,
|
||||
})
|
||||
|
||||
blkio := &BlkioGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := blkio.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed unexpectedly: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlkioStatsNoServicedFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
|
||||
"blkio.io_queued_recursive": queuedRecursiveContents,
|
||||
"blkio.sectors_recursive": sectorsRecursiveContents,
|
||||
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
|
||||
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
|
||||
"blkio.io_merged_recursive": mergedRecursiveContents,
|
||||
"blkio.time_recursive": timeRecursiveContents,
|
||||
})
|
||||
|
||||
blkio := &BlkioGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := blkio.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed unexpectedly: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlkioStatsNoQueuedFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
|
||||
"blkio.io_serviced_recursive": servicedRecursiveContents,
|
||||
"blkio.sectors_recursive": sectorsRecursiveContents,
|
||||
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
|
||||
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
|
||||
"blkio.io_merged_recursive": mergedRecursiveContents,
|
||||
"blkio.time_recursive": timeRecursiveContents,
|
||||
})
|
||||
|
||||
blkio := &BlkioGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := blkio.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed unexpectedly: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlkioStatsNoServiceTimeFile(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("skipping test in short mode.")
|
||||
}
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
|
||||
"blkio.io_serviced_recursive": servicedRecursiveContents,
|
||||
"blkio.io_queued_recursive": queuedRecursiveContents,
|
||||
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
|
||||
"blkio.io_merged_recursive": mergedRecursiveContents,
|
||||
"blkio.time_recursive": timeRecursiveContents,
|
||||
"blkio.sectors_recursive": sectorsRecursiveContents,
|
||||
})
|
||||
|
||||
blkio := &BlkioGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := blkio.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed unexpectedly: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlkioStatsNoWaitTimeFile(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("skipping test in short mode.")
|
||||
}
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
|
||||
"blkio.io_serviced_recursive": servicedRecursiveContents,
|
||||
"blkio.io_queued_recursive": queuedRecursiveContents,
|
||||
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
|
||||
"blkio.io_merged_recursive": mergedRecursiveContents,
|
||||
"blkio.time_recursive": timeRecursiveContents,
|
||||
"blkio.sectors_recursive": sectorsRecursiveContents,
|
||||
})
|
||||
|
||||
blkio := &BlkioGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := blkio.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed unexpectedly: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlkioStatsNoMergedFile(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("skipping test in short mode.")
|
||||
}
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
|
||||
"blkio.io_serviced_recursive": servicedRecursiveContents,
|
||||
"blkio.io_queued_recursive": queuedRecursiveContents,
|
||||
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
|
||||
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
|
||||
"blkio.time_recursive": timeRecursiveContents,
|
||||
"blkio.sectors_recursive": sectorsRecursiveContents,
|
||||
})
|
||||
|
||||
blkio := &BlkioGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := blkio.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed unexpectedly: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlkioStatsNoTimeFile(t *testing.T) {
|
||||
if testing.Short() {
|
||||
t.Skip("skipping test in short mode.")
|
||||
}
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
|
||||
"blkio.io_serviced_recursive": servicedRecursiveContents,
|
||||
"blkio.io_queued_recursive": queuedRecursiveContents,
|
||||
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
|
||||
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
|
||||
"blkio.io_merged_recursive": mergedRecursiveContents,
|
||||
"blkio.sectors_recursive": sectorsRecursiveContents,
|
||||
})
|
||||
|
||||
blkio := &BlkioGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := blkio.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed unexpectedly: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlkioStatsUnexpectedNumberOfFields(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.io_service_bytes_recursive": "8:0 Read 100 100",
|
||||
"blkio.io_serviced_recursive": servicedRecursiveContents,
|
||||
"blkio.io_queued_recursive": queuedRecursiveContents,
|
||||
"blkio.sectors_recursive": sectorsRecursiveContents,
|
||||
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
|
||||
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
|
||||
"blkio.io_merged_recursive": mergedRecursiveContents,
|
||||
"blkio.time_recursive": timeRecursiveContents,
|
||||
})
|
||||
|
||||
blkio := &BlkioGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := blkio.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected to fail, but did not")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlkioStatsUnexpectedFieldType(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.io_service_bytes_recursive": "8:0 Read Write",
|
||||
"blkio.io_serviced_recursive": servicedRecursiveContents,
|
||||
"blkio.io_queued_recursive": queuedRecursiveContents,
|
||||
"blkio.sectors_recursive": sectorsRecursiveContents,
|
||||
"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
|
||||
"blkio.io_wait_time_recursive": waitTimeRecursiveContents,
|
||||
"blkio.io_merged_recursive": mergedRecursiveContents,
|
||||
"blkio.time_recursive": timeRecursiveContents,
|
||||
})
|
||||
|
||||
blkio := &BlkioGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := blkio.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected to fail, but did not")
|
||||
}
|
||||
}
|
||||
|
||||
func TestNonCFQBlkioStats(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.io_service_bytes_recursive": "",
|
||||
"blkio.io_serviced_recursive": "",
|
||||
"blkio.io_queued_recursive": "",
|
||||
"blkio.sectors_recursive": "",
|
||||
"blkio.io_service_time_recursive": "",
|
||||
"blkio.io_wait_time_recursive": "",
|
||||
"blkio.io_merged_recursive": "",
|
||||
"blkio.time_recursive": "",
|
||||
"blkio.throttle.io_service_bytes": throttleServiceBytes,
|
||||
"blkio.throttle.io_serviced": throttleServiced,
|
||||
})
|
||||
|
||||
blkio := &BlkioGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := blkio.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Verify expected stats.
|
||||
expectedStats := cgroups.BlkioStats{}
|
||||
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 11030528, "Read")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 23, "Write")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 42, "Sync")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 11030528, "Async")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 11030528, "Total")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 11030528, "Read")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 23, "Write")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 42, "Sync")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 11030528, "Async")
|
||||
appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 11030528, "Total")
|
||||
|
||||
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 164, "Read")
|
||||
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 23, "Write")
|
||||
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 42, "Sync")
|
||||
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 164, "Async")
|
||||
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 164, "Total")
|
||||
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 164, "Read")
|
||||
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 23, "Write")
|
||||
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 42, "Sync")
|
||||
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 164, "Async")
|
||||
appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 164, "Total")
|
||||
|
||||
expectBlkioStatsEquals(t, expectedStats, actualStats.BlkioStats)
|
||||
}
|
||||
|
||||
func TestBlkioSetThrottleReadBpsDevice(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
throttleBefore = `8:0 1024`
|
||||
)
|
||||
|
||||
td := configs.NewThrottleDevice(8, 0, 2048)
|
||||
throttleAfter := td.String()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.throttle.read_bps_device": throttleBefore,
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.BlkioThrottleReadBpsDevice = []*configs.ThrottleDevice{td}
|
||||
blkio := &BlkioGroup{}
|
||||
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := getCgroupParamString(helper.CgroupPath, "blkio.throttle.read_bps_device")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse blkio.throttle.read_bps_device - %s", err)
|
||||
}
|
||||
|
||||
if value != throttleAfter {
|
||||
t.Fatal("Got the wrong value, set blkio.throttle.read_bps_device failed.")
|
||||
}
|
||||
}
|
||||
func TestBlkioSetThrottleWriteBpsDevice(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
throttleBefore = `8:0 1024`
|
||||
)
|
||||
|
||||
td := configs.NewThrottleDevice(8, 0, 2048)
|
||||
throttleAfter := td.String()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.throttle.write_bps_device": throttleBefore,
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.BlkioThrottleWriteBpsDevice = []*configs.ThrottleDevice{td}
|
||||
blkio := &BlkioGroup{}
|
||||
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := getCgroupParamString(helper.CgroupPath, "blkio.throttle.write_bps_device")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse blkio.throttle.write_bps_device - %s", err)
|
||||
}
|
||||
|
||||
if value != throttleAfter {
|
||||
t.Fatal("Got the wrong value, set blkio.throttle.write_bps_device failed.")
|
||||
}
|
||||
}
|
||||
func TestBlkioSetThrottleReadIOpsDevice(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
throttleBefore = `8:0 1024`
|
||||
)
|
||||
|
||||
td := configs.NewThrottleDevice(8, 0, 2048)
|
||||
throttleAfter := td.String()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.throttle.read_iops_device": throttleBefore,
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.BlkioThrottleReadIOPSDevice = []*configs.ThrottleDevice{td}
|
||||
blkio := &BlkioGroup{}
|
||||
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := getCgroupParamString(helper.CgroupPath, "blkio.throttle.read_iops_device")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse blkio.throttle.read_iops_device - %s", err)
|
||||
}
|
||||
|
||||
if value != throttleAfter {
|
||||
t.Fatal("Got the wrong value, set blkio.throttle.read_iops_device failed.")
|
||||
}
|
||||
}
|
||||
func TestBlkioSetThrottleWriteIOpsDevice(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("blkio", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
throttleBefore = `8:0 1024`
|
||||
)
|
||||
|
||||
td := configs.NewThrottleDevice(8, 0, 2048)
|
||||
throttleAfter := td.String()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"blkio.throttle.write_iops_device": throttleBefore,
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.BlkioThrottleWriteIOPSDevice = []*configs.ThrottleDevice{td}
|
||||
blkio := &BlkioGroup{}
|
||||
if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := getCgroupParamString(helper.CgroupPath, "blkio.throttle.write_iops_device")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse blkio.throttle.write_iops_device - %s", err)
|
||||
}
|
||||
|
||||
if value != throttleAfter {
|
||||
t.Fatal("Got the wrong value, set blkio.throttle.write_iops_device failed.")
|
||||
}
|
||||
}
|
||||
55
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go
generated
vendored
55
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go
generated
vendored
@@ -22,21 +22,59 @@ func (s *CpuGroup) Name() string {
|
||||
func (s *CpuGroup) Apply(d *cgroupData) error {
|
||||
// We always want to join the cpu group, to allow fair cpu scheduling
|
||||
// on a container basis
|
||||
_, err := d.join("cpu")
|
||||
path, err := d.path("cpu")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return s.ApplyDir(path, d.config, d.pid)
|
||||
}
|
||||
|
||||
func (s *CpuGroup) ApplyDir(path string, cgroup *configs.Cgroup, pid int) error {
|
||||
// This might happen if we have no cpu cgroup mounted.
|
||||
// Just do nothing and don't fail.
|
||||
if path == "" {
|
||||
return nil
|
||||
}
|
||||
if err := os.MkdirAll(path, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
// We should set the real-Time group scheduling settings before moving
|
||||
// in the process because if the process is already in SCHED_RR mode
|
||||
// and no RT bandwidth is set, adding it will fail.
|
||||
if err := s.SetRtSched(path, cgroup); err != nil {
|
||||
return err
|
||||
}
|
||||
// because we are not using d.join we need to place the pid into the procs file
|
||||
// unlike the other subsystems
|
||||
if err := cgroups.WriteCgroupProc(path, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpuGroup) SetRtSched(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.CpuRtPeriod != 0 {
|
||||
if err := writeFile(path, "cpu.rt_period_us", strconv.FormatUint(cgroup.Resources.CpuRtPeriod, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpuRtRuntime != 0 {
|
||||
if err := writeFile(path, "cpu.rt_runtime_us", strconv.FormatInt(cgroup.Resources.CpuRtRuntime, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.CpuShares != 0 {
|
||||
if err := writeFile(path, "cpu.shares", strconv.FormatInt(cgroup.Resources.CpuShares, 10)); err != nil {
|
||||
if err := writeFile(path, "cpu.shares", strconv.FormatUint(cgroup.Resources.CpuShares, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpuPeriod != 0 {
|
||||
if err := writeFile(path, "cpu.cfs_period_us", strconv.FormatInt(cgroup.Resources.CpuPeriod, 10)); err != nil {
|
||||
if err := writeFile(path, "cpu.cfs_period_us", strconv.FormatUint(cgroup.Resources.CpuPeriod, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -45,15 +83,8 @@ func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpuRtPeriod != 0 {
|
||||
if err := writeFile(path, "cpu.rt_period_us", strconv.FormatInt(cgroup.Resources.CpuRtPeriod, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.CpuRtRuntime != 0 {
|
||||
if err := writeFile(path, "cpu.rt_runtime_us", strconv.FormatInt(cgroup.Resources.CpuRtRuntime, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.SetRtSched(path, cgroup); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
|
||||
163
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu_test.go
generated
vendored
163
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu_test.go
generated
vendored
@@ -1,163 +0,0 @@
|
||||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
)
|
||||
|
||||
func TestCpuSetShares(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("cpu", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
sharesBefore = 1024
|
||||
sharesAfter = 512
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"cpu.shares": strconv.Itoa(sharesBefore),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.CpuShares = sharesAfter
|
||||
cpu := &CpuGroup{}
|
||||
if err := cpu.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := getCgroupParamUint(helper.CgroupPath, "cpu.shares")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse cpu.shares - %s", err)
|
||||
}
|
||||
|
||||
if value != sharesAfter {
|
||||
t.Fatal("Got the wrong value, set cpu.shares failed.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCpuSetBandWidth(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("cpu", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
quotaBefore = 8000
|
||||
quotaAfter = 5000
|
||||
periodBefore = 10000
|
||||
periodAfter = 7000
|
||||
rtRuntimeBefore = 8000
|
||||
rtRuntimeAfter = 5000
|
||||
rtPeriodBefore = 10000
|
||||
rtPeriodAfter = 7000
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"cpu.cfs_quota_us": strconv.Itoa(quotaBefore),
|
||||
"cpu.cfs_period_us": strconv.Itoa(periodBefore),
|
||||
"cpu.rt_runtime_us": strconv.Itoa(rtRuntimeBefore),
|
||||
"cpu.rt_period_us": strconv.Itoa(rtPeriodBefore),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.CpuQuota = quotaAfter
|
||||
helper.CgroupData.config.Resources.CpuPeriod = periodAfter
|
||||
helper.CgroupData.config.Resources.CpuRtRuntime = rtRuntimeAfter
|
||||
helper.CgroupData.config.Resources.CpuRtPeriod = rtPeriodAfter
|
||||
cpu := &CpuGroup{}
|
||||
if err := cpu.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
quota, err := getCgroupParamUint(helper.CgroupPath, "cpu.cfs_quota_us")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse cpu.cfs_quota_us - %s", err)
|
||||
}
|
||||
if quota != quotaAfter {
|
||||
t.Fatal("Got the wrong value, set cpu.cfs_quota_us failed.")
|
||||
}
|
||||
|
||||
period, err := getCgroupParamUint(helper.CgroupPath, "cpu.cfs_period_us")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse cpu.cfs_period_us - %s", err)
|
||||
}
|
||||
if period != periodAfter {
|
||||
t.Fatal("Got the wrong value, set cpu.cfs_period_us failed.")
|
||||
}
|
||||
rtRuntime, err := getCgroupParamUint(helper.CgroupPath, "cpu.rt_runtime_us")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse cpu.rt_runtime_us - %s", err)
|
||||
}
|
||||
if rtRuntime != rtRuntimeAfter {
|
||||
t.Fatal("Got the wrong value, set cpu.rt_runtime_us failed.")
|
||||
}
|
||||
rtPeriod, err := getCgroupParamUint(helper.CgroupPath, "cpu.rt_period_us")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse cpu.rt_period_us - %s", err)
|
||||
}
|
||||
if rtPeriod != rtPeriodAfter {
|
||||
t.Fatal("Got the wrong value, set cpu.rt_period_us failed.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCpuStats(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("cpu", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
kNrPeriods = 2000
|
||||
kNrThrottled = 200
|
||||
kThrottledTime = uint64(18446744073709551615)
|
||||
)
|
||||
|
||||
cpuStatContent := fmt.Sprintf("nr_periods %d\n nr_throttled %d\n throttled_time %d\n",
|
||||
kNrPeriods, kNrThrottled, kThrottledTime)
|
||||
helper.writeFileContents(map[string]string{
|
||||
"cpu.stat": cpuStatContent,
|
||||
})
|
||||
|
||||
cpu := &CpuGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := cpu.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
expectedStats := cgroups.ThrottlingData{
|
||||
Periods: kNrPeriods,
|
||||
ThrottledPeriods: kNrThrottled,
|
||||
ThrottledTime: kThrottledTime}
|
||||
|
||||
expectThrottlingDataEquals(t, expectedStats, actualStats.CpuStats.ThrottlingData)
|
||||
}
|
||||
|
||||
func TestNoCpuStatFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("cpu", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
cpu := &CpuGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := cpu.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatal("Expected not to fail, but did")
|
||||
}
|
||||
}
|
||||
|
||||
func TestInvalidCpuStat(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("cpu", t)
|
||||
defer helper.cleanup()
|
||||
cpuStatContent := `nr_periods 2000
|
||||
nr_throttled 200
|
||||
throttled_time fortytwo`
|
||||
helper.writeFileContents(map[string]string{
|
||||
"cpu.stat": cpuStatContent,
|
||||
})
|
||||
|
||||
cpu := &CpuGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := cpu.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failed stat parsing.")
|
||||
}
|
||||
}
|
||||
32
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go
generated
vendored
32
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go
generated
vendored
@@ -8,7 +8,6 @@ import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
@@ -58,16 +57,34 @@ func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) erro
|
||||
if dir == "" {
|
||||
return nil
|
||||
}
|
||||
root, err := getCgroupRoot()
|
||||
mountInfo, err := ioutil.ReadFile("/proc/self/mountinfo")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.ensureParent(dir, root); err != nil {
|
||||
root := filepath.Dir(cgroups.GetClosestMountpointAncestor(dir, string(mountInfo)))
|
||||
// 'ensureParent' start with parent because we don't want to
|
||||
// explicitly inherit from parent, it could conflict with
|
||||
// 'cpuset.cpu_exclusive'.
|
||||
if err := s.ensureParent(filepath.Dir(dir), root); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := os.MkdirAll(dir, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
// We didn't inherit cpuset configs from parent, but we have
|
||||
// to ensure cpuset configs are set before moving task into the
|
||||
// cgroup.
|
||||
// The logic is, if user specified cpuset configs, use these
|
||||
// specified configs, otherwise, inherit from parent. This makes
|
||||
// cpuset configs work correctly with 'cpuset.cpu_exclusive', and
|
||||
// keep backward compatbility.
|
||||
if err := s.ensureCpusAndMems(dir, cgroup); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// because we are not using d.join we need to place the pid into the procs file
|
||||
// unlike the other subsystems
|
||||
if err := writeFile(dir, "cgroup.procs", strconv.Itoa(pid)); err != nil {
|
||||
if err := cgroups.WriteCgroupProc(dir, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -137,3 +154,10 @@ func (s *CpusetGroup) copyIfNeeded(current, parent string) error {
|
||||
func (s *CpusetGroup) isEmpty(b []byte) bool {
|
||||
return len(bytes.Trim(b, "\n")) == 0
|
||||
}
|
||||
|
||||
func (s *CpusetGroup) ensureCpusAndMems(path string, cgroup *configs.Cgroup) error {
|
||||
if err := s.Set(path, cgroup); err != nil {
|
||||
return err
|
||||
}
|
||||
return s.copyIfNeeded(path, filepath.Dir(path))
|
||||
}
|
||||
|
||||
65
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset_test.go
generated
vendored
65
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset_test.go
generated
vendored
@@ -1,65 +0,0 @@
|
||||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestCpusetSetCpus(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("cpuset", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
cpusBefore = "0"
|
||||
cpusAfter = "1-3"
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"cpuset.cpus": cpusBefore,
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.CpusetCpus = cpusAfter
|
||||
cpuset := &CpusetGroup{}
|
||||
if err := cpuset.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := getCgroupParamString(helper.CgroupPath, "cpuset.cpus")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse cpuset.cpus - %s", err)
|
||||
}
|
||||
|
||||
if value != cpusAfter {
|
||||
t.Fatal("Got the wrong value, set cpuset.cpus failed.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCpusetSetMems(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("cpuset", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
memsBefore = "0"
|
||||
memsAfter = "1"
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"cpuset.mems": memsBefore,
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.CpusetMems = memsAfter
|
||||
cpuset := &CpusetGroup{}
|
||||
if err := cpuset.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := getCgroupParamString(helper.CgroupPath, "cpuset.mems")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse cpuset.mems - %s", err)
|
||||
}
|
||||
|
||||
if value != memsAfter {
|
||||
t.Fatal("Got the wrong value, set cpuset.mems failed.")
|
||||
}
|
||||
}
|
||||
31
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go
generated
vendored
31
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go
generated
vendored
@@ -5,6 +5,7 @@ package fs
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
)
|
||||
|
||||
type DevicesGroup struct {
|
||||
@@ -25,6 +26,10 @@ func (s *DevicesGroup) Apply(d *cgroupData) error {
|
||||
}
|
||||
|
||||
func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if system.RunningInUserNS() {
|
||||
return nil
|
||||
}
|
||||
|
||||
devices := cgroup.Resources.Devices
|
||||
if len(devices) > 0 {
|
||||
for _, dev := range devices {
|
||||
@@ -38,21 +43,23 @@ func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
}
|
||||
return nil
|
||||
}
|
||||
if !cgroup.Resources.AllowAllDevices {
|
||||
if err := writeFile(path, "devices.deny", "a"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, dev := range cgroup.Resources.AllowedDevices {
|
||||
if err := writeFile(path, "devices.allow", dev.CgroupString()); err != nil {
|
||||
if cgroup.Resources.AllowAllDevices != nil {
|
||||
if *cgroup.Resources.AllowAllDevices == false {
|
||||
if err := writeFile(path, "devices.deny", "a"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := writeFile(path, "devices.allow", "a"); err != nil {
|
||||
return err
|
||||
for _, dev := range cgroup.Resources.AllowedDevices {
|
||||
if err := writeFile(path, "devices.allow", dev.CgroupString()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := writeFile(path, "devices.allow", "a"); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
for _, dev := range cgroup.Resources.DeniedDevices {
|
||||
|
||||
84
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices_test.go
generated
vendored
84
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices_test.go
generated
vendored
@@ -1,84 +0,0 @@
|
||||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
var (
|
||||
allowedDevices = []*configs.Device{
|
||||
{
|
||||
Path: "/dev/zero",
|
||||
Type: 'c',
|
||||
Major: 1,
|
||||
Minor: 5,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
}
|
||||
allowedList = "c 1:5 rwm"
|
||||
deniedDevices = []*configs.Device{
|
||||
{
|
||||
Path: "/dev/null",
|
||||
Type: 'c',
|
||||
Major: 1,
|
||||
Minor: 3,
|
||||
Permissions: "rwm",
|
||||
FileMode: 0666,
|
||||
},
|
||||
}
|
||||
deniedList = "c 1:3 rwm"
|
||||
)
|
||||
|
||||
func TestDevicesSetAllow(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("devices", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"devices.deny": "a",
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.AllowAllDevices = false
|
||||
helper.CgroupData.config.Resources.AllowedDevices = allowedDevices
|
||||
devices := &DevicesGroup{}
|
||||
if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := getCgroupParamString(helper.CgroupPath, "devices.allow")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse devices.allow - %s", err)
|
||||
}
|
||||
|
||||
if value != allowedList {
|
||||
t.Fatal("Got the wrong value, set devices.allow failed.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDevicesSetDeny(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("devices", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"devices.allow": "a",
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.AllowAllDevices = true
|
||||
helper.CgroupData.config.Resources.DeniedDevices = deniedDevices
|
||||
devices := &DevicesGroup{}
|
||||
if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := getCgroupParamString(helper.CgroupPath, "devices.deny")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse devices.deny - %s", err)
|
||||
}
|
||||
|
||||
if value != deniedList {
|
||||
t.Fatal("Got the wrong value, set devices.deny failed.")
|
||||
}
|
||||
}
|
||||
13
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go
generated
vendored
13
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go
generated
vendored
@@ -29,11 +29,15 @@ func (s *FreezerGroup) Apply(d *cgroupData) error {
|
||||
func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
switch cgroup.Resources.Freezer {
|
||||
case configs.Frozen, configs.Thawed:
|
||||
if err := writeFile(path, "freezer.state", string(cgroup.Resources.Freezer)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for {
|
||||
// In case this loop does not exit because it doesn't get the expected
|
||||
// state, let's write again this state, hoping it's going to be properly
|
||||
// set this time. Otherwise, this loop could run infinitely, waiting for
|
||||
// a state change that would never happen.
|
||||
if err := writeFile(path, "freezer.state", string(cgroup.Resources.Freezer)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
state, err := readFile(path, "freezer.state")
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -41,6 +45,7 @@ func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if strings.TrimSpace(state) == string(cgroup.Resources.Freezer) {
|
||||
break
|
||||
}
|
||||
|
||||
time.Sleep(1 * time.Millisecond)
|
||||
}
|
||||
case configs.Undefined:
|
||||
|
||||
47
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer_test.go
generated
vendored
47
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer_test.go
generated
vendored
@@ -1,47 +0,0 @@
|
||||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
func TestFreezerSetState(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("freezer", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"freezer.state": string(configs.Frozen),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.Freezer = configs.Thawed
|
||||
freezer := &FreezerGroup{}
|
||||
if err := freezer.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := getCgroupParamString(helper.CgroupPath, "freezer.state")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse freezer.state - %s", err)
|
||||
}
|
||||
if value != string(configs.Thawed) {
|
||||
t.Fatal("Got the wrong value, set freezer.state failed.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestFreezerSetInvalidState(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("freezer", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
invalidArg configs.FreezerState = "Invalid"
|
||||
)
|
||||
|
||||
helper.CgroupData.config.Resources.Freezer = invalidArg
|
||||
freezer := &FreezerGroup{}
|
||||
if err := freezer.Set(helper.CgroupPath, helper.CgroupData.config); err == nil {
|
||||
t.Fatal("Failed to return invalid argument error")
|
||||
}
|
||||
}
|
||||
154
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb_test.go
generated
vendored
154
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb_test.go
generated
vendored
@@ -1,154 +0,0 @@
|
||||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
const (
|
||||
hugetlbUsageContents = "128\n"
|
||||
hugetlbMaxUsageContents = "256\n"
|
||||
hugetlbFailcnt = "100\n"
|
||||
)
|
||||
|
||||
var (
|
||||
usage = "hugetlb.%s.usage_in_bytes"
|
||||
limit = "hugetlb.%s.limit_in_bytes"
|
||||
maxUsage = "hugetlb.%s.max_usage_in_bytes"
|
||||
failcnt = "hugetlb.%s.failcnt"
|
||||
)
|
||||
|
||||
func TestHugetlbSetHugetlb(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("hugetlb", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
hugetlbBefore = 256
|
||||
hugetlbAfter = 512
|
||||
)
|
||||
|
||||
for _, pageSize := range HugePageSizes {
|
||||
helper.writeFileContents(map[string]string{
|
||||
fmt.Sprintf(limit, pageSize): strconv.Itoa(hugetlbBefore),
|
||||
})
|
||||
}
|
||||
|
||||
for _, pageSize := range HugePageSizes {
|
||||
helper.CgroupData.config.Resources.HugetlbLimit = []*configs.HugepageLimit{
|
||||
{
|
||||
Pagesize: pageSize,
|
||||
Limit: hugetlbAfter,
|
||||
},
|
||||
}
|
||||
hugetlb := &HugetlbGroup{}
|
||||
if err := hugetlb.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
for _, pageSize := range HugePageSizes {
|
||||
limit := fmt.Sprintf(limit, pageSize)
|
||||
value, err := getCgroupParamUint(helper.CgroupPath, limit)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse %s - %s", limit, err)
|
||||
}
|
||||
if value != hugetlbAfter {
|
||||
t.Fatalf("Set hugetlb.limit_in_bytes failed. Expected: %v, Got: %v", hugetlbAfter, value)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestHugetlbStats(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("hugetlb", t)
|
||||
defer helper.cleanup()
|
||||
for _, pageSize := range HugePageSizes {
|
||||
helper.writeFileContents(map[string]string{
|
||||
fmt.Sprintf(usage, pageSize): hugetlbUsageContents,
|
||||
fmt.Sprintf(maxUsage, pageSize): hugetlbMaxUsageContents,
|
||||
fmt.Sprintf(failcnt, pageSize): hugetlbFailcnt,
|
||||
})
|
||||
}
|
||||
|
||||
hugetlb := &HugetlbGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
expectedStats := cgroups.HugetlbStats{Usage: 128, MaxUsage: 256, Failcnt: 100}
|
||||
for _, pageSize := range HugePageSizes {
|
||||
expectHugetlbStatEquals(t, expectedStats, actualStats.HugetlbStats[pageSize])
|
||||
}
|
||||
}
|
||||
|
||||
func TestHugetlbStatsNoUsageFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("hugetlb", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
maxUsage: hugetlbMaxUsageContents,
|
||||
})
|
||||
|
||||
hugetlb := &HugetlbGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
}
|
||||
|
||||
func TestHugetlbStatsNoMaxUsageFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("hugetlb", t)
|
||||
defer helper.cleanup()
|
||||
for _, pageSize := range HugePageSizes {
|
||||
helper.writeFileContents(map[string]string{
|
||||
fmt.Sprintf(usage, pageSize): hugetlbUsageContents,
|
||||
})
|
||||
}
|
||||
|
||||
hugetlb := &HugetlbGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
}
|
||||
|
||||
func TestHugetlbStatsBadUsageFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("hugetlb", t)
|
||||
defer helper.cleanup()
|
||||
for _, pageSize := range HugePageSizes {
|
||||
helper.writeFileContents(map[string]string{
|
||||
fmt.Sprintf(usage, pageSize): "bad",
|
||||
maxUsage: hugetlbMaxUsageContents,
|
||||
})
|
||||
}
|
||||
|
||||
hugetlb := &HugetlbGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
}
|
||||
|
||||
func TestHugetlbStatsBadMaxUsageFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("hugetlb", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
usage: hugetlbUsageContents,
|
||||
maxUsage: "bad",
|
||||
})
|
||||
|
||||
hugetlb := &HugetlbGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
}
|
||||
156
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go
generated
vendored
156
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go
generated
vendored
@@ -5,13 +5,23 @@ package fs
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall" // only for Errno
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const (
|
||||
cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
|
||||
cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes"
|
||||
cgroupMemoryLimit = "memory.limit_in_bytes"
|
||||
)
|
||||
|
||||
type MemoryGroup struct {
|
||||
@@ -25,20 +35,23 @@ func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
|
||||
path, err := d.path("memory")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
} else if path == "" {
|
||||
return nil
|
||||
}
|
||||
if memoryAssigned(d.config) {
|
||||
if path != "" {
|
||||
if _, err := os.Stat(path); os.IsNotExist(err) {
|
||||
if err := os.MkdirAll(path, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// We have to set kernel memory here, as we can't change it once
|
||||
// processes have been attached.
|
||||
if err := s.SetKernelMemory(path, d.config); err != nil {
|
||||
return err
|
||||
// Only enable kernel memory accouting when this cgroup
|
||||
// is created by libcontainer, otherwise we might get
|
||||
// error when people use `cgroupsPath` to join an existed
|
||||
// cgroup whose kernel memory is not initialized.
|
||||
if err := EnableKernelMemoryAccounting(path); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
defer func() {
|
||||
if err != nil {
|
||||
os.RemoveAll(path)
|
||||
@@ -54,30 +67,115 @@ func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *MemoryGroup) SetKernelMemory(path string, cgroup *configs.Cgroup) error {
|
||||
// This has to be done separately because it has special constraints (it
|
||||
// can't be done after there are processes attached to the cgroup).
|
||||
if cgroup.Resources.KernelMemory > 0 {
|
||||
if err := writeFile(path, "memory.kmem.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemory, 10)); err != nil {
|
||||
func EnableKernelMemoryAccounting(path string) error {
|
||||
// Check if kernel memory is enabled
|
||||
// We have to limit the kernel memory here as it won't be accounted at all
|
||||
// until a limit is set on the cgroup and limit cannot be set once the
|
||||
// cgroup has children, or if there are already tasks in the cgroup.
|
||||
for _, i := range []int64{1, -1} {
|
||||
if err := setKernelMemory(path, i); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setKernelMemory(path string, kernelMemoryLimit int64) error {
|
||||
if path == "" {
|
||||
return fmt.Errorf("no such directory for %s", cgroupKernelMemoryLimit)
|
||||
}
|
||||
if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
|
||||
// kernel memory is not enabled on the system so we should do nothing
|
||||
return nil
|
||||
}
|
||||
if err := ioutil.WriteFile(filepath.Join(path, cgroupKernelMemoryLimit), []byte(strconv.FormatInt(kernelMemoryLimit, 10)), 0700); err != nil {
|
||||
// Check if the error number returned by the syscall is "EBUSY"
|
||||
// The EBUSY signal is returned on attempts to write to the
|
||||
// memory.kmem.limit_in_bytes file if the cgroup has children or
|
||||
// once tasks have been attached to the cgroup
|
||||
if pathErr, ok := err.(*os.PathError); ok {
|
||||
if errNo, ok := pathErr.Err.(syscall.Errno); ok {
|
||||
if errNo == unix.EBUSY {
|
||||
return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit)
|
||||
}
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("failed to write %v to %v: %v", kernelMemoryLimit, cgroupKernelMemoryLimit, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
|
||||
// If the memory update is set to -1 we should also
|
||||
// set swap to -1, it means unlimited memory.
|
||||
if cgroup.Resources.Memory == -1 {
|
||||
// Only set swap if it's enabled in kernel
|
||||
if cgroups.PathExists(filepath.Join(path, cgroupMemorySwapLimit)) {
|
||||
cgroup.Resources.MemorySwap = -1
|
||||
}
|
||||
}
|
||||
|
||||
// When memory and swap memory are both set, we need to handle the cases
|
||||
// for updating container.
|
||||
if cgroup.Resources.Memory != 0 && cgroup.Resources.MemorySwap != 0 {
|
||||
memoryUsage, err := getMemoryData(path, "")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// When update memory limit, we should adapt the write sequence
|
||||
// for memory and swap memory, so it won't fail because the new
|
||||
// value and the old value don't fit kernel's validation.
|
||||
if cgroup.Resources.MemorySwap == -1 || memoryUsage.Limit < uint64(cgroup.Resources.MemorySwap) {
|
||||
if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if cgroup.Resources.Memory != 0 {
|
||||
if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.MemorySwap != 0 {
|
||||
if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.Memory != 0 {
|
||||
if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
|
||||
if err := setMemoryAndSwap(path, cgroup); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if cgroup.Resources.KernelMemory != 0 {
|
||||
if err := setKernelMemory(path, cgroup.Resources.KernelMemory); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if cgroup.Resources.MemoryReservation != 0 {
|
||||
if err := writeFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.MemorySwap > 0 {
|
||||
if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
|
||||
|
||||
if cgroup.Resources.KernelMemoryTCP != 0 {
|
||||
if err := writeFile(path, "memory.kmem.tcp.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemoryTCP, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -86,14 +184,14 @@ func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if cgroup.Resources.MemorySwappiness >= 0 && cgroup.Resources.MemorySwappiness <= 100 {
|
||||
if err := writeFile(path, "memory.swappiness", strconv.FormatInt(cgroup.Resources.MemorySwappiness, 10)); err != nil {
|
||||
if cgroup.Resources.MemorySwappiness == nil || int64(*cgroup.Resources.MemorySwappiness) == -1 {
|
||||
return nil
|
||||
} else if *cgroup.Resources.MemorySwappiness <= 100 {
|
||||
if err := writeFile(path, "memory.swappiness", strconv.FormatUint(*cgroup.Resources.MemorySwappiness, 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
} else if cgroup.Resources.MemorySwappiness == -1 {
|
||||
return nil
|
||||
} else {
|
||||
return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", cgroup.Resources.MemorySwappiness)
|
||||
return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", *cgroup.Resources.MemorySwappiness)
|
||||
}
|
||||
|
||||
return nil
|
||||
@@ -139,7 +237,20 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.KernelUsage = kernelUsage
|
||||
kernelTCPUsage, err := getMemoryData(path, "kmem.tcp")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
stats.MemoryStats.KernelTCPUsage = kernelTCPUsage
|
||||
|
||||
useHierarchy := strings.Join([]string{"memory", "use_hierarchy"}, ".")
|
||||
value, err := getCgroupParamUint(path, useHierarchy)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if value == 1 {
|
||||
stats.MemoryStats.UseHierarchy = true
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -148,8 +259,9 @@ func memoryAssigned(cgroup *configs.Cgroup) bool {
|
||||
cgroup.Resources.MemoryReservation != 0 ||
|
||||
cgroup.Resources.MemorySwap > 0 ||
|
||||
cgroup.Resources.KernelMemory > 0 ||
|
||||
cgroup.Resources.KernelMemoryTCP > 0 ||
|
||||
cgroup.Resources.OomKillDisable ||
|
||||
cgroup.Resources.MemorySwappiness != -1
|
||||
(cgroup.Resources.MemorySwappiness != nil && int64(*cgroup.Resources.MemorySwappiness) != -1)
|
||||
}
|
||||
|
||||
func getMemoryData(path, name string) (cgroups.MemoryData, error) {
|
||||
|
||||
339
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory_test.go
generated
vendored
339
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory_test.go
generated
vendored
@@ -1,339 +0,0 @@
|
||||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
)
|
||||
|
||||
const (
|
||||
memoryStatContents = `cache 512
|
||||
rss 1024`
|
||||
memoryUsageContents = "2048\n"
|
||||
memoryMaxUsageContents = "4096\n"
|
||||
memoryFailcnt = "100\n"
|
||||
memoryLimitContents = "8192\n"
|
||||
)
|
||||
|
||||
func TestMemorySetMemory(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
memoryBefore = 314572800 // 300M
|
||||
memoryAfter = 524288000 // 500M
|
||||
reservationBefore = 209715200 // 200M
|
||||
reservationAfter = 314572800 // 300M
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.limit_in_bytes": strconv.Itoa(memoryBefore),
|
||||
"memory.soft_limit_in_bytes": strconv.Itoa(reservationBefore),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.Memory = memoryAfter
|
||||
helper.CgroupData.config.Resources.MemoryReservation = reservationAfter
|
||||
memory := &MemoryGroup{}
|
||||
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := getCgroupParamUint(helper.CgroupPath, "memory.limit_in_bytes")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err)
|
||||
}
|
||||
if value != memoryAfter {
|
||||
t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.")
|
||||
}
|
||||
|
||||
value, err = getCgroupParamUint(helper.CgroupPath, "memory.soft_limit_in_bytes")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse memory.soft_limit_in_bytes - %s", err)
|
||||
}
|
||||
if value != reservationAfter {
|
||||
t.Fatal("Got the wrong value, set memory.soft_limit_in_bytes failed.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemorySetMemoryswap(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
memoryswapBefore = 314572800 // 300M
|
||||
memoryswapAfter = 524288000 // 500M
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.MemorySwap = memoryswapAfter
|
||||
memory := &MemoryGroup{}
|
||||
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := getCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err)
|
||||
}
|
||||
if value != memoryswapAfter {
|
||||
t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemorySetKernelMemory(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
kernelMemoryBefore = 314572800 // 300M
|
||||
kernelMemoryAfter = 524288000 // 500M
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.kmem.limit_in_bytes": strconv.Itoa(kernelMemoryBefore),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.KernelMemory = kernelMemoryAfter
|
||||
memory := &MemoryGroup{}
|
||||
if err := memory.SetKernelMemory(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := getCgroupParamUint(helper.CgroupPath, "memory.kmem.limit_in_bytes")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse memory.kmem.limit_in_bytes - %s", err)
|
||||
}
|
||||
if value != kernelMemoryAfter {
|
||||
t.Fatal("Got the wrong value, set memory.kmem.limit_in_bytes failed.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemorySetMemorySwappinessDefault(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
swappinessBefore = 60 //deafult is 60
|
||||
swappinessAfter = 0
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.swappiness": strconv.Itoa(swappinessBefore),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.Memory = swappinessAfter
|
||||
memory := &MemoryGroup{}
|
||||
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := getCgroupParamUint(helper.CgroupPath, "memory.swappiness")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse memory.swappiness - %s", err)
|
||||
}
|
||||
if value != swappinessAfter {
|
||||
t.Fatal("Got the wrong value, set memory.swappiness failed.")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoryStats(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.stat": memoryStatContents,
|
||||
"memory.usage_in_bytes": memoryUsageContents,
|
||||
"memory.limit_in_bytes": memoryLimitContents,
|
||||
"memory.max_usage_in_bytes": memoryMaxUsageContents,
|
||||
"memory.failcnt": memoryFailcnt,
|
||||
"memory.memsw.usage_in_bytes": memoryUsageContents,
|
||||
"memory.memsw.max_usage_in_bytes": memoryMaxUsageContents,
|
||||
"memory.memsw.failcnt": memoryFailcnt,
|
||||
"memory.memsw.limit_in_bytes": memoryLimitContents,
|
||||
"memory.kmem.usage_in_bytes": memoryUsageContents,
|
||||
"memory.kmem.max_usage_in_bytes": memoryMaxUsageContents,
|
||||
"memory.kmem.failcnt": memoryFailcnt,
|
||||
"memory.kmem.limit_in_bytes": memoryLimitContents,
|
||||
})
|
||||
|
||||
memory := &MemoryGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
expectedStats := cgroups.MemoryStats{Cache: 512, Usage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192}, SwapUsage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192}, KernelUsage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192}, Stats: map[string]uint64{"cache": 512, "rss": 1024}}
|
||||
expectMemoryStatEquals(t, expectedStats, actualStats.MemoryStats)
|
||||
}
|
||||
|
||||
func TestMemoryStatsNoStatFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.usage_in_bytes": memoryUsageContents,
|
||||
"memory.max_usage_in_bytes": memoryMaxUsageContents,
|
||||
"memory.limit_in_bytes": memoryLimitContents,
|
||||
})
|
||||
|
||||
memory := &MemoryGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoryStatsNoUsageFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.stat": memoryStatContents,
|
||||
"memory.max_usage_in_bytes": memoryMaxUsageContents,
|
||||
"memory.limit_in_bytes": memoryLimitContents,
|
||||
})
|
||||
|
||||
memory := &MemoryGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoryStatsNoMaxUsageFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.stat": memoryStatContents,
|
||||
"memory.usage_in_bytes": memoryUsageContents,
|
||||
"memory.limit_in_bytes": memoryLimitContents,
|
||||
})
|
||||
|
||||
memory := &MemoryGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoryStatsNoLimitInBytesFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.stat": memoryStatContents,
|
||||
"memory.usage_in_bytes": memoryUsageContents,
|
||||
"memory.max_usage_in_bytes": memoryMaxUsageContents,
|
||||
})
|
||||
|
||||
memory := &MemoryGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoryStatsBadStatFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.stat": "rss rss",
|
||||
"memory.usage_in_bytes": memoryUsageContents,
|
||||
"memory.max_usage_in_bytes": memoryMaxUsageContents,
|
||||
"memory.limit_in_bytes": memoryLimitContents,
|
||||
})
|
||||
|
||||
memory := &MemoryGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoryStatsBadUsageFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.stat": memoryStatContents,
|
||||
"memory.usage_in_bytes": "bad",
|
||||
"memory.max_usage_in_bytes": memoryMaxUsageContents,
|
||||
"memory.limit_in_bytes": memoryLimitContents,
|
||||
})
|
||||
|
||||
memory := &MemoryGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoryStatsBadMaxUsageFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.stat": memoryStatContents,
|
||||
"memory.usage_in_bytes": memoryUsageContents,
|
||||
"memory.max_usage_in_bytes": "bad",
|
||||
"memory.limit_in_bytes": memoryLimitContents,
|
||||
})
|
||||
|
||||
memory := &MemoryGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemoryStatsBadLimitInBytesFile(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.stat": memoryStatContents,
|
||||
"memory.usage_in_bytes": memoryUsageContents,
|
||||
"memory.max_usage_in_bytes": memoryMaxUsageContents,
|
||||
"memory.limit_in_bytes": "bad",
|
||||
})
|
||||
|
||||
memory := &MemoryGroup{}
|
||||
actualStats := *cgroups.NewStats()
|
||||
err := memory.GetStats(helper.CgroupPath, &actualStats)
|
||||
if err == nil {
|
||||
t.Fatal("Expected failure")
|
||||
}
|
||||
}
|
||||
|
||||
func TestMemorySetOomControl(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("memory", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
const (
|
||||
oom_kill_disable = 1 // disable oom killer, default is 0
|
||||
)
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"memory.oom_control": strconv.Itoa(oom_kill_disable),
|
||||
})
|
||||
|
||||
memory := &MemoryGroup{}
|
||||
if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := getCgroupParamUint(helper.CgroupPath, "memory.oom_control")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse memory.oom_control - %s", err)
|
||||
}
|
||||
|
||||
if value != oom_kill_disable {
|
||||
t.Fatalf("Got the wrong value, set memory.oom_control failed.")
|
||||
}
|
||||
}
|
||||
8
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go
generated
vendored
8
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go
generated
vendored
@@ -9,6 +9,7 @@ import (
|
||||
|
||||
type NameGroup struct {
|
||||
GroupName string
|
||||
Join bool
|
||||
}
|
||||
|
||||
func (s *NameGroup) Name() string {
|
||||
@@ -16,6 +17,10 @@ func (s *NameGroup) Name() string {
|
||||
}
|
||||
|
||||
func (s *NameGroup) Apply(d *cgroupData) error {
|
||||
if s.Join {
|
||||
// ignore errors if the named cgroup does not exist
|
||||
d.join(s.GroupName)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -24,6 +29,9 @@ func (s *NameGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
}
|
||||
|
||||
func (s *NameGroup) Remove(d *cgroupData) error {
|
||||
if s.Join {
|
||||
removePath(d.path(s.GroupName))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
6
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go
generated
vendored
6
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go
generated
vendored
@@ -3,6 +3,8 @@
|
||||
package fs
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
@@ -23,8 +25,8 @@ func (s *NetClsGroup) Apply(d *cgroupData) error {
|
||||
}
|
||||
|
||||
func (s *NetClsGroup) Set(path string, cgroup *configs.Cgroup) error {
|
||||
if cgroup.Resources.NetClsClassid != "" {
|
||||
if err := writeFile(path, "net_cls.classid", cgroup.Resources.NetClsClassid); err != nil {
|
||||
if cgroup.Resources.NetClsClassid != 0 {
|
||||
if err := writeFile(path, "net_cls.classid", strconv.FormatUint(uint64(cgroup.Resources.NetClsClassid), 10)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
38
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls_test.go
generated
vendored
38
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls_test.go
generated
vendored
@@ -1,38 +0,0 @@
|
||||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
const (
|
||||
classidBefore = "0x100002"
|
||||
classidAfter = "0x100001"
|
||||
)
|
||||
|
||||
func TestNetClsSetClassid(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("net_cls", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"net_cls.classid": classidBefore,
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.NetClsClassid = classidAfter
|
||||
netcls := &NetClsGroup{}
|
||||
if err := netcls.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// As we are in mock environment, we can't get correct value of classid from
|
||||
// net_cls.classid.
|
||||
// So. we just judge if we successfully write classid into file
|
||||
value, err := getCgroupParamString(helper.CgroupPath, "net_cls.classid")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse net_cls.classid - %s", err)
|
||||
}
|
||||
if value != classidAfter {
|
||||
t.Fatal("Got the wrong value, set net_cls.classid failed.")
|
||||
}
|
||||
}
|
||||
38
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio_test.go
generated
vendored
38
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio_test.go
generated
vendored
@@ -1,38 +0,0 @@
|
||||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
var (
|
||||
prioMap = []*configs.IfPrioMap{
|
||||
{
|
||||
Interface: "test",
|
||||
Priority: 5,
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
func TestNetPrioSetIfPrio(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("net_prio", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
helper.CgroupData.config.Resources.NetPrioIfpriomap = prioMap
|
||||
netPrio := &NetPrioGroup{}
|
||||
if err := netPrio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := getCgroupParamString(helper.CgroupPath, "net_prio.ifpriomap")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse net_prio.ifpriomap - %s", err)
|
||||
}
|
||||
if !strings.Contains(value, "test 5") {
|
||||
t.Fatal("Got the wrong value, set net_prio.ifpriomap failed.")
|
||||
}
|
||||
}
|
||||
20
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go
generated
vendored
20
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go
generated
vendored
@@ -4,6 +4,7 @@ package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
@@ -47,11 +48,26 @@ func (s *PidsGroup) Remove(d *cgroupData) error {
|
||||
}
|
||||
|
||||
func (s *PidsGroup) GetStats(path string, stats *cgroups.Stats) error {
|
||||
value, err := getCgroupParamUint(path, "pids.current")
|
||||
current, err := getCgroupParamUint(path, "pids.current")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse pids.current - %s", err)
|
||||
}
|
||||
|
||||
stats.PidsStats.Current = value
|
||||
maxString, err := getCgroupParamString(path, "pids.max")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse pids.max - %s", err)
|
||||
}
|
||||
|
||||
// Default if pids.max == "max" is 0 -- which represents "no limit".
|
||||
var max uint64
|
||||
if maxString != "max" {
|
||||
max, err = parseUint(maxString, 10, 64)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to parse pids.max - unable to parse %q as a uint from Cgroup file %q", maxString, filepath.Join(path, "pids.max"))
|
||||
}
|
||||
}
|
||||
|
||||
stats.PidsStats.Current = current
|
||||
stats.PidsStats.Limit = max
|
||||
return nil
|
||||
}
|
||||
|
||||
83
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids_test.go
generated
vendored
83
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids_test.go
generated
vendored
@@ -1,83 +0,0 @@
|
||||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"strconv"
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
)
|
||||
|
||||
const (
|
||||
maxUnlimited = -1
|
||||
maxLimited = 1024
|
||||
)
|
||||
|
||||
func TestPidsSetMax(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("pids", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"pids.max": "max",
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.PidsLimit = maxLimited
|
||||
pids := &PidsGroup{}
|
||||
if err := pids.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := getCgroupParamUint(helper.CgroupPath, "pids.max")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse pids.max - %s", err)
|
||||
}
|
||||
|
||||
if value != maxLimited {
|
||||
t.Fatalf("Expected %d, got %d for setting pids.max - limited", maxLimited, value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPidsSetUnlimited(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("pids", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"pids.max": strconv.Itoa(maxLimited),
|
||||
})
|
||||
|
||||
helper.CgroupData.config.Resources.PidsLimit = maxUnlimited
|
||||
pids := &PidsGroup{}
|
||||
if err := pids.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
value, err := getCgroupParamString(helper.CgroupPath, "pids.max")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse pids.max - %s", err)
|
||||
}
|
||||
|
||||
if value != "max" {
|
||||
t.Fatalf("Expected %s, got %s for setting pids.max - unlimited", "max", value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPidsStats(t *testing.T) {
|
||||
helper := NewCgroupTestUtil("pids", t)
|
||||
defer helper.cleanup()
|
||||
|
||||
helper.writeFileContents(map[string]string{
|
||||
"pids.current": strconv.Itoa(1337),
|
||||
"pids.max": strconv.Itoa(maxLimited),
|
||||
})
|
||||
|
||||
pids := &PidsGroup{}
|
||||
stats := *cgroups.NewStats()
|
||||
if err := pids.GetStats(helper.CgroupPath, &stats); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if stats.PidsStats.Current != 1337 {
|
||||
t.Fatalf("Expected %d, got %d for pids.current", 1337, stats.PidsStats.Current)
|
||||
}
|
||||
}
|
||||
117
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/stats_util_test.go
generated
vendored
117
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/stats_util_test.go
generated
vendored
@@ -1,117 +0,0 @@
|
||||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/Sirupsen/logrus"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
)
|
||||
|
||||
func blkioStatEntryEquals(expected, actual []cgroups.BlkioStatEntry) error {
|
||||
if len(expected) != len(actual) {
|
||||
return fmt.Errorf("blkioStatEntries length do not match")
|
||||
}
|
||||
for i, expValue := range expected {
|
||||
actValue := actual[i]
|
||||
if expValue != actValue {
|
||||
return fmt.Errorf("Expected blkio stat entry %v but found %v", expValue, actValue)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func expectBlkioStatsEquals(t *testing.T, expected, actual cgroups.BlkioStats) {
|
||||
if err := blkioStatEntryEquals(expected.IoServiceBytesRecursive, actual.IoServiceBytesRecursive); err != nil {
|
||||
logrus.Printf("blkio IoServiceBytesRecursive do not match - %s\n", err)
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
if err := blkioStatEntryEquals(expected.IoServicedRecursive, actual.IoServicedRecursive); err != nil {
|
||||
logrus.Printf("blkio IoServicedRecursive do not match - %s\n", err)
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
if err := blkioStatEntryEquals(expected.IoQueuedRecursive, actual.IoQueuedRecursive); err != nil {
|
||||
logrus.Printf("blkio IoQueuedRecursive do not match - %s\n", err)
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
if err := blkioStatEntryEquals(expected.SectorsRecursive, actual.SectorsRecursive); err != nil {
|
||||
logrus.Printf("blkio SectorsRecursive do not match - %s\n", err)
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
if err := blkioStatEntryEquals(expected.IoServiceTimeRecursive, actual.IoServiceTimeRecursive); err != nil {
|
||||
logrus.Printf("blkio IoServiceTimeRecursive do not match - %s\n", err)
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
if err := blkioStatEntryEquals(expected.IoWaitTimeRecursive, actual.IoWaitTimeRecursive); err != nil {
|
||||
logrus.Printf("blkio IoWaitTimeRecursive do not match - %s\n", err)
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
if err := blkioStatEntryEquals(expected.IoMergedRecursive, actual.IoMergedRecursive); err != nil {
|
||||
logrus.Printf("blkio IoMergedRecursive do not match - %v vs %v\n", expected.IoMergedRecursive, actual.IoMergedRecursive)
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
if err := blkioStatEntryEquals(expected.IoTimeRecursive, actual.IoTimeRecursive); err != nil {
|
||||
logrus.Printf("blkio IoTimeRecursive do not match - %s\n", err)
|
||||
t.Fail()
|
||||
}
|
||||
}
|
||||
|
||||
func expectThrottlingDataEquals(t *testing.T, expected, actual cgroups.ThrottlingData) {
|
||||
if expected != actual {
|
||||
logrus.Printf("Expected throttling data %v but found %v\n", expected, actual)
|
||||
t.Fail()
|
||||
}
|
||||
}
|
||||
|
||||
func expectHugetlbStatEquals(t *testing.T, expected, actual cgroups.HugetlbStats) {
|
||||
if expected != actual {
|
||||
logrus.Printf("Expected hugetlb stats %v but found %v\n", expected, actual)
|
||||
t.Fail()
|
||||
}
|
||||
}
|
||||
|
||||
func expectMemoryStatEquals(t *testing.T, expected, actual cgroups.MemoryStats) {
|
||||
expectMemoryDataEquals(t, expected.Usage, actual.Usage)
|
||||
expectMemoryDataEquals(t, expected.SwapUsage, actual.SwapUsage)
|
||||
expectMemoryDataEquals(t, expected.KernelUsage, actual.KernelUsage)
|
||||
|
||||
for key, expValue := range expected.Stats {
|
||||
actValue, ok := actual.Stats[key]
|
||||
if !ok {
|
||||
logrus.Printf("Expected memory stat key %s not found\n", key)
|
||||
t.Fail()
|
||||
}
|
||||
if expValue != actValue {
|
||||
logrus.Printf("Expected memory stat value %d but found %d\n", expValue, actValue)
|
||||
t.Fail()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func expectMemoryDataEquals(t *testing.T, expected, actual cgroups.MemoryData) {
|
||||
if expected.Usage != actual.Usage {
|
||||
logrus.Printf("Expected memory usage %d but found %d\n", expected.Usage, actual.Usage)
|
||||
t.Fail()
|
||||
}
|
||||
if expected.MaxUsage != actual.MaxUsage {
|
||||
logrus.Printf("Expected memory max usage %d but found %d\n", expected.MaxUsage, actual.MaxUsage)
|
||||
t.Fail()
|
||||
}
|
||||
if expected.Failcnt != actual.Failcnt {
|
||||
logrus.Printf("Expected memory failcnt %d but found %d\n", expected.Failcnt, actual.Failcnt)
|
||||
t.Fail()
|
||||
}
|
||||
if expected.Limit != actual.Limit {
|
||||
logrus.Printf("Expected memory limit %d but found %d\n", expected.Limit, actual.Limit)
|
||||
t.Fail()
|
||||
}
|
||||
}
|
||||
67
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/util_test.go
generated
vendored
67
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/util_test.go
generated
vendored
@@ -1,67 +0,0 @@
|
||||
// +build linux
|
||||
|
||||
/*
|
||||
Utility for testing cgroup operations.
|
||||
|
||||
Creates a mock of the cgroup filesystem for the duration of the test.
|
||||
*/
|
||||
package fs
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type cgroupTestUtil struct {
|
||||
// cgroup data to use in tests.
|
||||
CgroupData *cgroupData
|
||||
|
||||
// Path to the mock cgroup directory.
|
||||
CgroupPath string
|
||||
|
||||
// Temporary directory to store mock cgroup filesystem.
|
||||
tempDir string
|
||||
t *testing.T
|
||||
}
|
||||
|
||||
// Creates a new test util for the specified subsystem
|
||||
func NewCgroupTestUtil(subsystem string, t *testing.T) *cgroupTestUtil {
|
||||
d := &cgroupData{
|
||||
config: &configs.Cgroup{},
|
||||
}
|
||||
d.config.Resources = &configs.Resources{}
|
||||
tempDir, err := ioutil.TempDir("", "cgroup_test")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
d.root = tempDir
|
||||
testCgroupPath := filepath.Join(d.root, subsystem)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Ensure the full mock cgroup path exists.
|
||||
err = os.MkdirAll(testCgroupPath, 0755)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
return &cgroupTestUtil{CgroupData: d, CgroupPath: testCgroupPath, tempDir: tempDir, t: t}
|
||||
}
|
||||
|
||||
func (c *cgroupTestUtil) cleanup() {
|
||||
os.RemoveAll(c.tempDir)
|
||||
}
|
||||
|
||||
// Write the specified contents on the mock of the specified cgroup files.
|
||||
func (c *cgroupTestUtil) writeFileContents(fileContents map[string]string) {
|
||||
for file, contents := range fileContents {
|
||||
err := writeFile(c.CgroupPath, file, contents)
|
||||
if err != nil {
|
||||
c.t.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
1
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/utils.go
generated
vendored
1
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/utils.go
generated
vendored
@@ -12,7 +12,6 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
ErrNotSupportStat = errors.New("stats are not supported for subsystem")
|
||||
ErrNotValidFormat = errors.New("line is not a valid key value format")
|
||||
)
|
||||
|
||||
|
||||
97
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/utils_test.go
generated
vendored
97
vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/utils_test.go
generated
vendored
@@ -1,97 +0,0 @@
|
||||
// +build linux
|
||||
|
||||
package fs
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"testing"
|
||||
)
|
||||
|
||||
const (
|
||||
cgroupFile = "cgroup.file"
|
||||
floatValue = 2048.0
|
||||
floatString = "2048"
|
||||
)
|
||||
|
||||
func TestGetCgroupParamsInt(t *testing.T) {
|
||||
// Setup tempdir.
|
||||
tempDir, err := ioutil.TempDir("", "cgroup_utils_test")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer os.RemoveAll(tempDir)
|
||||
tempFile := filepath.Join(tempDir, cgroupFile)
|
||||
|
||||
// Success.
|
||||
err = ioutil.WriteFile(tempFile, []byte(floatString), 0755)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
value, err := getCgroupParamUint(tempDir, cgroupFile)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
} else if value != floatValue {
|
||||
t.Fatalf("Expected %d to equal %f", value, floatValue)
|
||||
}
|
||||
|
||||
// Success with new line.
|
||||
err = ioutil.WriteFile(tempFile, []byte(floatString+"\n"), 0755)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
value, err = getCgroupParamUint(tempDir, cgroupFile)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
} else if value != floatValue {
|
||||
t.Fatalf("Expected %d to equal %f", value, floatValue)
|
||||
}
|
||||
|
||||
// Success with negative values
|
||||
err = ioutil.WriteFile(tempFile, []byte("-12345"), 0755)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
value, err = getCgroupParamUint(tempDir, cgroupFile)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
} else if value != 0 {
|
||||
t.Fatalf("Expected %d to equal %d", value, 0)
|
||||
}
|
||||
|
||||
// Success with negative values lesser than min int64
|
||||
s := strconv.FormatFloat(math.MinInt64, 'f', -1, 64)
|
||||
err = ioutil.WriteFile(tempFile, []byte(s), 0755)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
value, err = getCgroupParamUint(tempDir, cgroupFile)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
} else if value != 0 {
|
||||
t.Fatalf("Expected %d to equal %d", value, 0)
|
||||
}
|
||||
|
||||
// Not a float.
|
||||
err = ioutil.WriteFile(tempFile, []byte("not-a-float"), 0755)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
_, err = getCgroupParamUint(tempDir, cgroupFile)
|
||||
if err == nil {
|
||||
t.Fatal("Expecting error, got none")
|
||||
}
|
||||
|
||||
// Unknown file.
|
||||
err = os.Remove(tempFile)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
_, err = getCgroupParamUint(tempDir, cgroupFile)
|
||||
if err == nil {
|
||||
t.Fatal("Expecting error, got none")
|
||||
}
|
||||
}
|
||||
16
vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go
generated
vendored
16
vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go
generated
vendored
@@ -11,6 +11,7 @@ type ThrottlingData struct {
|
||||
ThrottledTime uint64 `json:"throttled_time,omitempty"`
|
||||
}
|
||||
|
||||
// CpuUsage denotes the usage of a CPU.
|
||||
// All CPU stats are aggregate since container inception.
|
||||
type CpuUsage struct {
|
||||
// Total CPU time consumed.
|
||||
@@ -46,14 +47,21 @@ type MemoryStats struct {
|
||||
Usage MemoryData `json:"usage,omitempty"`
|
||||
// usage of memory + swap
|
||||
SwapUsage MemoryData `json:"swap_usage,omitempty"`
|
||||
// usafe of kernel memory
|
||||
KernelUsage MemoryData `json:"kernel_usage,omitempty"`
|
||||
Stats map[string]uint64 `json:"stats,omitempty"`
|
||||
// usage of kernel memory
|
||||
KernelUsage MemoryData `json:"kernel_usage,omitempty"`
|
||||
// usage of kernel TCP memory
|
||||
KernelTCPUsage MemoryData `json:"kernel_tcp_usage,omitempty"`
|
||||
// if true, memory usage is accounted for throughout a hierarchy of cgroups.
|
||||
UseHierarchy bool `json:"use_hierarchy"`
|
||||
|
||||
Stats map[string]uint64 `json:"stats,omitempty"`
|
||||
}
|
||||
|
||||
type PidsStats struct {
|
||||
// number of pids in the cgroup
|
||||
Current uint64 `json:"current,omitempty"`
|
||||
// active pids hard limit
|
||||
Limit uint64 `json:"limit,omitempty"`
|
||||
}
|
||||
|
||||
type BlkioStatEntry struct {
|
||||
@@ -80,7 +88,7 @@ type HugetlbStats struct {
|
||||
Usage uint64 `json:"usage,omitempty"`
|
||||
// maximum usage ever recorded.
|
||||
MaxUsage uint64 `json:"max_usage,omitempty"`
|
||||
// number of times htgetlb usage allocation failure.
|
||||
// number of times hugetlb usage allocation failure.
|
||||
Failcnt uint64 `json:"failcnt"`
|
||||
}
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// +build !linux
|
||||
// +build !linux static_build
|
||||
|
||||
package systemd
|
||||
|
||||
@@ -43,7 +43,7 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
||||
}
|
||||
|
||||
func (m *Manager) Set(container *configs.Config) error {
|
||||
return nil, fmt.Errorf("Systemd not supported")
|
||||
return fmt.Errorf("Systemd not supported")
|
||||
}
|
||||
|
||||
func (m *Manager) Freeze(state configs.FreezerState) error {
|
||||
|
||||
347
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go
generated
vendored
347
vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go
generated
vendored
@@ -1,14 +1,12 @@
|
||||
// +build linux
|
||||
// +build linux,!static_build
|
||||
|
||||
package systemd
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -67,13 +65,16 @@ var subsystems = subsystemSet{
|
||||
|
||||
const (
|
||||
testScopeWait = 4
|
||||
testSliceWait = 4
|
||||
)
|
||||
|
||||
var (
|
||||
connLock sync.Mutex
|
||||
theConn *systemdDbus.Conn
|
||||
hasStartTransientUnit bool
|
||||
hasStartTransientSliceUnit bool
|
||||
hasTransientDefaultDependencies bool
|
||||
hasDelegate bool
|
||||
)
|
||||
|
||||
func newProp(name string, units interface{}) systemdDbus.Property {
|
||||
@@ -146,20 +147,52 @@ func UseSystemd() bool {
|
||||
|
||||
// Not critical because of the stop unit logic above.
|
||||
theConn.StopUnit(scope, "replace", nil)
|
||||
|
||||
// Assume StartTransientUnit on a scope allows Delegate
|
||||
hasDelegate = true
|
||||
dl := newProp("Delegate", true)
|
||||
if _, err := theConn.StartTransientUnit(scope, "replace", []systemdDbus.Property{dl}, nil); err != nil {
|
||||
if dbusError, ok := err.(dbus.Error); ok {
|
||||
if strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.PropertyReadOnly") {
|
||||
hasDelegate = false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Assume we have the ability to start a transient unit as a slice
|
||||
// This was broken until systemd v229, but has been back-ported on RHEL environments >= 219
|
||||
// For details, see: https://bugzilla.redhat.com/show_bug.cgi?id=1370299
|
||||
hasStartTransientSliceUnit = true
|
||||
|
||||
// To ensure simple clean-up, we create a slice off the root with no hierarchy
|
||||
slice := fmt.Sprintf("libcontainer_%d_systemd_test_default.slice", os.Getpid())
|
||||
if _, err := theConn.StartTransientUnit(slice, "replace", nil, nil); err != nil {
|
||||
if _, ok := err.(dbus.Error); ok {
|
||||
hasStartTransientSliceUnit = false
|
||||
}
|
||||
}
|
||||
|
||||
for i := 0; i <= testSliceWait; i++ {
|
||||
if _, err := theConn.StopUnit(slice, "replace", nil); err != nil {
|
||||
if dbusError, ok := err.(dbus.Error); ok {
|
||||
if strings.Contains(dbusError.Name, "org.freedesktop.systemd1.NoSuchUnit") {
|
||||
hasStartTransientSliceUnit = false
|
||||
break
|
||||
}
|
||||
}
|
||||
} else {
|
||||
break
|
||||
}
|
||||
time.Sleep(time.Millisecond)
|
||||
}
|
||||
|
||||
// Not critical because of the stop unit logic above.
|
||||
theConn.StopUnit(scope, "replace", nil)
|
||||
theConn.StopUnit(slice, "replace", nil)
|
||||
}
|
||||
return hasStartTransientUnit
|
||||
}
|
||||
|
||||
func getIfaceForUnit(unitName string) string {
|
||||
if strings.HasSuffix(unitName, ".scope") {
|
||||
return "Scope"
|
||||
}
|
||||
if strings.HasSuffix(unitName, ".service") {
|
||||
return "Service"
|
||||
}
|
||||
return "Unit"
|
||||
}
|
||||
|
||||
func (m *Manager) Apply(pid int) error {
|
||||
var (
|
||||
c = m.Cgroups
|
||||
@@ -189,11 +222,29 @@ func (m *Manager) Apply(pid int) error {
|
||||
slice = c.Parent
|
||||
}
|
||||
|
||||
properties = append(properties,
|
||||
systemdDbus.PropSlice(slice),
|
||||
systemdDbus.PropDescription("docker container "+c.Name),
|
||||
newProp("PIDs", []uint32{uint32(pid)}),
|
||||
)
|
||||
properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name))
|
||||
|
||||
// if we create a slice, the parent is defined via a Wants=
|
||||
if strings.HasSuffix(unitName, ".slice") {
|
||||
// This was broken until systemd v229, but has been back-ported on RHEL environments >= 219
|
||||
if !hasStartTransientSliceUnit {
|
||||
return fmt.Errorf("systemd version does not support ability to start a slice as transient unit")
|
||||
}
|
||||
properties = append(properties, systemdDbus.PropWants(slice))
|
||||
} else {
|
||||
// otherwise, we use Slice=
|
||||
properties = append(properties, systemdDbus.PropSlice(slice))
|
||||
}
|
||||
|
||||
// only add pid if its valid, -1 is used w/ general slice creation.
|
||||
if pid != -1 {
|
||||
properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
|
||||
}
|
||||
|
||||
if hasDelegate {
|
||||
// This is only supported on systemd versions 218 and above.
|
||||
properties = append(properties, newProp("Delegate", true))
|
||||
}
|
||||
|
||||
// Always enable accounting, this gets us the same behaviour as the fs implementation,
|
||||
// plus the kernel has some problems with joining the memory cgroup at a later time.
|
||||
@@ -214,7 +265,21 @@ func (m *Manager) Apply(pid int) error {
|
||||
|
||||
if c.Resources.CpuShares != 0 {
|
||||
properties = append(properties,
|
||||
newProp("CPUShares", uint64(c.Resources.CpuShares)))
|
||||
newProp("CPUShares", c.Resources.CpuShares))
|
||||
}
|
||||
|
||||
// cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd.
|
||||
if c.Resources.CpuQuota != 0 && c.Resources.CpuPeriod != 0 {
|
||||
cpuQuotaPerSecUSec := uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod
|
||||
// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
|
||||
// (integer percentage of CPU) internally. This means that if a fractional percent of
|
||||
// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
|
||||
// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
|
||||
if cpuQuotaPerSecUSec%10000 != 0 {
|
||||
cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
|
||||
}
|
||||
properties = append(properties,
|
||||
newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
|
||||
}
|
||||
|
||||
if c.Resources.BlkioWeight != 0 {
|
||||
@@ -222,67 +287,22 @@ func (m *Manager) Apply(pid int) error {
|
||||
newProp("BlockIOWeight", uint64(c.Resources.BlkioWeight)))
|
||||
}
|
||||
|
||||
// We need to set kernel memory before processes join cgroup because
|
||||
// kmem.limit_in_bytes can only be set when the cgroup is empty.
|
||||
// And swap memory limit needs to be set after memory limit, only
|
||||
// memory limit is handled by systemd, so it's kind of ugly here.
|
||||
if c.Resources.KernelMemory > 0 {
|
||||
// We have to set kernel memory here, as we can't change it once
|
||||
// processes have been attached to the cgroup.
|
||||
if c.Resources.KernelMemory != 0 {
|
||||
if err := setKernelMemory(c); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if _, err := theConn.StartTransientUnit(unitName, "replace", properties, nil); err != nil {
|
||||
statusChan := make(chan string)
|
||||
if _, err := theConn.StartTransientUnit(unitName, "replace", properties, statusChan); err != nil && !isUnitExists(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := joinDevices(c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
<-statusChan
|
||||
|
||||
// TODO: CpuQuota and CpuPeriod not available in systemd
|
||||
// we need to manually join the cpu.cfs_quota_us and cpu.cfs_period_us
|
||||
if err := joinCpu(c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// TODO: MemoryReservation and MemorySwap not available in systemd
|
||||
if err := joinMemory(c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// we need to manually join the freezer, net_cls, net_prio, pids and cpuset cgroup in systemd
|
||||
// because it does not currently support it via the dbus api.
|
||||
if err := joinFreezer(c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := joinNetPrio(c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := joinNetCls(c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := joinPids(c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := joinCpuset(c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := joinHugetlb(c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := joinPerfEvent(c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
// FIXME: Systemd does have `BlockIODeviceWeight` property, but we got problem
|
||||
// using that (at least on systemd 208, see https://github.com/opencontainers/runc/libcontainer/pull/354),
|
||||
// so use fs work around for now.
|
||||
if err := joinBlkio(c, pid); err != nil {
|
||||
if err := joinCgroups(c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -323,15 +343,6 @@ func (m *Manager) GetPaths() map[string]string {
|
||||
return paths
|
||||
}
|
||||
|
||||
func writeFile(dir, file, data string) error {
|
||||
// Normally dir should not be empty, one case is that cgroup subsystem
|
||||
// is not mounted, we will get empty dir, and we want it fail here.
|
||||
if dir == "" {
|
||||
return fmt.Errorf("no such directory for %s.", file)
|
||||
}
|
||||
return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
|
||||
}
|
||||
|
||||
func join(c *configs.Cgroup, subsystem string, pid int) (string, error) {
|
||||
path, err := getSubsystemPath(c, subsystem)
|
||||
if err != nil {
|
||||
@@ -340,57 +351,52 @@ func join(c *configs.Cgroup, subsystem string, pid int) (string, error) {
|
||||
if err := os.MkdirAll(path, 0755); err != nil {
|
||||
return "", err
|
||||
}
|
||||
if err := writeFile(path, "cgroup.procs", strconv.Itoa(pid)); err != nil {
|
||||
if err := cgroups.WriteCgroupProc(path, pid); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return path, nil
|
||||
}
|
||||
|
||||
func joinCpu(c *configs.Cgroup, pid int) error {
|
||||
_, err := join(c, "cpu", pid)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
func joinCgroups(c *configs.Cgroup, pid int) error {
|
||||
for _, sys := range subsystems {
|
||||
name := sys.Name()
|
||||
switch name {
|
||||
case "name=systemd":
|
||||
// let systemd handle this
|
||||
case "cpuset":
|
||||
path, err := getSubsystemPath(c, name)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
s := &fs.CpusetGroup{}
|
||||
if err := s.ApplyDir(path, c, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
default:
|
||||
_, err := join(c, name, pid)
|
||||
if err != nil {
|
||||
// Even if it's `not found` error, we'll return err
|
||||
// because devices cgroup is hard requirement for
|
||||
// container security.
|
||||
if name == "devices" {
|
||||
return err
|
||||
}
|
||||
// For other subsystems, omit the `not found` error
|
||||
// because they are optional.
|
||||
if !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func joinFreezer(c *configs.Cgroup, pid int) error {
|
||||
_, err := join(c, "freezer", pid)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func joinNetPrio(c *configs.Cgroup, pid int) error {
|
||||
_, err := join(c, "net_prio", pid)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func joinNetCls(c *configs.Cgroup, pid int) error {
|
||||
_, err := join(c, "net_cls", pid)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func joinPids(c *configs.Cgroup, pid int) error {
|
||||
_, err := join(c, "pids", pid)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// systemd represents slice heirarchy using `-`, so we need to follow suit when
|
||||
// systemd represents slice hierarchy using `-`, so we need to follow suit when
|
||||
// generating the path of slice. Essentially, test-a-b.slice becomes
|
||||
// test.slice/test-a.slice/test-a-b.slice.
|
||||
func expandSlice(slice string) (string, error) {
|
||||
// /test.slice/test-a.slice/test-a-b.slice.
|
||||
func ExpandSlice(slice string) (string, error) {
|
||||
suffix := ".slice"
|
||||
// Name has to end with ".slice", but can't be just ".slice".
|
||||
if len(slice) < len(suffix) || !strings.HasSuffix(slice, suffix) {
|
||||
@@ -404,6 +410,10 @@ func expandSlice(slice string) (string, error) {
|
||||
|
||||
var path, prefix string
|
||||
sliceName := strings.TrimSuffix(slice, suffix)
|
||||
// if input was -.slice, we should just return root now
|
||||
if sliceName == "-" {
|
||||
return "/", nil
|
||||
}
|
||||
for _, component := range strings.Split(sliceName, "-") {
|
||||
// test--a.slice isn't permitted, nor is -test.slice.
|
||||
if component == "" {
|
||||
@@ -411,10 +421,9 @@ func expandSlice(slice string) (string, error) {
|
||||
}
|
||||
|
||||
// Append the component to the path and to the prefix.
|
||||
path += prefix + component + suffix + "/"
|
||||
path += "/" + prefix + component + suffix
|
||||
prefix += component + "-"
|
||||
}
|
||||
|
||||
return path, nil
|
||||
}
|
||||
|
||||
@@ -424,17 +433,19 @@ func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
|
||||
return "", err
|
||||
}
|
||||
|
||||
initPath, err := cgroups.GetInitCgroupDir(subsystem)
|
||||
initPath, err := cgroups.GetInitCgroup(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
// if pid 1 is systemd 226 or later, it will be in init.scope, not the root
|
||||
initPath = strings.TrimSuffix(filepath.Clean(initPath), "init.scope")
|
||||
|
||||
slice := "system.slice"
|
||||
if c.Parent != "" {
|
||||
slice = c.Parent
|
||||
}
|
||||
|
||||
slice, err = expandSlice(slice)
|
||||
slice, err = ExpandSlice(slice)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
@@ -495,6 +506,11 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
|
||||
}
|
||||
|
||||
func (m *Manager) Set(container *configs.Config) error {
|
||||
// If Paths are set, then we are just joining cgroups paths
|
||||
// and there is no need to set any values.
|
||||
if m.Cgroups.Paths != nil {
|
||||
return nil
|
||||
}
|
||||
for _, sys := range subsystems {
|
||||
// Get the subsystem path, but don't error out for not found cgroups.
|
||||
path, err := getSubsystemPath(container.Cgroups, sys.Name())
|
||||
@@ -516,28 +532,11 @@ func (m *Manager) Set(container *configs.Config) error {
|
||||
}
|
||||
|
||||
func getUnitName(c *configs.Cgroup) string {
|
||||
return fmt.Sprintf("%s-%s.scope", c.ScopePrefix, c.Name)
|
||||
}
|
||||
|
||||
// Atm we can't use the systemd device support because of two missing things:
|
||||
// * Support for wildcards to allow mknod on any device
|
||||
// * Support for wildcards to allow /dev/pts support
|
||||
//
|
||||
// The second is available in more recent systemd as "char-pts", but not in e.g. v208 which is
|
||||
// in wide use. When both these are available we will be able to switch, but need to keep the old
|
||||
// implementation for backwards compat.
|
||||
//
|
||||
// Note: we can't use systemd to set up the initial limits, and then change the cgroup
|
||||
// because systemd will re-write the device settings if it needs to re-apply the cgroup context.
|
||||
// This happens at least for v208 when any sibling unit is started.
|
||||
func joinDevices(c *configs.Cgroup, pid int) error {
|
||||
_, err := join(c, "devices", pid)
|
||||
// Even if it's `not found` error, we'll return err because devices cgroup
|
||||
// is hard requirement for container security.
|
||||
if err != nil {
|
||||
return err
|
||||
// by default, we create a scope unless the user explicitly asks for a slice.
|
||||
if !strings.HasSuffix(c.Name, ".slice") {
|
||||
return fmt.Sprintf("%s-%s.scope", c.ScopePrefix, c.Name)
|
||||
}
|
||||
return nil
|
||||
return c.Name
|
||||
}
|
||||
|
||||
func setKernelMemory(c *configs.Cgroup) error {
|
||||
@@ -549,57 +548,15 @@ func setKernelMemory(c *configs.Cgroup) error {
|
||||
if err := os.MkdirAll(path, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// This doesn't get called by manager.Set, so we need to do it here.
|
||||
s := &fs.MemoryGroup{}
|
||||
return s.SetKernelMemory(path, c)
|
||||
return fs.EnableKernelMemoryAccounting(path)
|
||||
}
|
||||
|
||||
func joinMemory(c *configs.Cgroup, pid int) error {
|
||||
_, err := join(c, "memory", pid)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// systemd does not atm set up the cpuset controller, so we must manually
|
||||
// join it. Additionally that is a very finicky controller where each
|
||||
// level must have a full setup as the default for a new directory is "no cpus"
|
||||
func joinCpuset(c *configs.Cgroup, pid int) error {
|
||||
path, err := getSubsystemPath(c, "cpuset")
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
s := &fs.CpusetGroup{}
|
||||
|
||||
return s.ApplyDir(path, c, pid)
|
||||
}
|
||||
|
||||
// `BlockIODeviceWeight` property of systemd does not work properly, and systemd
|
||||
// expects device path instead of major minor numbers, which is also confusing
|
||||
// for users. So we use fs work around for now.
|
||||
func joinBlkio(c *configs.Cgroup, pid int) error {
|
||||
_, err := join(c, "blkio", pid)
|
||||
// isUnitExists returns true if the error is that a systemd unit already exists.
|
||||
func isUnitExists(err error) bool {
|
||||
if err != nil {
|
||||
return err
|
||||
if dbusError, ok := err.(dbus.Error); ok {
|
||||
return strings.Contains(dbusError.Name, "org.freedesktop.systemd1.UnitExists")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func joinHugetlb(c *configs.Cgroup, pid int) error {
|
||||
_, err := join(c, "hugetlb", pid)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func joinPerfEvent(c *configs.Cgroup, pid int) error {
|
||||
_, err := join(c, "perf_event", pid)
|
||||
if err != nil && !cgroups.IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
return false
|
||||
}
|
||||
|
||||
194
vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
generated
vendored
194
vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
generated
vendored
@@ -16,37 +16,24 @@ import (
|
||||
"github.com/docker/go-units"
|
||||
)
|
||||
|
||||
const cgroupNamePrefix = "name="
|
||||
const (
|
||||
cgroupNamePrefix = "name="
|
||||
CgroupProcesses = "cgroup.procs"
|
||||
)
|
||||
|
||||
// https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt
|
||||
// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
|
||||
func FindCgroupMountpoint(subsystem string) (string, error) {
|
||||
// We are not using mount.GetMounts() because it's super-inefficient,
|
||||
// parsing it directly sped up x10 times because of not using Sscanf.
|
||||
// It was one of two major performance drawbacks in container start.
|
||||
f, err := os.Open("/proc/self/mountinfo")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
scanner := bufio.NewScanner(f)
|
||||
for scanner.Scan() {
|
||||
txt := scanner.Text()
|
||||
fields := strings.Split(txt, " ")
|
||||
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
|
||||
if opt == subsystem {
|
||||
return fields[4], nil
|
||||
}
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return "", NewNotFoundError(subsystem)
|
||||
mnt, _, err := FindCgroupMountpointAndRoot(subsystem)
|
||||
return mnt, err
|
||||
}
|
||||
|
||||
func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) {
|
||||
// We are not using mount.GetMounts() because it's super-inefficient,
|
||||
// parsing it directly sped up x10 times because of not using Sscanf.
|
||||
// It was one of two major performance drawbacks in container start.
|
||||
if !isSubsystemAvailable(subsystem) {
|
||||
return "", "", NewNotFoundError(subsystem)
|
||||
}
|
||||
f, err := os.Open("/proc/self/mountinfo")
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
@@ -70,6 +57,30 @@ func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) {
|
||||
return "", "", NewNotFoundError(subsystem)
|
||||
}
|
||||
|
||||
func isSubsystemAvailable(subsystem string) bool {
|
||||
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
_, avail := cgroups[subsystem]
|
||||
return avail
|
||||
}
|
||||
|
||||
func GetClosestMountpointAncestor(dir, mountinfo string) string {
|
||||
deepestMountPoint := ""
|
||||
for _, mountInfoEntry := range strings.Split(mountinfo, "\n") {
|
||||
mountInfoParts := strings.Fields(mountInfoEntry)
|
||||
if len(mountInfoParts) < 5 {
|
||||
continue
|
||||
}
|
||||
mountPoint := mountInfoParts[4]
|
||||
if strings.HasPrefix(mountPoint, deepestMountPoint) && strings.HasPrefix(dir, mountPoint) {
|
||||
deepestMountPoint = mountPoint
|
||||
}
|
||||
}
|
||||
return deepestMountPoint
|
||||
}
|
||||
|
||||
func FindCgroupMountpointDir() (string, error) {
|
||||
f, err := os.Open("/proc/self/mountinfo")
|
||||
if err != nil {
|
||||
@@ -113,7 +124,7 @@ type Mount struct {
|
||||
Subsystems []string
|
||||
}
|
||||
|
||||
func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) {
|
||||
func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
|
||||
if len(m.Subsystems) == 0 {
|
||||
return "", fmt.Errorf("no subsystem for mount")
|
||||
}
|
||||
@@ -121,16 +132,17 @@ func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) {
|
||||
return getControllerPath(m.Subsystems[0], cgroups)
|
||||
}
|
||||
|
||||
func getCgroupMountsHelper(ss map[string]bool, mi io.Reader) ([]Mount, error) {
|
||||
func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, error) {
|
||||
res := make([]Mount, 0, len(ss))
|
||||
scanner := bufio.NewScanner(mi)
|
||||
for scanner.Scan() {
|
||||
numFound := 0
|
||||
for scanner.Scan() && numFound < len(ss) {
|
||||
txt := scanner.Text()
|
||||
sepIdx := strings.IndexByte(txt, '-')
|
||||
sepIdx := strings.Index(txt, " - ")
|
||||
if sepIdx == -1 {
|
||||
return nil, fmt.Errorf("invalid mountinfo format")
|
||||
}
|
||||
if txt[sepIdx+2:sepIdx+8] != "cgroup" {
|
||||
if txt[sepIdx+3:sepIdx+10] == "cgroup2" || txt[sepIdx+3:sepIdx+9] != "cgroup" {
|
||||
continue
|
||||
}
|
||||
fields := strings.Split(txt, " ")
|
||||
@@ -139,12 +151,17 @@ func getCgroupMountsHelper(ss map[string]bool, mi io.Reader) ([]Mount, error) {
|
||||
Root: fields[3],
|
||||
}
|
||||
for _, opt := range strings.Split(fields[len(fields)-1], ",") {
|
||||
if !ss[opt] {
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(opt, cgroupNamePrefix) {
|
||||
m.Subsystems = append(m.Subsystems, opt[len(cgroupNamePrefix):])
|
||||
}
|
||||
if ss[opt] {
|
||||
} else {
|
||||
m.Subsystems = append(m.Subsystems, opt)
|
||||
}
|
||||
if !all {
|
||||
numFound++
|
||||
}
|
||||
}
|
||||
res = append(res, m)
|
||||
}
|
||||
@@ -154,26 +171,28 @@ func getCgroupMountsHelper(ss map[string]bool, mi io.Reader) ([]Mount, error) {
|
||||
return res, nil
|
||||
}
|
||||
|
||||
func GetCgroupMounts() ([]Mount, error) {
|
||||
// GetCgroupMounts returns the mounts for the cgroup subsystems.
|
||||
// all indicates whether to return just the first instance or all the mounts.
|
||||
func GetCgroupMounts(all bool) ([]Mount, error) {
|
||||
f, err := os.Open("/proc/self/mountinfo")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
all, err := GetAllSubsystems()
|
||||
allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
allMap := make(map[string]bool)
|
||||
for _, s := range all {
|
||||
for s := range allSubsystems {
|
||||
allMap[s] = true
|
||||
}
|
||||
return getCgroupMountsHelper(allMap, f)
|
||||
return getCgroupMountsHelper(allMap, f, all)
|
||||
}
|
||||
|
||||
// Returns all the cgroup subsystems supported by the kernel
|
||||
// GetAllSubsystems returns all the cgroup subsystems supported by the kernel
|
||||
func GetAllSubsystems() ([]string, error) {
|
||||
f, err := os.Open("/proc/cgroups")
|
||||
if err != nil {
|
||||
@@ -185,9 +204,6 @@ func GetAllSubsystems() ([]string, error) {
|
||||
|
||||
s := bufio.NewScanner(f)
|
||||
for s.Scan() {
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
text := s.Text()
|
||||
if text[0] != '#' {
|
||||
parts := strings.Fields(text)
|
||||
@@ -196,11 +212,14 @@ func GetAllSubsystems() ([]string, error) {
|
||||
}
|
||||
}
|
||||
}
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return subsystems, nil
|
||||
}
|
||||
|
||||
// Returns the relative path to the cgroup docker is running in.
|
||||
func GetThisCgroupDir(subsystem string) (string, error) {
|
||||
// GetOwnCgroup returns the relative path to the cgroup docker is running in.
|
||||
func GetOwnCgroup(subsystem string) (string, error) {
|
||||
cgroups, err := ParseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
@@ -209,8 +228,16 @@ func GetThisCgroupDir(subsystem string) (string, error) {
|
||||
return getControllerPath(subsystem, cgroups)
|
||||
}
|
||||
|
||||
func GetInitCgroupDir(subsystem string) (string, error) {
|
||||
func GetOwnCgroupPath(subsystem string) (string, error) {
|
||||
cgroup, err := GetOwnCgroup(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return getCgroupPathHelper(subsystem, cgroup)
|
||||
}
|
||||
|
||||
func GetInitCgroup(subsystem string) (string, error) {
|
||||
cgroups, err := ParseCgroupFile("/proc/1/cgroup")
|
||||
if err != nil {
|
||||
return "", err
|
||||
@@ -219,8 +246,33 @@ func GetInitCgroupDir(subsystem string) (string, error) {
|
||||
return getControllerPath(subsystem, cgroups)
|
||||
}
|
||||
|
||||
func GetInitCgroupPath(subsystem string) (string, error) {
|
||||
cgroup, err := GetInitCgroup(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return getCgroupPathHelper(subsystem, cgroup)
|
||||
}
|
||||
|
||||
func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
|
||||
mnt, root, err := FindCgroupMountpointAndRoot(subsystem)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// This is needed for nested containers, because in /proc/self/cgroup we
|
||||
// see pathes from host, which don't exist in container.
|
||||
relCgroup, err := filepath.Rel(root, cgroup)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return filepath.Join(mnt, relCgroup), nil
|
||||
}
|
||||
|
||||
func readProcsFile(dir string) ([]int, error) {
|
||||
f, err := os.Open(filepath.Join(dir, "cgroup.procs"))
|
||||
f, err := os.Open(filepath.Join(dir, CgroupProcesses))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -243,6 +295,8 @@ func readProcsFile(dir string) ([]int, error) {
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// ParseCgroupFile parses the given cgroup file, typically from
|
||||
// /proc/<pid>/cgroup, into a map of subgroups to cgroup names.
|
||||
func ParseCgroupFile(path string) (map[string]string, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
@@ -250,21 +304,35 @@ func ParseCgroupFile(path string) (map[string]string, error) {
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
s := bufio.NewScanner(f)
|
||||
return parseCgroupFromReader(f)
|
||||
}
|
||||
|
||||
// helper function for ParseCgroupFile to make testing easier
|
||||
func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
|
||||
s := bufio.NewScanner(r)
|
||||
cgroups := make(map[string]string)
|
||||
|
||||
for s.Scan() {
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
text := s.Text()
|
||||
parts := strings.Split(text, ":")
|
||||
// from cgroups(7):
|
||||
// /proc/[pid]/cgroup
|
||||
// ...
|
||||
// For each cgroup hierarchy ... there is one entry
|
||||
// containing three colon-separated fields of the form:
|
||||
// hierarchy-ID:subsystem-list:cgroup-path
|
||||
parts := strings.SplitN(text, ":", 3)
|
||||
if len(parts) < 3 {
|
||||
return nil, fmt.Errorf("invalid cgroup entry: must contain at least two colons: %v", text)
|
||||
}
|
||||
|
||||
for _, subs := range strings.Split(parts[1], ",") {
|
||||
cgroups[subs] = parts[2]
|
||||
}
|
||||
}
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return cgroups, nil
|
||||
}
|
||||
|
||||
@@ -291,8 +359,7 @@ func PathExists(path string) bool {
|
||||
func EnterPid(cgroupPaths map[string]string, pid int) error {
|
||||
for _, path := range cgroupPaths {
|
||||
if PathExists(path) {
|
||||
if err := ioutil.WriteFile(filepath.Join(path, "cgroup.procs"),
|
||||
[]byte(strconv.Itoa(pid)), 0700); err != nil {
|
||||
if err := WriteCgroupProc(path, pid); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -326,7 +393,7 @@ func RemovePaths(paths map[string]string) (err error) {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("Failed to remove paths: %s", paths)
|
||||
return fmt.Errorf("Failed to remove paths: %v", paths)
|
||||
}
|
||||
|
||||
func GetHugePageSize() ([]string, error) {
|
||||
@@ -361,7 +428,7 @@ func GetAllPids(path string) ([]int, error) {
|
||||
// collect pids from all sub-cgroups
|
||||
err := filepath.Walk(path, func(p string, info os.FileInfo, iErr error) error {
|
||||
dir, file := filepath.Split(p)
|
||||
if file != "cgroup.procs" {
|
||||
if file != CgroupProcesses {
|
||||
return nil
|
||||
}
|
||||
if iErr != nil {
|
||||
@@ -376,3 +443,20 @@ func GetAllPids(path string) ([]int, error) {
|
||||
})
|
||||
return pids, err
|
||||
}
|
||||
|
||||
// WriteCgroupProc writes the specified pid into the cgroup's cgroup.procs file
|
||||
func WriteCgroupProc(dir string, pid int) error {
|
||||
// Normally dir should not be empty, one case is that cgroup subsystem
|
||||
// is not mounted, we will get empty dir, and we want it fail here.
|
||||
if dir == "" {
|
||||
return fmt.Errorf("no such directory for %s", CgroupProcesses)
|
||||
}
|
||||
|
||||
// Dont attach any pid to the cgroup if -1 is specified as a pid
|
||||
if pid != -1 {
|
||||
if err := ioutil.WriteFile(filepath.Join(dir, CgroupProcesses), []byte(strconv.Itoa(pid)), 0700); err != nil {
|
||||
return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
138
vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils_test.go
generated
vendored
138
vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils_test.go
generated
vendored
@@ -1,138 +0,0 @@
|
||||
package cgroups
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
const fedoraMountinfo = `15 35 0:3 / /proc rw,nosuid,nodev,noexec,relatime shared:5 - proc proc rw
|
||||
16 35 0:14 / /sys rw,nosuid,nodev,noexec,relatime shared:6 - sysfs sysfs rw,seclabel
|
||||
17 35 0:5 / /dev rw,nosuid shared:2 - devtmpfs devtmpfs rw,seclabel,size=8056484k,nr_inodes=2014121,mode=755
|
||||
18 16 0:15 / /sys/kernel/security rw,nosuid,nodev,noexec,relatime shared:7 - securityfs securityfs rw
|
||||
19 16 0:13 / /sys/fs/selinux rw,relatime shared:8 - selinuxfs selinuxfs rw
|
||||
20 17 0:16 / /dev/shm rw,nosuid,nodev shared:3 - tmpfs tmpfs rw,seclabel
|
||||
21 17 0:10 / /dev/pts rw,nosuid,noexec,relatime shared:4 - devpts devpts rw,seclabel,gid=5,mode=620,ptmxmode=000
|
||||
22 35 0:17 / /run rw,nosuid,nodev shared:21 - tmpfs tmpfs rw,seclabel,mode=755
|
||||
23 16 0:18 / /sys/fs/cgroup rw,nosuid,nodev,noexec shared:9 - tmpfs tmpfs rw,seclabel,mode=755
|
||||
24 23 0:19 / /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime shared:10 - cgroup cgroup rw,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd
|
||||
25 16 0:20 / /sys/fs/pstore rw,nosuid,nodev,noexec,relatime shared:20 - pstore pstore rw
|
||||
26 23 0:21 / /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime shared:11 - cgroup cgroup rw,cpuset,clone_children
|
||||
27 23 0:22 / /sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime shared:12 - cgroup cgroup rw,cpuacct,cpu,clone_children
|
||||
28 23 0:23 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:13 - cgroup cgroup rw,memory,clone_children
|
||||
29 23 0:24 / /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime shared:14 - cgroup cgroup rw,devices,clone_children
|
||||
30 23 0:25 / /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime shared:15 - cgroup cgroup rw,freezer,clone_children
|
||||
31 23 0:26 / /sys/fs/cgroup/net_cls rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,net_cls,clone_children
|
||||
32 23 0:27 / /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime shared:17 - cgroup cgroup rw,blkio,clone_children
|
||||
33 23 0:28 / /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime shared:18 - cgroup cgroup rw,perf_event,clone_children
|
||||
34 23 0:29 / /sys/fs/cgroup/hugetlb rw,nosuid,nodev,noexec,relatime shared:19 - cgroup cgroup rw,hugetlb,clone_children
|
||||
35 1 253:2 / / rw,relatime shared:1 - ext4 /dev/mapper/ssd-root--f20 rw,seclabel,data=ordered
|
||||
36 15 0:30 / /proc/sys/fs/binfmt_misc rw,relatime shared:22 - autofs systemd-1 rw,fd=38,pgrp=1,timeout=300,minproto=5,maxproto=5,direct
|
||||
37 17 0:12 / /dev/mqueue rw,relatime shared:23 - mqueue mqueue rw,seclabel
|
||||
38 35 0:31 / /tmp rw shared:24 - tmpfs tmpfs rw,seclabel
|
||||
39 17 0:32 / /dev/hugepages rw,relatime shared:25 - hugetlbfs hugetlbfs rw,seclabel
|
||||
40 16 0:7 / /sys/kernel/debug rw,relatime shared:26 - debugfs debugfs rw
|
||||
41 16 0:33 / /sys/kernel/config rw,relatime shared:27 - configfs configfs rw
|
||||
42 35 0:34 / /var/lib/nfs/rpc_pipefs rw,relatime shared:28 - rpc_pipefs sunrpc rw
|
||||
43 15 0:35 / /proc/fs/nfsd rw,relatime shared:29 - nfsd sunrpc rw
|
||||
45 35 8:17 / /boot rw,relatime shared:30 - ext4 /dev/sdb1 rw,seclabel,data=ordered
|
||||
46 35 253:4 / /home rw,relatime shared:31 - ext4 /dev/mapper/ssd-home rw,seclabel,data=ordered
|
||||
47 35 253:5 / /var/lib/libvirt/images rw,noatime,nodiratime shared:32 - ext4 /dev/mapper/ssd-virt rw,seclabel,discard,data=ordered
|
||||
48 35 253:12 / /mnt/old rw,relatime shared:33 - ext4 /dev/mapper/HelpDeskRHEL6-FedoraRoot rw,seclabel,data=ordered
|
||||
121 22 0:36 / /run/user/1000/gvfs rw,nosuid,nodev,relatime shared:104 - fuse.gvfsd-fuse gvfsd-fuse rw,user_id=1000,group_id=1000
|
||||
124 16 0:37 / /sys/fs/fuse/connections rw,relatime shared:107 - fusectl fusectl rw
|
||||
165 38 253:3 / /tmp/mnt rw,relatime shared:147 - ext4 /dev/mapper/ssd-root rw,seclabel,data=ordered
|
||||
167 35 253:15 / /var/lib/docker/devicemapper/mnt/aae4076022f0e2b80a2afbf8fc6df450c52080191fcef7fb679a73e6f073e5c2 rw,relatime shared:149 - ext4 /dev/mapper/docker-253:2-425882-aae4076022f0e2b80a2afbf8fc6df450c52080191fcef7fb679a73e6f073e5c2 rw,seclabel,discard,stripe=16,data=ordered
|
||||
171 35 253:16 / /var/lib/docker/devicemapper/mnt/c71be651f114db95180e472f7871b74fa597ee70a58ccc35cb87139ddea15373 rw,relatime shared:153 - ext4 /dev/mapper/docker-253:2-425882-c71be651f114db95180e472f7871b74fa597ee70a58ccc35cb87139ddea15373 rw,seclabel,discard,stripe=16,data=ordered
|
||||
175 35 253:17 / /var/lib/docker/devicemapper/mnt/1bac6ab72862d2d5626560df6197cf12036b82e258c53d981fa29adce6f06c3c rw,relatime shared:157 - ext4 /dev/mapper/docker-253:2-425882-1bac6ab72862d2d5626560df6197cf12036b82e258c53d981fa29adce6f06c3c rw,seclabel,discard,stripe=16,data=ordered
|
||||
179 35 253:18 / /var/lib/docker/devicemapper/mnt/d710a357d77158e80d5b2c55710ae07c94e76d34d21ee7bae65ce5418f739b09 rw,relatime shared:161 - ext4 /dev/mapper/docker-253:2-425882-d710a357d77158e80d5b2c55710ae07c94e76d34d21ee7bae65ce5418f739b09 rw,seclabel,discard,stripe=16,data=ordered
|
||||
183 35 253:19 / /var/lib/docker/devicemapper/mnt/6479f52366114d5f518db6837254baab48fab39f2ac38d5099250e9a6ceae6c7 rw,relatime shared:165 - ext4 /dev/mapper/docker-253:2-425882-6479f52366114d5f518db6837254baab48fab39f2ac38d5099250e9a6ceae6c7 rw,seclabel,discard,stripe=16,data=ordered
|
||||
187 35 253:20 / /var/lib/docker/devicemapper/mnt/8d9df91c4cca5aef49eeb2725292aab324646f723a7feab56be34c2ad08268e1 rw,relatime shared:169 - ext4 /dev/mapper/docker-253:2-425882-8d9df91c4cca5aef49eeb2725292aab324646f723a7feab56be34c2ad08268e1 rw,seclabel,discard,stripe=16,data=ordered
|
||||
191 35 253:21 / /var/lib/docker/devicemapper/mnt/c8240b768603d32e920d365dc9d1dc2a6af46cd23e7ae819947f969e1b4ec661 rw,relatime shared:173 - ext4 /dev/mapper/docker-253:2-425882-c8240b768603d32e920d365dc9d1dc2a6af46cd23e7ae819947f969e1b4ec661 rw,seclabel,discard,stripe=16,data=ordered
|
||||
195 35 253:22 / /var/lib/docker/devicemapper/mnt/2eb3a01278380bbf3ed12d86ac629eaa70a4351301ee307a5cabe7b5f3b1615f rw,relatime shared:177 - ext4 /dev/mapper/docker-253:2-425882-2eb3a01278380bbf3ed12d86ac629eaa70a4351301ee307a5cabe7b5f3b1615f rw,seclabel,discard,stripe=16,data=ordered
|
||||
199 35 253:23 / /var/lib/docker/devicemapper/mnt/37a17fb7c9d9b80821235d5f2662879bd3483915f245f9b49cdaa0e38779b70b rw,relatime shared:181 - ext4 /dev/mapper/docker-253:2-425882-37a17fb7c9d9b80821235d5f2662879bd3483915f245f9b49cdaa0e38779b70b rw,seclabel,discard,stripe=16,data=ordered
|
||||
203 35 253:24 / /var/lib/docker/devicemapper/mnt/aea459ae930bf1de913e2f29428fd80ee678a1e962d4080019d9f9774331ee2b rw,relatime shared:185 - ext4 /dev/mapper/docker-253:2-425882-aea459ae930bf1de913e2f29428fd80ee678a1e962d4080019d9f9774331ee2b rw,seclabel,discard,stripe=16,data=ordered
|
||||
207 35 253:25 / /var/lib/docker/devicemapper/mnt/928ead0bc06c454bd9f269e8585aeae0a6bd697f46dc8754c2a91309bc810882 rw,relatime shared:189 - ext4 /dev/mapper/docker-253:2-425882-928ead0bc06c454bd9f269e8585aeae0a6bd697f46dc8754c2a91309bc810882 rw,seclabel,discard,stripe=16,data=ordered
|
||||
211 35 253:26 / /var/lib/docker/devicemapper/mnt/0f284d18481d671644706e7a7244cbcf63d590d634cc882cb8721821929d0420 rw,relatime shared:193 - ext4 /dev/mapper/docker-253:2-425882-0f284d18481d671644706e7a7244cbcf63d590d634cc882cb8721821929d0420 rw,seclabel,discard,stripe=16,data=ordered
|
||||
215 35 253:27 / /var/lib/docker/devicemapper/mnt/d9dd16722ab34c38db2733e23f69e8f4803ce59658250dd63e98adff95d04919 rw,relatime shared:197 - ext4 /dev/mapper/docker-253:2-425882-d9dd16722ab34c38db2733e23f69e8f4803ce59658250dd63e98adff95d04919 rw,seclabel,discard,stripe=16,data=ordered
|
||||
219 35 253:28 / /var/lib/docker/devicemapper/mnt/bc4500479f18c2c08c21ad5282e5f826a016a386177d9874c2764751c031d634 rw,relatime shared:201 - ext4 /dev/mapper/docker-253:2-425882-bc4500479f18c2c08c21ad5282e5f826a016a386177d9874c2764751c031d634 rw,seclabel,discard,stripe=16,data=ordered
|
||||
223 35 253:29 / /var/lib/docker/devicemapper/mnt/7770c8b24eb3d5cc159a065910076938910d307ab2f5d94e1dc3b24c06ee2c8a rw,relatime shared:205 - ext4 /dev/mapper/docker-253:2-425882-7770c8b24eb3d5cc159a065910076938910d307ab2f5d94e1dc3b24c06ee2c8a rw,seclabel,discard,stripe=16,data=ordered
|
||||
227 35 253:30 / /var/lib/docker/devicemapper/mnt/c280cd3d0bf0aa36b478b292279671624cceafc1a67eaa920fa1082601297adf rw,relatime shared:209 - ext4 /dev/mapper/docker-253:2-425882-c280cd3d0bf0aa36b478b292279671624cceafc1a67eaa920fa1082601297adf rw,seclabel,discard,stripe=16,data=ordered
|
||||
231 35 253:31 / /var/lib/docker/devicemapper/mnt/8b59a7d9340279f09fea67fd6ad89ddef711e9e7050eb647984f8b5ef006335f rw,relatime shared:213 - ext4 /dev/mapper/docker-253:2-425882-8b59a7d9340279f09fea67fd6ad89ddef711e9e7050eb647984f8b5ef006335f rw,seclabel,discard,stripe=16,data=ordered
|
||||
235 35 253:32 / /var/lib/docker/devicemapper/mnt/1a28059f29eda821578b1bb27a60cc71f76f846a551abefabce6efd0146dce9f rw,relatime shared:217 - ext4 /dev/mapper/docker-253:2-425882-1a28059f29eda821578b1bb27a60cc71f76f846a551abefabce6efd0146dce9f rw,seclabel,discard,stripe=16,data=ordered
|
||||
239 35 253:33 / /var/lib/docker/devicemapper/mnt/e9aa60c60128cad1 rw,relatime shared:221 - ext4 /dev/mapper/docker-253:2-425882-e9aa60c60128cad1 rw,seclabel,discard,stripe=16,data=ordered
|
||||
243 35 253:34 / /var/lib/docker/devicemapper/mnt/5fec11304b6f4713fea7b6ccdcc1adc0a1966187f590fe25a8227428a8df275d-init rw,relatime shared:225 - ext4 /dev/mapper/docker-253:2-425882-5fec11304b6f4713fea7b6ccdcc1adc0a1966187f590fe25a8227428a8df275d-init rw,seclabel,discard,stripe=16,data=ordered
|
||||
247 35 253:35 / /var/lib/docker/devicemapper/mnt/5fec11304b6f4713fea7b6ccdcc1adc0a1966187f590fe25a8227428a8df275d rw,relatime shared:229 - ext4 /dev/mapper/docker-253:2-425882-5fec11304b6f4713fea7b6ccdcc1adc0a1966187f590fe25a8227428a8df275d rw,seclabel,discard,stripe=16,data=ordered
|
||||
31 21 0:23 / /DATA/foo_bla_bla rw,relatime - cifs //foo/BLA\040BLA\040BLA/ rw,sec=ntlm,cache=loose,unc=\\foo\BLA BLA BLA,username=my_login,domain=mydomain.com,uid=12345678,forceuid,gid=12345678,forcegid,addr=10.1.30.10,file_mode=0755,dir_mode=0755,nounix,rsize=61440,wsize=65536,actimeo=1`
|
||||
|
||||
func TestGetCgroupMounts(t *testing.T) {
|
||||
subsystems := map[string]bool{
|
||||
"cpuset": true,
|
||||
"cpu": true,
|
||||
"cpuacct": true,
|
||||
"memory": true,
|
||||
"devices": true,
|
||||
"freezer": true,
|
||||
"net_cls": true,
|
||||
"blkio": true,
|
||||
"perf_event": true,
|
||||
"hugetlb": true,
|
||||
}
|
||||
mi := bytes.NewBufferString(fedoraMountinfo)
|
||||
cgMounts, err := getCgroupMountsHelper(subsystems, mi)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
cgMap := make(map[string]Mount)
|
||||
for _, m := range cgMounts {
|
||||
for _, ss := range m.Subsystems {
|
||||
cgMap[ss] = m
|
||||
}
|
||||
}
|
||||
for ss := range subsystems {
|
||||
m, ok := cgMap[ss]
|
||||
if !ok {
|
||||
t.Fatalf("%s not found", ss)
|
||||
}
|
||||
if m.Root != "/" {
|
||||
t.Fatalf("unexpected root for %s: %s", ss, m.Root)
|
||||
}
|
||||
if !strings.HasPrefix(m.Mountpoint, "/sys/fs/cgroup/") && !strings.Contains(m.Mountpoint, ss) {
|
||||
t.Fatalf("unexpected mountpoint for %s: %s", ss, m.Mountpoint)
|
||||
}
|
||||
var ssFound bool
|
||||
for _, mss := range m.Subsystems {
|
||||
if mss == ss {
|
||||
ssFound = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !ssFound {
|
||||
t.Fatalf("subsystem %s not found in Subsystems field %v", ss, m.Subsystems)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkGetCgroupMounts(b *testing.B) {
|
||||
subsystems := map[string]bool{
|
||||
"cpuset": true,
|
||||
"cpu": true,
|
||||
"cpuacct": true,
|
||||
"memory": true,
|
||||
"devices": true,
|
||||
"freezer": true,
|
||||
"net_cls": true,
|
||||
"blkio": true,
|
||||
"perf_event": true,
|
||||
"hugetlb": true,
|
||||
}
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
b.StopTimer()
|
||||
mi := bytes.NewBufferString(fedoraMountinfo)
|
||||
b.StartTimer()
|
||||
if _, err := getCgroupMountsHelper(subsystems, mi); err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,3 @@
|
||||
// +build linux freebsd
|
||||
|
||||
package configs
|
||||
|
||||
type FreezerState string
|
||||
@@ -22,7 +20,7 @@ type Cgroup struct {
|
||||
// The path is assumed to be relative to the host system cgroup mountpoint.
|
||||
Path string `json:"path"`
|
||||
|
||||
// ScopePrefix decribes prefix for the scope name
|
||||
// ScopePrefix describes prefix for the scope name
|
||||
ScopePrefix string `json:"scope_prefix"`
|
||||
|
||||
// Paths represent the absolute cgroups paths to join.
|
||||
@@ -36,7 +34,7 @@ type Cgroup struct {
|
||||
type Resources struct {
|
||||
// If this is true allow access to any kind of device within the container. If false, allow access only to devices explicitly listed in the allowed_devices list.
|
||||
// Deprecated
|
||||
AllowAllDevices bool `json:"allow_all_devices,omitempty"`
|
||||
AllowAllDevices *bool `json:"allow_all_devices,omitempty"`
|
||||
// Deprecated
|
||||
AllowedDevices []*Device `json:"allowed_devices,omitempty"`
|
||||
// Deprecated
|
||||
@@ -56,20 +54,23 @@ type Resources struct {
|
||||
// Kernel memory limit (in bytes)
|
||||
KernelMemory int64 `json:"kernel_memory"`
|
||||
|
||||
// Kernel memory limit for TCP use (in bytes)
|
||||
KernelMemoryTCP int64 `json:"kernel_memory_tcp"`
|
||||
|
||||
// CPU shares (relative weight vs. other containers)
|
||||
CpuShares int64 `json:"cpu_shares"`
|
||||
CpuShares uint64 `json:"cpu_shares"`
|
||||
|
||||
// CPU hardcap limit (in usecs). Allowed cpu time in a given period.
|
||||
CpuQuota int64 `json:"cpu_quota"`
|
||||
|
||||
// CPU period to be used for hardcapping (in usecs). 0 to use system default.
|
||||
CpuPeriod int64 `json:"cpu_period"`
|
||||
CpuPeriod uint64 `json:"cpu_period"`
|
||||
|
||||
// How many time CPU will use in realtime scheduling (in usecs).
|
||||
CpuRtRuntime int64 `json:"cpu_quota"`
|
||||
CpuRtRuntime int64 `json:"cpu_rt_quota"`
|
||||
|
||||
// CPU period to be used for realtime scheduling (in usecs).
|
||||
CpuRtPeriod int64 `json:"cpu_period"`
|
||||
CpuRtPeriod uint64 `json:"cpu_rt_period"`
|
||||
|
||||
// CPU to use
|
||||
CpusetCpus string `json:"cpuset_cpus"`
|
||||
@@ -92,7 +93,7 @@ type Resources struct {
|
||||
// IO read rate limit per cgroup per device, bytes per second.
|
||||
BlkioThrottleReadBpsDevice []*ThrottleDevice `json:"blkio_throttle_read_bps_device"`
|
||||
|
||||
// IO write rate limit per cgroup per divice, bytes per second.
|
||||
// IO write rate limit per cgroup per device, bytes per second.
|
||||
BlkioThrottleWriteBpsDevice []*ThrottleDevice `json:"blkio_throttle_write_bps_device"`
|
||||
|
||||
// IO read rate limit per cgroup per device, IO per second.
|
||||
@@ -111,11 +112,11 @@ type Resources struct {
|
||||
OomKillDisable bool `json:"oom_kill_disable"`
|
||||
|
||||
// Tuning swappiness behaviour per cgroup
|
||||
MemorySwappiness int64 `json:"memory_swappiness"`
|
||||
MemorySwappiness *uint64 `json:"memory_swappiness"`
|
||||
|
||||
// Set priority of network traffic for container
|
||||
NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"`
|
||||
|
||||
// Set class identifier for container's network packets
|
||||
NetClsClassid string `json:"net_cls_classid"`
|
||||
NetClsClassid uint32 `json:"net_cls_classid_u"`
|
||||
}
|
||||
6
vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go
generated
vendored
6
vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go
generated
vendored
@@ -1,6 +0,0 @@
|
||||
// +build !windows,!linux,!freebsd
|
||||
|
||||
package configs
|
||||
|
||||
type Cgroup struct {
|
||||
}
|
||||
170
vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
generated
vendored
170
vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
generated
vendored
@@ -3,7 +3,13 @@ package configs
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"time"
|
||||
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type Rlimit struct {
|
||||
@@ -29,7 +35,7 @@ type Seccomp struct {
|
||||
Syscalls []*Syscall `json:"syscalls"`
|
||||
}
|
||||
|
||||
// An action to be taken upon rule match in Seccomp
|
||||
// Action is taken upon rule match in Seccomp
|
||||
type Action int
|
||||
|
||||
const (
|
||||
@@ -40,7 +46,7 @@ const (
|
||||
Trace
|
||||
)
|
||||
|
||||
// A comparison operator to be used when matching syscall arguments in Seccomp
|
||||
// Operator is a comparison operator to be used when matching syscall arguments in Seccomp
|
||||
type Operator int
|
||||
|
||||
const (
|
||||
@@ -53,7 +59,7 @@ const (
|
||||
MaskEqualTo
|
||||
)
|
||||
|
||||
// A rule to match a specific syscall argument in Seccomp
|
||||
// Arg is a rule to match a specific syscall argument in Seccomp
|
||||
type Arg struct {
|
||||
Index uint `json:"index"`
|
||||
Value uint64 `json:"value"`
|
||||
@@ -61,7 +67,7 @@ type Arg struct {
|
||||
Op Operator `json:"op"`
|
||||
}
|
||||
|
||||
// An rule to match a syscall in Seccomp
|
||||
// Syscall is a rule to match a syscall in Seccomp
|
||||
type Syscall struct {
|
||||
Name string `json:"name"`
|
||||
Action Action `json:"action"`
|
||||
@@ -81,11 +87,6 @@ type Config struct {
|
||||
// that the parent process dies.
|
||||
ParentDeathSignal int `json:"parent_death_signal"`
|
||||
|
||||
// PivotDir allows a custom directory inside the container's root filesystem to be used as pivot, when NoPivotRoot is not set.
|
||||
// When a custom PivotDir not set, a temporary dir inside the root filesystem will be used. The pivot dir needs to be writeable.
|
||||
// This is required when using read only root filesystems. In these cases, a read/writeable path can be (bind) mounted somewhere inside the root filesystem to act as pivot.
|
||||
PivotDir string `json:"pivot_dir"`
|
||||
|
||||
// Path to a directory containing the container's root filesystem.
|
||||
Rootfs string `json:"rootfs"`
|
||||
|
||||
@@ -113,8 +114,8 @@ type Config struct {
|
||||
Namespaces Namespaces `json:"namespaces"`
|
||||
|
||||
// Capabilities specify the capabilities to keep when executing the process inside the container
|
||||
// All capbilities not specified will be dropped from the processes capability mask
|
||||
Capabilities []string `json:"capabilities"`
|
||||
// All capabilities not specified will be dropped from the processes capability mask
|
||||
Capabilities *Capabilities `json:"capabilities"`
|
||||
|
||||
// Networks specifies the container's network setup to be created
|
||||
Networks []*Network `json:"networks"`
|
||||
@@ -128,15 +129,15 @@ type Config struct {
|
||||
|
||||
// AppArmorProfile specifies the profile to apply to the process running in the container and is
|
||||
// change at the time the process is execed
|
||||
AppArmorProfile string `json:"apparmor_profile"`
|
||||
AppArmorProfile string `json:"apparmor_profile,omitempty"`
|
||||
|
||||
// ProcessLabel specifies the label to apply to the process running in the container. It is
|
||||
// commonly used by selinux
|
||||
ProcessLabel string `json:"process_label"`
|
||||
ProcessLabel string `json:"process_label,omitempty"`
|
||||
|
||||
// Rlimits specifies the resource limits, such as max open files, to set in the container
|
||||
// If Rlimits are not set, the container will inherit rlimits from the parent process
|
||||
Rlimits []Rlimit `json:"rlimits"`
|
||||
Rlimits []Rlimit `json:"rlimits,omitempty"`
|
||||
|
||||
// OomScoreAdj specifies the adjustment to be made by the kernel when calculating oom scores
|
||||
// for a process. Valid values are between the range [-1000, '1000'], where processes with
|
||||
@@ -144,10 +145,6 @@ type Config struct {
|
||||
// More information about kernel oom score calculation here: https://lwn.net/Articles/317814/
|
||||
OomScoreAdj int `json:"oom_score_adj"`
|
||||
|
||||
// AdditionalGroups specifies the gids that should be added to supplementary groups
|
||||
// in addition to those that the user belongs to.
|
||||
AdditionalGroups []string `json:"additional_groups"`
|
||||
|
||||
// UidMappings is an array of User ID mappings for User Namespaces
|
||||
UidMappings []IDMap `json:"uid_mappings"`
|
||||
|
||||
@@ -171,12 +168,29 @@ type Config struct {
|
||||
// A default action to be taken if no rules match is also given.
|
||||
Seccomp *Seccomp `json:"seccomp"`
|
||||
|
||||
// NoNewPrivileges controls whether processes in the container can gain additional privileges.
|
||||
NoNewPrivileges bool `json:"no_new_privileges,omitempty"`
|
||||
|
||||
// Hooks are a collection of actions to perform at various container lifecycle events.
|
||||
// Hooks are not able to be marshaled to json but they are also not needed to.
|
||||
Hooks *Hooks `json:"-"`
|
||||
// CommandHooks are serialized to JSON, but other hooks are not.
|
||||
Hooks *Hooks
|
||||
|
||||
// Version is the version of opencontainer specification that is supported.
|
||||
Version string `json:"version"`
|
||||
|
||||
// Labels are user defined metadata that is stored in the config and populated on the state
|
||||
Labels []string `json:"labels"`
|
||||
|
||||
// NoNewKeyring will not allocated a new session keyring for the container. It will use the
|
||||
// callers keyring in this case.
|
||||
NoNewKeyring bool `json:"no_new_keyring"`
|
||||
|
||||
// Rootless specifies whether the container is a rootless container.
|
||||
Rootless bool `json:"rootless"`
|
||||
|
||||
// IntelRdt specifies settings for Intel RDT/CAT group that the container is placed into
|
||||
// to limit the resources (e.g., L3 cache) the container has available
|
||||
IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`
|
||||
}
|
||||
|
||||
type Hooks struct {
|
||||
@@ -191,20 +205,74 @@ type Hooks struct {
|
||||
Poststop []Hook
|
||||
}
|
||||
|
||||
// HookState is the payload provided to a hook on execution.
|
||||
type HookState struct {
|
||||
Version string `json:"version"`
|
||||
ID string `json:"id"`
|
||||
Pid int `json:"pid"`
|
||||
Root string `json:"root"`
|
||||
type Capabilities struct {
|
||||
// Bounding is the set of capabilities checked by the kernel.
|
||||
Bounding []string
|
||||
// Effective is the set of capabilities checked by the kernel.
|
||||
Effective []string
|
||||
// Inheritable is the capabilities preserved across execve.
|
||||
Inheritable []string
|
||||
// Permitted is the limiting superset for effective capabilities.
|
||||
Permitted []string
|
||||
// Ambient is the ambient set of capabilities that are kept.
|
||||
Ambient []string
|
||||
}
|
||||
|
||||
func (hooks *Hooks) UnmarshalJSON(b []byte) error {
|
||||
var state struct {
|
||||
Prestart []CommandHook
|
||||
Poststart []CommandHook
|
||||
Poststop []CommandHook
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(b, &state); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
deserialize := func(shooks []CommandHook) (hooks []Hook) {
|
||||
for _, shook := range shooks {
|
||||
hooks = append(hooks, shook)
|
||||
}
|
||||
|
||||
return hooks
|
||||
}
|
||||
|
||||
hooks.Prestart = deserialize(state.Prestart)
|
||||
hooks.Poststart = deserialize(state.Poststart)
|
||||
hooks.Poststop = deserialize(state.Poststop)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (hooks Hooks) MarshalJSON() ([]byte, error) {
|
||||
serialize := func(hooks []Hook) (serializableHooks []CommandHook) {
|
||||
for _, hook := range hooks {
|
||||
switch chook := hook.(type) {
|
||||
case CommandHook:
|
||||
serializableHooks = append(serializableHooks, chook)
|
||||
default:
|
||||
logrus.Warnf("cannot serialize hook of type %T, skipping", hook)
|
||||
}
|
||||
}
|
||||
|
||||
return serializableHooks
|
||||
}
|
||||
|
||||
return json.Marshal(map[string]interface{}{
|
||||
"prestart": serialize(hooks.Prestart),
|
||||
"poststart": serialize(hooks.Poststart),
|
||||
"poststop": serialize(hooks.Poststop),
|
||||
})
|
||||
}
|
||||
|
||||
// HookState is the payload provided to a hook on execution.
|
||||
type HookState specs.State
|
||||
|
||||
type Hook interface {
|
||||
// Run executes the hook with the provided state.
|
||||
Run(HookState) error
|
||||
}
|
||||
|
||||
// NewFunctionHooks will call the provided function when the hook is run.
|
||||
// NewFunctionHook will call the provided function when the hook is run.
|
||||
func NewFunctionHook(f func(HookState) error) FuncHook {
|
||||
return FuncHook{
|
||||
run: f,
|
||||
@@ -220,13 +288,14 @@ func (f FuncHook) Run(s HookState) error {
|
||||
}
|
||||
|
||||
type Command struct {
|
||||
Path string `json:"path"`
|
||||
Args []string `json:"args"`
|
||||
Env []string `json:"env"`
|
||||
Dir string `json:"dir"`
|
||||
Path string `json:"path"`
|
||||
Args []string `json:"args"`
|
||||
Env []string `json:"env"`
|
||||
Dir string `json:"dir"`
|
||||
Timeout *time.Duration `json:"timeout"`
|
||||
}
|
||||
|
||||
// NewCommandHooks will execute the provided command when the hook is run.
|
||||
// NewCommandHook will execute the provided command when the hook is run.
|
||||
func NewCommandHook(cmd Command) CommandHook {
|
||||
return CommandHook{
|
||||
Command: cmd,
|
||||
@@ -242,11 +311,38 @@ func (c Command) Run(s HookState) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var stdout, stderr bytes.Buffer
|
||||
cmd := exec.Cmd{
|
||||
Path: c.Path,
|
||||
Args: c.Args,
|
||||
Env: c.Env,
|
||||
Stdin: bytes.NewReader(b),
|
||||
Path: c.Path,
|
||||
Args: c.Args,
|
||||
Env: c.Env,
|
||||
Stdin: bytes.NewReader(b),
|
||||
Stdout: &stdout,
|
||||
Stderr: &stderr,
|
||||
}
|
||||
if err := cmd.Start(); err != nil {
|
||||
return err
|
||||
}
|
||||
errC := make(chan error, 1)
|
||||
go func() {
|
||||
err := cmd.Wait()
|
||||
if err != nil {
|
||||
err = fmt.Errorf("error running hook: %v, stdout: %s, stderr: %s", err, stdout.String(), stderr.String())
|
||||
}
|
||||
errC <- err
|
||||
}()
|
||||
var timerCh <-chan time.Time
|
||||
if c.Timeout != nil {
|
||||
timer := time.NewTimer(*c.Timeout)
|
||||
defer timer.Stop()
|
||||
timerCh = timer.C
|
||||
}
|
||||
select {
|
||||
case err := <-errC:
|
||||
return err
|
||||
case <-timerCh:
|
||||
cmd.Process.Kill()
|
||||
cmd.Wait()
|
||||
return fmt.Errorf("hook ran past specified timeout of %.1fs", c.Timeout.Seconds())
|
||||
}
|
||||
return cmd.Run()
|
||||
}
|
||||
|
||||
61
vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go
generated
vendored
Normal file
61
vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go
generated
vendored
Normal file
@@ -0,0 +1,61 @@
|
||||
package configs
|
||||
|
||||
import "fmt"
|
||||
|
||||
// HostUID gets the translated uid for the process on host which could be
|
||||
// different when user namespaces are enabled.
|
||||
func (c Config) HostUID(containerId int) (int, error) {
|
||||
if c.Namespaces.Contains(NEWUSER) {
|
||||
if c.UidMappings == nil {
|
||||
return -1, fmt.Errorf("User namespaces enabled, but no uid mappings found.")
|
||||
}
|
||||
id, found := c.hostIDFromMapping(containerId, c.UidMappings)
|
||||
if !found {
|
||||
return -1, fmt.Errorf("User namespaces enabled, but no user mapping found.")
|
||||
}
|
||||
return id, nil
|
||||
}
|
||||
// Return unchanged id.
|
||||
return containerId, nil
|
||||
}
|
||||
|
||||
// HostRootUID gets the root uid for the process on host which could be non-zero
|
||||
// when user namespaces are enabled.
|
||||
func (c Config) HostRootUID() (int, error) {
|
||||
return c.HostUID(0)
|
||||
}
|
||||
|
||||
// HostGID gets the translated gid for the process on host which could be
|
||||
// different when user namespaces are enabled.
|
||||
func (c Config) HostGID(containerId int) (int, error) {
|
||||
if c.Namespaces.Contains(NEWUSER) {
|
||||
if c.GidMappings == nil {
|
||||
return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.")
|
||||
}
|
||||
id, found := c.hostIDFromMapping(containerId, c.GidMappings)
|
||||
if !found {
|
||||
return -1, fmt.Errorf("User namespaces enabled, but no group mapping found.")
|
||||
}
|
||||
return id, nil
|
||||
}
|
||||
// Return unchanged id.
|
||||
return containerId, nil
|
||||
}
|
||||
|
||||
// HostRootGID gets the root gid for the process on host which could be non-zero
|
||||
// when user namespaces are enabled.
|
||||
func (c Config) HostRootGID() (int, error) {
|
||||
return c.HostGID(0)
|
||||
}
|
||||
|
||||
// Utility function that gets a host ID for a container ID from user namespace map
|
||||
// if that ID is present in the map.
|
||||
func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) {
|
||||
for _, m := range uMap {
|
||||
if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) {
|
||||
hostID := m.HostID + (containerID - m.ContainerID)
|
||||
return hostID, true
|
||||
}
|
||||
}
|
||||
return -1, false
|
||||
}
|
||||
51
vendor/github.com/opencontainers/runc/libcontainer/configs/config_unix.go
generated
vendored
51
vendor/github.com/opencontainers/runc/libcontainer/configs/config_unix.go
generated
vendored
@@ -1,51 +0,0 @@
|
||||
// +build freebsd linux
|
||||
|
||||
package configs
|
||||
|
||||
import "fmt"
|
||||
|
||||
// Gets the root uid for the process on host which could be non-zero
|
||||
// when user namespaces are enabled.
|
||||
func (c Config) HostUID() (int, error) {
|
||||
if c.Namespaces.Contains(NEWUSER) {
|
||||
if c.UidMappings == nil {
|
||||
return -1, fmt.Errorf("User namespaces enabled, but no user mappings found.")
|
||||
}
|
||||
id, found := c.hostIDFromMapping(0, c.UidMappings)
|
||||
if !found {
|
||||
return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.")
|
||||
}
|
||||
return id, nil
|
||||
}
|
||||
// Return default root uid 0
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// Gets the root gid for the process on host which could be non-zero
|
||||
// when user namespaces are enabled.
|
||||
func (c Config) HostGID() (int, error) {
|
||||
if c.Namespaces.Contains(NEWUSER) {
|
||||
if c.GidMappings == nil {
|
||||
return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.")
|
||||
}
|
||||
id, found := c.hostIDFromMapping(0, c.GidMappings)
|
||||
if !found {
|
||||
return -1, fmt.Errorf("User namespaces enabled, but no root group mapping found.")
|
||||
}
|
||||
return id, nil
|
||||
}
|
||||
// Return default root gid 0
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// Utility function that gets a host ID for a container ID from user namespace map
|
||||
// if that ID is present in the map.
|
||||
func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) {
|
||||
for _, m := range uMap {
|
||||
if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) {
|
||||
hostID := m.HostID + (containerID - m.ContainerID)
|
||||
return hostID, true
|
||||
}
|
||||
}
|
||||
return -1, false
|
||||
}
|
||||
156
vendor/github.com/opencontainers/runc/libcontainer/configs/config_unix_test.go
generated
vendored
156
vendor/github.com/opencontainers/runc/libcontainer/configs/config_unix_test.go
generated
vendored
@@ -1,156 +0,0 @@
|
||||
// +build linux freebsd
|
||||
|
||||
package configs
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// Checks whether the expected capability is specified in the capabilities.
|
||||
func contains(expected string, values []string) bool {
|
||||
for _, v := range values {
|
||||
if v == expected {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func containsDevice(expected *Device, values []*Device) bool {
|
||||
for _, d := range values {
|
||||
if d.Path == expected.Path &&
|
||||
d.Permissions == expected.Permissions &&
|
||||
d.FileMode == expected.FileMode &&
|
||||
d.Major == expected.Major &&
|
||||
d.Minor == expected.Minor &&
|
||||
d.Type == expected.Type {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func loadConfig(name string) (*Config, error) {
|
||||
f, err := os.Open(filepath.Join("../sample_configs", name))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var container *Config
|
||||
if err := json.NewDecoder(f).Decode(&container); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Check that a config doesn't contain extra fields
|
||||
var configMap, abstractMap map[string]interface{}
|
||||
|
||||
if _, err := f.Seek(0, 0); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := json.NewDecoder(f).Decode(&abstractMap); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
configData, err := json.Marshal(&container)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(configData, &configMap); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for k := range configMap {
|
||||
delete(abstractMap, k)
|
||||
}
|
||||
|
||||
if len(abstractMap) != 0 {
|
||||
return nil, fmt.Errorf("unknown fields: %s", abstractMap)
|
||||
}
|
||||
|
||||
return container, nil
|
||||
}
|
||||
|
||||
func TestRemoveNamespace(t *testing.T) {
|
||||
ns := Namespaces{
|
||||
{Type: NEWNET},
|
||||
}
|
||||
if !ns.Remove(NEWNET) {
|
||||
t.Fatal("NEWNET was not removed")
|
||||
}
|
||||
if len(ns) != 0 {
|
||||
t.Fatalf("namespaces should have 0 items but reports %d", len(ns))
|
||||
}
|
||||
}
|
||||
|
||||
func TestHostUIDNoUSERNS(t *testing.T) {
|
||||
config := &Config{
|
||||
Namespaces: Namespaces{},
|
||||
}
|
||||
uid, err := config.HostUID()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if uid != 0 {
|
||||
t.Fatalf("expected uid 0 with no USERNS but received %d", uid)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHostUIDWithUSERNS(t *testing.T) {
|
||||
config := &Config{
|
||||
Namespaces: Namespaces{{Type: NEWUSER}},
|
||||
UidMappings: []IDMap{
|
||||
{
|
||||
ContainerID: 0,
|
||||
HostID: 1000,
|
||||
Size: 1,
|
||||
},
|
||||
},
|
||||
}
|
||||
uid, err := config.HostUID()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if uid != 1000 {
|
||||
t.Fatalf("expected uid 1000 with no USERNS but received %d", uid)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHostGIDNoUSERNS(t *testing.T) {
|
||||
config := &Config{
|
||||
Namespaces: Namespaces{},
|
||||
}
|
||||
uid, err := config.HostGID()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if uid != 0 {
|
||||
t.Fatalf("expected gid 0 with no USERNS but received %d", uid)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHostGIDWithUSERNS(t *testing.T) {
|
||||
config := &Config{
|
||||
Namespaces: Namespaces{{Type: NEWUSER}},
|
||||
GidMappings: []IDMap{
|
||||
{
|
||||
ContainerID: 0,
|
||||
HostID: 1000,
|
||||
Size: 1,
|
||||
},
|
||||
},
|
||||
}
|
||||
uid, err := config.HostGID()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if uid != 1000 {
|
||||
t.Fatalf("expected gid 1000 with no USERNS but received %d", uid)
|
||||
}
|
||||
}
|
||||
3
vendor/github.com/opencontainers/runc/libcontainer/configs/config_windows_test.go
generated
vendored
3
vendor/github.com/opencontainers/runc/libcontainer/configs/config_windows_test.go
generated
vendored
@@ -1,3 +0,0 @@
|
||||
package configs
|
||||
|
||||
// All current tests are for Unix-specific functionality
|
||||
20
vendor/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go
generated
vendored
20
vendor/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go
generated
vendored
@@ -1,9 +1,9 @@
|
||||
// +build linux freebsd
|
||||
// +build linux
|
||||
|
||||
package configs
|
||||
|
||||
var (
|
||||
// These are devices that are to be both allowed and created.
|
||||
// DefaultSimpleDevices are devices that are to be both allowed and created.
|
||||
DefaultSimpleDevices = []*Device{
|
||||
// /dev/null and zero
|
||||
{
|
||||
@@ -107,19 +107,5 @@ var (
|
||||
Permissions: "rwm",
|
||||
},
|
||||
}, DefaultSimpleDevices...)
|
||||
DefaultAutoCreatedDevices = append([]*Device{
|
||||
{
|
||||
// /dev/fuse is created but not allowed.
|
||||
// This is to allow java to work. Because java
|
||||
// Insists on there being a /dev/fuse
|
||||
// https://github.com/docker/docker/issues/514
|
||||
// https://github.com/docker/docker/issues/2393
|
||||
//
|
||||
Path: "/dev/fuse",
|
||||
Type: 'c',
|
||||
Major: 10,
|
||||
Minor: 229,
|
||||
Permissions: "rwm",
|
||||
},
|
||||
}, DefaultSimpleDevices...)
|
||||
DefaultAutoCreatedDevices = append([]*Device{}, DefaultSimpleDevices...)
|
||||
)
|
||||
|
||||
7
vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go
generated
vendored
Normal file
7
vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go
generated
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
package configs
|
||||
|
||||
type IntelRdt struct {
|
||||
// The schema for L3 cache id and capacity bitmask (CBM)
|
||||
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
||||
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
|
||||
}
|
||||
9
vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go
generated
vendored
9
vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go
generated
vendored
@@ -1,5 +1,11 @@
|
||||
package configs
|
||||
|
||||
const (
|
||||
// EXT_COPYUP is a directive to copy up the contents of a directory when
|
||||
// a tmpfs is mounted over it.
|
||||
EXT_COPYUP = 1 << iota
|
||||
)
|
||||
|
||||
type Mount struct {
|
||||
// Source path for the mount.
|
||||
Source string `json:"source"`
|
||||
@@ -22,6 +28,9 @@ type Mount struct {
|
||||
// Relabel source if set, "z" indicates shared, "Z" indicates unshared.
|
||||
Relabel string `json:"relabel"`
|
||||
|
||||
// Extensions are additional flags that are specific to runc.
|
||||
Extensions int `json:"extensions"`
|
||||
|
||||
// Optional Command to be run before Source is mounted.
|
||||
PremountCmds []Command `json:"premount_cmds"`
|
||||
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
// +build linux freebsd
|
||||
|
||||
package configs
|
||||
|
||||
import "fmt"
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"sync"
|
||||
)
|
||||
|
||||
const (
|
||||
NEWNET NamespaceType = "NEWNET"
|
||||
@@ -13,14 +15,59 @@ const (
|
||||
NEWUSER NamespaceType = "NEWUSER"
|
||||
)
|
||||
|
||||
var (
|
||||
nsLock sync.Mutex
|
||||
supportedNamespaces = make(map[NamespaceType]bool)
|
||||
)
|
||||
|
||||
// NsName converts the namespace type to its filename
|
||||
func NsName(ns NamespaceType) string {
|
||||
switch ns {
|
||||
case NEWNET:
|
||||
return "net"
|
||||
case NEWNS:
|
||||
return "mnt"
|
||||
case NEWPID:
|
||||
return "pid"
|
||||
case NEWIPC:
|
||||
return "ipc"
|
||||
case NEWUSER:
|
||||
return "user"
|
||||
case NEWUTS:
|
||||
return "uts"
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// IsNamespaceSupported returns whether a namespace is available or
|
||||
// not
|
||||
func IsNamespaceSupported(ns NamespaceType) bool {
|
||||
nsLock.Lock()
|
||||
defer nsLock.Unlock()
|
||||
supported, ok := supportedNamespaces[ns]
|
||||
if ok {
|
||||
return supported
|
||||
}
|
||||
nsFile := NsName(ns)
|
||||
// if the namespace type is unknown, just return false
|
||||
if nsFile == "" {
|
||||
return false
|
||||
}
|
||||
_, err := os.Stat(fmt.Sprintf("/proc/self/ns/%s", nsFile))
|
||||
// a namespace is supported if it exists and we have permissions to read it
|
||||
supported = err == nil
|
||||
supportedNamespaces[ns] = supported
|
||||
return supported
|
||||
}
|
||||
|
||||
func NamespaceTypes() []NamespaceType {
|
||||
return []NamespaceType{
|
||||
NEWUSER, // Keep user NS always first, don't move it.
|
||||
NEWIPC,
|
||||
NEWUTS,
|
||||
NEWNET,
|
||||
NEWPID,
|
||||
NEWNS,
|
||||
NEWUTS,
|
||||
NEWIPC,
|
||||
NEWUSER,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -32,29 +79,7 @@ type Namespace struct {
|
||||
}
|
||||
|
||||
func (n *Namespace) GetPath(pid int) string {
|
||||
if n.Path != "" {
|
||||
return n.Path
|
||||
}
|
||||
return fmt.Sprintf("/proc/%d/ns/%s", pid, n.file())
|
||||
}
|
||||
|
||||
func (n *Namespace) file() string {
|
||||
file := ""
|
||||
switch n.Type {
|
||||
case NEWNET:
|
||||
file = "net"
|
||||
case NEWNS:
|
||||
file = "mnt"
|
||||
case NEWPID:
|
||||
file = "pid"
|
||||
case NEWIPC:
|
||||
file = "ipc"
|
||||
case NEWUSER:
|
||||
file = "user"
|
||||
case NEWUTS:
|
||||
file = "uts"
|
||||
}
|
||||
return file
|
||||
return fmt.Sprintf("/proc/%d/ns/%s", pid, NsName(n.Type))
|
||||
}
|
||||
|
||||
func (n *Namespaces) Remove(t NamespaceType) bool {
|
||||
@@ -87,3 +112,11 @@ func (n *Namespaces) index(t NamespaceType) int {
|
||||
func (n *Namespaces) Contains(t NamespaceType) bool {
|
||||
return n.index(t) != -1
|
||||
}
|
||||
|
||||
func (n *Namespaces) PathOf(t NamespaceType) string {
|
||||
i := n.index(t)
|
||||
if i == -1 {
|
||||
return ""
|
||||
}
|
||||
return (*n)[i].Path
|
||||
}
|
||||
16
vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go
generated
vendored
16
vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go
generated
vendored
@@ -2,23 +2,23 @@
|
||||
|
||||
package configs
|
||||
|
||||
import "syscall"
|
||||
import "golang.org/x/sys/unix"
|
||||
|
||||
func (n *Namespace) Syscall() int {
|
||||
return namespaceInfo[n.Type]
|
||||
}
|
||||
|
||||
var namespaceInfo = map[NamespaceType]int{
|
||||
NEWNET: syscall.CLONE_NEWNET,
|
||||
NEWNS: syscall.CLONE_NEWNS,
|
||||
NEWUSER: syscall.CLONE_NEWUSER,
|
||||
NEWIPC: syscall.CLONE_NEWIPC,
|
||||
NEWUTS: syscall.CLONE_NEWUTS,
|
||||
NEWPID: syscall.CLONE_NEWPID,
|
||||
NEWNET: unix.CLONE_NEWNET,
|
||||
NEWNS: unix.CLONE_NEWNS,
|
||||
NEWUSER: unix.CLONE_NEWUSER,
|
||||
NEWIPC: unix.CLONE_NEWIPC,
|
||||
NEWUTS: unix.CLONE_NEWUTS,
|
||||
NEWPID: unix.CLONE_NEWPID,
|
||||
}
|
||||
|
||||
// CloneFlags parses the container's Namespaces options to set the correct
|
||||
// flags on clone, unshare. This functions returns flags only for new namespaces.
|
||||
// flags on clone, unshare. This function returns flags only for new namespaces.
|
||||
func (n *Namespaces) CloneFlags() uintptr {
|
||||
var flag int
|
||||
for _, v := range *n {
|
||||
|
||||
@@ -4,12 +4,10 @@ package configs
|
||||
|
||||
func (n *Namespace) Syscall() int {
|
||||
panic("No namespace syscall support")
|
||||
return 0
|
||||
}
|
||||
|
||||
// CloneFlags parses the container's Namespaces options to set the correct
|
||||
// flags on clone, unshare. This functions returns flags only for new namespaces.
|
||||
// flags on clone, unshare. This function returns flags only for new namespaces.
|
||||
func (n *Namespaces) CloneFlags() uintptr {
|
||||
panic("No namespace syscall support")
|
||||
return uintptr(0)
|
||||
}
|
||||
|
||||
2
vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go
generated
vendored
2
vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go
generated
vendored
@@ -1,4 +1,4 @@
|
||||
// +build !linux,!freebsd
|
||||
// +build !linux
|
||||
|
||||
package configs
|
||||
|
||||
|
||||
93
vendor/github.com/opencontainers/runc/libcontainer/configs/validate/config.go
generated
vendored
93
vendor/github.com/opencontainers/runc/libcontainer/configs/validate/config.go
generated
vendored
@@ -1,93 +0,0 @@
|
||||
package validate
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type Validator interface {
|
||||
Validate(*configs.Config) error
|
||||
}
|
||||
|
||||
func New() Validator {
|
||||
return &ConfigValidator{}
|
||||
}
|
||||
|
||||
type ConfigValidator struct {
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) Validate(config *configs.Config) error {
|
||||
if err := v.rootfs(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.network(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.hostname(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.security(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.usernamespace(config); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// rootfs validates the the rootfs is an absolute path and is not a symlink
|
||||
// to the container's root filesystem.
|
||||
func (v *ConfigValidator) rootfs(config *configs.Config) error {
|
||||
cleaned, err := filepath.Abs(config.Rootfs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if cleaned, err = filepath.EvalSymlinks(cleaned); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.Rootfs != cleaned {
|
||||
return fmt.Errorf("%s is not an absolute path or is a symlink", config.Rootfs)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) network(config *configs.Config) error {
|
||||
if !config.Namespaces.Contains(configs.NEWNET) {
|
||||
if len(config.Networks) > 0 || len(config.Routes) > 0 {
|
||||
return fmt.Errorf("unable to apply network settings without a private NET namespace")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) hostname(config *configs.Config) error {
|
||||
if config.Hostname != "" && !config.Namespaces.Contains(configs.NEWUTS) {
|
||||
return fmt.Errorf("unable to set hostname without a private UTS namespace")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) security(config *configs.Config) error {
|
||||
// restrict sys without mount namespace
|
||||
if (len(config.MaskPaths) > 0 || len(config.ReadonlyPaths) > 0) &&
|
||||
!config.Namespaces.Contains(configs.NEWNS) {
|
||||
return fmt.Errorf("unable to restrict sys entries without a private MNT namespace")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) usernamespace(config *configs.Config) error {
|
||||
if config.Namespaces.Contains(configs.NEWUSER) {
|
||||
if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) {
|
||||
return fmt.Errorf("USER namespaces aren't enabled in the kernel")
|
||||
}
|
||||
} else {
|
||||
if config.UidMappings != nil || config.GidMappings != nil {
|
||||
return fmt.Errorf("User namespace mappings specified, but USER namespace isn't enabled in the config")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
117
vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go
generated
vendored
Normal file
117
vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go
generated
vendored
Normal file
@@ -0,0 +1,117 @@
|
||||
package validate
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"reflect"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
var (
|
||||
geteuid = os.Geteuid
|
||||
getegid = os.Getegid
|
||||
)
|
||||
|
||||
func (v *ConfigValidator) rootless(config *configs.Config) error {
|
||||
if err := rootlessMappings(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := rootlessMount(config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// XXX: We currently can't verify the user config at all, because
|
||||
// configs.Config doesn't store the user-related configs. So this
|
||||
// has to be verified by setupUser() in init_linux.go.
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func hasIDMapping(id int, mappings []configs.IDMap) bool {
|
||||
for _, m := range mappings {
|
||||
if id >= m.ContainerID && id < m.ContainerID+m.Size {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func rootlessMappings(config *configs.Config) error {
|
||||
if euid := geteuid(); euid != 0 {
|
||||
if !config.Namespaces.Contains(configs.NEWUSER) {
|
||||
return fmt.Errorf("rootless containers require user namespaces")
|
||||
}
|
||||
}
|
||||
|
||||
if len(config.UidMappings) == 0 {
|
||||
return fmt.Errorf("rootless containers requires at least one UID mapping")
|
||||
}
|
||||
if len(config.GidMappings) == 0 {
|
||||
return fmt.Errorf("rootless containers requires at least one UID mapping")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// cgroup verifies that the user isn't trying to set any cgroup limits or paths.
|
||||
func rootlessCgroup(config *configs.Config) error {
|
||||
// Nothing set at all.
|
||||
if config.Cgroups == nil || config.Cgroups.Resources == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Used for comparing to the zero value.
|
||||
left := reflect.ValueOf(*config.Cgroups.Resources)
|
||||
right := reflect.Zero(left.Type())
|
||||
|
||||
// This is all we need to do, since specconv won't add cgroup options in
|
||||
// rootless mode.
|
||||
if !reflect.DeepEqual(left.Interface(), right.Interface()) {
|
||||
return fmt.Errorf("cannot specify resource limits in rootless container")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// mount verifies that the user isn't trying to set up any mounts they don't have
|
||||
// the rights to do. In addition, it makes sure that no mount has a `uid=` or
|
||||
// `gid=` option that doesn't resolve to root.
|
||||
func rootlessMount(config *configs.Config) error {
|
||||
// XXX: We could whitelist allowed devices at this point, but I'm not
|
||||
// convinced that's a good idea. The kernel is the best arbiter of
|
||||
// access control.
|
||||
|
||||
for _, mount := range config.Mounts {
|
||||
// Check that the options list doesn't contain any uid= or gid= entries
|
||||
// that don't resolve to root.
|
||||
for _, opt := range strings.Split(mount.Data, ",") {
|
||||
if strings.HasPrefix(opt, "uid=") {
|
||||
var uid int
|
||||
n, err := fmt.Sscanf(opt, "uid=%d", &uid)
|
||||
if n != 1 || err != nil {
|
||||
// Ignore unknown mount options.
|
||||
continue
|
||||
}
|
||||
if !hasIDMapping(uid, config.UidMappings) {
|
||||
return fmt.Errorf("cannot specify uid= mount options for unmapped uid in rootless containers")
|
||||
}
|
||||
}
|
||||
|
||||
if strings.HasPrefix(opt, "gid=") {
|
||||
var gid int
|
||||
n, err := fmt.Sscanf(opt, "gid=%d", &gid)
|
||||
if n != 1 || err != nil {
|
||||
// Ignore unknown mount options.
|
||||
continue
|
||||
}
|
||||
if !hasIDMapping(gid, config.GidMappings) {
|
||||
return fmt.Errorf("cannot specify gid= mount options for unmapped gid in rootless containers")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
212
vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go
generated
vendored
Normal file
212
vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go
generated
vendored
Normal file
@@ -0,0 +1,212 @@
|
||||
package validate
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/intelrdt"
|
||||
selinux "github.com/opencontainers/selinux/go-selinux"
|
||||
)
|
||||
|
||||
type Validator interface {
|
||||
Validate(*configs.Config) error
|
||||
}
|
||||
|
||||
func New() Validator {
|
||||
return &ConfigValidator{}
|
||||
}
|
||||
|
||||
type ConfigValidator struct {
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) Validate(config *configs.Config) error {
|
||||
if err := v.rootfs(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.network(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.hostname(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.security(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.usernamespace(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.sysctl(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := v.intelrdt(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.Rootless {
|
||||
if err := v.rootless(config); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// rootfs validates if the rootfs is an absolute path and is not a symlink
|
||||
// to the container's root filesystem.
|
||||
func (v *ConfigValidator) rootfs(config *configs.Config) error {
|
||||
if _, err := os.Stat(config.Rootfs); err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return fmt.Errorf("rootfs (%s) does not exist", config.Rootfs)
|
||||
}
|
||||
return err
|
||||
}
|
||||
cleaned, err := filepath.Abs(config.Rootfs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if cleaned, err = filepath.EvalSymlinks(cleaned); err != nil {
|
||||
return err
|
||||
}
|
||||
if filepath.Clean(config.Rootfs) != cleaned {
|
||||
return fmt.Errorf("%s is not an absolute path or is a symlink", config.Rootfs)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) network(config *configs.Config) error {
|
||||
if !config.Namespaces.Contains(configs.NEWNET) {
|
||||
if len(config.Networks) > 0 || len(config.Routes) > 0 {
|
||||
return fmt.Errorf("unable to apply network settings without a private NET namespace")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) hostname(config *configs.Config) error {
|
||||
if config.Hostname != "" && !config.Namespaces.Contains(configs.NEWUTS) {
|
||||
return fmt.Errorf("unable to set hostname without a private UTS namespace")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) security(config *configs.Config) error {
|
||||
// restrict sys without mount namespace
|
||||
if (len(config.MaskPaths) > 0 || len(config.ReadonlyPaths) > 0) &&
|
||||
!config.Namespaces.Contains(configs.NEWNS) {
|
||||
return fmt.Errorf("unable to restrict sys entries without a private MNT namespace")
|
||||
}
|
||||
if config.ProcessLabel != "" && !selinux.GetEnabled() {
|
||||
return fmt.Errorf("selinux label is specified in config, but selinux is disabled or not supported")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) usernamespace(config *configs.Config) error {
|
||||
if config.Namespaces.Contains(configs.NEWUSER) {
|
||||
if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) {
|
||||
return fmt.Errorf("USER namespaces aren't enabled in the kernel")
|
||||
}
|
||||
} else {
|
||||
if config.UidMappings != nil || config.GidMappings != nil {
|
||||
return fmt.Errorf("User namespace mappings specified, but USER namespace isn't enabled in the config")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// sysctl validates that the specified sysctl keys are valid or not.
|
||||
// /proc/sys isn't completely namespaced and depending on which namespaces
|
||||
// are specified, a subset of sysctls are permitted.
|
||||
func (v *ConfigValidator) sysctl(config *configs.Config) error {
|
||||
validSysctlMap := map[string]bool{
|
||||
"kernel.msgmax": true,
|
||||
"kernel.msgmnb": true,
|
||||
"kernel.msgmni": true,
|
||||
"kernel.sem": true,
|
||||
"kernel.shmall": true,
|
||||
"kernel.shmmax": true,
|
||||
"kernel.shmmni": true,
|
||||
"kernel.shm_rmid_forced": true,
|
||||
}
|
||||
|
||||
for s := range config.Sysctl {
|
||||
if validSysctlMap[s] || strings.HasPrefix(s, "fs.mqueue.") {
|
||||
if config.Namespaces.Contains(configs.NEWIPC) {
|
||||
continue
|
||||
} else {
|
||||
return fmt.Errorf("sysctl %q is not allowed in the hosts ipc namespace", s)
|
||||
}
|
||||
}
|
||||
if strings.HasPrefix(s, "net.") {
|
||||
if config.Namespaces.Contains(configs.NEWNET) {
|
||||
if path := config.Namespaces.PathOf(configs.NEWNET); path != "" {
|
||||
if err := checkHostNs(s, path); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
continue
|
||||
} else {
|
||||
return fmt.Errorf("sysctl %q is not allowed in the hosts network namespace", s)
|
||||
}
|
||||
}
|
||||
return fmt.Errorf("sysctl %q is not in a separate kernel namespace", s)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *ConfigValidator) intelrdt(config *configs.Config) error {
|
||||
if config.IntelRdt != nil {
|
||||
if !intelrdt.IsEnabled() {
|
||||
return fmt.Errorf("intelRdt is specified in config, but Intel RDT feature is not supported or enabled")
|
||||
}
|
||||
if config.IntelRdt.L3CacheSchema == "" {
|
||||
return fmt.Errorf("intelRdt is specified in config, but intelRdt.l3CacheSchema is empty")
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func isSymbolicLink(path string) (bool, error) {
|
||||
fi, err := os.Lstat(path)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
return fi.Mode()&os.ModeSymlink == os.ModeSymlink, nil
|
||||
}
|
||||
|
||||
// checkHostNs checks whether network sysctl is used in host namespace.
|
||||
func checkHostNs(sysctlConfig string, path string) error {
|
||||
var currentProcessNetns = "/proc/self/ns/net"
|
||||
// readlink on the current processes network namespace
|
||||
destOfCurrentProcess, err := os.Readlink(currentProcessNetns)
|
||||
if err != nil {
|
||||
return fmt.Errorf("read soft link %q error", currentProcessNetns)
|
||||
}
|
||||
|
||||
// First check if the provided path is a symbolic link
|
||||
symLink, err := isSymbolicLink(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not check that %q is a symlink: %v", path, err)
|
||||
}
|
||||
|
||||
if symLink == false {
|
||||
// The provided namespace is not a symbolic link,
|
||||
// it is not the host namespace.
|
||||
return nil
|
||||
}
|
||||
|
||||
// readlink on the path provided in the struct
|
||||
destOfContainer, err := os.Readlink(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("read soft link %q error", path)
|
||||
}
|
||||
if destOfContainer == destOfCurrentProcess {
|
||||
return fmt.Errorf("sysctl %q is not allowed in the hosts network namespace", sysctlConfig)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
41
vendor/github.com/opencontainers/runc/libcontainer/console_linux.go
generated
vendored
Normal file
41
vendor/github.com/opencontainers/runc/libcontainer/console_linux.go
generated
vendored
Normal file
@@ -0,0 +1,41 @@
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"os"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// mount initializes the console inside the rootfs mounting with the specified mount label
|
||||
// and applying the correct ownership of the console.
|
||||
func mountConsole(slavePath string) error {
|
||||
oldMask := unix.Umask(0000)
|
||||
defer unix.Umask(oldMask)
|
||||
f, err := os.Create("/dev/console")
|
||||
if err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
if f != nil {
|
||||
f.Close()
|
||||
}
|
||||
return unix.Mount(slavePath, "/dev/console", "bind", unix.MS_BIND, "")
|
||||
}
|
||||
|
||||
// dupStdio opens the slavePath for the console and dups the fds to the current
|
||||
// processes stdio, fd 0,1,2.
|
||||
func dupStdio(slavePath string) error {
|
||||
fd, err := unix.Open(slavePath, unix.O_RDWR, 0)
|
||||
if err != nil {
|
||||
return &os.PathError{
|
||||
Op: "open",
|
||||
Path: slavePath,
|
||||
Err: err,
|
||||
}
|
||||
}
|
||||
for _, i := range []int{0, 1, 2} {
|
||||
if err := unix.Dup3(fd, i, 0); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
166
vendor/github.com/opencontainers/runc/libcontainer/container.go
generated
vendored
Normal file
166
vendor/github.com/opencontainers/runc/libcontainer/container.go
generated
vendored
Normal file
@@ -0,0 +1,166 @@
|
||||
// Package libcontainer provides a native Go implementation for creating containers
|
||||
// with namespaces, cgroups, capabilities, and filesystem access controls.
|
||||
// It allows you to manage the lifecycle of the container performing additional operations
|
||||
// after the container is created.
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
// Status is the status of a container.
|
||||
type Status int
|
||||
|
||||
const (
|
||||
// Created is the status that denotes the container exists but has not been run yet.
|
||||
Created Status = iota
|
||||
// Running is the status that denotes the container exists and is running.
|
||||
Running
|
||||
// Pausing is the status that denotes the container exists, it is in the process of being paused.
|
||||
Pausing
|
||||
// Paused is the status that denotes the container exists, but all its processes are paused.
|
||||
Paused
|
||||
// Stopped is the status that denotes the container does not have a created or running process.
|
||||
Stopped
|
||||
)
|
||||
|
||||
func (s Status) String() string {
|
||||
switch s {
|
||||
case Created:
|
||||
return "created"
|
||||
case Running:
|
||||
return "running"
|
||||
case Pausing:
|
||||
return "pausing"
|
||||
case Paused:
|
||||
return "paused"
|
||||
case Stopped:
|
||||
return "stopped"
|
||||
default:
|
||||
return "unknown"
|
||||
}
|
||||
}
|
||||
|
||||
// BaseState represents the platform agnostic pieces relating to a
|
||||
// running container's state
|
||||
type BaseState struct {
|
||||
// ID is the container ID.
|
||||
ID string `json:"id"`
|
||||
|
||||
// InitProcessPid is the init process id in the parent namespace.
|
||||
InitProcessPid int `json:"init_process_pid"`
|
||||
|
||||
// InitProcessStartTime is the init process start time in clock cycles since boot time.
|
||||
InitProcessStartTime uint64 `json:"init_process_start"`
|
||||
|
||||
// Created is the unix timestamp for the creation time of the container in UTC
|
||||
Created time.Time `json:"created"`
|
||||
|
||||
// Config is the container's configuration.
|
||||
Config configs.Config `json:"config"`
|
||||
}
|
||||
|
||||
// BaseContainer is a libcontainer container object.
|
||||
//
|
||||
// Each container is thread-safe within the same process. Since a container can
|
||||
// be destroyed by a separate process, any function may return that the container
|
||||
// was not found. BaseContainer includes methods that are platform agnostic.
|
||||
type BaseContainer interface {
|
||||
// Returns the ID of the container
|
||||
ID() string
|
||||
|
||||
// Returns the current status of the container.
|
||||
//
|
||||
// errors:
|
||||
// ContainerNotExists - Container no longer exists,
|
||||
// Systemerror - System error.
|
||||
Status() (Status, error)
|
||||
|
||||
// State returns the current container's state information.
|
||||
//
|
||||
// errors:
|
||||
// SystemError - System error.
|
||||
State() (*State, error)
|
||||
|
||||
// Returns the current config of the container.
|
||||
Config() configs.Config
|
||||
|
||||
// Returns the PIDs inside this container. The PIDs are in the namespace of the calling process.
|
||||
//
|
||||
// errors:
|
||||
// ContainerNotExists - Container no longer exists,
|
||||
// Systemerror - System error.
|
||||
//
|
||||
// Some of the returned PIDs may no longer refer to processes in the Container, unless
|
||||
// the Container state is PAUSED in which case every PID in the slice is valid.
|
||||
Processes() ([]int, error)
|
||||
|
||||
// Returns statistics for the container.
|
||||
//
|
||||
// errors:
|
||||
// ContainerNotExists - Container no longer exists,
|
||||
// Systemerror - System error.
|
||||
Stats() (*Stats, error)
|
||||
|
||||
// Set resources of container as configured
|
||||
//
|
||||
// We can use this to change resources when containers are running.
|
||||
//
|
||||
// errors:
|
||||
// SystemError - System error.
|
||||
Set(config configs.Config) error
|
||||
|
||||
// Start a process inside the container. Returns error if process fails to
|
||||
// start. You can track process lifecycle with passed Process structure.
|
||||
//
|
||||
// errors:
|
||||
// ContainerNotExists - Container no longer exists,
|
||||
// ConfigInvalid - config is invalid,
|
||||
// ContainerPaused - Container is paused,
|
||||
// SystemError - System error.
|
||||
Start(process *Process) (err error)
|
||||
|
||||
// Run immediately starts the process inside the container. Returns error if process
|
||||
// fails to start. It does not block waiting for the exec fifo after start returns but
|
||||
// opens the fifo after start returns.
|
||||
//
|
||||
// errors:
|
||||
// ContainerNotExists - Container no longer exists,
|
||||
// ConfigInvalid - config is invalid,
|
||||
// ContainerPaused - Container is paused,
|
||||
// SystemError - System error.
|
||||
Run(process *Process) (err error)
|
||||
|
||||
// Destroys the container, if its in a valid state, after killing any
|
||||
// remaining running processes.
|
||||
//
|
||||
// Any event registrations are removed before the container is destroyed.
|
||||
// No error is returned if the container is already destroyed.
|
||||
//
|
||||
// Running containers must first be stopped using Signal(..).
|
||||
// Paused containers must first be resumed using Resume(..).
|
||||
//
|
||||
// errors:
|
||||
// ContainerNotStopped - Container is still running,
|
||||
// ContainerPaused - Container is paused,
|
||||
// SystemError - System error.
|
||||
Destroy() error
|
||||
|
||||
// Signal sends the provided signal code to the container's initial process.
|
||||
//
|
||||
// If all is specified the signal is sent to all processes in the container
|
||||
// including the initial process.
|
||||
//
|
||||
// errors:
|
||||
// SystemError - System error.
|
||||
Signal(s os.Signal, all bool) error
|
||||
|
||||
// Exec signals the container to exec the users process at the end of the init.
|
||||
//
|
||||
// errors:
|
||||
// SystemError - System error.
|
||||
Exec() error
|
||||
}
|
||||
1849
vendor/github.com/opencontainers/runc/libcontainer/container_linux.go
generated
vendored
Normal file
1849
vendor/github.com/opencontainers/runc/libcontainer/container_linux.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
40
vendor/github.com/opencontainers/runc/libcontainer/criu_opts_linux.go
generated
vendored
Normal file
40
vendor/github.com/opencontainers/runc/libcontainer/criu_opts_linux.go
generated
vendored
Normal file
@@ -0,0 +1,40 @@
|
||||
package libcontainer
|
||||
|
||||
// cgroup restoring strategy provided by criu
|
||||
type cgMode uint32
|
||||
|
||||
const (
|
||||
CRIU_CG_MODE_SOFT cgMode = 3 + iota // restore cgroup properties if only dir created by criu
|
||||
CRIU_CG_MODE_FULL // always restore all cgroups and their properties
|
||||
CRIU_CG_MODE_STRICT // restore all, requiring them to not present in the system
|
||||
CRIU_CG_MODE_DEFAULT // the same as CRIU_CG_MODE_SOFT
|
||||
)
|
||||
|
||||
type CriuPageServerInfo struct {
|
||||
Address string // IP address of CRIU page server
|
||||
Port int32 // port number of CRIU page server
|
||||
}
|
||||
|
||||
type VethPairName struct {
|
||||
ContainerInterfaceName string
|
||||
HostInterfaceName string
|
||||
}
|
||||
|
||||
type CriuOpts struct {
|
||||
ImagesDirectory string // directory for storing image files
|
||||
WorkDirectory string // directory to cd and write logs/pidfiles/stats to
|
||||
ParentImage string // directory for storing parent image files in pre-dump and dump
|
||||
LeaveRunning bool // leave container in running state after checkpoint
|
||||
TcpEstablished bool // checkpoint/restore established TCP connections
|
||||
ExternalUnixConnections bool // allow external unix connections
|
||||
ShellJob bool // allow to dump and restore shell jobs
|
||||
FileLocks bool // handle file locks, for safety
|
||||
PreDump bool // call criu predump to perform iterative checkpoint
|
||||
PageServer CriuPageServerInfo // allow to dump to criu page server
|
||||
VethPairs []VethPairName // pass the veth to criu when restore
|
||||
ManageCgroupsMode cgMode // dump or restore cgroup mode
|
||||
EmptyNs uint32 // don't c/r properties for namespace from this mask
|
||||
AutoDedup bool // auto deduplication for incremental dumps
|
||||
LazyPages bool // restore memory pages lazily using userfaultfd
|
||||
StatusFd string // fd for feedback when lazy server is ready
|
||||
}
|
||||
1178
vendor/github.com/opencontainers/runc/libcontainer/criurpc/criurpc.pb.go
generated
vendored
Normal file
1178
vendor/github.com/opencontainers/runc/libcontainer/criurpc/criurpc.pb.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
104
vendor/github.com/opencontainers/runc/libcontainer/devices/devices.go
generated
vendored
Normal file
104
vendor/github.com/opencontainers/runc/libcontainer/devices/devices.go
generated
vendored
Normal file
@@ -0,0 +1,104 @@
|
||||
package devices
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
var (
|
||||
ErrNotADevice = errors.New("not a device node")
|
||||
)
|
||||
|
||||
// Testing dependencies
|
||||
var (
|
||||
unixLstat = unix.Lstat
|
||||
ioutilReadDir = ioutil.ReadDir
|
||||
)
|
||||
|
||||
// Given the path to a device and its cgroup_permissions(which cannot be easily queried) look up the information about a linux device and return that information as a Device struct.
|
||||
func DeviceFromPath(path, permissions string) (*configs.Device, error) {
|
||||
var stat unix.Stat_t
|
||||
err := unixLstat(path, &stat)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var (
|
||||
devNumber = stat.Rdev
|
||||
major = unix.Major(devNumber)
|
||||
)
|
||||
if major == 0 {
|
||||
return nil, ErrNotADevice
|
||||
}
|
||||
|
||||
var (
|
||||
devType rune
|
||||
mode = stat.Mode
|
||||
)
|
||||
switch {
|
||||
case mode&unix.S_IFBLK == unix.S_IFBLK:
|
||||
devType = 'b'
|
||||
case mode&unix.S_IFCHR == unix.S_IFCHR:
|
||||
devType = 'c'
|
||||
}
|
||||
return &configs.Device{
|
||||
Type: devType,
|
||||
Path: path,
|
||||
Major: int64(major),
|
||||
Minor: int64(unix.Minor(devNumber)),
|
||||
Permissions: permissions,
|
||||
FileMode: os.FileMode(mode),
|
||||
Uid: stat.Uid,
|
||||
Gid: stat.Gid,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func HostDevices() ([]*configs.Device, error) {
|
||||
return getDevices("/dev")
|
||||
}
|
||||
|
||||
func getDevices(path string) ([]*configs.Device, error) {
|
||||
files, err := ioutilReadDir(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out := []*configs.Device{}
|
||||
for _, f := range files {
|
||||
switch {
|
||||
case f.IsDir():
|
||||
switch f.Name() {
|
||||
// ".lxc" & ".lxd-mounts" added to address https://github.com/lxc/lxd/issues/2825
|
||||
case "pts", "shm", "fd", "mqueue", ".lxc", ".lxd-mounts":
|
||||
continue
|
||||
default:
|
||||
sub, err := getDevices(filepath.Join(path, f.Name()))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
out = append(out, sub...)
|
||||
continue
|
||||
}
|
||||
case f.Name() == "console":
|
||||
continue
|
||||
}
|
||||
device, err := DeviceFromPath(filepath.Join(path, f.Name()), "rwm")
|
||||
if err != nil {
|
||||
if err == ErrNotADevice {
|
||||
continue
|
||||
}
|
||||
if os.IsNotExist(err) {
|
||||
continue
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, device)
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
70
vendor/github.com/opencontainers/runc/libcontainer/error.go
generated
vendored
Normal file
70
vendor/github.com/opencontainers/runc/libcontainer/error.go
generated
vendored
Normal file
@@ -0,0 +1,70 @@
|
||||
package libcontainer
|
||||
|
||||
import "io"
|
||||
|
||||
// ErrorCode is the API error code type.
|
||||
type ErrorCode int
|
||||
|
||||
// API error codes.
|
||||
const (
|
||||
// Factory errors
|
||||
IdInUse ErrorCode = iota
|
||||
InvalidIdFormat
|
||||
|
||||
// Container errors
|
||||
ContainerNotExists
|
||||
ContainerPaused
|
||||
ContainerNotStopped
|
||||
ContainerNotRunning
|
||||
ContainerNotPaused
|
||||
|
||||
// Process errors
|
||||
NoProcessOps
|
||||
|
||||
// Common errors
|
||||
ConfigInvalid
|
||||
ConsoleExists
|
||||
SystemError
|
||||
)
|
||||
|
||||
func (c ErrorCode) String() string {
|
||||
switch c {
|
||||
case IdInUse:
|
||||
return "Id already in use"
|
||||
case InvalidIdFormat:
|
||||
return "Invalid format"
|
||||
case ContainerPaused:
|
||||
return "Container paused"
|
||||
case ConfigInvalid:
|
||||
return "Invalid configuration"
|
||||
case SystemError:
|
||||
return "System error"
|
||||
case ContainerNotExists:
|
||||
return "Container does not exist"
|
||||
case ContainerNotStopped:
|
||||
return "Container is not stopped"
|
||||
case ContainerNotRunning:
|
||||
return "Container is not running"
|
||||
case ConsoleExists:
|
||||
return "Console exists for process"
|
||||
case ContainerNotPaused:
|
||||
return "Container is not paused"
|
||||
case NoProcessOps:
|
||||
return "No process operations"
|
||||
default:
|
||||
return "Unknown error"
|
||||
}
|
||||
}
|
||||
|
||||
// Error is the API error type.
|
||||
type Error interface {
|
||||
error
|
||||
|
||||
// Returns an error if it failed to write the detail of the Error to w.
|
||||
// The detail of the Error may include the error message and a
|
||||
// representation of the stack trace.
|
||||
Detail(w io.Writer) error
|
||||
|
||||
// Returns the error code for this error.
|
||||
Code() ErrorCode
|
||||
}
|
||||
44
vendor/github.com/opencontainers/runc/libcontainer/factory.go
generated
vendored
Normal file
44
vendor/github.com/opencontainers/runc/libcontainer/factory.go
generated
vendored
Normal file
@@ -0,0 +1,44 @@
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type Factory interface {
|
||||
// Creates a new container with the given id and starts the initial process inside it.
|
||||
// id must be a string containing only letters, digits and underscores and must contain
|
||||
// between 1 and 1024 characters, inclusive.
|
||||
//
|
||||
// The id must not already be in use by an existing container. Containers created using
|
||||
// a factory with the same path (and filesystem) must have distinct ids.
|
||||
//
|
||||
// Returns the new container with a running process.
|
||||
//
|
||||
// errors:
|
||||
// IdInUse - id is already in use by a container
|
||||
// InvalidIdFormat - id has incorrect format
|
||||
// ConfigInvalid - config is invalid
|
||||
// Systemerror - System error
|
||||
//
|
||||
// On error, any partially created container parts are cleaned up (the operation is atomic).
|
||||
Create(id string, config *configs.Config) (Container, error)
|
||||
|
||||
// Load takes an ID for an existing container and returns the container information
|
||||
// from the state. This presents a read only view of the container.
|
||||
//
|
||||
// errors:
|
||||
// Path does not exist
|
||||
// System error
|
||||
Load(id string) (Container, error)
|
||||
|
||||
// StartInitialization is an internal API to libcontainer used during the reexec of the
|
||||
// container.
|
||||
//
|
||||
// Errors:
|
||||
// Pipe connection error
|
||||
// System error
|
||||
StartInitialization() error
|
||||
|
||||
// Type returns info string about factory type (e.g. lxc, libcontainer...)
|
||||
Type() string
|
||||
}
|
||||
364
vendor/github.com/opencontainers/runc/libcontainer/factory_linux.go
generated
vendored
Normal file
364
vendor/github.com/opencontainers/runc/libcontainer/factory_linux.go
generated
vendored
Normal file
@@ -0,0 +1,364 @@
|
||||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"runtime/debug"
|
||||
"strconv"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/fs"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/configs/validate"
|
||||
"github.com/opencontainers/runc/libcontainer/intelrdt"
|
||||
"github.com/opencontainers/runc/libcontainer/mount"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const (
|
||||
stateFilename = "state.json"
|
||||
execFifoFilename = "exec.fifo"
|
||||
)
|
||||
|
||||
var idRegex = regexp.MustCompile(`^[\w+-\.]+$`)
|
||||
|
||||
// InitArgs returns an options func to configure a LinuxFactory with the
|
||||
// provided init binary path and arguments.
|
||||
func InitArgs(args ...string) func(*LinuxFactory) error {
|
||||
return func(l *LinuxFactory) (err error) {
|
||||
if len(args) > 0 {
|
||||
// Resolve relative paths to ensure that its available
|
||||
// after directory changes.
|
||||
if args[0], err = filepath.Abs(args[0]); err != nil {
|
||||
return newGenericError(err, ConfigInvalid)
|
||||
}
|
||||
}
|
||||
|
||||
l.InitArgs = args
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// SystemdCgroups is an options func to configure a LinuxFactory to return
|
||||
// containers that use systemd to create and manage cgroups.
|
||||
func SystemdCgroups(l *LinuxFactory) error {
|
||||
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return &systemd.Manager{
|
||||
Cgroups: config,
|
||||
Paths: paths,
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Cgroupfs is an options func to configure a LinuxFactory to return
|
||||
// containers that use the native cgroups filesystem implementation to
|
||||
// create and manage cgroups.
|
||||
func Cgroupfs(l *LinuxFactory) error {
|
||||
l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
|
||||
return &fs.Manager{
|
||||
Cgroups: config,
|
||||
Paths: paths,
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// IntelRdtfs is an options func to configure a LinuxFactory to return
|
||||
// containers that use the Intel RDT "resource control" filesystem to
|
||||
// create and manage Intel Xeon platform shared resources (e.g., L3 cache).
|
||||
func IntelRdtFs(l *LinuxFactory) error {
|
||||
l.NewIntelRdtManager = func(config *configs.Config, id string, path string) intelrdt.Manager {
|
||||
return &intelrdt.IntelRdtManager{
|
||||
Config: config,
|
||||
Id: id,
|
||||
Path: path,
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs.
|
||||
func TmpfsRoot(l *LinuxFactory) error {
|
||||
mounted, err := mount.Mounted(l.Root)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !mounted {
|
||||
if err := unix.Mount("tmpfs", l.Root, "tmpfs", 0, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// CriuPath returns an option func to configure a LinuxFactory with the
|
||||
// provided criupath
|
||||
func CriuPath(criupath string) func(*LinuxFactory) error {
|
||||
return func(l *LinuxFactory) error {
|
||||
l.CriuPath = criupath
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// New returns a linux based container factory based in the root directory and
|
||||
// configures the factory with the provided option funcs.
|
||||
func New(root string, options ...func(*LinuxFactory) error) (Factory, error) {
|
||||
if root != "" {
|
||||
if err := os.MkdirAll(root, 0700); err != nil {
|
||||
return nil, newGenericError(err, SystemError)
|
||||
}
|
||||
}
|
||||
l := &LinuxFactory{
|
||||
Root: root,
|
||||
InitPath: "/proc/self/exe",
|
||||
InitArgs: []string{os.Args[0], "init"},
|
||||
Validator: validate.New(),
|
||||
CriuPath: "criu",
|
||||
}
|
||||
Cgroupfs(l)
|
||||
for _, opt := range options {
|
||||
if opt == nil {
|
||||
continue
|
||||
}
|
||||
if err := opt(l); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return l, nil
|
||||
}
|
||||
|
||||
// LinuxFactory implements the default factory interface for linux based systems.
|
||||
type LinuxFactory struct {
|
||||
// Root directory for the factory to store state.
|
||||
Root string
|
||||
|
||||
// InitPath is the path for calling the init responsibilities for spawning
|
||||
// a container.
|
||||
InitPath string
|
||||
|
||||
// InitArgs are arguments for calling the init responsibilities for spawning
|
||||
// a container.
|
||||
InitArgs []string
|
||||
|
||||
// CriuPath is the path to the criu binary used for checkpoint and restore of
|
||||
// containers.
|
||||
CriuPath string
|
||||
|
||||
// New{u,g}uidmapPath is the path to the binaries used for mapping with
|
||||
// rootless containers.
|
||||
NewuidmapPath string
|
||||
NewgidmapPath string
|
||||
|
||||
// Validator provides validation to container configurations.
|
||||
Validator validate.Validator
|
||||
|
||||
// NewCgroupsManager returns an initialized cgroups manager for a single container.
|
||||
NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager
|
||||
|
||||
// NewIntelRdtManager returns an initialized Intel RDT manager for a single container.
|
||||
NewIntelRdtManager func(config *configs.Config, id string, path string) intelrdt.Manager
|
||||
}
|
||||
|
||||
func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) {
|
||||
if l.Root == "" {
|
||||
return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
|
||||
}
|
||||
if err := l.validateID(id); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := l.Validator.Validate(config); err != nil {
|
||||
return nil, newGenericError(err, ConfigInvalid)
|
||||
}
|
||||
containerRoot := filepath.Join(l.Root, id)
|
||||
if _, err := os.Stat(containerRoot); err == nil {
|
||||
return nil, newGenericError(fmt.Errorf("container with id exists: %v", id), IdInUse)
|
||||
} else if !os.IsNotExist(err) {
|
||||
return nil, newGenericError(err, SystemError)
|
||||
}
|
||||
if err := os.MkdirAll(containerRoot, 0711); err != nil {
|
||||
return nil, newGenericError(err, SystemError)
|
||||
}
|
||||
if err := os.Chown(containerRoot, unix.Geteuid(), unix.Getegid()); err != nil {
|
||||
return nil, newGenericError(err, SystemError)
|
||||
}
|
||||
c := &linuxContainer{
|
||||
id: id,
|
||||
root: containerRoot,
|
||||
config: config,
|
||||
initPath: l.InitPath,
|
||||
initArgs: l.InitArgs,
|
||||
criuPath: l.CriuPath,
|
||||
newuidmapPath: l.NewuidmapPath,
|
||||
newgidmapPath: l.NewgidmapPath,
|
||||
cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
|
||||
}
|
||||
if intelrdt.IsEnabled() {
|
||||
c.intelRdtManager = l.NewIntelRdtManager(config, id, "")
|
||||
}
|
||||
c.state = &stoppedState{c: c}
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func (l *LinuxFactory) Load(id string) (Container, error) {
|
||||
if l.Root == "" {
|
||||
return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
|
||||
}
|
||||
containerRoot := filepath.Join(l.Root, id)
|
||||
state, err := l.loadState(containerRoot, id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
r := &nonChildProcess{
|
||||
processPid: state.InitProcessPid,
|
||||
processStartTime: state.InitProcessStartTime,
|
||||
fds: state.ExternalDescriptors,
|
||||
}
|
||||
c := &linuxContainer{
|
||||
initProcess: r,
|
||||
initProcessStartTime: state.InitProcessStartTime,
|
||||
id: id,
|
||||
config: &state.Config,
|
||||
initPath: l.InitPath,
|
||||
initArgs: l.InitArgs,
|
||||
criuPath: l.CriuPath,
|
||||
newuidmapPath: l.NewuidmapPath,
|
||||
newgidmapPath: l.NewgidmapPath,
|
||||
cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths),
|
||||
root: containerRoot,
|
||||
created: state.Created,
|
||||
}
|
||||
c.state = &loadedState{c: c}
|
||||
if err := c.refreshState(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if intelrdt.IsEnabled() {
|
||||
c.intelRdtManager = l.NewIntelRdtManager(&state.Config, id, state.IntelRdtPath)
|
||||
}
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func (l *LinuxFactory) Type() string {
|
||||
return "libcontainer"
|
||||
}
|
||||
|
||||
// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state
|
||||
// This is a low level implementation detail of the reexec and should not be consumed externally
|
||||
func (l *LinuxFactory) StartInitialization() (err error) {
|
||||
var (
|
||||
pipefd, fifofd int
|
||||
consoleSocket *os.File
|
||||
envInitPipe = os.Getenv("_LIBCONTAINER_INITPIPE")
|
||||
envFifoFd = os.Getenv("_LIBCONTAINER_FIFOFD")
|
||||
envConsole = os.Getenv("_LIBCONTAINER_CONSOLE")
|
||||
)
|
||||
|
||||
// Get the INITPIPE.
|
||||
pipefd, err = strconv.Atoi(envInitPipe)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to convert _LIBCONTAINER_INITPIPE=%s to int: %s", envInitPipe, err)
|
||||
}
|
||||
|
||||
var (
|
||||
pipe = os.NewFile(uintptr(pipefd), "pipe")
|
||||
it = initType(os.Getenv("_LIBCONTAINER_INITTYPE"))
|
||||
)
|
||||
defer pipe.Close()
|
||||
|
||||
// Only init processes have FIFOFD.
|
||||
fifofd = -1
|
||||
if it == initStandard {
|
||||
if fifofd, err = strconv.Atoi(envFifoFd); err != nil {
|
||||
return fmt.Errorf("unable to convert _LIBCONTAINER_FIFOFD=%s to int: %s", envFifoFd, err)
|
||||
}
|
||||
}
|
||||
|
||||
if envConsole != "" {
|
||||
console, err := strconv.Atoi(envConsole)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to convert _LIBCONTAINER_CONSOLE=%s to int: %s", envConsole, err)
|
||||
}
|
||||
consoleSocket = os.NewFile(uintptr(console), "console-socket")
|
||||
defer consoleSocket.Close()
|
||||
}
|
||||
|
||||
// clear the current process's environment to clean any libcontainer
|
||||
// specific env vars.
|
||||
os.Clearenv()
|
||||
|
||||
defer func() {
|
||||
// We have an error during the initialization of the container's init,
|
||||
// send it back to the parent process in the form of an initError.
|
||||
if werr := utils.WriteJSON(pipe, syncT{procError}); werr != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
return
|
||||
}
|
||||
if werr := utils.WriteJSON(pipe, newSystemError(err)); werr != nil {
|
||||
fmt.Fprintln(os.Stderr, err)
|
||||
return
|
||||
}
|
||||
}()
|
||||
defer func() {
|
||||
if e := recover(); e != nil {
|
||||
err = fmt.Errorf("panic from initialization: %v, %v", e, string(debug.Stack()))
|
||||
}
|
||||
}()
|
||||
|
||||
i, err := newContainerInit(it, pipe, consoleSocket, fifofd)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// If Init succeeds, syscall.Exec will not return, hence none of the defers will be called.
|
||||
return i.Init()
|
||||
}
|
||||
|
||||
func (l *LinuxFactory) loadState(root, id string) (*State, error) {
|
||||
f, err := os.Open(filepath.Join(root, stateFilename))
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil, newGenericError(fmt.Errorf("container %q does not exist", id), ContainerNotExists)
|
||||
}
|
||||
return nil, newGenericError(err, SystemError)
|
||||
}
|
||||
defer f.Close()
|
||||
var state *State
|
||||
if err := json.NewDecoder(f).Decode(&state); err != nil {
|
||||
return nil, newGenericError(err, SystemError)
|
||||
}
|
||||
return state, nil
|
||||
}
|
||||
|
||||
func (l *LinuxFactory) validateID(id string) error {
|
||||
if !idRegex.MatchString(id) {
|
||||
return newGenericError(fmt.Errorf("invalid id format: %v", id), InvalidIdFormat)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// NewuidmapPath returns an option func to configure a LinuxFactory with the
|
||||
// provided ..
|
||||
func NewuidmapPath(newuidmapPath string) func(*LinuxFactory) error {
|
||||
return func(l *LinuxFactory) error {
|
||||
l.NewuidmapPath = newuidmapPath
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// NewgidmapPath returns an option func to configure a LinuxFactory with the
|
||||
// provided ..
|
||||
func NewgidmapPath(newgidmapPath string) func(*LinuxFactory) error {
|
||||
return func(l *LinuxFactory) error {
|
||||
l.NewgidmapPath = newgidmapPath
|
||||
return nil
|
||||
}
|
||||
}
|
||||
92
vendor/github.com/opencontainers/runc/libcontainer/generic_error.go
generated
vendored
Normal file
92
vendor/github.com/opencontainers/runc/libcontainer/generic_error.go
generated
vendored
Normal file
@@ -0,0 +1,92 @@
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"text/template"
|
||||
"time"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/stacktrace"
|
||||
)
|
||||
|
||||
var errorTemplate = template.Must(template.New("error").Parse(`Timestamp: {{.Timestamp}}
|
||||
Code: {{.ECode}}
|
||||
{{if .Message }}
|
||||
Message: {{.Message}}
|
||||
{{end}}
|
||||
Frames:{{range $i, $frame := .Stack.Frames}}
|
||||
---
|
||||
{{$i}}: {{$frame.Function}}
|
||||
Package: {{$frame.Package}}
|
||||
File: {{$frame.File}}@{{$frame.Line}}{{end}}
|
||||
`))
|
||||
|
||||
func newGenericError(err error, c ErrorCode) Error {
|
||||
if le, ok := err.(Error); ok {
|
||||
return le
|
||||
}
|
||||
gerr := &genericError{
|
||||
Timestamp: time.Now(),
|
||||
Err: err,
|
||||
ECode: c,
|
||||
Stack: stacktrace.Capture(1),
|
||||
}
|
||||
if err != nil {
|
||||
gerr.Message = err.Error()
|
||||
}
|
||||
return gerr
|
||||
}
|
||||
|
||||
func newSystemError(err error) Error {
|
||||
return createSystemError(err, "")
|
||||
}
|
||||
|
||||
func newSystemErrorWithCausef(err error, cause string, v ...interface{}) Error {
|
||||
return createSystemError(err, fmt.Sprintf(cause, v...))
|
||||
}
|
||||
|
||||
func newSystemErrorWithCause(err error, cause string) Error {
|
||||
return createSystemError(err, cause)
|
||||
}
|
||||
|
||||
// createSystemError creates the specified error with the correct number of
|
||||
// stack frames skipped. This is only to be called by the other functions for
|
||||
// formatting the error.
|
||||
func createSystemError(err error, cause string) Error {
|
||||
gerr := &genericError{
|
||||
Timestamp: time.Now(),
|
||||
Err: err,
|
||||
ECode: SystemError,
|
||||
Cause: cause,
|
||||
Stack: stacktrace.Capture(2),
|
||||
}
|
||||
if err != nil {
|
||||
gerr.Message = err.Error()
|
||||
}
|
||||
return gerr
|
||||
}
|
||||
|
||||
type genericError struct {
|
||||
Timestamp time.Time
|
||||
ECode ErrorCode
|
||||
Err error `json:"-"`
|
||||
Cause string
|
||||
Message string
|
||||
Stack stacktrace.Stacktrace
|
||||
}
|
||||
|
||||
func (e *genericError) Error() string {
|
||||
if e.Cause == "" {
|
||||
return e.Message
|
||||
}
|
||||
frame := e.Stack.Frames[0]
|
||||
return fmt.Sprintf("%s:%d: %s caused %q", frame.File, frame.Line, e.Cause, e.Message)
|
||||
}
|
||||
|
||||
func (e *genericError) Code() ErrorCode {
|
||||
return e.ECode
|
||||
}
|
||||
|
||||
func (e *genericError) Detail(w io.Writer) error {
|
||||
return errorTemplate.Execute(w, e)
|
||||
}
|
||||
534
vendor/github.com/opencontainers/runc/libcontainer/init_linux.go
generated
vendored
Normal file
534
vendor/github.com/opencontainers/runc/libcontainer/init_linux.go
generated
vendored
Normal file
@@ -0,0 +1,534 @@
|
||||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"os"
|
||||
"strings"
|
||||
"syscall" // only for Errno
|
||||
"unsafe"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/containerd/console"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/runc/libcontainer/user"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/vishvananda/netlink"
|
||||
)
|
||||
|
||||
type initType string
|
||||
|
||||
const (
|
||||
initSetns initType = "setns"
|
||||
initStandard initType = "standard"
|
||||
)
|
||||
|
||||
type pid struct {
|
||||
Pid int `json:"pid"`
|
||||
PidFirstChild int `json:"pid_first"`
|
||||
}
|
||||
|
||||
// network is an internal struct used to setup container networks.
|
||||
type network struct {
|
||||
configs.Network
|
||||
|
||||
// TempVethPeerName is a unique temporary veth peer name that was placed into
|
||||
// the container's namespace.
|
||||
TempVethPeerName string `json:"temp_veth_peer_name"`
|
||||
}
|
||||
|
||||
// initConfig is used for transferring parameters from Exec() to Init()
|
||||
type initConfig struct {
|
||||
Args []string `json:"args"`
|
||||
Env []string `json:"env"`
|
||||
Cwd string `json:"cwd"`
|
||||
Capabilities *configs.Capabilities `json:"capabilities"`
|
||||
ProcessLabel string `json:"process_label"`
|
||||
AppArmorProfile string `json:"apparmor_profile"`
|
||||
NoNewPrivileges bool `json:"no_new_privileges"`
|
||||
User string `json:"user"`
|
||||
AdditionalGroups []string `json:"additional_groups"`
|
||||
Config *configs.Config `json:"config"`
|
||||
Networks []*network `json:"network"`
|
||||
PassedFilesCount int `json:"passed_files_count"`
|
||||
ContainerId string `json:"containerid"`
|
||||
Rlimits []configs.Rlimit `json:"rlimits"`
|
||||
CreateConsole bool `json:"create_console"`
|
||||
ConsoleWidth uint16 `json:"console_width"`
|
||||
ConsoleHeight uint16 `json:"console_height"`
|
||||
Rootless bool `json:"rootless"`
|
||||
}
|
||||
|
||||
type initer interface {
|
||||
Init() error
|
||||
}
|
||||
|
||||
func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd int) (initer, error) {
|
||||
var config *initConfig
|
||||
if err := json.NewDecoder(pipe).Decode(&config); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := populateProcessEnvironment(config.Env); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
switch t {
|
||||
case initSetns:
|
||||
return &linuxSetnsInit{
|
||||
pipe: pipe,
|
||||
consoleSocket: consoleSocket,
|
||||
config: config,
|
||||
}, nil
|
||||
case initStandard:
|
||||
return &linuxStandardInit{
|
||||
pipe: pipe,
|
||||
consoleSocket: consoleSocket,
|
||||
parentPid: unix.Getppid(),
|
||||
config: config,
|
||||
fifoFd: fifoFd,
|
||||
}, nil
|
||||
}
|
||||
return nil, fmt.Errorf("unknown init type %q", t)
|
||||
}
|
||||
|
||||
// populateProcessEnvironment loads the provided environment variables into the
|
||||
// current processes's environment.
|
||||
func populateProcessEnvironment(env []string) error {
|
||||
for _, pair := range env {
|
||||
p := strings.SplitN(pair, "=", 2)
|
||||
if len(p) < 2 {
|
||||
return fmt.Errorf("invalid environment '%v'", pair)
|
||||
}
|
||||
if err := os.Setenv(p[0], p[1]); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// finalizeNamespace drops the caps, sets the correct user
|
||||
// and working dir, and closes any leaked file descriptors
|
||||
// before executing the command inside the namespace
|
||||
func finalizeNamespace(config *initConfig) error {
|
||||
// Ensure that all unwanted fds we may have accidentally
|
||||
// inherited are marked close-on-exec so they stay out of the
|
||||
// container
|
||||
if err := utils.CloseExecFrom(config.PassedFilesCount + 3); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
capabilities := &configs.Capabilities{}
|
||||
if config.Capabilities != nil {
|
||||
capabilities = config.Capabilities
|
||||
} else if config.Config.Capabilities != nil {
|
||||
capabilities = config.Config.Capabilities
|
||||
}
|
||||
w, err := newContainerCapList(capabilities)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// drop capabilities in bounding set before changing user
|
||||
if err := w.ApplyBoundingSet(); err != nil {
|
||||
return err
|
||||
}
|
||||
// preserve existing capabilities while we change users
|
||||
if err := system.SetKeepCaps(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setupUser(config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := system.ClearKeepCaps(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := w.ApplyCaps(); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.Cwd != "" {
|
||||
if err := unix.Chdir(config.Cwd); err != nil {
|
||||
return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %v", config.Cwd, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// setupConsole sets up the console from inside the container, and sends the
|
||||
// master pty fd to the config.Pipe (using cmsg). This is done to ensure that
|
||||
// consoles are scoped to a container properly (see runc#814 and the many
|
||||
// issues related to that). This has to be run *after* we've pivoted to the new
|
||||
// rootfs (and the users' configuration is entirely set up).
|
||||
func setupConsole(socket *os.File, config *initConfig, mount bool) error {
|
||||
defer socket.Close()
|
||||
// At this point, /dev/ptmx points to something that we would expect. We
|
||||
// used to change the owner of the slave path, but since the /dev/pts mount
|
||||
// can have gid=X set (at the users' option). So touching the owner of the
|
||||
// slave PTY is not necessary, as the kernel will handle that for us. Note
|
||||
// however, that setupUser (specifically fixStdioPermissions) *will* change
|
||||
// the UID owner of the console to be the user the process will run as (so
|
||||
// they can actually control their console).
|
||||
|
||||
pty, slavePath, err := console.NewPty()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if config.ConsoleHeight != 0 && config.ConsoleWidth != 0 {
|
||||
err = pty.Resize(console.WinSize{
|
||||
Height: config.ConsoleHeight,
|
||||
Width: config.ConsoleWidth,
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// After we return from here, we don't need the console anymore.
|
||||
defer pty.Close()
|
||||
|
||||
// Mount the console inside our rootfs.
|
||||
if mount {
|
||||
if err := mountConsole(slavePath); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// While we can access console.master, using the API is a good idea.
|
||||
if err := utils.SendFd(socket, pty.Name(), pty.Fd()); err != nil {
|
||||
return err
|
||||
}
|
||||
// Now, dup over all the things.
|
||||
return dupStdio(slavePath)
|
||||
}
|
||||
|
||||
// syncParentReady sends to the given pipe a JSON payload which indicates that
|
||||
// the init is ready to Exec the child process. It then waits for the parent to
|
||||
// indicate that it is cleared to Exec.
|
||||
func syncParentReady(pipe io.ReadWriter) error {
|
||||
// Tell parent.
|
||||
if err := writeSync(pipe, procReady); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Wait for parent to give the all-clear.
|
||||
if err := readSync(pipe, procRun); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// syncParentHooks sends to the given pipe a JSON payload which indicates that
|
||||
// the parent should execute pre-start hooks. It then waits for the parent to
|
||||
// indicate that it is cleared to resume.
|
||||
func syncParentHooks(pipe io.ReadWriter) error {
|
||||
// Tell parent.
|
||||
if err := writeSync(pipe, procHooks); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Wait for parent to give the all-clear.
|
||||
if err := readSync(pipe, procResume); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// setupUser changes the groups, gid, and uid for the user inside the container
|
||||
func setupUser(config *initConfig) error {
|
||||
// Set up defaults.
|
||||
defaultExecUser := user.ExecUser{
|
||||
Uid: 0,
|
||||
Gid: 0,
|
||||
Home: "/",
|
||||
}
|
||||
|
||||
passwdPath, err := user.GetPasswdPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
groupPath, err := user.GetGroupPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var addGroups []int
|
||||
if len(config.AdditionalGroups) > 0 {
|
||||
addGroups, err = user.GetAdditionalGroupsPath(config.AdditionalGroups, groupPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Rather than just erroring out later in setuid(2) and setgid(2), check
|
||||
// that the user is mapped here.
|
||||
if _, err := config.Config.HostUID(execUser.Uid); err != nil {
|
||||
return fmt.Errorf("cannot set uid to unmapped user in user namespace")
|
||||
}
|
||||
if _, err := config.Config.HostGID(execUser.Gid); err != nil {
|
||||
return fmt.Errorf("cannot set gid to unmapped user in user namespace")
|
||||
}
|
||||
|
||||
if config.Rootless {
|
||||
// We cannot set any additional groups in a rootless container and thus
|
||||
// we bail if the user asked us to do so. TODO: We currently can't do
|
||||
// this check earlier, but if libcontainer.Process.User was typesafe
|
||||
// this might work.
|
||||
if len(addGroups) > 0 {
|
||||
return fmt.Errorf("cannot set any additional groups in a rootless container")
|
||||
}
|
||||
}
|
||||
|
||||
// Before we change to the container's user make sure that the processes
|
||||
// STDIO is correctly owned by the user that we are switching to.
|
||||
if err := fixStdioPermissions(config, execUser); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// This isn't allowed in an unprivileged user namespace since Linux 3.19.
|
||||
// There's nothing we can do about /etc/group entries, so we silently
|
||||
// ignore setting groups here (since the user didn't explicitly ask us to
|
||||
// set the group).
|
||||
if !config.Rootless {
|
||||
suppGroups := append(execUser.Sgids, addGroups...)
|
||||
if err := unix.Setgroups(suppGroups); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if err := system.Setgid(execUser.Gid); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := system.Setuid(execUser.Uid); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// if we didn't get HOME already, set it based on the user's HOME
|
||||
if envHome := os.Getenv("HOME"); envHome == "" {
|
||||
if err := os.Setenv("HOME", execUser.Home); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// fixStdioPermissions fixes the permissions of PID 1's STDIO within the container to the specified user.
|
||||
// The ownership needs to match because it is created outside of the container and needs to be
|
||||
// localized.
|
||||
func fixStdioPermissions(config *initConfig, u *user.ExecUser) error {
|
||||
var null unix.Stat_t
|
||||
if err := unix.Stat("/dev/null", &null); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, fd := range []uintptr{
|
||||
os.Stdin.Fd(),
|
||||
os.Stderr.Fd(),
|
||||
os.Stdout.Fd(),
|
||||
} {
|
||||
var s unix.Stat_t
|
||||
if err := unix.Fstat(int(fd), &s); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Skip chown of /dev/null if it was used as one of the STDIO fds.
|
||||
if s.Rdev == null.Rdev {
|
||||
continue
|
||||
}
|
||||
|
||||
// We only change the uid owner (as it is possible for the mount to
|
||||
// prefer a different gid, and there's no reason for us to change it).
|
||||
// The reason why we don't just leave the default uid=X mount setup is
|
||||
// that users expect to be able to actually use their console. Without
|
||||
// this code, you couldn't effectively run as a non-root user inside a
|
||||
// container and also have a console set up.
|
||||
if err := unix.Fchown(int(fd), u.Uid, int(s.Gid)); err != nil {
|
||||
// If we've hit an EINVAL then s.Gid isn't mapped in the user
|
||||
// namespace. If we've hit an EPERM then the inode's current owner
|
||||
// is not mapped in our user namespace (in particular,
|
||||
// privileged_wrt_inode_uidgid() has failed). In either case, we
|
||||
// are in a configuration where it's better for us to just not
|
||||
// touch the stdio rather than bail at this point.
|
||||
if err == unix.EINVAL || err == unix.EPERM {
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// setupNetwork sets up and initializes any network interface inside the container.
|
||||
func setupNetwork(config *initConfig) error {
|
||||
for _, config := range config.Networks {
|
||||
strategy, err := getStrategy(config.Type)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := strategy.initialize(config); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setupRoute(config *configs.Config) error {
|
||||
for _, config := range config.Routes {
|
||||
_, dst, err := net.ParseCIDR(config.Destination)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
src := net.ParseIP(config.Source)
|
||||
if src == nil {
|
||||
return fmt.Errorf("Invalid source for route: %s", config.Source)
|
||||
}
|
||||
gw := net.ParseIP(config.Gateway)
|
||||
if gw == nil {
|
||||
return fmt.Errorf("Invalid gateway for route: %s", config.Gateway)
|
||||
}
|
||||
l, err := netlink.LinkByName(config.InterfaceName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
route := &netlink.Route{
|
||||
Scope: netlink.SCOPE_UNIVERSE,
|
||||
Dst: dst,
|
||||
Src: src,
|
||||
Gw: gw,
|
||||
LinkIndex: l.Attrs().Index,
|
||||
}
|
||||
if err := netlink.RouteAdd(route); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setupRlimits(limits []configs.Rlimit, pid int) error {
|
||||
for _, rlimit := range limits {
|
||||
if err := system.Prlimit(pid, rlimit.Type, unix.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}); err != nil {
|
||||
return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
const _P_PID = 1
|
||||
|
||||
type siginfo struct {
|
||||
si_signo int32
|
||||
si_errno int32
|
||||
si_code int32
|
||||
// below here is a union; si_pid is the only field we use
|
||||
si_pid int32
|
||||
// Pad to 128 bytes as detailed in blockUntilWaitable
|
||||
pad [96]byte
|
||||
}
|
||||
|
||||
// isWaitable returns true if the process has exited false otherwise.
|
||||
// Its based off blockUntilWaitable in src/os/wait_waitid.go
|
||||
func isWaitable(pid int) (bool, error) {
|
||||
si := &siginfo{}
|
||||
_, _, e := unix.Syscall6(unix.SYS_WAITID, _P_PID, uintptr(pid), uintptr(unsafe.Pointer(si)), unix.WEXITED|unix.WNOWAIT|unix.WNOHANG, 0, 0)
|
||||
if e != 0 {
|
||||
return false, os.NewSyscallError("waitid", e)
|
||||
}
|
||||
|
||||
return si.si_pid != 0, nil
|
||||
}
|
||||
|
||||
// isNoChildren returns true if err represents a unix.ECHILD (formerly syscall.ECHILD) false otherwise
|
||||
func isNoChildren(err error) bool {
|
||||
switch err := err.(type) {
|
||||
case syscall.Errno:
|
||||
if err == unix.ECHILD {
|
||||
return true
|
||||
}
|
||||
case *os.SyscallError:
|
||||
if err.Err == unix.ECHILD {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// signalAllProcesses freezes then iterates over all the processes inside the
|
||||
// manager's cgroups sending the signal s to them.
|
||||
// If s is SIGKILL then it will wait for each process to exit.
|
||||
// For all other signals it will check if the process is ready to report its
|
||||
// exit status and only if it is will a wait be performed.
|
||||
func signalAllProcesses(m cgroups.Manager, s os.Signal) error {
|
||||
var procs []*os.Process
|
||||
if err := m.Freeze(configs.Frozen); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
pids, err := m.GetAllPids()
|
||||
if err != nil {
|
||||
m.Freeze(configs.Thawed)
|
||||
return err
|
||||
}
|
||||
for _, pid := range pids {
|
||||
p, err := os.FindProcess(pid)
|
||||
if err != nil {
|
||||
logrus.Warn(err)
|
||||
continue
|
||||
}
|
||||
procs = append(procs, p)
|
||||
if err := p.Signal(s); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
}
|
||||
if err := m.Freeze(configs.Thawed); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
|
||||
subreaper, err := system.GetSubreaper()
|
||||
if err != nil {
|
||||
// The error here means that PR_GET_CHILD_SUBREAPER is not
|
||||
// supported because this code might run on a kernel older
|
||||
// than 3.4. We don't want to throw an error in that case,
|
||||
// and we simplify things, considering there is no subreaper
|
||||
// set.
|
||||
subreaper = 0
|
||||
}
|
||||
|
||||
for _, p := range procs {
|
||||
if s != unix.SIGKILL {
|
||||
if ok, err := isWaitable(p.Pid); err != nil {
|
||||
if !isNoChildren(err) {
|
||||
logrus.Warn("signalAllProcesses: ", p.Pid, err)
|
||||
}
|
||||
continue
|
||||
} else if !ok {
|
||||
// Not ready to report so don't wait
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// In case a subreaper has been setup, this code must not
|
||||
// wait for the process. Otherwise, we cannot be sure the
|
||||
// current process will be reaped by the subreaper, while
|
||||
// the subreaper might be waiting for this process in order
|
||||
// to retrieve its exit code.
|
||||
if subreaper == 0 {
|
||||
if _, err := p.Wait(); err != nil {
|
||||
if !isNoChildren(err) {
|
||||
logrus.Warn("wait: ", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
2
vendor/github.com/opencontainers/runc/libcontainer/integration/doc.go
generated
vendored
Normal file
2
vendor/github.com/opencontainers/runc/libcontainer/integration/doc.go
generated
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
// integration is used for integration testing of libcontainer
|
||||
package integration
|
||||
553
vendor/github.com/opencontainers/runc/libcontainer/intelrdt/intelrdt.go
generated
vendored
Normal file
553
vendor/github.com/opencontainers/runc/libcontainer/intelrdt/intelrdt.go
generated
vendored
Normal file
@@ -0,0 +1,553 @@
|
||||
// +build linux
|
||||
|
||||
package intelrdt
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
/*
|
||||
* About Intel RDT/CAT feature:
|
||||
* Intel platforms with new Xeon CPU support Resource Director Technology (RDT).
|
||||
* Intel Cache Allocation Technology (CAT) is a sub-feature of RDT. Currently L3
|
||||
* Cache is the only resource that is supported in RDT.
|
||||
*
|
||||
* This feature provides a way for the software to restrict cache allocation to a
|
||||
* defined 'subset' of L3 cache which may be overlapping with other 'subsets'.
|
||||
* The different subsets are identified by class of service (CLOS) and each CLOS
|
||||
* has a capacity bitmask (CBM).
|
||||
*
|
||||
* For more information about Intel RDT/CAT can be found in the section 17.17
|
||||
* of Intel Software Developer Manual.
|
||||
*
|
||||
* About Intel RDT/CAT kernel interface:
|
||||
* In Linux 4.10 kernel or newer, the interface is defined and exposed via
|
||||
* "resource control" filesystem, which is a "cgroup-like" interface.
|
||||
*
|
||||
* Comparing with cgroups, it has similar process management lifecycle and
|
||||
* interfaces in a container. But unlike cgroups' hierarchy, it has single level
|
||||
* filesystem layout.
|
||||
*
|
||||
* Intel RDT "resource control" filesystem hierarchy:
|
||||
* mount -t resctrl resctrl /sys/fs/resctrl
|
||||
* tree /sys/fs/resctrl
|
||||
* /sys/fs/resctrl/
|
||||
* |-- info
|
||||
* | |-- L3
|
||||
* | |-- cbm_mask
|
||||
* | |-- min_cbm_bits
|
||||
* | |-- num_closids
|
||||
* |-- cpus
|
||||
* |-- schemata
|
||||
* |-- tasks
|
||||
* |-- <container_id>
|
||||
* |-- cpus
|
||||
* |-- schemata
|
||||
* |-- tasks
|
||||
*
|
||||
* For runc, we can make use of `tasks` and `schemata` configuration for L3 cache
|
||||
* resource constraints.
|
||||
*
|
||||
* The file `tasks` has a list of tasks that belongs to this group (e.g.,
|
||||
* <container_id>" group). Tasks can be added to a group by writing the task ID
|
||||
* to the "tasks" file (which will automatically remove them from the previous
|
||||
* group to which they belonged). New tasks created by fork(2) and clone(2) are
|
||||
* added to the same group as their parent. If a pid is not in any sub group, it is
|
||||
* in root group.
|
||||
*
|
||||
* The file `schemata` has allocation bitmasks/values for L3 cache on each socket,
|
||||
* which contains L3 cache id and capacity bitmask (CBM).
|
||||
* Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
||||
* For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0`
|
||||
* which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
|
||||
*
|
||||
* The valid L3 cache CBM is a *contiguous bits set* and number of bits that can
|
||||
* be set is less than the max bit. The max bits in the CBM is varied among
|
||||
* supported Intel Xeon platforms. In Intel RDT "resource control" filesystem
|
||||
* layout, the CBM in a group should be a subset of the CBM in root. Kernel will
|
||||
* check if it is valid when writing. e.g., 0xfffff in root indicates the max bits
|
||||
* of CBM is 20 bits, which mapping to entire L3 cache capacity. Some valid CBM
|
||||
* values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
|
||||
*
|
||||
* For more information about Intel RDT/CAT kernel interface:
|
||||
* https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
|
||||
*
|
||||
* An example for runc:
|
||||
* Consider a two-socket machine with two L3 caches where the default CBM is
|
||||
* 0xfffff and the max CBM length is 20 bits. With this configuration, tasks
|
||||
* inside the container only have access to the "upper" 80% of L3 cache id 0 and
|
||||
* the "lower" 50% L3 cache id 1:
|
||||
*
|
||||
* "linux": {
|
||||
* "intelRdt": {
|
||||
* "l3CacheSchema": "L3:0=ffff0;1=3ff"
|
||||
* }
|
||||
* }
|
||||
*/
|
||||
|
||||
type Manager interface {
|
||||
// Applies Intel RDT configuration to the process with the specified pid
|
||||
Apply(pid int) error
|
||||
|
||||
// Returns statistics for Intel RDT
|
||||
GetStats() (*Stats, error)
|
||||
|
||||
// Destroys the Intel RDT 'container_id' group
|
||||
Destroy() error
|
||||
|
||||
// Returns Intel RDT path to save in a state file and to be able to
|
||||
// restore the object later
|
||||
GetPath() string
|
||||
|
||||
// Set Intel RDT "resource control" filesystem as configured.
|
||||
Set(container *configs.Config) error
|
||||
}
|
||||
|
||||
// This implements interface Manager
|
||||
type IntelRdtManager struct {
|
||||
mu sync.Mutex
|
||||
Config *configs.Config
|
||||
Id string
|
||||
Path string
|
||||
}
|
||||
|
||||
const (
|
||||
IntelRdtTasks = "tasks"
|
||||
)
|
||||
|
||||
var (
|
||||
// The absolute root path of the Intel RDT "resource control" filesystem
|
||||
intelRdtRoot string
|
||||
intelRdtRootLock sync.Mutex
|
||||
|
||||
// The flag to indicate if Intel RDT is supported
|
||||
isEnabled bool
|
||||
)
|
||||
|
||||
type intelRdtData struct {
|
||||
root string
|
||||
config *configs.Config
|
||||
pid int
|
||||
}
|
||||
|
||||
// Check if Intel RDT is enabled in init()
|
||||
func init() {
|
||||
// 1. Check if hardware and kernel support Intel RDT/CAT feature
|
||||
// "cat_l3" flag is set if supported
|
||||
isFlagSet, err := parseCpuInfoFile("/proc/cpuinfo")
|
||||
if !isFlagSet || err != nil {
|
||||
isEnabled = false
|
||||
return
|
||||
}
|
||||
|
||||
// 2. Check if Intel RDT "resource control" filesystem is mounted
|
||||
// The user guarantees to mount the filesystem
|
||||
isEnabled = isIntelRdtMounted()
|
||||
}
|
||||
|
||||
// Return the mount point path of Intel RDT "resource control" filesysem
|
||||
func findIntelRdtMountpointDir() (string, error) {
|
||||
f, err := os.Open("/proc/self/mountinfo")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
s := bufio.NewScanner(f)
|
||||
for s.Scan() {
|
||||
text := s.Text()
|
||||
fields := strings.Split(text, " ")
|
||||
// Safe as mountinfo encodes mountpoints with spaces as \040.
|
||||
index := strings.Index(text, " - ")
|
||||
postSeparatorFields := strings.Fields(text[index+3:])
|
||||
numPostFields := len(postSeparatorFields)
|
||||
|
||||
// This is an error as we can't detect if the mount is for "Intel RDT"
|
||||
if numPostFields == 0 {
|
||||
return "", fmt.Errorf("Found no fields post '-' in %q", text)
|
||||
}
|
||||
|
||||
if postSeparatorFields[0] == "resctrl" {
|
||||
// Check that the mount is properly formated.
|
||||
if numPostFields < 3 {
|
||||
return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
|
||||
}
|
||||
|
||||
return fields[4], nil
|
||||
}
|
||||
}
|
||||
if err := s.Err(); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return "", NewNotFoundError("Intel RDT")
|
||||
}
|
||||
|
||||
// Gets the root path of Intel RDT "resource control" filesystem
|
||||
func getIntelRdtRoot() (string, error) {
|
||||
intelRdtRootLock.Lock()
|
||||
defer intelRdtRootLock.Unlock()
|
||||
|
||||
if intelRdtRoot != "" {
|
||||
return intelRdtRoot, nil
|
||||
}
|
||||
|
||||
root, err := findIntelRdtMountpointDir()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
if _, err := os.Stat(root); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
intelRdtRoot = root
|
||||
return intelRdtRoot, nil
|
||||
}
|
||||
|
||||
func isIntelRdtMounted() bool {
|
||||
_, err := getIntelRdtRoot()
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func parseCpuInfoFile(path string) (bool, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
s := bufio.NewScanner(f)
|
||||
for s.Scan() {
|
||||
if err := s.Err(); err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
text := s.Text()
|
||||
flags := strings.Split(text, " ")
|
||||
|
||||
// "cat_l3" flag is set if Intel RDT/CAT is supported
|
||||
for _, flag := range flags {
|
||||
if flag == "cat_l3" {
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
|
||||
func parseUint(s string, base, bitSize int) (uint64, error) {
|
||||
value, err := strconv.ParseUint(s, base, bitSize)
|
||||
if err != nil {
|
||||
intValue, intErr := strconv.ParseInt(s, base, bitSize)
|
||||
// 1. Handle negative values greater than MinInt64 (and)
|
||||
// 2. Handle negative values lesser than MinInt64
|
||||
if intErr == nil && intValue < 0 {
|
||||
return 0, nil
|
||||
} else if intErr != nil && intErr.(*strconv.NumError).Err == strconv.ErrRange && intValue < 0 {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
return value, err
|
||||
}
|
||||
|
||||
return value, nil
|
||||
}
|
||||
|
||||
// Gets a single uint64 value from the specified file.
|
||||
func getIntelRdtParamUint(path, file string) (uint64, error) {
|
||||
fileName := filepath.Join(path, file)
|
||||
contents, err := ioutil.ReadFile(fileName)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
res, err := parseUint(strings.TrimSpace(string(contents)), 10, 64)
|
||||
if err != nil {
|
||||
return res, fmt.Errorf("unable to parse %q as a uint from file %q", string(contents), fileName)
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// Gets a string value from the specified file
|
||||
func getIntelRdtParamString(path, file string) (string, error) {
|
||||
contents, err := ioutil.ReadFile(filepath.Join(path, file))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return strings.TrimSpace(string(contents)), nil
|
||||
}
|
||||
|
||||
func readTasksFile(dir string) ([]int, error) {
|
||||
f, err := os.Open(filepath.Join(dir, IntelRdtTasks))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var (
|
||||
s = bufio.NewScanner(f)
|
||||
out = []int{}
|
||||
)
|
||||
|
||||
for s.Scan() {
|
||||
if t := s.Text(); t != "" {
|
||||
pid, err := strconv.Atoi(t)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
out = append(out, pid)
|
||||
}
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func writeFile(dir, file, data string) error {
|
||||
if dir == "" {
|
||||
return fmt.Errorf("no such directory for %s", file)
|
||||
}
|
||||
if err := ioutil.WriteFile(filepath.Join(dir, file), []byte(data+"\n"), 0700); err != nil {
|
||||
return fmt.Errorf("failed to write %v to %v: %v", data, file, err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getIntelRdtData(c *configs.Config, pid int) (*intelRdtData, error) {
|
||||
rootPath, err := getIntelRdtRoot()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &intelRdtData{
|
||||
root: rootPath,
|
||||
config: c,
|
||||
pid: pid,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Get the read-only L3 cache information
|
||||
func getL3CacheInfo() (*L3CacheInfo, error) {
|
||||
l3CacheInfo := &L3CacheInfo{}
|
||||
|
||||
rootPath, err := getIntelRdtRoot()
|
||||
if err != nil {
|
||||
return l3CacheInfo, err
|
||||
}
|
||||
|
||||
path := filepath.Join(rootPath, "info", "L3")
|
||||
cbmMask, err := getIntelRdtParamString(path, "cbm_mask")
|
||||
if err != nil {
|
||||
return l3CacheInfo, err
|
||||
}
|
||||
minCbmBits, err := getIntelRdtParamUint(path, "min_cbm_bits")
|
||||
if err != nil {
|
||||
return l3CacheInfo, err
|
||||
}
|
||||
numClosids, err := getIntelRdtParamUint(path, "num_closids")
|
||||
if err != nil {
|
||||
return l3CacheInfo, err
|
||||
}
|
||||
|
||||
l3CacheInfo.CbmMask = cbmMask
|
||||
l3CacheInfo.MinCbmBits = minCbmBits
|
||||
l3CacheInfo.NumClosids = numClosids
|
||||
|
||||
return l3CacheInfo, nil
|
||||
}
|
||||
|
||||
// WriteIntelRdtTasks writes the specified pid into the "tasks" file
|
||||
func WriteIntelRdtTasks(dir string, pid int) error {
|
||||
if dir == "" {
|
||||
return fmt.Errorf("no such directory for %s", IntelRdtTasks)
|
||||
}
|
||||
|
||||
// Dont attach any pid if -1 is specified as a pid
|
||||
if pid != -1 {
|
||||
if err := ioutil.WriteFile(filepath.Join(dir, IntelRdtTasks), []byte(strconv.Itoa(pid)), 0700); err != nil {
|
||||
return fmt.Errorf("failed to write %v to %v: %v", pid, IntelRdtTasks, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check if Intel RDT is enabled
|
||||
func IsEnabled() bool {
|
||||
return isEnabled
|
||||
}
|
||||
|
||||
// Get the 'container_id' path in Intel RDT "resource control" filesystem
|
||||
func GetIntelRdtPath(id string) (string, error) {
|
||||
rootPath, err := getIntelRdtRoot()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
path := filepath.Join(rootPath, id)
|
||||
return path, nil
|
||||
}
|
||||
|
||||
// Applies Intel RDT configuration to the process with the specified pid
|
||||
func (m *IntelRdtManager) Apply(pid int) (err error) {
|
||||
// If intelRdt is not specified in config, we do nothing
|
||||
if m.Config.IntelRdt == nil {
|
||||
return nil
|
||||
}
|
||||
d, err := getIntelRdtData(m.Config, pid)
|
||||
if err != nil && !IsNotFound(err) {
|
||||
return err
|
||||
}
|
||||
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
path, err := d.join(m.Id)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
m.Path = path
|
||||
return nil
|
||||
}
|
||||
|
||||
// Destroys the Intel RDT 'container_id' group
|
||||
func (m *IntelRdtManager) Destroy() error {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
if err := os.RemoveAll(m.Path); err != nil {
|
||||
return err
|
||||
}
|
||||
m.Path = ""
|
||||
return nil
|
||||
}
|
||||
|
||||
// Returns Intel RDT path to save in a state file and to be able to
|
||||
// restore the object later
|
||||
func (m *IntelRdtManager) GetPath() string {
|
||||
if m.Path == "" {
|
||||
m.Path, _ = GetIntelRdtPath(m.Id)
|
||||
}
|
||||
return m.Path
|
||||
}
|
||||
|
||||
// Returns statistics for Intel RDT
|
||||
func (m *IntelRdtManager) GetStats() (*Stats, error) {
|
||||
// If intelRdt is not specified in config
|
||||
if m.Config.IntelRdt == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
stats := NewStats()
|
||||
|
||||
// The read-only L3 cache information
|
||||
l3CacheInfo, err := getL3CacheInfo()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stats.L3CacheInfo = l3CacheInfo
|
||||
|
||||
// The read-only L3 cache schema in root
|
||||
rootPath, err := getIntelRdtRoot()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tmpRootStrings, err := getIntelRdtParamString(rootPath, "schemata")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// L3 cache schema is in the first line
|
||||
schemaRootStrings := strings.Split(tmpRootStrings, "\n")
|
||||
stats.L3CacheSchemaRoot = schemaRootStrings[0]
|
||||
|
||||
// The L3 cache schema in 'container_id' group
|
||||
tmpStrings, err := getIntelRdtParamString(m.GetPath(), "schemata")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// L3 cache schema is in the first line
|
||||
schemaStrings := strings.Split(tmpStrings, "\n")
|
||||
stats.L3CacheSchema = schemaStrings[0]
|
||||
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
// Set Intel RDT "resource control" filesystem as configured.
|
||||
func (m *IntelRdtManager) Set(container *configs.Config) error {
|
||||
path := m.GetPath()
|
||||
|
||||
// About L3 cache schema file:
|
||||
// The schema has allocation masks/values for L3 cache on each socket,
|
||||
// which contains L3 cache id and capacity bitmask (CBM).
|
||||
// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
|
||||
// For example, on a two-socket machine, L3's schema line could be:
|
||||
// L3:0=ff;1=c0
|
||||
// Which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
|
||||
//
|
||||
// About L3 cache CBM validity:
|
||||
// The valid L3 cache CBM is a *contiguous bits set* and number of
|
||||
// bits that can be set is less than the max bit. The max bits in the
|
||||
// CBM is varied among supported Intel Xeon platforms. In Intel RDT
|
||||
// "resource control" filesystem layout, the CBM in a group should
|
||||
// be a subset of the CBM in root. Kernel will check if it is valid
|
||||
// when writing.
|
||||
// e.g., 0xfffff in root indicates the max bits of CBM is 20 bits,
|
||||
// which mapping to entire L3 cache capacity. Some valid CBM values
|
||||
// to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc.
|
||||
if container.IntelRdt != nil {
|
||||
l3CacheSchema := container.IntelRdt.L3CacheSchema
|
||||
if l3CacheSchema != "" {
|
||||
if err := writeFile(path, "schemata", l3CacheSchema); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (raw *intelRdtData) join(id string) (string, error) {
|
||||
path := filepath.Join(raw.root, id)
|
||||
if err := os.MkdirAll(path, 0755); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
if err := WriteIntelRdtTasks(path, raw.pid); err != nil {
|
||||
return "", err
|
||||
}
|
||||
return path, nil
|
||||
}
|
||||
|
||||
type NotFoundError struct {
|
||||
ResourceControl string
|
||||
}
|
||||
|
||||
func (e *NotFoundError) Error() string {
|
||||
return fmt.Sprintf("mountpoint for %s not found", e.ResourceControl)
|
||||
}
|
||||
|
||||
func NewNotFoundError(res string) error {
|
||||
return &NotFoundError{
|
||||
ResourceControl: res,
|
||||
}
|
||||
}
|
||||
|
||||
func IsNotFound(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
_, ok := err.(*NotFoundError)
|
||||
return ok
|
||||
}
|
||||
24
vendor/github.com/opencontainers/runc/libcontainer/intelrdt/stats.go
generated
vendored
Normal file
24
vendor/github.com/opencontainers/runc/libcontainer/intelrdt/stats.go
generated
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
// +build linux
|
||||
|
||||
package intelrdt
|
||||
|
||||
type L3CacheInfo struct {
|
||||
CbmMask string `json:"cbm_mask,omitempty"`
|
||||
MinCbmBits uint64 `json:"min_cbm_bits,omitempty"`
|
||||
NumClosids uint64 `json:"num_closids,omitempty"`
|
||||
}
|
||||
|
||||
type Stats struct {
|
||||
// The read-only L3 cache information
|
||||
L3CacheInfo *L3CacheInfo `json:"l3_cache_info,omitempty"`
|
||||
|
||||
// The read-only L3 cache schema in root
|
||||
L3CacheSchemaRoot string `json:"l3_cache_schema_root,omitempty"`
|
||||
|
||||
// The L3 cache schema in 'container_id' group
|
||||
L3CacheSchema string `json:"l3_cache_schema,omitempty"`
|
||||
}
|
||||
|
||||
func NewStats() *Stats {
|
||||
return &Stats{}
|
||||
}
|
||||
50
vendor/github.com/opencontainers/runc/libcontainer/keys/keyctl.go
generated
vendored
Normal file
50
vendor/github.com/opencontainers/runc/libcontainer/keys/keyctl.go
generated
vendored
Normal file
@@ -0,0 +1,50 @@
|
||||
// +build linux
|
||||
|
||||
package keys
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
type KeySerial uint32
|
||||
|
||||
func JoinSessionKeyring(name string) (KeySerial, error) {
|
||||
sessKeyId, err := unix.KeyctlJoinSessionKeyring(name)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("could not create session key: %v", err)
|
||||
}
|
||||
return KeySerial(sessKeyId), nil
|
||||
}
|
||||
|
||||
// ModKeyringPerm modifies permissions on a keyring by reading the current permissions,
|
||||
// anding the bits with the given mask (clearing permissions) and setting
|
||||
// additional permission bits
|
||||
func ModKeyringPerm(ringId KeySerial, mask, setbits uint32) error {
|
||||
dest, err := unix.KeyctlString(unix.KEYCTL_DESCRIBE, int(ringId))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
res := strings.Split(dest, ";")
|
||||
if len(res) < 5 {
|
||||
return fmt.Errorf("Destination buffer for key description is too small")
|
||||
}
|
||||
|
||||
// parse permissions
|
||||
perm64, err := strconv.ParseUint(res[3], 16, 32)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
perm := (uint32(perm64) & mask) | setbits
|
||||
|
||||
if err := unix.KeyctlSetperm(int(ringId), perm); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
89
vendor/github.com/opencontainers/runc/libcontainer/message_linux.go
generated
vendored
Normal file
89
vendor/github.com/opencontainers/runc/libcontainer/message_linux.go
generated
vendored
Normal file
@@ -0,0 +1,89 @@
|
||||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"github.com/vishvananda/netlink/nl"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// list of known message types we want to send to bootstrap program
|
||||
// The number is randomly chosen to not conflict with known netlink types
|
||||
const (
|
||||
InitMsg uint16 = 62000
|
||||
CloneFlagsAttr uint16 = 27281
|
||||
NsPathsAttr uint16 = 27282
|
||||
UidmapAttr uint16 = 27283
|
||||
GidmapAttr uint16 = 27284
|
||||
SetgroupAttr uint16 = 27285
|
||||
OomScoreAdjAttr uint16 = 27286
|
||||
RootlessAttr uint16 = 27287
|
||||
UidmapPathAttr uint16 = 27288
|
||||
GidmapPathAttr uint16 = 27289
|
||||
)
|
||||
|
||||
type Int32msg struct {
|
||||
Type uint16
|
||||
Value uint32
|
||||
}
|
||||
|
||||
// Serialize serializes the message.
|
||||
// Int32msg has the following representation
|
||||
// | nlattr len | nlattr type |
|
||||
// | uint32 value |
|
||||
func (msg *Int32msg) Serialize() []byte {
|
||||
buf := make([]byte, msg.Len())
|
||||
native := nl.NativeEndian()
|
||||
native.PutUint16(buf[0:2], uint16(msg.Len()))
|
||||
native.PutUint16(buf[2:4], msg.Type)
|
||||
native.PutUint32(buf[4:8], msg.Value)
|
||||
return buf
|
||||
}
|
||||
|
||||
func (msg *Int32msg) Len() int {
|
||||
return unix.NLA_HDRLEN + 4
|
||||
}
|
||||
|
||||
// Bytemsg has the following representation
|
||||
// | nlattr len | nlattr type |
|
||||
// | value | pad |
|
||||
type Bytemsg struct {
|
||||
Type uint16
|
||||
Value []byte
|
||||
}
|
||||
|
||||
func (msg *Bytemsg) Serialize() []byte {
|
||||
l := msg.Len()
|
||||
buf := make([]byte, (l+unix.NLA_ALIGNTO-1) & ^(unix.NLA_ALIGNTO-1))
|
||||
native := nl.NativeEndian()
|
||||
native.PutUint16(buf[0:2], uint16(l))
|
||||
native.PutUint16(buf[2:4], msg.Type)
|
||||
copy(buf[4:], msg.Value)
|
||||
return buf
|
||||
}
|
||||
|
||||
func (msg *Bytemsg) Len() int {
|
||||
return unix.NLA_HDRLEN + len(msg.Value) + 1 // null-terminated
|
||||
}
|
||||
|
||||
type Boolmsg struct {
|
||||
Type uint16
|
||||
Value bool
|
||||
}
|
||||
|
||||
func (msg *Boolmsg) Serialize() []byte {
|
||||
buf := make([]byte, msg.Len())
|
||||
native := nl.NativeEndian()
|
||||
native.PutUint16(buf[0:2], uint16(msg.Len()))
|
||||
native.PutUint16(buf[2:4], msg.Type)
|
||||
if msg.Value {
|
||||
buf[4] = 1
|
||||
} else {
|
||||
buf[4] = 0
|
||||
}
|
||||
return buf
|
||||
}
|
||||
|
||||
func (msg *Boolmsg) Len() int {
|
||||
return unix.NLA_HDRLEN + 1
|
||||
}
|
||||
23
vendor/github.com/opencontainers/runc/libcontainer/mount/mount.go
generated
vendored
Normal file
23
vendor/github.com/opencontainers/runc/libcontainer/mount/mount.go
generated
vendored
Normal file
@@ -0,0 +1,23 @@
|
||||
package mount
|
||||
|
||||
// GetMounts retrieves a list of mounts for the current running process.
|
||||
func GetMounts() ([]*Info, error) {
|
||||
return parseMountTable()
|
||||
}
|
||||
|
||||
// Mounted looks at /proc/self/mountinfo to determine of the specified
|
||||
// mountpoint has been mounted
|
||||
func Mounted(mountpoint string) (bool, error) {
|
||||
entries, err := parseMountTable()
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
// Search the table for the mountpoint
|
||||
for _, e := range entries {
|
||||
if e.Mountpoint == mountpoint {
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
82
vendor/github.com/opencontainers/runc/libcontainer/mount/mount_linux.go
generated
vendored
Normal file
82
vendor/github.com/opencontainers/runc/libcontainer/mount/mount_linux.go
generated
vendored
Normal file
@@ -0,0 +1,82 @@
|
||||
// +build linux
|
||||
|
||||
package mount
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
|
||||
/* 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
|
||||
(1)(2)(3) (4) (5) (6) (7) (8) (9) (10) (11)
|
||||
|
||||
(1) mount ID: unique identifier of the mount (may be reused after umount)
|
||||
(2) parent ID: ID of parent (or of self for the top of the mount tree)
|
||||
(3) major:minor: value of st_dev for files on filesystem
|
||||
(4) root: root of the mount within the filesystem
|
||||
(5) mount point: mount point relative to the process's root
|
||||
(6) mount options: per mount options
|
||||
(7) optional fields: zero or more fields of the form "tag[:value]"
|
||||
(8) separator: marks the end of the optional fields
|
||||
(9) filesystem type: name of filesystem of the form "type[.subtype]"
|
||||
(10) mount source: filesystem specific information or "none"
|
||||
(11) super options: per super block options*/
|
||||
mountinfoFormat = "%d %d %d:%d %s %s %s %s"
|
||||
)
|
||||
|
||||
// Parse /proc/self/mountinfo because comparing Dev and ino does not work from
|
||||
// bind mounts
|
||||
func parseMountTable() ([]*Info, error) {
|
||||
f, err := os.Open("/proc/self/mountinfo")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
return parseInfoFile(f)
|
||||
}
|
||||
|
||||
func parseInfoFile(r io.Reader) ([]*Info, error) {
|
||||
var (
|
||||
s = bufio.NewScanner(r)
|
||||
out = []*Info{}
|
||||
)
|
||||
|
||||
for s.Scan() {
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var (
|
||||
p = &Info{}
|
||||
text = s.Text()
|
||||
optionalFields string
|
||||
)
|
||||
|
||||
if _, err := fmt.Sscanf(text, mountinfoFormat,
|
||||
&p.ID, &p.Parent, &p.Major, &p.Minor,
|
||||
&p.Root, &p.Mountpoint, &p.Opts, &optionalFields); err != nil {
|
||||
return nil, fmt.Errorf("Scanning '%s' failed: %s", text, err)
|
||||
}
|
||||
// Safe as mountinfo encodes mountpoints with spaces as \040.
|
||||
index := strings.Index(text, " - ")
|
||||
postSeparatorFields := strings.Fields(text[index+3:])
|
||||
if len(postSeparatorFields) < 3 {
|
||||
return nil, fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
|
||||
}
|
||||
|
||||
if optionalFields != "-" {
|
||||
p.Optional = optionalFields
|
||||
}
|
||||
|
||||
p.Fstype = postSeparatorFields[0]
|
||||
p.Source = postSeparatorFields[1]
|
||||
p.VfsOpts = strings.Join(postSeparatorFields[2:], " ")
|
||||
out = append(out, p)
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
40
vendor/github.com/opencontainers/runc/libcontainer/mount/mountinfo.go
generated
vendored
Normal file
40
vendor/github.com/opencontainers/runc/libcontainer/mount/mountinfo.go
generated
vendored
Normal file
@@ -0,0 +1,40 @@
|
||||
package mount
|
||||
|
||||
// Info reveals information about a particular mounted filesystem. This
|
||||
// struct is populated from the content in the /proc/<pid>/mountinfo file.
|
||||
type Info struct {
|
||||
// ID is a unique identifier of the mount (may be reused after umount).
|
||||
ID int
|
||||
|
||||
// Parent indicates the ID of the mount parent (or of self for the top of the
|
||||
// mount tree).
|
||||
Parent int
|
||||
|
||||
// Major indicates one half of the device ID which identifies the device class.
|
||||
Major int
|
||||
|
||||
// Minor indicates one half of the device ID which identifies a specific
|
||||
// instance of device.
|
||||
Minor int
|
||||
|
||||
// Root of the mount within the filesystem.
|
||||
Root string
|
||||
|
||||
// Mountpoint indicates the mount point relative to the process's root.
|
||||
Mountpoint string
|
||||
|
||||
// Opts represents mount-specific options.
|
||||
Opts string
|
||||
|
||||
// Optional represents optional fields.
|
||||
Optional string
|
||||
|
||||
// Fstype indicates the type of filesystem, such as EXT3.
|
||||
Fstype string
|
||||
|
||||
// Source indicates filesystem specific information or "none".
|
||||
Source string
|
||||
|
||||
// VfsOpts represents per super block options.
|
||||
VfsOpts string
|
||||
}
|
||||
259
vendor/github.com/opencontainers/runc/libcontainer/network_linux.go
generated
vendored
Normal file
259
vendor/github.com/opencontainers/runc/libcontainer/network_linux.go
generated
vendored
Normal file
@@ -0,0 +1,259 @@
|
||||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/vishvananda/netlink"
|
||||
)
|
||||
|
||||
var strategies = map[string]networkStrategy{
|
||||
"veth": &veth{},
|
||||
"loopback": &loopback{},
|
||||
}
|
||||
|
||||
// networkStrategy represents a specific network configuration for
|
||||
// a container's networking stack
|
||||
type networkStrategy interface {
|
||||
create(*network, int) error
|
||||
initialize(*network) error
|
||||
detach(*configs.Network) error
|
||||
attach(*configs.Network) error
|
||||
}
|
||||
|
||||
// getStrategy returns the specific network strategy for the
|
||||
// provided type.
|
||||
func getStrategy(tpe string) (networkStrategy, error) {
|
||||
s, exists := strategies[tpe]
|
||||
if !exists {
|
||||
return nil, fmt.Errorf("unknown strategy type %q", tpe)
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// Returns the network statistics for the network interfaces represented by the NetworkRuntimeInfo.
|
||||
func getNetworkInterfaceStats(interfaceName string) (*NetworkInterface, error) {
|
||||
out := &NetworkInterface{Name: interfaceName}
|
||||
// This can happen if the network runtime information is missing - possible if the
|
||||
// container was created by an old version of libcontainer.
|
||||
if interfaceName == "" {
|
||||
return out, nil
|
||||
}
|
||||
type netStatsPair struct {
|
||||
// Where to write the output.
|
||||
Out *uint64
|
||||
// The network stats file to read.
|
||||
File string
|
||||
}
|
||||
// Ingress for host veth is from the container. Hence tx_bytes stat on the host veth is actually number of bytes received by the container.
|
||||
netStats := []netStatsPair{
|
||||
{Out: &out.RxBytes, File: "tx_bytes"},
|
||||
{Out: &out.RxPackets, File: "tx_packets"},
|
||||
{Out: &out.RxErrors, File: "tx_errors"},
|
||||
{Out: &out.RxDropped, File: "tx_dropped"},
|
||||
|
||||
{Out: &out.TxBytes, File: "rx_bytes"},
|
||||
{Out: &out.TxPackets, File: "rx_packets"},
|
||||
{Out: &out.TxErrors, File: "rx_errors"},
|
||||
{Out: &out.TxDropped, File: "rx_dropped"},
|
||||
}
|
||||
for _, netStat := range netStats {
|
||||
data, err := readSysfsNetworkStats(interfaceName, netStat.File)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
*(netStat.Out) = data
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// Reads the specified statistics available under /sys/class/net/<EthInterface>/statistics
|
||||
func readSysfsNetworkStats(ethInterface, statsFile string) (uint64, error) {
|
||||
data, err := ioutil.ReadFile(filepath.Join("/sys/class/net", ethInterface, "statistics", statsFile))
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return strconv.ParseUint(strings.TrimSpace(string(data)), 10, 64)
|
||||
}
|
||||
|
||||
// loopback is a network strategy that provides a basic loopback device
|
||||
type loopback struct {
|
||||
}
|
||||
|
||||
func (l *loopback) create(n *network, nspid int) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (l *loopback) initialize(config *network) error {
|
||||
return netlink.LinkSetUp(&netlink.Device{LinkAttrs: netlink.LinkAttrs{Name: "lo"}})
|
||||
}
|
||||
|
||||
func (l *loopback) attach(n *configs.Network) (err error) {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (l *loopback) detach(n *configs.Network) (err error) {
|
||||
return nil
|
||||
}
|
||||
|
||||
// veth is a network strategy that uses a bridge and creates
|
||||
// a veth pair, one that is attached to the bridge on the host and the other
|
||||
// is placed inside the container's namespace
|
||||
type veth struct {
|
||||
}
|
||||
|
||||
func (v *veth) detach(n *configs.Network) (err error) {
|
||||
return netlink.LinkSetMaster(&netlink.Device{LinkAttrs: netlink.LinkAttrs{Name: n.HostInterfaceName}}, nil)
|
||||
}
|
||||
|
||||
// attach a container network interface to an external network
|
||||
func (v *veth) attach(n *configs.Network) (err error) {
|
||||
brl, err := netlink.LinkByName(n.Bridge)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
br, ok := brl.(*netlink.Bridge)
|
||||
if !ok {
|
||||
return fmt.Errorf("Wrong device type %T", brl)
|
||||
}
|
||||
host, err := netlink.LinkByName(n.HostInterfaceName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := netlink.LinkSetMaster(host, br); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.LinkSetMTU(host, n.Mtu); err != nil {
|
||||
return err
|
||||
}
|
||||
if n.HairpinMode {
|
||||
if err := netlink.LinkSetHairpin(host, true); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := netlink.LinkSetUp(host); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (v *veth) create(n *network, nspid int) (err error) {
|
||||
tmpName, err := v.generateTempPeerName()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
n.TempVethPeerName = tmpName
|
||||
if n.Bridge == "" {
|
||||
return fmt.Errorf("bridge is not specified")
|
||||
}
|
||||
veth := &netlink.Veth{
|
||||
LinkAttrs: netlink.LinkAttrs{
|
||||
Name: n.HostInterfaceName,
|
||||
TxQLen: n.TxQueueLen,
|
||||
},
|
||||
PeerName: n.TempVethPeerName,
|
||||
}
|
||||
if err := netlink.LinkAdd(veth); err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
if err != nil {
|
||||
netlink.LinkDel(veth)
|
||||
}
|
||||
}()
|
||||
if err := v.attach(&n.Network); err != nil {
|
||||
return err
|
||||
}
|
||||
child, err := netlink.LinkByName(n.TempVethPeerName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return netlink.LinkSetNsPid(child, nspid)
|
||||
}
|
||||
|
||||
func (v *veth) generateTempPeerName() (string, error) {
|
||||
return utils.GenerateRandomName("veth", 7)
|
||||
}
|
||||
|
||||
func (v *veth) initialize(config *network) error {
|
||||
peer := config.TempVethPeerName
|
||||
if peer == "" {
|
||||
return fmt.Errorf("peer is not specified")
|
||||
}
|
||||
child, err := netlink.LinkByName(peer)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.LinkSetDown(child); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.LinkSetName(child, config.Name); err != nil {
|
||||
return err
|
||||
}
|
||||
// get the interface again after we changed the name as the index also changes.
|
||||
if child, err = netlink.LinkByName(config.Name); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.MacAddress != "" {
|
||||
mac, err := net.ParseMAC(config.MacAddress)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.LinkSetHardwareAddr(child, mac); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
ip, err := netlink.ParseAddr(config.Address)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.AddrAdd(child, ip); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.IPv6Address != "" {
|
||||
ip6, err := netlink.ParseAddr(config.IPv6Address)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.AddrAdd(child, ip6); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := netlink.LinkSetMTU(child, config.Mtu); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := netlink.LinkSetUp(child); err != nil {
|
||||
return err
|
||||
}
|
||||
if config.Gateway != "" {
|
||||
gw := net.ParseIP(config.Gateway)
|
||||
if err := netlink.RouteAdd(&netlink.Route{
|
||||
Scope: netlink.SCOPE_UNIVERSE,
|
||||
LinkIndex: child.Attrs().Index,
|
||||
Gw: gw,
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if config.IPv6Gateway != "" {
|
||||
gw := net.ParseIP(config.IPv6Gateway)
|
||||
if err := netlink.RouteAdd(&netlink.Route{
|
||||
Scope: netlink.SCOPE_UNIVERSE,
|
||||
LinkIndex: child.Attrs().Index,
|
||||
Gw: gw,
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
90
vendor/github.com/opencontainers/runc/libcontainer/notify_linux.go
generated
vendored
Normal file
90
vendor/github.com/opencontainers/runc/libcontainer/notify_linux.go
generated
vendored
Normal file
@@ -0,0 +1,90 @@
|
||||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const oomCgroupName = "memory"
|
||||
|
||||
type PressureLevel uint
|
||||
|
||||
const (
|
||||
LowPressure PressureLevel = iota
|
||||
MediumPressure
|
||||
CriticalPressure
|
||||
)
|
||||
|
||||
func registerMemoryEvent(cgDir string, evName string, arg string) (<-chan struct{}, error) {
|
||||
evFile, err := os.Open(filepath.Join(cgDir, evName))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fd, err := unix.Eventfd(0, unix.EFD_CLOEXEC)
|
||||
if err != nil {
|
||||
evFile.Close()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
eventfd := os.NewFile(uintptr(fd), "eventfd")
|
||||
|
||||
eventControlPath := filepath.Join(cgDir, "cgroup.event_control")
|
||||
data := fmt.Sprintf("%d %d %s", eventfd.Fd(), evFile.Fd(), arg)
|
||||
if err := ioutil.WriteFile(eventControlPath, []byte(data), 0700); err != nil {
|
||||
eventfd.Close()
|
||||
evFile.Close()
|
||||
return nil, err
|
||||
}
|
||||
ch := make(chan struct{})
|
||||
go func() {
|
||||
defer func() {
|
||||
eventfd.Close()
|
||||
evFile.Close()
|
||||
close(ch)
|
||||
}()
|
||||
buf := make([]byte, 8)
|
||||
for {
|
||||
if _, err := eventfd.Read(buf); err != nil {
|
||||
return
|
||||
}
|
||||
// When a cgroup is destroyed, an event is sent to eventfd.
|
||||
// So if the control path is gone, return instead of notifying.
|
||||
if _, err := os.Lstat(eventControlPath); os.IsNotExist(err) {
|
||||
return
|
||||
}
|
||||
ch <- struct{}{}
|
||||
}
|
||||
}()
|
||||
return ch, nil
|
||||
}
|
||||
|
||||
// notifyOnOOM returns channel on which you can expect event about OOM,
|
||||
// if process died without OOM this channel will be closed.
|
||||
func notifyOnOOM(paths map[string]string) (<-chan struct{}, error) {
|
||||
dir := paths[oomCgroupName]
|
||||
if dir == "" {
|
||||
return nil, fmt.Errorf("path %q missing", oomCgroupName)
|
||||
}
|
||||
|
||||
return registerMemoryEvent(dir, "memory.oom_control", "")
|
||||
}
|
||||
|
||||
func notifyMemoryPressure(paths map[string]string, level PressureLevel) (<-chan struct{}, error) {
|
||||
dir := paths[oomCgroupName]
|
||||
if dir == "" {
|
||||
return nil, fmt.Errorf("path %q missing", oomCgroupName)
|
||||
}
|
||||
|
||||
if level > CriticalPressure {
|
||||
return nil, fmt.Errorf("invalid pressure level %d", level)
|
||||
}
|
||||
|
||||
levelStr := []string{"low", "medium", "critical"}[level]
|
||||
return registerMemoryEvent(dir, "memory.pressure_level", levelStr)
|
||||
}
|
||||
32
vendor/github.com/opencontainers/runc/libcontainer/nsenter/namespace.h
generated
vendored
Normal file
32
vendor/github.com/opencontainers/runc/libcontainer/nsenter/namespace.h
generated
vendored
Normal file
@@ -0,0 +1,32 @@
|
||||
#ifndef NSENTER_NAMESPACE_H
|
||||
#define NSENTER_NAMESPACE_H
|
||||
|
||||
#ifndef _GNU_SOURCE
|
||||
# define _GNU_SOURCE
|
||||
#endif
|
||||
#include <sched.h>
|
||||
|
||||
/* All of these are taken from include/uapi/linux/sched.h */
|
||||
#ifndef CLONE_NEWNS
|
||||
# define CLONE_NEWNS 0x00020000 /* New mount namespace group */
|
||||
#endif
|
||||
#ifndef CLONE_NEWCGROUP
|
||||
# define CLONE_NEWCGROUP 0x02000000 /* New cgroup namespace */
|
||||
#endif
|
||||
#ifndef CLONE_NEWUTS
|
||||
# define CLONE_NEWUTS 0x04000000 /* New utsname namespace */
|
||||
#endif
|
||||
#ifndef CLONE_NEWIPC
|
||||
# define CLONE_NEWIPC 0x08000000 /* New ipc namespace */
|
||||
#endif
|
||||
#ifndef CLONE_NEWUSER
|
||||
# define CLONE_NEWUSER 0x10000000 /* New user namespace */
|
||||
#endif
|
||||
#ifndef CLONE_NEWPID
|
||||
# define CLONE_NEWPID 0x20000000 /* New pid namespace */
|
||||
#endif
|
||||
#ifndef CLONE_NEWNET
|
||||
# define CLONE_NEWNET 0x40000000 /* New network namespace */
|
||||
#endif
|
||||
|
||||
#endif /* NSENTER_NAMESPACE_H */
|
||||
12
vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter.go
generated
vendored
Normal file
12
vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter.go
generated
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
// +build linux,!gccgo
|
||||
|
||||
package nsenter
|
||||
|
||||
/*
|
||||
#cgo CFLAGS: -Wall
|
||||
extern void nsexec();
|
||||
void __attribute__((constructor)) init(void) {
|
||||
nsexec();
|
||||
}
|
||||
*/
|
||||
import "C"
|
||||
25
vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_gccgo.go
generated
vendored
Normal file
25
vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_gccgo.go
generated
vendored
Normal file
@@ -0,0 +1,25 @@
|
||||
// +build linux,gccgo
|
||||
|
||||
package nsenter
|
||||
|
||||
/*
|
||||
#cgo CFLAGS: -Wall
|
||||
extern void nsexec();
|
||||
void __attribute__((constructor)) init(void) {
|
||||
nsexec();
|
||||
}
|
||||
*/
|
||||
import "C"
|
||||
|
||||
// AlwaysFalse is here to stay false
|
||||
// (and be exported so the compiler doesn't optimize out its reference)
|
||||
var AlwaysFalse bool
|
||||
|
||||
func init() {
|
||||
if AlwaysFalse {
|
||||
// by referencing this C init() in a noop test, it will ensure the compiler
|
||||
// links in the C function.
|
||||
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65134
|
||||
C.init()
|
||||
}
|
||||
}
|
||||
5
vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_unsupported.go
generated
vendored
Normal file
5
vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_unsupported.go
generated
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
// +build !linux !cgo
|
||||
|
||||
package nsenter
|
||||
|
||||
import "C"
|
||||
963
vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c
generated
vendored
Normal file
963
vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c
generated
vendored
Normal file
@@ -0,0 +1,963 @@
|
||||
|
||||
#define _GNU_SOURCE
|
||||
#include <endian.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <grp.h>
|
||||
#include <sched.h>
|
||||
#include <setjmp.h>
|
||||
#include <signal.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/prctl.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
|
||||
#include <linux/limits.h>
|
||||
#include <linux/netlink.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
/* Get all of the CLONE_NEW* flags. */
|
||||
#include "namespace.h"
|
||||
|
||||
/* Synchronisation values. */
|
||||
enum sync_t {
|
||||
SYNC_USERMAP_PLS = 0x40, /* Request parent to map our users. */
|
||||
SYNC_USERMAP_ACK = 0x41, /* Mapping finished by the parent. */
|
||||
SYNC_RECVPID_PLS = 0x42, /* Tell parent we're sending the PID. */
|
||||
SYNC_RECVPID_ACK = 0x43, /* PID was correctly received by parent. */
|
||||
SYNC_GRANDCHILD = 0x44, /* The grandchild is ready to run. */
|
||||
SYNC_CHILD_READY = 0x45, /* The child or grandchild is ready to return. */
|
||||
|
||||
/* XXX: This doesn't help with segfaults and other such issues. */
|
||||
SYNC_ERR = 0xFF, /* Fatal error, no turning back. The error code follows. */
|
||||
};
|
||||
|
||||
/* longjmp() arguments. */
|
||||
#define JUMP_PARENT 0x00
|
||||
#define JUMP_CHILD 0xA0
|
||||
#define JUMP_INIT 0xA1
|
||||
|
||||
/* JSON buffer. */
|
||||
#define JSON_MAX 4096
|
||||
|
||||
/* Assume the stack grows down, so arguments should be above it. */
|
||||
struct clone_t {
|
||||
/*
|
||||
* Reserve some space for clone() to locate arguments
|
||||
* and retcode in this place
|
||||
*/
|
||||
char stack[4096] __attribute__ ((aligned(16)));
|
||||
char stack_ptr[0];
|
||||
|
||||
/* There's two children. This is used to execute the different code. */
|
||||
jmp_buf *env;
|
||||
int jmpval;
|
||||
};
|
||||
|
||||
struct nlconfig_t {
|
||||
char *data;
|
||||
|
||||
/* Process settings. */
|
||||
uint32_t cloneflags;
|
||||
char *oom_score_adj;
|
||||
size_t oom_score_adj_len;
|
||||
|
||||
/* User namespace settings. */
|
||||
char *uidmap;
|
||||
size_t uidmap_len;
|
||||
char *gidmap;
|
||||
size_t gidmap_len;
|
||||
char *namespaces;
|
||||
size_t namespaces_len;
|
||||
uint8_t is_setgroup;
|
||||
|
||||
/* Rootless container settings. */
|
||||
uint8_t is_rootless;
|
||||
char *uidmappath;
|
||||
size_t uidmappath_len;
|
||||
char *gidmappath;
|
||||
size_t gidmappath_len;
|
||||
};
|
||||
|
||||
/*
|
||||
* List of netlink message types sent to us as part of bootstrapping the init.
|
||||
* These constants are defined in libcontainer/message_linux.go.
|
||||
*/
|
||||
#define INIT_MSG 62000
|
||||
#define CLONE_FLAGS_ATTR 27281
|
||||
#define NS_PATHS_ATTR 27282
|
||||
#define UIDMAP_ATTR 27283
|
||||
#define GIDMAP_ATTR 27284
|
||||
#define SETGROUP_ATTR 27285
|
||||
#define OOM_SCORE_ADJ_ATTR 27286
|
||||
#define ROOTLESS_ATTR 27287
|
||||
#define UIDMAPPATH_ATTR 27288
|
||||
#define GIDMAPPATH_ATTR 27289
|
||||
|
||||
/*
|
||||
* Use the raw syscall for versions of glibc which don't include a function for
|
||||
* it, namely (glibc 2.12).
|
||||
*/
|
||||
#if __GLIBC__ == 2 && __GLIBC_MINOR__ < 14
|
||||
# define _GNU_SOURCE
|
||||
# include "syscall.h"
|
||||
# if !defined(SYS_setns) && defined(__NR_setns)
|
||||
# define SYS_setns __NR_setns
|
||||
# endif
|
||||
|
||||
#ifndef SYS_setns
|
||||
# error "setns(2) syscall not supported by glibc version"
|
||||
#endif
|
||||
|
||||
int setns(int fd, int nstype)
|
||||
{
|
||||
return syscall(SYS_setns, fd, nstype);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* XXX: This is ugly. */
|
||||
static int syncfd = -1;
|
||||
|
||||
/* TODO(cyphar): Fix this so it correctly deals with syncT. */
|
||||
#define bail(fmt, ...) \
|
||||
do { \
|
||||
int ret = __COUNTER__ + 1; \
|
||||
fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__); \
|
||||
if (syncfd >= 0) { \
|
||||
enum sync_t s = SYNC_ERR; \
|
||||
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) \
|
||||
fprintf(stderr, "nsenter: failed: write(s)"); \
|
||||
if (write(syncfd, &ret, sizeof(ret)) != sizeof(ret)) \
|
||||
fprintf(stderr, "nsenter: failed: write(ret)"); \
|
||||
} \
|
||||
exit(ret); \
|
||||
} while(0)
|
||||
|
||||
static int write_file(char *data, size_t data_len, char *pathfmt, ...)
|
||||
{
|
||||
int fd, len, ret = 0;
|
||||
char path[PATH_MAX];
|
||||
|
||||
va_list ap;
|
||||
va_start(ap, pathfmt);
|
||||
len = vsnprintf(path, PATH_MAX, pathfmt, ap);
|
||||
va_end(ap);
|
||||
if (len < 0)
|
||||
return -1;
|
||||
|
||||
fd = open(path, O_RDWR);
|
||||
if (fd < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
len = write(fd, data, data_len);
|
||||
if (len != data_len) {
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
close(fd);
|
||||
return ret;
|
||||
}
|
||||
|
||||
enum policy_t {
|
||||
SETGROUPS_DEFAULT = 0,
|
||||
SETGROUPS_ALLOW,
|
||||
SETGROUPS_DENY,
|
||||
};
|
||||
|
||||
/* This *must* be called before we touch gid_map. */
|
||||
static void update_setgroups(int pid, enum policy_t setgroup)
|
||||
{
|
||||
char *policy;
|
||||
|
||||
switch (setgroup) {
|
||||
case SETGROUPS_ALLOW:
|
||||
policy = "allow";
|
||||
break;
|
||||
case SETGROUPS_DENY:
|
||||
policy = "deny";
|
||||
break;
|
||||
case SETGROUPS_DEFAULT:
|
||||
default:
|
||||
/* Nothing to do. */
|
||||
return;
|
||||
}
|
||||
|
||||
if (write_file(policy, strlen(policy), "/proc/%d/setgroups", pid) < 0) {
|
||||
/*
|
||||
* If the kernel is too old to support /proc/pid/setgroups,
|
||||
* open(2) or write(2) will return ENOENT. This is fine.
|
||||
*/
|
||||
if (errno != ENOENT)
|
||||
bail("failed to write '%s' to /proc/%d/setgroups", policy, pid);
|
||||
}
|
||||
}
|
||||
|
||||
static int try_mapping_tool(const char *app, int pid, char *map, size_t map_len)
|
||||
{
|
||||
int child;
|
||||
|
||||
/*
|
||||
* If @app is NULL, execve will segfault. Just check it here and bail (if
|
||||
* we're in this path, the caller is already getting desparate and there
|
||||
* isn't a backup to this failing). This usually would be a configuration
|
||||
* or programming issue.
|
||||
*/
|
||||
if (!app)
|
||||
bail("mapping tool not present");
|
||||
|
||||
child = fork();
|
||||
if (child < 0)
|
||||
bail("failed to fork");
|
||||
|
||||
if (!child) {
|
||||
#define MAX_ARGV 20
|
||||
char *argv[MAX_ARGV];
|
||||
char *envp[] = { NULL };
|
||||
char pid_fmt[16];
|
||||
int argc = 0;
|
||||
char *next;
|
||||
|
||||
snprintf(pid_fmt, 16, "%d", pid);
|
||||
|
||||
argv[argc++] = (char *)app;
|
||||
argv[argc++] = pid_fmt;
|
||||
/*
|
||||
* Convert the map string into a list of argument that
|
||||
* newuidmap/newgidmap can understand.
|
||||
*/
|
||||
|
||||
while (argc < MAX_ARGV) {
|
||||
if (*map == '\0') {
|
||||
argv[argc++] = NULL;
|
||||
break;
|
||||
}
|
||||
argv[argc++] = map;
|
||||
next = strpbrk(map, "\n ");
|
||||
if (next == NULL)
|
||||
break;
|
||||
*next++ = '\0';
|
||||
map = next + strspn(next, "\n ");
|
||||
}
|
||||
|
||||
execve(app, argv, envp);
|
||||
bail("failed to execv");
|
||||
} else {
|
||||
int status;
|
||||
|
||||
while (true) {
|
||||
if (waitpid(child, &status, 0) < 0) {
|
||||
if (errno == EINTR)
|
||||
continue;
|
||||
bail("failed to waitpid");
|
||||
}
|
||||
if (WIFEXITED(status) || WIFSIGNALED(status))
|
||||
return WEXITSTATUS(status);
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static void update_uidmap(const char *path, int pid, char *map, size_t map_len)
|
||||
{
|
||||
if (map == NULL || map_len <= 0)
|
||||
return;
|
||||
|
||||
if (write_file(map, map_len, "/proc/%d/uid_map", pid) < 0) {
|
||||
if (errno != EPERM)
|
||||
bail("failed to update /proc/%d/uid_map", pid);
|
||||
if (try_mapping_tool(path, pid, map, map_len))
|
||||
bail("failed to use newuid map on %d", pid);
|
||||
}
|
||||
}
|
||||
|
||||
static void update_gidmap(const char *path, int pid, char *map, size_t map_len)
|
||||
{
|
||||
if (map == NULL || map_len <= 0)
|
||||
return;
|
||||
|
||||
if (write_file(map, map_len, "/proc/%d/gid_map", pid) < 0) {
|
||||
if (errno != EPERM)
|
||||
bail("failed to update /proc/%d/gid_map", pid);
|
||||
if (try_mapping_tool(path, pid, map, map_len))
|
||||
bail("failed to use newgid map on %d", pid);
|
||||
}
|
||||
}
|
||||
|
||||
static void update_oom_score_adj(char *data, size_t len)
|
||||
{
|
||||
if (data == NULL || len <= 0)
|
||||
return;
|
||||
|
||||
if (write_file(data, len, "/proc/self/oom_score_adj") < 0)
|
||||
bail("failed to update /proc/self/oom_score_adj");
|
||||
}
|
||||
|
||||
/* A dummy function that just jumps to the given jumpval. */
|
||||
static int child_func(void *arg) __attribute__ ((noinline));
|
||||
static int child_func(void *arg)
|
||||
{
|
||||
struct clone_t *ca = (struct clone_t *)arg;
|
||||
longjmp(*ca->env, ca->jmpval);
|
||||
}
|
||||
|
||||
static int clone_parent(jmp_buf *env, int jmpval) __attribute__ ((noinline));
|
||||
static int clone_parent(jmp_buf *env, int jmpval)
|
||||
{
|
||||
struct clone_t ca = {
|
||||
.env = env,
|
||||
.jmpval = jmpval,
|
||||
};
|
||||
|
||||
return clone(child_func, ca.stack_ptr, CLONE_PARENT | SIGCHLD, &ca);
|
||||
}
|
||||
|
||||
/*
|
||||
* Gets the init pipe fd from the environment, which is used to read the
|
||||
* bootstrap data and tell the parent what the new pid is after we finish
|
||||
* setting up the environment.
|
||||
*/
|
||||
static int initpipe(void)
|
||||
{
|
||||
int pipenum;
|
||||
char *initpipe, *endptr;
|
||||
|
||||
initpipe = getenv("_LIBCONTAINER_INITPIPE");
|
||||
if (initpipe == NULL || *initpipe == '\0')
|
||||
return -1;
|
||||
|
||||
pipenum = strtol(initpipe, &endptr, 10);
|
||||
if (*endptr != '\0')
|
||||
bail("unable to parse _LIBCONTAINER_INITPIPE");
|
||||
|
||||
return pipenum;
|
||||
}
|
||||
|
||||
/* Returns the clone(2) flag for a namespace, given the name of a namespace. */
|
||||
static int nsflag(char *name)
|
||||
{
|
||||
if (!strcmp(name, "cgroup"))
|
||||
return CLONE_NEWCGROUP;
|
||||
else if (!strcmp(name, "ipc"))
|
||||
return CLONE_NEWIPC;
|
||||
else if (!strcmp(name, "mnt"))
|
||||
return CLONE_NEWNS;
|
||||
else if (!strcmp(name, "net"))
|
||||
return CLONE_NEWNET;
|
||||
else if (!strcmp(name, "pid"))
|
||||
return CLONE_NEWPID;
|
||||
else if (!strcmp(name, "user"))
|
||||
return CLONE_NEWUSER;
|
||||
else if (!strcmp(name, "uts"))
|
||||
return CLONE_NEWUTS;
|
||||
|
||||
/* If we don't recognise a name, fallback to 0. */
|
||||
return 0;
|
||||
}
|
||||
|
||||
static uint32_t readint32(char *buf)
|
||||
{
|
||||
return *(uint32_t *) buf;
|
||||
}
|
||||
|
||||
static uint8_t readint8(char *buf)
|
||||
{
|
||||
return *(uint8_t *) buf;
|
||||
}
|
||||
|
||||
static void nl_parse(int fd, struct nlconfig_t *config)
|
||||
{
|
||||
size_t len, size;
|
||||
struct nlmsghdr hdr;
|
||||
char *data, *current;
|
||||
|
||||
/* Retrieve the netlink header. */
|
||||
len = read(fd, &hdr, NLMSG_HDRLEN);
|
||||
if (len != NLMSG_HDRLEN)
|
||||
bail("invalid netlink header length %zu", len);
|
||||
|
||||
if (hdr.nlmsg_type == NLMSG_ERROR)
|
||||
bail("failed to read netlink message");
|
||||
|
||||
if (hdr.nlmsg_type != INIT_MSG)
|
||||
bail("unexpected msg type %d", hdr.nlmsg_type);
|
||||
|
||||
/* Retrieve data. */
|
||||
size = NLMSG_PAYLOAD(&hdr, 0);
|
||||
current = data = malloc(size);
|
||||
if (!data)
|
||||
bail("failed to allocate %zu bytes of memory for nl_payload", size);
|
||||
|
||||
len = read(fd, data, size);
|
||||
if (len != size)
|
||||
bail("failed to read netlink payload, %zu != %zu", len, size);
|
||||
|
||||
/* Parse the netlink payload. */
|
||||
config->data = data;
|
||||
while (current < data + size) {
|
||||
struct nlattr *nlattr = (struct nlattr *)current;
|
||||
size_t payload_len = nlattr->nla_len - NLA_HDRLEN;
|
||||
|
||||
/* Advance to payload. */
|
||||
current += NLA_HDRLEN;
|
||||
|
||||
/* Handle payload. */
|
||||
switch (nlattr->nla_type) {
|
||||
case CLONE_FLAGS_ATTR:
|
||||
config->cloneflags = readint32(current);
|
||||
break;
|
||||
case ROOTLESS_ATTR:
|
||||
config->is_rootless = readint8(current);
|
||||
break;
|
||||
case OOM_SCORE_ADJ_ATTR:
|
||||
config->oom_score_adj = current;
|
||||
config->oom_score_adj_len = payload_len;
|
||||
break;
|
||||
case NS_PATHS_ATTR:
|
||||
config->namespaces = current;
|
||||
config->namespaces_len = payload_len;
|
||||
break;
|
||||
case UIDMAP_ATTR:
|
||||
config->uidmap = current;
|
||||
config->uidmap_len = payload_len;
|
||||
break;
|
||||
case GIDMAP_ATTR:
|
||||
config->gidmap = current;
|
||||
config->gidmap_len = payload_len;
|
||||
break;
|
||||
case UIDMAPPATH_ATTR:
|
||||
config->uidmappath = current;
|
||||
config->uidmappath_len = payload_len;
|
||||
break;
|
||||
case GIDMAPPATH_ATTR:
|
||||
config->gidmappath = current;
|
||||
config->gidmappath_len = payload_len;
|
||||
break;
|
||||
case SETGROUP_ATTR:
|
||||
config->is_setgroup = readint8(current);
|
||||
break;
|
||||
default:
|
||||
bail("unknown netlink message type %d", nlattr->nla_type);
|
||||
}
|
||||
|
||||
current += NLA_ALIGN(payload_len);
|
||||
}
|
||||
}
|
||||
|
||||
void nl_free(struct nlconfig_t *config)
|
||||
{
|
||||
free(config->data);
|
||||
}
|
||||
|
||||
void join_namespaces(char *nslist)
|
||||
{
|
||||
int num = 0, i;
|
||||
char *saveptr = NULL;
|
||||
char *namespace = strtok_r(nslist, ",", &saveptr);
|
||||
struct namespace_t {
|
||||
int fd;
|
||||
int ns;
|
||||
char type[PATH_MAX];
|
||||
char path[PATH_MAX];
|
||||
} *namespaces = NULL;
|
||||
|
||||
if (!namespace || !strlen(namespace) || !strlen(nslist))
|
||||
bail("ns paths are empty");
|
||||
|
||||
/*
|
||||
* We have to open the file descriptors first, since after
|
||||
* we join the mnt namespace we might no longer be able to
|
||||
* access the paths.
|
||||
*/
|
||||
do {
|
||||
int fd;
|
||||
char *path;
|
||||
struct namespace_t *ns;
|
||||
|
||||
/* Resize the namespace array. */
|
||||
namespaces = realloc(namespaces, ++num * sizeof(struct namespace_t));
|
||||
if (!namespaces)
|
||||
bail("failed to reallocate namespace array");
|
||||
ns = &namespaces[num - 1];
|
||||
|
||||
/* Split 'ns:path'. */
|
||||
path = strstr(namespace, ":");
|
||||
if (!path)
|
||||
bail("failed to parse %s", namespace);
|
||||
*path++ = '\0';
|
||||
|
||||
fd = open(path, O_RDONLY);
|
||||
if (fd < 0)
|
||||
bail("failed to open %s", path);
|
||||
|
||||
ns->fd = fd;
|
||||
ns->ns = nsflag(namespace);
|
||||
strncpy(ns->path, path, PATH_MAX);
|
||||
} while ((namespace = strtok_r(NULL, ",", &saveptr)) != NULL);
|
||||
|
||||
/*
|
||||
* The ordering in which we join namespaces is important. We should
|
||||
* always join the user namespace *first*. This is all guaranteed
|
||||
* from the container_linux.go side of this, so we're just going to
|
||||
* follow the order given to us.
|
||||
*/
|
||||
|
||||
for (i = 0; i < num; i++) {
|
||||
struct namespace_t ns = namespaces[i];
|
||||
|
||||
if (setns(ns.fd, ns.ns) < 0)
|
||||
bail("failed to setns to %s", ns.path);
|
||||
|
||||
close(ns.fd);
|
||||
}
|
||||
|
||||
free(namespaces);
|
||||
}
|
||||
|
||||
void nsexec(void)
|
||||
{
|
||||
int pipenum;
|
||||
jmp_buf env;
|
||||
int sync_child_pipe[2], sync_grandchild_pipe[2];
|
||||
struct nlconfig_t config = { 0 };
|
||||
|
||||
/*
|
||||
* If we don't have an init pipe, just return to the go routine.
|
||||
* We'll only get an init pipe for start or exec.
|
||||
*/
|
||||
pipenum = initpipe();
|
||||
if (pipenum == -1)
|
||||
return;
|
||||
|
||||
/* Parse all of the netlink configuration. */
|
||||
nl_parse(pipenum, &config);
|
||||
|
||||
/* Set oom_score_adj. This has to be done before !dumpable because
|
||||
* /proc/self/oom_score_adj is not writeable unless you're an privileged
|
||||
* user (if !dumpable is set). All children inherit their parent's
|
||||
* oom_score_adj value on fork(2) so this will always be propagated
|
||||
* properly.
|
||||
*/
|
||||
update_oom_score_adj(config.oom_score_adj, config.oom_score_adj_len);
|
||||
|
||||
/*
|
||||
* Make the process non-dumpable, to avoid various race conditions that
|
||||
* could cause processes in namespaces we're joining to access host
|
||||
* resources (or potentially execute code).
|
||||
*
|
||||
* However, if the number of namespaces we are joining is 0, we are not
|
||||
* going to be switching to a different security context. Thus setting
|
||||
* ourselves to be non-dumpable only breaks things (like rootless
|
||||
* containers), which is the recommendation from the kernel folks.
|
||||
*/
|
||||
if (config.namespaces) {
|
||||
if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0)
|
||||
bail("failed to set process as non-dumpable");
|
||||
}
|
||||
|
||||
/* Pipe so we can tell the child when we've finished setting up. */
|
||||
if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sync_child_pipe) < 0)
|
||||
bail("failed to setup sync pipe between parent and child");
|
||||
|
||||
/*
|
||||
* We need a new socketpair to sync with grandchild so we don't have
|
||||
* race condition with child.
|
||||
*/
|
||||
if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sync_grandchild_pipe) < 0)
|
||||
bail("failed to setup sync pipe between parent and grandchild");
|
||||
|
||||
/* TODO: Currently we aren't dealing with child deaths properly. */
|
||||
|
||||
/*
|
||||
* Okay, so this is quite annoying.
|
||||
*
|
||||
* In order for this unsharing code to be more extensible we need to split
|
||||
* up unshare(CLONE_NEWUSER) and clone() in various ways. The ideal case
|
||||
* would be if we did clone(CLONE_NEWUSER) and the other namespaces
|
||||
* separately, but because of SELinux issues we cannot really do that. But
|
||||
* we cannot just dump the namespace flags into clone(...) because several
|
||||
* usecases (such as rootless containers) require more granularity around
|
||||
* the namespace setup. In addition, some older kernels had issues where
|
||||
* CLONE_NEWUSER wasn't handled before other namespaces (but we cannot
|
||||
* handle this while also dealing with SELinux so we choose SELinux support
|
||||
* over broken kernel support).
|
||||
*
|
||||
* However, if we unshare(2) the user namespace *before* we clone(2), then
|
||||
* all hell breaks loose.
|
||||
*
|
||||
* The parent no longer has permissions to do many things (unshare(2) drops
|
||||
* all capabilities in your old namespace), and the container cannot be set
|
||||
* up to have more than one {uid,gid} mapping. This is obviously less than
|
||||
* ideal. In order to fix this, we have to first clone(2) and then unshare.
|
||||
*
|
||||
* Unfortunately, it's not as simple as that. We have to fork to enter the
|
||||
* PID namespace (the PID namespace only applies to children). Since we'll
|
||||
* have to double-fork, this clone_parent() call won't be able to get the
|
||||
* PID of the _actual_ init process (without doing more synchronisation than
|
||||
* I can deal with at the moment). So we'll just get the parent to send it
|
||||
* for us, the only job of this process is to update
|
||||
* /proc/pid/{setgroups,uid_map,gid_map}.
|
||||
*
|
||||
* And as a result of the above, we also need to setns(2) in the first child
|
||||
* because if we join a PID namespace in the topmost parent then our child
|
||||
* will be in that namespace (and it will not be able to give us a PID value
|
||||
* that makes sense without resorting to sending things with cmsg).
|
||||
*
|
||||
* This also deals with an older issue caused by dumping cloneflags into
|
||||
* clone(2): On old kernels, CLONE_PARENT didn't work with CLONE_NEWPID, so
|
||||
* we have to unshare(2) before clone(2) in order to do this. This was fixed
|
||||
* in upstream commit 1f7f4dde5c945f41a7abc2285be43d918029ecc5, and was
|
||||
* introduced by 40a0d32d1eaffe6aac7324ca92604b6b3977eb0e. As far as we're
|
||||
* aware, the last mainline kernel which had this bug was Linux 3.12.
|
||||
* However, we cannot comment on which kernels the broken patch was
|
||||
* backported to.
|
||||
*
|
||||
* -- Aleksa "what has my life come to?" Sarai
|
||||
*/
|
||||
|
||||
switch (setjmp(env)) {
|
||||
/*
|
||||
* Stage 0: We're in the parent. Our job is just to create a new child
|
||||
* (stage 1: JUMP_CHILD) process and write its uid_map and
|
||||
* gid_map. That process will go on to create a new process, then
|
||||
* it will send us its PID which we will send to the bootstrap
|
||||
* process.
|
||||
*/
|
||||
case JUMP_PARENT:{
|
||||
int len;
|
||||
pid_t child, first_child = -1;
|
||||
char buf[JSON_MAX];
|
||||
bool ready = false;
|
||||
|
||||
/* For debugging. */
|
||||
prctl(PR_SET_NAME, (unsigned long)"runc:[0:PARENT]", 0, 0, 0);
|
||||
|
||||
/* Start the process of getting a container. */
|
||||
child = clone_parent(&env, JUMP_CHILD);
|
||||
if (child < 0)
|
||||
bail("unable to fork: child_func");
|
||||
|
||||
/*
|
||||
* State machine for synchronisation with the children.
|
||||
*
|
||||
* Father only return when both child and grandchild are
|
||||
* ready, so we can receive all possible error codes
|
||||
* generated by children.
|
||||
*/
|
||||
while (!ready) {
|
||||
enum sync_t s;
|
||||
int ret;
|
||||
|
||||
syncfd = sync_child_pipe[1];
|
||||
close(sync_child_pipe[0]);
|
||||
|
||||
if (read(syncfd, &s, sizeof(s)) != sizeof(s))
|
||||
bail("failed to sync with child: next state");
|
||||
|
||||
switch (s) {
|
||||
case SYNC_ERR:
|
||||
/* We have to mirror the error code of the child. */
|
||||
if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret))
|
||||
bail("failed to sync with child: read(error code)");
|
||||
|
||||
exit(ret);
|
||||
case SYNC_USERMAP_PLS:
|
||||
/*
|
||||
* Enable setgroups(2) if we've been asked to. But we also
|
||||
* have to explicitly disable setgroups(2) if we're
|
||||
* creating a rootless container (this is required since
|
||||
* Linux 3.19).
|
||||
*/
|
||||
if (config.is_rootless && config.is_setgroup) {
|
||||
kill(child, SIGKILL);
|
||||
bail("cannot allow setgroup in an unprivileged user namespace setup");
|
||||
}
|
||||
|
||||
if (config.is_setgroup)
|
||||
update_setgroups(child, SETGROUPS_ALLOW);
|
||||
if (config.is_rootless)
|
||||
update_setgroups(child, SETGROUPS_DENY);
|
||||
|
||||
/* Set up mappings. */
|
||||
update_uidmap(config.uidmappath, child, config.uidmap, config.uidmap_len);
|
||||
update_gidmap(config.gidmappath, child, config.gidmap, config.gidmap_len);
|
||||
|
||||
s = SYNC_USERMAP_ACK;
|
||||
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
||||
kill(child, SIGKILL);
|
||||
bail("failed to sync with child: write(SYNC_USERMAP_ACK)");
|
||||
}
|
||||
break;
|
||||
case SYNC_RECVPID_PLS:{
|
||||
first_child = child;
|
||||
|
||||
/* Get the init_func pid. */
|
||||
if (read(syncfd, &child, sizeof(child)) != sizeof(child)) {
|
||||
kill(first_child, SIGKILL);
|
||||
bail("failed to sync with child: read(childpid)");
|
||||
}
|
||||
|
||||
/* Send ACK. */
|
||||
s = SYNC_RECVPID_ACK;
|
||||
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
||||
kill(first_child, SIGKILL);
|
||||
kill(child, SIGKILL);
|
||||
bail("failed to sync with child: write(SYNC_RECVPID_ACK)");
|
||||
}
|
||||
}
|
||||
break;
|
||||
case SYNC_CHILD_READY:
|
||||
ready = true;
|
||||
break;
|
||||
default:
|
||||
bail("unexpected sync value: %u", s);
|
||||
}
|
||||
}
|
||||
|
||||
/* Now sync with grandchild. */
|
||||
|
||||
ready = false;
|
||||
while (!ready) {
|
||||
enum sync_t s;
|
||||
int ret;
|
||||
|
||||
syncfd = sync_grandchild_pipe[1];
|
||||
close(sync_grandchild_pipe[0]);
|
||||
|
||||
s = SYNC_GRANDCHILD;
|
||||
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
||||
kill(child, SIGKILL);
|
||||
bail("failed to sync with child: write(SYNC_GRANDCHILD)");
|
||||
}
|
||||
|
||||
if (read(syncfd, &s, sizeof(s)) != sizeof(s))
|
||||
bail("failed to sync with child: next state");
|
||||
|
||||
switch (s) {
|
||||
case SYNC_ERR:
|
||||
/* We have to mirror the error code of the child. */
|
||||
if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret))
|
||||
bail("failed to sync with child: read(error code)");
|
||||
|
||||
exit(ret);
|
||||
case SYNC_CHILD_READY:
|
||||
ready = true;
|
||||
break;
|
||||
default:
|
||||
bail("unexpected sync value: %u", s);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Send the init_func pid and the pid of the first child back to our parent.
|
||||
*
|
||||
* We need to send both back because we can't reap the first child we created (CLONE_PARENT).
|
||||
* It becomes the responsibility of our parent to reap the first child.
|
||||
*/
|
||||
len = snprintf(buf, JSON_MAX, "{\"pid\": %d, \"pid_first\": %d}\n", child, first_child);
|
||||
if (len < 0) {
|
||||
kill(child, SIGKILL);
|
||||
bail("unable to generate JSON for child pid");
|
||||
}
|
||||
if (write(pipenum, buf, len) != len) {
|
||||
kill(child, SIGKILL);
|
||||
bail("unable to send child pid to bootstrapper");
|
||||
}
|
||||
|
||||
exit(0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Stage 1: We're in the first child process. Our job is to join any
|
||||
* provided namespaces in the netlink payload and unshare all
|
||||
* of the requested namespaces. If we've been asked to
|
||||
* CLONE_NEWUSER, we will ask our parent (stage 0) to set up
|
||||
* our user mappings for us. Then, we create a new child
|
||||
* (stage 2: JUMP_INIT) for PID namespace. We then send the
|
||||
* child's PID to our parent (stage 0).
|
||||
*/
|
||||
case JUMP_CHILD:{
|
||||
pid_t child;
|
||||
enum sync_t s;
|
||||
|
||||
/* We're in a child and thus need to tell the parent if we die. */
|
||||
syncfd = sync_child_pipe[0];
|
||||
close(sync_child_pipe[1]);
|
||||
|
||||
/* For debugging. */
|
||||
prctl(PR_SET_NAME, (unsigned long)"runc:[1:CHILD]", 0, 0, 0);
|
||||
|
||||
/*
|
||||
* We need to setns first. We cannot do this earlier (in stage 0)
|
||||
* because of the fact that we forked to get here (the PID of
|
||||
* [stage 2: JUMP_INIT]) would be meaningless). We could send it
|
||||
* using cmsg(3) but that's just annoying.
|
||||
*/
|
||||
if (config.namespaces)
|
||||
join_namespaces(config.namespaces);
|
||||
|
||||
/*
|
||||
* Unshare all of the namespaces. Now, it should be noted that this
|
||||
* ordering might break in the future (especially with rootless
|
||||
* containers). But for now, it's not possible to split this into
|
||||
* CLONE_NEWUSER + [the rest] because of some RHEL SELinux issues.
|
||||
*
|
||||
* Note that we don't merge this with clone() because there were
|
||||
* some old kernel versions where clone(CLONE_PARENT | CLONE_NEWPID)
|
||||
* was broken, so we'll just do it the long way anyway.
|
||||
*/
|
||||
if (unshare(config.cloneflags) < 0)
|
||||
bail("failed to unshare namespaces");
|
||||
|
||||
/*
|
||||
* Deal with user namespaces first. They are quite special, as they
|
||||
* affect our ability to unshare other namespaces and are used as
|
||||
* context for privilege checks.
|
||||
*/
|
||||
if (config.cloneflags & CLONE_NEWUSER) {
|
||||
/*
|
||||
* We don't have the privileges to do any mapping here (see the
|
||||
* clone_parent rant). So signal our parent to hook us up.
|
||||
*/
|
||||
|
||||
/* Switching is only necessary if we joined namespaces. */
|
||||
if (config.namespaces) {
|
||||
if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) < 0)
|
||||
bail("failed to set process as dumpable");
|
||||
}
|
||||
s = SYNC_USERMAP_PLS;
|
||||
if (write(syncfd, &s, sizeof(s)) != sizeof(s))
|
||||
bail("failed to sync with parent: write(SYNC_USERMAP_PLS)");
|
||||
|
||||
/* ... wait for mapping ... */
|
||||
|
||||
if (read(syncfd, &s, sizeof(s)) != sizeof(s))
|
||||
bail("failed to sync with parent: read(SYNC_USERMAP_ACK)");
|
||||
if (s != SYNC_USERMAP_ACK)
|
||||
bail("failed to sync with parent: SYNC_USERMAP_ACK: got %u", s);
|
||||
/* Switching is only necessary if we joined namespaces. */
|
||||
if (config.namespaces) {
|
||||
if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0)
|
||||
bail("failed to set process as dumpable");
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* TODO: What about non-namespace clone flags that we're dropping here?
|
||||
*
|
||||
* We fork again because of PID namespace, setns(2) or unshare(2) don't
|
||||
* change the PID namespace of the calling process, because doing so
|
||||
* would change the caller's idea of its own PID (as reported by getpid()),
|
||||
* which would break many applications and libraries, so we must fork
|
||||
* to actually enter the new PID namespace.
|
||||
*/
|
||||
child = clone_parent(&env, JUMP_INIT);
|
||||
if (child < 0)
|
||||
bail("unable to fork: init_func");
|
||||
|
||||
/* Send the child to our parent, which knows what it's doing. */
|
||||
s = SYNC_RECVPID_PLS;
|
||||
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
||||
kill(child, SIGKILL);
|
||||
bail("failed to sync with parent: write(SYNC_RECVPID_PLS)");
|
||||
}
|
||||
if (write(syncfd, &child, sizeof(child)) != sizeof(child)) {
|
||||
kill(child, SIGKILL);
|
||||
bail("failed to sync with parent: write(childpid)");
|
||||
}
|
||||
|
||||
/* ... wait for parent to get the pid ... */
|
||||
|
||||
if (read(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
||||
kill(child, SIGKILL);
|
||||
bail("failed to sync with parent: read(SYNC_RECVPID_ACK)");
|
||||
}
|
||||
if (s != SYNC_RECVPID_ACK) {
|
||||
kill(child, SIGKILL);
|
||||
bail("failed to sync with parent: SYNC_RECVPID_ACK: got %u", s);
|
||||
}
|
||||
|
||||
s = SYNC_CHILD_READY;
|
||||
if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
|
||||
kill(child, SIGKILL);
|
||||
bail("failed to sync with parent: write(SYNC_CHILD_READY)");
|
||||
}
|
||||
|
||||
/* Our work is done. [Stage 2: JUMP_INIT] is doing the rest of the work. */
|
||||
exit(0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Stage 2: We're the final child process, and the only process that will
|
||||
* actually return to the Go runtime. Our job is to just do the
|
||||
* final cleanup steps and then return to the Go runtime to allow
|
||||
* init_linux.go to run.
|
||||
*/
|
||||
case JUMP_INIT:{
|
||||
/*
|
||||
* We're inside the child now, having jumped from the
|
||||
* start_child() code after forking in the parent.
|
||||
*/
|
||||
enum sync_t s;
|
||||
|
||||
/* We're in a child and thus need to tell the parent if we die. */
|
||||
syncfd = sync_grandchild_pipe[0];
|
||||
close(sync_grandchild_pipe[1]);
|
||||
close(sync_child_pipe[0]);
|
||||
close(sync_child_pipe[1]);
|
||||
|
||||
/* For debugging. */
|
||||
prctl(PR_SET_NAME, (unsigned long)"runc:[2:INIT]", 0, 0, 0);
|
||||
|
||||
if (read(syncfd, &s, sizeof(s)) != sizeof(s))
|
||||
bail("failed to sync with parent: read(SYNC_GRANDCHILD)");
|
||||
if (s != SYNC_GRANDCHILD)
|
||||
bail("failed to sync with parent: SYNC_GRANDCHILD: got %u", s);
|
||||
|
||||
if (setsid() < 0)
|
||||
bail("setsid failed");
|
||||
|
||||
if (setuid(0) < 0)
|
||||
bail("setuid failed");
|
||||
|
||||
if (setgid(0) < 0)
|
||||
bail("setgid failed");
|
||||
|
||||
if (!config.is_rootless && config.is_setgroup) {
|
||||
if (setgroups(0, NULL) < 0)
|
||||
bail("setgroups failed");
|
||||
}
|
||||
|
||||
s = SYNC_CHILD_READY;
|
||||
if (write(syncfd, &s, sizeof(s)) != sizeof(s))
|
||||
bail("failed to sync with patent: write(SYNC_CHILD_READY)");
|
||||
|
||||
/* Close sync pipes. */
|
||||
close(sync_grandchild_pipe[0]);
|
||||
|
||||
/* Free netlink data. */
|
||||
nl_free(&config);
|
||||
|
||||
/* Finish executing, let the Go runtime take over. */
|
||||
return;
|
||||
}
|
||||
default:
|
||||
bail("unexpected jump value");
|
||||
}
|
||||
|
||||
/* Should never be reached. */
|
||||
bail("should never be reached");
|
||||
}
|
||||
110
vendor/github.com/opencontainers/runc/libcontainer/process.go
generated
vendored
Normal file
110
vendor/github.com/opencontainers/runc/libcontainer/process.go
generated
vendored
Normal file
@@ -0,0 +1,110 @@
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"os"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
type processOperations interface {
|
||||
wait() (*os.ProcessState, error)
|
||||
signal(sig os.Signal) error
|
||||
pid() int
|
||||
}
|
||||
|
||||
// Process specifies the configuration and IO for a process inside
|
||||
// a container.
|
||||
type Process struct {
|
||||
// The command to be run followed by any arguments.
|
||||
Args []string
|
||||
|
||||
// Env specifies the environment variables for the process.
|
||||
Env []string
|
||||
|
||||
// User will set the uid and gid of the executing process running inside the container
|
||||
// local to the container's user and group configuration.
|
||||
User string
|
||||
|
||||
// AdditionalGroups specifies the gids that should be added to supplementary groups
|
||||
// in addition to those that the user belongs to.
|
||||
AdditionalGroups []string
|
||||
|
||||
// Cwd will change the processes current working directory inside the container's rootfs.
|
||||
Cwd string
|
||||
|
||||
// Stdin is a pointer to a reader which provides the standard input stream.
|
||||
Stdin io.Reader
|
||||
|
||||
// Stdout is a pointer to a writer which receives the standard output stream.
|
||||
Stdout io.Writer
|
||||
|
||||
// Stderr is a pointer to a writer which receives the standard error stream.
|
||||
Stderr io.Writer
|
||||
|
||||
// ExtraFiles specifies additional open files to be inherited by the container
|
||||
ExtraFiles []*os.File
|
||||
|
||||
// Initial sizings for the console
|
||||
ConsoleWidth uint16
|
||||
ConsoleHeight uint16
|
||||
|
||||
// Capabilities specify the capabilities to keep when executing the process inside the container
|
||||
// All capabilities not specified will be dropped from the processes capability mask
|
||||
Capabilities *configs.Capabilities
|
||||
|
||||
// AppArmorProfile specifies the profile to apply to the process and is
|
||||
// changed at the time the process is execed
|
||||
AppArmorProfile string
|
||||
|
||||
// Label specifies the label to apply to the process. It is commonly used by selinux
|
||||
Label string
|
||||
|
||||
// NoNewPrivileges controls whether processes can gain additional privileges.
|
||||
NoNewPrivileges *bool
|
||||
|
||||
// Rlimits specifies the resource limits, such as max open files, to set in the container
|
||||
// If Rlimits are not set, the container will inherit rlimits from the parent process
|
||||
Rlimits []configs.Rlimit
|
||||
|
||||
// ConsoleSocket provides the masterfd console.
|
||||
ConsoleSocket *os.File
|
||||
|
||||
ops processOperations
|
||||
}
|
||||
|
||||
// Wait waits for the process to exit.
|
||||
// Wait releases any resources associated with the Process
|
||||
func (p Process) Wait() (*os.ProcessState, error) {
|
||||
if p.ops == nil {
|
||||
return nil, newGenericError(fmt.Errorf("invalid process"), NoProcessOps)
|
||||
}
|
||||
return p.ops.wait()
|
||||
}
|
||||
|
||||
// Pid returns the process ID
|
||||
func (p Process) Pid() (int, error) {
|
||||
// math.MinInt32 is returned here, because it's invalid value
|
||||
// for the kill() system call.
|
||||
if p.ops == nil {
|
||||
return math.MinInt32, newGenericError(fmt.Errorf("invalid process"), NoProcessOps)
|
||||
}
|
||||
return p.ops.pid(), nil
|
||||
}
|
||||
|
||||
// Signal sends a signal to the Process.
|
||||
func (p Process) Signal(sig os.Signal) error {
|
||||
if p.ops == nil {
|
||||
return newGenericError(fmt.Errorf("invalid process"), NoProcessOps)
|
||||
}
|
||||
return p.ops.signal(sig)
|
||||
}
|
||||
|
||||
// IO holds the process's STDIO
|
||||
type IO struct {
|
||||
Stdin io.WriteCloser
|
||||
Stdout io.ReadCloser
|
||||
Stderr io.ReadCloser
|
||||
}
|
||||
547
vendor/github.com/opencontainers/runc/libcontainer/process_linux.go
generated
vendored
Normal file
547
vendor/github.com/opencontainers/runc/libcontainer/process_linux.go
generated
vendored
Normal file
@@ -0,0 +1,547 @@
|
||||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"syscall" // only for Signal
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/intelrdt"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
type parentProcess interface {
|
||||
// pid returns the pid for the running process.
|
||||
pid() int
|
||||
|
||||
// start starts the process execution.
|
||||
start() error
|
||||
|
||||
// send a SIGKILL to the process and wait for the exit.
|
||||
terminate() error
|
||||
|
||||
// wait waits on the process returning the process state.
|
||||
wait() (*os.ProcessState, error)
|
||||
|
||||
// startTime returns the process start time.
|
||||
startTime() (uint64, error)
|
||||
|
||||
signal(os.Signal) error
|
||||
|
||||
externalDescriptors() []string
|
||||
|
||||
setExternalDescriptors(fds []string)
|
||||
}
|
||||
|
||||
type setnsProcess struct {
|
||||
cmd *exec.Cmd
|
||||
parentPipe *os.File
|
||||
childPipe *os.File
|
||||
cgroupPaths map[string]string
|
||||
intelRdtPath string
|
||||
config *initConfig
|
||||
fds []string
|
||||
process *Process
|
||||
bootstrapData io.Reader
|
||||
}
|
||||
|
||||
func (p *setnsProcess) startTime() (uint64, error) {
|
||||
stat, err := system.Stat(p.pid())
|
||||
return stat.StartTime, err
|
||||
}
|
||||
|
||||
func (p *setnsProcess) signal(sig os.Signal) error {
|
||||
s, ok := sig.(syscall.Signal)
|
||||
if !ok {
|
||||
return errors.New("os: unsupported signal type")
|
||||
}
|
||||
return unix.Kill(p.pid(), s)
|
||||
}
|
||||
|
||||
func (p *setnsProcess) start() (err error) {
|
||||
defer p.parentPipe.Close()
|
||||
err = p.cmd.Start()
|
||||
p.childPipe.Close()
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "starting setns process")
|
||||
}
|
||||
if p.bootstrapData != nil {
|
||||
if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
|
||||
return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
|
||||
}
|
||||
}
|
||||
if err = p.execSetns(); err != nil {
|
||||
return newSystemErrorWithCause(err, "executing setns process")
|
||||
}
|
||||
if len(p.cgroupPaths) > 0 {
|
||||
if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil {
|
||||
return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid())
|
||||
}
|
||||
}
|
||||
if p.intelRdtPath != "" {
|
||||
// if Intel RDT "resource control" filesystem path exists
|
||||
_, err := os.Stat(p.intelRdtPath)
|
||||
if err == nil {
|
||||
if err := intelrdt.WriteIntelRdtTasks(p.intelRdtPath, p.pid()); err != nil {
|
||||
return newSystemErrorWithCausef(err, "adding pid %d to Intel RDT resource control filesystem", p.pid())
|
||||
}
|
||||
}
|
||||
}
|
||||
// set rlimits, this has to be done here because we lose permissions
|
||||
// to raise the limits once we enter a user-namespace
|
||||
if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting rlimits for process")
|
||||
}
|
||||
if err := utils.WriteJSON(p.parentPipe, p.config); err != nil {
|
||||
return newSystemErrorWithCause(err, "writing config to pipe")
|
||||
}
|
||||
|
||||
ierr := parseSync(p.parentPipe, func(sync *syncT) error {
|
||||
switch sync.Type {
|
||||
case procReady:
|
||||
// This shouldn't happen.
|
||||
panic("unexpected procReady in setns")
|
||||
case procHooks:
|
||||
// This shouldn't happen.
|
||||
panic("unexpected procHooks in setns")
|
||||
default:
|
||||
return newSystemError(fmt.Errorf("invalid JSON payload from child"))
|
||||
}
|
||||
})
|
||||
|
||||
if err := unix.Shutdown(int(p.parentPipe.Fd()), unix.SHUT_WR); err != nil {
|
||||
return newSystemErrorWithCause(err, "calling shutdown on init pipe")
|
||||
}
|
||||
// Must be done after Shutdown so the child will exit and we can wait for it.
|
||||
if ierr != nil {
|
||||
p.wait()
|
||||
return ierr
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// execSetns runs the process that executes C code to perform the setns calls
|
||||
// because setns support requires the C process to fork off a child and perform the setns
|
||||
// before the go runtime boots, we wait on the process to die and receive the child's pid
|
||||
// over the provided pipe.
|
||||
func (p *setnsProcess) execSetns() error {
|
||||
status, err := p.cmd.Process.Wait()
|
||||
if err != nil {
|
||||
p.cmd.Wait()
|
||||
return newSystemErrorWithCause(err, "waiting on setns process to finish")
|
||||
}
|
||||
if !status.Success() {
|
||||
p.cmd.Wait()
|
||||
return newSystemError(&exec.ExitError{ProcessState: status})
|
||||
}
|
||||
var pid *pid
|
||||
if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil {
|
||||
p.cmd.Wait()
|
||||
return newSystemErrorWithCause(err, "reading pid from init pipe")
|
||||
}
|
||||
|
||||
// Clean up the zombie parent process
|
||||
firstChildProcess, err := os.FindProcess(pid.PidFirstChild)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Ignore the error in case the child has already been reaped for any reason
|
||||
_, _ = firstChildProcess.Wait()
|
||||
|
||||
process, err := os.FindProcess(pid.Pid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
p.cmd.Process = process
|
||||
p.process.ops = p
|
||||
return nil
|
||||
}
|
||||
|
||||
// terminate sends a SIGKILL to the forked process for the setns routine then waits to
|
||||
// avoid the process becoming a zombie.
|
||||
func (p *setnsProcess) terminate() error {
|
||||
if p.cmd.Process == nil {
|
||||
return nil
|
||||
}
|
||||
err := p.cmd.Process.Kill()
|
||||
if _, werr := p.wait(); err == nil {
|
||||
err = werr
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (p *setnsProcess) wait() (*os.ProcessState, error) {
|
||||
err := p.cmd.Wait()
|
||||
|
||||
// Return actual ProcessState even on Wait error
|
||||
return p.cmd.ProcessState, err
|
||||
}
|
||||
|
||||
func (p *setnsProcess) pid() int {
|
||||
return p.cmd.Process.Pid
|
||||
}
|
||||
|
||||
func (p *setnsProcess) externalDescriptors() []string {
|
||||
return p.fds
|
||||
}
|
||||
|
||||
func (p *setnsProcess) setExternalDescriptors(newFds []string) {
|
||||
p.fds = newFds
|
||||
}
|
||||
|
||||
type initProcess struct {
|
||||
cmd *exec.Cmd
|
||||
parentPipe *os.File
|
||||
childPipe *os.File
|
||||
config *initConfig
|
||||
manager cgroups.Manager
|
||||
intelRdtManager intelrdt.Manager
|
||||
container *linuxContainer
|
||||
fds []string
|
||||
process *Process
|
||||
bootstrapData io.Reader
|
||||
sharePidns bool
|
||||
}
|
||||
|
||||
func (p *initProcess) pid() int {
|
||||
return p.cmd.Process.Pid
|
||||
}
|
||||
|
||||
func (p *initProcess) externalDescriptors() []string {
|
||||
return p.fds
|
||||
}
|
||||
|
||||
// execSetns runs the process that executes C code to perform the setns calls
|
||||
// because setns support requires the C process to fork off a child and perform the setns
|
||||
// before the go runtime boots, we wait on the process to die and receive the child's pid
|
||||
// over the provided pipe.
|
||||
// This is called by initProcess.start function
|
||||
func (p *initProcess) execSetns() error {
|
||||
status, err := p.cmd.Process.Wait()
|
||||
if err != nil {
|
||||
p.cmd.Wait()
|
||||
return err
|
||||
}
|
||||
if !status.Success() {
|
||||
p.cmd.Wait()
|
||||
return &exec.ExitError{ProcessState: status}
|
||||
}
|
||||
var pid *pid
|
||||
if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil {
|
||||
p.cmd.Wait()
|
||||
return err
|
||||
}
|
||||
|
||||
// Clean up the zombie parent process
|
||||
firstChildProcess, err := os.FindProcess(pid.PidFirstChild)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Ignore the error in case the child has already been reaped for any reason
|
||||
_, _ = firstChildProcess.Wait()
|
||||
|
||||
process, err := os.FindProcess(pid.Pid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
p.cmd.Process = process
|
||||
p.process.ops = p
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *initProcess) start() error {
|
||||
defer p.parentPipe.Close()
|
||||
err := p.cmd.Start()
|
||||
p.process.ops = p
|
||||
p.childPipe.Close()
|
||||
if err != nil {
|
||||
p.process.ops = nil
|
||||
return newSystemErrorWithCause(err, "starting init process command")
|
||||
}
|
||||
// Do this before syncing with child so that no children can escape the
|
||||
// cgroup. We don't need to worry about not doing this and not being root
|
||||
// because we'd be using the rootless cgroup manager in that case.
|
||||
if err := p.manager.Apply(p.pid()); err != nil {
|
||||
return newSystemErrorWithCause(err, "applying cgroup configuration for process")
|
||||
}
|
||||
if p.intelRdtManager != nil {
|
||||
if err := p.intelRdtManager.Apply(p.pid()); err != nil {
|
||||
return newSystemErrorWithCause(err, "applying Intel RDT configuration for process")
|
||||
}
|
||||
}
|
||||
defer func() {
|
||||
if err != nil {
|
||||
// TODO: should not be the responsibility to call here
|
||||
p.manager.Destroy()
|
||||
if p.intelRdtManager != nil {
|
||||
p.intelRdtManager.Destroy()
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
|
||||
return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
|
||||
}
|
||||
|
||||
if err := p.execSetns(); err != nil {
|
||||
return newSystemErrorWithCause(err, "running exec setns process for init")
|
||||
}
|
||||
|
||||
// Save the standard descriptor names before the container process
|
||||
// can potentially move them (e.g., via dup2()). If we don't do this now,
|
||||
// we won't know at checkpoint time which file descriptor to look up.
|
||||
fds, err := getPipeFds(p.pid())
|
||||
if err != nil {
|
||||
return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid())
|
||||
}
|
||||
p.setExternalDescriptors(fds)
|
||||
if err := p.createNetworkInterfaces(); err != nil {
|
||||
return newSystemErrorWithCause(err, "creating network interfaces")
|
||||
}
|
||||
if err := p.sendConfig(); err != nil {
|
||||
return newSystemErrorWithCause(err, "sending config to init process")
|
||||
}
|
||||
var (
|
||||
sentRun bool
|
||||
sentResume bool
|
||||
)
|
||||
|
||||
ierr := parseSync(p.parentPipe, func(sync *syncT) error {
|
||||
switch sync.Type {
|
||||
case procReady:
|
||||
// set rlimits, this has to be done here because we lose permissions
|
||||
// to raise the limits once we enter a user-namespace
|
||||
if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting rlimits for ready process")
|
||||
}
|
||||
// call prestart hooks
|
||||
if !p.config.Config.Namespaces.Contains(configs.NEWNS) {
|
||||
// Setup cgroup before prestart hook, so that the prestart hook could apply cgroup permissions.
|
||||
if err := p.manager.Set(p.config.Config); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting cgroup config for ready process")
|
||||
}
|
||||
if p.intelRdtManager != nil {
|
||||
if err := p.intelRdtManager.Set(p.config.Config); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting Intel RDT config for ready process")
|
||||
}
|
||||
}
|
||||
|
||||
if p.config.Config.Hooks != nil {
|
||||
bundle, annotations := utils.Annotations(p.container.config.Labels)
|
||||
s := configs.HookState{
|
||||
Version: p.container.config.Version,
|
||||
ID: p.container.id,
|
||||
Pid: p.pid(),
|
||||
Bundle: bundle,
|
||||
Annotations: annotations,
|
||||
}
|
||||
for i, hook := range p.config.Config.Hooks.Prestart {
|
||||
if err := hook.Run(s); err != nil {
|
||||
return newSystemErrorWithCausef(err, "running prestart hook %d", i)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Sync with child.
|
||||
if err := writeSync(p.parentPipe, procRun); err != nil {
|
||||
return newSystemErrorWithCause(err, "writing syncT 'run'")
|
||||
}
|
||||
sentRun = true
|
||||
case procHooks:
|
||||
// Setup cgroup before prestart hook, so that the prestart hook could apply cgroup permissions.
|
||||
if err := p.manager.Set(p.config.Config); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting cgroup config for procHooks process")
|
||||
}
|
||||
if p.intelRdtManager != nil {
|
||||
if err := p.intelRdtManager.Set(p.config.Config); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting Intel RDT config for procHooks process")
|
||||
}
|
||||
}
|
||||
if p.config.Config.Hooks != nil {
|
||||
bundle, annotations := utils.Annotations(p.container.config.Labels)
|
||||
s := configs.HookState{
|
||||
Version: p.container.config.Version,
|
||||
ID: p.container.id,
|
||||
Pid: p.pid(),
|
||||
Bundle: bundle,
|
||||
Annotations: annotations,
|
||||
}
|
||||
for i, hook := range p.config.Config.Hooks.Prestart {
|
||||
if err := hook.Run(s); err != nil {
|
||||
return newSystemErrorWithCausef(err, "running prestart hook %d", i)
|
||||
}
|
||||
}
|
||||
}
|
||||
// Sync with child.
|
||||
if err := writeSync(p.parentPipe, procResume); err != nil {
|
||||
return newSystemErrorWithCause(err, "writing syncT 'resume'")
|
||||
}
|
||||
sentResume = true
|
||||
default:
|
||||
return newSystemError(fmt.Errorf("invalid JSON payload from child"))
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
if !sentRun {
|
||||
return newSystemErrorWithCause(ierr, "container init")
|
||||
}
|
||||
if p.config.Config.Namespaces.Contains(configs.NEWNS) && !sentResume {
|
||||
return newSystemError(fmt.Errorf("could not synchronise after executing prestart hooks with container process"))
|
||||
}
|
||||
if err := unix.Shutdown(int(p.parentPipe.Fd()), unix.SHUT_WR); err != nil {
|
||||
return newSystemErrorWithCause(err, "shutting down init pipe")
|
||||
}
|
||||
|
||||
// Must be done after Shutdown so the child will exit and we can wait for it.
|
||||
if ierr != nil {
|
||||
p.wait()
|
||||
return ierr
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *initProcess) wait() (*os.ProcessState, error) {
|
||||
err := p.cmd.Wait()
|
||||
if err != nil {
|
||||
return p.cmd.ProcessState, err
|
||||
}
|
||||
// we should kill all processes in cgroup when init is died if we use host PID namespace
|
||||
if p.sharePidns {
|
||||
signalAllProcesses(p.manager, unix.SIGKILL)
|
||||
}
|
||||
return p.cmd.ProcessState, nil
|
||||
}
|
||||
|
||||
func (p *initProcess) terminate() error {
|
||||
if p.cmd.Process == nil {
|
||||
return nil
|
||||
}
|
||||
err := p.cmd.Process.Kill()
|
||||
if _, werr := p.wait(); err == nil {
|
||||
err = werr
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (p *initProcess) startTime() (uint64, error) {
|
||||
stat, err := system.Stat(p.pid())
|
||||
return stat.StartTime, err
|
||||
}
|
||||
|
||||
func (p *initProcess) sendConfig() error {
|
||||
// send the config to the container's init process, we don't use JSON Encode
|
||||
// here because there might be a problem in JSON decoder in some cases, see:
|
||||
// https://github.com/docker/docker/issues/14203#issuecomment-174177790
|
||||
return utils.WriteJSON(p.parentPipe, p.config)
|
||||
}
|
||||
|
||||
func (p *initProcess) createNetworkInterfaces() error {
|
||||
for _, config := range p.config.Config.Networks {
|
||||
strategy, err := getStrategy(config.Type)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
n := &network{
|
||||
Network: *config,
|
||||
}
|
||||
if err := strategy.create(n, p.pid()); err != nil {
|
||||
return err
|
||||
}
|
||||
p.config.Networks = append(p.config.Networks, n)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *initProcess) signal(sig os.Signal) error {
|
||||
s, ok := sig.(syscall.Signal)
|
||||
if !ok {
|
||||
return errors.New("os: unsupported signal type")
|
||||
}
|
||||
return unix.Kill(p.pid(), s)
|
||||
}
|
||||
|
||||
func (p *initProcess) setExternalDescriptors(newFds []string) {
|
||||
p.fds = newFds
|
||||
}
|
||||
|
||||
func getPipeFds(pid int) ([]string, error) {
|
||||
fds := make([]string, 3)
|
||||
|
||||
dirPath := filepath.Join("/proc", strconv.Itoa(pid), "/fd")
|
||||
for i := 0; i < 3; i++ {
|
||||
// XXX: This breaks if the path is not a valid symlink (which can
|
||||
// happen in certain particularly unlucky mount namespace setups).
|
||||
f := filepath.Join(dirPath, strconv.Itoa(i))
|
||||
target, err := os.Readlink(f)
|
||||
if err != nil {
|
||||
// Ignore permission errors, for rootless containers and other
|
||||
// non-dumpable processes. if we can't get the fd for a particular
|
||||
// file, there's not much we can do.
|
||||
if os.IsPermission(err) {
|
||||
continue
|
||||
}
|
||||
return fds, err
|
||||
}
|
||||
fds[i] = target
|
||||
}
|
||||
return fds, nil
|
||||
}
|
||||
|
||||
// InitializeIO creates pipes for use with the process's stdio and returns the
|
||||
// opposite side for each. Do not use this if you want to have a pseudoterminal
|
||||
// set up for you by libcontainer (TODO: fix that too).
|
||||
// TODO: This is mostly unnecessary, and should be handled by clients.
|
||||
func (p *Process) InitializeIO(rootuid, rootgid int) (i *IO, err error) {
|
||||
var fds []uintptr
|
||||
i = &IO{}
|
||||
// cleanup in case of an error
|
||||
defer func() {
|
||||
if err != nil {
|
||||
for _, fd := range fds {
|
||||
unix.Close(int(fd))
|
||||
}
|
||||
}
|
||||
}()
|
||||
// STDIN
|
||||
r, w, err := os.Pipe()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fds = append(fds, r.Fd(), w.Fd())
|
||||
p.Stdin, i.Stdin = r, w
|
||||
// STDOUT
|
||||
if r, w, err = os.Pipe(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fds = append(fds, r.Fd(), w.Fd())
|
||||
p.Stdout, i.Stdout = w, r
|
||||
// STDERR
|
||||
if r, w, err = os.Pipe(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
fds = append(fds, r.Fd(), w.Fd())
|
||||
p.Stderr, i.Stderr = w, r
|
||||
// change ownership of the pipes incase we are in a user namespace
|
||||
for _, fd := range fds {
|
||||
if err := unix.Fchown(int(fd), rootuid, rootgid); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return i, nil
|
||||
}
|
||||
122
vendor/github.com/opencontainers/runc/libcontainer/restored_process.go
generated
vendored
Normal file
122
vendor/github.com/opencontainers/runc/libcontainer/restored_process.go
generated
vendored
Normal file
@@ -0,0 +1,122 @@
|
||||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
)
|
||||
|
||||
func newRestoredProcess(pid int, fds []string) (*restoredProcess, error) {
|
||||
var (
|
||||
err error
|
||||
)
|
||||
proc, err := os.FindProcess(pid)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stat, err := system.Stat(pid)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &restoredProcess{
|
||||
proc: proc,
|
||||
processStartTime: stat.StartTime,
|
||||
fds: fds,
|
||||
}, nil
|
||||
}
|
||||
|
||||
type restoredProcess struct {
|
||||
proc *os.Process
|
||||
processStartTime uint64
|
||||
fds []string
|
||||
}
|
||||
|
||||
func (p *restoredProcess) start() error {
|
||||
return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError)
|
||||
}
|
||||
|
||||
func (p *restoredProcess) pid() int {
|
||||
return p.proc.Pid
|
||||
}
|
||||
|
||||
func (p *restoredProcess) terminate() error {
|
||||
err := p.proc.Kill()
|
||||
if _, werr := p.wait(); err == nil {
|
||||
err = werr
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (p *restoredProcess) wait() (*os.ProcessState, error) {
|
||||
// TODO: how do we wait on the actual process?
|
||||
// maybe use --exec-cmd in criu
|
||||
st, err := p.proc.Wait()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return st, nil
|
||||
}
|
||||
|
||||
func (p *restoredProcess) startTime() (uint64, error) {
|
||||
return p.processStartTime, nil
|
||||
}
|
||||
|
||||
func (p *restoredProcess) signal(s os.Signal) error {
|
||||
return p.proc.Signal(s)
|
||||
}
|
||||
|
||||
func (p *restoredProcess) externalDescriptors() []string {
|
||||
return p.fds
|
||||
}
|
||||
|
||||
func (p *restoredProcess) setExternalDescriptors(newFds []string) {
|
||||
p.fds = newFds
|
||||
}
|
||||
|
||||
// nonChildProcess represents a process where the calling process is not
|
||||
// the parent process. This process is created when a factory loads a container from
|
||||
// a persisted state.
|
||||
type nonChildProcess struct {
|
||||
processPid int
|
||||
processStartTime uint64
|
||||
fds []string
|
||||
}
|
||||
|
||||
func (p *nonChildProcess) start() error {
|
||||
return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError)
|
||||
}
|
||||
|
||||
func (p *nonChildProcess) pid() int {
|
||||
return p.processPid
|
||||
}
|
||||
|
||||
func (p *nonChildProcess) terminate() error {
|
||||
return newGenericError(fmt.Errorf("restored process cannot be terminated"), SystemError)
|
||||
}
|
||||
|
||||
func (p *nonChildProcess) wait() (*os.ProcessState, error) {
|
||||
return nil, newGenericError(fmt.Errorf("restored process cannot be waited on"), SystemError)
|
||||
}
|
||||
|
||||
func (p *nonChildProcess) startTime() (uint64, error) {
|
||||
return p.processStartTime, nil
|
||||
}
|
||||
|
||||
func (p *nonChildProcess) signal(s os.Signal) error {
|
||||
proc, err := os.FindProcess(p.processPid)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return proc.Signal(s)
|
||||
}
|
||||
|
||||
func (p *nonChildProcess) externalDescriptors() []string {
|
||||
return p.fds
|
||||
}
|
||||
|
||||
func (p *nonChildProcess) setExternalDescriptors(newFds []string) {
|
||||
p.fds = newFds
|
||||
}
|
||||
838
vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go
generated
vendored
Normal file
838
vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go
generated
vendored
Normal file
@@ -0,0 +1,838 @@
|
||||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/cyphar/filepath-securejoin"
|
||||
"github.com/mrunalp/fileutils"
|
||||
"github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/mount"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/opencontainers/selinux/go-selinux/label"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
|
||||
|
||||
// needsSetupDev returns true if /dev needs to be set up.
|
||||
func needsSetupDev(config *configs.Config) bool {
|
||||
for _, m := range config.Mounts {
|
||||
if m.Device == "bind" && libcontainerUtils.CleanPath(m.Destination) == "/dev" {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// prepareRootfs sets up the devices, mount points, and filesystems for use
|
||||
// inside a new mount namespace. It doesn't set anything as ro. You must call
|
||||
// finalizeRootfs after this function to finish setting up the rootfs.
|
||||
func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
|
||||
config := iConfig.Config
|
||||
if err := prepareRoot(config); err != nil {
|
||||
return newSystemErrorWithCause(err, "preparing rootfs")
|
||||
}
|
||||
|
||||
setupDev := needsSetupDev(config)
|
||||
for _, m := range config.Mounts {
|
||||
for _, precmd := range m.PremountCmds {
|
||||
if err := mountCmd(precmd); err != nil {
|
||||
return newSystemErrorWithCause(err, "running premount command")
|
||||
}
|
||||
}
|
||||
|
||||
if err := mountToRootfs(m, config.Rootfs, config.MountLabel); err != nil {
|
||||
return newSystemErrorWithCausef(err, "mounting %q to rootfs %q at %q", m.Source, config.Rootfs, m.Destination)
|
||||
}
|
||||
|
||||
for _, postcmd := range m.PostmountCmds {
|
||||
if err := mountCmd(postcmd); err != nil {
|
||||
return newSystemErrorWithCause(err, "running postmount command")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if setupDev {
|
||||
if err := createDevices(config); err != nil {
|
||||
return newSystemErrorWithCause(err, "creating device nodes")
|
||||
}
|
||||
if err := setupPtmx(config); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting up ptmx")
|
||||
}
|
||||
if err := setupDevSymlinks(config.Rootfs); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting up /dev symlinks")
|
||||
}
|
||||
}
|
||||
|
||||
// Signal the parent to run the pre-start hooks.
|
||||
// The hooks are run after the mounts are setup, but before we switch to the new
|
||||
// root, so that the old root is still available in the hooks for any mount
|
||||
// manipulations.
|
||||
// Note that iConfig.Cwd is not guaranteed to exist here.
|
||||
if err := syncParentHooks(pipe); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// The reason these operations are done here rather than in finalizeRootfs
|
||||
// is because the console-handling code gets quite sticky if we have to set
|
||||
// up the console before doing the pivot_root(2). This is because the
|
||||
// Console API has to also work with the ExecIn case, which means that the
|
||||
// API must be able to deal with being inside as well as outside the
|
||||
// container. It's just cleaner to do this here (at the expense of the
|
||||
// operation not being perfectly split).
|
||||
|
||||
if err := unix.Chdir(config.Rootfs); err != nil {
|
||||
return newSystemErrorWithCausef(err, "changing dir to %q", config.Rootfs)
|
||||
}
|
||||
|
||||
if config.NoPivotRoot {
|
||||
err = msMoveRoot(config.Rootfs)
|
||||
} else if config.Namespaces.Contains(configs.NEWNS) {
|
||||
err = pivotRoot(config.Rootfs)
|
||||
} else {
|
||||
err = chroot(config.Rootfs)
|
||||
}
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "jailing process inside rootfs")
|
||||
}
|
||||
|
||||
if setupDev {
|
||||
if err := reOpenDevNull(); err != nil {
|
||||
return newSystemErrorWithCause(err, "reopening /dev/null inside container")
|
||||
}
|
||||
}
|
||||
|
||||
if cwd := iConfig.Cwd; cwd != "" {
|
||||
// Note that spec.Process.Cwd can contain unclean value like "../../../../foo/bar...".
|
||||
// However, we are safe to call MkDirAll directly because we are in the jail here.
|
||||
if err := os.MkdirAll(cwd, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// finalizeRootfs sets anything to ro if necessary. You must call
|
||||
// prepareRootfs first.
|
||||
func finalizeRootfs(config *configs.Config) (err error) {
|
||||
// remount dev as ro if specified
|
||||
for _, m := range config.Mounts {
|
||||
if libcontainerUtils.CleanPath(m.Destination) == "/dev" {
|
||||
if m.Flags&unix.MS_RDONLY == unix.MS_RDONLY {
|
||||
if err := remountReadonly(m); err != nil {
|
||||
return newSystemErrorWithCausef(err, "remounting %q as readonly", m.Destination)
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// set rootfs ( / ) as readonly
|
||||
if config.Readonlyfs {
|
||||
if err := setReadonly(); err != nil {
|
||||
return newSystemErrorWithCause(err, "setting rootfs as readonly")
|
||||
}
|
||||
}
|
||||
|
||||
unix.Umask(0022)
|
||||
return nil
|
||||
}
|
||||
|
||||
func mountCmd(cmd configs.Command) error {
|
||||
command := exec.Command(cmd.Path, cmd.Args[:]...)
|
||||
command.Env = cmd.Env
|
||||
command.Dir = cmd.Dir
|
||||
if out, err := command.CombinedOutput(); err != nil {
|
||||
return fmt.Errorf("%#v failed: %s: %v", cmd, string(out), err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
|
||||
var (
|
||||
dest = m.Destination
|
||||
)
|
||||
if !strings.HasPrefix(dest, rootfs) {
|
||||
dest = filepath.Join(rootfs, dest)
|
||||
}
|
||||
|
||||
switch m.Device {
|
||||
case "proc", "sysfs":
|
||||
if err := os.MkdirAll(dest, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
// Selinux kernels do not support labeling of /proc or /sys
|
||||
return mountPropagate(m, rootfs, "")
|
||||
case "mqueue":
|
||||
if err := os.MkdirAll(dest, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := mountPropagate(m, rootfs, mountLabel); err != nil {
|
||||
// older kernels do not support labeling of /dev/mqueue
|
||||
if err := mountPropagate(m, rootfs, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
return label.SetFileLabel(dest, mountLabel)
|
||||
}
|
||||
return nil
|
||||
case "tmpfs":
|
||||
copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP
|
||||
tmpDir := ""
|
||||
stat, err := os.Stat(dest)
|
||||
if err != nil {
|
||||
if err := os.MkdirAll(dest, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if copyUp {
|
||||
tmpDir, err = ioutil.TempDir("/tmp", "runctmpdir")
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "tmpcopyup: failed to create tmpdir")
|
||||
}
|
||||
defer os.RemoveAll(tmpDir)
|
||||
m.Destination = tmpDir
|
||||
}
|
||||
if err := mountPropagate(m, rootfs, mountLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
if copyUp {
|
||||
if err := fileutils.CopyDirectory(dest, tmpDir); err != nil {
|
||||
errMsg := fmt.Errorf("tmpcopyup: failed to copy %s to %s: %v", dest, tmpDir, err)
|
||||
if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil {
|
||||
return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg)
|
||||
}
|
||||
return errMsg
|
||||
}
|
||||
if err := unix.Mount(tmpDir, dest, "", unix.MS_MOVE, ""); err != nil {
|
||||
errMsg := fmt.Errorf("tmpcopyup: failed to move mount %s to %s: %v", tmpDir, dest, err)
|
||||
if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil {
|
||||
return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg)
|
||||
}
|
||||
return errMsg
|
||||
}
|
||||
}
|
||||
if stat != nil {
|
||||
if err = os.Chmod(dest, stat.Mode()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
case "bind":
|
||||
stat, err := os.Stat(m.Source)
|
||||
if err != nil {
|
||||
// error out if the source of a bind mount does not exist as we will be
|
||||
// unable to bind anything to it.
|
||||
return err
|
||||
}
|
||||
// ensure that the destination of the bind mount is resolved of symlinks at mount time because
|
||||
// any previous mounts can invalidate the next mount's destination.
|
||||
// this can happen when a user specifies mounts within other mounts to cause breakouts or other
|
||||
// evil stuff to try to escape the container's rootfs.
|
||||
if dest, err = securejoin.SecureJoin(rootfs, m.Destination); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := checkMountDestination(rootfs, dest); err != nil {
|
||||
return err
|
||||
}
|
||||
// update the mount with the correct dest after symlinks are resolved.
|
||||
m.Destination = dest
|
||||
if err := createIfNotExists(dest, stat.IsDir()); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := mountPropagate(m, rootfs, mountLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
// bind mount won't change mount options, we need remount to make mount options effective.
|
||||
// first check that we have non-default options required before attempting a remount
|
||||
if m.Flags&^(unix.MS_REC|unix.MS_REMOUNT|unix.MS_BIND) != 0 {
|
||||
// only remount if unique mount options are set
|
||||
if err := remount(m, rootfs); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if m.Relabel != "" {
|
||||
if err := label.Validate(m.Relabel); err != nil {
|
||||
return err
|
||||
}
|
||||
shared := label.IsShared(m.Relabel)
|
||||
if err := label.Relabel(m.Source, mountLabel, shared); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
case "cgroup":
|
||||
binds, err := getCgroupMounts(m)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var merged []string
|
||||
for _, b := range binds {
|
||||
ss := filepath.Base(b.Destination)
|
||||
if strings.Contains(ss, ",") {
|
||||
merged = append(merged, ss)
|
||||
}
|
||||
}
|
||||
tmpfs := &configs.Mount{
|
||||
Source: "tmpfs",
|
||||
Device: "tmpfs",
|
||||
Destination: m.Destination,
|
||||
Flags: defaultMountFlags,
|
||||
Data: "mode=755",
|
||||
PropagationFlags: m.PropagationFlags,
|
||||
}
|
||||
if err := mountToRootfs(tmpfs, rootfs, mountLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, b := range binds {
|
||||
if err := mountToRootfs(b, rootfs, mountLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, mc := range merged {
|
||||
for _, ss := range strings.Split(mc, ",") {
|
||||
// symlink(2) is very dumb, it will just shove the path into
|
||||
// the link and doesn't do any checks or relative path
|
||||
// conversion. Also, don't error out if the cgroup already exists.
|
||||
if err := os.Symlink(mc, filepath.Join(rootfs, m.Destination, ss)); err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
if m.Flags&unix.MS_RDONLY != 0 {
|
||||
// remount cgroup root as readonly
|
||||
mcgrouproot := &configs.Mount{
|
||||
Source: m.Destination,
|
||||
Device: "bind",
|
||||
Destination: m.Destination,
|
||||
Flags: defaultMountFlags | unix.MS_RDONLY | unix.MS_BIND,
|
||||
}
|
||||
if err := remount(mcgrouproot, rootfs); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
default:
|
||||
// ensure that the destination of the mount is resolved of symlinks at mount time because
|
||||
// any previous mounts can invalidate the next mount's destination.
|
||||
// this can happen when a user specifies mounts within other mounts to cause breakouts or other
|
||||
// evil stuff to try to escape the container's rootfs.
|
||||
var err error
|
||||
if dest, err = securejoin.SecureJoin(rootfs, m.Destination); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := checkMountDestination(rootfs, dest); err != nil {
|
||||
return err
|
||||
}
|
||||
// update the mount with the correct dest after symlinks are resolved.
|
||||
m.Destination = dest
|
||||
if err := os.MkdirAll(dest, 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
return mountPropagate(m, rootfs, mountLabel)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) {
|
||||
mounts, err := cgroups.GetCgroupMounts(false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
cgroupPaths, err := cgroups.ParseCgroupFile("/proc/self/cgroup")
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var binds []*configs.Mount
|
||||
|
||||
for _, mm := range mounts {
|
||||
dir, err := mm.GetOwnCgroup(cgroupPaths)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
relDir, err := filepath.Rel(mm.Root, dir)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
binds = append(binds, &configs.Mount{
|
||||
Device: "bind",
|
||||
Source: filepath.Join(mm.Mountpoint, relDir),
|
||||
Destination: filepath.Join(m.Destination, filepath.Base(mm.Mountpoint)),
|
||||
Flags: unix.MS_BIND | unix.MS_REC | m.Flags,
|
||||
PropagationFlags: m.PropagationFlags,
|
||||
})
|
||||
}
|
||||
|
||||
return binds, nil
|
||||
}
|
||||
|
||||
// checkMountDestination checks to ensure that the mount destination is not over the top of /proc.
|
||||
// dest is required to be an abs path and have any symlinks resolved before calling this function.
|
||||
func checkMountDestination(rootfs, dest string) error {
|
||||
invalidDestinations := []string{
|
||||
"/proc",
|
||||
}
|
||||
// White list, it should be sub directories of invalid destinations
|
||||
validDestinations := []string{
|
||||
// These entries can be bind mounted by files emulated by fuse,
|
||||
// so commands like top, free displays stats in container.
|
||||
"/proc/cpuinfo",
|
||||
"/proc/diskstats",
|
||||
"/proc/meminfo",
|
||||
"/proc/stat",
|
||||
"/proc/swaps",
|
||||
"/proc/uptime",
|
||||
"/proc/net/dev",
|
||||
}
|
||||
for _, valid := range validDestinations {
|
||||
path, err := filepath.Rel(filepath.Join(rootfs, valid), dest)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if path == "." {
|
||||
return nil
|
||||
}
|
||||
}
|
||||
for _, invalid := range invalidDestinations {
|
||||
path, err := filepath.Rel(filepath.Join(rootfs, invalid), dest)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if path == "." || !strings.HasPrefix(path, "..") {
|
||||
return fmt.Errorf("%q cannot be mounted because it is located inside %q", dest, invalid)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setupDevSymlinks(rootfs string) error {
|
||||
var links = [][2]string{
|
||||
{"/proc/self/fd", "/dev/fd"},
|
||||
{"/proc/self/fd/0", "/dev/stdin"},
|
||||
{"/proc/self/fd/1", "/dev/stdout"},
|
||||
{"/proc/self/fd/2", "/dev/stderr"},
|
||||
}
|
||||
// kcore support can be toggled with CONFIG_PROC_KCORE; only create a symlink
|
||||
// in /dev if it exists in /proc.
|
||||
if _, err := os.Stat("/proc/kcore"); err == nil {
|
||||
links = append(links, [2]string{"/proc/kcore", "/dev/core"})
|
||||
}
|
||||
for _, link := range links {
|
||||
var (
|
||||
src = link[0]
|
||||
dst = filepath.Join(rootfs, link[1])
|
||||
)
|
||||
if err := os.Symlink(src, dst); err != nil && !os.IsExist(err) {
|
||||
return fmt.Errorf("symlink %s %s %s", src, dst, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// If stdin, stdout, and/or stderr are pointing to `/dev/null` in the parent's rootfs
|
||||
// this method will make them point to `/dev/null` in this container's rootfs. This
|
||||
// needs to be called after we chroot/pivot into the container's rootfs so that any
|
||||
// symlinks are resolved locally.
|
||||
func reOpenDevNull() error {
|
||||
var stat, devNullStat unix.Stat_t
|
||||
file, err := os.OpenFile("/dev/null", os.O_RDWR, 0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("Failed to open /dev/null - %s", err)
|
||||
}
|
||||
defer file.Close()
|
||||
if err := unix.Fstat(int(file.Fd()), &devNullStat); err != nil {
|
||||
return err
|
||||
}
|
||||
for fd := 0; fd < 3; fd++ {
|
||||
if err := unix.Fstat(fd, &stat); err != nil {
|
||||
return err
|
||||
}
|
||||
if stat.Rdev == devNullStat.Rdev {
|
||||
// Close and re-open the fd.
|
||||
if err := unix.Dup3(int(file.Fd()), fd, 0); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Create the device nodes in the container.
|
||||
func createDevices(config *configs.Config) error {
|
||||
useBindMount := system.RunningInUserNS() || config.Namespaces.Contains(configs.NEWUSER)
|
||||
oldMask := unix.Umask(0000)
|
||||
for _, node := range config.Devices {
|
||||
// containers running in a user namespace are not allowed to mknod
|
||||
// devices so we can just bind mount it from the host.
|
||||
if err := createDeviceNode(config.Rootfs, node, useBindMount); err != nil {
|
||||
unix.Umask(oldMask)
|
||||
return err
|
||||
}
|
||||
}
|
||||
unix.Umask(oldMask)
|
||||
return nil
|
||||
}
|
||||
|
||||
func bindMountDeviceNode(dest string, node *configs.Device) error {
|
||||
f, err := os.Create(dest)
|
||||
if err != nil && !os.IsExist(err) {
|
||||
return err
|
||||
}
|
||||
if f != nil {
|
||||
f.Close()
|
||||
}
|
||||
return unix.Mount(node.Path, dest, "bind", unix.MS_BIND, "")
|
||||
}
|
||||
|
||||
// Creates the device node in the rootfs of the container.
|
||||
func createDeviceNode(rootfs string, node *configs.Device, bind bool) error {
|
||||
dest := filepath.Join(rootfs, node.Path)
|
||||
if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if bind {
|
||||
return bindMountDeviceNode(dest, node)
|
||||
}
|
||||
if err := mknodDevice(dest, node); err != nil {
|
||||
if os.IsExist(err) {
|
||||
return nil
|
||||
} else if os.IsPermission(err) {
|
||||
return bindMountDeviceNode(dest, node)
|
||||
}
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func mknodDevice(dest string, node *configs.Device) error {
|
||||
fileMode := node.FileMode
|
||||
switch node.Type {
|
||||
case 'c', 'u':
|
||||
fileMode |= unix.S_IFCHR
|
||||
case 'b':
|
||||
fileMode |= unix.S_IFBLK
|
||||
case 'p':
|
||||
fileMode |= unix.S_IFIFO
|
||||
default:
|
||||
return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path)
|
||||
}
|
||||
if err := unix.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil {
|
||||
return err
|
||||
}
|
||||
return unix.Chown(dest, int(node.Uid), int(node.Gid))
|
||||
}
|
||||
|
||||
func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info {
|
||||
for _, m := range mountinfo {
|
||||
if m.Mountpoint == dir {
|
||||
return m
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Get the parent mount point of directory passed in as argument. Also return
|
||||
// optional fields.
|
||||
func getParentMount(rootfs string) (string, string, error) {
|
||||
var path string
|
||||
|
||||
mountinfos, err := mount.GetMounts()
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
}
|
||||
|
||||
mountinfo := getMountInfo(mountinfos, rootfs)
|
||||
if mountinfo != nil {
|
||||
return rootfs, mountinfo.Optional, nil
|
||||
}
|
||||
|
||||
path = rootfs
|
||||
for {
|
||||
path = filepath.Dir(path)
|
||||
|
||||
mountinfo = getMountInfo(mountinfos, path)
|
||||
if mountinfo != nil {
|
||||
return path, mountinfo.Optional, nil
|
||||
}
|
||||
|
||||
if path == "/" {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// If we are here, we did not find parent mount. Something is wrong.
|
||||
return "", "", fmt.Errorf("Could not find parent mount of %s", rootfs)
|
||||
}
|
||||
|
||||
// Make parent mount private if it was shared
|
||||
func rootfsParentMountPrivate(rootfs string) error {
|
||||
sharedMount := false
|
||||
|
||||
parentMount, optionalOpts, err := getParentMount(rootfs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
optsSplit := strings.Split(optionalOpts, " ")
|
||||
for _, opt := range optsSplit {
|
||||
if strings.HasPrefix(opt, "shared:") {
|
||||
sharedMount = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Make parent mount PRIVATE if it was shared. It is needed for two
|
||||
// reasons. First of all pivot_root() will fail if parent mount is
|
||||
// shared. Secondly when we bind mount rootfs it will propagate to
|
||||
// parent namespace and we don't want that to happen.
|
||||
if sharedMount {
|
||||
return unix.Mount("", parentMount, "", unix.MS_PRIVATE, "")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func prepareRoot(config *configs.Config) error {
|
||||
flag := unix.MS_SLAVE | unix.MS_REC
|
||||
if config.RootPropagation != 0 {
|
||||
flag = config.RootPropagation
|
||||
}
|
||||
if err := unix.Mount("", "/", "", uintptr(flag), ""); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Make parent mount private to make sure following bind mount does
|
||||
// not propagate in other namespaces. Also it will help with kernel
|
||||
// check pass in pivot_root. (IS_SHARED(new_mnt->mnt_parent))
|
||||
if err := rootfsParentMountPrivate(config.Rootfs); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return unix.Mount(config.Rootfs, config.Rootfs, "bind", unix.MS_BIND|unix.MS_REC, "")
|
||||
}
|
||||
|
||||
func setReadonly() error {
|
||||
return unix.Mount("/", "/", "bind", unix.MS_BIND|unix.MS_REMOUNT|unix.MS_RDONLY|unix.MS_REC, "")
|
||||
}
|
||||
|
||||
func setupPtmx(config *configs.Config) error {
|
||||
ptmx := filepath.Join(config.Rootfs, "dev/ptmx")
|
||||
if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
if err := os.Symlink("pts/ptmx", ptmx); err != nil {
|
||||
return fmt.Errorf("symlink dev ptmx %s", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// pivotRoot will call pivot_root such that rootfs becomes the new root
|
||||
// filesystem, and everything else is cleaned up.
|
||||
func pivotRoot(rootfs string) error {
|
||||
// While the documentation may claim otherwise, pivot_root(".", ".") is
|
||||
// actually valid. What this results in is / being the new root but
|
||||
// /proc/self/cwd being the old root. Since we can play around with the cwd
|
||||
// with pivot_root this allows us to pivot without creating directories in
|
||||
// the rootfs. Shout-outs to the LXC developers for giving us this idea.
|
||||
|
||||
oldroot, err := unix.Open("/", unix.O_DIRECTORY|unix.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer unix.Close(oldroot)
|
||||
|
||||
newroot, err := unix.Open(rootfs, unix.O_DIRECTORY|unix.O_RDONLY, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer unix.Close(newroot)
|
||||
|
||||
// Change to the new root so that the pivot_root actually acts on it.
|
||||
if err := unix.Fchdir(newroot); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := unix.PivotRoot(".", "."); err != nil {
|
||||
return fmt.Errorf("pivot_root %s", err)
|
||||
}
|
||||
|
||||
// Currently our "." is oldroot (according to the current kernel code).
|
||||
// However, purely for safety, we will fchdir(oldroot) since there isn't
|
||||
// really any guarantee from the kernel what /proc/self/cwd will be after a
|
||||
// pivot_root(2).
|
||||
|
||||
if err := unix.Fchdir(oldroot); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Make oldroot rslave to make sure our unmounts don't propagate to the
|
||||
// host (and thus bork the machine). We don't use rprivate because this is
|
||||
// known to cause issues due to races where we still have a reference to a
|
||||
// mount while a process in the host namespace are trying to operate on
|
||||
// something they think has no mounts (devicemapper in particular).
|
||||
if err := unix.Mount("", ".", "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
// Preform the unmount. MNT_DETACH allows us to unmount /proc/self/cwd.
|
||||
if err := unix.Unmount(".", unix.MNT_DETACH); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Switch back to our shiny new root.
|
||||
if err := unix.Chdir("/"); err != nil {
|
||||
return fmt.Errorf("chdir / %s", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func msMoveRoot(rootfs string) error {
|
||||
if err := unix.Mount(rootfs, "/", "", unix.MS_MOVE, ""); err != nil {
|
||||
return err
|
||||
}
|
||||
return chroot(rootfs)
|
||||
}
|
||||
|
||||
func chroot(rootfs string) error {
|
||||
if err := unix.Chroot("."); err != nil {
|
||||
return err
|
||||
}
|
||||
return unix.Chdir("/")
|
||||
}
|
||||
|
||||
// createIfNotExists creates a file or a directory only if it does not already exist.
|
||||
func createIfNotExists(path string, isDir bool) error {
|
||||
if _, err := os.Stat(path); err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
if isDir {
|
||||
return os.MkdirAll(path, 0755)
|
||||
}
|
||||
if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
f, err := os.OpenFile(path, os.O_CREATE, 0755)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
f.Close()
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// readonlyPath will make a path read only.
|
||||
func readonlyPath(path string) error {
|
||||
if err := unix.Mount(path, path, "", unix.MS_BIND|unix.MS_REC, ""); err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
return unix.Mount(path, path, "", unix.MS_BIND|unix.MS_REMOUNT|unix.MS_RDONLY|unix.MS_REC, "")
|
||||
}
|
||||
|
||||
// remountReadonly will remount an existing mount point and ensure that it is read-only.
|
||||
func remountReadonly(m *configs.Mount) error {
|
||||
var (
|
||||
dest = m.Destination
|
||||
flags = m.Flags
|
||||
)
|
||||
for i := 0; i < 5; i++ {
|
||||
// There is a special case in the kernel for
|
||||
// MS_REMOUNT | MS_BIND, which allows us to change only the
|
||||
// flags even as an unprivileged user (i.e. user namespace)
|
||||
// assuming we don't drop any security related flags (nodev,
|
||||
// nosuid, etc.). So, let's use that case so that we can do
|
||||
// this re-mount without failing in a userns.
|
||||
flags |= unix.MS_REMOUNT | unix.MS_BIND | unix.MS_RDONLY
|
||||
if err := unix.Mount("", dest, "", uintptr(flags), ""); err != nil {
|
||||
switch err {
|
||||
case unix.EBUSY:
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
continue
|
||||
default:
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("unable to mount %s as readonly max retries reached", dest)
|
||||
}
|
||||
|
||||
// maskPath masks the top of the specified path inside a container to avoid
|
||||
// security issues from processes reading information from non-namespace aware
|
||||
// mounts ( proc/kcore ).
|
||||
// For files, maskPath bind mounts /dev/null over the top of the specified path.
|
||||
// For directories, maskPath mounts read-only tmpfs over the top of the specified path.
|
||||
func maskPath(path string) error {
|
||||
if err := unix.Mount("/dev/null", path, "", unix.MS_BIND, ""); err != nil && !os.IsNotExist(err) {
|
||||
if err == unix.ENOTDIR {
|
||||
return unix.Mount("tmpfs", path, "tmpfs", unix.MS_RDONLY, "")
|
||||
}
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// writeSystemProperty writes the value to a path under /proc/sys as determined from the key.
|
||||
// For e.g. net.ipv4.ip_forward translated to /proc/sys/net/ipv4/ip_forward.
|
||||
func writeSystemProperty(key, value string) error {
|
||||
keyPath := strings.Replace(key, ".", "/", -1)
|
||||
return ioutil.WriteFile(path.Join("/proc/sys", keyPath), []byte(value), 0644)
|
||||
}
|
||||
|
||||
func remount(m *configs.Mount, rootfs string) error {
|
||||
var (
|
||||
dest = m.Destination
|
||||
)
|
||||
if !strings.HasPrefix(dest, rootfs) {
|
||||
dest = filepath.Join(rootfs, dest)
|
||||
}
|
||||
if err := unix.Mount(m.Source, dest, m.Device, uintptr(m.Flags|unix.MS_REMOUNT), ""); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Do the mount operation followed by additional mounts required to take care
|
||||
// of propagation flags.
|
||||
func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
|
||||
var (
|
||||
dest = m.Destination
|
||||
data = label.FormatMountLabel(m.Data, mountLabel)
|
||||
flags = m.Flags
|
||||
)
|
||||
if libcontainerUtils.CleanPath(dest) == "/dev" {
|
||||
flags &= ^unix.MS_RDONLY
|
||||
}
|
||||
|
||||
copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP
|
||||
if !(copyUp || strings.HasPrefix(dest, rootfs)) {
|
||||
dest = filepath.Join(rootfs, dest)
|
||||
}
|
||||
|
||||
if err := unix.Mount(m.Source, dest, m.Device, uintptr(flags), data); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, pflag := range m.PropagationFlags {
|
||||
if err := unix.Mount("", dest, "", uintptr(pflag), ""); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
76
vendor/github.com/opencontainers/runc/libcontainer/seccomp/config.go
generated
vendored
Normal file
76
vendor/github.com/opencontainers/runc/libcontainer/seccomp/config.go
generated
vendored
Normal file
@@ -0,0 +1,76 @@
|
||||
package seccomp
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
var operators = map[string]configs.Operator{
|
||||
"SCMP_CMP_NE": configs.NotEqualTo,
|
||||
"SCMP_CMP_LT": configs.LessThan,
|
||||
"SCMP_CMP_LE": configs.LessThanOrEqualTo,
|
||||
"SCMP_CMP_EQ": configs.EqualTo,
|
||||
"SCMP_CMP_GE": configs.GreaterThanOrEqualTo,
|
||||
"SCMP_CMP_GT": configs.GreaterThan,
|
||||
"SCMP_CMP_MASKED_EQ": configs.MaskEqualTo,
|
||||
}
|
||||
|
||||
var actions = map[string]configs.Action{
|
||||
"SCMP_ACT_KILL": configs.Kill,
|
||||
"SCMP_ACT_ERRNO": configs.Errno,
|
||||
"SCMP_ACT_TRAP": configs.Trap,
|
||||
"SCMP_ACT_ALLOW": configs.Allow,
|
||||
"SCMP_ACT_TRACE": configs.Trace,
|
||||
}
|
||||
|
||||
var archs = map[string]string{
|
||||
"SCMP_ARCH_X86": "x86",
|
||||
"SCMP_ARCH_X86_64": "amd64",
|
||||
"SCMP_ARCH_X32": "x32",
|
||||
"SCMP_ARCH_ARM": "arm",
|
||||
"SCMP_ARCH_AARCH64": "arm64",
|
||||
"SCMP_ARCH_MIPS": "mips",
|
||||
"SCMP_ARCH_MIPS64": "mips64",
|
||||
"SCMP_ARCH_MIPS64N32": "mips64n32",
|
||||
"SCMP_ARCH_MIPSEL": "mipsel",
|
||||
"SCMP_ARCH_MIPSEL64": "mipsel64",
|
||||
"SCMP_ARCH_MIPSEL64N32": "mipsel64n32",
|
||||
"SCMP_ARCH_PPC": "ppc",
|
||||
"SCMP_ARCH_PPC64": "ppc64",
|
||||
"SCMP_ARCH_PPC64LE": "ppc64le",
|
||||
"SCMP_ARCH_S390": "s390",
|
||||
"SCMP_ARCH_S390X": "s390x",
|
||||
}
|
||||
|
||||
// ConvertStringToOperator converts a string into a Seccomp comparison operator.
|
||||
// Comparison operators use the names they are assigned by Libseccomp's header.
|
||||
// Attempting to convert a string that is not a valid operator results in an
|
||||
// error.
|
||||
func ConvertStringToOperator(in string) (configs.Operator, error) {
|
||||
if op, ok := operators[in]; ok == true {
|
||||
return op, nil
|
||||
}
|
||||
return 0, fmt.Errorf("string %s is not a valid operator for seccomp", in)
|
||||
}
|
||||
|
||||
// ConvertStringToAction converts a string into a Seccomp rule match action.
|
||||
// Actions use the names they are assigned in Libseccomp's header, though some
|
||||
// (notable, SCMP_ACT_TRACE) are not available in this implementation and will
|
||||
// return errors.
|
||||
// Attempting to convert a string that is not a valid action results in an
|
||||
// error.
|
||||
func ConvertStringToAction(in string) (configs.Action, error) {
|
||||
if act, ok := actions[in]; ok == true {
|
||||
return act, nil
|
||||
}
|
||||
return 0, fmt.Errorf("string %s is not a valid action for seccomp", in)
|
||||
}
|
||||
|
||||
// ConvertStringToArch converts a string into a Seccomp comparison arch.
|
||||
func ConvertStringToArch(in string) (string, error) {
|
||||
if arch, ok := archs[in]; ok == true {
|
||||
return arch, nil
|
||||
}
|
||||
return "", fmt.Errorf("string %s is not a valid arch for seccomp", in)
|
||||
}
|
||||
258
vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go
generated
vendored
Normal file
258
vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go
generated
vendored
Normal file
@@ -0,0 +1,258 @@
|
||||
// +build linux,cgo,seccomp
|
||||
|
||||
package seccomp
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
libseccomp "github.com/seccomp/libseccomp-golang"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
var (
|
||||
actAllow = libseccomp.ActAllow
|
||||
actTrap = libseccomp.ActTrap
|
||||
actKill = libseccomp.ActKill
|
||||
actTrace = libseccomp.ActTrace.SetReturnCode(int16(unix.EPERM))
|
||||
actErrno = libseccomp.ActErrno.SetReturnCode(int16(unix.EPERM))
|
||||
)
|
||||
|
||||
const (
|
||||
// Linux system calls can have at most 6 arguments
|
||||
syscallMaxArguments int = 6
|
||||
)
|
||||
|
||||
// Filters given syscalls in a container, preventing them from being used
|
||||
// Started in the container init process, and carried over to all child processes
|
||||
// Setns calls, however, require a separate invocation, as they are not children
|
||||
// of the init until they join the namespace
|
||||
func InitSeccomp(config *configs.Seccomp) error {
|
||||
if config == nil {
|
||||
return fmt.Errorf("cannot initialize Seccomp - nil config passed")
|
||||
}
|
||||
|
||||
defaultAction, err := getAction(config.DefaultAction)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error initializing seccomp - invalid default action")
|
||||
}
|
||||
|
||||
filter, err := libseccomp.NewFilter(defaultAction)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error creating filter: %s", err)
|
||||
}
|
||||
|
||||
// Add extra architectures
|
||||
for _, arch := range config.Architectures {
|
||||
scmpArch, err := libseccomp.GetArchFromString(arch)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error validating Seccomp architecture: %s", err)
|
||||
}
|
||||
|
||||
if err := filter.AddArch(scmpArch); err != nil {
|
||||
return fmt.Errorf("error adding architecture to seccomp filter: %s", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Unset no new privs bit
|
||||
if err := filter.SetNoNewPrivsBit(false); err != nil {
|
||||
return fmt.Errorf("error setting no new privileges: %s", err)
|
||||
}
|
||||
|
||||
// Add a rule for each syscall
|
||||
for _, call := range config.Syscalls {
|
||||
if call == nil {
|
||||
return fmt.Errorf("encountered nil syscall while initializing Seccomp")
|
||||
}
|
||||
|
||||
if err = matchCall(filter, call); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if err = filter.Load(); err != nil {
|
||||
return fmt.Errorf("error loading seccomp filter into kernel: %s", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// IsEnabled returns if the kernel has been configured to support seccomp.
|
||||
func IsEnabled() bool {
|
||||
// Try to read from /proc/self/status for kernels > 3.8
|
||||
s, err := parseStatusFile("/proc/self/status")
|
||||
if err != nil {
|
||||
// Check if Seccomp is supported, via CONFIG_SECCOMP.
|
||||
if err := unix.Prctl(unix.PR_GET_SECCOMP, 0, 0, 0, 0); err != unix.EINVAL {
|
||||
// Make sure the kernel has CONFIG_SECCOMP_FILTER.
|
||||
if err := unix.Prctl(unix.PR_SET_SECCOMP, unix.SECCOMP_MODE_FILTER, 0, 0, 0); err != unix.EINVAL {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
_, ok := s["Seccomp"]
|
||||
return ok
|
||||
}
|
||||
|
||||
// Convert Libcontainer Action to Libseccomp ScmpAction
|
||||
func getAction(act configs.Action) (libseccomp.ScmpAction, error) {
|
||||
switch act {
|
||||
case configs.Kill:
|
||||
return actKill, nil
|
||||
case configs.Errno:
|
||||
return actErrno, nil
|
||||
case configs.Trap:
|
||||
return actTrap, nil
|
||||
case configs.Allow:
|
||||
return actAllow, nil
|
||||
case configs.Trace:
|
||||
return actTrace, nil
|
||||
default:
|
||||
return libseccomp.ActInvalid, fmt.Errorf("invalid action, cannot use in rule")
|
||||
}
|
||||
}
|
||||
|
||||
// Convert Libcontainer Operator to Libseccomp ScmpCompareOp
|
||||
func getOperator(op configs.Operator) (libseccomp.ScmpCompareOp, error) {
|
||||
switch op {
|
||||
case configs.EqualTo:
|
||||
return libseccomp.CompareEqual, nil
|
||||
case configs.NotEqualTo:
|
||||
return libseccomp.CompareNotEqual, nil
|
||||
case configs.GreaterThan:
|
||||
return libseccomp.CompareGreater, nil
|
||||
case configs.GreaterThanOrEqualTo:
|
||||
return libseccomp.CompareGreaterEqual, nil
|
||||
case configs.LessThan:
|
||||
return libseccomp.CompareLess, nil
|
||||
case configs.LessThanOrEqualTo:
|
||||
return libseccomp.CompareLessOrEqual, nil
|
||||
case configs.MaskEqualTo:
|
||||
return libseccomp.CompareMaskedEqual, nil
|
||||
default:
|
||||
return libseccomp.CompareInvalid, fmt.Errorf("invalid operator, cannot use in rule")
|
||||
}
|
||||
}
|
||||
|
||||
// Convert Libcontainer Arg to Libseccomp ScmpCondition
|
||||
func getCondition(arg *configs.Arg) (libseccomp.ScmpCondition, error) {
|
||||
cond := libseccomp.ScmpCondition{}
|
||||
|
||||
if arg == nil {
|
||||
return cond, fmt.Errorf("cannot convert nil to syscall condition")
|
||||
}
|
||||
|
||||
op, err := getOperator(arg.Op)
|
||||
if err != nil {
|
||||
return cond, err
|
||||
}
|
||||
|
||||
return libseccomp.MakeCondition(arg.Index, op, arg.Value, arg.ValueTwo)
|
||||
}
|
||||
|
||||
// Add a rule to match a single syscall
|
||||
func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall) error {
|
||||
if call == nil || filter == nil {
|
||||
return fmt.Errorf("cannot use nil as syscall to block")
|
||||
}
|
||||
|
||||
if len(call.Name) == 0 {
|
||||
return fmt.Errorf("empty string is not a valid syscall")
|
||||
}
|
||||
|
||||
// If we can't resolve the syscall, assume it's not supported on this kernel
|
||||
// Ignore it, don't error out
|
||||
callNum, err := libseccomp.GetSyscallFromName(call.Name)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert the call's action to the libseccomp equivalent
|
||||
callAct, err := getAction(call.Action)
|
||||
if err != nil {
|
||||
return fmt.Errorf("action in seccomp profile is invalid: %s", err)
|
||||
}
|
||||
|
||||
// Unconditional match - just add the rule
|
||||
if len(call.Args) == 0 {
|
||||
if err = filter.AddRule(callNum, callAct); err != nil {
|
||||
return fmt.Errorf("error adding seccomp filter rule for syscall %s: %s", call.Name, err)
|
||||
}
|
||||
} else {
|
||||
// If two or more arguments have the same condition,
|
||||
// Revert to old behavior, adding each condition as a separate rule
|
||||
argCounts := make([]uint, syscallMaxArguments)
|
||||
conditions := []libseccomp.ScmpCondition{}
|
||||
|
||||
for _, cond := range call.Args {
|
||||
newCond, err := getCondition(cond)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error creating seccomp syscall condition for syscall %s: %s", call.Name, err)
|
||||
}
|
||||
|
||||
argCounts[cond.Index] += 1
|
||||
|
||||
conditions = append(conditions, newCond)
|
||||
}
|
||||
|
||||
hasMultipleArgs := false
|
||||
for _, count := range argCounts {
|
||||
if count > 1 {
|
||||
hasMultipleArgs = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if hasMultipleArgs {
|
||||
// Revert to old behavior
|
||||
// Add each condition attached to a separate rule
|
||||
for _, cond := range conditions {
|
||||
condArr := []libseccomp.ScmpCondition{cond}
|
||||
|
||||
if err = filter.AddRuleConditional(callNum, callAct, condArr); err != nil {
|
||||
return fmt.Errorf("error adding seccomp rule for syscall %s: %s", call.Name, err)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// No conditions share same argument
|
||||
// Use new, proper behavior
|
||||
if err = filter.AddRuleConditional(callNum, callAct, conditions); err != nil {
|
||||
return fmt.Errorf("error adding seccomp rule for syscall %s: %s", call.Name, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func parseStatusFile(path string) (map[string]string, error) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
s := bufio.NewScanner(f)
|
||||
status := make(map[string]string)
|
||||
|
||||
for s.Scan() {
|
||||
text := s.Text()
|
||||
parts := strings.Split(text, ":")
|
||||
|
||||
if len(parts) <= 1 {
|
||||
continue
|
||||
}
|
||||
|
||||
status[parts[0]] = parts[1]
|
||||
}
|
||||
if err := s.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return status, nil
|
||||
}
|
||||
24
vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go
generated
vendored
Normal file
24
vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go
generated
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
// +build !linux !cgo !seccomp
|
||||
|
||||
package seccomp
|
||||
|
||||
import (
|
||||
"errors"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
)
|
||||
|
||||
var ErrSeccompNotEnabled = errors.New("seccomp: config provided but seccomp not supported")
|
||||
|
||||
// InitSeccomp does nothing because seccomp is not supported.
|
||||
func InitSeccomp(config *configs.Seccomp) error {
|
||||
if config != nil {
|
||||
return ErrSeccompNotEnabled
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// IsEnabled returns false, because it is not supported.
|
||||
func IsEnabled() bool {
|
||||
return false
|
||||
}
|
||||
76
vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go
generated
vendored
Normal file
76
vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go
generated
vendored
Normal file
@@ -0,0 +1,76 @@
|
||||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/apparmor"
|
||||
"github.com/opencontainers/runc/libcontainer/keys"
|
||||
"github.com/opencontainers/runc/libcontainer/seccomp"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/selinux/go-selinux/label"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// linuxSetnsInit performs the container's initialization for running a new process
|
||||
// inside an existing container.
|
||||
type linuxSetnsInit struct {
|
||||
pipe *os.File
|
||||
consoleSocket *os.File
|
||||
config *initConfig
|
||||
}
|
||||
|
||||
func (l *linuxSetnsInit) getSessionRingName() string {
|
||||
return fmt.Sprintf("_ses.%s", l.config.ContainerId)
|
||||
}
|
||||
|
||||
func (l *linuxSetnsInit) Init() error {
|
||||
if !l.config.Config.NoNewKeyring {
|
||||
// do not inherit the parent's session keyring
|
||||
if _, err := keys.JoinSessionKeyring(l.getSessionRingName()); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if l.config.CreateConsole {
|
||||
if err := setupConsole(l.consoleSocket, l.config, false); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := system.Setctty(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if l.config.NoNewPrivileges {
|
||||
if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// Without NoNewPrivileges seccomp is a privileged operation, so we need to
|
||||
// do this before dropping capabilities; otherwise do it as late as possible
|
||||
// just before execve so as few syscalls take place after it as possible.
|
||||
if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges {
|
||||
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := finalizeNamespace(l.config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
// Set seccomp as close to execve as possible, so as few syscalls take
|
||||
// place afterward (reducing the amount of syscalls that users need to
|
||||
// enable in their seccomp profiles).
|
||||
if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges {
|
||||
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
|
||||
return newSystemErrorWithCause(err, "init seccomp")
|
||||
}
|
||||
}
|
||||
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
|
||||
}
|
||||
221
vendor/github.com/opencontainers/runc/libcontainer/specconv/example.go
generated
vendored
Normal file
221
vendor/github.com/opencontainers/runc/libcontainer/specconv/example.go
generated
vendored
Normal file
@@ -0,0 +1,221 @@
|
||||
package specconv
|
||||
|
||||
import (
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
)
|
||||
|
||||
// Example returns an example spec file, with many options set so a user can
|
||||
// see what a standard spec file looks like.
|
||||
func Example() *specs.Spec {
|
||||
return &specs.Spec{
|
||||
Version: specs.Version,
|
||||
Root: &specs.Root{
|
||||
Path: "rootfs",
|
||||
Readonly: true,
|
||||
},
|
||||
Process: &specs.Process{
|
||||
Terminal: true,
|
||||
User: specs.User{},
|
||||
Args: []string{
|
||||
"sh",
|
||||
},
|
||||
Env: []string{
|
||||
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
|
||||
"TERM=xterm",
|
||||
},
|
||||
Cwd: "/",
|
||||
NoNewPrivileges: true,
|
||||
Capabilities: &specs.LinuxCapabilities{
|
||||
Bounding: []string{
|
||||
"CAP_AUDIT_WRITE",
|
||||
"CAP_KILL",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
},
|
||||
Permitted: []string{
|
||||
"CAP_AUDIT_WRITE",
|
||||
"CAP_KILL",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
},
|
||||
Inheritable: []string{
|
||||
"CAP_AUDIT_WRITE",
|
||||
"CAP_KILL",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
},
|
||||
Ambient: []string{
|
||||
"CAP_AUDIT_WRITE",
|
||||
"CAP_KILL",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
},
|
||||
Effective: []string{
|
||||
"CAP_AUDIT_WRITE",
|
||||
"CAP_KILL",
|
||||
"CAP_NET_BIND_SERVICE",
|
||||
},
|
||||
},
|
||||
Rlimits: []specs.POSIXRlimit{
|
||||
{
|
||||
Type: "RLIMIT_NOFILE",
|
||||
Hard: uint64(1024),
|
||||
Soft: uint64(1024),
|
||||
},
|
||||
},
|
||||
},
|
||||
Hostname: "runc",
|
||||
Mounts: []specs.Mount{
|
||||
{
|
||||
Destination: "/proc",
|
||||
Type: "proc",
|
||||
Source: "proc",
|
||||
Options: nil,
|
||||
},
|
||||
{
|
||||
Destination: "/dev",
|
||||
Type: "tmpfs",
|
||||
Source: "tmpfs",
|
||||
Options: []string{"nosuid", "strictatime", "mode=755", "size=65536k"},
|
||||
},
|
||||
{
|
||||
Destination: "/dev/pts",
|
||||
Type: "devpts",
|
||||
Source: "devpts",
|
||||
Options: []string{"nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620", "gid=5"},
|
||||
},
|
||||
{
|
||||
Destination: "/dev/shm",
|
||||
Type: "tmpfs",
|
||||
Source: "shm",
|
||||
Options: []string{"nosuid", "noexec", "nodev", "mode=1777", "size=65536k"},
|
||||
},
|
||||
{
|
||||
Destination: "/dev/mqueue",
|
||||
Type: "mqueue",
|
||||
Source: "mqueue",
|
||||
Options: []string{"nosuid", "noexec", "nodev"},
|
||||
},
|
||||
{
|
||||
Destination: "/sys",
|
||||
Type: "sysfs",
|
||||
Source: "sysfs",
|
||||
Options: []string{"nosuid", "noexec", "nodev", "ro"},
|
||||
},
|
||||
{
|
||||
Destination: "/sys/fs/cgroup",
|
||||
Type: "cgroup",
|
||||
Source: "cgroup",
|
||||
Options: []string{"nosuid", "noexec", "nodev", "relatime", "ro"},
|
||||
},
|
||||
},
|
||||
Linux: &specs.Linux{
|
||||
MaskedPaths: []string{
|
||||
"/proc/kcore",
|
||||
"/proc/latency_stats",
|
||||
"/proc/timer_list",
|
||||
"/proc/timer_stats",
|
||||
"/proc/sched_debug",
|
||||
"/sys/firmware",
|
||||
"/proc/scsi",
|
||||
},
|
||||
ReadonlyPaths: []string{
|
||||
"/proc/asound",
|
||||
"/proc/bus",
|
||||
"/proc/fs",
|
||||
"/proc/irq",
|
||||
"/proc/sys",
|
||||
"/proc/sysrq-trigger",
|
||||
},
|
||||
Resources: &specs.LinuxResources{
|
||||
Devices: []specs.LinuxDeviceCgroup{
|
||||
{
|
||||
Allow: false,
|
||||
Access: "rwm",
|
||||
},
|
||||
},
|
||||
},
|
||||
Namespaces: []specs.LinuxNamespace{
|
||||
{
|
||||
Type: "pid",
|
||||
},
|
||||
{
|
||||
Type: "network",
|
||||
},
|
||||
{
|
||||
Type: "ipc",
|
||||
},
|
||||
{
|
||||
Type: "uts",
|
||||
},
|
||||
{
|
||||
Type: "mount",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// ToRootless converts the given spec file into one that should work with
|
||||
// rootless containers, by removing incompatible options and adding others that
|
||||
// are needed.
|
||||
func ToRootless(spec *specs.Spec) {
|
||||
var namespaces []specs.LinuxNamespace
|
||||
|
||||
// Remove networkns from the spec.
|
||||
for _, ns := range spec.Linux.Namespaces {
|
||||
switch ns.Type {
|
||||
case specs.NetworkNamespace, specs.UserNamespace:
|
||||
// Do nothing.
|
||||
default:
|
||||
namespaces = append(namespaces, ns)
|
||||
}
|
||||
}
|
||||
// Add userns to the spec.
|
||||
namespaces = append(namespaces, specs.LinuxNamespace{
|
||||
Type: specs.UserNamespace,
|
||||
})
|
||||
spec.Linux.Namespaces = namespaces
|
||||
|
||||
// Add mappings for the current user.
|
||||
spec.Linux.UIDMappings = []specs.LinuxIDMapping{{
|
||||
HostID: uint32(os.Geteuid()),
|
||||
ContainerID: 0,
|
||||
Size: 1,
|
||||
}}
|
||||
spec.Linux.GIDMappings = []specs.LinuxIDMapping{{
|
||||
HostID: uint32(os.Getegid()),
|
||||
ContainerID: 0,
|
||||
Size: 1,
|
||||
}}
|
||||
|
||||
// Fix up mounts.
|
||||
var mounts []specs.Mount
|
||||
for _, mount := range spec.Mounts {
|
||||
// Ignore all mounts that are under /sys.
|
||||
if strings.HasPrefix(mount.Destination, "/sys") {
|
||||
continue
|
||||
}
|
||||
|
||||
// Remove all gid= and uid= mappings.
|
||||
var options []string
|
||||
for _, option := range mount.Options {
|
||||
if !strings.HasPrefix(option, "gid=") && !strings.HasPrefix(option, "uid=") {
|
||||
options = append(options, option)
|
||||
}
|
||||
}
|
||||
|
||||
mount.Options = options
|
||||
mounts = append(mounts, mount)
|
||||
}
|
||||
// Add the sysfs mount as an rbind.
|
||||
mounts = append(mounts, specs.Mount{
|
||||
Source: "/sys",
|
||||
Destination: "/sys",
|
||||
Type: "none",
|
||||
Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"},
|
||||
})
|
||||
spec.Mounts = mounts
|
||||
|
||||
// Remove cgroup settings.
|
||||
spec.Linux.Resources = nil
|
||||
}
|
||||
832
vendor/github.com/opencontainers/runc/libcontainer/specconv/spec_linux.go
generated
vendored
Normal file
832
vendor/github.com/opencontainers/runc/libcontainer/specconv/spec_linux.go
generated
vendored
Normal file
@@ -0,0 +1,832 @@
|
||||
// +build linux
|
||||
|
||||
// Package specconv implements conversion of specifications to libcontainer
|
||||
// configurations
|
||||
package specconv
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/seccomp"
|
||||
libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
const wildcard = -1
|
||||
|
||||
var namespaceMapping = map[specs.LinuxNamespaceType]configs.NamespaceType{
|
||||
specs.PIDNamespace: configs.NEWPID,
|
||||
specs.NetworkNamespace: configs.NEWNET,
|
||||
specs.MountNamespace: configs.NEWNS,
|
||||
specs.UserNamespace: configs.NEWUSER,
|
||||
specs.IPCNamespace: configs.NEWIPC,
|
||||
specs.UTSNamespace: configs.NEWUTS,
|
||||
}
|
||||
|
||||
var mountPropagationMapping = map[string]int{
|
||||
"rprivate": unix.MS_PRIVATE | unix.MS_REC,
|
||||
"private": unix.MS_PRIVATE,
|
||||
"rslave": unix.MS_SLAVE | unix.MS_REC,
|
||||
"slave": unix.MS_SLAVE,
|
||||
"rshared": unix.MS_SHARED | unix.MS_REC,
|
||||
"shared": unix.MS_SHARED,
|
||||
"runbindable": unix.MS_UNBINDABLE | unix.MS_REC,
|
||||
"unbindable": unix.MS_UNBINDABLE,
|
||||
"": 0,
|
||||
}
|
||||
|
||||
var allowedDevices = []*configs.Device{
|
||||
// allow mknod for any device
|
||||
{
|
||||
Type: 'c',
|
||||
Major: wildcard,
|
||||
Minor: wildcard,
|
||||
Permissions: "m",
|
||||
Allow: true,
|
||||
},
|
||||
{
|
||||
Type: 'b',
|
||||
Major: wildcard,
|
||||
Minor: wildcard,
|
||||
Permissions: "m",
|
||||
Allow: true,
|
||||
},
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/null",
|
||||
Major: 1,
|
||||
Minor: 3,
|
||||
Permissions: "rwm",
|
||||
Allow: true,
|
||||
},
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/random",
|
||||
Major: 1,
|
||||
Minor: 8,
|
||||
Permissions: "rwm",
|
||||
Allow: true,
|
||||
},
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/full",
|
||||
Major: 1,
|
||||
Minor: 7,
|
||||
Permissions: "rwm",
|
||||
Allow: true,
|
||||
},
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/tty",
|
||||
Major: 5,
|
||||
Minor: 0,
|
||||
Permissions: "rwm",
|
||||
Allow: true,
|
||||
},
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/zero",
|
||||
Major: 1,
|
||||
Minor: 5,
|
||||
Permissions: "rwm",
|
||||
Allow: true,
|
||||
},
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/urandom",
|
||||
Major: 1,
|
||||
Minor: 9,
|
||||
Permissions: "rwm",
|
||||
Allow: true,
|
||||
},
|
||||
{
|
||||
Path: "/dev/console",
|
||||
Type: 'c',
|
||||
Major: 5,
|
||||
Minor: 1,
|
||||
Permissions: "rwm",
|
||||
Allow: true,
|
||||
},
|
||||
// /dev/pts/ - pts namespaces are "coming soon"
|
||||
{
|
||||
Path: "",
|
||||
Type: 'c',
|
||||
Major: 136,
|
||||
Minor: wildcard,
|
||||
Permissions: "rwm",
|
||||
Allow: true,
|
||||
},
|
||||
{
|
||||
Path: "",
|
||||
Type: 'c',
|
||||
Major: 5,
|
||||
Minor: 2,
|
||||
Permissions: "rwm",
|
||||
Allow: true,
|
||||
},
|
||||
// tuntap
|
||||
{
|
||||
Path: "",
|
||||
Type: 'c',
|
||||
Major: 10,
|
||||
Minor: 200,
|
||||
Permissions: "rwm",
|
||||
Allow: true,
|
||||
},
|
||||
}
|
||||
|
||||
type CreateOpts struct {
|
||||
CgroupName string
|
||||
UseSystemdCgroup bool
|
||||
NoPivotRoot bool
|
||||
NoNewKeyring bool
|
||||
Spec *specs.Spec
|
||||
Rootless bool
|
||||
}
|
||||
|
||||
// CreateLibcontainerConfig creates a new libcontainer configuration from a
|
||||
// given specification and a cgroup name
|
||||
func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
|
||||
// runc's cwd will always be the bundle path
|
||||
rcwd, err := os.Getwd()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
cwd, err := filepath.Abs(rcwd)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
spec := opts.Spec
|
||||
if spec.Root == nil {
|
||||
return nil, fmt.Errorf("Root must be specified")
|
||||
}
|
||||
rootfsPath := spec.Root.Path
|
||||
if !filepath.IsAbs(rootfsPath) {
|
||||
rootfsPath = filepath.Join(cwd, rootfsPath)
|
||||
}
|
||||
labels := []string{}
|
||||
for k, v := range spec.Annotations {
|
||||
labels = append(labels, fmt.Sprintf("%s=%s", k, v))
|
||||
}
|
||||
config := &configs.Config{
|
||||
Rootfs: rootfsPath,
|
||||
NoPivotRoot: opts.NoPivotRoot,
|
||||
Readonlyfs: spec.Root.Readonly,
|
||||
Hostname: spec.Hostname,
|
||||
Labels: append(labels, fmt.Sprintf("bundle=%s", cwd)),
|
||||
NoNewKeyring: opts.NoNewKeyring,
|
||||
Rootless: opts.Rootless,
|
||||
}
|
||||
|
||||
exists := false
|
||||
for _, m := range spec.Mounts {
|
||||
config.Mounts = append(config.Mounts, createLibcontainerMount(cwd, m))
|
||||
}
|
||||
if err := createDevices(spec, config); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
c, err := createCgroupConfig(opts)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
config.Cgroups = c
|
||||
// set linux-specific config
|
||||
if spec.Linux != nil {
|
||||
if config.RootPropagation, exists = mountPropagationMapping[spec.Linux.RootfsPropagation]; !exists {
|
||||
return nil, fmt.Errorf("rootfsPropagation=%v is not supported", spec.Linux.RootfsPropagation)
|
||||
}
|
||||
if config.NoPivotRoot && (config.RootPropagation&unix.MS_PRIVATE != 0) {
|
||||
return nil, fmt.Errorf("rootfsPropagation of [r]private is not safe without pivot_root")
|
||||
}
|
||||
|
||||
for _, ns := range spec.Linux.Namespaces {
|
||||
t, exists := namespaceMapping[ns.Type]
|
||||
if !exists {
|
||||
return nil, fmt.Errorf("namespace %q does not exist", ns)
|
||||
}
|
||||
if config.Namespaces.Contains(t) {
|
||||
return nil, fmt.Errorf("malformed spec file: duplicated ns %q", ns)
|
||||
}
|
||||
config.Namespaces.Add(t, ns.Path)
|
||||
}
|
||||
if config.Namespaces.Contains(configs.NEWNET) {
|
||||
config.Networks = []*configs.Network{
|
||||
{
|
||||
Type: "loopback",
|
||||
},
|
||||
}
|
||||
}
|
||||
if config.Namespaces.Contains(configs.NEWUSER) {
|
||||
if err := setupUserNamespace(spec, config); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
config.MaskPaths = spec.Linux.MaskedPaths
|
||||
config.ReadonlyPaths = spec.Linux.ReadonlyPaths
|
||||
config.MountLabel = spec.Linux.MountLabel
|
||||
config.Sysctl = spec.Linux.Sysctl
|
||||
if spec.Linux.Seccomp != nil {
|
||||
seccomp, err := setupSeccomp(spec.Linux.Seccomp)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
config.Seccomp = seccomp
|
||||
}
|
||||
}
|
||||
if spec.Process.SelinuxLabel != "" {
|
||||
config.ProcessLabel = spec.Process.SelinuxLabel
|
||||
}
|
||||
if spec.Process != nil && spec.Process.OOMScoreAdj != nil {
|
||||
config.OomScoreAdj = *spec.Process.OOMScoreAdj
|
||||
}
|
||||
if spec.Process.Capabilities != nil {
|
||||
config.Capabilities = &configs.Capabilities{
|
||||
Bounding: spec.Process.Capabilities.Bounding,
|
||||
Effective: spec.Process.Capabilities.Effective,
|
||||
Permitted: spec.Process.Capabilities.Permitted,
|
||||
Inheritable: spec.Process.Capabilities.Inheritable,
|
||||
Ambient: spec.Process.Capabilities.Ambient,
|
||||
}
|
||||
}
|
||||
createHooks(spec, config)
|
||||
config.Version = specs.Version
|
||||
if spec.Linux.IntelRdt != nil {
|
||||
config.IntelRdt = &configs.IntelRdt{}
|
||||
if spec.Linux.IntelRdt.L3CacheSchema != "" {
|
||||
config.IntelRdt.L3CacheSchema = spec.Linux.IntelRdt.L3CacheSchema
|
||||
}
|
||||
}
|
||||
return config, nil
|
||||
}
|
||||
|
||||
func createLibcontainerMount(cwd string, m specs.Mount) *configs.Mount {
|
||||
flags, pgflags, data, ext := parseMountOptions(m.Options)
|
||||
source := m.Source
|
||||
if m.Type == "bind" {
|
||||
if !filepath.IsAbs(source) {
|
||||
source = filepath.Join(cwd, m.Source)
|
||||
}
|
||||
}
|
||||
return &configs.Mount{
|
||||
Device: m.Type,
|
||||
Source: source,
|
||||
Destination: m.Destination,
|
||||
Data: data,
|
||||
Flags: flags,
|
||||
PropagationFlags: pgflags,
|
||||
Extensions: ext,
|
||||
}
|
||||
}
|
||||
|
||||
func createCgroupConfig(opts *CreateOpts) (*configs.Cgroup, error) {
|
||||
var (
|
||||
myCgroupPath string
|
||||
|
||||
spec = opts.Spec
|
||||
useSystemdCgroup = opts.UseSystemdCgroup
|
||||
name = opts.CgroupName
|
||||
)
|
||||
|
||||
c := &configs.Cgroup{
|
||||
Resources: &configs.Resources{},
|
||||
}
|
||||
|
||||
if spec.Linux != nil && spec.Linux.CgroupsPath != "" {
|
||||
myCgroupPath = libcontainerUtils.CleanPath(spec.Linux.CgroupsPath)
|
||||
if useSystemdCgroup {
|
||||
myCgroupPath = spec.Linux.CgroupsPath
|
||||
}
|
||||
}
|
||||
|
||||
if useSystemdCgroup {
|
||||
if myCgroupPath == "" {
|
||||
c.Parent = "system.slice"
|
||||
c.ScopePrefix = "runc"
|
||||
c.Name = name
|
||||
} else {
|
||||
// Parse the path from expected "slice:prefix:name"
|
||||
// for e.g. "system.slice:docker:1234"
|
||||
parts := strings.Split(myCgroupPath, ":")
|
||||
if len(parts) != 3 {
|
||||
return nil, fmt.Errorf("expected cgroupsPath to be of format \"slice:prefix:name\" for systemd cgroups")
|
||||
}
|
||||
c.Parent = parts[0]
|
||||
c.ScopePrefix = parts[1]
|
||||
c.Name = parts[2]
|
||||
}
|
||||
} else {
|
||||
if myCgroupPath == "" {
|
||||
c.Name = name
|
||||
}
|
||||
c.Path = myCgroupPath
|
||||
}
|
||||
|
||||
// In rootless containers, any attempt to make cgroup changes will fail.
|
||||
// libcontainer will validate this and we shouldn't add any cgroup options
|
||||
// the user didn't specify.
|
||||
if !opts.Rootless {
|
||||
c.Resources.AllowedDevices = allowedDevices
|
||||
}
|
||||
if spec.Linux != nil {
|
||||
r := spec.Linux.Resources
|
||||
if r == nil {
|
||||
return c, nil
|
||||
}
|
||||
for i, d := range spec.Linux.Resources.Devices {
|
||||
var (
|
||||
t = "a"
|
||||
major = int64(-1)
|
||||
minor = int64(-1)
|
||||
)
|
||||
if d.Type != "" {
|
||||
t = d.Type
|
||||
}
|
||||
if d.Major != nil {
|
||||
major = *d.Major
|
||||
}
|
||||
if d.Minor != nil {
|
||||
minor = *d.Minor
|
||||
}
|
||||
if d.Access == "" {
|
||||
return nil, fmt.Errorf("device access at %d field cannot be empty", i)
|
||||
}
|
||||
dt, err := stringToCgroupDeviceRune(t)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
dd := &configs.Device{
|
||||
Type: dt,
|
||||
Major: major,
|
||||
Minor: minor,
|
||||
Permissions: d.Access,
|
||||
Allow: d.Allow,
|
||||
}
|
||||
c.Resources.Devices = append(c.Resources.Devices, dd)
|
||||
}
|
||||
if r.Memory != nil {
|
||||
if r.Memory.Limit != nil {
|
||||
c.Resources.Memory = *r.Memory.Limit
|
||||
}
|
||||
if r.Memory.Reservation != nil {
|
||||
c.Resources.MemoryReservation = *r.Memory.Reservation
|
||||
}
|
||||
if r.Memory.Swap != nil {
|
||||
c.Resources.MemorySwap = *r.Memory.Swap
|
||||
}
|
||||
if r.Memory.Kernel != nil {
|
||||
c.Resources.KernelMemory = *r.Memory.Kernel
|
||||
}
|
||||
if r.Memory.KernelTCP != nil {
|
||||
c.Resources.KernelMemoryTCP = *r.Memory.KernelTCP
|
||||
}
|
||||
if r.Memory.Swappiness != nil {
|
||||
c.Resources.MemorySwappiness = r.Memory.Swappiness
|
||||
}
|
||||
if r.Memory.DisableOOMKiller != nil {
|
||||
c.Resources.OomKillDisable = *r.Memory.DisableOOMKiller
|
||||
}
|
||||
}
|
||||
if r.CPU != nil {
|
||||
if r.CPU.Shares != nil {
|
||||
c.Resources.CpuShares = *r.CPU.Shares
|
||||
}
|
||||
if r.CPU.Quota != nil {
|
||||
c.Resources.CpuQuota = *r.CPU.Quota
|
||||
}
|
||||
if r.CPU.Period != nil {
|
||||
c.Resources.CpuPeriod = *r.CPU.Period
|
||||
}
|
||||
if r.CPU.RealtimeRuntime != nil {
|
||||
c.Resources.CpuRtRuntime = *r.CPU.RealtimeRuntime
|
||||
}
|
||||
if r.CPU.RealtimePeriod != nil {
|
||||
c.Resources.CpuRtPeriod = *r.CPU.RealtimePeriod
|
||||
}
|
||||
if r.CPU.Cpus != "" {
|
||||
c.Resources.CpusetCpus = r.CPU.Cpus
|
||||
}
|
||||
if r.CPU.Mems != "" {
|
||||
c.Resources.CpusetMems = r.CPU.Mems
|
||||
}
|
||||
}
|
||||
if r.Pids != nil {
|
||||
c.Resources.PidsLimit = r.Pids.Limit
|
||||
}
|
||||
if r.BlockIO != nil {
|
||||
if r.BlockIO.Weight != nil {
|
||||
c.Resources.BlkioWeight = *r.BlockIO.Weight
|
||||
}
|
||||
if r.BlockIO.LeafWeight != nil {
|
||||
c.Resources.BlkioLeafWeight = *r.BlockIO.LeafWeight
|
||||
}
|
||||
if r.BlockIO.WeightDevice != nil {
|
||||
for _, wd := range r.BlockIO.WeightDevice {
|
||||
var weight, leafWeight uint16
|
||||
if wd.Weight != nil {
|
||||
weight = *wd.Weight
|
||||
}
|
||||
if wd.LeafWeight != nil {
|
||||
leafWeight = *wd.LeafWeight
|
||||
}
|
||||
weightDevice := configs.NewWeightDevice(wd.Major, wd.Minor, weight, leafWeight)
|
||||
c.Resources.BlkioWeightDevice = append(c.Resources.BlkioWeightDevice, weightDevice)
|
||||
}
|
||||
}
|
||||
if r.BlockIO.ThrottleReadBpsDevice != nil {
|
||||
for _, td := range r.BlockIO.ThrottleReadBpsDevice {
|
||||
rate := td.Rate
|
||||
throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate)
|
||||
c.Resources.BlkioThrottleReadBpsDevice = append(c.Resources.BlkioThrottleReadBpsDevice, throttleDevice)
|
||||
}
|
||||
}
|
||||
if r.BlockIO.ThrottleWriteBpsDevice != nil {
|
||||
for _, td := range r.BlockIO.ThrottleWriteBpsDevice {
|
||||
rate := td.Rate
|
||||
throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate)
|
||||
c.Resources.BlkioThrottleWriteBpsDevice = append(c.Resources.BlkioThrottleWriteBpsDevice, throttleDevice)
|
||||
}
|
||||
}
|
||||
if r.BlockIO.ThrottleReadIOPSDevice != nil {
|
||||
for _, td := range r.BlockIO.ThrottleReadIOPSDevice {
|
||||
rate := td.Rate
|
||||
throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate)
|
||||
c.Resources.BlkioThrottleReadIOPSDevice = append(c.Resources.BlkioThrottleReadIOPSDevice, throttleDevice)
|
||||
}
|
||||
}
|
||||
if r.BlockIO.ThrottleWriteIOPSDevice != nil {
|
||||
for _, td := range r.BlockIO.ThrottleWriteIOPSDevice {
|
||||
rate := td.Rate
|
||||
throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate)
|
||||
c.Resources.BlkioThrottleWriteIOPSDevice = append(c.Resources.BlkioThrottleWriteIOPSDevice, throttleDevice)
|
||||
}
|
||||
}
|
||||
}
|
||||
for _, l := range r.HugepageLimits {
|
||||
c.Resources.HugetlbLimit = append(c.Resources.HugetlbLimit, &configs.HugepageLimit{
|
||||
Pagesize: l.Pagesize,
|
||||
Limit: l.Limit,
|
||||
})
|
||||
}
|
||||
if r.Network != nil {
|
||||
if r.Network.ClassID != nil {
|
||||
c.Resources.NetClsClassid = *r.Network.ClassID
|
||||
}
|
||||
for _, m := range r.Network.Priorities {
|
||||
c.Resources.NetPrioIfpriomap = append(c.Resources.NetPrioIfpriomap, &configs.IfPrioMap{
|
||||
Interface: m.Name,
|
||||
Priority: int64(m.Priority),
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
if !opts.Rootless {
|
||||
// append the default allowed devices to the end of the list
|
||||
c.Resources.Devices = append(c.Resources.Devices, allowedDevices...)
|
||||
}
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func stringToCgroupDeviceRune(s string) (rune, error) {
|
||||
switch s {
|
||||
case "a":
|
||||
return 'a', nil
|
||||
case "b":
|
||||
return 'b', nil
|
||||
case "c":
|
||||
return 'c', nil
|
||||
default:
|
||||
return 0, fmt.Errorf("invalid cgroup device type %q", s)
|
||||
}
|
||||
}
|
||||
|
||||
func stringToDeviceRune(s string) (rune, error) {
|
||||
switch s {
|
||||
case "p":
|
||||
return 'p', nil
|
||||
case "u":
|
||||
return 'u', nil
|
||||
case "b":
|
||||
return 'b', nil
|
||||
case "c":
|
||||
return 'c', nil
|
||||
default:
|
||||
return 0, fmt.Errorf("invalid device type %q", s)
|
||||
}
|
||||
}
|
||||
|
||||
func createDevices(spec *specs.Spec, config *configs.Config) error {
|
||||
// add whitelisted devices
|
||||
config.Devices = []*configs.Device{
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/null",
|
||||
Major: 1,
|
||||
Minor: 3,
|
||||
FileMode: 0666,
|
||||
Uid: 0,
|
||||
Gid: 0,
|
||||
},
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/random",
|
||||
Major: 1,
|
||||
Minor: 8,
|
||||
FileMode: 0666,
|
||||
Uid: 0,
|
||||
Gid: 0,
|
||||
},
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/full",
|
||||
Major: 1,
|
||||
Minor: 7,
|
||||
FileMode: 0666,
|
||||
Uid: 0,
|
||||
Gid: 0,
|
||||
},
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/tty",
|
||||
Major: 5,
|
||||
Minor: 0,
|
||||
FileMode: 0666,
|
||||
Uid: 0,
|
||||
Gid: 0,
|
||||
},
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/zero",
|
||||
Major: 1,
|
||||
Minor: 5,
|
||||
FileMode: 0666,
|
||||
Uid: 0,
|
||||
Gid: 0,
|
||||
},
|
||||
{
|
||||
Type: 'c',
|
||||
Path: "/dev/urandom",
|
||||
Major: 1,
|
||||
Minor: 9,
|
||||
FileMode: 0666,
|
||||
Uid: 0,
|
||||
Gid: 0,
|
||||
},
|
||||
}
|
||||
// merge in additional devices from the spec
|
||||
if spec.Linux != nil {
|
||||
for _, d := range spec.Linux.Devices {
|
||||
var uid, gid uint32
|
||||
var filemode os.FileMode = 0666
|
||||
|
||||
if d.UID != nil {
|
||||
uid = *d.UID
|
||||
}
|
||||
if d.GID != nil {
|
||||
gid = *d.GID
|
||||
}
|
||||
dt, err := stringToDeviceRune(d.Type)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if d.FileMode != nil {
|
||||
filemode = *d.FileMode
|
||||
}
|
||||
device := &configs.Device{
|
||||
Type: dt,
|
||||
Path: d.Path,
|
||||
Major: d.Major,
|
||||
Minor: d.Minor,
|
||||
FileMode: filemode,
|
||||
Uid: uid,
|
||||
Gid: gid,
|
||||
}
|
||||
config.Devices = append(config.Devices, device)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setupUserNamespace(spec *specs.Spec, config *configs.Config) error {
|
||||
create := func(m specs.LinuxIDMapping) configs.IDMap {
|
||||
return configs.IDMap{
|
||||
HostID: int(m.HostID),
|
||||
ContainerID: int(m.ContainerID),
|
||||
Size: int(m.Size),
|
||||
}
|
||||
}
|
||||
if spec.Linux != nil {
|
||||
for _, m := range spec.Linux.UIDMappings {
|
||||
config.UidMappings = append(config.UidMappings, create(m))
|
||||
}
|
||||
for _, m := range spec.Linux.GIDMappings {
|
||||
config.GidMappings = append(config.GidMappings, create(m))
|
||||
}
|
||||
}
|
||||
rootUID, err := config.HostRootUID()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
rootGID, err := config.HostRootGID()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, node := range config.Devices {
|
||||
node.Uid = uint32(rootUID)
|
||||
node.Gid = uint32(rootGID)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// parseMountOptions parses the string and returns the flags, propagation
|
||||
// flags and any mount data that it contains.
|
||||
func parseMountOptions(options []string) (int, []int, string, int) {
|
||||
var (
|
||||
flag int
|
||||
pgflag []int
|
||||
data []string
|
||||
extFlags int
|
||||
)
|
||||
flags := map[string]struct {
|
||||
clear bool
|
||||
flag int
|
||||
}{
|
||||
"acl": {false, unix.MS_POSIXACL},
|
||||
"async": {true, unix.MS_SYNCHRONOUS},
|
||||
"atime": {true, unix.MS_NOATIME},
|
||||
"bind": {false, unix.MS_BIND},
|
||||
"defaults": {false, 0},
|
||||
"dev": {true, unix.MS_NODEV},
|
||||
"diratime": {true, unix.MS_NODIRATIME},
|
||||
"dirsync": {false, unix.MS_DIRSYNC},
|
||||
"exec": {true, unix.MS_NOEXEC},
|
||||
"iversion": {false, unix.MS_I_VERSION},
|
||||
"lazytime": {false, unix.MS_LAZYTIME},
|
||||
"loud": {true, unix.MS_SILENT},
|
||||
"mand": {false, unix.MS_MANDLOCK},
|
||||
"noacl": {true, unix.MS_POSIXACL},
|
||||
"noatime": {false, unix.MS_NOATIME},
|
||||
"nodev": {false, unix.MS_NODEV},
|
||||
"nodiratime": {false, unix.MS_NODIRATIME},
|
||||
"noexec": {false, unix.MS_NOEXEC},
|
||||
"noiversion": {true, unix.MS_I_VERSION},
|
||||
"nolazytime": {true, unix.MS_LAZYTIME},
|
||||
"nomand": {true, unix.MS_MANDLOCK},
|
||||
"norelatime": {true, unix.MS_RELATIME},
|
||||
"nostrictatime": {true, unix.MS_STRICTATIME},
|
||||
"nosuid": {false, unix.MS_NOSUID},
|
||||
"rbind": {false, unix.MS_BIND | unix.MS_REC},
|
||||
"relatime": {false, unix.MS_RELATIME},
|
||||
"remount": {false, unix.MS_REMOUNT},
|
||||
"ro": {false, unix.MS_RDONLY},
|
||||
"rw": {true, unix.MS_RDONLY},
|
||||
"silent": {false, unix.MS_SILENT},
|
||||
"strictatime": {false, unix.MS_STRICTATIME},
|
||||
"suid": {true, unix.MS_NOSUID},
|
||||
"sync": {false, unix.MS_SYNCHRONOUS},
|
||||
}
|
||||
propagationFlags := map[string]int{
|
||||
"private": unix.MS_PRIVATE,
|
||||
"shared": unix.MS_SHARED,
|
||||
"slave": unix.MS_SLAVE,
|
||||
"unbindable": unix.MS_UNBINDABLE,
|
||||
"rprivate": unix.MS_PRIVATE | unix.MS_REC,
|
||||
"rshared": unix.MS_SHARED | unix.MS_REC,
|
||||
"rslave": unix.MS_SLAVE | unix.MS_REC,
|
||||
"runbindable": unix.MS_UNBINDABLE | unix.MS_REC,
|
||||
}
|
||||
extensionFlags := map[string]struct {
|
||||
clear bool
|
||||
flag int
|
||||
}{
|
||||
"tmpcopyup": {false, configs.EXT_COPYUP},
|
||||
}
|
||||
for _, o := range options {
|
||||
// If the option does not exist in the flags table or the flag
|
||||
// is not supported on the platform,
|
||||
// then it is a data value for a specific fs type
|
||||
if f, exists := flags[o]; exists && f.flag != 0 {
|
||||
if f.clear {
|
||||
flag &= ^f.flag
|
||||
} else {
|
||||
flag |= f.flag
|
||||
}
|
||||
} else if f, exists := propagationFlags[o]; exists && f != 0 {
|
||||
pgflag = append(pgflag, f)
|
||||
} else if f, exists := extensionFlags[o]; exists && f.flag != 0 {
|
||||
if f.clear {
|
||||
extFlags &= ^f.flag
|
||||
} else {
|
||||
extFlags |= f.flag
|
||||
}
|
||||
} else {
|
||||
data = append(data, o)
|
||||
}
|
||||
}
|
||||
return flag, pgflag, strings.Join(data, ","), extFlags
|
||||
}
|
||||
|
||||
func setupSeccomp(config *specs.LinuxSeccomp) (*configs.Seccomp, error) {
|
||||
if config == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// No default action specified, no syscalls listed, assume seccomp disabled
|
||||
if config.DefaultAction == "" && len(config.Syscalls) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
newConfig := new(configs.Seccomp)
|
||||
newConfig.Syscalls = []*configs.Syscall{}
|
||||
|
||||
if len(config.Architectures) > 0 {
|
||||
newConfig.Architectures = []string{}
|
||||
for _, arch := range config.Architectures {
|
||||
newArch, err := seccomp.ConvertStringToArch(string(arch))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
newConfig.Architectures = append(newConfig.Architectures, newArch)
|
||||
}
|
||||
}
|
||||
|
||||
// Convert default action from string representation
|
||||
newDefaultAction, err := seccomp.ConvertStringToAction(string(config.DefaultAction))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
newConfig.DefaultAction = newDefaultAction
|
||||
|
||||
// Loop through all syscall blocks and convert them to libcontainer format
|
||||
for _, call := range config.Syscalls {
|
||||
newAction, err := seccomp.ConvertStringToAction(string(call.Action))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, name := range call.Names {
|
||||
newCall := configs.Syscall{
|
||||
Name: name,
|
||||
Action: newAction,
|
||||
Args: []*configs.Arg{},
|
||||
}
|
||||
// Loop through all the arguments of the syscall and convert them
|
||||
for _, arg := range call.Args {
|
||||
newOp, err := seccomp.ConvertStringToOperator(string(arg.Op))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
newArg := configs.Arg{
|
||||
Index: arg.Index,
|
||||
Value: arg.Value,
|
||||
ValueTwo: arg.ValueTwo,
|
||||
Op: newOp,
|
||||
}
|
||||
|
||||
newCall.Args = append(newCall.Args, &newArg)
|
||||
}
|
||||
newConfig.Syscalls = append(newConfig.Syscalls, &newCall)
|
||||
}
|
||||
}
|
||||
|
||||
return newConfig, nil
|
||||
}
|
||||
|
||||
func createHooks(rspec *specs.Spec, config *configs.Config) {
|
||||
config.Hooks = &configs.Hooks{}
|
||||
if rspec.Hooks != nil {
|
||||
|
||||
for _, h := range rspec.Hooks.Prestart {
|
||||
cmd := createCommandHook(h)
|
||||
config.Hooks.Prestart = append(config.Hooks.Prestart, configs.NewCommandHook(cmd))
|
||||
}
|
||||
for _, h := range rspec.Hooks.Poststart {
|
||||
cmd := createCommandHook(h)
|
||||
config.Hooks.Poststart = append(config.Hooks.Poststart, configs.NewCommandHook(cmd))
|
||||
}
|
||||
for _, h := range rspec.Hooks.Poststop {
|
||||
cmd := createCommandHook(h)
|
||||
config.Hooks.Poststop = append(config.Hooks.Poststop, configs.NewCommandHook(cmd))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func createCommandHook(h specs.Hook) configs.Command {
|
||||
cmd := configs.Command{
|
||||
Path: h.Path,
|
||||
Args: h.Args,
|
||||
Env: h.Env,
|
||||
}
|
||||
if h.Timeout != nil {
|
||||
d := time.Duration(*h.Timeout) * time.Second
|
||||
cmd.Timeout = &d
|
||||
}
|
||||
return cmd
|
||||
}
|
||||
27
vendor/github.com/opencontainers/runc/libcontainer/stacktrace/capture.go
generated
vendored
Normal file
27
vendor/github.com/opencontainers/runc/libcontainer/stacktrace/capture.go
generated
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
package stacktrace
|
||||
|
||||
import "runtime"
|
||||
|
||||
// Capture captures a stacktrace for the current calling go program
|
||||
//
|
||||
// skip is the number of frames to skip
|
||||
func Capture(userSkip int) Stacktrace {
|
||||
var (
|
||||
skip = userSkip + 1 // add one for our own function
|
||||
frames []Frame
|
||||
prevPc uintptr
|
||||
)
|
||||
for i := skip; ; i++ {
|
||||
pc, file, line, ok := runtime.Caller(i)
|
||||
//detect if caller is repeated to avoid loop, gccgo
|
||||
//currently runs into a loop without this check
|
||||
if !ok || pc == prevPc {
|
||||
break
|
||||
}
|
||||
frames = append(frames, NewFrame(pc, file, line))
|
||||
prevPc = pc
|
||||
}
|
||||
return Stacktrace{
|
||||
Frames: frames,
|
||||
}
|
||||
}
|
||||
38
vendor/github.com/opencontainers/runc/libcontainer/stacktrace/frame.go
generated
vendored
Normal file
38
vendor/github.com/opencontainers/runc/libcontainer/stacktrace/frame.go
generated
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
package stacktrace
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// NewFrame returns a new stack frame for the provided information
|
||||
func NewFrame(pc uintptr, file string, line int) Frame {
|
||||
fn := runtime.FuncForPC(pc)
|
||||
if fn == nil {
|
||||
return Frame{}
|
||||
}
|
||||
pack, name := parseFunctionName(fn.Name())
|
||||
return Frame{
|
||||
Line: line,
|
||||
File: filepath.Base(file),
|
||||
Package: pack,
|
||||
Function: name,
|
||||
}
|
||||
}
|
||||
|
||||
func parseFunctionName(name string) (string, string) {
|
||||
i := strings.LastIndex(name, ".")
|
||||
if i == -1 {
|
||||
return "", name
|
||||
}
|
||||
return name[:i], name[i+1:]
|
||||
}
|
||||
|
||||
// Frame contains all the information for a stack frame within a go program
|
||||
type Frame struct {
|
||||
File string
|
||||
Function string
|
||||
Package string
|
||||
Line int
|
||||
}
|
||||
5
vendor/github.com/opencontainers/runc/libcontainer/stacktrace/stacktrace.go
generated
vendored
Normal file
5
vendor/github.com/opencontainers/runc/libcontainer/stacktrace/stacktrace.go
generated
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
package stacktrace
|
||||
|
||||
type Stacktrace struct {
|
||||
Frames []Frame
|
||||
}
|
||||
193
vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go
generated
vendored
Normal file
193
vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go
generated
vendored
Normal file
@@ -0,0 +1,193 @@
|
||||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"syscall" //only for Exec
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/apparmor"
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/keys"
|
||||
"github.com/opencontainers/runc/libcontainer/seccomp"
|
||||
"github.com/opencontainers/runc/libcontainer/system"
|
||||
"github.com/opencontainers/selinux/go-selinux/label"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
type linuxStandardInit struct {
|
||||
pipe *os.File
|
||||
consoleSocket *os.File
|
||||
parentPid int
|
||||
fifoFd int
|
||||
config *initConfig
|
||||
}
|
||||
|
||||
func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) {
|
||||
var newperms uint32
|
||||
|
||||
if l.config.Config.Namespaces.Contains(configs.NEWUSER) {
|
||||
// With user ns we need 'other' search permissions.
|
||||
newperms = 0x8
|
||||
} else {
|
||||
// Without user ns we need 'UID' search permissions.
|
||||
newperms = 0x80000
|
||||
}
|
||||
|
||||
// Create a unique per session container name that we can join in setns;
|
||||
// However, other containers can also join it.
|
||||
return fmt.Sprintf("_ses.%s", l.config.ContainerId), 0xffffffff, newperms
|
||||
}
|
||||
|
||||
func (l *linuxStandardInit) Init() error {
|
||||
if !l.config.Config.NoNewKeyring {
|
||||
ringname, keepperms, newperms := l.getSessionRingParams()
|
||||
|
||||
// Do not inherit the parent's session keyring.
|
||||
sessKeyId, err := keys.JoinSessionKeyring(ringname)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Make session keyring searcheable.
|
||||
if err := keys.ModKeyringPerm(sessKeyId, keepperms, newperms); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if err := setupNetwork(l.config); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := setupRoute(l.config.Config); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
label.Init()
|
||||
if err := prepareRootfs(l.pipe, l.config); err != nil {
|
||||
return err
|
||||
}
|
||||
// Set up the console. This has to be done *before* we finalize the rootfs,
|
||||
// but *after* we've given the user the chance to set up all of the mounts
|
||||
// they wanted.
|
||||
if l.config.CreateConsole {
|
||||
if err := setupConsole(l.consoleSocket, l.config, true); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := system.Setctty(); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Finish the rootfs setup.
|
||||
if l.config.Config.Namespaces.Contains(configs.NEWNS) {
|
||||
if err := finalizeRootfs(l.config.Config); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if hostname := l.config.Config.Hostname; hostname != "" {
|
||||
if err := unix.Sethostname([]byte(hostname)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for key, value := range l.config.Config.Sysctl {
|
||||
if err := writeSystemProperty(key, value); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, path := range l.config.Config.ReadonlyPaths {
|
||||
if err := readonlyPath(path); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, path := range l.config.Config.MaskPaths {
|
||||
if err := maskPath(path); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
pdeath, err := system.GetParentDeathSignal()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if l.config.NoNewPrivileges {
|
||||
if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// Tell our parent that we're ready to Execv. This must be done before the
|
||||
// Seccomp rules have been applied, because we need to be able to read and
|
||||
// write to a socket.
|
||||
if err := syncParentReady(l.pipe); err != nil {
|
||||
return err
|
||||
}
|
||||
// Without NoNewPrivileges seccomp is a privileged operation, so we need to
|
||||
// do this before dropping capabilities; otherwise do it as late as possible
|
||||
// just before execve so as few syscalls take place after it as possible.
|
||||
if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges {
|
||||
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if err := finalizeNamespace(l.config); err != nil {
|
||||
return err
|
||||
}
|
||||
// finalizeNamespace can change user/group which clears the parent death
|
||||
// signal, so we restore it here.
|
||||
if err := pdeath.Restore(); err != nil {
|
||||
return err
|
||||
}
|
||||
// Compare the parent from the initial start of the init process and make
|
||||
// sure that it did not change. if the parent changes that means it died
|
||||
// and we were reparented to something else so we should just kill ourself
|
||||
// and not cause problems for someone else.
|
||||
if unix.Getppid() != l.parentPid {
|
||||
return unix.Kill(unix.Getpid(), unix.SIGKILL)
|
||||
}
|
||||
// Check for the arg before waiting to make sure it exists and it is
|
||||
// returned as a create time error.
|
||||
name, err := exec.LookPath(l.config.Args[0])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Close the pipe to signal that we have completed our init.
|
||||
l.pipe.Close()
|
||||
// Wait for the FIFO to be opened on the other side before exec-ing the
|
||||
// user process. We open it through /proc/self/fd/$fd, because the fd that
|
||||
// was given to us was an O_PATH fd to the fifo itself. Linux allows us to
|
||||
// re-open an O_PATH fd through /proc.
|
||||
fd, err := unix.Open(fmt.Sprintf("/proc/self/fd/%d", l.fifoFd), unix.O_WRONLY|unix.O_CLOEXEC, 0)
|
||||
if err != nil {
|
||||
return newSystemErrorWithCause(err, "open exec fifo")
|
||||
}
|
||||
if _, err := unix.Write(fd, []byte("0")); err != nil {
|
||||
return newSystemErrorWithCause(err, "write 0 exec fifo")
|
||||
}
|
||||
// Close the O_PATH fifofd fd before exec because the kernel resets
|
||||
// dumpable in the wrong order. This has been fixed in newer kernels, but
|
||||
// we keep this to ensure CVE-2016-9962 doesn't re-emerge on older kernels.
|
||||
// N.B. the core issue itself (passing dirfds to the host filesystem) has
|
||||
// since been resolved.
|
||||
// https://github.com/torvalds/linux/blob/v4.9/fs/exec.c#L1290-L1318
|
||||
unix.Close(l.fifoFd)
|
||||
// Set seccomp as close to execve as possible, so as few syscalls take
|
||||
// place afterward (reducing the amount of syscalls that users need to
|
||||
// enable in their seccomp profiles).
|
||||
if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges {
|
||||
if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
|
||||
return newSystemErrorWithCause(err, "init seccomp")
|
||||
}
|
||||
}
|
||||
if err := syscall.Exec(name, l.config.Args[0:], os.Environ()); err != nil {
|
||||
return newSystemErrorWithCause(err, "exec user process")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
255
vendor/github.com/opencontainers/runc/libcontainer/state_linux.go
generated
vendored
Normal file
255
vendor/github.com/opencontainers/runc/libcontainer/state_linux.go
generated
vendored
Normal file
@@ -0,0 +1,255 @@
|
||||
// +build linux
|
||||
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/configs"
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func newStateTransitionError(from, to containerState) error {
|
||||
return &stateTransitionError{
|
||||
From: from.status().String(),
|
||||
To: to.status().String(),
|
||||
}
|
||||
}
|
||||
|
||||
// stateTransitionError is returned when an invalid state transition happens from one
|
||||
// state to another.
|
||||
type stateTransitionError struct {
|
||||
From string
|
||||
To string
|
||||
}
|
||||
|
||||
func (s *stateTransitionError) Error() string {
|
||||
return fmt.Sprintf("invalid state transition from %s to %s", s.From, s.To)
|
||||
}
|
||||
|
||||
type containerState interface {
|
||||
transition(containerState) error
|
||||
destroy() error
|
||||
status() Status
|
||||
}
|
||||
|
||||
func destroy(c *linuxContainer) error {
|
||||
if !c.config.Namespaces.Contains(configs.NEWPID) {
|
||||
if err := signalAllProcesses(c.cgroupManager, unix.SIGKILL); err != nil {
|
||||
logrus.Warn(err)
|
||||
}
|
||||
}
|
||||
err := c.cgroupManager.Destroy()
|
||||
if c.intelRdtManager != nil {
|
||||
if ierr := c.intelRdtManager.Destroy(); err == nil {
|
||||
err = ierr
|
||||
}
|
||||
}
|
||||
if rerr := os.RemoveAll(c.root); err == nil {
|
||||
err = rerr
|
||||
}
|
||||
c.initProcess = nil
|
||||
if herr := runPoststopHooks(c); err == nil {
|
||||
err = herr
|
||||
}
|
||||
c.state = &stoppedState{c: c}
|
||||
return err
|
||||
}
|
||||
|
||||
func runPoststopHooks(c *linuxContainer) error {
|
||||
if c.config.Hooks != nil {
|
||||
bundle, annotations := utils.Annotations(c.config.Labels)
|
||||
s := configs.HookState{
|
||||
Version: c.config.Version,
|
||||
ID: c.id,
|
||||
Bundle: bundle,
|
||||
Annotations: annotations,
|
||||
}
|
||||
for _, hook := range c.config.Hooks.Poststop {
|
||||
if err := hook.Run(s); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// stoppedState represents a container is a stopped/destroyed state.
|
||||
type stoppedState struct {
|
||||
c *linuxContainer
|
||||
}
|
||||
|
||||
func (b *stoppedState) status() Status {
|
||||
return Stopped
|
||||
}
|
||||
|
||||
func (b *stoppedState) transition(s containerState) error {
|
||||
switch s.(type) {
|
||||
case *runningState, *restoredState:
|
||||
b.c.state = s
|
||||
return nil
|
||||
case *stoppedState:
|
||||
return nil
|
||||
}
|
||||
return newStateTransitionError(b, s)
|
||||
}
|
||||
|
||||
func (b *stoppedState) destroy() error {
|
||||
return destroy(b.c)
|
||||
}
|
||||
|
||||
// runningState represents a container that is currently running.
|
||||
type runningState struct {
|
||||
c *linuxContainer
|
||||
}
|
||||
|
||||
func (r *runningState) status() Status {
|
||||
return Running
|
||||
}
|
||||
|
||||
func (r *runningState) transition(s containerState) error {
|
||||
switch s.(type) {
|
||||
case *stoppedState:
|
||||
t, err := r.c.runType()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if t == Running {
|
||||
return newGenericError(fmt.Errorf("container still running"), ContainerNotStopped)
|
||||
}
|
||||
r.c.state = s
|
||||
return nil
|
||||
case *pausedState:
|
||||
r.c.state = s
|
||||
return nil
|
||||
case *runningState:
|
||||
return nil
|
||||
}
|
||||
return newStateTransitionError(r, s)
|
||||
}
|
||||
|
||||
func (r *runningState) destroy() error {
|
||||
t, err := r.c.runType()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if t == Running {
|
||||
return newGenericError(fmt.Errorf("container is not destroyed"), ContainerNotStopped)
|
||||
}
|
||||
return destroy(r.c)
|
||||
}
|
||||
|
||||
type createdState struct {
|
||||
c *linuxContainer
|
||||
}
|
||||
|
||||
func (i *createdState) status() Status {
|
||||
return Created
|
||||
}
|
||||
|
||||
func (i *createdState) transition(s containerState) error {
|
||||
switch s.(type) {
|
||||
case *runningState, *pausedState, *stoppedState:
|
||||
i.c.state = s
|
||||
return nil
|
||||
case *createdState:
|
||||
return nil
|
||||
}
|
||||
return newStateTransitionError(i, s)
|
||||
}
|
||||
|
||||
func (i *createdState) destroy() error {
|
||||
i.c.initProcess.signal(unix.SIGKILL)
|
||||
return destroy(i.c)
|
||||
}
|
||||
|
||||
// pausedState represents a container that is currently pause. It cannot be destroyed in a
|
||||
// paused state and must transition back to running first.
|
||||
type pausedState struct {
|
||||
c *linuxContainer
|
||||
}
|
||||
|
||||
func (p *pausedState) status() Status {
|
||||
return Paused
|
||||
}
|
||||
|
||||
func (p *pausedState) transition(s containerState) error {
|
||||
switch s.(type) {
|
||||
case *runningState, *stoppedState:
|
||||
p.c.state = s
|
||||
return nil
|
||||
case *pausedState:
|
||||
return nil
|
||||
}
|
||||
return newStateTransitionError(p, s)
|
||||
}
|
||||
|
||||
func (p *pausedState) destroy() error {
|
||||
t, err := p.c.runType()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if t != Running && t != Created {
|
||||
if err := p.c.cgroupManager.Freeze(configs.Thawed); err != nil {
|
||||
return err
|
||||
}
|
||||
return destroy(p.c)
|
||||
}
|
||||
return newGenericError(fmt.Errorf("container is paused"), ContainerPaused)
|
||||
}
|
||||
|
||||
// restoredState is the same as the running state but also has associated checkpoint
|
||||
// information that maybe need destroyed when the container is stopped and destroy is called.
|
||||
type restoredState struct {
|
||||
imageDir string
|
||||
c *linuxContainer
|
||||
}
|
||||
|
||||
func (r *restoredState) status() Status {
|
||||
return Running
|
||||
}
|
||||
|
||||
func (r *restoredState) transition(s containerState) error {
|
||||
switch s.(type) {
|
||||
case *stoppedState, *runningState:
|
||||
return nil
|
||||
}
|
||||
return newStateTransitionError(r, s)
|
||||
}
|
||||
|
||||
func (r *restoredState) destroy() error {
|
||||
if _, err := os.Stat(filepath.Join(r.c.root, "checkpoint")); err != nil {
|
||||
if !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return destroy(r.c)
|
||||
}
|
||||
|
||||
// loadedState is used whenever a container is restored, loaded, or setting additional
|
||||
// processes inside and it should not be destroyed when it is exiting.
|
||||
type loadedState struct {
|
||||
c *linuxContainer
|
||||
s Status
|
||||
}
|
||||
|
||||
func (n *loadedState) status() Status {
|
||||
return n.s
|
||||
}
|
||||
|
||||
func (n *loadedState) transition(s containerState) error {
|
||||
n.c.state = s
|
||||
return nil
|
||||
}
|
||||
|
||||
func (n *loadedState) destroy() error {
|
||||
if err := n.c.refreshState(); err != nil {
|
||||
return err
|
||||
}
|
||||
return n.c.state.destroy()
|
||||
}
|
||||
15
vendor/github.com/opencontainers/runc/libcontainer/stats.go
generated
vendored
Normal file
15
vendor/github.com/opencontainers/runc/libcontainer/stats.go
generated
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
package libcontainer
|
||||
|
||||
type NetworkInterface struct {
|
||||
// Name is the name of the network interface.
|
||||
Name string
|
||||
|
||||
RxBytes uint64
|
||||
RxPackets uint64
|
||||
RxErrors uint64
|
||||
RxDropped uint64
|
||||
TxBytes uint64
|
||||
TxPackets uint64
|
||||
TxErrors uint64
|
||||
TxDropped uint64
|
||||
}
|
||||
10
vendor/github.com/opencontainers/runc/libcontainer/stats_linux.go
generated
vendored
Normal file
10
vendor/github.com/opencontainers/runc/libcontainer/stats_linux.go
generated
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
package libcontainer
|
||||
|
||||
import "github.com/opencontainers/runc/libcontainer/cgroups"
|
||||
import "github.com/opencontainers/runc/libcontainer/intelrdt"
|
||||
|
||||
type Stats struct {
|
||||
Interfaces []*NetworkInterface
|
||||
CgroupStats *cgroups.Stats
|
||||
IntelRdtStats *intelrdt.Stats
|
||||
}
|
||||
107
vendor/github.com/opencontainers/runc/libcontainer/sync.go
generated
vendored
Normal file
107
vendor/github.com/opencontainers/runc/libcontainer/sync.go
generated
vendored
Normal file
@@ -0,0 +1,107 @@
|
||||
package libcontainer
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"github.com/opencontainers/runc/libcontainer/utils"
|
||||
)
|
||||
|
||||
type syncType string
|
||||
|
||||
// Constants that are used for synchronisation between the parent and child
|
||||
// during container setup. They come in pairs (with procError being a generic
|
||||
// response which is followed by a &genericError).
|
||||
//
|
||||
// [ child ] <-> [ parent ]
|
||||
//
|
||||
// procHooks --> [run hooks]
|
||||
// <-- procResume
|
||||
//
|
||||
// procConsole -->
|
||||
// <-- procConsoleReq
|
||||
// [send(fd)] --> [recv(fd)]
|
||||
// <-- procConsoleAck
|
||||
//
|
||||
// procReady --> [final setup]
|
||||
// <-- procRun
|
||||
const (
|
||||
procError syncType = "procError"
|
||||
procReady syncType = "procReady"
|
||||
procRun syncType = "procRun"
|
||||
procHooks syncType = "procHooks"
|
||||
procResume syncType = "procResume"
|
||||
)
|
||||
|
||||
type syncT struct {
|
||||
Type syncType `json:"type"`
|
||||
}
|
||||
|
||||
// writeSync is used to write to a synchronisation pipe. An error is returned
|
||||
// if there was a problem writing the payload.
|
||||
func writeSync(pipe io.Writer, sync syncType) error {
|
||||
if err := utils.WriteJSON(pipe, syncT{sync}); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// readSync is used to read from a synchronisation pipe. An error is returned
|
||||
// if we got a genericError, the pipe was closed, or we got an unexpected flag.
|
||||
func readSync(pipe io.Reader, expected syncType) error {
|
||||
var procSync syncT
|
||||
if err := json.NewDecoder(pipe).Decode(&procSync); err != nil {
|
||||
if err == io.EOF {
|
||||
return fmt.Errorf("parent closed synchronisation channel")
|
||||
}
|
||||
|
||||
if procSync.Type == procError {
|
||||
var ierr genericError
|
||||
|
||||
if err := json.NewDecoder(pipe).Decode(&ierr); err != nil {
|
||||
return fmt.Errorf("failed reading error from parent: %v", err)
|
||||
}
|
||||
|
||||
return &ierr
|
||||
}
|
||||
|
||||
if procSync.Type != expected {
|
||||
return fmt.Errorf("invalid synchronisation flag from parent")
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// parseSync runs the given callback function on each syncT received from the
|
||||
// child. It will return once io.EOF is returned from the given pipe.
|
||||
func parseSync(pipe io.Reader, fn func(*syncT) error) error {
|
||||
dec := json.NewDecoder(pipe)
|
||||
for {
|
||||
var sync syncT
|
||||
if err := dec.Decode(&sync); err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// We handle this case outside fn for cleanliness reasons.
|
||||
var ierr *genericError
|
||||
if sync.Type == procError {
|
||||
if err := dec.Decode(&ierr); err != nil && err != io.EOF {
|
||||
return newSystemErrorWithCause(err, "decoding proc error from init")
|
||||
}
|
||||
if ierr != nil {
|
||||
return ierr
|
||||
}
|
||||
// Programmer error.
|
||||
panic("No error following JSON procError payload.")
|
||||
}
|
||||
|
||||
if err := fn(&sync); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
63
vendor/github.com/opencontainers/runc/libcontainer/system/linux.go
generated
vendored
63
vendor/github.com/opencontainers/runc/libcontainer/system/linux.go
generated
vendored
@@ -7,10 +7,25 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"syscall"
|
||||
"syscall" // only for exec
|
||||
"unsafe"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// If arg2 is nonzero, set the "child subreaper" attribute of the
|
||||
// calling process; if arg2 is zero, unset the attribute. When a
|
||||
// process is marked as a child subreaper, all of the children
|
||||
// that it creates, and their descendants, will be marked as
|
||||
// having a subreaper. In effect, a subreaper fulfills the role
|
||||
// of init(1) for its descendant processes. Upon termination of
|
||||
// a process that is orphaned (i.e., its immediate parent has
|
||||
// already terminated) and marked as having a subreaper, the
|
||||
// nearest still living ancestor subreaper will receive a SIGCHLD
|
||||
// signal and be able to wait(2) on the process to discover its
|
||||
// termination status.
|
||||
const PR_SET_CHILD_SUBREAPER = 36
|
||||
|
||||
type ParentDeathSignal int
|
||||
|
||||
func (p ParentDeathSignal) Restore() error {
|
||||
@@ -40,8 +55,16 @@ func Execv(cmd string, args []string, env []string) error {
|
||||
return syscall.Exec(name, args, env)
|
||||
}
|
||||
|
||||
func Prlimit(pid, resource int, limit unix.Rlimit) error {
|
||||
_, _, err := unix.RawSyscall6(unix.SYS_PRLIMIT64, uintptr(pid), uintptr(resource), uintptr(unsafe.Pointer(&limit)), uintptr(unsafe.Pointer(&limit)), 0, 0)
|
||||
if err != 0 {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func SetParentDeathSignal(sig uintptr) error {
|
||||
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, sig, 0); err != 0 {
|
||||
if err := unix.Prctl(unix.PR_SET_PDEATHSIG, sig, 0, 0, 0); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
@@ -49,15 +72,14 @@ func SetParentDeathSignal(sig uintptr) error {
|
||||
|
||||
func GetParentDeathSignal() (ParentDeathSignal, error) {
|
||||
var sig int
|
||||
_, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0)
|
||||
if err != 0 {
|
||||
if err := unix.Prctl(unix.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0, 0, 0); err != nil {
|
||||
return -1, err
|
||||
}
|
||||
return ParentDeathSignal(sig), nil
|
||||
}
|
||||
|
||||
func SetKeepCaps() error {
|
||||
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_KEEPCAPS, 1, 0); err != 0 {
|
||||
if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 1, 0, 0, 0); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -65,7 +87,7 @@ func SetKeepCaps() error {
|
||||
}
|
||||
|
||||
func ClearKeepCaps() error {
|
||||
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_KEEPCAPS, 0, 0); err != 0 {
|
||||
if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 0, 0, 0, 0); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -73,23 +95,18 @@ func ClearKeepCaps() error {
|
||||
}
|
||||
|
||||
func Setctty() error {
|
||||
if _, _, err := syscall.RawSyscall(syscall.SYS_IOCTL, 0, uintptr(syscall.TIOCSCTTY), 0); err != 0 {
|
||||
if err := unix.IoctlSetInt(0, unix.TIOCSCTTY, 0); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
/*
|
||||
* Detect whether we are currently running in a user namespace.
|
||||
* Copied from github.com/lxc/lxd/shared/util.go
|
||||
*/
|
||||
// RunningInUserNS detects whether we are currently running in a user namespace.
|
||||
// Copied from github.com/lxc/lxd/shared/util.go
|
||||
func RunningInUserNS() bool {
|
||||
file, err := os.Open("/proc/self/uid_map")
|
||||
if err != nil {
|
||||
/*
|
||||
* This kernel-provided file only exists if user namespaces are
|
||||
* supported
|
||||
*/
|
||||
// This kernel-provided file only exists if user namespaces are supported
|
||||
return false
|
||||
}
|
||||
defer file.Close()
|
||||
@@ -112,3 +129,19 @@ func RunningInUserNS() bool {
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// SetSubreaper sets the value i as the subreaper setting for the calling process
|
||||
func SetSubreaper(i int) error {
|
||||
return unix.Prctl(PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0)
|
||||
}
|
||||
|
||||
// GetSubreaper returns the subreaper setting for the calling process
|
||||
func GetSubreaper() (int, error) {
|
||||
var i uintptr
|
||||
|
||||
if err := unix.Prctl(unix.PR_GET_CHILD_SUBREAPER, uintptr(unsafe.Pointer(&i)), 0, 0, 0); err != nil {
|
||||
return -1, err
|
||||
}
|
||||
|
||||
return int(i), nil
|
||||
}
|
||||
|
||||
112
vendor/github.com/opencontainers/runc/libcontainer/system/proc.go
generated
vendored
112
vendor/github.com/opencontainers/runc/libcontainer/system/proc.go
generated
vendored
@@ -1,27 +1,113 @@
|
||||
package system
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// look in /proc to find the process start time so that we can verify
|
||||
// that this pid has started after ourself
|
||||
// State is the status of a process.
|
||||
type State rune
|
||||
|
||||
const ( // Only values for Linux 3.14 and later are listed here
|
||||
Dead State = 'X'
|
||||
DiskSleep State = 'D'
|
||||
Running State = 'R'
|
||||
Sleeping State = 'S'
|
||||
Stopped State = 'T'
|
||||
TracingStop State = 't'
|
||||
Zombie State = 'Z'
|
||||
)
|
||||
|
||||
// String forms of the state from proc(5)'s documentation for
|
||||
// /proc/[pid]/status' "State" field.
|
||||
func (s State) String() string {
|
||||
switch s {
|
||||
case Dead:
|
||||
return "dead"
|
||||
case DiskSleep:
|
||||
return "disk sleep"
|
||||
case Running:
|
||||
return "running"
|
||||
case Sleeping:
|
||||
return "sleeping"
|
||||
case Stopped:
|
||||
return "stopped"
|
||||
case TracingStop:
|
||||
return "tracing stop"
|
||||
case Zombie:
|
||||
return "zombie"
|
||||
default:
|
||||
return fmt.Sprintf("unknown (%c)", s)
|
||||
}
|
||||
}
|
||||
|
||||
// Stat_t represents the information from /proc/[pid]/stat, as
|
||||
// described in proc(5) with names based on the /proc/[pid]/status
|
||||
// fields.
|
||||
type Stat_t struct {
|
||||
// PID is the process ID.
|
||||
PID uint
|
||||
|
||||
// Name is the command run by the process.
|
||||
Name string
|
||||
|
||||
// State is the state of the process.
|
||||
State State
|
||||
|
||||
// StartTime is the number of clock ticks after system boot (since
|
||||
// Linux 2.6).
|
||||
StartTime uint64
|
||||
}
|
||||
|
||||
// Stat returns a Stat_t instance for the specified process.
|
||||
func Stat(pid int) (stat Stat_t, err error) {
|
||||
bytes, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat"))
|
||||
if err != nil {
|
||||
return stat, err
|
||||
}
|
||||
return parseStat(string(bytes))
|
||||
}
|
||||
|
||||
// GetProcessStartTime is deprecated. Use Stat(pid) and
|
||||
// Stat_t.StartTime instead.
|
||||
func GetProcessStartTime(pid int) (string, error) {
|
||||
data, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat"))
|
||||
stat, err := Stat(pid)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
parts := strings.Split(string(data), " ")
|
||||
// the starttime is located at pos 22
|
||||
// from the man page
|
||||
//
|
||||
// starttime %llu (was %lu before Linux 2.6)
|
||||
// (22) The time the process started after system boot. In kernels before Linux 2.6, this
|
||||
// value was expressed in jiffies. Since Linux 2.6, the value is expressed in clock ticks
|
||||
// (divide by sysconf(_SC_CLK_TCK)).
|
||||
return parts[22-1], nil // starts at 1
|
||||
return fmt.Sprintf("%d", stat.StartTime), nil
|
||||
}
|
||||
|
||||
func parseStat(data string) (stat Stat_t, err error) {
|
||||
// From proc(5), field 2 could contain space and is inside `(` and `)`.
|
||||
// The following is an example:
|
||||
// 89653 (gunicorn: maste) S 89630 89653 89653 0 -1 4194560 29689 28896 0 3 146 32 76 19 20 0 1 0 2971844 52965376 3920 18446744073709551615 1 1 0 0 0 0 0 16781312 137447943 0 0 0 17 1 0 0 0 0 0 0 0 0 0 0 0 0 0
|
||||
i := strings.LastIndex(data, ")")
|
||||
if i <= 2 || i >= len(data)-1 {
|
||||
return stat, fmt.Errorf("invalid stat data: %q", data)
|
||||
}
|
||||
|
||||
parts := strings.SplitN(data[:i], "(", 2)
|
||||
if len(parts) != 2 {
|
||||
return stat, fmt.Errorf("invalid stat data: %q", data)
|
||||
}
|
||||
|
||||
stat.Name = parts[1]
|
||||
_, err = fmt.Sscanf(parts[0], "%d", &stat.PID)
|
||||
if err != nil {
|
||||
return stat, err
|
||||
}
|
||||
|
||||
// parts indexes should be offset by 3 from the field number given
|
||||
// proc(5), because parts is zero-indexed and we've removed fields
|
||||
// one (PID) and two (Name) in the paren-split.
|
||||
parts = strings.Split(data[i+2:], " ")
|
||||
var state int
|
||||
fmt.Sscanf(parts[3-3], "%c", &state)
|
||||
stat.State = State(state)
|
||||
fmt.Sscanf(parts[22-3], "%d", &stat.StartTime)
|
||||
return stat, nil
|
||||
}
|
||||
|
||||
40
vendor/github.com/opencontainers/runc/libcontainer/system/setns_linux.go
generated
vendored
40
vendor/github.com/opencontainers/runc/libcontainer/system/setns_linux.go
generated
vendored
@@ -1,40 +0,0 @@
|
||||
package system
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"runtime"
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// Via http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=7b21fddd087678a70ad64afc0f632e0f1071b092
|
||||
//
|
||||
// We need different setns values for the different platforms and arch
|
||||
// We are declaring the macro here because the SETNS syscall does not exist in th stdlib
|
||||
var setNsMap = map[string]uintptr{
|
||||
"linux/386": 346,
|
||||
"linux/arm64": 268,
|
||||
"linux/amd64": 308,
|
||||
"linux/arm": 375,
|
||||
"linux/ppc": 350,
|
||||
"linux/ppc64": 350,
|
||||
"linux/ppc64le": 350,
|
||||
"linux/s390x": 339,
|
||||
}
|
||||
|
||||
var sysSetns = setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)]
|
||||
|
||||
func SysSetns() uint32 {
|
||||
return uint32(sysSetns)
|
||||
}
|
||||
|
||||
func Setns(fd uintptr, flags uintptr) error {
|
||||
ns, exists := setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)]
|
||||
if !exists {
|
||||
return fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH)
|
||||
}
|
||||
_, _, err := syscall.RawSyscall(ns, fd, flags, 0)
|
||||
if err != 0 {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -1,14 +1,15 @@
|
||||
// +build linux,arm
|
||||
// +build linux
|
||||
// +build 386 arm
|
||||
|
||||
package system
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// Setuid sets the uid of the calling thread to the specified uid.
|
||||
func Setuid(uid int) (err error) {
|
||||
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID32, uintptr(uid), 0, 0)
|
||||
_, _, e1 := unix.RawSyscall(unix.SYS_SETUID32, uintptr(uid), 0, 0)
|
||||
if e1 != 0 {
|
||||
err = e1
|
||||
}
|
||||
@@ -17,7 +18,7 @@ func Setuid(uid int) (err error) {
|
||||
|
||||
// Setgid sets the gid of the calling thread to the specified gid.
|
||||
func Setgid(gid int) (err error) {
|
||||
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID32, uintptr(gid), 0, 0)
|
||||
_, _, e1 := unix.RawSyscall(unix.SYS_SETGID32, uintptr(gid), 0, 0)
|
||||
if e1 != 0 {
|
||||
err = e1
|
||||
}
|
||||
25
vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_386.go
generated
vendored
25
vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_386.go
generated
vendored
@@ -1,25 +0,0 @@
|
||||
// +build linux,386
|
||||
|
||||
package system
|
||||
|
||||
import (
|
||||
"syscall"
|
||||
)
|
||||
|
||||
// Setuid sets the uid of the calling thread to the specified uid.
|
||||
func Setuid(uid int) (err error) {
|
||||
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID, uintptr(uid), 0, 0)
|
||||
if e1 != 0 {
|
||||
err = e1
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Setgid sets the gid of the calling thread to the specified gid.
|
||||
func Setgid(gid int) (err error) {
|
||||
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID32, uintptr(gid), 0, 0)
|
||||
if e1 != 0 {
|
||||
err = e1
|
||||
}
|
||||
return
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user