Update opencontainers/runc to v1.0.0-rc5

```
$ gvt delete github.com/opencontainers/runc/libcontainer/cgroups
$ gvt delete github.com/opencontainers/runc/libcontainer/configs
$ gvt delete github.com/opencontainers/runc/libcontainer/system
$ gvt delete github.com/opencontainers/runc/libcontainer/user
$ gvt delete github.com/opencontainers/runc/libcontainer/utils
$ gvt fetch --tag v1.0.0-rc5 github.com/opencontainers/runc/libcontainer
2018/07/23 17:08:18 Fetching: github.com/opencontainers/runc/libcontainer
2018/07/23 17:08:24 · Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/vishvananda/netlink
2018/07/23 17:08:24 · Fetching recursive dependency: github.com/opencontainers/runc/vendor/golang.org/x/sys/unix
2018/07/23 17:08:24 · Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/cyphar/filepath-securejoin
2018/07/23 17:08:24 ·· Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/pkg/errors
2018/07/23 17:08:24 · Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/opencontainers/selinux/go-selinux/label
2018/07/23 17:08:25 ·· Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/opencontainers/selinux/go-selinux
2018/07/23 17:08:25 · Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/containerd/console
2018/07/23 17:08:25 ·· Fetching recursive dependency: github.com/opencontainers/runc/vendor/golang.org/x/sys/windows
2018/07/23 17:08:25 · Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/sirupsen/logrus
2018/07/23 17:08:25 · Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/godbus/dbus
2018/07/23 17:08:25 · Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/mrunalp/fileutils
2018/07/23 17:08:25 · Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/coreos/go-systemd/util
2018/07/23 17:08:25 ·· Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/coreos/pkg/dlopen
2018/07/23 17:08:25 · Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/golang/protobuf/proto
2018/07/23 17:08:25 · Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/syndtr/gocapability/capability
2018/07/23 17:08:25 · Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/coreos/go-systemd/dbus
2018/07/23 17:08:25 · Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/opencontainers/runtime-spec/specs-go
2018/07/23 17:08:25 · Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/seccomp/libseccomp-golang
2018/07/23 17:08:25 · Fetching recursive dependency: github.com/opencontainers/runc/vendor/github.com/docker/go-units
```
2026-03-02 17:50:39 +00:00 · 2018-07-23 17:09:16 +02:00
parent a74f203668
commit a82ba60760
477 changed files with 184518 additions and 3724 deletions
--- a/vendor/github.com/opencontainers/runc/libcontainer/LICENSE
+++ b/vendor/github.com/opencontainers/runc/libcontainer/LICENSE
@@ -0,0 +1,191 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   Copyright 2014 Docker, Inc.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
--- a/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor.go
@@ -0,0 +1,54 @@
+// +build apparmor,linux
+
+package apparmor
+
+import (
+	"fmt"
+	"io/ioutil"
+	"os"
+)
+
+// IsEnabled reports whether apparmor is usable on the host: the apparmor securityfs directory and /sbin/apparmor_parser must exist, $container must be unset, and the module's "enabled" parameter must begin with 'Y'.
+func IsEnabled() bool {
+	if _, err := os.Stat("/sys/kernel/security/apparmor"); err == nil && os.Getenv("container") == "" {
+		if _, err = os.Stat("/sbin/apparmor_parser"); err == nil {
+			buf, err := ioutil.ReadFile("/sys/module/apparmor/parameters/enabled")
+			return err == nil && len(buf) > 1 && buf[0] == 'Y' // requires a leading 'Y' followed by at least one more byte
+		}
+	}
+	return false // any failed check above means "not enabled"
+}
+
+func setprocattr(attr, value string) error { // writes value to /proc/self/attr/<attr>; returns the open or write error, if any
+	// Under AppArmor you can only change your own attr, so use /proc/self/
+	// instead of /proc/<tid>/ like libapparmor does
+	path := fmt.Sprintf("/proc/self/attr/%s", attr)
+
+	f, err := os.OpenFile(path, os.O_WRONLY, 0) // write-only, no O_CREATE: the attr file must already exist
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+
+	_, err = fmt.Fprintf(f, "%s", value) // value is written verbatim, with no trailing newline
+	return err
+}
+
+// changeOnExec reimplements aa_change_onexec from libapparmor in Go by writing "exec <name>" to the "exec" proc attribute via setprocattr.
+func changeOnExec(name string) error {
+	value := "exec " + name
+	if err := setprocattr("exec", value); err != nil {
+		return fmt.Errorf("apparmor failed to apply profile: %s", err) // underlying error is formatted into the message, not wrapped
+	}
+	return nil
+}
+
+// ApplyProfile will apply the profile with the specified name to the process after
+// the next exec. An empty name is a no-op that returns nil.
+func ApplyProfile(name string) error {
+	if name == "" {
+		return nil
+	}
+
+	return changeOnExec(name)
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_disabled.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_disabled.go
@@ -0,0 +1,20 @@
+// +build !apparmor !linux
+
+package apparmor
+
+import (
+	"errors"
+)
+
+var ErrApparmorNotEnabled = errors.New("apparmor: config provided but apparmor not supported") // returned by ApplyProfile in this build when a non-empty profile name is given
+
+func IsEnabled() bool { // IsEnabled always reports false in this build variant (see the "!apparmor !linux" build constraint)
+	return false
+}
+
+func ApplyProfile(name string) error { // ApplyProfile cannot apply anything in this build variant; it only validates that no profile was requested
+	if name != "" {
+		return ErrApparmorNotEnabled // a profile was requested but cannot be honoured
+	}
+	return nil // empty name: nothing was requested, so nothing to do
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/capabilities_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/capabilities_linux.go
@@ -0,0 +1,113 @@
+// +build linux
+
+package libcontainer
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/opencontainers/runc/libcontainer/configs"
+	"github.com/syndtr/gocapability/capability"
+)
+
+const allCapabilityTypes = capability.CAPS | capability.BOUNDS | capability.AMBS // mask passed to Clear and Apply in ApplyCaps
+
+var capabilityMap map[string]capability.Cap // maps "CAP_<NAME>" strings to capability values; populated in init
+
+func init() { // fills capabilityMap with every capability from capability.List() that does not exceed `last`
+	capabilityMap = make(map[string]capability.Cap)
+	last := capability.CAP_LAST_CAP
+	// workaround for RHEL6 which has no /proc/sys/kernel/cap_last_cap
+	if last == capability.Cap(63) {
+		last = capability.CAP_BLOCK_SUSPEND
+	}
+	for _, cap := range capability.List() {
+		if cap > last { // skip capabilities numbered above the detected maximum
+			continue
+		}
+		capKey := fmt.Sprintf("CAP_%s", strings.ToUpper(cap.String())) // key is "CAP_" plus the upper-cased capability name
+		capabilityMap[capKey] = cap
+	}
+}
+
+func newContainerCapList(capConfig *configs.Capabilities) (*containerCapabilities, error) { // resolves each capability name in capConfig through capabilityMap; fails on the first unknown name
+	bounding := []capability.Cap{}
+	for _, c := range capConfig.Bounding {
+		v, ok := capabilityMap[c]
+		if !ok {
+			return nil, fmt.Errorf("unknown capability %q", c)
+		}
+		bounding = append(bounding, v)
+	}
+	effective := []capability.Cap{}
+	for _, c := range capConfig.Effective {
+		v, ok := capabilityMap[c]
+		if !ok {
+			return nil, fmt.Errorf("unknown capability %q", c)
+		}
+		effective = append(effective, v)
+	}
+	inheritable := []capability.Cap{}
+	for _, c := range capConfig.Inheritable {
+		v, ok := capabilityMap[c]
+		if !ok {
+			return nil, fmt.Errorf("unknown capability %q", c)
+		}
+		inheritable = append(inheritable, v)
+	}
+	permitted := []capability.Cap{}
+	for _, c := range capConfig.Permitted {
+		v, ok := capabilityMap[c]
+		if !ok {
+			return nil, fmt.Errorf("unknown capability %q", c)
+		}
+		permitted = append(permitted, v)
+	}
+	ambient := []capability.Cap{}
+	for _, c := range capConfig.Ambient {
+		v, ok := capabilityMap[c]
+		if !ok {
+			return nil, fmt.Errorf("unknown capability %q", c)
+		}
+		ambient = append(ambient, v)
+	}
+	pid, err := capability.NewPid(0) // capability handle later mutated by ApplyBoundingSet and ApplyCaps
+	if err != nil {
+		return nil, err
+	}
+	return &containerCapabilities{
+		bounding:    bounding,
+		effective:   effective,
+		inheritable: inheritable,
+		permitted:   permitted,
+		ambient:     ambient,
+		pid:         pid,
+	}, nil
+}
+
+type containerCapabilities struct { // resolved capability sets plus the handle they are applied through
+	pid         capability.Capabilities // handle obtained from capability.NewPid(0) in newContainerCapList
+	bounding    []capability.Cap        // resolved from configs.Capabilities.Bounding
+	effective   []capability.Cap        // resolved from configs.Capabilities.Effective
+	inheritable []capability.Cap        // resolved from configs.Capabilities.Inheritable
+	permitted   []capability.Cap        // resolved from configs.Capabilities.Permitted
+	ambient     []capability.Cap        // resolved from configs.Capabilities.Ambient
+}
+
+// ApplyBoundingSet clears the capability bounding set, sets it to the configured bounding whitelist (c.bounding), and applies only that set.
+func (c *containerCapabilities) ApplyBoundingSet() error {
+	c.pid.Clear(capability.BOUNDS)
+	c.pid.Set(capability.BOUNDS, c.bounding...)
+	return c.pid.Apply(capability.BOUNDS)
+}
+
+// ApplyCaps sets all the capabilities for the current process in the config: every set is cleared, refilled from the configured lists, and then applied.
+func (c *containerCapabilities) ApplyCaps() error {
+	c.pid.Clear(allCapabilityTypes)
+	c.pid.Set(capability.BOUNDS, c.bounding...)
+	c.pid.Set(capability.PERMITTED, c.permitted...)
+	c.pid.Set(capability.INHERITABLE, c.inheritable...)
+	c.pid.Set(capability.EFFECTIVE, c.effective...)
+	c.pid.Set(capability.AMBIENT, c.ambient...)
+	return c.pid.Apply(allCapabilityTypes)
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go
@@ -9,7 +9,7 @@ import (
 )

 type Manager interface {
-	// Apply cgroup configuration to the process with the specified pid
+	// Applies cgroup configuration to the process with the specified pid
 	Apply(pid int) error

 	// Returns the PIDs inside the cgroup set
@@ -27,9 +27,9 @@ type Manager interface {
 	// Destroys the cgroup set
 	Destroy() error

-	// NewCgroupManager() and LoadCgroupManager() require following attributes:
+	// The option funcs SystemdCgroups() and Cgroupfs() require the following attributes:
 	// 	Paths   map[string]string
-	// 	Cgroups *cgroups.Cgroup
+	// 	Cgroups *configs.Cgroup
 	// Paths maps cgroup subsystem to path at which it is mounted.
 	// Cgroups specifies specific cgroup settings for the various subsystems

@@ -37,7 +37,7 @@ type Manager interface {
 	// restore the object later.
 	GetPaths() map[string]string

-	// Set the cgroup as configured.
+	// Sets the cgroup as configured.
 	Set(container *configs.Config) error
 }

--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_test.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_test.go
@@ -1,18 +0,0 @@
-// +build linux
-
-package cgroups
-
-import (
-	"testing"
-)
-
-func TestParseCgroups(t *testing.T) {
-	cgroups, err := ParseCgroupFile("/proc/self/cgroup")
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	if _, ok := cgroups["cpu"]; !ok {
-		t.Fail()
-	}
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/apply_raw.go
@@ -9,11 +9,11 @@ import (
 	"io/ioutil"
 	"os"
 	"path/filepath"
-	"strconv"
 	"sync"

 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/configs"
+	libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
 )

 var (
@@ -30,8 +30,8 @@ var (
 		&NetPrioGroup{},
 		&PerfEventGroup{},
 		&FreezerGroup{},
+		&NameGroup{GroupName: "name=systemd", Join: true},
 	}
-	CgroupProcesses  = "cgroup.procs"
 	HugePageSizes, _ = cgroups.GetHugePageSize()
 )

@@ -104,6 +104,8 @@ func (m *Manager) Apply(pid int) (err error) {
 	if m.Cgroups == nil {
 		return nil
 	}
+	m.mu.Lock()
+	defer m.mu.Unlock()

 	var c = m.Cgroups

@@ -112,8 +114,8 @@ func (m *Manager) Apply(pid int) (err error) {
 		return err
 	}

+	m.Paths = make(map[string]string)
 	if c.Paths != nil {
-		paths := make(map[string]string)
 		for name, path := range c.Paths {
 			_, err := d.path(name)
 			if err != nil {
@@ -122,35 +124,39 @@ func (m *Manager) Apply(pid int) (err error) {
 				}
 				return err
 			}
-			paths[name] = path
+			m.Paths[name] = path
 		}
-		m.Paths = paths
 		return cgroups.EnterPid(m.Paths, pid)
 	}

-	paths := make(map[string]string)
-	defer func() {
-		if err != nil {
-			cgroups.RemovePaths(paths)
-		}
-	}()
 	for _, sys := range subsystems {
-		if err := sys.Apply(d); err != nil {
-			return err
-		}
 		// TODO: Apply should, ideally, be reentrant or be broken up into a separate
 		// create and join phase so that the cgroup hierarchy for a container can be
 		// created then join consists of writing the process pids to cgroup.procs
 		p, err := d.path(sys.Name())
 		if err != nil {
-			if cgroups.IsNotFound(err) {
+			// The non-presence of the devices subsystem is
+			// considered fatal for security reasons.
+			if cgroups.IsNotFound(err) && sys.Name() != "devices" {
 				continue
 			}
 			return err
 		}
-		paths[sys.Name()] = p
+		m.Paths[sys.Name()] = p
+
+		if err := sys.Apply(d); err != nil {
+			if os.IsPermission(err) && m.Cgroups.Path == "" {
+				// If we didn't set a cgroup path, then let's defer the error here
+				// until we know whether we have set limits or not.
+				// If we hadn't set limits, then it's ok that we couldn't join this cgroup, because
+				// it will have the same limits as its parent.
+				delete(m.Paths, sys.Name())
+				continue
+			}
+			return err
+		}
+
 	}
-	m.Paths = paths
 	return nil
 }

@@ -191,19 +197,20 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
 }

 func (m *Manager) Set(container *configs.Config) error {
-	for _, sys := range subsystems {
-		// Generate fake cgroup data.
-		d, err := getCgroupData(container.Cgroups, -1)
-		if err != nil {
-			return err
-		}
-		// Get the path, but don't error out if the cgroup wasn't found.
-		path, err := d.path(sys.Name())
-		if err != nil && !cgroups.IsNotFound(err) {
-			return err
-		}
+	// If Paths are set, then we are just joining cgroups paths
+	// and there is no need to set any values.
+	if m.Cgroups.Paths != nil {
+		return nil
+	}

+	paths := m.GetPaths()
+	for _, sys := range subsystems {
+		path := paths[sys.Name()]
 		if err := sys.Set(path, container.Cgroups); err != nil {
+			if path == "" {
+				// cgroup never applied
+				return fmt.Errorf("cannot set limits on the %s cgroup, as the container has not joined it", sys.Name())
+			}
 			return err
 		}
 	}
@@ -219,14 +226,8 @@ func (m *Manager) Set(container *configs.Config) error {
 // Freeze toggles the container's freezer cgroup depending on the state
 // provided
 func (m *Manager) Freeze(state configs.FreezerState) error {
-	d, err := getCgroupData(m.Cgroups, 0)
-	if err != nil {
-		return err
-	}
-	dir, err := d.path("freezer")
-	if err != nil {
-		return err
-	}
+	paths := m.GetPaths()
+	dir := paths["freezer"]
 	prevState := m.Cgroups.Resources.Freezer
 	m.Cgroups.Resources.Freezer = state
 	freezer, err := subsystems.Get("freezer")
@@ -242,28 +243,13 @@ func (m *Manager) Freeze(state configs.FreezerState) error {
 }

 func (m *Manager) GetPids() ([]int, error) {
-	dir, err := getCgroupPath(m.Cgroups)
-	if err != nil {
-		return nil, err
-	}
-	return cgroups.GetPids(dir)
+	paths := m.GetPaths()
+	return cgroups.GetPids(paths["devices"])
 }

 func (m *Manager) GetAllPids() ([]int, error) {
-	dir, err := getCgroupPath(m.Cgroups)
-	if err != nil {
-		return nil, err
-	}
-	return cgroups.GetAllPids(dir)
-}
-
-func getCgroupPath(c *configs.Cgroup) (string, error) {
-	d, err := getCgroupData(c, 0)
-	if err != nil {
-		return "", err
-	}
-
-	return d.path("devices")
+	paths := m.GetPaths()
+	return cgroups.GetAllPids(paths["devices"])
 }

 func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
@@ -276,38 +262,26 @@ func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
 		return nil, fmt.Errorf("cgroup: either Path or Name and Parent should be used")
 	}

-	innerPath := c.Path
+	// XXX: Do not remove this code. Path safety is important! -- cyphar
+	cgPath := libcontainerUtils.CleanPath(c.Path)
+	cgParent := libcontainerUtils.CleanPath(c.Parent)
+	cgName := libcontainerUtils.CleanPath(c.Name)
+
+	innerPath := cgPath
 	if innerPath == "" {
-		innerPath = filepath.Join(c.Parent, c.Name)
+		innerPath = filepath.Join(cgParent, cgName)
 	}

 	return &cgroupData{
 		root:      root,
-		innerPath: c.Path,
+		innerPath: innerPath,
 		config:    c,
 		pid:       pid,
 	}, nil
 }

-func (raw *cgroupData) parentPath(subsystem, mountpoint, root string) (string, error) {
-	// Use GetThisCgroupDir instead of GetInitCgroupDir, because the creating
-	// process could in container and shared pid namespace with host, and
-	// /proc/1/cgroup could point to whole other world of cgroups.
-	initPath, err := cgroups.GetThisCgroupDir(subsystem)
-	if err != nil {
-		return "", err
-	}
-	// This is needed for nested containers, because in /proc/self/cgroup we
-	// see pathes from host, which don't exist in container.
-	relDir, err := filepath.Rel(root, initPath)
-	if err != nil {
-		return "", err
-	}
-	return filepath.Join(mountpoint, relDir), nil
-}
-
 func (raw *cgroupData) path(subsystem string) (string, error) {
-	mnt, root, err := cgroups.FindCgroupMountpointAndRoot(subsystem)
+	mnt, err := cgroups.FindCgroupMountpoint(subsystem)
 	// If we didn't mount the subsystem, there is no point we make the path.
 	if err != nil {
 		return "", err
@@ -315,11 +289,14 @@ func (raw *cgroupData) path(subsystem string) (string, error) {

 	// If the cgroup name/path is absolute do not look relative to the cgroup of the init process.
 	if filepath.IsAbs(raw.innerPath) {
-		// Sometimes subsystems can be mounted togethger as 'cpu,cpuacct'.
+		// Sometimes subsystems can be mounted together as 'cpu,cpuacct'.
 		return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil
 	}

-	parentPath, err := raw.parentPath(subsystem, mnt, root)
+	// Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating
+	// process could be in a container and share the pid namespace with the host,
+	// in which case /proc/1/cgroup could point to a whole other world of cgroups.
+	parentPath, err := cgroups.GetOwnCgroupPath(subsystem)
 	if err != nil {
 		return "", err
 	}
@@ -335,7 +312,7 @@ func (raw *cgroupData) join(subsystem string) (string, error) {
 	if err := os.MkdirAll(path, 0755); err != nil {
 		return "", err
 	}
-	if err := writeFile(path, CgroupProcesses, strconv.Itoa(raw.pid)); err != nil {
+	if err := cgroups.WriteCgroupProc(path, raw.pid); err != nil {
 		return "", err
 	}
 	return path, nil
@@ -345,9 +322,12 @@ func writeFile(dir, file, data string) error {
 	// Normally dir should not be empty, one case is that cgroup subsystem
 	// is not mounted, we will get empty dir, and we want it fail here.
 	if dir == "" {
-		return fmt.Errorf("no such directory for %s.", file)
+		return fmt.Errorf("no such directory for %s", file)
 	}
-	return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
+	if err := ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700); err != nil {
+		return fmt.Errorf("failed to write %v to %v: %v", data, file, err)
+	}
+	return nil
 }

 func readFile(dir, file string) (string, error) {
@@ -365,8 +345,8 @@ func removePath(p string, err error) error {
 	return nil
 }

-func CheckCpushares(path string, c int64) error {
-	var cpuShares int64
+func CheckCpushares(path string, c uint64) error {
+	var cpuShares uint64

 	if c == 0 {
 		return nil
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio_test.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio_test.go
@@ -1,636 +0,0 @@
-// +build linux
-
-package fs
-
-import (
-	"strconv"
-	"testing"
-
-	"github.com/opencontainers/runc/libcontainer/cgroups"
-	"github.com/opencontainers/runc/libcontainer/configs"
-)
-
-const (
-	sectorsRecursiveContents      = `8:0 1024`
-	serviceBytesRecursiveContents = `8:0 Read 100
-8:0 Write 200
-8:0 Sync 300
-8:0 Async 500
-8:0 Total 500
-Total 500`
-	servicedRecursiveContents = `8:0 Read 10
-8:0 Write 40
-8:0 Sync 20
-8:0 Async 30
-8:0 Total 50
-Total 50`
-	queuedRecursiveContents = `8:0 Read 1
-8:0 Write 4
-8:0 Sync 2
-8:0 Async 3
-8:0 Total 5
-Total 5`
-	serviceTimeRecursiveContents = `8:0 Read 173959
-8:0 Write 0
-8:0 Sync 0
-8:0 Async 173959
-8:0 Total 17395
-Total 17395`
-	waitTimeRecursiveContents = `8:0 Read 15571
-8:0 Write 0
-8:0 Sync 0
-8:0 Async 15571
-8:0 Total 15571`
-	mergedRecursiveContents = `8:0 Read 5
-8:0 Write 10
-8:0 Sync 0
-8:0 Async 0
-8:0 Total 15
-Total 15`
-	timeRecursiveContents = `8:0 8`
-	throttleServiceBytes  = `8:0 Read 11030528
-8:0 Write 23
-8:0 Sync 42
-8:0 Async 11030528
-8:0 Total 11030528
-252:0 Read 11030528
-252:0 Write 23
-252:0 Sync 42
-252:0 Async 11030528
-252:0 Total 11030528
-Total 22061056`
-	throttleServiced = `8:0 Read 164
-8:0 Write 23
-8:0 Sync 42
-8:0 Async 164
-8:0 Total 164
-252:0 Read 164
-252:0 Write 23
-252:0 Sync 42
-252:0 Async 164
-252:0 Total 164
-Total 328`
-)
-
-func appendBlkioStatEntry(blkioStatEntries *[]cgroups.BlkioStatEntry, major, minor, value uint64, op string) {
-	*blkioStatEntries = append(*blkioStatEntries, cgroups.BlkioStatEntry{Major: major, Minor: minor, Value: value, Op: op})
-}
-
-func TestBlkioSetWeight(t *testing.T) {
-	helper := NewCgroupTestUtil("blkio", t)
-	defer helper.cleanup()
-
-	const (
-		weightBefore = 100
-		weightAfter  = 200
-	)
-
-	helper.writeFileContents(map[string]string{
-		"blkio.weight": strconv.Itoa(weightBefore),
-	})
-
-	helper.CgroupData.config.Resources.BlkioWeight = weightAfter
-	blkio := &BlkioGroup{}
-	if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
-		t.Fatal(err)
-	}
-
-	value, err := getCgroupParamUint(helper.CgroupPath, "blkio.weight")
-	if err != nil {
-		t.Fatalf("Failed to parse blkio.weight - %s", err)
-	}
-
-	if value != weightAfter {
-		t.Fatal("Got the wrong value, set blkio.weight failed.")
-	}
-}
-
-func TestBlkioSetWeightDevice(t *testing.T) {
-	helper := NewCgroupTestUtil("blkio", t)
-	defer helper.cleanup()
-
-	const (
-		weightDeviceBefore = "8:0 400"
-	)
-
-	wd := configs.NewWeightDevice(8, 0, 500, 0)
-	weightDeviceAfter := wd.WeightString()
-
-	helper.writeFileContents(map[string]string{
-		"blkio.weight_device": weightDeviceBefore,
-	})
-
-	helper.CgroupData.config.Resources.BlkioWeightDevice = []*configs.WeightDevice{wd}
-	blkio := &BlkioGroup{}
-	if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
-		t.Fatal(err)
-	}
-
-	value, err := getCgroupParamString(helper.CgroupPath, "blkio.weight_device")
-	if err != nil {
-		t.Fatalf("Failed to parse blkio.weight_device - %s", err)
-	}
-
-	if value != weightDeviceAfter {
-		t.Fatal("Got the wrong value, set blkio.weight_device failed.")
-	}
-}
-
-// regression #274
-func TestBlkioSetMultipleWeightDevice(t *testing.T) {
-	helper := NewCgroupTestUtil("blkio", t)
-	defer helper.cleanup()
-
-	const (
-		weightDeviceBefore = "8:0 400"
-	)
-
-	wd1 := configs.NewWeightDevice(8, 0, 500, 0)
-	wd2 := configs.NewWeightDevice(8, 16, 500, 0)
-	// we cannot actually set and check both because normal ioutil.WriteFile
-	// when writing to cgroup file will overwrite the whole file content instead
-	// of updating it as the kernel is doing. Just check the second device
-	// is present will suffice for the test to ensure multiple writes are done.
-	weightDeviceAfter := wd2.WeightString()
-
-	helper.writeFileContents(map[string]string{
-		"blkio.weight_device": weightDeviceBefore,
-	})
-
-	helper.CgroupData.config.Resources.BlkioWeightDevice = []*configs.WeightDevice{wd1, wd2}
-	blkio := &BlkioGroup{}
-	if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
-		t.Fatal(err)
-	}
-
-	value, err := getCgroupParamString(helper.CgroupPath, "blkio.weight_device")
-	if err != nil {
-		t.Fatalf("Failed to parse blkio.weight_device - %s", err)
-	}
-
-	if value != weightDeviceAfter {
-		t.Fatal("Got the wrong value, set blkio.weight_device failed.")
-	}
-}
-
-func TestBlkioStats(t *testing.T) {
-	helper := NewCgroupTestUtil("blkio", t)
-	defer helper.cleanup()
-	helper.writeFileContents(map[string]string{
-		"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
-		"blkio.io_serviced_recursive":      servicedRecursiveContents,
-		"blkio.io_queued_recursive":        queuedRecursiveContents,
-		"blkio.io_service_time_recursive":  serviceTimeRecursiveContents,
-		"blkio.io_wait_time_recursive":     waitTimeRecursiveContents,
-		"blkio.io_merged_recursive":        mergedRecursiveContents,
-		"blkio.time_recursive":             timeRecursiveContents,
-		"blkio.sectors_recursive":          sectorsRecursiveContents,
-	})
-
-	blkio := &BlkioGroup{}
-	actualStats := *cgroups.NewStats()
-	err := blkio.GetStats(helper.CgroupPath, &actualStats)
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	// Verify expected stats.
-	expectedStats := cgroups.BlkioStats{}
-	appendBlkioStatEntry(&expectedStats.SectorsRecursive, 8, 0, 1024, "")
-
-	appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 100, "Read")
-	appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 200, "Write")
-	appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 300, "Sync")
-	appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 500, "Async")
-	appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 500, "Total")
-
-	appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 10, "Read")
-	appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 40, "Write")
-	appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 20, "Sync")
-	appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 30, "Async")
-	appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 50, "Total")
-
-	appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 1, "Read")
-	appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 4, "Write")
-	appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 2, "Sync")
-	appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 3, "Async")
-	appendBlkioStatEntry(&expectedStats.IoQueuedRecursive, 8, 0, 5, "Total")
-
-	appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 173959, "Read")
-	appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 0, "Write")
-	appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 0, "Sync")
-	appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 173959, "Async")
-	appendBlkioStatEntry(&expectedStats.IoServiceTimeRecursive, 8, 0, 17395, "Total")
-
-	appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 15571, "Read")
-	appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 0, "Write")
-	appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 0, "Sync")
-	appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 15571, "Async")
-	appendBlkioStatEntry(&expectedStats.IoWaitTimeRecursive, 8, 0, 15571, "Total")
-
-	appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 5, "Read")
-	appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 10, "Write")
-	appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 0, "Sync")
-	appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 0, "Async")
-	appendBlkioStatEntry(&expectedStats.IoMergedRecursive, 8, 0, 15, "Total")
-
-	appendBlkioStatEntry(&expectedStats.IoTimeRecursive, 8, 0, 8, "")
-
-	expectBlkioStatsEquals(t, expectedStats, actualStats.BlkioStats)
-}
-
-func TestBlkioStatsNoSectorsFile(t *testing.T) {
-	helper := NewCgroupTestUtil("blkio", t)
-	defer helper.cleanup()
-	helper.writeFileContents(map[string]string{
-		"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
-		"blkio.io_serviced_recursive":      servicedRecursiveContents,
-		"blkio.io_queued_recursive":        queuedRecursiveContents,
-		"blkio.io_service_time_recursive":  serviceTimeRecursiveContents,
-		"blkio.io_wait_time_recursive":     waitTimeRecursiveContents,
-		"blkio.io_merged_recursive":        mergedRecursiveContents,
-		"blkio.time_recursive":             timeRecursiveContents,
-	})
-
-	blkio := &BlkioGroup{}
-	actualStats := *cgroups.NewStats()
-	err := blkio.GetStats(helper.CgroupPath, &actualStats)
-	if err != nil {
-		t.Fatalf("Failed unexpectedly: %s", err)
-	}
-}
-
-func TestBlkioStatsNoServiceBytesFile(t *testing.T) {
-	helper := NewCgroupTestUtil("blkio", t)
-	defer helper.cleanup()
-	helper.writeFileContents(map[string]string{
-		"blkio.io_serviced_recursive":     servicedRecursiveContents,
-		"blkio.io_queued_recursive":       queuedRecursiveContents,
-		"blkio.sectors_recursive":         sectorsRecursiveContents,
-		"blkio.io_service_time_recursive": serviceTimeRecursiveContents,
-		"blkio.io_wait_time_recursive":    waitTimeRecursiveContents,
-		"blkio.io_merged_recursive":       mergedRecursiveContents,
-		"blkio.time_recursive":            timeRecursiveContents,
-	})
-
-	blkio := &BlkioGroup{}
-	actualStats := *cgroups.NewStats()
-	err := blkio.GetStats(helper.CgroupPath, &actualStats)
-	if err != nil {
-		t.Fatalf("Failed unexpectedly: %s", err)
-	}
-}
-
-func TestBlkioStatsNoServicedFile(t *testing.T) {
-	helper := NewCgroupTestUtil("blkio", t)
-	defer helper.cleanup()
-	helper.writeFileContents(map[string]string{
-		"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
-		"blkio.io_queued_recursive":        queuedRecursiveContents,
-		"blkio.sectors_recursive":          sectorsRecursiveContents,
-		"blkio.io_service_time_recursive":  serviceTimeRecursiveContents,
-		"blkio.io_wait_time_recursive":     waitTimeRecursiveContents,
-		"blkio.io_merged_recursive":        mergedRecursiveContents,
-		"blkio.time_recursive":             timeRecursiveContents,
-	})
-
-	blkio := &BlkioGroup{}
-	actualStats := *cgroups.NewStats()
-	err := blkio.GetStats(helper.CgroupPath, &actualStats)
-	if err != nil {
-		t.Fatalf("Failed unexpectedly: %s", err)
-	}
-}
-
-func TestBlkioStatsNoQueuedFile(t *testing.T) {
-	helper := NewCgroupTestUtil("blkio", t)
-	defer helper.cleanup()
-	helper.writeFileContents(map[string]string{
-		"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
-		"blkio.io_serviced_recursive":      servicedRecursiveContents,
-		"blkio.sectors_recursive":          sectorsRecursiveContents,
-		"blkio.io_service_time_recursive":  serviceTimeRecursiveContents,
-		"blkio.io_wait_time_recursive":     waitTimeRecursiveContents,
-		"blkio.io_merged_recursive":        mergedRecursiveContents,
-		"blkio.time_recursive":             timeRecursiveContents,
-	})
-
-	blkio := &BlkioGroup{}
-	actualStats := *cgroups.NewStats()
-	err := blkio.GetStats(helper.CgroupPath, &actualStats)
-	if err != nil {
-		t.Fatalf("Failed unexpectedly: %s", err)
-	}
-}
-
-func TestBlkioStatsNoServiceTimeFile(t *testing.T) {
-	if testing.Short() {
-		t.Skip("skipping test in short mode.")
-	}
-	helper := NewCgroupTestUtil("blkio", t)
-	defer helper.cleanup()
-	helper.writeFileContents(map[string]string{
-		"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
-		"blkio.io_serviced_recursive":      servicedRecursiveContents,
-		"blkio.io_queued_recursive":        queuedRecursiveContents,
-		"blkio.io_wait_time_recursive":     waitTimeRecursiveContents,
-		"blkio.io_merged_recursive":        mergedRecursiveContents,
-		"blkio.time_recursive":             timeRecursiveContents,
-		"blkio.sectors_recursive":          sectorsRecursiveContents,
-	})
-
-	blkio := &BlkioGroup{}
-	actualStats := *cgroups.NewStats()
-	err := blkio.GetStats(helper.CgroupPath, &actualStats)
-	if err != nil {
-		t.Fatalf("Failed unexpectedly: %s", err)
-	}
-}
-
-func TestBlkioStatsNoWaitTimeFile(t *testing.T) {
-	if testing.Short() {
-		t.Skip("skipping test in short mode.")
-	}
-	helper := NewCgroupTestUtil("blkio", t)
-	defer helper.cleanup()
-	helper.writeFileContents(map[string]string{
-		"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
-		"blkio.io_serviced_recursive":      servicedRecursiveContents,
-		"blkio.io_queued_recursive":        queuedRecursiveContents,
-		"blkio.io_service_time_recursive":  serviceTimeRecursiveContents,
-		"blkio.io_merged_recursive":        mergedRecursiveContents,
-		"blkio.time_recursive":             timeRecursiveContents,
-		"blkio.sectors_recursive":          sectorsRecursiveContents,
-	})
-
-	blkio := &BlkioGroup{}
-	actualStats := *cgroups.NewStats()
-	err := blkio.GetStats(helper.CgroupPath, &actualStats)
-	if err != nil {
-		t.Fatalf("Failed unexpectedly: %s", err)
-	}
-}
-
-func TestBlkioStatsNoMergedFile(t *testing.T) {
-	if testing.Short() {
-		t.Skip("skipping test in short mode.")
-	}
-	helper := NewCgroupTestUtil("blkio", t)
-	defer helper.cleanup()
-	helper.writeFileContents(map[string]string{
-		"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
-		"blkio.io_serviced_recursive":      servicedRecursiveContents,
-		"blkio.io_queued_recursive":        queuedRecursiveContents,
-		"blkio.io_service_time_recursive":  serviceTimeRecursiveContents,
-		"blkio.io_wait_time_recursive":     waitTimeRecursiveContents,
-		"blkio.time_recursive":             timeRecursiveContents,
-		"blkio.sectors_recursive":          sectorsRecursiveContents,
-	})
-
-	blkio := &BlkioGroup{}
-	actualStats := *cgroups.NewStats()
-	err := blkio.GetStats(helper.CgroupPath, &actualStats)
-	if err != nil {
-		t.Fatalf("Failed unexpectedly: %s", err)
-	}
-}
-
-func TestBlkioStatsNoTimeFile(t *testing.T) {
-	if testing.Short() {
-		t.Skip("skipping test in short mode.")
-	}
-	helper := NewCgroupTestUtil("blkio", t)
-	defer helper.cleanup()
-	helper.writeFileContents(map[string]string{
-		"blkio.io_service_bytes_recursive": serviceBytesRecursiveContents,
-		"blkio.io_serviced_recursive":      servicedRecursiveContents,
-		"blkio.io_queued_recursive":        queuedRecursiveContents,
-		"blkio.io_service_time_recursive":  serviceTimeRecursiveContents,
-		"blkio.io_wait_time_recursive":     waitTimeRecursiveContents,
-		"blkio.io_merged_recursive":        mergedRecursiveContents,
-		"blkio.sectors_recursive":          sectorsRecursiveContents,
-	})
-
-	blkio := &BlkioGroup{}
-	actualStats := *cgroups.NewStats()
-	err := blkio.GetStats(helper.CgroupPath, &actualStats)
-	if err != nil {
-		t.Fatalf("Failed unexpectedly: %s", err)
-	}
-}
-
-func TestBlkioStatsUnexpectedNumberOfFields(t *testing.T) {
-	helper := NewCgroupTestUtil("blkio", t)
-	defer helper.cleanup()
-	helper.writeFileContents(map[string]string{
-		"blkio.io_service_bytes_recursive": "8:0 Read 100 100",
-		"blkio.io_serviced_recursive":      servicedRecursiveContents,
-		"blkio.io_queued_recursive":        queuedRecursiveContents,
-		"blkio.sectors_recursive":          sectorsRecursiveContents,
-		"blkio.io_service_time_recursive":  serviceTimeRecursiveContents,
-		"blkio.io_wait_time_recursive":     waitTimeRecursiveContents,
-		"blkio.io_merged_recursive":        mergedRecursiveContents,
-		"blkio.time_recursive":             timeRecursiveContents,
-	})
-
-	blkio := &BlkioGroup{}
-	actualStats := *cgroups.NewStats()
-	err := blkio.GetStats(helper.CgroupPath, &actualStats)
-	if err == nil {
-		t.Fatal("Expected to fail, but did not")
-	}
-}
-
-func TestBlkioStatsUnexpectedFieldType(t *testing.T) {
-	helper := NewCgroupTestUtil("blkio", t)
-	defer helper.cleanup()
-	helper.writeFileContents(map[string]string{
-		"blkio.io_service_bytes_recursive": "8:0 Read Write",
-		"blkio.io_serviced_recursive":      servicedRecursiveContents,
-		"blkio.io_queued_recursive":        queuedRecursiveContents,
-		"blkio.sectors_recursive":          sectorsRecursiveContents,
-		"blkio.io_service_time_recursive":  serviceTimeRecursiveContents,
-		"blkio.io_wait_time_recursive":     waitTimeRecursiveContents,
-		"blkio.io_merged_recursive":        mergedRecursiveContents,
-		"blkio.time_recursive":             timeRecursiveContents,
-	})
-
-	blkio := &BlkioGroup{}
-	actualStats := *cgroups.NewStats()
-	err := blkio.GetStats(helper.CgroupPath, &actualStats)
-	if err == nil {
-		t.Fatal("Expected to fail, but did not")
-	}
-}
-
-func TestNonCFQBlkioStats(t *testing.T) {
-	helper := NewCgroupTestUtil("blkio", t)
-	defer helper.cleanup()
-	helper.writeFileContents(map[string]string{
-		"blkio.io_service_bytes_recursive": "",
-		"blkio.io_serviced_recursive":      "",
-		"blkio.io_queued_recursive":        "",
-		"blkio.sectors_recursive":          "",
-		"blkio.io_service_time_recursive":  "",
-		"blkio.io_wait_time_recursive":     "",
-		"blkio.io_merged_recursive":        "",
-		"blkio.time_recursive":             "",
-		"blkio.throttle.io_service_bytes":  throttleServiceBytes,
-		"blkio.throttle.io_serviced":       throttleServiced,
-	})
-
-	blkio := &BlkioGroup{}
-	actualStats := *cgroups.NewStats()
-	err := blkio.GetStats(helper.CgroupPath, &actualStats)
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	// Verify expected stats.
-	expectedStats := cgroups.BlkioStats{}
-
-	appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 11030528, "Read")
-	appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 23, "Write")
-	appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 42, "Sync")
-	appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 11030528, "Async")
-	appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 8, 0, 11030528, "Total")
-	appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 11030528, "Read")
-	appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 23, "Write")
-	appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 42, "Sync")
-	appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 11030528, "Async")
-	appendBlkioStatEntry(&expectedStats.IoServiceBytesRecursive, 252, 0, 11030528, "Total")
-
-	appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 164, "Read")
-	appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 23, "Write")
-	appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 42, "Sync")
-	appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 164, "Async")
-	appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 8, 0, 164, "Total")
-	appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 164, "Read")
-	appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 23, "Write")
-	appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 42, "Sync")
-	appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 164, "Async")
-	appendBlkioStatEntry(&expectedStats.IoServicedRecursive, 252, 0, 164, "Total")
-
-	expectBlkioStatsEquals(t, expectedStats, actualStats.BlkioStats)
-}
-
-func TestBlkioSetThrottleReadBpsDevice(t *testing.T) {
-	helper := NewCgroupTestUtil("blkio", t)
-	defer helper.cleanup()
-
-	const (
-		throttleBefore = `8:0 1024`
-	)
-
-	td := configs.NewThrottleDevice(8, 0, 2048)
-	throttleAfter := td.String()
-
-	helper.writeFileContents(map[string]string{
-		"blkio.throttle.read_bps_device": throttleBefore,
-	})
-
-	helper.CgroupData.config.Resources.BlkioThrottleReadBpsDevice = []*configs.ThrottleDevice{td}
-	blkio := &BlkioGroup{}
-	if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
-		t.Fatal(err)
-	}
-
-	value, err := getCgroupParamString(helper.CgroupPath, "blkio.throttle.read_bps_device")
-	if err != nil {
-		t.Fatalf("Failed to parse blkio.throttle.read_bps_device - %s", err)
-	}
-
-	if value != throttleAfter {
-		t.Fatal("Got the wrong value, set blkio.throttle.read_bps_device failed.")
-	}
-}
-func TestBlkioSetThrottleWriteBpsDevice(t *testing.T) {
-	helper := NewCgroupTestUtil("blkio", t)
-	defer helper.cleanup()
-
-	const (
-		throttleBefore = `8:0 1024`
-	)
-
-	td := configs.NewThrottleDevice(8, 0, 2048)
-	throttleAfter := td.String()
-
-	helper.writeFileContents(map[string]string{
-		"blkio.throttle.write_bps_device": throttleBefore,
-	})
-
-	helper.CgroupData.config.Resources.BlkioThrottleWriteBpsDevice = []*configs.ThrottleDevice{td}
-	blkio := &BlkioGroup{}
-	if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
-		t.Fatal(err)
-	}
-
-	value, err := getCgroupParamString(helper.CgroupPath, "blkio.throttle.write_bps_device")
-	if err != nil {
-		t.Fatalf("Failed to parse blkio.throttle.write_bps_device - %s", err)
-	}
-
-	if value != throttleAfter {
-		t.Fatal("Got the wrong value, set blkio.throttle.write_bps_device failed.")
-	}
-}
-func TestBlkioSetThrottleReadIOpsDevice(t *testing.T) {
-	helper := NewCgroupTestUtil("blkio", t)
-	defer helper.cleanup()
-
-	const (
-		throttleBefore = `8:0 1024`
-	)
-
-	td := configs.NewThrottleDevice(8, 0, 2048)
-	throttleAfter := td.String()
-
-	helper.writeFileContents(map[string]string{
-		"blkio.throttle.read_iops_device": throttleBefore,
-	})
-
-	helper.CgroupData.config.Resources.BlkioThrottleReadIOPSDevice = []*configs.ThrottleDevice{td}
-	blkio := &BlkioGroup{}
-	if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
-		t.Fatal(err)
-	}
-
-	value, err := getCgroupParamString(helper.CgroupPath, "blkio.throttle.read_iops_device")
-	if err != nil {
-		t.Fatalf("Failed to parse blkio.throttle.read_iops_device - %s", err)
-	}
-
-	if value != throttleAfter {
-		t.Fatal("Got the wrong value, set blkio.throttle.read_iops_device failed.")
-	}
-}
-func TestBlkioSetThrottleWriteIOpsDevice(t *testing.T) {
-	helper := NewCgroupTestUtil("blkio", t)
-	defer helper.cleanup()
-
-	const (
-		throttleBefore = `8:0 1024`
-	)
-
-	td := configs.NewThrottleDevice(8, 0, 2048)
-	throttleAfter := td.String()
-
-	helper.writeFileContents(map[string]string{
-		"blkio.throttle.write_iops_device": throttleBefore,
-	})
-
-	helper.CgroupData.config.Resources.BlkioThrottleWriteIOPSDevice = []*configs.ThrottleDevice{td}
-	blkio := &BlkioGroup{}
-	if err := blkio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
-		t.Fatal(err)
-	}
-
-	value, err := getCgroupParamString(helper.CgroupPath, "blkio.throttle.write_iops_device")
-	if err != nil {
-		t.Fatalf("Failed to parse blkio.throttle.write_iops_device - %s", err)
-	}
-
-	if value != throttleAfter {
-		t.Fatal("Got the wrong value, set blkio.throttle.write_iops_device failed.")
-	}
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go
@@ -22,21 +22,59 @@ func (s *CpuGroup) Name() string {
 func (s *CpuGroup) Apply(d *cgroupData) error {
 	// We always want to join the cpu group, to allow fair cpu scheduling
 	// on a container basis
-	_, err := d.join("cpu")
+	path, err := d.path("cpu")
 	if err != nil && !cgroups.IsNotFound(err) {
 		return err
 	}
+	return s.ApplyDir(path, d.config, d.pid)
+}
+
+func (s *CpuGroup) ApplyDir(path string, cgroup *configs.Cgroup, pid int) error {
+	// This might happen if we have no cpu cgroup mounted.
+	// Just do nothing and don't fail.
+	if path == "" {
+		return nil
+	}
+	if err := os.MkdirAll(path, 0755); err != nil {
+		return err
+	}
+	// We should set the real-time group scheduling settings before moving
+	// in the process because if the process is already in SCHED_RR mode
+	// and no RT bandwidth is set, adding it will fail.
+	if err := s.SetRtSched(path, cgroup); err != nil {
+		return err
+	}
+	// because we are not using d.join we need to place the pid into the procs file
+	// unlike the other subsystems
+	if err := cgroups.WriteCgroupProc(path, pid); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+func (s *CpuGroup) SetRtSched(path string, cgroup *configs.Cgroup) error {
+	if cgroup.Resources.CpuRtPeriod != 0 {
+		if err := writeFile(path, "cpu.rt_period_us", strconv.FormatUint(cgroup.Resources.CpuRtPeriod, 10)); err != nil {
+			return err
+		}
+	}
+	if cgroup.Resources.CpuRtRuntime != 0 {
+		if err := writeFile(path, "cpu.rt_runtime_us", strconv.FormatInt(cgroup.Resources.CpuRtRuntime, 10)); err != nil {
+			return err
+		}
+	}
 	return nil
 }

 func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error {
 	if cgroup.Resources.CpuShares != 0 {
-		if err := writeFile(path, "cpu.shares", strconv.FormatInt(cgroup.Resources.CpuShares, 10)); err != nil {
+		if err := writeFile(path, "cpu.shares", strconv.FormatUint(cgroup.Resources.CpuShares, 10)); err != nil {
 			return err
 		}
 	}
 	if cgroup.Resources.CpuPeriod != 0 {
-		if err := writeFile(path, "cpu.cfs_period_us", strconv.FormatInt(cgroup.Resources.CpuPeriod, 10)); err != nil {
+		if err := writeFile(path, "cpu.cfs_period_us", strconv.FormatUint(cgroup.Resources.CpuPeriod, 10)); err != nil {
 			return err
 		}
 	}
@@ -45,15 +83,8 @@ func (s *CpuGroup) Set(path string, cgroup *configs.Cgroup) error {
 			return err
 		}
 	}
-	if cgroup.Resources.CpuRtPeriod != 0 {
-		if err := writeFile(path, "cpu.rt_period_us", strconv.FormatInt(cgroup.Resources.CpuRtPeriod, 10)); err != nil {
-			return err
-		}
-	}
-	if cgroup.Resources.CpuRtRuntime != 0 {
-		if err := writeFile(path, "cpu.rt_runtime_us", strconv.FormatInt(cgroup.Resources.CpuRtRuntime, 10)); err != nil {
-			return err
-		}
+	if err := s.SetRtSched(path, cgroup); err != nil {
+		return err
 	}

 	return nil
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu_test.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu_test.go
@@ -1,163 +0,0 @@
-// +build linux
-
-package fs
-
-import (
-	"fmt"
-	"strconv"
-	"testing"
-
-	"github.com/opencontainers/runc/libcontainer/cgroups"
-)
-
-func TestCpuSetShares(t *testing.T) {
-	helper := NewCgroupTestUtil("cpu", t)
-	defer helper.cleanup()
-
-	const (
-		sharesBefore = 1024
-		sharesAfter  = 512
-	)
-
-	helper.writeFileContents(map[string]string{
-		"cpu.shares": strconv.Itoa(sharesBefore),
-	})
-
-	helper.CgroupData.config.Resources.CpuShares = sharesAfter
-	cpu := &CpuGroup{}
-	if err := cpu.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
-		t.Fatal(err)
-	}
-
-	value, err := getCgroupParamUint(helper.CgroupPath, "cpu.shares")
-	if err != nil {
-		t.Fatalf("Failed to parse cpu.shares - %s", err)
-	}
-
-	if value != sharesAfter {
-		t.Fatal("Got the wrong value, set cpu.shares failed.")
-	}
-}
-
-func TestCpuSetBandWidth(t *testing.T) {
-	helper := NewCgroupTestUtil("cpu", t)
-	defer helper.cleanup()
-
-	const (
-		quotaBefore     = 8000
-		quotaAfter      = 5000
-		periodBefore    = 10000
-		periodAfter     = 7000
-		rtRuntimeBefore = 8000
-		rtRuntimeAfter  = 5000
-		rtPeriodBefore  = 10000
-		rtPeriodAfter   = 7000
-	)
-
-	helper.writeFileContents(map[string]string{
-		"cpu.cfs_quota_us":  strconv.Itoa(quotaBefore),
-		"cpu.cfs_period_us": strconv.Itoa(periodBefore),
-		"cpu.rt_runtime_us": strconv.Itoa(rtRuntimeBefore),
-		"cpu.rt_period_us":  strconv.Itoa(rtPeriodBefore),
-	})
-
-	helper.CgroupData.config.Resources.CpuQuota = quotaAfter
-	helper.CgroupData.config.Resources.CpuPeriod = periodAfter
-	helper.CgroupData.config.Resources.CpuRtRuntime = rtRuntimeAfter
-	helper.CgroupData.config.Resources.CpuRtPeriod = rtPeriodAfter
-	cpu := &CpuGroup{}
-	if err := cpu.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
-		t.Fatal(err)
-	}
-
-	quota, err := getCgroupParamUint(helper.CgroupPath, "cpu.cfs_quota_us")
-	if err != nil {
-		t.Fatalf("Failed to parse cpu.cfs_quota_us - %s", err)
-	}
-	if quota != quotaAfter {
-		t.Fatal("Got the wrong value, set cpu.cfs_quota_us failed.")
-	}
-
-	period, err := getCgroupParamUint(helper.CgroupPath, "cpu.cfs_period_us")
-	if err != nil {
-		t.Fatalf("Failed to parse cpu.cfs_period_us - %s", err)
-	}
-	if period != periodAfter {
-		t.Fatal("Got the wrong value, set cpu.cfs_period_us failed.")
-	}
-	rtRuntime, err := getCgroupParamUint(helper.CgroupPath, "cpu.rt_runtime_us")
-	if err != nil {
-		t.Fatalf("Failed to parse cpu.rt_runtime_us - %s", err)
-	}
-	if rtRuntime != rtRuntimeAfter {
-		t.Fatal("Got the wrong value, set cpu.rt_runtime_us failed.")
-	}
-	rtPeriod, err := getCgroupParamUint(helper.CgroupPath, "cpu.rt_period_us")
-	if err != nil {
-		t.Fatalf("Failed to parse cpu.rt_period_us - %s", err)
-	}
-	if rtPeriod != rtPeriodAfter {
-		t.Fatal("Got the wrong value, set cpu.rt_period_us failed.")
-	}
-}
-
-func TestCpuStats(t *testing.T) {
-	helper := NewCgroupTestUtil("cpu", t)
-	defer helper.cleanup()
-
-	const (
-		kNrPeriods     = 2000
-		kNrThrottled   = 200
-		kThrottledTime = uint64(18446744073709551615)
-	)
-
-	cpuStatContent := fmt.Sprintf("nr_periods %d\n nr_throttled %d\n throttled_time %d\n",
-		kNrPeriods, kNrThrottled, kThrottledTime)
-	helper.writeFileContents(map[string]string{
-		"cpu.stat": cpuStatContent,
-	})
-
-	cpu := &CpuGroup{}
-	actualStats := *cgroups.NewStats()
-	err := cpu.GetStats(helper.CgroupPath, &actualStats)
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	expectedStats := cgroups.ThrottlingData{
-		Periods:          kNrPeriods,
-		ThrottledPeriods: kNrThrottled,
-		ThrottledTime:    kThrottledTime}
-
-	expectThrottlingDataEquals(t, expectedStats, actualStats.CpuStats.ThrottlingData)
-}
-
-func TestNoCpuStatFile(t *testing.T) {
-	helper := NewCgroupTestUtil("cpu", t)
-	defer helper.cleanup()
-
-	cpu := &CpuGroup{}
-	actualStats := *cgroups.NewStats()
-	err := cpu.GetStats(helper.CgroupPath, &actualStats)
-	if err != nil {
-		t.Fatal("Expected not to fail, but did")
-	}
-}
-
-func TestInvalidCpuStat(t *testing.T) {
-	helper := NewCgroupTestUtil("cpu", t)
-	defer helper.cleanup()
-	cpuStatContent := `nr_periods 2000
-	nr_throttled 200
-	throttled_time fortytwo`
-	helper.writeFileContents(map[string]string{
-		"cpu.stat": cpuStatContent,
-	})
-
-	cpu := &CpuGroup{}
-	actualStats := *cgroups.NewStats()
-	err := cpu.GetStats(helper.CgroupPath, &actualStats)
-	if err == nil {
-		t.Fatal("Expected failed stat parsing.")
-	}
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go
@@ -8,7 +8,6 @@ import (
 	"io/ioutil"
 	"os"
 	"path/filepath"
-	"strconv"

 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/configs"
@@ -58,16 +57,34 @@ func (s *CpusetGroup) ApplyDir(dir string, cgroup *configs.Cgroup, pid int) erro
 	if dir == "" {
 		return nil
 	}
-	root, err := getCgroupRoot()
+	mountInfo, err := ioutil.ReadFile("/proc/self/mountinfo")
 	if err != nil {
 		return err
 	}
-	if err := s.ensureParent(dir, root); err != nil {
+	root := filepath.Dir(cgroups.GetClosestMountpointAncestor(dir, string(mountInfo)))
+	// 'ensureParent' starts with the parent because we don't want to
+	// explicitly inherit from the parent; it could conflict with
+	// 'cpuset.cpu_exclusive'.
+	if err := s.ensureParent(filepath.Dir(dir), root); err != nil {
 		return err
 	}
+	if err := os.MkdirAll(dir, 0755); err != nil {
+		return err
+	}
+	// We didn't inherit cpuset configs from the parent, but we have
+	// to ensure cpuset configs are set before moving the task into the
+	// cgroup.
+	// The logic is: if the user specified cpuset configs, use those
+	// specified configs; otherwise, inherit from the parent. This makes
+	// cpuset configs work correctly with 'cpuset.cpu_exclusive', and
+	// keeps backward compatibility.
+	if err := s.ensureCpusAndMems(dir, cgroup); err != nil {
+		return err
+	}
+
 	// because we are not using d.join we need to place the pid into the procs file
 	// unlike the other subsystems
-	if err := writeFile(dir, "cgroup.procs", strconv.Itoa(pid)); err != nil {
+	if err := cgroups.WriteCgroupProc(dir, pid); err != nil {
 		return err
 	}

@@ -137,3 +154,10 @@ func (s *CpusetGroup) copyIfNeeded(current, parent string) error {
 func (s *CpusetGroup) isEmpty(b []byte) bool {
 	return len(bytes.Trim(b, "\n")) == 0
 }
+
+func (s *CpusetGroup) ensureCpusAndMems(path string, cgroup *configs.Cgroup) error {
+	if err := s.Set(path, cgroup); err != nil {
+		return err
+	}
+	return s.copyIfNeeded(path, filepath.Dir(path))
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset_test.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset_test.go
@@ -1,65 +0,0 @@
-// +build linux
-
-package fs
-
-import (
-	"testing"
-)
-
-func TestCpusetSetCpus(t *testing.T) {
-	helper := NewCgroupTestUtil("cpuset", t)
-	defer helper.cleanup()
-
-	const (
-		cpusBefore = "0"
-		cpusAfter  = "1-3"
-	)
-
-	helper.writeFileContents(map[string]string{
-		"cpuset.cpus": cpusBefore,
-	})
-
-	helper.CgroupData.config.Resources.CpusetCpus = cpusAfter
-	cpuset := &CpusetGroup{}
-	if err := cpuset.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
-		t.Fatal(err)
-	}
-
-	value, err := getCgroupParamString(helper.CgroupPath, "cpuset.cpus")
-	if err != nil {
-		t.Fatalf("Failed to parse cpuset.cpus - %s", err)
-	}
-
-	if value != cpusAfter {
-		t.Fatal("Got the wrong value, set cpuset.cpus failed.")
-	}
-}
-
-func TestCpusetSetMems(t *testing.T) {
-	helper := NewCgroupTestUtil("cpuset", t)
-	defer helper.cleanup()
-
-	const (
-		memsBefore = "0"
-		memsAfter  = "1"
-	)
-
-	helper.writeFileContents(map[string]string{
-		"cpuset.mems": memsBefore,
-	})
-
-	helper.CgroupData.config.Resources.CpusetMems = memsAfter
-	cpuset := &CpusetGroup{}
-	if err := cpuset.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
-		t.Fatal(err)
-	}
-
-	value, err := getCgroupParamString(helper.CgroupPath, "cpuset.mems")
-	if err != nil {
-		t.Fatalf("Failed to parse cpuset.mems - %s", err)
-	}
-
-	if value != memsAfter {
-		t.Fatal("Got the wrong value, set cpuset.mems failed.")
-	}
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go
@@ -5,6 +5,7 @@ package fs
 import (
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/configs"
+	"github.com/opencontainers/runc/libcontainer/system"
 )

 type DevicesGroup struct {
@@ -25,6 +26,10 @@ func (s *DevicesGroup) Apply(d *cgroupData) error {
 }

 func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error {
+	if system.RunningInUserNS() {
+		return nil
+	}
+
 	devices := cgroup.Resources.Devices
 	if len(devices) > 0 {
 		for _, dev := range devices {
@@ -38,21 +43,23 @@ func (s *DevicesGroup) Set(path string, cgroup *configs.Cgroup) error {
 		}
 		return nil
 	}
-	if !cgroup.Resources.AllowAllDevices {
-		if err := writeFile(path, "devices.deny", "a"); err != nil {
-			return err
-		}
-
-		for _, dev := range cgroup.Resources.AllowedDevices {
-			if err := writeFile(path, "devices.allow", dev.CgroupString()); err != nil {
+	if cgroup.Resources.AllowAllDevices != nil {
+		if *cgroup.Resources.AllowAllDevices == false {
+			if err := writeFile(path, "devices.deny", "a"); err != nil {
 				return err
 			}
-		}
-		return nil
-	}

-	if err := writeFile(path, "devices.allow", "a"); err != nil {
-		return err
+			for _, dev := range cgroup.Resources.AllowedDevices {
+				if err := writeFile(path, "devices.allow", dev.CgroupString()); err != nil {
+					return err
+				}
+			}
+			return nil
+		}
+
+		if err := writeFile(path, "devices.allow", "a"); err != nil {
+			return err
+		}
 	}

 	for _, dev := range cgroup.Resources.DeniedDevices {
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices_test.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices_test.go
@@ -1,84 +0,0 @@
-// +build linux
-
-package fs
-
-import (
-	"testing"
-
-	"github.com/opencontainers/runc/libcontainer/configs"
-)
-
-var (
-	allowedDevices = []*configs.Device{
-		{
-			Path:        "/dev/zero",
-			Type:        'c',
-			Major:       1,
-			Minor:       5,
-			Permissions: "rwm",
-			FileMode:    0666,
-		},
-	}
-	allowedList   = "c 1:5 rwm"
-	deniedDevices = []*configs.Device{
-		{
-			Path:        "/dev/null",
-			Type:        'c',
-			Major:       1,
-			Minor:       3,
-			Permissions: "rwm",
-			FileMode:    0666,
-		},
-	}
-	deniedList = "c 1:3 rwm"
-)
-
-func TestDevicesSetAllow(t *testing.T) {
-	helper := NewCgroupTestUtil("devices", t)
-	defer helper.cleanup()
-
-	helper.writeFileContents(map[string]string{
-		"devices.deny": "a",
-	})
-
-	helper.CgroupData.config.Resources.AllowAllDevices = false
-	helper.CgroupData.config.Resources.AllowedDevices = allowedDevices
-	devices := &DevicesGroup{}
-	if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
-		t.Fatal(err)
-	}
-
-	value, err := getCgroupParamString(helper.CgroupPath, "devices.allow")
-	if err != nil {
-		t.Fatalf("Failed to parse devices.allow - %s", err)
-	}
-
-	if value != allowedList {
-		t.Fatal("Got the wrong value, set devices.allow failed.")
-	}
-}
-
-func TestDevicesSetDeny(t *testing.T) {
-	helper := NewCgroupTestUtil("devices", t)
-	defer helper.cleanup()
-
-	helper.writeFileContents(map[string]string{
-		"devices.allow": "a",
-	})
-
-	helper.CgroupData.config.Resources.AllowAllDevices = true
-	helper.CgroupData.config.Resources.DeniedDevices = deniedDevices
-	devices := &DevicesGroup{}
-	if err := devices.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
-		t.Fatal(err)
-	}
-
-	value, err := getCgroupParamString(helper.CgroupPath, "devices.deny")
-	if err != nil {
-		t.Fatalf("Failed to parse devices.deny - %s", err)
-	}
-
-	if value != deniedList {
-		t.Fatal("Got the wrong value, set devices.deny failed.")
-	}
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go
@@ -29,11 +29,15 @@ func (s *FreezerGroup) Apply(d *cgroupData) error {
 func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error {
 	switch cgroup.Resources.Freezer {
 	case configs.Frozen, configs.Thawed:
-		if err := writeFile(path, "freezer.state", string(cgroup.Resources.Freezer)); err != nil {
-			return err
-		}
-
 		for {
+			// In case this loop does not exit because it doesn't get the expected
+			// state, let's write this state again, hoping it's going to be properly
+			// set this time. Otherwise, this loop could run infinitely, waiting for
+			// a state change that would never happen.
+			if err := writeFile(path, "freezer.state", string(cgroup.Resources.Freezer)); err != nil {
+				return err
+			}
+
 			state, err := readFile(path, "freezer.state")
 			if err != nil {
 				return err
@@ -41,6 +45,7 @@ func (s *FreezerGroup) Set(path string, cgroup *configs.Cgroup) error {
 			if strings.TrimSpace(state) == string(cgroup.Resources.Freezer) {
 				break
 			}
+
 			time.Sleep(1 * time.Millisecond)
 		}
 	case configs.Undefined:
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer_test.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer_test.go
@@ -1,47 +0,0 @@
-// +build linux
-
-package fs
-
-import (
-	"testing"
-
-	"github.com/opencontainers/runc/libcontainer/configs"
-)
-
-func TestFreezerSetState(t *testing.T) {
-	helper := NewCgroupTestUtil("freezer", t)
-	defer helper.cleanup()
-
-	helper.writeFileContents(map[string]string{
-		"freezer.state": string(configs.Frozen),
-	})
-
-	helper.CgroupData.config.Resources.Freezer = configs.Thawed
-	freezer := &FreezerGroup{}
-	if err := freezer.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
-		t.Fatal(err)
-	}
-
-	value, err := getCgroupParamString(helper.CgroupPath, "freezer.state")
-	if err != nil {
-		t.Fatalf("Failed to parse freezer.state - %s", err)
-	}
-	if value != string(configs.Thawed) {
-		t.Fatal("Got the wrong value, set freezer.state failed.")
-	}
-}
-
-func TestFreezerSetInvalidState(t *testing.T) {
-	helper := NewCgroupTestUtil("freezer", t)
-	defer helper.cleanup()
-
-	const (
-		invalidArg configs.FreezerState = "Invalid"
-	)
-
-	helper.CgroupData.config.Resources.Freezer = invalidArg
-	freezer := &FreezerGroup{}
-	if err := freezer.Set(helper.CgroupPath, helper.CgroupData.config); err == nil {
-		t.Fatal("Failed to return invalid argument error")
-	}
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb_test.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb_test.go
@@ -1,154 +0,0 @@
-// +build linux
-
-package fs
-
-import (
-	"fmt"
-	"strconv"
-	"testing"
-
-	"github.com/opencontainers/runc/libcontainer/cgroups"
-	"github.com/opencontainers/runc/libcontainer/configs"
-)
-
-const (
-	hugetlbUsageContents    = "128\n"
-	hugetlbMaxUsageContents = "256\n"
-	hugetlbFailcnt          = "100\n"
-)
-
-var (
-	usage    = "hugetlb.%s.usage_in_bytes"
-	limit    = "hugetlb.%s.limit_in_bytes"
-	maxUsage = "hugetlb.%s.max_usage_in_bytes"
-	failcnt  = "hugetlb.%s.failcnt"
-)
-
-func TestHugetlbSetHugetlb(t *testing.T) {
-	helper := NewCgroupTestUtil("hugetlb", t)
-	defer helper.cleanup()
-
-	const (
-		hugetlbBefore = 256
-		hugetlbAfter  = 512
-	)
-
-	for _, pageSize := range HugePageSizes {
-		helper.writeFileContents(map[string]string{
-			fmt.Sprintf(limit, pageSize): strconv.Itoa(hugetlbBefore),
-		})
-	}
-
-	for _, pageSize := range HugePageSizes {
-		helper.CgroupData.config.Resources.HugetlbLimit = []*configs.HugepageLimit{
-			{
-				Pagesize: pageSize,
-				Limit:    hugetlbAfter,
-			},
-		}
-		hugetlb := &HugetlbGroup{}
-		if err := hugetlb.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
-			t.Fatal(err)
-		}
-	}
-
-	for _, pageSize := range HugePageSizes {
-		limit := fmt.Sprintf(limit, pageSize)
-		value, err := getCgroupParamUint(helper.CgroupPath, limit)
-		if err != nil {
-			t.Fatalf("Failed to parse %s - %s", limit, err)
-		}
-		if value != hugetlbAfter {
-			t.Fatalf("Set hugetlb.limit_in_bytes failed. Expected: %v, Got: %v", hugetlbAfter, value)
-		}
-	}
-}
-
-func TestHugetlbStats(t *testing.T) {
-	helper := NewCgroupTestUtil("hugetlb", t)
-	defer helper.cleanup()
-	for _, pageSize := range HugePageSizes {
-		helper.writeFileContents(map[string]string{
-			fmt.Sprintf(usage, pageSize):    hugetlbUsageContents,
-			fmt.Sprintf(maxUsage, pageSize): hugetlbMaxUsageContents,
-			fmt.Sprintf(failcnt, pageSize):  hugetlbFailcnt,
-		})
-	}
-
-	hugetlb := &HugetlbGroup{}
-	actualStats := *cgroups.NewStats()
-	err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
-	if err != nil {
-		t.Fatal(err)
-	}
-	expectedStats := cgroups.HugetlbStats{Usage: 128, MaxUsage: 256, Failcnt: 100}
-	for _, pageSize := range HugePageSizes {
-		expectHugetlbStatEquals(t, expectedStats, actualStats.HugetlbStats[pageSize])
-	}
-}
-
-func TestHugetlbStatsNoUsageFile(t *testing.T) {
-	helper := NewCgroupTestUtil("hugetlb", t)
-	defer helper.cleanup()
-	helper.writeFileContents(map[string]string{
-		maxUsage: hugetlbMaxUsageContents,
-	})
-
-	hugetlb := &HugetlbGroup{}
-	actualStats := *cgroups.NewStats()
-	err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
-	if err == nil {
-		t.Fatal("Expected failure")
-	}
-}
-
-func TestHugetlbStatsNoMaxUsageFile(t *testing.T) {
-	helper := NewCgroupTestUtil("hugetlb", t)
-	defer helper.cleanup()
-	for _, pageSize := range HugePageSizes {
-		helper.writeFileContents(map[string]string{
-			fmt.Sprintf(usage, pageSize): hugetlbUsageContents,
-		})
-	}
-
-	hugetlb := &HugetlbGroup{}
-	actualStats := *cgroups.NewStats()
-	err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
-	if err == nil {
-		t.Fatal("Expected failure")
-	}
-}
-
-func TestHugetlbStatsBadUsageFile(t *testing.T) {
-	helper := NewCgroupTestUtil("hugetlb", t)
-	defer helper.cleanup()
-	for _, pageSize := range HugePageSizes {
-		helper.writeFileContents(map[string]string{
-			fmt.Sprintf(usage, pageSize): "bad",
-			maxUsage:                     hugetlbMaxUsageContents,
-		})
-	}
-
-	hugetlb := &HugetlbGroup{}
-	actualStats := *cgroups.NewStats()
-	err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
-	if err == nil {
-		t.Fatal("Expected failure")
-	}
-}
-
-func TestHugetlbStatsBadMaxUsageFile(t *testing.T) {
-	helper := NewCgroupTestUtil("hugetlb", t)
-	defer helper.cleanup()
-	helper.writeFileContents(map[string]string{
-		usage:    hugetlbUsageContents,
-		maxUsage: "bad",
-	})
-
-	hugetlb := &HugetlbGroup{}
-	actualStats := *cgroups.NewStats()
-	err := hugetlb.GetStats(helper.CgroupPath, &actualStats)
-	if err == nil {
-		t.Fatal("Expected failure")
-	}
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go
@@ -5,13 +5,23 @@ package fs
 import (
 	"bufio"
 	"fmt"
+	"io/ioutil"
 	"os"
 	"path/filepath"
 	"strconv"
 	"strings"
+	"syscall" // only for Errno

 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/configs"
+
+	"golang.org/x/sys/unix"
+)
+
+const (
+	cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
+	cgroupMemorySwapLimit   = "memory.memsw.limit_in_bytes"
+	cgroupMemoryLimit       = "memory.limit_in_bytes"
 )

 type MemoryGroup struct {
@@ -25,20 +35,23 @@ func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
 	path, err := d.path("memory")
 	if err != nil && !cgroups.IsNotFound(err) {
 		return err
+	} else if path == "" {
+		return nil
 	}
 	if memoryAssigned(d.config) {
-		if path != "" {
+		if _, err := os.Stat(path); os.IsNotExist(err) {
 			if err := os.MkdirAll(path, 0755); err != nil {
 				return err
 			}
-		}
-		// We have to set kernel memory here, as we can't change it once
-		// processes have been attached.
-		if err := s.SetKernelMemory(path, d.config); err != nil {
-			return err
+			// Only enable kernel memory accounting when this cgroup
+			// is created by libcontainer, otherwise we might get an
+			// error when people use `cgroupsPath` to join an existing
+			// cgroup whose kernel memory is not initialized.
+			if err := EnableKernelMemoryAccounting(path); err != nil {
+				return err
+			}
 		}
 	}
-
 	defer func() {
 		if err != nil {
 			os.RemoveAll(path)
@@ -54,30 +67,115 @@ func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
 	return nil
 }

-func (s *MemoryGroup) SetKernelMemory(path string, cgroup *configs.Cgroup) error {
-	// This has to be done separately because it has special constraints (it
-	// can't be done after there are processes attached to the cgroup).
-	if cgroup.Resources.KernelMemory > 0 {
-		if err := writeFile(path, "memory.kmem.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemory, 10)); err != nil {
+func EnableKernelMemoryAccounting(path string) error {
+	// Check if kernel memory is enabled
+	// We have to limit the kernel memory here as it won't be accounted at all
+	// until a limit is set on the cgroup and limit cannot be set once the
+	// cgroup has children, or if there are already tasks in the cgroup.
+	for _, i := range []int64{1, -1} {
+		if err := setKernelMemory(path, i); err != nil {
 			return err
 		}
 	}
 	return nil
 }

+func setKernelMemory(path string, kernelMemoryLimit int64) error {
+	if path == "" {
+		return fmt.Errorf("no such directory for %s", cgroupKernelMemoryLimit)
+	}
+	if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
+		// kernel memory is not enabled on the system so we should do nothing
+		return nil
+	}
+	if err := ioutil.WriteFile(filepath.Join(path, cgroupKernelMemoryLimit), []byte(strconv.FormatInt(kernelMemoryLimit, 10)), 0700); err != nil {
+		// Check if the error number returned by the syscall is "EBUSY"
+		// The EBUSY error is returned on attempts to write to the
+		// memory.kmem.limit_in_bytes file if the cgroup has children or
+		// once tasks have been attached to the cgroup
+		if pathErr, ok := err.(*os.PathError); ok {
+			if errNo, ok := pathErr.Err.(syscall.Errno); ok {
+				if errNo == unix.EBUSY {
+					return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit)
+				}
+			}
+		}
+		return fmt.Errorf("failed to write %v to %v: %v", kernelMemoryLimit, cgroupKernelMemoryLimit, err)
+	}
+	return nil
+}
+
+func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
+	// If the memory update is set to -1 we should also
+	// set swap to -1, it means unlimited memory.
+	if cgroup.Resources.Memory == -1 {
+		// Only set swap if it's enabled in kernel
+		if cgroups.PathExists(filepath.Join(path, cgroupMemorySwapLimit)) {
+			cgroup.Resources.MemorySwap = -1
+		}
+	}
+
+	// When memory and swap memory are both set, we need to handle the cases
+	// for updating container.
+	if cgroup.Resources.Memory != 0 && cgroup.Resources.MemorySwap != 0 {
+		memoryUsage, err := getMemoryData(path, "")
+		if err != nil {
+			return err
+		}
+
+		// When updating the memory limit, we should adapt the write sequence
+		// for memory and swap memory, so it won't fail because the new
+		// value and the old value don't fit kernel's validation.
+		if cgroup.Resources.MemorySwap == -1 || memoryUsage.Limit < uint64(cgroup.Resources.MemorySwap) {
+			if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
+				return err
+			}
+			if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
+				return err
+			}
+		} else {
+			if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
+				return err
+			}
+			if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
+				return err
+			}
+		}
+	} else {
+		if cgroup.Resources.Memory != 0 {
+			if err := writeFile(path, cgroupMemoryLimit, strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
+				return err
+			}
+		}
+		if cgroup.Resources.MemorySwap != 0 {
+			if err := writeFile(path, cgroupMemorySwapLimit, strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
+				return err
+			}
+		}
+	}
+
+	return nil
+}
+
 func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
-	if cgroup.Resources.Memory != 0 {
-		if err := writeFile(path, "memory.limit_in_bytes", strconv.FormatInt(cgroup.Resources.Memory, 10)); err != nil {
+	if err := setMemoryAndSwap(path, cgroup); err != nil {
+		return err
+	}
+
+	if cgroup.Resources.KernelMemory != 0 {
+		if err := setKernelMemory(path, cgroup.Resources.KernelMemory); err != nil {
 			return err
 		}
 	}
+
 	if cgroup.Resources.MemoryReservation != 0 {
 		if err := writeFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemoryReservation, 10)); err != nil {
 			return err
 		}
 	}
-	if cgroup.Resources.MemorySwap > 0 {
-		if err := writeFile(path, "memory.memsw.limit_in_bytes", strconv.FormatInt(cgroup.Resources.MemorySwap, 10)); err != nil {
+
+	if cgroup.Resources.KernelMemoryTCP != 0 {
+		if err := writeFile(path, "memory.kmem.tcp.limit_in_bytes", strconv.FormatInt(cgroup.Resources.KernelMemoryTCP, 10)); err != nil {
 			return err
 		}
 	}
@@ -86,14 +184,14 @@ func (s *MemoryGroup) Set(path string, cgroup *configs.Cgroup) error {
 			return err
 		}
 	}
-	if cgroup.Resources.MemorySwappiness >= 0 && cgroup.Resources.MemorySwappiness <= 100 {
-		if err := writeFile(path, "memory.swappiness", strconv.FormatInt(cgroup.Resources.MemorySwappiness, 10)); err != nil {
+	if cgroup.Resources.MemorySwappiness == nil || int64(*cgroup.Resources.MemorySwappiness) == -1 {
+		return nil
+	} else if *cgroup.Resources.MemorySwappiness <= 100 {
+		if err := writeFile(path, "memory.swappiness", strconv.FormatUint(*cgroup.Resources.MemorySwappiness, 10)); err != nil {
 			return err
 		}
-	} else if cgroup.Resources.MemorySwappiness == -1 {
-		return nil
 	} else {
-		return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", cgroup.Resources.MemorySwappiness)
+		return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", *cgroup.Resources.MemorySwappiness)
 	}

 	return nil
@@ -139,7 +237,20 @@ func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error {
 		return err
 	}
 	stats.MemoryStats.KernelUsage = kernelUsage
+	kernelTCPUsage, err := getMemoryData(path, "kmem.tcp")
+	if err != nil {
+		return err
+	}
+	stats.MemoryStats.KernelTCPUsage = kernelTCPUsage

+	useHierarchy := strings.Join([]string{"memory", "use_hierarchy"}, ".")
+	value, err := getCgroupParamUint(path, useHierarchy)
+	if err != nil {
+		return err
+	}
+	if value == 1 {
+		stats.MemoryStats.UseHierarchy = true
+	}
 	return nil
 }

@@ -148,8 +259,9 @@ func memoryAssigned(cgroup *configs.Cgroup) bool {
 		cgroup.Resources.MemoryReservation != 0 ||
 		cgroup.Resources.MemorySwap > 0 ||
 		cgroup.Resources.KernelMemory > 0 ||
+		cgroup.Resources.KernelMemoryTCP > 0 ||
 		cgroup.Resources.OomKillDisable ||
-		cgroup.Resources.MemorySwappiness != -1
+		(cgroup.Resources.MemorySwappiness != nil && int64(*cgroup.Resources.MemorySwappiness) != -1)
 }

 func getMemoryData(path, name string) (cgroups.MemoryData, error) {
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory_test.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory_test.go
@@ -1,339 +0,0 @@
-// +build linux
-
-package fs
-
-import (
-	"strconv"
-	"testing"
-
-	"github.com/opencontainers/runc/libcontainer/cgroups"
-)
-
-const (
-	memoryStatContents = `cache 512
-rss 1024`
-	memoryUsageContents    = "2048\n"
-	memoryMaxUsageContents = "4096\n"
-	memoryFailcnt          = "100\n"
-	memoryLimitContents    = "8192\n"
-)
-
-func TestMemorySetMemory(t *testing.T) {
-	helper := NewCgroupTestUtil("memory", t)
-	defer helper.cleanup()
-
-	const (
-		memoryBefore      = 314572800 // 300M
-		memoryAfter       = 524288000 // 500M
-		reservationBefore = 209715200 // 200M
-		reservationAfter  = 314572800 // 300M
-	)
-
-	helper.writeFileContents(map[string]string{
-		"memory.limit_in_bytes":      strconv.Itoa(memoryBefore),
-		"memory.soft_limit_in_bytes": strconv.Itoa(reservationBefore),
-	})
-
-	helper.CgroupData.config.Resources.Memory = memoryAfter
-	helper.CgroupData.config.Resources.MemoryReservation = reservationAfter
-	memory := &MemoryGroup{}
-	if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
-		t.Fatal(err)
-	}
-
-	value, err := getCgroupParamUint(helper.CgroupPath, "memory.limit_in_bytes")
-	if err != nil {
-		t.Fatalf("Failed to parse memory.limit_in_bytes - %s", err)
-	}
-	if value != memoryAfter {
-		t.Fatal("Got the wrong value, set memory.limit_in_bytes failed.")
-	}
-
-	value, err = getCgroupParamUint(helper.CgroupPath, "memory.soft_limit_in_bytes")
-	if err != nil {
-		t.Fatalf("Failed to parse memory.soft_limit_in_bytes - %s", err)
-	}
-	if value != reservationAfter {
-		t.Fatal("Got the wrong value, set memory.soft_limit_in_bytes failed.")
-	}
-}
-
-func TestMemorySetMemoryswap(t *testing.T) {
-	helper := NewCgroupTestUtil("memory", t)
-	defer helper.cleanup()
-
-	const (
-		memoryswapBefore = 314572800 // 300M
-		memoryswapAfter  = 524288000 // 500M
-	)
-
-	helper.writeFileContents(map[string]string{
-		"memory.memsw.limit_in_bytes": strconv.Itoa(memoryswapBefore),
-	})
-
-	helper.CgroupData.config.Resources.MemorySwap = memoryswapAfter
-	memory := &MemoryGroup{}
-	if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
-		t.Fatal(err)
-	}
-
-	value, err := getCgroupParamUint(helper.CgroupPath, "memory.memsw.limit_in_bytes")
-	if err != nil {
-		t.Fatalf("Failed to parse memory.memsw.limit_in_bytes - %s", err)
-	}
-	if value != memoryswapAfter {
-		t.Fatal("Got the wrong value, set memory.memsw.limit_in_bytes failed.")
-	}
-}
-
-func TestMemorySetKernelMemory(t *testing.T) {
-	helper := NewCgroupTestUtil("memory", t)
-	defer helper.cleanup()
-
-	const (
-		kernelMemoryBefore = 314572800 // 300M
-		kernelMemoryAfter  = 524288000 // 500M
-	)
-
-	helper.writeFileContents(map[string]string{
-		"memory.kmem.limit_in_bytes": strconv.Itoa(kernelMemoryBefore),
-	})
-
-	helper.CgroupData.config.Resources.KernelMemory = kernelMemoryAfter
-	memory := &MemoryGroup{}
-	if err := memory.SetKernelMemory(helper.CgroupPath, helper.CgroupData.config); err != nil {
-		t.Fatal(err)
-	}
-
-	value, err := getCgroupParamUint(helper.CgroupPath, "memory.kmem.limit_in_bytes")
-	if err != nil {
-		t.Fatalf("Failed to parse memory.kmem.limit_in_bytes - %s", err)
-	}
-	if value != kernelMemoryAfter {
-		t.Fatal("Got the wrong value, set memory.kmem.limit_in_bytes failed.")
-	}
-}
-
-func TestMemorySetMemorySwappinessDefault(t *testing.T) {
-	helper := NewCgroupTestUtil("memory", t)
-	defer helper.cleanup()
-
-	const (
-		swappinessBefore = 60 //deafult is 60
-		swappinessAfter  = 0
-	)
-
-	helper.writeFileContents(map[string]string{
-		"memory.swappiness": strconv.Itoa(swappinessBefore),
-	})
-
-	helper.CgroupData.config.Resources.Memory = swappinessAfter
-	memory := &MemoryGroup{}
-	if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
-		t.Fatal(err)
-	}
-
-	value, err := getCgroupParamUint(helper.CgroupPath, "memory.swappiness")
-	if err != nil {
-		t.Fatalf("Failed to parse memory.swappiness - %s", err)
-	}
-	if value != swappinessAfter {
-		t.Fatal("Got the wrong value, set memory.swappiness failed.")
-	}
-}
-
-func TestMemoryStats(t *testing.T) {
-	helper := NewCgroupTestUtil("memory", t)
-	defer helper.cleanup()
-	helper.writeFileContents(map[string]string{
-		"memory.stat":                     memoryStatContents,
-		"memory.usage_in_bytes":           memoryUsageContents,
-		"memory.limit_in_bytes":           memoryLimitContents,
-		"memory.max_usage_in_bytes":       memoryMaxUsageContents,
-		"memory.failcnt":                  memoryFailcnt,
-		"memory.memsw.usage_in_bytes":     memoryUsageContents,
-		"memory.memsw.max_usage_in_bytes": memoryMaxUsageContents,
-		"memory.memsw.failcnt":            memoryFailcnt,
-		"memory.memsw.limit_in_bytes":     memoryLimitContents,
-		"memory.kmem.usage_in_bytes":      memoryUsageContents,
-		"memory.kmem.max_usage_in_bytes":  memoryMaxUsageContents,
-		"memory.kmem.failcnt":             memoryFailcnt,
-		"memory.kmem.limit_in_bytes":      memoryLimitContents,
-	})
-
-	memory := &MemoryGroup{}
-	actualStats := *cgroups.NewStats()
-	err := memory.GetStats(helper.CgroupPath, &actualStats)
-	if err != nil {
-		t.Fatal(err)
-	}
-	expectedStats := cgroups.MemoryStats{Cache: 512, Usage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192}, SwapUsage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192}, KernelUsage: cgroups.MemoryData{Usage: 2048, MaxUsage: 4096, Failcnt: 100, Limit: 8192}, Stats: map[string]uint64{"cache": 512, "rss": 1024}}
-	expectMemoryStatEquals(t, expectedStats, actualStats.MemoryStats)
-}
-
-func TestMemoryStatsNoStatFile(t *testing.T) {
-	helper := NewCgroupTestUtil("memory", t)
-	defer helper.cleanup()
-	helper.writeFileContents(map[string]string{
-		"memory.usage_in_bytes":     memoryUsageContents,
-		"memory.max_usage_in_bytes": memoryMaxUsageContents,
-		"memory.limit_in_bytes":     memoryLimitContents,
-	})
-
-	memory := &MemoryGroup{}
-	actualStats := *cgroups.NewStats()
-	err := memory.GetStats(helper.CgroupPath, &actualStats)
-	if err != nil {
-		t.Fatal(err)
-	}
-}
-
-func TestMemoryStatsNoUsageFile(t *testing.T) {
-	helper := NewCgroupTestUtil("memory", t)
-	defer helper.cleanup()
-	helper.writeFileContents(map[string]string{
-		"memory.stat":               memoryStatContents,
-		"memory.max_usage_in_bytes": memoryMaxUsageContents,
-		"memory.limit_in_bytes":     memoryLimitContents,
-	})
-
-	memory := &MemoryGroup{}
-	actualStats := *cgroups.NewStats()
-	err := memory.GetStats(helper.CgroupPath, &actualStats)
-	if err == nil {
-		t.Fatal("Expected failure")
-	}
-}
-
-func TestMemoryStatsNoMaxUsageFile(t *testing.T) {
-	helper := NewCgroupTestUtil("memory", t)
-	defer helper.cleanup()
-	helper.writeFileContents(map[string]string{
-		"memory.stat":           memoryStatContents,
-		"memory.usage_in_bytes": memoryUsageContents,
-		"memory.limit_in_bytes": memoryLimitContents,
-	})
-
-	memory := &MemoryGroup{}
-	actualStats := *cgroups.NewStats()
-	err := memory.GetStats(helper.CgroupPath, &actualStats)
-	if err == nil {
-		t.Fatal("Expected failure")
-	}
-}
-
-func TestMemoryStatsNoLimitInBytesFile(t *testing.T) {
-	helper := NewCgroupTestUtil("memory", t)
-	defer helper.cleanup()
-	helper.writeFileContents(map[string]string{
-		"memory.stat":               memoryStatContents,
-		"memory.usage_in_bytes":     memoryUsageContents,
-		"memory.max_usage_in_bytes": memoryMaxUsageContents,
-	})
-
-	memory := &MemoryGroup{}
-	actualStats := *cgroups.NewStats()
-	err := memory.GetStats(helper.CgroupPath, &actualStats)
-	if err == nil {
-		t.Fatal("Expected failure")
-	}
-}
-
-func TestMemoryStatsBadStatFile(t *testing.T) {
-	helper := NewCgroupTestUtil("memory", t)
-	defer helper.cleanup()
-	helper.writeFileContents(map[string]string{
-		"memory.stat":               "rss rss",
-		"memory.usage_in_bytes":     memoryUsageContents,
-		"memory.max_usage_in_bytes": memoryMaxUsageContents,
-		"memory.limit_in_bytes":     memoryLimitContents,
-	})
-
-	memory := &MemoryGroup{}
-	actualStats := *cgroups.NewStats()
-	err := memory.GetStats(helper.CgroupPath, &actualStats)
-	if err == nil {
-		t.Fatal("Expected failure")
-	}
-}
-
-func TestMemoryStatsBadUsageFile(t *testing.T) {
-	helper := NewCgroupTestUtil("memory", t)
-	defer helper.cleanup()
-	helper.writeFileContents(map[string]string{
-		"memory.stat":               memoryStatContents,
-		"memory.usage_in_bytes":     "bad",
-		"memory.max_usage_in_bytes": memoryMaxUsageContents,
-		"memory.limit_in_bytes":     memoryLimitContents,
-	})
-
-	memory := &MemoryGroup{}
-	actualStats := *cgroups.NewStats()
-	err := memory.GetStats(helper.CgroupPath, &actualStats)
-	if err == nil {
-		t.Fatal("Expected failure")
-	}
-}
-
-func TestMemoryStatsBadMaxUsageFile(t *testing.T) {
-	helper := NewCgroupTestUtil("memory", t)
-	defer helper.cleanup()
-	helper.writeFileContents(map[string]string{
-		"memory.stat":               memoryStatContents,
-		"memory.usage_in_bytes":     memoryUsageContents,
-		"memory.max_usage_in_bytes": "bad",
-		"memory.limit_in_bytes":     memoryLimitContents,
-	})
-
-	memory := &MemoryGroup{}
-	actualStats := *cgroups.NewStats()
-	err := memory.GetStats(helper.CgroupPath, &actualStats)
-	if err == nil {
-		t.Fatal("Expected failure")
-	}
-}
-
-func TestMemoryStatsBadLimitInBytesFile(t *testing.T) {
-	helper := NewCgroupTestUtil("memory", t)
-	defer helper.cleanup()
-	helper.writeFileContents(map[string]string{
-		"memory.stat":               memoryStatContents,
-		"memory.usage_in_bytes":     memoryUsageContents,
-		"memory.max_usage_in_bytes": memoryMaxUsageContents,
-		"memory.limit_in_bytes":     "bad",
-	})
-
-	memory := &MemoryGroup{}
-	actualStats := *cgroups.NewStats()
-	err := memory.GetStats(helper.CgroupPath, &actualStats)
-	if err == nil {
-		t.Fatal("Expected failure")
-	}
-}
-
-func TestMemorySetOomControl(t *testing.T) {
-	helper := NewCgroupTestUtil("memory", t)
-	defer helper.cleanup()
-
-	const (
-		oom_kill_disable = 1 // disable oom killer, default is 0
-	)
-
-	helper.writeFileContents(map[string]string{
-		"memory.oom_control": strconv.Itoa(oom_kill_disable),
-	})
-
-	memory := &MemoryGroup{}
-	if err := memory.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
-		t.Fatal(err)
-	}
-
-	value, err := getCgroupParamUint(helper.CgroupPath, "memory.oom_control")
-	if err != nil {
-		t.Fatalf("Failed to parse memory.oom_control - %s", err)
-	}
-
-	if value != oom_kill_disable {
-		t.Fatalf("Got the wrong value, set memory.oom_control failed.")
-	}
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go
@@ -9,6 +9,7 @@ import (

 type NameGroup struct {
 	GroupName string
+	Join      bool
 }

 func (s *NameGroup) Name() string {
@@ -16,6 +17,10 @@ func (s *NameGroup) Name() string {
 }

 func (s *NameGroup) Apply(d *cgroupData) error {
+	if s.Join {
+		// ignore errors if the named cgroup does not exist
+		d.join(s.GroupName)
+	}
 	return nil
 }

@@ -24,6 +29,9 @@ func (s *NameGroup) Set(path string, cgroup *configs.Cgroup) error {
 }

 func (s *NameGroup) Remove(d *cgroupData) error {
+	if s.Join {
+		removePath(d.path(s.GroupName))
+	}
 	return nil
 }

--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go
@@ -3,6 +3,8 @@
 package fs

 import (
+	"strconv"
+
 	"github.com/opencontainers/runc/libcontainer/cgroups"
 	"github.com/opencontainers/runc/libcontainer/configs"
 )
@@ -23,8 +25,8 @@ func (s *NetClsGroup) Apply(d *cgroupData) error {
 }

 func (s *NetClsGroup) Set(path string, cgroup *configs.Cgroup) error {
-	if cgroup.Resources.NetClsClassid != "" {
-		if err := writeFile(path, "net_cls.classid", cgroup.Resources.NetClsClassid); err != nil {
+	if cgroup.Resources.NetClsClassid != 0 {
+		if err := writeFile(path, "net_cls.classid", strconv.FormatUint(uint64(cgroup.Resources.NetClsClassid), 10)); err != nil {
 			return err
 		}
 	}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls_test.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls_test.go
@@ -1,38 +0,0 @@
-// +build linux
-
-package fs
-
-import (
-	"testing"
-)
-
-const (
-	classidBefore = "0x100002"
-	classidAfter  = "0x100001"
-)
-
-func TestNetClsSetClassid(t *testing.T) {
-	helper := NewCgroupTestUtil("net_cls", t)
-	defer helper.cleanup()
-
-	helper.writeFileContents(map[string]string{
-		"net_cls.classid": classidBefore,
-	})
-
-	helper.CgroupData.config.Resources.NetClsClassid = classidAfter
-	netcls := &NetClsGroup{}
-	if err := netcls.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
-		t.Fatal(err)
-	}
-
-	// As we are in mock environment, we can't get correct value of classid from
-	// net_cls.classid.
-	// So. we just judge if we successfully write classid into file
-	value, err := getCgroupParamString(helper.CgroupPath, "net_cls.classid")
-	if err != nil {
-		t.Fatalf("Failed to parse net_cls.classid - %s", err)
-	}
-	if value != classidAfter {
-		t.Fatal("Got the wrong value, set net_cls.classid failed.")
-	}
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio_test.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio_test.go
@@ -1,38 +0,0 @@
-// +build linux
-
-package fs
-
-import (
-	"strings"
-	"testing"
-
-	"github.com/opencontainers/runc/libcontainer/configs"
-)
-
-var (
-	prioMap = []*configs.IfPrioMap{
-		{
-			Interface: "test",
-			Priority:  5,
-		},
-	}
-)
-
-func TestNetPrioSetIfPrio(t *testing.T) {
-	helper := NewCgroupTestUtil("net_prio", t)
-	defer helper.cleanup()
-
-	helper.CgroupData.config.Resources.NetPrioIfpriomap = prioMap
-	netPrio := &NetPrioGroup{}
-	if err := netPrio.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
-		t.Fatal(err)
-	}
-
-	value, err := getCgroupParamString(helper.CgroupPath, "net_prio.ifpriomap")
-	if err != nil {
-		t.Fatalf("Failed to parse net_prio.ifpriomap - %s", err)
-	}
-	if !strings.Contains(value, "test 5") {
-		t.Fatal("Got the wrong value, set net_prio.ifpriomap failed.")
-	}
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go
@@ -4,6 +4,7 @@ package fs

 import (
 	"fmt"
+	"path/filepath"
 	"strconv"

 	"github.com/opencontainers/runc/libcontainer/cgroups"
@@ -47,11 +48,26 @@ func (s *PidsGroup) Remove(d *cgroupData) error {
 }

 func (s *PidsGroup) GetStats(path string, stats *cgroups.Stats) error {
-	value, err := getCgroupParamUint(path, "pids.current")
+	current, err := getCgroupParamUint(path, "pids.current")
 	if err != nil {
 		return fmt.Errorf("failed to parse pids.current - %s", err)
 	}

-	stats.PidsStats.Current = value
+	maxString, err := getCgroupParamString(path, "pids.max")
+	if err != nil {
+		return fmt.Errorf("failed to parse pids.max - %s", err)
+	}
+
+	// Default if pids.max == "max" is 0 -- which represents "no limit".
+	var max uint64
+	if maxString != "max" {
+		max, err = parseUint(maxString, 10, 64)
+		if err != nil {
+			return fmt.Errorf("failed to parse pids.max - unable to parse %q as a uint from Cgroup file %q", maxString, filepath.Join(path, "pids.max"))
+		}
+	}
+
+	stats.PidsStats.Current = current
+	stats.PidsStats.Limit = max
 	return nil
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids_test.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids_test.go
@@ -1,83 +0,0 @@
-// +build linux
-
-package fs
-
-import (
-	"strconv"
-	"testing"
-
-	"github.com/opencontainers/runc/libcontainer/cgroups"
-)
-
-const (
-	maxUnlimited = -1
-	maxLimited   = 1024
-)
-
-func TestPidsSetMax(t *testing.T) {
-	helper := NewCgroupTestUtil("pids", t)
-	defer helper.cleanup()
-
-	helper.writeFileContents(map[string]string{
-		"pids.max": "max",
-	})
-
-	helper.CgroupData.config.Resources.PidsLimit = maxLimited
-	pids := &PidsGroup{}
-	if err := pids.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
-		t.Fatal(err)
-	}
-
-	value, err := getCgroupParamUint(helper.CgroupPath, "pids.max")
-	if err != nil {
-		t.Fatalf("Failed to parse pids.max - %s", err)
-	}
-
-	if value != maxLimited {
-		t.Fatalf("Expected %d, got %d for setting pids.max - limited", maxLimited, value)
-	}
-}
-
-func TestPidsSetUnlimited(t *testing.T) {
-	helper := NewCgroupTestUtil("pids", t)
-	defer helper.cleanup()
-
-	helper.writeFileContents(map[string]string{
-		"pids.max": strconv.Itoa(maxLimited),
-	})
-
-	helper.CgroupData.config.Resources.PidsLimit = maxUnlimited
-	pids := &PidsGroup{}
-	if err := pids.Set(helper.CgroupPath, helper.CgroupData.config); err != nil {
-		t.Fatal(err)
-	}
-
-	value, err := getCgroupParamString(helper.CgroupPath, "pids.max")
-	if err != nil {
-		t.Fatalf("Failed to parse pids.max - %s", err)
-	}
-
-	if value != "max" {
-		t.Fatalf("Expected %s, got %s for setting pids.max - unlimited", "max", value)
-	}
-}
-
-func TestPidsStats(t *testing.T) {
-	helper := NewCgroupTestUtil("pids", t)
-	defer helper.cleanup()
-
-	helper.writeFileContents(map[string]string{
-		"pids.current": strconv.Itoa(1337),
-		"pids.max":     strconv.Itoa(maxLimited),
-	})
-
-	pids := &PidsGroup{}
-	stats := *cgroups.NewStats()
-	if err := pids.GetStats(helper.CgroupPath, &stats); err != nil {
-		t.Fatal(err)
-	}
-
-	if stats.PidsStats.Current != 1337 {
-		t.Fatalf("Expected %d, got %d for pids.current", 1337, stats.PidsStats.Current)
-	}
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/stats_util_test.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/stats_util_test.go
@@ -1,117 +0,0 @@
-// +build linux
-
-package fs
-
-import (
-	"fmt"
-	"testing"
-
-	"github.com/Sirupsen/logrus"
-	"github.com/opencontainers/runc/libcontainer/cgroups"
-)
-
-func blkioStatEntryEquals(expected, actual []cgroups.BlkioStatEntry) error {
-	if len(expected) != len(actual) {
-		return fmt.Errorf("blkioStatEntries length do not match")
-	}
-	for i, expValue := range expected {
-		actValue := actual[i]
-		if expValue != actValue {
-			return fmt.Errorf("Expected blkio stat entry %v but found %v", expValue, actValue)
-		}
-	}
-	return nil
-}
-
-func expectBlkioStatsEquals(t *testing.T, expected, actual cgroups.BlkioStats) {
-	if err := blkioStatEntryEquals(expected.IoServiceBytesRecursive, actual.IoServiceBytesRecursive); err != nil {
-		logrus.Printf("blkio IoServiceBytesRecursive do not match - %s\n", err)
-		t.Fail()
-	}
-
-	if err := blkioStatEntryEquals(expected.IoServicedRecursive, actual.IoServicedRecursive); err != nil {
-		logrus.Printf("blkio IoServicedRecursive do not match - %s\n", err)
-		t.Fail()
-	}
-
-	if err := blkioStatEntryEquals(expected.IoQueuedRecursive, actual.IoQueuedRecursive); err != nil {
-		logrus.Printf("blkio IoQueuedRecursive do not match - %s\n", err)
-		t.Fail()
-	}
-
-	if err := blkioStatEntryEquals(expected.SectorsRecursive, actual.SectorsRecursive); err != nil {
-		logrus.Printf("blkio SectorsRecursive do not match - %s\n", err)
-		t.Fail()
-	}
-
-	if err := blkioStatEntryEquals(expected.IoServiceTimeRecursive, actual.IoServiceTimeRecursive); err != nil {
-		logrus.Printf("blkio IoServiceTimeRecursive do not match - %s\n", err)
-		t.Fail()
-	}
-
-	if err := blkioStatEntryEquals(expected.IoWaitTimeRecursive, actual.IoWaitTimeRecursive); err != nil {
-		logrus.Printf("blkio IoWaitTimeRecursive do not match - %s\n", err)
-		t.Fail()
-	}
-
-	if err := blkioStatEntryEquals(expected.IoMergedRecursive, actual.IoMergedRecursive); err != nil {
-		logrus.Printf("blkio IoMergedRecursive do not match - %v vs %v\n", expected.IoMergedRecursive, actual.IoMergedRecursive)
-		t.Fail()
-	}
-
-	if err := blkioStatEntryEquals(expected.IoTimeRecursive, actual.IoTimeRecursive); err != nil {
-		logrus.Printf("blkio IoTimeRecursive do not match - %s\n", err)
-		t.Fail()
-	}
-}
-
-func expectThrottlingDataEquals(t *testing.T, expected, actual cgroups.ThrottlingData) {
-	if expected != actual {
-		logrus.Printf("Expected throttling data %v but found %v\n", expected, actual)
-		t.Fail()
-	}
-}
-
-func expectHugetlbStatEquals(t *testing.T, expected, actual cgroups.HugetlbStats) {
-	if expected != actual {
-		logrus.Printf("Expected hugetlb stats %v but found %v\n", expected, actual)
-		t.Fail()
-	}
-}
-
-func expectMemoryStatEquals(t *testing.T, expected, actual cgroups.MemoryStats) {
-	expectMemoryDataEquals(t, expected.Usage, actual.Usage)
-	expectMemoryDataEquals(t, expected.SwapUsage, actual.SwapUsage)
-	expectMemoryDataEquals(t, expected.KernelUsage, actual.KernelUsage)
-
-	for key, expValue := range expected.Stats {
-		actValue, ok := actual.Stats[key]
-		if !ok {
-			logrus.Printf("Expected memory stat key %s not found\n", key)
-			t.Fail()
-		}
-		if expValue != actValue {
-			logrus.Printf("Expected memory stat value %d but found %d\n", expValue, actValue)
-			t.Fail()
-		}
-	}
-}
-
-func expectMemoryDataEquals(t *testing.T, expected, actual cgroups.MemoryData) {
-	if expected.Usage != actual.Usage {
-		logrus.Printf("Expected memory usage %d but found %d\n", expected.Usage, actual.Usage)
-		t.Fail()
-	}
-	if expected.MaxUsage != actual.MaxUsage {
-		logrus.Printf("Expected memory max usage %d but found %d\n", expected.MaxUsage, actual.MaxUsage)
-		t.Fail()
-	}
-	if expected.Failcnt != actual.Failcnt {
-		logrus.Printf("Expected memory failcnt %d but found %d\n", expected.Failcnt, actual.Failcnt)
-		t.Fail()
-	}
-	if expected.Limit != actual.Limit {
-		logrus.Printf("Expected memory limit %d but found %d\n", expected.Limit, actual.Limit)
-		t.Fail()
-	}
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/util_test.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/util_test.go
@@ -1,67 +0,0 @@
-// +build linux
-
-/*
-Utility for testing cgroup operations.
-
-Creates a mock of the cgroup filesystem for the duration of the test.
-*/
-package fs
-
-import (
-	"io/ioutil"
-	"os"
-	"path/filepath"
-	"testing"
-
-	"github.com/opencontainers/runc/libcontainer/configs"
-)
-
-type cgroupTestUtil struct {
-	// cgroup data to use in tests.
-	CgroupData *cgroupData
-
-	// Path to the mock cgroup directory.
-	CgroupPath string
-
-	// Temporary directory to store mock cgroup filesystem.
-	tempDir string
-	t       *testing.T
-}
-
-// Creates a new test util for the specified subsystem
-func NewCgroupTestUtil(subsystem string, t *testing.T) *cgroupTestUtil {
-	d := &cgroupData{
-		config: &configs.Cgroup{},
-	}
-	d.config.Resources = &configs.Resources{}
-	tempDir, err := ioutil.TempDir("", "cgroup_test")
-	if err != nil {
-		t.Fatal(err)
-	}
-	d.root = tempDir
-	testCgroupPath := filepath.Join(d.root, subsystem)
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	// Ensure the full mock cgroup path exists.
-	err = os.MkdirAll(testCgroupPath, 0755)
-	if err != nil {
-		t.Fatal(err)
-	}
-	return &cgroupTestUtil{CgroupData: d, CgroupPath: testCgroupPath, tempDir: tempDir, t: t}
-}
-
-func (c *cgroupTestUtil) cleanup() {
-	os.RemoveAll(c.tempDir)
-}
-
-// Write the specified contents on the mock of the specified cgroup files.
-func (c *cgroupTestUtil) writeFileContents(fileContents map[string]string) {
-	for file, contents := range fileContents {
-		err := writeFile(c.CgroupPath, file, contents)
-		if err != nil {
-			c.t.Fatal(err)
-		}
-	}
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/utils.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/utils.go
@@ -12,7 +12,6 @@ import (
 )

 var (
-	ErrNotSupportStat = errors.New("stats are not supported for subsystem")
 	ErrNotValidFormat = errors.New("line is not a valid key value format")
 )

--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/utils_test.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/utils_test.go
@@ -1,97 +0,0 @@
-// +build linux
-
-package fs
-
-import (
-	"io/ioutil"
-	"math"
-	"os"
-	"path/filepath"
-	"strconv"
-	"testing"
-)
-
-const (
-	cgroupFile  = "cgroup.file"
-	floatValue  = 2048.0
-	floatString = "2048"
-)
-
-func TestGetCgroupParamsInt(t *testing.T) {
-	// Setup tempdir.
-	tempDir, err := ioutil.TempDir("", "cgroup_utils_test")
-	if err != nil {
-		t.Fatal(err)
-	}
-	defer os.RemoveAll(tempDir)
-	tempFile := filepath.Join(tempDir, cgroupFile)
-
-	// Success.
-	err = ioutil.WriteFile(tempFile, []byte(floatString), 0755)
-	if err != nil {
-		t.Fatal(err)
-	}
-	value, err := getCgroupParamUint(tempDir, cgroupFile)
-	if err != nil {
-		t.Fatal(err)
-	} else if value != floatValue {
-		t.Fatalf("Expected %d to equal %f", value, floatValue)
-	}
-
-	// Success with new line.
-	err = ioutil.WriteFile(tempFile, []byte(floatString+"\n"), 0755)
-	if err != nil {
-		t.Fatal(err)
-	}
-	value, err = getCgroupParamUint(tempDir, cgroupFile)
-	if err != nil {
-		t.Fatal(err)
-	} else if value != floatValue {
-		t.Fatalf("Expected %d to equal %f", value, floatValue)
-	}
-
-	// Success with negative values
-	err = ioutil.WriteFile(tempFile, []byte("-12345"), 0755)
-	if err != nil {
-		t.Fatal(err)
-	}
-	value, err = getCgroupParamUint(tempDir, cgroupFile)
-	if err != nil {
-		t.Fatal(err)
-	} else if value != 0 {
-		t.Fatalf("Expected %d to equal %d", value, 0)
-	}
-
-	// Success with negative values lesser than min int64
-	s := strconv.FormatFloat(math.MinInt64, 'f', -1, 64)
-	err = ioutil.WriteFile(tempFile, []byte(s), 0755)
-	if err != nil {
-		t.Fatal(err)
-	}
-	value, err = getCgroupParamUint(tempDir, cgroupFile)
-	if err != nil {
-		t.Fatal(err)
-	} else if value != 0 {
-		t.Fatalf("Expected %d to equal %d", value, 0)
-	}
-
-	// Not a float.
-	err = ioutil.WriteFile(tempFile, []byte("not-a-float"), 0755)
-	if err != nil {
-		t.Fatal(err)
-	}
-	_, err = getCgroupParamUint(tempDir, cgroupFile)
-	if err == nil {
-		t.Fatal("Expecting error, got none")
-	}
-
-	// Unknown file.
-	err = os.Remove(tempFile)
-	if err != nil {
-		t.Fatal(err)
-	}
-	_, err = getCgroupParamUint(tempDir, cgroupFile)
-	if err == nil {
-		t.Fatal("Expecting error, got none")
-	}
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go
@@ -11,6 +11,7 @@ type ThrottlingData struct {
 	ThrottledTime uint64 `json:"throttled_time,omitempty"`
 }

+// CpuUsage denotes the usage of a CPU.
 // All CPU stats are aggregate since container inception.
 type CpuUsage struct {
 	// Total CPU time consumed.
@@ -46,14 +47,21 @@ type MemoryStats struct {
 	Usage MemoryData `json:"usage,omitempty"`
 	// usage of memory + swap
 	SwapUsage MemoryData `json:"swap_usage,omitempty"`
-	// usafe of kernel memory
-	KernelUsage MemoryData        `json:"kernel_usage,omitempty"`
-	Stats       map[string]uint64 `json:"stats,omitempty"`
+	// usage of kernel memory
+	KernelUsage MemoryData `json:"kernel_usage,omitempty"`
+	// usage of kernel TCP memory
+	KernelTCPUsage MemoryData `json:"kernel_tcp_usage,omitempty"`
+	// if true, memory usage is accounted for throughout a hierarchy of cgroups.
+	UseHierarchy bool `json:"use_hierarchy"`
+
+	Stats map[string]uint64 `json:"stats,omitempty"`
 }

 type PidsStats struct {
 	// number of pids in the cgroup
 	Current uint64 `json:"current,omitempty"`
+	// active pids hard limit
+	Limit uint64 `json:"limit,omitempty"`
 }

 type BlkioStatEntry struct {
@@ -80,7 +88,7 @@ type HugetlbStats struct {
 	Usage uint64 `json:"usage,omitempty"`
 	// maximum usage ever recorded.
 	MaxUsage uint64 `json:"max_usage,omitempty"`
-	// number of times htgetlb usage allocation failure.
+	// number of times hugetlb usage allocation failure.
 	Failcnt uint64 `json:"failcnt"`
 }

--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_nosystemd.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_nosystemd.go
@@ -1,4 +1,4 @@
-// +build !linux
+// +build !linux static_build

 package systemd

@@ -43,7 +43,7 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
 }

 func (m *Manager) Set(container *configs.Config) error {
-	return nil, fmt.Errorf("Systemd not supported")
+	return fmt.Errorf("Systemd not supported")
 }

 func (m *Manager) Freeze(state configs.FreezerState) error {
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/apply_systemd.go
@@ -1,14 +1,12 @@
-// +build linux
+// +build linux,!static_build

 package systemd

 import (
 	"errors"
 	"fmt"
-	"io/ioutil"
 	"os"
 	"path/filepath"
-	"strconv"
 	"strings"
 	"sync"
 	"time"
@@ -67,13 +65,16 @@ var subsystems = subsystemSet{

 const (
 	testScopeWait = 4
+	testSliceWait = 4
 )

 var (
 	connLock                        sync.Mutex
 	theConn                         *systemdDbus.Conn
 	hasStartTransientUnit           bool
+	hasStartTransientSliceUnit      bool
 	hasTransientDefaultDependencies bool
+	hasDelegate                     bool
 )

 func newProp(name string, units interface{}) systemdDbus.Property {
@@ -146,20 +147,52 @@ func UseSystemd() bool {

 		// Not critical because of the stop unit logic above.
 		theConn.StopUnit(scope, "replace", nil)
+
+		// Assume StartTransientUnit on a scope allows Delegate
+		hasDelegate = true
+		dl := newProp("Delegate", true)
+		if _, err := theConn.StartTransientUnit(scope, "replace", []systemdDbus.Property{dl}, nil); err != nil {
+			if dbusError, ok := err.(dbus.Error); ok {
+				if strings.Contains(dbusError.Name, "org.freedesktop.DBus.Error.PropertyReadOnly") {
+					hasDelegate = false
+				}
+			}
+		}
+
+		// Assume we have the ability to start a transient unit as a slice
+		// This was broken until systemd v229, but has been back-ported on RHEL environments >= 219
+		// For details, see: https://bugzilla.redhat.com/show_bug.cgi?id=1370299
+		hasStartTransientSliceUnit = true
+
+		// To ensure simple clean-up, we create a slice off the root with no hierarchy
+		slice := fmt.Sprintf("libcontainer_%d_systemd_test_default.slice", os.Getpid())
+		if _, err := theConn.StartTransientUnit(slice, "replace", nil, nil); err != nil {
+			if _, ok := err.(dbus.Error); ok {
+				hasStartTransientSliceUnit = false
+			}
+		}
+
+		for i := 0; i <= testSliceWait; i++ {
+			if _, err := theConn.StopUnit(slice, "replace", nil); err != nil {
+				if dbusError, ok := err.(dbus.Error); ok {
+					if strings.Contains(dbusError.Name, "org.freedesktop.systemd1.NoSuchUnit") {
+						hasStartTransientSliceUnit = false
+						break
+					}
+				}
+			} else {
+				break
+			}
+			time.Sleep(time.Millisecond)
+		}
+
+		// Not critical because of the stop unit logic above.
+		theConn.StopUnit(scope, "replace", nil)
+		theConn.StopUnit(slice, "replace", nil)
 	}
 	return hasStartTransientUnit
 }

-func getIfaceForUnit(unitName string) string {
-	if strings.HasSuffix(unitName, ".scope") {
-		return "Scope"
-	}
-	if strings.HasSuffix(unitName, ".service") {
-		return "Service"
-	}
-	return "Unit"
-}
-
 func (m *Manager) Apply(pid int) error {
 	var (
 		c          = m.Cgroups
@@ -189,11 +222,29 @@ func (m *Manager) Apply(pid int) error {
 		slice = c.Parent
 	}

-	properties = append(properties,
-		systemdDbus.PropSlice(slice),
-		systemdDbus.PropDescription("docker container "+c.Name),
-		newProp("PIDs", []uint32{uint32(pid)}),
-	)
+	properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name))
+
+	// if we create a slice, the parent is defined via a Wants=
+	if strings.HasSuffix(unitName, ".slice") {
+		// This was broken until systemd v229, but has been back-ported on RHEL environments >= 219
+		if !hasStartTransientSliceUnit {
+			return fmt.Errorf("systemd version does not support ability to start a slice as transient unit")
+		}
+		properties = append(properties, systemdDbus.PropWants(slice))
+	} else {
+		// otherwise, we use Slice=
+		properties = append(properties, systemdDbus.PropSlice(slice))
+	}
+
+	// only add pid if its valid, -1 is used w/ general slice creation.
+	if pid != -1 {
+		properties = append(properties, newProp("PIDs", []uint32{uint32(pid)}))
+	}
+
+	if hasDelegate {
+		// This is only supported on systemd versions 218 and above.
+		properties = append(properties, newProp("Delegate", true))
+	}

 	// Always enable accounting, this gets us the same behaviour as the fs implementation,
 	// plus the kernel has some problems with joining the memory cgroup at a later time.
@@ -214,7 +265,21 @@ func (m *Manager) Apply(pid int) error {

 	if c.Resources.CpuShares != 0 {
 		properties = append(properties,
-			newProp("CPUShares", uint64(c.Resources.CpuShares)))
+			newProp("CPUShares", c.Resources.CpuShares))
+	}
+
+	// cpu.cfs_quota_us and cpu.cfs_period_us are controlled by systemd.
+	if c.Resources.CpuQuota != 0 && c.Resources.CpuPeriod != 0 {
+		cpuQuotaPerSecUSec := uint64(c.Resources.CpuQuota*1000000) / c.Resources.CpuPeriod
+		// systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota
+		// (integer percentage of CPU) internally.  This means that if a fractional percent of
+		// CPU is indicated by Resources.CpuQuota, we need to round up to the nearest
+		// 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect.
+		if cpuQuotaPerSecUSec%10000 != 0 {
+			cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000
+		}
+		properties = append(properties,
+			newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec))
 	}

 	if c.Resources.BlkioWeight != 0 {
@@ -222,67 +287,22 @@ func (m *Manager) Apply(pid int) error {
 			newProp("BlockIOWeight", uint64(c.Resources.BlkioWeight)))
 	}

-	// We need to set kernel memory before processes join cgroup because
-	// kmem.limit_in_bytes can only be set when the cgroup is empty.
-	// And swap memory limit needs to be set after memory limit, only
-	// memory limit is handled by systemd, so it's kind of ugly here.
-	if c.Resources.KernelMemory > 0 {
+	// We have to set kernel memory here, as we can't change it once
+	// processes have been attached to the cgroup.
+	if c.Resources.KernelMemory != 0 {
 		if err := setKernelMemory(c); err != nil {
 			return err
 		}
 	}

-	if _, err := theConn.StartTransientUnit(unitName, "replace", properties, nil); err != nil {
+	statusChan := make(chan string)
+	if _, err := theConn.StartTransientUnit(unitName, "replace", properties, statusChan); err != nil && !isUnitExists(err) {
 		return err
 	}

-	if err := joinDevices(c, pid); err != nil {
-		return err
-	}
+	<-statusChan

-	// TODO: CpuQuota and CpuPeriod not available in systemd
-	// we need to manually join the cpu.cfs_quota_us and cpu.cfs_period_us
-	if err := joinCpu(c, pid); err != nil {
-		return err
-	}
-
-	// TODO: MemoryReservation and MemorySwap not available in systemd
-	if err := joinMemory(c, pid); err != nil {
-		return err
-	}
-
-	// we need to manually join the freezer, net_cls, net_prio, pids and cpuset cgroup in systemd
-	// because it does not currently support it via the dbus api.
-	if err := joinFreezer(c, pid); err != nil {
-		return err
-	}
-
-	if err := joinNetPrio(c, pid); err != nil {
-		return err
-	}
-	if err := joinNetCls(c, pid); err != nil {
-		return err
-	}
-
-	if err := joinPids(c, pid); err != nil {
-		return err
-	}
-
-	if err := joinCpuset(c, pid); err != nil {
-		return err
-	}
-
-	if err := joinHugetlb(c, pid); err != nil {
-		return err
-	}
-
-	if err := joinPerfEvent(c, pid); err != nil {
-		return err
-	}
-	// FIXME: Systemd does have `BlockIODeviceWeight` property, but we got problem
-	// using that (at least on systemd 208, see https://github.com/opencontainers/runc/libcontainer/pull/354),
-	// so use fs work around for now.
-	if err := joinBlkio(c, pid); err != nil {
+	if err := joinCgroups(c, pid); err != nil {
 		return err
 	}

@@ -323,15 +343,6 @@ func (m *Manager) GetPaths() map[string]string {
 	return paths
 }

-func writeFile(dir, file, data string) error {
-	// Normally dir should not be empty, one case is that cgroup subsystem
-	// is not mounted, we will get empty dir, and we want it fail here.
-	if dir == "" {
-		return fmt.Errorf("no such directory for %s.", file)
-	}
-	return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
-}
-
 func join(c *configs.Cgroup, subsystem string, pid int) (string, error) {
 	path, err := getSubsystemPath(c, subsystem)
 	if err != nil {
@@ -340,57 +351,52 @@ func join(c *configs.Cgroup, subsystem string, pid int) (string, error) {
 	if err := os.MkdirAll(path, 0755); err != nil {
 		return "", err
 	}
-	if err := writeFile(path, "cgroup.procs", strconv.Itoa(pid)); err != nil {
+	if err := cgroups.WriteCgroupProc(path, pid); err != nil {
 		return "", err
 	}
-
 	return path, nil
 }

-func joinCpu(c *configs.Cgroup, pid int) error {
-	_, err := join(c, "cpu", pid)
-	if err != nil && !cgroups.IsNotFound(err) {
-		return err
+func joinCgroups(c *configs.Cgroup, pid int) error {
+	for _, sys := range subsystems {
+		name := sys.Name()
+		switch name {
+		case "name=systemd":
+			// let systemd handle this
+		case "cpuset":
+			path, err := getSubsystemPath(c, name)
+			if err != nil && !cgroups.IsNotFound(err) {
+				return err
+			}
+			s := &fs.CpusetGroup{}
+			if err := s.ApplyDir(path, c, pid); err != nil {
+				return err
+			}
+		default:
+			_, err := join(c, name, pid)
+			if err != nil {
+				// Even if it's `not found` error, we'll return err
+				// because devices cgroup is hard requirement for
+				// container security.
+				if name == "devices" {
+					return err
+				}
+				// For other subsystems, omit the `not found` error
+				// because they are optional.
+				if !cgroups.IsNotFound(err) {
+					return err
+				}
+			}
+		}
 	}
+
 	return nil
 }

-func joinFreezer(c *configs.Cgroup, pid int) error {
-	_, err := join(c, "freezer", pid)
-	if err != nil && !cgroups.IsNotFound(err) {
-		return err
-	}
-	return nil
-}
-
-func joinNetPrio(c *configs.Cgroup, pid int) error {
-	_, err := join(c, "net_prio", pid)
-	if err != nil && !cgroups.IsNotFound(err) {
-		return err
-	}
-	return nil
-}
-
-func joinNetCls(c *configs.Cgroup, pid int) error {
-	_, err := join(c, "net_cls", pid)
-	if err != nil && !cgroups.IsNotFound(err) {
-		return err
-	}
-	return nil
-}
-
-func joinPids(c *configs.Cgroup, pid int) error {
-	_, err := join(c, "pids", pid)
-	if err != nil && !cgroups.IsNotFound(err) {
-		return err
-	}
-	return nil
-}
-
-// systemd represents slice heirarchy using `-`, so we need to follow suit when
+// systemd represents slice hierarchy using `-`, so we need to follow suit when
 // generating the path of slice. Essentially, test-a-b.slice becomes
-// test.slice/test-a.slice/test-a-b.slice.
-func expandSlice(slice string) (string, error) {
+// /test.slice/test-a.slice/test-a-b.slice.
+func ExpandSlice(slice string) (string, error) {
 	suffix := ".slice"
 	// Name has to end with ".slice", but can't be just ".slice".
 	if len(slice) < len(suffix) || !strings.HasSuffix(slice, suffix) {
@@ -404,6 +410,10 @@ func expandSlice(slice string) (string, error) {

 	var path, prefix string
 	sliceName := strings.TrimSuffix(slice, suffix)
+	// if input was -.slice, we should just return root now
+	if sliceName == "-" {
+		return "/", nil
+	}
 	for _, component := range strings.Split(sliceName, "-") {
 		// test--a.slice isn't permitted, nor is -test.slice.
 		if component == "" {
@@ -411,10 +421,9 @@ func expandSlice(slice string) (string, error) {
 		}

 		// Append the component to the path and to the prefix.
-		path += prefix + component + suffix + "/"
+		path += "/" + prefix + component + suffix
 		prefix += component + "-"
 	}
-
 	return path, nil
 }

@@ -424,17 +433,19 @@ func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) {
 		return "", err
 	}

-	initPath, err := cgroups.GetInitCgroupDir(subsystem)
+	initPath, err := cgroups.GetInitCgroup(subsystem)
 	if err != nil {
 		return "", err
 	}
+	// if pid 1 is systemd 226 or later, it will be in init.scope, not the root
+	initPath = strings.TrimSuffix(filepath.Clean(initPath), "init.scope")

 	slice := "system.slice"
 	if c.Parent != "" {
 		slice = c.Parent
 	}

-	slice, err = expandSlice(slice)
+	slice, err = ExpandSlice(slice)
 	if err != nil {
 		return "", err
 	}
@@ -495,6 +506,11 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
 }

 func (m *Manager) Set(container *configs.Config) error {
+	// If Paths are set, then we are just joining cgroups paths
+	// and there is no need to set any values.
+	if m.Cgroups.Paths != nil {
+		return nil
+	}
 	for _, sys := range subsystems {
 		// Get the subsystem path, but don't error out for not found cgroups.
 		path, err := getSubsystemPath(container.Cgroups, sys.Name())
@@ -516,28 +532,11 @@ func (m *Manager) Set(container *configs.Config) error {
 }

 func getUnitName(c *configs.Cgroup) string {
-	return fmt.Sprintf("%s-%s.scope", c.ScopePrefix, c.Name)
-}
-
-// Atm we can't use the systemd device support because of two missing things:
-// * Support for wildcards to allow mknod on any device
-// * Support for wildcards to allow /dev/pts support
-//
-// The second is available in more recent systemd as "char-pts", but not in e.g. v208 which is
-// in wide use. When both these are available we will be able to switch, but need to keep the old
-// implementation for backwards compat.
-//
-// Note: we can't use systemd to set up the initial limits, and then change the cgroup
-// because systemd will re-write the device settings if it needs to re-apply the cgroup context.
-// This happens at least for v208 when any sibling unit is started.
-func joinDevices(c *configs.Cgroup, pid int) error {
-	_, err := join(c, "devices", pid)
-	// Even if it's `not found` error, we'll return err because devices cgroup
-	// is hard requirement for container security.
-	if err != nil {
-		return err
+	// by default, we create a scope unless the user explicitly asks for a slice.
+	if !strings.HasSuffix(c.Name, ".slice") {
+		return fmt.Sprintf("%s-%s.scope", c.ScopePrefix, c.Name)
 	}
-	return nil
+	return c.Name
 }

 func setKernelMemory(c *configs.Cgroup) error {
@@ -549,57 +548,15 @@ func setKernelMemory(c *configs.Cgroup) error {
 	if err := os.MkdirAll(path, 0755); err != nil {
 		return err
 	}
-
-	// This doesn't get called by manager.Set, so we need to do it here.
-	s := &fs.MemoryGroup{}
-	return s.SetKernelMemory(path, c)
+	return fs.EnableKernelMemoryAccounting(path)
 }

-func joinMemory(c *configs.Cgroup, pid int) error {
-	_, err := join(c, "memory", pid)
-	if err != nil && !cgroups.IsNotFound(err) {
-		return err
-	}
-	return nil
-}
-
-// systemd does not atm set up the cpuset controller, so we must manually
-// join it. Additionally that is a very finicky controller where each
-// level must have a full setup as the default for a new directory is "no cpus"
-func joinCpuset(c *configs.Cgroup, pid int) error {
-	path, err := getSubsystemPath(c, "cpuset")
-	if err != nil && !cgroups.IsNotFound(err) {
-		return err
-	}
-
-	s := &fs.CpusetGroup{}
-
-	return s.ApplyDir(path, c, pid)
-}
-
-// `BlockIODeviceWeight` property of systemd does not work properly, and systemd
-// expects device path instead of major minor numbers, which is also confusing
-// for users. So we use fs work around for now.
-func joinBlkio(c *configs.Cgroup, pid int) error {
-	_, err := join(c, "blkio", pid)
+// isUnitExists returns true if the error is that a systemd unit already exists.
+func isUnitExists(err error) bool {
 	if err != nil {
-		return err
+		if dbusError, ok := err.(dbus.Error); ok {
+			return strings.Contains(dbusError.Name, "org.freedesktop.systemd1.UnitExists")
+		}
 	}
-	return nil
-}
-
-func joinHugetlb(c *configs.Cgroup, pid int) error {
-	_, err := join(c, "hugetlb", pid)
-	if err != nil && !cgroups.IsNotFound(err) {
-		return err
-	}
-	return nil
-}
-
-func joinPerfEvent(c *configs.Cgroup, pid int) error {
-	_, err := join(c, "perf_event", pid)
-	if err != nil && !cgroups.IsNotFound(err) {
-		return err
-	}
-	return nil
+	return false
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go
@@ -16,37 +16,24 @@ import (
 	"github.com/docker/go-units"
 )

-const cgroupNamePrefix = "name="
+const (
+	cgroupNamePrefix = "name="
+	CgroupProcesses  = "cgroup.procs"
+)

-// https://www.kernel.org/doc/Documentation/cgroups/cgroups.txt
+// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt
 func FindCgroupMountpoint(subsystem string) (string, error) {
-	// We are not using mount.GetMounts() because it's super-inefficient,
-	// parsing it directly sped up x10 times because of not using Sscanf.
-	// It was one of two major performance drawbacks in container start.
-	f, err := os.Open("/proc/self/mountinfo")
-	if err != nil {
-		return "", err
-	}
-	defer f.Close()
-
-	scanner := bufio.NewScanner(f)
-	for scanner.Scan() {
-		txt := scanner.Text()
-		fields := strings.Split(txt, " ")
-		for _, opt := range strings.Split(fields[len(fields)-1], ",") {
-			if opt == subsystem {
-				return fields[4], nil
-			}
-		}
-	}
-	if err := scanner.Err(); err != nil {
-		return "", err
-	}
-
-	return "", NewNotFoundError(subsystem)
+	mnt, _, err := FindCgroupMountpointAndRoot(subsystem)
+	return mnt, err
 }

 func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) {
+	// We are not using mount.GetMounts() because it's super-inefficient,
+	// parsing it directly sped up x10 times because of not using Sscanf.
+	// It was one of two major performance drawbacks in container start.
+	if !isSubsystemAvailable(subsystem) {
+		return "", "", NewNotFoundError(subsystem)
+	}
 	f, err := os.Open("/proc/self/mountinfo")
 	if err != nil {
 		return "", "", err
@@ -70,6 +57,30 @@ func FindCgroupMountpointAndRoot(subsystem string) (string, string, error) {
 	return "", "", NewNotFoundError(subsystem)
 }

+func isSubsystemAvailable(subsystem string) bool {
+	cgroups, err := ParseCgroupFile("/proc/self/cgroup")
+	if err != nil {
+		return false
+	}
+	_, avail := cgroups[subsystem]
+	return avail
+}
+
+func GetClosestMountpointAncestor(dir, mountinfo string) string {
+	deepestMountPoint := ""
+	for _, mountInfoEntry := range strings.Split(mountinfo, "\n") {
+		mountInfoParts := strings.Fields(mountInfoEntry)
+		if len(mountInfoParts) < 5 {
+			continue
+		}
+		mountPoint := mountInfoParts[4]
+		if strings.HasPrefix(mountPoint, deepestMountPoint) && strings.HasPrefix(dir, mountPoint) {
+			deepestMountPoint = mountPoint
+		}
+	}
+	return deepestMountPoint
+}
+
 func FindCgroupMountpointDir() (string, error) {
 	f, err := os.Open("/proc/self/mountinfo")
 	if err != nil {
@@ -113,7 +124,7 @@ type Mount struct {
 	Subsystems []string
 }

-func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) {
+func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) {
 	if len(m.Subsystems) == 0 {
 		return "", fmt.Errorf("no subsystem for mount")
 	}
@@ -121,16 +132,17 @@ func (m Mount) GetThisCgroupDir(cgroups map[string]string) (string, error) {
 	return getControllerPath(m.Subsystems[0], cgroups)
 }

-func getCgroupMountsHelper(ss map[string]bool, mi io.Reader) ([]Mount, error) {
+func getCgroupMountsHelper(ss map[string]bool, mi io.Reader, all bool) ([]Mount, error) {
 	res := make([]Mount, 0, len(ss))
 	scanner := bufio.NewScanner(mi)
-	for scanner.Scan() {
+	numFound := 0
+	for scanner.Scan() && numFound < len(ss) {
 		txt := scanner.Text()
-		sepIdx := strings.IndexByte(txt, '-')
+		sepIdx := strings.Index(txt, " - ")
 		if sepIdx == -1 {
 			return nil, fmt.Errorf("invalid mountinfo format")
 		}
-		if txt[sepIdx+2:sepIdx+8] != "cgroup" {
+		if txt[sepIdx+3:sepIdx+10] == "cgroup2" || txt[sepIdx+3:sepIdx+9] != "cgroup" {
 			continue
 		}
 		fields := strings.Split(txt, " ")
@@ -139,12 +151,17 @@ func getCgroupMountsHelper(ss map[string]bool, mi io.Reader) ([]Mount, error) {
 			Root:       fields[3],
 		}
 		for _, opt := range strings.Split(fields[len(fields)-1], ",") {
+			if !ss[opt] {
+				continue
+			}
 			if strings.HasPrefix(opt, cgroupNamePrefix) {
 				m.Subsystems = append(m.Subsystems, opt[len(cgroupNamePrefix):])
-			}
-			if ss[opt] {
+			} else {
 				m.Subsystems = append(m.Subsystems, opt)
 			}
+			if !all {
+				numFound++
+			}
 		}
 		res = append(res, m)
 	}
@@ -154,26 +171,28 @@ func getCgroupMountsHelper(ss map[string]bool, mi io.Reader) ([]Mount, error) {
 	return res, nil
 }

-func GetCgroupMounts() ([]Mount, error) {
+// GetCgroupMounts returns the mounts for the cgroup subsystems.
+// all indicates whether to return just the first instance or all the mounts.
+func GetCgroupMounts(all bool) ([]Mount, error) {
 	f, err := os.Open("/proc/self/mountinfo")
 	if err != nil {
 		return nil, err
 	}
 	defer f.Close()

-	all, err := GetAllSubsystems()
+	allSubsystems, err := ParseCgroupFile("/proc/self/cgroup")
 	if err != nil {
 		return nil, err
 	}

 	allMap := make(map[string]bool)
-	for _, s := range all {
+	for s := range allSubsystems {
 		allMap[s] = true
 	}
-	return getCgroupMountsHelper(allMap, f)
+	return getCgroupMountsHelper(allMap, f, all)
 }

-// Returns all the cgroup subsystems supported by the kernel
+// GetAllSubsystems returns all the cgroup subsystems supported by the kernel
 func GetAllSubsystems() ([]string, error) {
 	f, err := os.Open("/proc/cgroups")
 	if err != nil {
@@ -185,9 +204,6 @@ func GetAllSubsystems() ([]string, error) {

 	s := bufio.NewScanner(f)
 	for s.Scan() {
-		if err := s.Err(); err != nil {
-			return nil, err
-		}
 		text := s.Text()
 		if text[0] != '#' {
 			parts := strings.Fields(text)
@@ -196,11 +212,14 @@ func GetAllSubsystems() ([]string, error) {
 			}
 		}
 	}
+	if err := s.Err(); err != nil {
+		return nil, err
+	}
 	return subsystems, nil
 }

-// Returns the relative path to the cgroup docker is running in.
-func GetThisCgroupDir(subsystem string) (string, error) {
+// GetOwnCgroup returns the relative path to the cgroup docker is running in.
+func GetOwnCgroup(subsystem string) (string, error) {
 	cgroups, err := ParseCgroupFile("/proc/self/cgroup")
 	if err != nil {
 		return "", err
@@ -209,8 +228,16 @@ func GetThisCgroupDir(subsystem string) (string, error) {
 	return getControllerPath(subsystem, cgroups)
 }

-func GetInitCgroupDir(subsystem string) (string, error) {
+func GetOwnCgroupPath(subsystem string) (string, error) {
+	cgroup, err := GetOwnCgroup(subsystem)
+	if err != nil {
+		return "", err
+	}

+	return getCgroupPathHelper(subsystem, cgroup)
+}
+
+func GetInitCgroup(subsystem string) (string, error) {
 	cgroups, err := ParseCgroupFile("/proc/1/cgroup")
 	if err != nil {
 		return "", err
@@ -219,8 +246,33 @@ func GetInitCgroupDir(subsystem string) (string, error) {
 	return getControllerPath(subsystem, cgroups)
 }

+func GetInitCgroupPath(subsystem string) (string, error) {
+	cgroup, err := GetInitCgroup(subsystem)
+	if err != nil {
+		return "", err
+	}
+
+	return getCgroupPathHelper(subsystem, cgroup)
+}
+
+func getCgroupPathHelper(subsystem, cgroup string) (string, error) {
+	mnt, root, err := FindCgroupMountpointAndRoot(subsystem)
+	if err != nil {
+		return "", err
+	}
+
+	// This is needed for nested containers, because in /proc/self/cgroup we
+	// see paths from the host, which don't exist in the container.
+	relCgroup, err := filepath.Rel(root, cgroup)
+	if err != nil {
+		return "", err
+	}
+
+	return filepath.Join(mnt, relCgroup), nil
+}
+
 func readProcsFile(dir string) ([]int, error) {
-	f, err := os.Open(filepath.Join(dir, "cgroup.procs"))
+	f, err := os.Open(filepath.Join(dir, CgroupProcesses))
 	if err != nil {
 		return nil, err
 	}
@@ -243,6 +295,8 @@ func readProcsFile(dir string) ([]int, error) {
 	return out, nil
 }

+// ParseCgroupFile parses the given cgroup file, typically from
+// /proc/<pid>/cgroup, into a map of subsystems to cgroup paths.
 func ParseCgroupFile(path string) (map[string]string, error) {
 	f, err := os.Open(path)
 	if err != nil {
@@ -250,21 +304,35 @@ func ParseCgroupFile(path string) (map[string]string, error) {
 	}
 	defer f.Close()

-	s := bufio.NewScanner(f)
+	return parseCgroupFromReader(f)
+}
+
+// helper function for ParseCgroupFile to make testing easier
+func parseCgroupFromReader(r io.Reader) (map[string]string, error) {
+	s := bufio.NewScanner(r)
 	cgroups := make(map[string]string)

 	for s.Scan() {
-		if err := s.Err(); err != nil {
-			return nil, err
-		}
-
 		text := s.Text()
-		parts := strings.Split(text, ":")
+		// from cgroups(7):
+		// /proc/[pid]/cgroup
+		// ...
+		// For each cgroup hierarchy ... there is one entry
+		// containing three colon-separated fields of the form:
+		//     hierarchy-ID:subsystem-list:cgroup-path
+		parts := strings.SplitN(text, ":", 3)
+		if len(parts) < 3 {
+			return nil, fmt.Errorf("invalid cgroup entry: must contain at least two colons: %v", text)
+		}

 		for _, subs := range strings.Split(parts[1], ",") {
 			cgroups[subs] = parts[2]
 		}
 	}
+	if err := s.Err(); err != nil {
+		return nil, err
+	}
+
 	return cgroups, nil
 }

@@ -291,8 +359,7 @@ func PathExists(path string) bool {
 func EnterPid(cgroupPaths map[string]string, pid int) error {
 	for _, path := range cgroupPaths {
 		if PathExists(path) {
-			if err := ioutil.WriteFile(filepath.Join(path, "cgroup.procs"),
-				[]byte(strconv.Itoa(pid)), 0700); err != nil {
+			if err := WriteCgroupProc(path, pid); err != nil {
 				return err
 			}
 		}
@@ -326,7 +393,7 @@ func RemovePaths(paths map[string]string) (err error) {
 			return nil
 		}
 	}
-	return fmt.Errorf("Failed to remove paths: %s", paths)
+	return fmt.Errorf("Failed to remove paths: %v", paths)
 }

 func GetHugePageSize() ([]string, error) {
@@ -361,7 +428,7 @@ func GetAllPids(path string) ([]int, error) {
 	// collect pids from all sub-cgroups
 	err := filepath.Walk(path, func(p string, info os.FileInfo, iErr error) error {
 		dir, file := filepath.Split(p)
-		if file != "cgroup.procs" {
+		if file != CgroupProcesses {
 			return nil
 		}
 		if iErr != nil {
@@ -376,3 +443,20 @@ func GetAllPids(path string) ([]int, error) {
 	})
 	return pids, err
 }
+
+// WriteCgroupProc writes the specified pid into the cgroup's cgroup.procs file
+func WriteCgroupProc(dir string, pid int) error {
+	// Normally dir should not be empty. One case where it is empty is when the
+	// cgroup subsystem is not mounted, and we want to fail here in that case.
+	if dir == "" {
+		return fmt.Errorf("no such directory for %s", CgroupProcesses)
+	}
+
+	// Don't attach any pid to the cgroup if -1 is specified as the pid.
+	if pid != -1 {
+		if err := ioutil.WriteFile(filepath.Join(dir, CgroupProcesses), []byte(strconv.Itoa(pid)), 0700); err != nil {
+			return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err)
+		}
+	}
+	return nil
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils_test.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils_test.go
@@ -1,138 +0,0 @@
-package cgroups
-
-import (
-	"bytes"
-	"strings"
-	"testing"
-)
-
-const fedoraMountinfo = `15 35 0:3 / /proc rw,nosuid,nodev,noexec,relatime shared:5 - proc proc rw
-16 35 0:14 / /sys rw,nosuid,nodev,noexec,relatime shared:6 - sysfs sysfs rw,seclabel
-17 35 0:5 / /dev rw,nosuid shared:2 - devtmpfs devtmpfs rw,seclabel,size=8056484k,nr_inodes=2014121,mode=755
-18 16 0:15 / /sys/kernel/security rw,nosuid,nodev,noexec,relatime shared:7 - securityfs securityfs rw
-19 16 0:13 / /sys/fs/selinux rw,relatime shared:8 - selinuxfs selinuxfs rw
-20 17 0:16 / /dev/shm rw,nosuid,nodev shared:3 - tmpfs tmpfs rw,seclabel
-21 17 0:10 / /dev/pts rw,nosuid,noexec,relatime shared:4 - devpts devpts rw,seclabel,gid=5,mode=620,ptmxmode=000
-22 35 0:17 / /run rw,nosuid,nodev shared:21 - tmpfs tmpfs rw,seclabel,mode=755
-23 16 0:18 / /sys/fs/cgroup rw,nosuid,nodev,noexec shared:9 - tmpfs tmpfs rw,seclabel,mode=755
-24 23 0:19 / /sys/fs/cgroup/systemd rw,nosuid,nodev,noexec,relatime shared:10 - cgroup cgroup rw,xattr,release_agent=/usr/lib/systemd/systemd-cgroups-agent,name=systemd
-25 16 0:20 / /sys/fs/pstore rw,nosuid,nodev,noexec,relatime shared:20 - pstore pstore rw
-26 23 0:21 / /sys/fs/cgroup/cpuset rw,nosuid,nodev,noexec,relatime shared:11 - cgroup cgroup rw,cpuset,clone_children
-27 23 0:22 / /sys/fs/cgroup/cpu,cpuacct rw,nosuid,nodev,noexec,relatime shared:12 - cgroup cgroup rw,cpuacct,cpu,clone_children
-28 23 0:23 / /sys/fs/cgroup/memory rw,nosuid,nodev,noexec,relatime shared:13 - cgroup cgroup rw,memory,clone_children
-29 23 0:24 / /sys/fs/cgroup/devices rw,nosuid,nodev,noexec,relatime shared:14 - cgroup cgroup rw,devices,clone_children
-30 23 0:25 / /sys/fs/cgroup/freezer rw,nosuid,nodev,noexec,relatime shared:15 - cgroup cgroup rw,freezer,clone_children
-31 23 0:26 / /sys/fs/cgroup/net_cls rw,nosuid,nodev,noexec,relatime shared:16 - cgroup cgroup rw,net_cls,clone_children
-32 23 0:27 / /sys/fs/cgroup/blkio rw,nosuid,nodev,noexec,relatime shared:17 - cgroup cgroup rw,blkio,clone_children
-33 23 0:28 / /sys/fs/cgroup/perf_event rw,nosuid,nodev,noexec,relatime shared:18 - cgroup cgroup rw,perf_event,clone_children
-34 23 0:29 / /sys/fs/cgroup/hugetlb rw,nosuid,nodev,noexec,relatime shared:19 - cgroup cgroup rw,hugetlb,clone_children
-35 1 253:2 / / rw,relatime shared:1 - ext4 /dev/mapper/ssd-root--f20 rw,seclabel,data=ordered
-36 15 0:30 / /proc/sys/fs/binfmt_misc rw,relatime shared:22 - autofs systemd-1 rw,fd=38,pgrp=1,timeout=300,minproto=5,maxproto=5,direct
-37 17 0:12 / /dev/mqueue rw,relatime shared:23 - mqueue mqueue rw,seclabel
-38 35 0:31 / /tmp rw shared:24 - tmpfs tmpfs rw,seclabel
-39 17 0:32 / /dev/hugepages rw,relatime shared:25 - hugetlbfs hugetlbfs rw,seclabel
-40 16 0:7 / /sys/kernel/debug rw,relatime shared:26 - debugfs debugfs rw
-41 16 0:33 / /sys/kernel/config rw,relatime shared:27 - configfs configfs rw
-42 35 0:34 / /var/lib/nfs/rpc_pipefs rw,relatime shared:28 - rpc_pipefs sunrpc rw
-43 15 0:35 / /proc/fs/nfsd rw,relatime shared:29 - nfsd sunrpc rw
-45 35 8:17 / /boot rw,relatime shared:30 - ext4 /dev/sdb1 rw,seclabel,data=ordered
-46 35 253:4 / /home rw,relatime shared:31 - ext4 /dev/mapper/ssd-home rw,seclabel,data=ordered
-47 35 253:5 / /var/lib/libvirt/images rw,noatime,nodiratime shared:32 - ext4 /dev/mapper/ssd-virt rw,seclabel,discard,data=ordered
-48 35 253:12 / /mnt/old rw,relatime shared:33 - ext4 /dev/mapper/HelpDeskRHEL6-FedoraRoot rw,seclabel,data=ordered
-121 22 0:36 / /run/user/1000/gvfs rw,nosuid,nodev,relatime shared:104 - fuse.gvfsd-fuse gvfsd-fuse rw,user_id=1000,group_id=1000
-124 16 0:37 / /sys/fs/fuse/connections rw,relatime shared:107 - fusectl fusectl rw
-165 38 253:3 / /tmp/mnt rw,relatime shared:147 - ext4 /dev/mapper/ssd-root rw,seclabel,data=ordered
-167 35 253:15 / /var/lib/docker/devicemapper/mnt/aae4076022f0e2b80a2afbf8fc6df450c52080191fcef7fb679a73e6f073e5c2 rw,relatime shared:149 - ext4 /dev/mapper/docker-253:2-425882-aae4076022f0e2b80a2afbf8fc6df450c52080191fcef7fb679a73e6f073e5c2 rw,seclabel,discard,stripe=16,data=ordered
-171 35 253:16 / /var/lib/docker/devicemapper/mnt/c71be651f114db95180e472f7871b74fa597ee70a58ccc35cb87139ddea15373 rw,relatime shared:153 - ext4 /dev/mapper/docker-253:2-425882-c71be651f114db95180e472f7871b74fa597ee70a58ccc35cb87139ddea15373 rw,seclabel,discard,stripe=16,data=ordered
-175 35 253:17 / /var/lib/docker/devicemapper/mnt/1bac6ab72862d2d5626560df6197cf12036b82e258c53d981fa29adce6f06c3c rw,relatime shared:157 - ext4 /dev/mapper/docker-253:2-425882-1bac6ab72862d2d5626560df6197cf12036b82e258c53d981fa29adce6f06c3c rw,seclabel,discard,stripe=16,data=ordered
-179 35 253:18 / /var/lib/docker/devicemapper/mnt/d710a357d77158e80d5b2c55710ae07c94e76d34d21ee7bae65ce5418f739b09 rw,relatime shared:161 - ext4 /dev/mapper/docker-253:2-425882-d710a357d77158e80d5b2c55710ae07c94e76d34d21ee7bae65ce5418f739b09 rw,seclabel,discard,stripe=16,data=ordered
-183 35 253:19 / /var/lib/docker/devicemapper/mnt/6479f52366114d5f518db6837254baab48fab39f2ac38d5099250e9a6ceae6c7 rw,relatime shared:165 - ext4 /dev/mapper/docker-253:2-425882-6479f52366114d5f518db6837254baab48fab39f2ac38d5099250e9a6ceae6c7 rw,seclabel,discard,stripe=16,data=ordered
-187 35 253:20 / /var/lib/docker/devicemapper/mnt/8d9df91c4cca5aef49eeb2725292aab324646f723a7feab56be34c2ad08268e1 rw,relatime shared:169 - ext4 /dev/mapper/docker-253:2-425882-8d9df91c4cca5aef49eeb2725292aab324646f723a7feab56be34c2ad08268e1 rw,seclabel,discard,stripe=16,data=ordered
-191 35 253:21 / /var/lib/docker/devicemapper/mnt/c8240b768603d32e920d365dc9d1dc2a6af46cd23e7ae819947f969e1b4ec661 rw,relatime shared:173 - ext4 /dev/mapper/docker-253:2-425882-c8240b768603d32e920d365dc9d1dc2a6af46cd23e7ae819947f969e1b4ec661 rw,seclabel,discard,stripe=16,data=ordered
-195 35 253:22 / /var/lib/docker/devicemapper/mnt/2eb3a01278380bbf3ed12d86ac629eaa70a4351301ee307a5cabe7b5f3b1615f rw,relatime shared:177 - ext4 /dev/mapper/docker-253:2-425882-2eb3a01278380bbf3ed12d86ac629eaa70a4351301ee307a5cabe7b5f3b1615f rw,seclabel,discard,stripe=16,data=ordered
-199 35 253:23 / /var/lib/docker/devicemapper/mnt/37a17fb7c9d9b80821235d5f2662879bd3483915f245f9b49cdaa0e38779b70b rw,relatime shared:181 - ext4 /dev/mapper/docker-253:2-425882-37a17fb7c9d9b80821235d5f2662879bd3483915f245f9b49cdaa0e38779b70b rw,seclabel,discard,stripe=16,data=ordered
-203 35 253:24 / /var/lib/docker/devicemapper/mnt/aea459ae930bf1de913e2f29428fd80ee678a1e962d4080019d9f9774331ee2b rw,relatime shared:185 - ext4 /dev/mapper/docker-253:2-425882-aea459ae930bf1de913e2f29428fd80ee678a1e962d4080019d9f9774331ee2b rw,seclabel,discard,stripe=16,data=ordered
-207 35 253:25 / /var/lib/docker/devicemapper/mnt/928ead0bc06c454bd9f269e8585aeae0a6bd697f46dc8754c2a91309bc810882 rw,relatime shared:189 - ext4 /dev/mapper/docker-253:2-425882-928ead0bc06c454bd9f269e8585aeae0a6bd697f46dc8754c2a91309bc810882 rw,seclabel,discard,stripe=16,data=ordered
-211 35 253:26 / /var/lib/docker/devicemapper/mnt/0f284d18481d671644706e7a7244cbcf63d590d634cc882cb8721821929d0420 rw,relatime shared:193 - ext4 /dev/mapper/docker-253:2-425882-0f284d18481d671644706e7a7244cbcf63d590d634cc882cb8721821929d0420 rw,seclabel,discard,stripe=16,data=ordered
-215 35 253:27 / /var/lib/docker/devicemapper/mnt/d9dd16722ab34c38db2733e23f69e8f4803ce59658250dd63e98adff95d04919 rw,relatime shared:197 - ext4 /dev/mapper/docker-253:2-425882-d9dd16722ab34c38db2733e23f69e8f4803ce59658250dd63e98adff95d04919 rw,seclabel,discard,stripe=16,data=ordered
-219 35 253:28 / /var/lib/docker/devicemapper/mnt/bc4500479f18c2c08c21ad5282e5f826a016a386177d9874c2764751c031d634 rw,relatime shared:201 - ext4 /dev/mapper/docker-253:2-425882-bc4500479f18c2c08c21ad5282e5f826a016a386177d9874c2764751c031d634 rw,seclabel,discard,stripe=16,data=ordered
-223 35 253:29 / /var/lib/docker/devicemapper/mnt/7770c8b24eb3d5cc159a065910076938910d307ab2f5d94e1dc3b24c06ee2c8a rw,relatime shared:205 - ext4 /dev/mapper/docker-253:2-425882-7770c8b24eb3d5cc159a065910076938910d307ab2f5d94e1dc3b24c06ee2c8a rw,seclabel,discard,stripe=16,data=ordered
-227 35 253:30 / /var/lib/docker/devicemapper/mnt/c280cd3d0bf0aa36b478b292279671624cceafc1a67eaa920fa1082601297adf rw,relatime shared:209 - ext4 /dev/mapper/docker-253:2-425882-c280cd3d0bf0aa36b478b292279671624cceafc1a67eaa920fa1082601297adf rw,seclabel,discard,stripe=16,data=ordered
-231 35 253:31 / /var/lib/docker/devicemapper/mnt/8b59a7d9340279f09fea67fd6ad89ddef711e9e7050eb647984f8b5ef006335f rw,relatime shared:213 - ext4 /dev/mapper/docker-253:2-425882-8b59a7d9340279f09fea67fd6ad89ddef711e9e7050eb647984f8b5ef006335f rw,seclabel,discard,stripe=16,data=ordered
-235 35 253:32 / /var/lib/docker/devicemapper/mnt/1a28059f29eda821578b1bb27a60cc71f76f846a551abefabce6efd0146dce9f rw,relatime shared:217 - ext4 /dev/mapper/docker-253:2-425882-1a28059f29eda821578b1bb27a60cc71f76f846a551abefabce6efd0146dce9f rw,seclabel,discard,stripe=16,data=ordered
-239 35 253:33 / /var/lib/docker/devicemapper/mnt/e9aa60c60128cad1 rw,relatime shared:221 - ext4 /dev/mapper/docker-253:2-425882-e9aa60c60128cad1 rw,seclabel,discard,stripe=16,data=ordered
-243 35 253:34 / /var/lib/docker/devicemapper/mnt/5fec11304b6f4713fea7b6ccdcc1adc0a1966187f590fe25a8227428a8df275d-init rw,relatime shared:225 - ext4 /dev/mapper/docker-253:2-425882-5fec11304b6f4713fea7b6ccdcc1adc0a1966187f590fe25a8227428a8df275d-init rw,seclabel,discard,stripe=16,data=ordered
-247 35 253:35 / /var/lib/docker/devicemapper/mnt/5fec11304b6f4713fea7b6ccdcc1adc0a1966187f590fe25a8227428a8df275d rw,relatime shared:229 - ext4 /dev/mapper/docker-253:2-425882-5fec11304b6f4713fea7b6ccdcc1adc0a1966187f590fe25a8227428a8df275d rw,seclabel,discard,stripe=16,data=ordered
-31 21 0:23 / /DATA/foo_bla_bla rw,relatime - cifs //foo/BLA\040BLA\040BLA/ rw,sec=ntlm,cache=loose,unc=\\foo\BLA BLA BLA,username=my_login,domain=mydomain.com,uid=12345678,forceuid,gid=12345678,forcegid,addr=10.1.30.10,file_mode=0755,dir_mode=0755,nounix,rsize=61440,wsize=65536,actimeo=1`
-
-func TestGetCgroupMounts(t *testing.T) {
-	subsystems := map[string]bool{
-		"cpuset":     true,
-		"cpu":        true,
-		"cpuacct":    true,
-		"memory":     true,
-		"devices":    true,
-		"freezer":    true,
-		"net_cls":    true,
-		"blkio":      true,
-		"perf_event": true,
-		"hugetlb":    true,
-	}
-	mi := bytes.NewBufferString(fedoraMountinfo)
-	cgMounts, err := getCgroupMountsHelper(subsystems, mi)
-	if err != nil {
-		t.Fatal(err)
-	}
-	cgMap := make(map[string]Mount)
-	for _, m := range cgMounts {
-		for _, ss := range m.Subsystems {
-			cgMap[ss] = m
-		}
-	}
-	for ss := range subsystems {
-		m, ok := cgMap[ss]
-		if !ok {
-			t.Fatalf("%s not found", ss)
-		}
-		if m.Root != "/" {
-			t.Fatalf("unexpected root for %s: %s", ss, m.Root)
-		}
-		if !strings.HasPrefix(m.Mountpoint, "/sys/fs/cgroup/") && !strings.Contains(m.Mountpoint, ss) {
-			t.Fatalf("unexpected mountpoint for %s: %s", ss, m.Mountpoint)
-		}
-		var ssFound bool
-		for _, mss := range m.Subsystems {
-			if mss == ss {
-				ssFound = true
-				break
-			}
-		}
-		if !ssFound {
-			t.Fatalf("subsystem %s not found in Subsystems field %v", ss, m.Subsystems)
-		}
-	}
-}
-
-func BenchmarkGetCgroupMounts(b *testing.B) {
-	subsystems := map[string]bool{
-		"cpuset":     true,
-		"cpu":        true,
-		"cpuacct":    true,
-		"memory":     true,
-		"devices":    true,
-		"freezer":    true,
-		"net_cls":    true,
-		"blkio":      true,
-		"perf_event": true,
-		"hugetlb":    true,
-	}
-	b.ResetTimer()
-	for i := 0; i < b.N; i++ {
-		b.StopTimer()
-		mi := bytes.NewBufferString(fedoraMountinfo)
-		b.StartTimer()
-		if _, err := getCgroupMountsHelper(subsystems, mi); err != nil {
-			b.Fatal(err)
-		}
-	}
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_linux.go
@@ -1,5 +1,3 @@
-// +build linux freebsd
-
 package configs

 type FreezerState string
@@ -22,7 +20,7 @@ type Cgroup struct {
 	// The path is assumed to be relative to the host system cgroup mountpoint.
 	Path string `json:"path"`

-	// ScopePrefix decribes prefix for the scope name
+	// ScopePrefix describes prefix for the scope name
 	ScopePrefix string `json:"scope_prefix"`

 	// Paths represent the absolute cgroups paths to join.
@@ -36,7 +34,7 @@ type Cgroup struct {
 type Resources struct {
 	// If this is true allow access to any kind of device within the container.  If false, allow access only to devices explicitly listed in the allowed_devices list.
 	// Deprecated
-	AllowAllDevices bool `json:"allow_all_devices,omitempty"`
+	AllowAllDevices *bool `json:"allow_all_devices,omitempty"`
 	// Deprecated
 	AllowedDevices []*Device `json:"allowed_devices,omitempty"`
 	// Deprecated
@@ -56,20 +54,23 @@ type Resources struct {
 	// Kernel memory limit (in bytes)
 	KernelMemory int64 `json:"kernel_memory"`

+	// Kernel memory limit for TCP use (in bytes)
+	KernelMemoryTCP int64 `json:"kernel_memory_tcp"`
+
 	// CPU shares (relative weight vs. other containers)
-	CpuShares int64 `json:"cpu_shares"`
+	CpuShares uint64 `json:"cpu_shares"`

 	// CPU hardcap limit (in usecs). Allowed cpu time in a given period.
 	CpuQuota int64 `json:"cpu_quota"`

 	// CPU period to be used for hardcapping (in usecs). 0 to use system default.
-	CpuPeriod int64 `json:"cpu_period"`
+	CpuPeriod uint64 `json:"cpu_period"`

 	// How many time CPU will use in realtime scheduling (in usecs).
-	CpuRtRuntime int64 `json:"cpu_quota"`
+	CpuRtRuntime int64 `json:"cpu_rt_quota"`

 	// CPU period to be used for realtime scheduling (in usecs).
-	CpuRtPeriod int64 `json:"cpu_period"`
+	CpuRtPeriod uint64 `json:"cpu_rt_period"`

 	// CPU to use
 	CpusetCpus string `json:"cpuset_cpus"`
@@ -92,7 +93,7 @@ type Resources struct {
 	// IO read rate limit per cgroup per device, bytes per second.
 	BlkioThrottleReadBpsDevice []*ThrottleDevice `json:"blkio_throttle_read_bps_device"`

-	// IO write rate limit per cgroup per divice, bytes per second.
+	// IO write rate limit per cgroup per device, bytes per second.
 	BlkioThrottleWriteBpsDevice []*ThrottleDevice `json:"blkio_throttle_write_bps_device"`

 	// IO read rate limit per cgroup per device, IO per second.
@@ -111,11 +112,11 @@ type Resources struct {
 	OomKillDisable bool `json:"oom_kill_disable"`

 	// Tuning swappiness behaviour per cgroup
-	MemorySwappiness int64 `json:"memory_swappiness"`
+	MemorySwappiness *uint64 `json:"memory_swappiness"`

 	// Set priority of network traffic for container
 	NetPrioIfpriomap []*IfPrioMap `json:"net_prio_ifpriomap"`

 	// Set class identifier for container's network packets
-	NetClsClassid string `json:"net_cls_classid"`
+	NetClsClassid uint32 `json:"net_cls_classid_u"`
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go
@@ -1,6 +0,0 @@
-// +build !windows,!linux,!freebsd
-
-package configs
-
-type Cgroup struct {
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go
@@ -3,7 +3,13 @@ package configs
 import (
 	"bytes"
 	"encoding/json"
+	"fmt"
 	"os/exec"
+	"time"
+
+	"github.com/opencontainers/runtime-spec/specs-go"
+
+	"github.com/sirupsen/logrus"
 )

 type Rlimit struct {
@@ -29,7 +35,7 @@ type Seccomp struct {
 	Syscalls      []*Syscall `json:"syscalls"`
 }

-// An action to be taken upon rule match in Seccomp
+// Action is taken upon rule match in Seccomp
 type Action int

 const (
@@ -40,7 +46,7 @@ const (
 	Trace
 )

-// A comparison operator to be used when matching syscall arguments in Seccomp
+// Operator is a comparison operator to be used when matching syscall arguments in Seccomp
 type Operator int

 const (
@@ -53,7 +59,7 @@ const (
 	MaskEqualTo
 )

-// A rule to match a specific syscall argument in Seccomp
+// Arg is a rule to match a specific syscall argument in Seccomp
 type Arg struct {
 	Index    uint     `json:"index"`
 	Value    uint64   `json:"value"`
@@ -61,7 +67,7 @@ type Arg struct {
 	Op       Operator `json:"op"`
 }

-// An rule to match a syscall in Seccomp
+// Syscall is a rule to match a syscall in Seccomp
 type Syscall struct {
 	Name   string `json:"name"`
 	Action Action `json:"action"`
@@ -81,11 +87,6 @@ type Config struct {
 	// that the parent process dies.
 	ParentDeathSignal int `json:"parent_death_signal"`

-	// PivotDir allows a custom directory inside the container's root filesystem to be used as pivot, when NoPivotRoot is not set.
-	// When a custom PivotDir not set, a temporary dir inside the root filesystem will be used. The pivot dir needs to be writeable.
-	// This is required when using read only root filesystems. In these cases, a read/writeable path can be (bind) mounted somewhere inside the root filesystem to act as pivot.
-	PivotDir string `json:"pivot_dir"`
-
 	// Path to a directory containing the container's root filesystem.
 	Rootfs string `json:"rootfs"`

@@ -113,8 +114,8 @@ type Config struct {
 	Namespaces Namespaces `json:"namespaces"`

 	// Capabilities specify the capabilities to keep when executing the process inside the container
-	// All capbilities not specified will be dropped from the processes capability mask
-	Capabilities []string `json:"capabilities"`
+	// All capabilities not specified will be dropped from the processes capability mask
+	Capabilities *Capabilities `json:"capabilities"`

 	// Networks specifies the container's network setup to be created
 	Networks []*Network `json:"networks"`
@@ -128,15 +129,15 @@ type Config struct {

 	// AppArmorProfile specifies the profile to apply to the process running in the container and is
 	// change at the time the process is execed
-	AppArmorProfile string `json:"apparmor_profile"`
+	AppArmorProfile string `json:"apparmor_profile,omitempty"`

 	// ProcessLabel specifies the label to apply to the process running in the container.  It is
 	// commonly used by selinux
-	ProcessLabel string `json:"process_label"`
+	ProcessLabel string `json:"process_label,omitempty"`

 	// Rlimits specifies the resource limits, such as max open files, to set in the container
 	// If Rlimits are not set, the container will inherit rlimits from the parent process
-	Rlimits []Rlimit `json:"rlimits"`
+	Rlimits []Rlimit `json:"rlimits,omitempty"`

 	// OomScoreAdj specifies the adjustment to be made by the kernel when calculating oom scores
 	// for a process. Valid values are between the range [-1000, '1000'], where processes with
@@ -144,10 +145,6 @@ type Config struct {
 	// More information about kernel oom score calculation here: https://lwn.net/Articles/317814/
 	OomScoreAdj int `json:"oom_score_adj"`

-	// AdditionalGroups specifies the gids that should be added to supplementary groups
-	// in addition to those that the user belongs to.
-	AdditionalGroups []string `json:"additional_groups"`
-
 	// UidMappings is an array of User ID mappings for User Namespaces
 	UidMappings []IDMap `json:"uid_mappings"`

@@ -171,12 +168,29 @@ type Config struct {
 	// A default action to be taken if no rules match is also given.
 	Seccomp *Seccomp `json:"seccomp"`

+	// NoNewPrivileges controls whether processes in the container can gain additional privileges.
+	NoNewPrivileges bool `json:"no_new_privileges,omitempty"`
+
 	// Hooks are a collection of actions to perform at various container lifecycle events.
-	// Hooks are not able to be marshaled to json but they are also not needed to.
-	Hooks *Hooks `json:"-"`
+	// CommandHooks are serialized to JSON, but other hooks are not.
+	Hooks *Hooks

 	// Version is the version of opencontainer specification that is supported.
 	Version string `json:"version"`
+
+	// Labels are user defined metadata that is stored in the config and populated on the state
+	Labels []string `json:"labels"`
+
+	// NoNewKeyring will not allocate a new session keyring for the container.  It will use the
+	// caller's keyring in this case.
+	NoNewKeyring bool `json:"no_new_keyring"`
+
+	// Rootless specifies whether the container is a rootless container.
+	Rootless bool `json:"rootless"`
+
+	// IntelRdt specifies settings for Intel RDT/CAT group that the container is placed into
+	// to limit the resources (e.g., L3 cache) the container has available
+	IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`
 }

 type Hooks struct {
@@ -191,20 +205,74 @@ type Hooks struct {
 	Poststop []Hook
 }

-// HookState is the payload provided to a hook on execution.
-type HookState struct {
-	Version string `json:"version"`
-	ID      string `json:"id"`
-	Pid     int    `json:"pid"`
-	Root    string `json:"root"`
+type Capabilities struct {
+	// Bounding is the set of capabilities checked by the kernel.
+	Bounding []string
+	// Effective is the set of capabilities checked by the kernel.
+	Effective []string
+	// Inheritable is the capabilities preserved across execve.
+	Inheritable []string
+	// Permitted is the limiting superset for effective capabilities.
+	Permitted []string
+	// Ambient is the ambient set of capabilities that are kept.
+	Ambient []string
 }

+func (hooks *Hooks) UnmarshalJSON(b []byte) error {
+	var state struct {
+		Prestart  []CommandHook
+		Poststart []CommandHook
+		Poststop  []CommandHook
+	}
+
+	if err := json.Unmarshal(b, &state); err != nil {
+		return err
+	}
+
+	deserialize := func(shooks []CommandHook) (hooks []Hook) {
+		for _, shook := range shooks {
+			hooks = append(hooks, shook)
+		}
+
+		return hooks
+	}
+
+	hooks.Prestart = deserialize(state.Prestart)
+	hooks.Poststart = deserialize(state.Poststart)
+	hooks.Poststop = deserialize(state.Poststop)
+	return nil
+}
+
+func (hooks Hooks) MarshalJSON() ([]byte, error) {
+	serialize := func(hooks []Hook) (serializableHooks []CommandHook) {
+		for _, hook := range hooks {
+			switch chook := hook.(type) {
+			case CommandHook:
+				serializableHooks = append(serializableHooks, chook)
+			default:
+				logrus.Warnf("cannot serialize hook of type %T, skipping", hook)
+			}
+		}
+
+		return serializableHooks
+	}
+
+	return json.Marshal(map[string]interface{}{
+		"prestart":  serialize(hooks.Prestart),
+		"poststart": serialize(hooks.Poststart),
+		"poststop":  serialize(hooks.Poststop),
+	})
+}
+
+// HookState is the payload provided to a hook on execution.
+type HookState specs.State
+
 type Hook interface {
 	// Run executes the hook with the provided state.
 	Run(HookState) error
 }

-// NewFunctionHooks will call the provided function when the hook is run.
+// NewFunctionHook will call the provided function when the hook is run.
 func NewFunctionHook(f func(HookState) error) FuncHook {
 	return FuncHook{
 		run: f,
@@ -220,13 +288,14 @@ func (f FuncHook) Run(s HookState) error {
 }

 type Command struct {
-	Path string   `json:"path"`
-	Args []string `json:"args"`
-	Env  []string `json:"env"`
-	Dir  string   `json:"dir"`
+	Path    string         `json:"path"`
+	Args    []string       `json:"args"`
+	Env     []string       `json:"env"`
+	Dir     string         `json:"dir"`
+	Timeout *time.Duration `json:"timeout"`
 }

-// NewCommandHooks will execute the provided command when the hook is run.
+// NewCommandHook will execute the provided command when the hook is run.
 func NewCommandHook(cmd Command) CommandHook {
 	return CommandHook{
 		Command: cmd,
@@ -242,11 +311,38 @@ func (c Command) Run(s HookState) error {
 	if err != nil {
 		return err
 	}
+	var stdout, stderr bytes.Buffer
 	cmd := exec.Cmd{
-		Path:  c.Path,
-		Args:  c.Args,
-		Env:   c.Env,
-		Stdin: bytes.NewReader(b),
+		Path:   c.Path,
+		Args:   c.Args,
+		Env:    c.Env,
+		Stdin:  bytes.NewReader(b),
+		Stdout: &stdout,
+		Stderr: &stderr,
+	}
+	if err := cmd.Start(); err != nil {
+		return err
+	}
+	errC := make(chan error, 1)
+	go func() {
+		err := cmd.Wait()
+		if err != nil {
+			err = fmt.Errorf("error running hook: %v, stdout: %s, stderr: %s", err, stdout.String(), stderr.String())
+		}
+		errC <- err
+	}()
+	var timerCh <-chan time.Time
+	if c.Timeout != nil {
+		timer := time.NewTimer(*c.Timeout)
+		defer timer.Stop()
+		timerCh = timer.C
+	}
+	select {
+	case err := <-errC:
+		return err
+	case <-timerCh:
+		cmd.Process.Kill()
+		cmd.Wait()
+		return fmt.Errorf("hook ran past specified timeout of %.1fs", c.Timeout.Seconds())
 	}
-	return cmd.Run()
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config_linux.go
@@ -0,0 +1,61 @@
+package configs
+
+import "fmt"
+
+// HostUID gets the translated uid for the process on host which could be
+// different when user namespaces are enabled.
+func (c Config) HostUID(containerId int) (int, error) {
+	if c.Namespaces.Contains(NEWUSER) {
+		if c.UidMappings == nil {
+			return -1, fmt.Errorf("User namespaces enabled, but no uid mappings found.")
+		}
+		id, found := c.hostIDFromMapping(containerId, c.UidMappings)
+		if !found {
+			return -1, fmt.Errorf("User namespaces enabled, but no user mapping found.")
+		}
+		return id, nil
+	}
+	// Return unchanged id.
+	return containerId, nil
+}
+
+// HostRootUID gets the root uid for the process on host which could be non-zero
+// when user namespaces are enabled.
+func (c Config) HostRootUID() (int, error) {
+	return c.HostUID(0)
+}
+
+// HostGID gets the translated gid for the process on host which could be
+// different when user namespaces are enabled.
+func (c Config) HostGID(containerId int) (int, error) {
+	if c.Namespaces.Contains(NEWUSER) {
+		if c.GidMappings == nil {
+			return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.")
+		}
+		id, found := c.hostIDFromMapping(containerId, c.GidMappings)
+		if !found {
+			return -1, fmt.Errorf("User namespaces enabled, but no group mapping found.")
+		}
+		return id, nil
+	}
+	// Return unchanged id.
+	return containerId, nil
+}
+
+// HostRootGID gets the root gid for the process on host which could be non-zero
+// when user namespaces are enabled.
+func (c Config) HostRootGID() (int, error) {
+	return c.HostGID(0)
+}
+
+// Utility function that gets a host ID for a container ID from user namespace map
+// if that ID is present in the map.
+func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) {
+	for _, m := range uMap {
+		if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) {
+			hostID := m.HostID + (containerID - m.ContainerID)
+			return hostID, true
+		}
+	}
+	return -1, false
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config_unix.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config_unix.go
@@ -1,51 +0,0 @@
-// +build freebsd linux
-
-package configs
-
-import "fmt"
-
-// Gets the root uid for the process on host which could be non-zero
-// when user namespaces are enabled.
-func (c Config) HostUID() (int, error) {
-	if c.Namespaces.Contains(NEWUSER) {
-		if c.UidMappings == nil {
-			return -1, fmt.Errorf("User namespaces enabled, but no user mappings found.")
-		}
-		id, found := c.hostIDFromMapping(0, c.UidMappings)
-		if !found {
-			return -1, fmt.Errorf("User namespaces enabled, but no root user mapping found.")
-		}
-		return id, nil
-	}
-	// Return default root uid 0
-	return 0, nil
-}
-
-// Gets the root gid for the process on host which could be non-zero
-// when user namespaces are enabled.
-func (c Config) HostGID() (int, error) {
-	if c.Namespaces.Contains(NEWUSER) {
-		if c.GidMappings == nil {
-			return -1, fmt.Errorf("User namespaces enabled, but no gid mappings found.")
-		}
-		id, found := c.hostIDFromMapping(0, c.GidMappings)
-		if !found {
-			return -1, fmt.Errorf("User namespaces enabled, but no root group mapping found.")
-		}
-		return id, nil
-	}
-	// Return default root gid 0
-	return 0, nil
-}
-
-// Utility function that gets a host ID for a container ID from user namespace map
-// if that ID is present in the map.
-func (c Config) hostIDFromMapping(containerID int, uMap []IDMap) (int, bool) {
-	for _, m := range uMap {
-		if (containerID >= m.ContainerID) && (containerID <= (m.ContainerID + m.Size - 1)) {
-			hostID := m.HostID + (containerID - m.ContainerID)
-			return hostID, true
-		}
-	}
-	return -1, false
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config_unix_test.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config_unix_test.go
@@ -1,156 +0,0 @@
-// +build linux freebsd
-
-package configs
-
-import (
-	"encoding/json"
-	"fmt"
-	"os"
-	"path/filepath"
-	"testing"
-)
-
-// Checks whether the expected capability is specified in the capabilities.
-func contains(expected string, values []string) bool {
-	for _, v := range values {
-		if v == expected {
-			return true
-		}
-	}
-	return false
-}
-
-func containsDevice(expected *Device, values []*Device) bool {
-	for _, d := range values {
-		if d.Path == expected.Path &&
-			d.Permissions == expected.Permissions &&
-			d.FileMode == expected.FileMode &&
-			d.Major == expected.Major &&
-			d.Minor == expected.Minor &&
-			d.Type == expected.Type {
-			return true
-		}
-	}
-	return false
-}
-
-func loadConfig(name string) (*Config, error) {
-	f, err := os.Open(filepath.Join("../sample_configs", name))
-	if err != nil {
-		return nil, err
-	}
-	defer f.Close()
-
-	var container *Config
-	if err := json.NewDecoder(f).Decode(&container); err != nil {
-		return nil, err
-	}
-
-	// Check that a config doesn't contain extra fields
-	var configMap, abstractMap map[string]interface{}
-
-	if _, err := f.Seek(0, 0); err != nil {
-		return nil, err
-	}
-
-	if err := json.NewDecoder(f).Decode(&abstractMap); err != nil {
-		return nil, err
-	}
-
-	configData, err := json.Marshal(&container)
-	if err != nil {
-		return nil, err
-	}
-
-	if err := json.Unmarshal(configData, &configMap); err != nil {
-		return nil, err
-	}
-
-	for k := range configMap {
-		delete(abstractMap, k)
-	}
-
-	if len(abstractMap) != 0 {
-		return nil, fmt.Errorf("unknown fields: %s", abstractMap)
-	}
-
-	return container, nil
-}
-
-func TestRemoveNamespace(t *testing.T) {
-	ns := Namespaces{
-		{Type: NEWNET},
-	}
-	if !ns.Remove(NEWNET) {
-		t.Fatal("NEWNET was not removed")
-	}
-	if len(ns) != 0 {
-		t.Fatalf("namespaces should have 0 items but reports %d", len(ns))
-	}
-}
-
-func TestHostUIDNoUSERNS(t *testing.T) {
-	config := &Config{
-		Namespaces: Namespaces{},
-	}
-	uid, err := config.HostUID()
-	if err != nil {
-		t.Fatal(err)
-	}
-	if uid != 0 {
-		t.Fatalf("expected uid 0 with no USERNS but received %d", uid)
-	}
-}
-
-func TestHostUIDWithUSERNS(t *testing.T) {
-	config := &Config{
-		Namespaces: Namespaces{{Type: NEWUSER}},
-		UidMappings: []IDMap{
-			{
-				ContainerID: 0,
-				HostID:      1000,
-				Size:        1,
-			},
-		},
-	}
-	uid, err := config.HostUID()
-	if err != nil {
-		t.Fatal(err)
-	}
-	if uid != 1000 {
-		t.Fatalf("expected uid 1000 with no USERNS but received %d", uid)
-	}
-}
-
-func TestHostGIDNoUSERNS(t *testing.T) {
-	config := &Config{
-		Namespaces: Namespaces{},
-	}
-	uid, err := config.HostGID()
-	if err != nil {
-		t.Fatal(err)
-	}
-	if uid != 0 {
-		t.Fatalf("expected gid 0 with no USERNS but received %d", uid)
-	}
-}
-
-func TestHostGIDWithUSERNS(t *testing.T) {
-	config := &Config{
-		Namespaces: Namespaces{{Type: NEWUSER}},
-		GidMappings: []IDMap{
-			{
-				ContainerID: 0,
-				HostID:      1000,
-				Size:        1,
-			},
-		},
-	}
-	uid, err := config.HostGID()
-	if err != nil {
-		t.Fatal(err)
-	}
-	if uid != 1000 {
-		t.Fatalf("expected gid 1000 with no USERNS but received %d", uid)
-	}
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config_windows_test.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config_windows_test.go
@@ -1,3 +0,0 @@
-package configs
-
-// All current tests are for Unix-specific functionality
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/device_defaults.go
@@ -1,9 +1,9 @@
-// +build linux freebsd
+// +build linux

 package configs

 var (
-	// These are devices that are to be both allowed and created.
+	// DefaultSimpleDevices are devices that are to be both allowed and created.
 	DefaultSimpleDevices = []*Device{
 		// /dev/null and zero
 		{
@@ -107,19 +107,5 @@ var (
 			Permissions: "rwm",
 		},
 	}, DefaultSimpleDevices...)
-	DefaultAutoCreatedDevices = append([]*Device{
-		{
-			// /dev/fuse is created but not allowed.
-			// This is to allow java to work.  Because java
-			// Insists on there being a /dev/fuse
-			// https://github.com/docker/docker/issues/514
-			// https://github.com/docker/docker/issues/2393
-			//
-			Path:        "/dev/fuse",
-			Type:        'c',
-			Major:       10,
-			Minor:       229,
-			Permissions: "rwm",
-		},
-	}, DefaultSimpleDevices...)
+	DefaultAutoCreatedDevices = append([]*Device{}, DefaultSimpleDevices...)
 )
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/intelrdt.go
@@ -0,0 +1,7 @@
+package configs
+
+type IntelRdt struct {
+	// The schema for L3 cache id and capacity bitmask (CBM)
+	// Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
+	L3CacheSchema string `json:"l3_cache_schema,omitempty"`
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/mount.go
@@ -1,5 +1,11 @@
 package configs

+const (
+	// EXT_COPYUP is a directive to copy up the contents of a directory when
+	// a tmpfs is mounted over it.
+	EXT_COPYUP = 1 << iota
+)
+
 type Mount struct {
 	// Source path for the mount.
 	Source string `json:"source"`
@@ -22,6 +28,9 @@ type Mount struct {
 	// Relabel source if set, "z" indicates shared, "Z" indicates unshared.
 	Relabel string `json:"relabel"`

+	// Extensions are additional flags that are specific to runc.
+	Extensions int `json:"extensions"`
+
 	// Optional Command to be run before Source is mounted.
 	PremountCmds []Command `json:"premount_cmds"`

--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_linux.go
@@ -1,8 +1,10 @@
-// +build linux freebsd
-
 package configs

-import "fmt"
+import (
+	"fmt"
+	"os"
+	"sync"
+)

 const (
 	NEWNET  NamespaceType = "NEWNET"
@@ -13,14 +15,59 @@ const (
 	NEWUSER NamespaceType = "NEWUSER"
 )

+var (
+	nsLock              sync.Mutex
+	supportedNamespaces = make(map[NamespaceType]bool)
+)
+
+// NsName converts the namespace type to its filename
+func NsName(ns NamespaceType) string {
+	switch ns {
+	case NEWNET:
+		return "net"
+	case NEWNS:
+		return "mnt"
+	case NEWPID:
+		return "pid"
+	case NEWIPC:
+		return "ipc"
+	case NEWUSER:
+		return "user"
+	case NEWUTS:
+		return "uts"
+	}
+	return ""
+}
+
+// IsNamespaceSupported returns whether a namespace is available or
+// not
+func IsNamespaceSupported(ns NamespaceType) bool {
+	nsLock.Lock()
+	defer nsLock.Unlock()
+	supported, ok := supportedNamespaces[ns]
+	if ok {
+		return supported
+	}
+	nsFile := NsName(ns)
+	// if the namespace type is unknown, just return false
+	if nsFile == "" {
+		return false
+	}
+	_, err := os.Stat(fmt.Sprintf("/proc/self/ns/%s", nsFile))
+	// a namespace is supported if it exists and we have permissions to read it
+	supported = err == nil
+	supportedNamespaces[ns] = supported
+	return supported
+}
+
 func NamespaceTypes() []NamespaceType {
 	return []NamespaceType{
+		NEWUSER, // Keep user NS always first, don't move it.
+		NEWIPC,
+		NEWUTS,
 		NEWNET,
 		NEWPID,
 		NEWNS,
-		NEWUTS,
-		NEWIPC,
-		NEWUSER,
 	}
 }

@@ -32,29 +79,7 @@ type Namespace struct {
 }

 func (n *Namespace) GetPath(pid int) string {
-	if n.Path != "" {
-		return n.Path
-	}
-	return fmt.Sprintf("/proc/%d/ns/%s", pid, n.file())
-}
-
-func (n *Namespace) file() string {
-	file := ""
-	switch n.Type {
-	case NEWNET:
-		file = "net"
-	case NEWNS:
-		file = "mnt"
-	case NEWPID:
-		file = "pid"
-	case NEWIPC:
-		file = "ipc"
-	case NEWUSER:
-		file = "user"
-	case NEWUTS:
-		file = "uts"
-	}
-	return file
+	return fmt.Sprintf("/proc/%d/ns/%s", pid, NsName(n.Type))
 }

 func (n *Namespaces) Remove(t NamespaceType) bool {
@@ -87,3 +112,11 @@ func (n *Namespaces) index(t NamespaceType) int {
 func (n *Namespaces) Contains(t NamespaceType) bool {
 	return n.index(t) != -1
 }
+
+func (n *Namespaces) PathOf(t NamespaceType) string {
+	i := n.index(t)
+	if i == -1 {
+		return ""
+	}
+	return (*n)[i].Path
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go
@@ -2,23 +2,23 @@

 package configs

-import "syscall"
+import "golang.org/x/sys/unix"

 func (n *Namespace) Syscall() int {
 	return namespaceInfo[n.Type]
 }

 var namespaceInfo = map[NamespaceType]int{
-	NEWNET:  syscall.CLONE_NEWNET,
-	NEWNS:   syscall.CLONE_NEWNS,
-	NEWUSER: syscall.CLONE_NEWUSER,
-	NEWIPC:  syscall.CLONE_NEWIPC,
-	NEWUTS:  syscall.CLONE_NEWUTS,
-	NEWPID:  syscall.CLONE_NEWPID,
+	NEWNET:  unix.CLONE_NEWNET,
+	NEWNS:   unix.CLONE_NEWNS,
+	NEWUSER: unix.CLONE_NEWUSER,
+	NEWIPC:  unix.CLONE_NEWIPC,
+	NEWUTS:  unix.CLONE_NEWUTS,
+	NEWPID:  unix.CLONE_NEWPID,
 }

 // CloneFlags parses the container's Namespaces options to set the correct
-// flags on clone, unshare. This functions returns flags only for new namespaces.
+// flags on clone, unshare. This function returns flags only for new namespaces.
 func (n *Namespaces) CloneFlags() uintptr {
 	var flag int
 	for _, v := range *n {
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go
@@ -4,12 +4,10 @@ package configs

 func (n *Namespace) Syscall() int {
 	panic("No namespace syscall support")
-	return 0
 }

 // CloneFlags parses the container's Namespaces options to set the correct
-// flags on clone, unshare. This functions returns flags only for new namespaces.
+// flags on clone, unshare. This function returns flags only for new namespaces.
 func (n *Namespaces) CloneFlags() uintptr {
 	panic("No namespace syscall support")
-	return uintptr(0)
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go
@@ -1,4 +1,4 @@
-// +build !linux,!freebsd
+// +build !linux

 package configs

--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/config.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/config.go
@@ -1,93 +0,0 @@
-package validate
-
-import (
-	"fmt"
-	"os"
-	"path/filepath"
-
-	"github.com/opencontainers/runc/libcontainer/configs"
-)
-
-type Validator interface {
-	Validate(*configs.Config) error
-}
-
-func New() Validator {
-	return &ConfigValidator{}
-}
-
-type ConfigValidator struct {
-}
-
-func (v *ConfigValidator) Validate(config *configs.Config) error {
-	if err := v.rootfs(config); err != nil {
-		return err
-	}
-	if err := v.network(config); err != nil {
-		return err
-	}
-	if err := v.hostname(config); err != nil {
-		return err
-	}
-	if err := v.security(config); err != nil {
-		return err
-	}
-	if err := v.usernamespace(config); err != nil {
-		return err
-	}
-	return nil
-}
-
-// rootfs validates the the rootfs is an absolute path and is not a symlink
-// to the container's root filesystem.
-func (v *ConfigValidator) rootfs(config *configs.Config) error {
-	cleaned, err := filepath.Abs(config.Rootfs)
-	if err != nil {
-		return err
-	}
-	if cleaned, err = filepath.EvalSymlinks(cleaned); err != nil {
-		return err
-	}
-	if config.Rootfs != cleaned {
-		return fmt.Errorf("%s is not an absolute path or is a symlink", config.Rootfs)
-	}
-	return nil
-}
-
-func (v *ConfigValidator) network(config *configs.Config) error {
-	if !config.Namespaces.Contains(configs.NEWNET) {
-		if len(config.Networks) > 0 || len(config.Routes) > 0 {
-			return fmt.Errorf("unable to apply network settings without a private NET namespace")
-		}
-	}
-	return nil
-}
-
-func (v *ConfigValidator) hostname(config *configs.Config) error {
-	if config.Hostname != "" && !config.Namespaces.Contains(configs.NEWUTS) {
-		return fmt.Errorf("unable to set hostname without a private UTS namespace")
-	}
-	return nil
-}
-
-func (v *ConfigValidator) security(config *configs.Config) error {
-	// restrict sys without mount namespace
-	if (len(config.MaskPaths) > 0 || len(config.ReadonlyPaths) > 0) &&
-		!config.Namespaces.Contains(configs.NEWNS) {
-		return fmt.Errorf("unable to restrict sys entries without a private MNT namespace")
-	}
-	return nil
-}
-
-func (v *ConfigValidator) usernamespace(config *configs.Config) error {
-	if config.Namespaces.Contains(configs.NEWUSER) {
-		if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) {
-			return fmt.Errorf("USER namespaces aren't enabled in the kernel")
-		}
-	} else {
-		if config.UidMappings != nil || config.GidMappings != nil {
-			return fmt.Errorf("User namespace mappings specified, but USER namespace isn't enabled in the config")
-		}
-	}
-	return nil
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go
@@ -0,0 +1,117 @@
+package validate
+
+import (
+	"fmt"
+	"os"
+	"reflect"
+	"strings"
+
+	"github.com/opencontainers/runc/libcontainer/configs"
+)
+
+var (
+	geteuid = os.Geteuid
+	getegid = os.Getegid
+)
+
+func (v *ConfigValidator) rootless(config *configs.Config) error {
+	if err := rootlessMappings(config); err != nil {
+		return err
+	}
+	if err := rootlessMount(config); err != nil {
+		return err
+	}
+
+	// XXX: We currently can't verify the user config at all, because
+	//      configs.Config doesn't store the user-related configs. So this
+	//      has to be verified by setupUser() in init_linux.go.
+
+	return nil
+}
+
+func hasIDMapping(id int, mappings []configs.IDMap) bool {
+	for _, m := range mappings {
+		if id >= m.ContainerID && id < m.ContainerID+m.Size {
+			return true
+		}
+	}
+	return false
+}
+
+// rootlessMappings checks that a rootless config uses a user namespace (when
+// the effective uid is not 0) and has at least one UID and one GID mapping.
+func rootlessMappings(config *configs.Config) error {
+	if geteuid() != 0 && !config.Namespaces.Contains(configs.NEWUSER) {
+		return fmt.Errorf("rootless containers require user namespaces")
+	}
+
+	if len(config.UidMappings) == 0 {
+		return fmt.Errorf("rootless containers requires at least one UID mapping")
+	}
+	if len(config.GidMappings) == 0 {
+		return fmt.Errorf("rootless containers requires at least one GID mapping")
+	}
+
+	return nil
+}
+
+// rootlessCgroup returns an error if config.Cgroups.Resources holds any non-zero value.
+func rootlessCgroup(config *configs.Config) error {
+	// Nothing set at all.
+	if config.Cgroups == nil || config.Cgroups.Resources == nil {
+		return nil
+	}
+
+	// Build the zero value of the Resources type to compare against.
+	left := reflect.ValueOf(*config.Cgroups.Resources)
+	right := reflect.Zero(left.Type())
+
+	// This is all we need to do, since specconv won't add cgroup options in
+	// rootless mode.
+	if !reflect.DeepEqual(left.Interface(), right.Interface()) {
+		return fmt.Errorf("cannot specify resource limits in rootless container")
+	}
+
+	return nil
+}
+
+// rootlessMount checks every entry of config.Mounts and returns an error if
+// its Data string has a `uid=` or `gid=` option naming an ID that is not
+// covered by config.UidMappings or config.GidMappings respectively.
+func rootlessMount(config *configs.Config) error {
+	// XXX: We could whitelist allowed devices at this point, but I'm not
+	//      convinced that's a good idea. The kernel is the best arbiter of
+	//      access control.
+
+	for _, mount := range config.Mounts {
+		// Check that the options list doesn't contain any uid= or gid= entries
+		// that refer to an ID without a mapping.
+		for _, opt := range strings.Split(mount.Data, ",") {
+			if strings.HasPrefix(opt, "uid=") {
+				var uid int
+				n, err := fmt.Sscanf(opt, "uid=%d", &uid)
+				if n != 1 || err != nil {
+					// Ignore options whose value does not parse as an integer.
+					continue
+				}
+				if !hasIDMapping(uid, config.UidMappings) {
+					return fmt.Errorf("cannot specify uid= mount options for unmapped uid in rootless containers")
+				}
+			}
+
+			if strings.HasPrefix(opt, "gid=") {
+				var gid int
+				n, err := fmt.Sscanf(opt, "gid=%d", &gid)
+				if n != 1 || err != nil {
+					// Ignore options whose value does not parse as an integer.
+					continue
+				}
+				if !hasIDMapping(gid, config.GidMappings) {
+					return fmt.Errorf("cannot specify gid= mount options for unmapped gid in rootless containers")
+				}
+			}
+		}
+	}
+
+	return nil
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go
@@ -0,0 +1,212 @@
+package validate
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/opencontainers/runc/libcontainer/configs"
+	"github.com/opencontainers/runc/libcontainer/intelrdt"
+	selinux "github.com/opencontainers/selinux/go-selinux"
+)
+
+type Validator interface {
+	Validate(*configs.Config) error
+}
+
+func New() Validator {
+	return &ConfigValidator{}
+}
+
+type ConfigValidator struct {
+}
+
+func (v *ConfigValidator) Validate(config *configs.Config) error {
+	if err := v.rootfs(config); err != nil {
+		return err
+	}
+	if err := v.network(config); err != nil {
+		return err
+	}
+	if err := v.hostname(config); err != nil {
+		return err
+	}
+	if err := v.security(config); err != nil {
+		return err
+	}
+	if err := v.usernamespace(config); err != nil {
+		return err
+	}
+	if err := v.sysctl(config); err != nil {
+		return err
+	}
+	if err := v.intelrdt(config); err != nil {
+		return err
+	}
+	if config.Rootless {
+		if err := v.rootless(config); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// rootfs validates if the rootfs is an absolute path and is not a symlink
+// to the container's root filesystem.
+func (v *ConfigValidator) rootfs(config *configs.Config) error {
+	if _, err := os.Stat(config.Rootfs); err != nil {
+		if os.IsNotExist(err) {
+			return fmt.Errorf("rootfs (%s) does not exist", config.Rootfs)
+		}
+		return err
+	}
+	cleaned, err := filepath.Abs(config.Rootfs)
+	if err != nil {
+		return err
+	}
+	if cleaned, err = filepath.EvalSymlinks(cleaned); err != nil {
+		return err
+	}
+	if filepath.Clean(config.Rootfs) != cleaned {
+		return fmt.Errorf("%s is not an absolute path or is a symlink", config.Rootfs)
+	}
+	return nil
+}
+
+func (v *ConfigValidator) network(config *configs.Config) error {
+	if !config.Namespaces.Contains(configs.NEWNET) {
+		if len(config.Networks) > 0 || len(config.Routes) > 0 {
+			return fmt.Errorf("unable to apply network settings without a private NET namespace")
+		}
+	}
+	return nil
+}
+
+func (v *ConfigValidator) hostname(config *configs.Config) error {
+	if config.Hostname != "" && !config.Namespaces.Contains(configs.NEWUTS) {
+		return fmt.Errorf("unable to set hostname without a private UTS namespace")
+	}
+	return nil
+}
+
+func (v *ConfigValidator) security(config *configs.Config) error {
+	// restrict sys without mount namespace
+	if (len(config.MaskPaths) > 0 || len(config.ReadonlyPaths) > 0) &&
+		!config.Namespaces.Contains(configs.NEWNS) {
+		return fmt.Errorf("unable to restrict sys entries without a private MNT namespace")
+	}
+	if config.ProcessLabel != "" && !selinux.GetEnabled() {
+		return fmt.Errorf("selinux label is specified in config, but selinux is disabled or not supported")
+	}
+
+	return nil
+}
+
+func (v *ConfigValidator) usernamespace(config *configs.Config) error {
+	if config.Namespaces.Contains(configs.NEWUSER) {
+		if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) {
+			return fmt.Errorf("USER namespaces aren't enabled in the kernel")
+		}
+	} else {
+		if config.UidMappings != nil || config.GidMappings != nil {
+			return fmt.Errorf("User namespace mappings specified, but USER namespace isn't enabled in the config")
+		}
+	}
+	return nil
+}
+
+// sysctl validates that every specified sysctl key is permitted.
+// /proc/sys isn't completely namespaced and depending on which namespaces
+// are specified, a subset of sysctls are permitted.
+func (v *ConfigValidator) sysctl(config *configs.Config) error {
+	validSysctlMap := map[string]bool{
+		"kernel.msgmax":          true,
+		"kernel.msgmnb":          true,
+		"kernel.msgmni":          true,
+		"kernel.sem":             true,
+		"kernel.shmall":          true,
+		"kernel.shmmax":          true,
+		"kernel.shmmni":          true,
+		"kernel.shm_rmid_forced": true,
+	}
+
+	for s := range config.Sysctl {
+		if validSysctlMap[s] || strings.HasPrefix(s, "fs.mqueue.") {
+			if config.Namespaces.Contains(configs.NEWIPC) {
+				continue
+			} else {
+				return fmt.Errorf("sysctl %q is not allowed in the hosts ipc namespace", s)
+			}
+		}
+		if strings.HasPrefix(s, "net.") {
+			if config.Namespaces.Contains(configs.NEWNET) {
+				if path := config.Namespaces.PathOf(configs.NEWNET); path != "" {
+					if err := checkHostNs(s, path); err != nil {
+						return err
+					}
+				}
+				continue
+			} else {
+				return fmt.Errorf("sysctl %q is not allowed in the hosts network namespace", s)
+			}
+		}
+		return fmt.Errorf("sysctl %q is not in a separate kernel namespace", s)
+	}
+
+	return nil
+}
+
+func (v *ConfigValidator) intelrdt(config *configs.Config) error {
+	if config.IntelRdt != nil {
+		if !intelrdt.IsEnabled() {
+			return fmt.Errorf("intelRdt is specified in config, but Intel RDT feature is not supported or enabled")
+		}
+		if config.IntelRdt.L3CacheSchema == "" {
+			return fmt.Errorf("intelRdt is specified in config, but intelRdt.l3CacheSchema is empty")
+		}
+	}
+
+	return nil
+}
+
+// isSymbolicLink reports whether path itself (not its target) is a symlink.
+func isSymbolicLink(path string) (bool, error) {
+	info, err := os.Lstat(path)
+	if err != nil {
+		return false, err
+	}
+	return info.Mode()&os.ModeSymlink != 0, nil
+}
+
+// checkHostNs returns an error when path is a symlink with the same target as /proc/self/ns/net.
+func checkHostNs(sysctlConfig string, path string) error {
+	var currentProcessNetns = "/proc/self/ns/net"
+	// readlink on the current processes network namespace
+	destOfCurrentProcess, err := os.Readlink(currentProcessNetns)
+	if err != nil {
+		return fmt.Errorf("read soft link %q error: %v", currentProcessNetns, err)
+	}
+
+	// First check if the provided path is a symbolic link
+	symLink, err := isSymbolicLink(path)
+	if err != nil {
+		return fmt.Errorf("could not check that %q is a symlink: %v", path, err)
+	}
+
+	if !symLink {
+		// The provided namespace is not a symbolic link,
+		// it is not the host namespace.
+		return nil
+	}
+
+	// readlink on the path provided in the struct
+	destOfContainer, err := os.Readlink(path)
+	if err != nil {
+		return fmt.Errorf("read soft link %q error: %v", path, err)
+	}
+	if destOfContainer == destOfCurrentProcess {
+		return fmt.Errorf("sysctl %q is not allowed in the hosts network namespace", sysctlConfig)
+	}
+	return nil
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/console_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/console_linux.go
@@ -0,0 +1,41 @@
+package libcontainer
+
+import (
+	"os"
+
+	"golang.org/x/sys/unix"
+)
+
+// mount initializes the console inside the rootfs mounting with the specified mount label
+// and applying the correct ownership of the console.
+func mountConsole(slavePath string) error {
+	oldMask := unix.Umask(0000)
+	defer unix.Umask(oldMask)
+	f, err := os.Create("/dev/console")
+	if err != nil && !os.IsExist(err) {
+		return err
+	}
+	if f != nil {
+		f.Close()
+	}
+	return unix.Mount(slavePath, "/dev/console", "bind", unix.MS_BIND, "")
+}
+
+// dupStdio opens slavePath read-write and duplicates the resulting descriptor
+// onto fds 0, 1 and 2 of the current process. An open failure is reported as
+// an *os.PathError; a dup3 failure is returned as is.
+func dupStdio(slavePath string) error {
+	slaveFd, openErr := unix.Open(slavePath, unix.O_RDWR, 0)
+	if openErr != nil {
+		return &os.PathError{Op: "open", Path: slavePath, Err: openErr}
+	}
+	for target := 0; target <= 2; target++ {
+		if dupErr := unix.Dup3(slaveFd, target, 0); dupErr != nil {
+			return dupErr
+		}
+	}
+	return nil
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/container.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/container.go
@@ -0,0 +1,166 @@
+// Package libcontainer provides a native Go implementation for creating containers
+// with namespaces, cgroups, capabilities, and filesystem access controls.
+// It allows you to manage the lifecycle of the container and to perform
+// additional operations after the container is created.
+package libcontainer
+
+import (
+	"os"
+	"time"
+
+	"github.com/opencontainers/runc/libcontainer/configs"
+)
+
+// Status is the status of a container.
+type Status int
+
+// The Status values below are assigned with iota in the order listed,
+// starting at 0 for Created.
+const (
+	// Created is the status that denotes the container exists but has not been run yet.
+	Created Status = iota
+	// Running is the status that denotes the container exists and is running.
+	Running
+	// Pausing is the status that denotes the container exists, it is in the process of being paused.
+	Pausing
+	// Paused is the status that denotes the container exists, but all its processes are paused.
+	Paused
+	// Stopped is the status that denotes the container does not have a created or running process.
+	Stopped
+)
+
+// String returns the lower-case name of the status, or "unknown" for a value
+// that is not one of the declared Status constants.
+func (s Status) String() string {
+	names := [...]string{
+		Created: "created",
+		Running: "running",
+		Pausing: "pausing",
+		Paused:  "paused",
+		Stopped: "stopped",
+	}
+	if s < 0 || int(s) >= len(names) {
+		return "unknown"
+	}
+	return names[s]
+}
+
+// BaseState represents the platform agnostic pieces relating to a
+// running container's state. Every field carries a JSON tag, so the struct
+// can be encoded to and decoded from JSON.
+type BaseState struct {
+	// ID is the container ID.
+	ID string `json:"id"`
+
+	// InitProcessPid is the init process id in the parent namespace.
+	InitProcessPid int `json:"init_process_pid"`
+
+	// InitProcessStartTime is the init process start time in clock cycles since boot time.
+	InitProcessStartTime uint64 `json:"init_process_start"`
+
+	// Created is the creation time of the container, stored as a time.Time.
+	// NOTE(review): the original comment describes it as a UTC unix
+	// timestamp; confirm the time zone against the code that sets it.
+	Created time.Time `json:"created"`
+
+	// Config is the container's configuration.
+	Config configs.Config `json:"config"`
+}
+
+// BaseContainer is a libcontainer container object.
+//
+// Each container is thread-safe within the same process. Since a container can
+// be destroyed by a separate process, any function may return that the container
+// was not found. BaseContainer includes methods that are platform agnostic.
+type BaseContainer interface {
+	// ID returns the ID of the container.
+	ID() string
+
+	// Status returns the current status of the container.
+	//
+	// errors:
+	// ContainerNotExists - Container no longer exists,
+	// SystemError - System error.
+	Status() (Status, error)
+
+	// State returns the current container's state information.
+	//
+	// errors:
+	// SystemError - System error.
+	State() (*State, error)
+
+	// Config returns the current config of the container.
+	Config() configs.Config
+
+	// Processes returns the PIDs inside this container. The PIDs are in the
+	// namespace of the calling process.
+	//
+	// errors:
+	// ContainerNotExists - Container no longer exists,
+	// SystemError - System error.
+	//
+	// Some of the returned PIDs may no longer refer to processes in the Container, unless
+	// the Container state is PAUSED in which case every PID in the slice is valid.
+	Processes() ([]int, error)
+
+	// Stats returns statistics for the container.
+	//
+	// errors:
+	// ContainerNotExists - Container no longer exists,
+	// SystemError - System error.
+	Stats() (*Stats, error)
+
+	// Set applies the resources of the container as configured.
+	//
+	// We can use this to change resources when containers are running.
+	//
+	// errors:
+	// SystemError - System error.
+	Set(config configs.Config) error
+
+	// Start starts a process inside the container. Returns error if process fails to
+	// start. You can track process lifecycle with passed Process structure.
+	//
+	// errors:
+	// ContainerNotExists - Container no longer exists,
+	// ConfigInvalid - config is invalid,
+	// ContainerPaused - Container is paused,
+	// SystemError - System error.
+	Start(process *Process) (err error)
+
+	// Run immediately starts the process inside the container. Returns error if process
+	// fails to start. It does not block waiting for the exec fifo after start returns but
+	// opens the fifo after start returns.
+	//
+	// errors:
+	// ContainerNotExists - Container no longer exists,
+	// ConfigInvalid - config is invalid,
+	// ContainerPaused - Container is paused,
+	// SystemError - System error.
+	Run(process *Process) (err error)
+
+	// Destroy destroys the container, if it is in a valid state, after killing any
+	// remaining running processes.
+	//
+	// Any event registrations are removed before the container is destroyed.
+	// No error is returned if the container is already destroyed.
+	//
+	// Running containers must first be stopped using Signal(..).
+	// Paused containers must first be resumed using Resume(..).
+	//
+	// errors:
+	// ContainerNotStopped - Container is still running,
+	// ContainerPaused - Container is paused,
+	// SystemError - System error.
+	Destroy() error
+
+	// Signal sends the provided signal code to the container's initial process.
+	//
+	// If all is specified the signal is sent to all processes in the container
+	// including the initial process.
+	//
+	// errors:
+	// SystemError - System error.
+	Signal(s os.Signal, all bool) error
+
+	// Exec signals the container to exec the user's process at the end of the init.
+	//
+	// errors:
+	// SystemError - System error.
+	Exec() error
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/container_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/container_linux.go
--- a/vendor/github.com/opencontainers/runc/libcontainer/criu_opts_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/criu_opts_linux.go
@@ -0,0 +1,40 @@
+package libcontainer
+
+// cgMode is the cgroup restoring strategy provided by criu.
+type cgMode uint32
+
+// The modes are numbered with "3 + iota", so CRIU_CG_MODE_SOFT is 3,
+// CRIU_CG_MODE_FULL is 4, CRIU_CG_MODE_STRICT is 5 and CRIU_CG_MODE_DEFAULT
+// is 6. Note that CRIU_CG_MODE_DEFAULT is a distinct numeric value even
+// though its comment describes it as equivalent to CRIU_CG_MODE_SOFT.
+const (
+	CRIU_CG_MODE_SOFT    cgMode = 3 + iota // restore cgroup properties if only dir created by criu
+	CRIU_CG_MODE_FULL                      // always restore all cgroups and their properties
+	CRIU_CG_MODE_STRICT                    // restore all, requiring them to not present in the system
+	CRIU_CG_MODE_DEFAULT                   // the same as CRIU_CG_MODE_SOFT
+)
+
+// CriuPageServerInfo holds the address and port of a CRIU page server.
+type CriuPageServerInfo struct {
+	Address string // IP address of CRIU page server
+	Port    int32  // port number of CRIU page server
+}
+
+// VethPairName names the two ends of a veth pair: the interface inside the
+// container and its peer on the host.
+type VethPairName struct {
+	ContainerInterfaceName string
+	HostInterfaceName      string
+}
+
+// CriuOpts groups the CRIU-related options; each field is described by its
+// trailing comment.
+type CriuOpts struct {
+	ImagesDirectory         string             // directory for storing image files
+	WorkDirectory           string             // directory to cd and write logs/pidfiles/stats to
+	ParentImage             string             // directory for storing parent image files in pre-dump and dump
+	LeaveRunning            bool               // leave container in running state after checkpoint
+	TcpEstablished          bool               // checkpoint/restore established TCP connections
+	ExternalUnixConnections bool               // allow external unix connections
+	ShellJob                bool               // allow to dump and restore shell jobs
+	FileLocks               bool               // handle file locks, for safety
+	PreDump                 bool               // call criu predump to perform iterative checkpoint
+	PageServer              CriuPageServerInfo // allow to dump to criu page server
+	VethPairs               []VethPairName     // pass the veth to criu when restore
+	ManageCgroupsMode       cgMode             // dump or restore cgroup mode
+	EmptyNs                 uint32             // don't c/r properties for namespace from this mask
+	AutoDedup               bool               // auto deduplication for incremental dumps
+	LazyPages               bool               // restore memory pages lazily using userfaultfd
+	StatusFd                string             // fd for feedback when lazy server is ready
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/criurpc/criurpc.pb.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/criurpc/criurpc.pb.go
--- a/vendor/github.com/opencontainers/runc/libcontainer/devices/devices.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/devices/devices.go
@@ -0,0 +1,104 @@
+package devices
+
+import (
+	"errors"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+
+	"github.com/opencontainers/runc/libcontainer/configs"
+
+	"golang.org/x/sys/unix"
+)
+
+var (
+	// ErrNotADevice is returned by DeviceFromPath when the inspected path
+	// is not a device node.
+	ErrNotADevice = errors.New("not a device node")
+)
+
+// Testing dependencies: the functions below are called through these
+// package-level variables so they can be replaced.
+var (
+	unixLstat     = unix.Lstat
+	ioutilReadDir = ioutil.ReadDir
+)
+
+// DeviceFromPath looks up the information about the linux device at path and
+// returns it as a Device struct.
+//
+// permissions is the device's cgroup permissions string (which cannot be
+// easily queried); it is copied into the result unchanged.
+//
+// The path is inspected with lstat, so symbolic links are not followed.
+// ErrNotADevice is returned when the device major number is 0 or when the
+// file type is neither a block nor a character device; lstat errors are
+// returned unchanged.
+func DeviceFromPath(path, permissions string) (*configs.Device, error) {
+	var stat unix.Stat_t
+	err := unixLstat(path, &stat)
+	if err != nil {
+		return nil, err
+	}
+
+	var (
+		devNumber = stat.Rdev
+		major     = unix.Major(devNumber)
+	)
+	if major == 0 {
+		return nil, ErrNotADevice
+	}
+
+	var (
+		devType rune
+		mode    = stat.Mode
+	)
+	// Compare the whole file-type field. Testing a single type constant as
+	// a bit mask would also match other file types that share its bits
+	// (S_IFCHR's bit is set in S_IFBLK and S_IFLNK as well).
+	switch mode & unix.S_IFMT {
+	case unix.S_IFBLK:
+		devType = 'b'
+	case unix.S_IFCHR:
+		devType = 'c'
+	default:
+		// Never hand back a Device with an unset type.
+		return nil, ErrNotADevice
+	}
+	return &configs.Device{
+		Type:  devType,
+		Path:  path,
+		Major: int64(major),
+		Minor: int64(unix.Minor(devNumber)),
+		Permissions: permissions,
+		// NOTE(review): this is the raw st_mode converted to os.FileMode;
+		// the type bits are not translated to os.Mode* flags.
+		FileMode: os.FileMode(mode),
+		Uid:      stat.Uid,
+		Gid:      stat.Gid,
+	}, nil
+}
+
+func HostDevices() ([]*configs.Device, error) {
+	return getDevices("/dev")
+}
+
+// getDevices collects the devices below path, descending into
+// sub-directories. A fixed set of directories and any non-directory entry
+// named "console" are skipped, as are entries that are not device nodes or
+// that no longer exist when inspected. Every device is reported with "rwm"
+// permissions.
+func getDevices(path string) ([]*configs.Device, error) {
+	entries, err := ioutilReadDir(path)
+	if err != nil {
+		return nil, err
+	}
+
+	found := make([]*configs.Device, 0)
+	for _, entry := range entries {
+		name := entry.Name()
+		fullPath := filepath.Join(path, name)
+
+		if entry.IsDir() {
+			switch name {
+			// ".lxc" & ".lxd-mounts" added to address https://github.com/lxc/lxd/issues/2825
+			case "pts", "shm", "fd", "mqueue", ".lxc", ".lxd-mounts":
+				continue
+			}
+			nested, err := getDevices(fullPath)
+			if err != nil {
+				return nil, err
+			}
+			found = append(found, nested...)
+			continue
+		}
+
+		if name == "console" {
+			continue
+		}
+
+		device, err := DeviceFromPath(fullPath, "rwm")
+		if err != nil {
+			// Non-devices and entries that vanished meanwhile are ignored.
+			if err == ErrNotADevice || os.IsNotExist(err) {
+				continue
+			}
+			return nil, err
+		}
+		found = append(found, device)
+	}
+	return found, nil
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/error.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/error.go
@@ -0,0 +1,70 @@
+package libcontainer
+
+import "io"
+
+// ErrorCode is the API error code type.
+type ErrorCode int
+
+// API error codes. The values are assigned with iota in the order listed,
+// starting at 0 for IdInUse.
+const (
+	// Factory errors
+	IdInUse ErrorCode = iota
+	InvalidIdFormat
+
+	// Container errors
+	ContainerNotExists
+	ContainerPaused
+	ContainerNotStopped
+	ContainerNotRunning
+	ContainerNotPaused
+
+	// Process errors
+	NoProcessOps
+
+	// Common errors
+	ConfigInvalid
+	ConsoleExists
+	SystemError
+)
+
+// String returns the human readable description of the error code, or
+// "Unknown error" for a value that is not one of the declared codes.
+func (c ErrorCode) String() string {
+	text := "Unknown error"
+	switch c {
+	// Factory errors
+	case IdInUse:
+		text = "Id already in use"
+	case InvalidIdFormat:
+		text = "Invalid format"
+	// Container errors
+	case ContainerNotExists:
+		text = "Container does not exist"
+	case ContainerPaused:
+		text = "Container paused"
+	case ContainerNotStopped:
+		text = "Container is not stopped"
+	case ContainerNotRunning:
+		text = "Container is not running"
+	case ContainerNotPaused:
+		text = "Container is not paused"
+	// Process errors
+	case NoProcessOps:
+		text = "No process operations"
+	// Common errors
+	case ConfigInvalid:
+		text = "Invalid configuration"
+	case ConsoleExists:
+		text = "Console exists for process"
+	case SystemError:
+		text = "System error"
+	}
+	return text
+}
+
+// Error is the API error type. It extends the built-in error interface with
+// an error code and a detailed report.
+type Error interface {
+	error
+
+	// Detail writes the detail of the Error to w and returns an error if
+	// that write failed. The detail of the Error may include the error
+	// message and a representation of the stack trace.
+	Detail(w io.Writer) error
+
+	// Code returns the error code for this error.
+	Code() ErrorCode
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/factory.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/factory.go
@@ -0,0 +1,44 @@
+package libcontainer
+
+import (
+	"github.com/opencontainers/runc/libcontainer/configs"
+)
+
+// Factory creates new containers and loads existing ones.
+type Factory interface {
+	// Create creates a new container with the given id and starts the initial process inside it.
+	// id must be a string containing only letters, digits and underscores and must contain
+	// between 1 and 1024 characters, inclusive.
+	//
+	// The id must not already be in use by an existing container. Containers created using
+	// a factory with the same path (and filesystem) must have distinct ids.
+	//
+	// Returns the new container with a running process.
+	//
+	// errors:
+	// IdInUse - id is already in use by a container
+	// InvalidIdFormat - id has incorrect format
+	// ConfigInvalid - config is invalid
+	// SystemError - System error
+	//
+	// On error, any partially created container parts are cleaned up (the operation is atomic).
+	Create(id string, config *configs.Config) (Container, error)
+
+	// Load takes an ID for an existing container and returns the container information
+	// from the state.  This presents a read only view of the container.
+	//
+	// errors:
+	// Path does not exist
+	// System error
+	Load(id string) (Container, error)
+
+	// StartInitialization is an internal API to libcontainer used during the reexec of the
+	// container.
+	//
+	// Errors:
+	// Pipe connection error
+	// System error
+	StartInitialization() error
+
+	// Type returns info string about factory type (e.g. lxc, libcontainer...)
+	Type() string
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/factory_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/factory_linux.go
@@ -0,0 +1,364 @@
+// +build linux
+
+package libcontainer
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"regexp"
+	"runtime/debug"
+	"strconv"
+
+	"github.com/opencontainers/runc/libcontainer/cgroups"
+	"github.com/opencontainers/runc/libcontainer/cgroups/fs"
+	"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
+	"github.com/opencontainers/runc/libcontainer/configs"
+	"github.com/opencontainers/runc/libcontainer/configs/validate"
+	"github.com/opencontainers/runc/libcontainer/intelrdt"
+	"github.com/opencontainers/runc/libcontainer/mount"
+	"github.com/opencontainers/runc/libcontainer/utils"
+
+	"golang.org/x/sys/unix"
+)
+
+// File names used inside a container's root directory.
+const (
+	stateFilename    = "state.json"
+	execFifoFilename = "exec.fifo"
+)
+
+// idRegex is the pattern a container id must match: one or more characters
+// from the class [\w+-\.]. Inside the class, "+-\." is a character range
+// from '+' to '.', so besides word characters it admits '+', ',', '-' and '.'.
+var idRegex = regexp.MustCompile(`^[\w+-\.]+$`)
+
+// InitArgs returns an options func that stores args as the init arguments of
+// a LinuxFactory. When args is non-empty, its first element is replaced by
+// its absolute path so it stays valid after directory changes; a failure to
+// resolve it is reported as a ConfigInvalid error.
+func InitArgs(args ...string) func(*LinuxFactory) error {
+	return func(l *LinuxFactory) (err error) {
+		if len(args) == 0 {
+			l.InitArgs = args
+			return nil
+		}
+
+		// Resolve a relative init path up front.
+		args[0], err = filepath.Abs(args[0])
+		if err != nil {
+			return newGenericError(err, ConfigInvalid)
+		}
+		l.InitArgs = args
+		return nil
+	}
+}
+
+// SystemdCgroups is an options func that makes a LinuxFactory build its
+// cgroups managers as *systemd.Manager values. It always returns nil.
+func SystemdCgroups(l *LinuxFactory) error {
+	newManager := func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
+		m := new(systemd.Manager)
+		m.Cgroups = config
+		m.Paths = paths
+		return m
+	}
+	l.NewCgroupsManager = newManager
+	return nil
+}
+
+// Cgroupfs is an options func that makes a LinuxFactory build its cgroups
+// managers as *fs.Manager values, i.e. the native cgroups filesystem
+// implementation. It always returns nil.
+func Cgroupfs(l *LinuxFactory) error {
+	newManager := func(config *configs.Cgroup, paths map[string]string) cgroups.Manager {
+		m := new(fs.Manager)
+		m.Cgroups = config
+		m.Paths = paths
+		return m
+	}
+	l.NewCgroupsManager = newManager
+	return nil
+}
+
+// IntelRdtFs is an options func that makes a LinuxFactory build Intel RDT
+// managers as *intelrdt.IntelRdtManager values, which use the Intel RDT
+// "resource control" filesystem to create and manage Intel Xeon platform
+// shared resources (e.g., L3 cache). It always returns nil.
+func IntelRdtFs(l *LinuxFactory) error {
+	newManager := func(config *configs.Config, id string, path string) intelrdt.Manager {
+		m := new(intelrdt.IntelRdtManager)
+		m.Config = config
+		m.Id = id
+		m.Path = path
+		return m
+	}
+	l.NewIntelRdtManager = newManager
+	return nil
+}
+
+// TmpfsRoot is an option func that mounts a tmpfs on LinuxFactory.Root unless
+// mount.Mounted already reports the directory as mounted.
+func TmpfsRoot(l *LinuxFactory) error {
+	alreadyMounted, err := mount.Mounted(l.Root)
+	if err != nil {
+		return err
+	}
+	if alreadyMounted {
+		return nil
+	}
+	return unix.Mount("tmpfs", l.Root, "tmpfs", 0, "")
+}
+
+// CriuPath returns an option func that sets the CriuPath of a LinuxFactory
+// to criupath.
+func CriuPath(criupath string) func(*LinuxFactory) error {
+	setPath := func(l *LinuxFactory) error {
+		l.CriuPath = criupath
+		return nil
+	}
+	return setPath
+}
+
+// New returns a linux based container factory rooted at root. A non-empty
+// root is created with mode 0700 if needed. The factory starts out with the
+// cgroupfs manager, "/proc/self/exe" as init path, "<argv0> init" as init
+// arguments and "criu" as criu binary; the option funcs are then applied in
+// order (nil options are skipped) and the first option error aborts
+// construction.
+func New(root string, options ...func(*LinuxFactory) error) (Factory, error) {
+	if root != "" {
+		if err := os.MkdirAll(root, 0700); err != nil {
+			return nil, newGenericError(err, SystemError)
+		}
+	}
+
+	factory := &LinuxFactory{
+		Root:      root,
+		InitPath:  "/proc/self/exe",
+		InitArgs:  []string{os.Args[0], "init"},
+		Validator: validate.New(),
+		CriuPath:  "criu",
+	}
+	// Default cgroups manager; an option may replace it below.
+	Cgroupfs(factory)
+
+	for _, apply := range options {
+		if apply != nil {
+			if err := apply(factory); err != nil {
+				return nil, err
+			}
+		}
+	}
+	return factory, nil
+}
+
+// LinuxFactory implements the default factory interface for linux based systems.
+type LinuxFactory struct {
+	// Root directory for the factory to store state.
+	Root string
+
+	// InitPath is the path for calling the init responsibilities for spawning
+	// a container.
+	InitPath string
+
+	// InitArgs are arguments for calling the init responsibilities for spawning
+	// a container.
+	InitArgs []string
+
+	// CriuPath is the path to the criu binary used for checkpoint and restore of
+	// containers.
+	CriuPath string
+
+	// New{u,g}uidmapPath is the path to the binaries used for mapping with
+	// rootless containers.
+	NewuidmapPath string
+	NewgidmapPath string
+
+	// Validator provides validation to container configurations.
+	Validator validate.Validator
+
+	// NewCgroupsManager returns an initialized cgroups manager for a single container.
+	// New installs the Cgroupfs implementation by default.
+	NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager
+
+	// NewIntelRdtManager returns an initialized Intel RDT manager for a single container.
+	// New does not set this field; the IntelRdtFs option does.
+	NewIntelRdtManager func(config *configs.Config, id string, path string) intelrdt.Manager
+}
+
+// Create validates id and config, creates the container's state directory
+// below l.Root and returns a container in the stopped state.
+//
+// errors:
+// ConfigInvalid - l.Root is empty or config fails validation,
+// InvalidIdFormat - id has incorrect format,
+// IdInUse - a directory for id already exists,
+// SystemError - the state directory could not be inspected, created or chowned.
+//
+// An Intel RDT manager is attached only when intelrdt.IsEnabled() reports
+// true and the factory has a NewIntelRdtManager constructor; New leaves that
+// constructor nil unless the IntelRdtFs option is used, and calling a nil
+// constructor would panic.
+func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) {
+	if l.Root == "" {
+		return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
+	}
+	if err := l.validateID(id); err != nil {
+		return nil, err
+	}
+	if err := l.Validator.Validate(config); err != nil {
+		return nil, newGenericError(err, ConfigInvalid)
+	}
+	containerRoot := filepath.Join(l.Root, id)
+	if _, err := os.Stat(containerRoot); err == nil {
+		return nil, newGenericError(fmt.Errorf("container with id exists: %v", id), IdInUse)
+	} else if !os.IsNotExist(err) {
+		return nil, newGenericError(err, SystemError)
+	}
+	if err := os.MkdirAll(containerRoot, 0711); err != nil {
+		return nil, newGenericError(err, SystemError)
+	}
+	if err := os.Chown(containerRoot, unix.Geteuid(), unix.Getegid()); err != nil {
+		return nil, newGenericError(err, SystemError)
+	}
+	c := &linuxContainer{
+		id:            id,
+		root:          containerRoot,
+		config:        config,
+		initPath:      l.InitPath,
+		initArgs:      l.InitArgs,
+		criuPath:      l.CriuPath,
+		newuidmapPath: l.NewuidmapPath,
+		newgidmapPath: l.NewgidmapPath,
+		cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
+	}
+	// Guard against a factory that was built without the IntelRdtFs option.
+	if intelrdt.IsEnabled() && l.NewIntelRdtManager != nil {
+		c.intelRdtManager = l.NewIntelRdtManager(config, id, "")
+	}
+	c.state = &stoppedState{c: c}
+	return c, nil
+}
+
+// Load reads the saved state of the container id from its directory below
+// l.Root and returns a container object built from it, after refreshing its
+// runtime state.
+//
+// errors:
+// ConfigInvalid - l.Root is empty,
+// plus any error from loading the state file or refreshing the state.
+//
+// An Intel RDT manager is attached only when intelrdt.IsEnabled() reports
+// true and the factory has a NewIntelRdtManager constructor; New leaves that
+// constructor nil unless the IntelRdtFs option is used, and calling a nil
+// constructor would panic.
+func (l *LinuxFactory) Load(id string) (Container, error) {
+	if l.Root == "" {
+		return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid)
+	}
+	containerRoot := filepath.Join(l.Root, id)
+	state, err := l.loadState(containerRoot, id)
+	if err != nil {
+		return nil, err
+	}
+	r := &nonChildProcess{
+		processPid:       state.InitProcessPid,
+		processStartTime: state.InitProcessStartTime,
+		fds:              state.ExternalDescriptors,
+	}
+	c := &linuxContainer{
+		initProcess:          r,
+		initProcessStartTime: state.InitProcessStartTime,
+		id:                   id,
+		config:               &state.Config,
+		initPath:             l.InitPath,
+		initArgs:             l.InitArgs,
+		criuPath:             l.CriuPath,
+		newuidmapPath:        l.NewuidmapPath,
+		newgidmapPath:        l.NewgidmapPath,
+		cgroupManager:        l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths),
+		root:                 containerRoot,
+		created:              state.Created,
+	}
+	c.state = &loadedState{c: c}
+	if err := c.refreshState(); err != nil {
+		return nil, err
+	}
+	// Guard against a factory that was built without the IntelRdtFs option.
+	if intelrdt.IsEnabled() && l.NewIntelRdtManager != nil {
+		c.intelRdtManager = l.NewIntelRdtManager(&state.Config, id, state.IntelRdtPath)
+	}
+	return c, nil
+}
+
+func (l *LinuxFactory) Type() string {
+	return "libcontainer"
+}
+
+// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state
+// This is a low level implementation detail of the reexec and should not be consumed externally
+//
+// The descriptors are taken from the environment: _LIBCONTAINER_INITPIPE
+// (always required), _LIBCONTAINER_FIFOFD (required for the standard init
+// type only) and _LIBCONTAINER_CONSOLE (optional). The init type comes from
+// _LIBCONTAINER_INITTYPE. Once the environment has been cleared, any return
+// from this function is reported to the parent over the init pipe by the
+// deferred handlers below.
+func (l *LinuxFactory) StartInitialization() (err error) {
+	var (
+		pipefd, fifofd int
+		consoleSocket  *os.File
+		envInitPipe    = os.Getenv("_LIBCONTAINER_INITPIPE")
+		envFifoFd      = os.Getenv("_LIBCONTAINER_FIFOFD")
+		envConsole     = os.Getenv("_LIBCONTAINER_CONSOLE")
+	)
+
+	// Get the INITPIPE.
+	pipefd, err = strconv.Atoi(envInitPipe)
+	if err != nil {
+		return fmt.Errorf("unable to convert _LIBCONTAINER_INITPIPE=%s to int: %s", envInitPipe, err)
+	}
+
+	var (
+		pipe = os.NewFile(uintptr(pipefd), "pipe")
+		it   = initType(os.Getenv("_LIBCONTAINER_INITTYPE"))
+	)
+	defer pipe.Close()
+
+	// Only init processes have FIFOFD.
+	fifofd = -1
+	if it == initStandard {
+		if fifofd, err = strconv.Atoi(envFifoFd); err != nil {
+			return fmt.Errorf("unable to convert _LIBCONTAINER_FIFOFD=%s to int: %s", envFifoFd, err)
+		}
+	}
+
+	// The console socket is optional; consoleSocket stays nil without it.
+	if envConsole != "" {
+		console, err := strconv.Atoi(envConsole)
+		if err != nil {
+			return fmt.Errorf("unable to convert _LIBCONTAINER_CONSOLE=%s to int: %s", envConsole, err)
+		}
+		consoleSocket = os.NewFile(uintptr(console), "console-socket")
+		defer consoleSocket.Close()
+	}
+
+	// clear the current process's environment to clean any libcontainer
+	// specific env vars.
+	os.Clearenv()
+
+	// Error reporting handler. It runs on every return after this point and
+	// reads the named result err, so it sees whatever the code below (or the
+	// recover handler) stored there.
+	defer func() {
+		// We have an error during the initialization of the container's init,
+		// send it back to the parent process in the form of an initError.
+		// NOTE(review): when a write fails, err (the initialization error)
+		// is printed rather than werr - confirm that is intended.
+		if werr := utils.WriteJSON(pipe, syncT{procError}); werr != nil {
+			fmt.Fprintln(os.Stderr, err)
+			return
+		}
+		if werr := utils.WriteJSON(pipe, newSystemError(err)); werr != nil {
+			fmt.Fprintln(os.Stderr, err)
+			return
+		}
+	}()
+	// Registered after the reporting handler, so it runs before it (defers
+	// run last-in first-out): a panic is turned into err and then reported.
+	defer func() {
+		if e := recover(); e != nil {
+			err = fmt.Errorf("panic from initialization: %v, %v", e, string(debug.Stack()))
+		}
+	}()
+
+	i, err := newContainerInit(it, pipe, consoleSocket, fifofd)
+	if err != nil {
+		return err
+	}
+
+	// If Init succeeds, syscall.Exec will not return, hence none of the defers will be called.
+	return i.Init()
+}
+
+// loadState reads and decodes the state file of container id from root.
+//
+// errors:
+// ContainerNotExists - the state file does not exist,
+// SystemError - the file cannot be opened or decoded, or it decodes to no
+// state at all (for example a file containing just JSON null, which the
+// decoder accepts without error and would otherwise yield a nil *State).
+func (l *LinuxFactory) loadState(root, id string) (*State, error) {
+	f, err := os.Open(filepath.Join(root, stateFilename))
+	if err != nil {
+		if os.IsNotExist(err) {
+			return nil, newGenericError(fmt.Errorf("container %q does not exist", id), ContainerNotExists)
+		}
+		return nil, newGenericError(err, SystemError)
+	}
+	defer f.Close()
+	var state *State
+	if err := json.NewDecoder(f).Decode(&state); err != nil {
+		return nil, newGenericError(err, SystemError)
+	}
+	if state == nil {
+		// Never hand a nil state to callers that dereference it.
+		return nil, newGenericError(fmt.Errorf("container %q has an empty state", id), SystemError)
+	}
+	return state, nil
+}
+
+// validateID returns an InvalidIdFormat error unless id matches idRegex.
+func (l *LinuxFactory) validateID(id string) error {
+	if idRegex.MatchString(id) {
+		return nil
+	}
+	return newGenericError(fmt.Errorf("invalid id format: %v", id), InvalidIdFormat)
+}
+
+// NewuidmapPath returns an option func that sets the NewuidmapPath of a
+// LinuxFactory to newuidmapPath.
+func NewuidmapPath(newuidmapPath string) func(*LinuxFactory) error {
+	setPath := func(l *LinuxFactory) error {
+		l.NewuidmapPath = newuidmapPath
+		return nil
+	}
+	return setPath
+}
+
+// NewgidmapPath returns an option func that sets the NewgidmapPath of a
+// LinuxFactory to newgidmapPath.
+func NewgidmapPath(newgidmapPath string) func(*LinuxFactory) error {
+	setPath := func(l *LinuxFactory) error {
+		l.NewgidmapPath = newgidmapPath
+		return nil
+	}
+	return setPath
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/generic_error.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/generic_error.go
@@ -0,0 +1,92 @@
+package libcontainer
+
+import (
+	"fmt"
+	"io"
+	"text/template"
+	"time"
+
+	"github.com/opencontainers/runc/libcontainer/stacktrace"
+)
+
+// errorTemplate is the text template used to render the detailed report of a
+// genericError: timestamp, code, optional message and one entry per captured
+// stack frame. It is parsed once at package initialization; template.Must
+// panics if the template text is invalid.
+var errorTemplate = template.Must(template.New("error").Parse(`Timestamp: {{.Timestamp}}
+Code: {{.ECode}}
+{{if .Message }}
+Message: {{.Message}}
+{{end}}
+Frames:{{range $i, $frame := .Stack.Frames}}
+---
+{{$i}}: {{$frame.Function}}
+Package: {{$frame.Package}}
+File: {{$frame.File}}@{{$frame.Line}}{{end}}
+`))
+
+func newGenericError(err error, c ErrorCode) Error {
+	if le, ok := err.(Error); ok {
+		return le
+	}
+	gerr := &genericError{
+		Timestamp: time.Now(),
+		Err:       err,
+		ECode:     c,
+		Stack:     stacktrace.Capture(1),
+	}
+	if err != nil {
+		gerr.Message = err.Error()
+	}
+	return gerr
+}
+
+// newSystemError wraps err in a SystemError without a cause. It calls
+// createSystemError directly; that function passes a fixed skip count of 2
+// to stacktrace.Capture, so the call depth here matters.
+func newSystemError(err error) Error {
+	return createSystemError(err, "")
+}
+
+// newSystemErrorWithCausef wraps err in a SystemError whose cause is built
+// from the format string cause and the arguments v. It calls
+// createSystemError directly; that function passes a fixed skip count of 2
+// to stacktrace.Capture, so the call depth here matters.
+func newSystemErrorWithCausef(err error, cause string, v ...interface{}) Error {
+	return createSystemError(err, fmt.Sprintf(cause, v...))
+}
+
+// newSystemErrorWithCause wraps err in a SystemError with the given cause.
+// It calls createSystemError directly; that function passes a fixed skip
+// count of 2 to stacktrace.Capture, so the call depth here matters.
+func newSystemErrorWithCause(err error, cause string) Error {
+	return createSystemError(err, cause)
+}
+
+// createSystemError builds a SystemError for err with the given cause. The
+// stack trace is captured with a skip count of 2, so this function is only
+// to be called by the newSystemError* helpers that format the error.
+// A nil err yields an error with an empty message.
+func createSystemError(err error, cause string) Error {
+	sysErr := new(genericError)
+	sysErr.Timestamp = time.Now()
+	sysErr.Err = err
+	sysErr.ECode = SystemError
+	sysErr.Cause = cause
+	sysErr.Stack = stacktrace.Capture(2)
+	if err != nil {
+		sysErr.Message = err.Error()
+	}
+	return sysErr
+}
+
+// genericError is the Error implementation used by this package.
+type genericError struct {
+	// Timestamp is the time the error value was created.
+	Timestamp time.Time
+	// ECode is the API error code.
+	ECode ErrorCode
+	// Err is the wrapped error; it is excluded from JSON encoding.
+	Err error `json:"-"`
+	// Cause is an optional description of what was being done.
+	Cause string
+	// Message is the text of Err, captured when the error was created.
+	Message string
+	// Stack is the stack trace captured when the error was created.
+	Stack stacktrace.Stacktrace
+}
+
+// Error returns the message of the error. When a cause is set, the result is
+// "<file>:<line>: <cause> caused <quoted message>", using the first captured
+// stack frame; if no frame was captured the location prefix is left out
+// instead of indexing into an empty frame list.
+func (e *genericError) Error() string {
+	if e.Cause == "" {
+		return e.Message
+	}
+	if len(e.Stack.Frames) == 0 {
+		return fmt.Sprintf("%s caused %q", e.Cause, e.Message)
+	}
+	frame := e.Stack.Frames[0]
+	return fmt.Sprintf("%s:%d: %s caused %q", frame.File, frame.Line, e.Cause, e.Message)
+}
+
+// Code returns the API error code stored in the error.
+func (e *genericError) Code() ErrorCode {
+	code := e.ECode
+	return code
+}
+
+// Detail renders the error through errorTemplate into w and returns the
+// error of that rendering, if any.
+func (e *genericError) Detail(w io.Writer) error {
+	renderErr := errorTemplate.Execute(w, e)
+	return renderErr
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go
@@ -0,0 +1,534 @@
+// +build linux
+
+package libcontainer
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+	"net"
+	"os"
+	"strings"
+	"syscall" // only for Errno
+	"unsafe"
+
+	"golang.org/x/sys/unix"
+
+	"github.com/containerd/console"
+	"github.com/opencontainers/runc/libcontainer/cgroups"
+	"github.com/opencontainers/runc/libcontainer/configs"
+	"github.com/opencontainers/runc/libcontainer/system"
+	"github.com/opencontainers/runc/libcontainer/user"
+	"github.com/opencontainers/runc/libcontainer/utils"
+	"github.com/sirupsen/logrus"
+	"github.com/vishvananda/netlink"
+)
+
+type initType string // names the kind of initer that newContainerInit builds
+
+const (
+	initSetns    initType = "setns"
+	initStandard initType = "standard"
+)
+
+type pid struct { // JSON record of two process IDs, keyed "pid" and "pid_first"
+	Pid           int `json:"pid"`
+	PidFirstChild int `json:"pid_first"`
+}
+
+// network is an internal struct used to set up container networks.
+type network struct {
+	configs.Network
+
+	// TempVethPeerName is a unique temporary veth peer name that was placed into
+	// the container's namespace.
+	TempVethPeerName string `json:"temp_veth_peer_name"`
+}
+
+// initConfig transfers parameters from Exec() to Init(); newContainerInit decodes it as JSON from the init pipe
+type initConfig struct {
+	Args             []string              `json:"args"`
+	Env              []string              `json:"env"`
+	Cwd              string                `json:"cwd"`
+	Capabilities     *configs.Capabilities `json:"capabilities"`
+	ProcessLabel     string                `json:"process_label"`
+	AppArmorProfile  string                `json:"apparmor_profile"`
+	NoNewPrivileges  bool                  `json:"no_new_privileges"`
+	User             string                `json:"user"`
+	AdditionalGroups []string              `json:"additional_groups"`
+	Config           *configs.Config       `json:"config"`
+	Networks         []*network            `json:"network"`
+	PassedFilesCount int                   `json:"passed_files_count"`
+	ContainerId      string                `json:"containerid"`
+	Rlimits          []configs.Rlimit      `json:"rlimits"`
+	CreateConsole    bool                  `json:"create_console"`
+	ConsoleWidth     uint16                `json:"console_width"`
+	ConsoleHeight    uint16                `json:"console_height"`
+	Rootless         bool                  `json:"rootless"`
+}
+
+type initer interface { // satisfied by the init values that newContainerInit returns
+	Init() error
+}
+
+func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd int) (initer, error) {
+	var cfg *initConfig // filled in from the JSON document read off pipe
+	if err := json.NewDecoder(pipe).Decode(&cfg); err != nil {
+		return nil, err
+	}
+	if err := populateProcessEnvironment(cfg.Env); err != nil {
+		return nil, err
+	}
+	if t == initSetns {
+		return &linuxSetnsInit{
+			pipe:          pipe,
+			consoleSocket: consoleSocket,
+			config:        cfg,
+		}, nil
+	}
+	if t == initStandard {
+		return &linuxStandardInit{
+			pipe:          pipe,
+			consoleSocket: consoleSocket,
+			parentPid:     unix.Getppid(),
+			config:        cfg,
+			fifoFd:        fifoFd,
+		}, nil
+	}
+	return nil, fmt.Errorf("unknown init type %q", t)
+}
+
+// populateProcessEnvironment loads the provided "KEY=value" pairs into the
+// current process's environment; a pair without '=' is rejected.
+func populateProcessEnvironment(env []string) error {
+	for _, kv := range env {
+		eq := strings.IndexByte(kv, '=')
+		if eq < 0 {
+			return fmt.Errorf("invalid environment '%v'", kv)
+		}
+		if err := os.Setenv(kv[:eq], kv[eq+1:]); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// finalizeNamespace marks leaked file descriptors close-on-exec, applies the
+// capability sets, switches to the configured user and, if one is set, changes
+// to the working dir before the command is executed inside the namespace
+func finalizeNamespace(config *initConfig) error {
+	// Ensure that all unwanted fds we may have accidentally
+	// inherited are marked close-on-exec so they stay out of the
+	// container (the +3 presumably accounts for stdin, stdout and stderr)
+	if err := utils.CloseExecFrom(config.PassedFilesCount + 3); err != nil {
+		return err
+	}
+
+	capabilities := &configs.Capabilities{} // fallback when neither level below sets any
+	if config.Capabilities != nil {
+		capabilities = config.Capabilities
+	} else if config.Config.Capabilities != nil {
+		capabilities = config.Config.Capabilities
+	}
+	w, err := newContainerCapList(capabilities)
+	if err != nil {
+		return err
+	}
+	// drop capabilities in bounding set before changing user
+	if err := w.ApplyBoundingSet(); err != nil {
+		return err
+	}
+	// preserve existing capabilities while we change users
+	if err := system.SetKeepCaps(); err != nil {
+		return err
+	}
+	if err := setupUser(config); err != nil {
+		return err
+	}
+	if err := system.ClearKeepCaps(); err != nil {
+		return err
+	}
+	if err := w.ApplyCaps(); err != nil {
+		return err
+	}
+	if config.Cwd != "" {
+		if err := unix.Chdir(config.Cwd); err != nil {
+			return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %v", config.Cwd, err)
+		}
+	}
+	return nil
+}
+
+// setupConsole sets up the console from inside the container, and sends the
+// master pty fd over the given socket (using cmsg). This is done to ensure that
+// consoles are scoped to a container properly (see runc#814 and the many
+// issues related to that). This has to be run *after* we've pivoted to the new
+// rootfs (and the users' configuration is entirely set up).
+func setupConsole(socket *os.File, config *initConfig, mount bool) error {
+	defer socket.Close()
+	// At this point, /dev/ptmx points to something that we would expect. We
+	// used to change the owner of the slave path, but the /dev/pts mount can
+	// have gid=X set (at the users' option), so touching the owner of the
+	// slave PTY is not necessary, as the kernel will handle that for us. Note
+	// however, that setupUser (specifically fixStdioPermissions) *will* change
+	// the UID owner of the console to be the user the process will run as (so
+	// they can actually control their console).
+
+	pty, slavePath, err := console.NewPty()
+	if err != nil {
+		return err
+	}
+
+	if config.ConsoleHeight != 0 && config.ConsoleWidth != 0 {
+		err = pty.Resize(console.WinSize{
+			Height: config.ConsoleHeight,
+			Width:  config.ConsoleWidth,
+		})
+
+		if err != nil {
+			return err
+		}
+	}
+
+	// After we return from here, we don't need the console anymore.
+	defer pty.Close()
+
+	// Mount the console inside our rootfs.
+	if mount {
+		if err := mountConsole(slavePath); err != nil {
+			return err
+		}
+	}
+	// While we can access console.master, using the API is a good idea.
+	if err := utils.SendFd(socket, pty.Name(), pty.Fd()); err != nil {
+		return err
+	}
+	// Now, dup over all the things.
+	return dupStdio(slavePath)
+}
+
+// syncParentReady sends to the given pipe a JSON payload which indicates that
+// the init is ready to Exec the child process. It then waits for the parent to
+// indicate that it is cleared to Exec.
+//
+// A failure of either step is returned to the caller unchanged.
+func syncParentReady(pipe io.ReadWriter) error {
+	// Tell parent.
+	if err := writeSync(pipe, procReady); err != nil {
+		return err
+	}
+
+	// Wait for parent to give the all-clear. Nothing follows the read, so
+	// its result doubles as the result of the whole handshake.
+	ack := readSync(pipe, procRun)
+	return ack
+}
+
+// syncParentHooks sends to the given pipe a JSON payload which indicates that
+// the parent should execute pre-start hooks. It then waits for the parent to
+// indicate that it is cleared to resume.
+//
+// A failure of either step is returned to the caller unchanged.
+func syncParentHooks(pipe io.ReadWriter) error {
+	// Tell parent.
+	if err := writeSync(pipe, procHooks); err != nil {
+		return err
+	}
+
+	// Wait for parent to give the all-clear. Nothing follows the read, so
+	// its result doubles as the result of the whole handshake.
+	ack := readSync(pipe, procResume)
+	return ack
+}
+
+// setupUser changes the groups, gid, and uid for the user inside the container, and sets HOME if it is unset
+func setupUser(config *initConfig) error {
+	// Set up defaults.
+	defaultExecUser := user.ExecUser{
+		Uid:  0,
+		Gid:  0,
+		Home: "/",
+	}
+
+	passwdPath, err := user.GetPasswdPath()
+	if err != nil {
+		return err
+	}
+
+	groupPath, err := user.GetGroupPath()
+	if err != nil {
+		return err
+	}
+
+	execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath)
+	if err != nil {
+		return err
+	}
+
+	var addGroups []int
+	if len(config.AdditionalGroups) > 0 {
+		addGroups, err = user.GetAdditionalGroupsPath(config.AdditionalGroups, groupPath)
+		if err != nil {
+			return err
+		}
+	}
+
+	// Rather than just erroring out later in setuid(2) and setgid(2), check
+	// that the user is mapped here. The lookup error itself is not reported.
+	if _, err := config.Config.HostUID(execUser.Uid); err != nil {
+		return fmt.Errorf("cannot set uid to unmapped user in user namespace")
+	}
+	if _, err := config.Config.HostGID(execUser.Gid); err != nil {
+		return fmt.Errorf("cannot set gid to unmapped user in user namespace")
+	}
+
+	if config.Rootless {
+		// We cannot set any additional groups in a rootless container and thus
+		// we bail if the user asked us to do so. TODO: We currently can't do
+		// this check earlier, but if libcontainer.Process.User was typesafe
+		// this might work.
+		if len(addGroups) > 0 {
+			return fmt.Errorf("cannot set any additional groups in a rootless container")
+		}
+	}
+
+	// Before we change to the container's user make sure that the process's
+	// STDIO is correctly owned by the user that we are switching to.
+	if err := fixStdioPermissions(config, execUser); err != nil {
+		return err
+	}
+
+	// This isn't allowed in an unprivileged user namespace since Linux 3.19.
+	// There's nothing we can do about /etc/group entries, so we silently
+	// ignore setting groups here (since the user didn't explicitly ask us to
+	// set the group).
+	if !config.Rootless {
+		suppGroups := append(execUser.Sgids, addGroups...)
+		if err := unix.Setgroups(suppGroups); err != nil {
+			return err
+		}
+	}
+
+	if err := system.Setgid(execUser.Gid); err != nil {
+		return err
+	}
+	if err := system.Setuid(execUser.Uid); err != nil {
+		return err
+	}
+
+	// if we didn't get HOME already, set it based on the user's HOME
+	if envHome := os.Getenv("HOME"); envHome == "" {
+		if err := os.Setenv("HOME", execUser.Home); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// fixStdioPermissions changes the uid owner of this process's stdin, stdout and stderr to the specified
+// user. The ownership needs to match because it is created outside of the container and needs to be
+// localized. Descriptors that refer to /dev/null are left alone.
+func fixStdioPermissions(config *initConfig, u *user.ExecUser) error {
+	var null unix.Stat_t
+	if err := unix.Stat("/dev/null", &null); err != nil {
+		return err
+	}
+	for _, fd := range []uintptr{
+		os.Stdin.Fd(),
+		os.Stderr.Fd(),
+		os.Stdout.Fd(),
+	} {
+		var s unix.Stat_t
+		if err := unix.Fstat(int(fd), &s); err != nil {
+			return err
+		}
+
+		// Skip chown of /dev/null if it was used as one of the STDIO fds
+		// (recognised by comparing device numbers).
+		if s.Rdev == null.Rdev {
+			continue
+		}
+
+		// We only change the uid owner (as it is possible for the mount to
+		// prefer a different gid, and there's no reason for us to change it).
+		// The reason why we don't just leave the default uid=X mount setup is
+		// that users expect to be able to actually use their console. Without
+		// this code, you couldn't effectively run as a non-root user inside a
+		// container and also have a console set up.
+		if err := unix.Fchown(int(fd), u.Uid, int(s.Gid)); err != nil {
+			// If we've hit an EINVAL then s.Gid isn't mapped in the user
+			// namespace. If we've hit an EPERM then the inode's current owner
+			// is not mapped in our user namespace (in particular,
+			// privileged_wrt_inode_uidgid() has failed). In either case, we
+			// are in a configuration where it's better for us to just not
+			// touch the stdio rather than bail at this point.
+			if err == unix.EINVAL || err == unix.EPERM {
+				continue
+			}
+			return err
+		}
+	}
+	return nil
+}
+
+// setupNetwork initializes every entry of config.Networks with the strategy registered for its Type.
+func setupNetwork(config *initConfig) error {
+	for _, netCfg := range config.Networks {
+		strategy, err := getStrategy(netCfg.Type)
+		if err != nil {
+			return err
+		}
+		if err := strategy.initialize(netCfg); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// setupRoute installs every entry of config.Routes via netlink, stopping at the first error.
+func setupRoute(config *configs.Config) error {
+	for _, rt := range config.Routes {
+		_, dst, err := net.ParseCIDR(rt.Destination)
+		if err != nil {
+			return err
+		}
+		src := net.ParseIP(rt.Source)
+		if src == nil {
+			return fmt.Errorf("Invalid source for route: %s", rt.Source)
+		}
+		gw := net.ParseIP(rt.Gateway)
+		if gw == nil {
+			return fmt.Errorf("Invalid gateway for route: %s", rt.Gateway)
+		}
+		link, err := netlink.LinkByName(rt.InterfaceName)
+		if err != nil {
+			return err
+		}
+		if err := netlink.RouteAdd(&netlink.Route{
+			Scope:     netlink.SCOPE_UNIVERSE,
+			Dst:       dst,
+			Src:       src,
+			Gw:        gw,
+			LinkIndex: link.Attrs().Index,
+		}); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+func setupRlimits(limits []configs.Rlimit, pid int) error {
+	for _, rl := range limits {
+		if err := system.Prlimit(pid, rl.Type, unix.Rlimit{Max: rl.Hard, Cur: rl.Soft}); err != nil {
+			return fmt.Errorf("error setting rlimit type %v: %v", rl.Type, err)
+		}
+	}
+	return nil // every limit in the list was applied to pid
+}
+
+const _P_PID = 1 // idtype argument handed to the waitid syscall by isWaitable
+
+type siginfo struct { // result buffer for the raw waitid syscall in isWaitable
+	si_signo int32
+	si_errno int32
+	si_code  int32
+	// below here is a union; si_pid is the only field we use
+	si_pid int32
+	// Pad to 128 bytes as detailed in blockUntilWaitable
+	pad [96]byte
+}
+
+// isWaitable returns true if the process has exited, false otherwise. It calls
+// waitid with WEXITED|WNOWAIT|WNOHANG. It is based on blockUntilWaitable in src/os/wait_waitid.go
+func isWaitable(pid int) (bool, error) {
+	si := &siginfo{}
+	_, _, e := unix.Syscall6(unix.SYS_WAITID, _P_PID, uintptr(pid), uintptr(unsafe.Pointer(si)), unix.WEXITED|unix.WNOWAIT|unix.WNOHANG, 0, 0)
+	if e != 0 {
+		return false, os.NewSyscallError("waitid", e)
+	}
+
+	return si.si_pid != 0, nil
+}
+
+// isNoChildren returns true if err represents a unix.ECHILD (formerly syscall.ECHILD) false otherwise
+func isNoChildren(err error) bool {
+	switch err := err.(type) {
+	case syscall.Errno:
+		if err == unix.ECHILD {
+			return true
+		}
+	case *os.SyscallError:
+		if err.Err == unix.ECHILD {
+			return true
+		}
+	}
+	return false
+}
+
+// signalAllProcesses freezes then iterates over all the processes inside the
+// manager's cgroups sending the signal s to them, and thaws them afterwards.
+// If s is SIGKILL then it will wait for each process to exit.
+// For all other signals it will check if the process is ready to report its
+// exit status and only if it is will a wait be performed.
+func signalAllProcesses(m cgroups.Manager, s os.Signal) error {
+	var procs []*os.Process
+	if err := m.Freeze(configs.Frozen); err != nil {
+		logrus.Warn(err)
+	}
+	pids, err := m.GetAllPids()
+	if err != nil {
+		m.Freeze(configs.Thawed) // undo the freeze before bailing out; its error is ignored
+		return err
+	}
+	for _, pid := range pids {
+		p, err := os.FindProcess(pid)
+		if err != nil {
+			logrus.Warn(err)
+			continue
+		}
+		procs = append(procs, p)
+		if err := p.Signal(s); err != nil {
+			logrus.Warn(err)
+		}
+	}
+	if err := m.Freeze(configs.Thawed); err != nil {
+		logrus.Warn(err)
+	}
+
+	subreaper, err := system.GetSubreaper()
+	if err != nil {
+		// The error here means that PR_GET_CHILD_SUBREAPER is not
+		// supported because this code might run on a kernel older
+		// than 3.4. We don't want to throw an error in that case,
+		// and we simplify things, considering there is no subreaper
+		// set.
+		subreaper = 0
+	}
+
+	for _, p := range procs {
+		if s != unix.SIGKILL {
+			if ok, err := isWaitable(p.Pid); err != nil {
+				if !isNoChildren(err) {
+					logrus.Warn("signalAllProcesses: ", p.Pid, err)
+				}
+				continue
+			} else if !ok {
+				// Not ready to report so don't wait
+				continue
+			}
+		}
+
+		// In case a subreaper has been setup, this code must not
+		// wait for the process. Otherwise, we cannot be sure the
+		// current process will be reaped by the subreaper, while
+		// the subreaper might be waiting for this process in order
+		// to retrieve its exit code.
+		if subreaper == 0 {
+			if _, err := p.Wait(); err != nil {
+				if !isNoChildren(err) {
+					logrus.Warn("wait: ", err)
+				}
+			}
+		}
+	}
+	return nil
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/integration/doc.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/integration/doc.go
@@ -0,0 +1,2 @@
+// Package integration is used for integration testing of libcontainer
+package integration
--- a/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/intelrdt.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/intelrdt.go
@@ -0,0 +1,553 @@
+// +build linux
+
+package intelrdt
+
+import (
+	"bufio"
+	"fmt"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+	"strconv"
+	"strings"
+	"sync"
+
+	"github.com/opencontainers/runc/libcontainer/configs"
+)
+
+/*
+ * About Intel RDT/CAT feature:
+ * Intel platforms with new Xeon CPU support Resource Director Technology (RDT).
+ * Intel Cache Allocation Technology (CAT) is a sub-feature of RDT. Currently L3
+ * Cache is the only resource that is supported in RDT.
+ *
+ * This feature provides a way for the software to restrict cache allocation to a
+ * defined 'subset' of L3 cache which may be overlapping with other 'subsets'.
+ * The different subsets are identified by class of service (CLOS) and each CLOS
+ * has a capacity bitmask (CBM).
+ *
+ * More information about Intel RDT/CAT can be found in section 17.17 of the
+ * Intel Software Developer Manual.
+ *
+ * About Intel RDT/CAT kernel interface:
+ * In Linux 4.10 kernel or newer, the interface is defined and exposed via
+ * "resource control" filesystem, which is a "cgroup-like" interface.
+ *
+ * Comparing with cgroups, it has similar process management lifecycle and
+ * interfaces in a container. But unlike cgroups' hierarchy, it has single level
+ * filesystem layout.
+ *
+ * Intel RDT "resource control" filesystem hierarchy:
+ * mount -t resctrl resctrl /sys/fs/resctrl
+ * tree /sys/fs/resctrl
+ * /sys/fs/resctrl/
+ * |-- info
+ * |   |-- L3
+ * |       |-- cbm_mask
+ * |       |-- min_cbm_bits
+ * |       |-- num_closids
+ * |-- cpus
+ * |-- schemata
+ * |-- tasks
+ * |-- <container_id>
+ *     |-- cpus
+ *     |-- schemata
+ *     |-- tasks
+ *
+ * For runc, we can make use of `tasks` and `schemata` configuration for L3 cache
+ * resource constraints.
+ *
+ * The file `tasks` has a list of tasks that belong to this group (e.g., the
+ * "<container_id>" group). Tasks can be added to a group by writing the task ID
+ * to the "tasks" file (which will automatically remove them from the previous
+ * group to which they belonged). New tasks created by fork(2) and clone(2) are
+ * added to the same group as their parent. If a pid is not in any sub group, it
+ * is in the root group.
+ *
+ * The file `schemata` has allocation bitmasks/values for L3 cache on each socket,
+ * which contains L3 cache id and capacity bitmask (CBM).
+ * 	Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
+ * For example, on a two-socket machine, L3's schema line could be `L3:0=ff;1=c0`
+ * which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0.
+ *
+ * A valid L3 cache CBM is a *contiguous set of bits*, and the number of bits
+ * that can be set is limited by the max bits. The max bits in the CBM varies
+ * among supported Intel Xeon platforms. In the Intel RDT "resource control"
+ * filesystem layout, the CBM in a group should be a subset of the CBM in root.
+ * The kernel checks whether it is valid when writing. E.g., 0xfffff in root
+ * indicates the max bits of the CBM is 20, which maps to the entire L3 cache
+ * capacity. Some valid CBM values to set in a group: 0xf, 0xf0, 0x3ff, 0x1f00, etc.
+ *
+ * For more information about Intel RDT/CAT kernel interface:
+ * https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
+ *
+ * An example for runc:
+ * Consider a two-socket machine with two L3 caches where the default CBM is
+ * 0xfffff and the max CBM length is 20 bits. With this configuration, tasks
+ * inside the container only have access to the "upper" 80% of L3 cache id 0 and
+ * the "lower" 50% L3 cache id 1:
+ *
+ * "linux": {
+ * 	"intelRdt": {
+ * 		"l3CacheSchema": "L3:0=ffff0;1=3ff"
+ * 	}
+ * }
+ */
+
+type Manager interface { // operations on the Intel RDT group of one container
+	// Apply applies Intel RDT configuration to the process with the specified pid
+	Apply(pid int) error
+
+	// GetStats returns statistics for Intel RDT
+	GetStats() (*Stats, error)
+
+	// Destroy destroys the Intel RDT 'container_id' group
+	Destroy() error
+
+	// GetPath returns the Intel RDT path to save in a state file and to be
+	// able to restore the object later
+	GetPath() string
+
+	// Set sets the Intel RDT "resource control" filesystem as configured.
+	Set(container *configs.Config) error
+}
+
+// IntelRdtManager implements interface Manager; mu is held by Apply, Destroy and GetStats
+type IntelRdtManager struct {
+	mu     sync.Mutex
+	Config *configs.Config
+	Id     string
+	Path   string
+}
+
+const (
+	IntelRdtTasks = "tasks" // name of the per-group file that pids are read from and written to
+)
+
+var (
+	// Cached absolute root path of the Intel RDT "resource control" filesystem; set by getIntelRdtRoot under intelRdtRootLock
+	intelRdtRoot     string
+	intelRdtRootLock sync.Mutex
+
+	// The flag to indicate if Intel RDT is supported; assigned in init() and read by IsEnabled
+	isEnabled bool
+)
+
+type intelRdtData struct { // inputs of join: resctrl root, container config and target pid
+	root   string
+	config *configs.Config
+	pid    int
+}
+
+// init probes once, at package load, whether Intel RDT/CAT can be used.
+func init() {
+	// 1. The hardware and kernel must advertise the "cat_l3" CPU flag; a
+	// failure to read /proc/cpuinfo counts as "not supported".
+	hasFlag, err := parseCpuInfoFile("/proc/cpuinfo")
+	if err != nil || !hasFlag {
+		isEnabled = false
+		return
+	}
+
+	// 2. The "resource control" filesystem must already be mounted; mounting
+	// it is left to the user.
+	isEnabled = isIntelRdtMounted()
+}
+
+// findIntelRdtMountpointDir scans /proc/self/mountinfo and returns the mount point of the
+// Intel RDT "resource control" (resctrl) filesystem, or a NotFoundError if there is none.
+func findIntelRdtMountpointDir() (string, error) {
+	f, err := os.Open("/proc/self/mountinfo")
+	if err != nil {
+		return "", err
+	}
+	defer f.Close()
+
+	s := bufio.NewScanner(f)
+	for s.Scan() {
+		text := s.Text()
+		fields := strings.Split(text, " ")
+		// Safe as mountinfo encodes mountpoints with spaces as \040.
+		index := strings.Index(text, " - ")
+		if index < 0 { // without this guard text[index+3:] would slice at the wrong offset
+			return "", fmt.Errorf("Found no '-' separator in %q", text)
+		}
+		postSeparatorFields := strings.Fields(text[index+3:])
+		// This is an error as we can't detect if the mount is for "Intel RDT"
+		if len(postSeparatorFields) == 0 {
+			return "", fmt.Errorf("Found no fields post '-' in %q", text)
+		}
+		if postSeparatorFields[0] == "resctrl" {
+			// Check that the mount is properly formatted.
+			if len(postSeparatorFields) < 3 {
+				return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
+			}
+			return fields[4], nil
+		}
+	}
+	if err := s.Err(); err != nil {
+		return "", err
+	}
+
+	return "", NewNotFoundError("Intel RDT")
+}
+
+// getIntelRdtRoot returns the root path of the Intel RDT "resource control"
+// filesystem, looking it up on first use and caching it in intelRdtRoot.
+func getIntelRdtRoot() (string, error) {
+	intelRdtRootLock.Lock()
+	defer intelRdtRootLock.Unlock()
+
+	if intelRdtRoot == "" {
+		mountpoint, err := findIntelRdtMountpointDir()
+		if err != nil {
+			return "", err
+		}
+
+		// Cache only a mount point that can actually be stat'ed.
+		if _, err := os.Stat(mountpoint); err != nil {
+			return "", err
+		}
+		intelRdtRoot = mountpoint
+	}
+
+	return intelRdtRoot, nil
+}
+
+// isIntelRdtMounted reports whether the Intel RDT "resource control"
+// filesystem root could be resolved. Only success or failure matters here:
+// the path itself is discarded (getIntelRdtRoot caches it for later callers).
+func isIntelRdtMounted() bool {
+	_, err := getIntelRdtRoot()
+
+	return err == nil
+}
+
+// parseCpuInfoFile reports whether the cpuinfo-style file at path lists the
+// "cat_l3" flag, which is set if Intel RDT/CAT is supported.
+func parseCpuInfoFile(path string) (bool, error) {
+	f, err := os.Open(path)
+	if err != nil {
+		return false, err
+	}
+	defer f.Close()
+
+	s := bufio.NewScanner(f)
+	for s.Scan() {
+		// Flags are separated by single spaces, so compare whole tokens only.
+		for _, flag := range strings.Split(s.Text(), " ") {
+			if flag == "cat_l3" {
+				return true, nil
+			}
+		}
+	}
+	// Scan signals failure only by returning false, so the error has to be
+	// fetched after the loop; otherwise a read error looks like "no flag".
+	if err := s.Err(); err != nil {
+		return false, err
+	}
+	return false, nil
+}
+
+// parseUint parses s as an unsigned integer of the given base and bit size.
+// Input that ParseUint rejects but that denotes a negative number (including
+// one too small for the signed type) is clamped to 0 rather than reported.
+func parseUint(s string, base, bitSize int) (uint64, error) {
+	value, err := strconv.ParseUint(s, base, bitSize)
+	if err == nil {
+		return value, nil
+	}
+
+	// Re-parse as signed: a negative result, whether exact or saturated by a
+	// range error, identifies the negative inputs that are mapped to 0.
+	intValue, intErr := strconv.ParseInt(s, base, bitSize)
+	if intValue < 0 && (intErr == nil || intErr.(*strconv.NumError).Err == strconv.ErrRange) {
+		return 0, nil
+	}
+	return value, err
+}
+
+// getIntelRdtParamUint reads <path>/<file> and parses its trimmed contents as a base-10 uint64.
+func getIntelRdtParamUint(path, file string) (uint64, error) {
+	fileName := filepath.Join(path, file)
+	raw, err := ioutil.ReadFile(fileName)
+	if err != nil {
+		return 0, err
+	}
+	trimmed := strings.TrimSpace(string(raw))
+	parsed, err := parseUint(trimmed, 10, 64)
+	if err != nil {
+		return parsed, fmt.Errorf("unable to parse %q as a uint from file %q", string(raw), fileName)
+	}
+	return parsed, nil
+}
+
+// getIntelRdtParamString returns the whitespace-trimmed contents of <path>/<file>.
+func getIntelRdtParamString(path, file string) (string, error) {
+	fileName := filepath.Join(path, file)
+	raw, err := ioutil.ReadFile(fileName)
+	if err != nil {
+		return "", err
+	}
+	return strings.TrimSpace(string(raw)), nil
+}
+
+func readTasksFile(dir string) ([]int, error) {
+	f, err := os.Open(filepath.Join(dir, IntelRdtTasks))
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+
+	var (
+		s   = bufio.NewScanner(f)
+		out = []int{}
+	)
+
+	for s.Scan() {
+		if t := s.Text(); t != "" {
+			pid, err := strconv.Atoi(t)
+			if err != nil {
+				return nil, err
+			}
+			out = append(out, pid)
+		}
+	}
+	return out, nil
+}
+
+func writeFile(dir, file, data string) error { // writes data plus a trailing newline to <dir>/<file>
+	if dir == "" {
+		return fmt.Errorf("no such directory for %s", file)
+	}
+	if err := ioutil.WriteFile(filepath.Join(dir, file), []byte(data+"\n"), 0700); err != nil {
+		return fmt.Errorf("failed to write %v to %v: %v", data, file, err)
+	}
+	return nil
+}
+
+// getIntelRdtData resolves the Intel RDT root and bundles it with c and pid.
+// When the root cannot be resolved the result is nil and the error is
+// returned unchanged.
+func getIntelRdtData(c *configs.Config, pid int) (*intelRdtData, error) {
+	rootPath, err := getIntelRdtRoot()
+	if err != nil {
+		return nil, err
+	}
+	data := &intelRdtData{root: rootPath, config: c, pid: pid}
+	return data, nil
+}
+
+// getL3CacheInfo reads the read-only L3 cache information from <root>/info/L3.
+// On failure it returns an empty L3CacheInfo together with the error.
+func getL3CacheInfo() (*L3CacheInfo, error) {
+	empty := &L3CacheInfo{}
+	rootPath, err := getIntelRdtRoot()
+	if err != nil {
+		return empty, err
+	}
+
+	infoDir := filepath.Join(rootPath, "info", "L3")
+	mask, err := getIntelRdtParamString(infoDir, "cbm_mask")
+	if err != nil {
+		return empty, err
+	}
+	minBits, err := getIntelRdtParamUint(infoDir, "min_cbm_bits")
+	if err != nil {
+		return empty, err
+	}
+	closids, err := getIntelRdtParamUint(infoDir, "num_closids")
+	if err != nil {
+		return empty, err
+	}
+
+	return &L3CacheInfo{
+		CbmMask:    mask,
+		MinCbmBits: minBits,
+		NumClosids: closids,
+	}, nil
+}
+
+// WriteIntelRdtTasks writes the specified pid into the "tasks" file of dir; a pid of -1 attaches nothing
+func WriteIntelRdtTasks(dir string, pid int) error {
+	if dir == "" {
+		return fmt.Errorf("no such directory for %s", IntelRdtTasks)
+	}
+	if pid == -1 {
+		return nil
+	}
+	tasksFile := filepath.Join(dir, IntelRdtTasks)
+	if err := ioutil.WriteFile(tasksFile, []byte(strconv.Itoa(pid)), 0700); err != nil {
+		return fmt.Errorf("failed to write %v to %v: %v", pid, IntelRdtTasks, err)
+	}
+	return nil
+}
+
+// IsEnabled reports whether Intel RDT is enabled, as determined by this package's init()
+func IsEnabled() bool {
+	return isEnabled
+}
+
+// GetIntelRdtPath returns the 'container_id' path, <root>/<id>, in the Intel
+// RDT "resource control" filesystem
+func GetIntelRdtPath(id string) (string, error) {
+	root, err := getIntelRdtRoot()
+	if err != nil {
+		return "", err
+	}
+
+	return filepath.Join(root, id), nil
+}
+
+// Apply joins pid to this container's Intel RDT group (created on demand) and records its path in m.Path
+func (m *IntelRdtManager) Apply(pid int) (err error) {
+	// If intelRdt is not specified in config, we do nothing
+	if m.Config.IntelRdt == nil {
+		return nil
+	}
+	// d is nil on any error, including "resctrl not mounted" (NotFoundError),
+	// so every error must stop here; letting one through would make d.join panic.
+	d, err := getIntelRdtData(m.Config, pid)
+	if err != nil {
+		return err
+	}
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	path, err := d.join(m.Id)
+	if err != nil {
+		return err
+	}
+	m.Path = path
+	return nil
+}
+
+// Destroy removes the Intel RDT 'container_id' group and, on success, clears the stored path
+func (m *IntelRdtManager) Destroy() error {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	err := os.RemoveAll(m.Path)
+	if err == nil {
+		m.Path = ""
+	}
+	return err
+}
+
+// GetPath returns the Intel RDT path to save in a state file, resolving it from m.Id on first use
+func (m *IntelRdtManager) GetPath() string {
+	if m.Path != "" {
+		return m.Path
+	}
+	m.Path, _ = GetIntelRdtPath(m.Id) // on failure Path simply stays ""
+	return m.Path
+}
+
+// GetStats returns statistics for Intel RDT, or (nil, nil) when the config has no intelRdt section
+func (m *IntelRdtManager) GetStats() (*Stats, error) {
+	// If intelRdt is not specified in config
+	if m.Config.IntelRdt == nil {
+		return nil, nil
+	}
+
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	stats := NewStats()
+
+	// The read-only L3 cache information
+	l3CacheInfo, err := getL3CacheInfo()
+	if err != nil {
+		return nil, err
+	}
+	stats.L3CacheInfo = l3CacheInfo
+
+	// The read-only L3 cache schema in root
+	rootPath, err := getIntelRdtRoot()
+	if err != nil {
+		return nil, err
+	}
+	tmpRootStrings, err := getIntelRdtParamString(rootPath, "schemata")
+	if err != nil {
+		return nil, err
+	}
+	// L3 cache schema is in the first line
+	schemaRootStrings := strings.Split(tmpRootStrings, "\n")
+	stats.L3CacheSchemaRoot = schemaRootStrings[0]
+
+	// The L3 cache schema in 'container_id' group
+	tmpStrings, err := getIntelRdtParamString(m.GetPath(), "schemata")
+	if err != nil {
+		return nil, err
+	}
+	// L3 cache schema is in the first line
+	schemaStrings := strings.Split(tmpStrings, "\n")
+	stats.L3CacheSchema = schemaStrings[0]
+
+	return stats, nil
+}
+
+// Set writes container's L3 cache schema, if one is configured, to the
+// "schemata" file of this manager's Intel RDT "resource control" group.
+func (m *IntelRdtManager) Set(container *configs.Config) error {
+	path := m.GetPath()
+
+	// About the L3 cache schema file:
+	// The schema holds the allocation masks/values for the L3 cache on each
+	// socket, i.e. an L3 cache id paired with a capacity bitmask (CBM).
+	//     Format: "L3:<cache_id0>=<cbm0>;<cache_id1>=<cbm1>;..."
+	// For example, on a two-socket machine, L3's schema line could be:
+	//     L3:0=ff;1=c0
+	// which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is
+	// 0xc0.
+	//
+	// About L3 cache CBM validity:
+	// A valid L3 cache CBM is a *contiguous set of bits*, and the number of
+	// bits that can be set is limited by the max bits. The max bits in the
+	// CBM varies among supported Intel Xeon platforms. In the Intel RDT
+	// "resource control" filesystem layout, the CBM in a group should be a
+	// subset of the CBM in root. The kernel checks validity when writing.
+	// E.g., 0xfffff in root indicates the max bits of the CBM is 20, which
+	// maps to the entire L3 cache capacity. Some valid CBM values to set in
+	// a group: 0xf, 0xf0, 0x3ff, 0x1f00, etc.
+	if container.IntelRdt == nil {
+		return nil
+	}
+	schema := container.IntelRdt.L3CacheSchema
+	if schema == "" {
+		return nil
+	}
+	return writeFile(path, "schemata", schema)
+}
+
+// join creates the <root>/<id> group directory if missing, attaches raw.pid via WriteIntelRdtTasks and returns the path.
+func (raw *intelRdtData) join(id string) (string, error) {
+	groupDir := filepath.Join(raw.root, id)
+	if err := os.MkdirAll(groupDir, 0755); err != nil {
+		return "", err
+	}
+	if err := WriteIntelRdtTasks(groupDir, raw.pid); err != nil {
+		return "", err
+	}
+	return groupDir, nil
+}
+
+type NotFoundError struct { // returned when the mount point of a resource control is not found
+	ResourceControl string
+}
+
+func (e *NotFoundError) Error() string {
+	return fmt.Sprintf("mountpoint for %s not found", e.ResourceControl) // names the missing resource control
+}
+
+// NewNotFoundError returns a *NotFoundError for the resource control named
+// res; IsNotFound recognises the result.
+func NewNotFoundError(res string) error {
+	return &NotFoundError{ResourceControl: res}
+}
+
+// IsNotFound reports whether err is a *NotFoundError. A nil err needs no
+// special case: a type assertion on a nil interface value simply fails.
+func IsNotFound(err error) bool {
+	_, isNotFound := err.(*NotFoundError)
+
+	return isNotFound
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/stats.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/stats.go
@@ -0,0 +1,24 @@
+// +build linux
+
+package intelrdt
+
+// L3CacheInfo holds L3 cache details, serialized under the JSON keys
+// cbm_mask, min_cbm_bits and num_closids (each omitted when zero/empty).
+// NOTE(review): presumably these mirror the kernel's resctrl info files
+// for L3 — confirm against the code that fills this struct.
+type L3CacheInfo struct {
+	CbmMask    string `json:"cbm_mask,omitempty"`
+	MinCbmBits uint64 `json:"min_cbm_bits,omitempty"`
+	NumClosids uint64 `json:"num_closids,omitempty"`
+}
+
+// Stats groups the Intel RDT L3 cache data reported for a container.
+// All fields are omitted from the JSON encoding when empty.
+type Stats struct {
+	// The read-only L3 cache information
+	L3CacheInfo *L3CacheInfo `json:"l3_cache_info,omitempty"`
+
+	// The read-only L3 cache schema in root
+	L3CacheSchemaRoot string `json:"l3_cache_schema_root,omitempty"`
+
+	// The L3 cache schema in 'container_id' group
+	L3CacheSchema string `json:"l3_cache_schema,omitempty"`
+}
+
+// NewStats returns a pointer to a zero-valued Stats.
+func NewStats() *Stats {
+	return new(Stats)
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/keys/keyctl.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/keys/keyctl.go
@@ -0,0 +1,50 @@
+// +build linux
+
+package keys
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+
+	"golang.org/x/sys/unix"
+)
+
+// KeySerial is the numeric ID of a key or keyring as used by the keyctl
+// wrappers in this package.
+type KeySerial uint32
+
+// JoinSessionKeyring calls unix.KeyctlJoinSessionKeyring with name and
+// returns the resulting keyring ID. Failures are wrapped in a
+// "could not create session key" error.
+func JoinSessionKeyring(name string) (KeySerial, error) {
+	id, err := unix.KeyctlJoinSessionKeyring(name)
+	if err == nil {
+		return KeySerial(id), nil
+	}
+	return 0, fmt.Errorf("could not create session key: %v", err)
+}
+
+// ModKeyringPerm updates the permissions of the keyring ringId. It reads
+// the keyring's current description, ANDs the existing permission bits with
+// mask (clearing bits), ORs in setbits, and writes the result back.
+func ModKeyringPerm(ringId KeySerial, mask, setbits uint32) error {
+	description, err := unix.KeyctlString(unix.KEYCTL_DESCRIBE, int(ringId))
+	if err != nil {
+		return err
+	}
+
+	// The description is a ';'-separated record; at least five fields are
+	// required and the fourth one carries the permissions in hexadecimal.
+	fields := strings.Split(description, ";")
+	if len(fields) < 5 {
+		return fmt.Errorf("Destination buffer for key description is too small")
+	}
+
+	current, err := strconv.ParseUint(fields[3], 16, 32)
+	if err != nil {
+		return err
+	}
+
+	updated := uint32(current)&mask | setbits
+	return unix.KeyctlSetperm(int(ringId), updated)
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/message_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/message_linux.go
@@ -0,0 +1,89 @@
+// +build linux
+
+package libcontainer
+
+import (
+	"github.com/vishvananda/netlink/nl"
+	"golang.org/x/sys/unix"
+)
+
+// Message and attribute types sent to the bootstrap program.
+// The numbers are arbitrary, chosen so they do not conflict with known
+// netlink types. InitMsg is the netlink message type; the *Attr values are
+// attribute types carried inside that message.
+// NOTE(review): nsenter/nsexec.c hard-codes the same numbers as #defines;
+// keep both lists in sync.
+const (
+	InitMsg         uint16 = 62000
+	CloneFlagsAttr  uint16 = 27281
+	NsPathsAttr     uint16 = 27282
+	UidmapAttr      uint16 = 27283
+	GidmapAttr      uint16 = 27284
+	SetgroupAttr    uint16 = 27285
+	OomScoreAdjAttr uint16 = 27286
+	RootlessAttr    uint16 = 27287
+	UidmapPathAttr  uint16 = 27288
+	GidmapPathAttr  uint16 = 27289
+)
+
+// Int32msg is a netlink attribute whose payload is one 32-bit value.
+type Int32msg struct {
+	Type  uint16
+	Value uint32
+}
+
+// Serialize encodes the attribute in native byte order.
+// Layout:
+// | nlattr len | nlattr type |
+// | uint32 value             |
+func (msg *Int32msg) Serialize() []byte {
+	size := msg.Len()
+	out := make([]byte, size)
+	order := nl.NativeEndian()
+	order.PutUint16(out[:2], uint16(size))
+	order.PutUint16(out[2:4], msg.Type)
+	order.PutUint32(out[4:], msg.Value)
+	return out
+}
+
+// Len returns the attribute length: the header plus a 4-byte value.
+func (msg *Int32msg) Len() int {
+	const valueSize = 4
+	return unix.NLA_HDRLEN + valueSize
+}
+
+// Bytemsg is a netlink attribute carrying an arbitrary byte payload.
+// Layout:
+// | nlattr len | nlattr type |
+// | value              | pad |
+type Bytemsg struct {
+	Type  uint16
+	Value []byte
+}
+
+// Serialize encodes the attribute in native byte order. The returned buffer
+// is rounded up to a multiple of NLA_ALIGNTO, while the length field holds
+// the unpadded Len().
+func (msg *Bytemsg) Serialize() []byte {
+	attrLen := msg.Len()
+	paddedLen := (attrLen + unix.NLA_ALIGNTO - 1) &^ (unix.NLA_ALIGNTO - 1)
+	out := make([]byte, paddedLen)
+	order := nl.NativeEndian()
+	order.PutUint16(out[:2], uint16(attrLen))
+	order.PutUint16(out[2:4], msg.Type)
+	copy(out[4:], msg.Value)
+	return out
+}
+
+// Len returns the header size plus the payload size plus one byte for the
+// terminating NUL.
+func (msg *Bytemsg) Len() int {
+	const nulTerminator = 1
+	return unix.NLA_HDRLEN + len(msg.Value) + nulTerminator // null-terminated
+}
+
+// Boolmsg is a netlink attribute whose payload is a single byte: 1 for
+// true, 0 for false.
+// Layout:
+// | nlattr len | nlattr type |
+// | value (1 byte)     | pad |
+type Boolmsg struct {
+	Type  uint16
+	Value bool
+}
+
+// Serialize encodes the attribute in native byte order. Like
+// Bytemsg.Serialize, the returned buffer is rounded up to a multiple of
+// NLA_ALIGNTO so that an attribute appended after this one starts on an
+// aligned offset; the length field still holds the unpadded Len().
+func (msg *Boolmsg) Serialize() []byte {
+	l := msg.Len()
+	buf := make([]byte, (l+unix.NLA_ALIGNTO-1) & ^(unix.NLA_ALIGNTO-1))
+	native := nl.NativeEndian()
+	native.PutUint16(buf[0:2], uint16(l))
+	native.PutUint16(buf[2:4], msg.Type)
+	// make() zero-fills the buffer, so only the true case needs a store.
+	if msg.Value {
+		buf[4] = 1
+	}
+	return buf
+}
+
+// Len returns the unpadded attribute length: the header plus one byte.
+func (msg *Boolmsg) Len() int {
+	return unix.NLA_HDRLEN + 1
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/mount/mount.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/mount/mount.go
@@ -0,0 +1,23 @@
+package mount
+
+// GetMounts returns the mount table entries produced by parseMountTable
+// for the current running process.
+func GetMounts() ([]*Info, error) {
+	mounts, err := parseMountTable()
+	return mounts, err
+}
+
+// Mounted reports whether the mount table contains an entry whose
+// mountpoint is exactly mountpoint. The comparison is a plain string
+// equality check.
+func Mounted(mountpoint string) (bool, error) {
+	mounts, err := parseMountTable()
+	if err != nil {
+		return false, err
+	}
+
+	// Stop scanning as soon as a matching entry has been seen.
+	found := false
+	for i := 0; i < len(mounts) && !found; i++ {
+		found = mounts[i].Mountpoint == mountpoint
+	}
+	return found, nil
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/mount/mount_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/mount/mount_linux.go
@@ -0,0 +1,82 @@
+// +build linux
+
+package mount
+
+import (
+	"bufio"
+	"fmt"
+	"io"
+	"os"
+	"strings"
+)
+
+const (
+	/* 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue
+	   (1)(2)(3)   (4)   (5)      (6)      (7)   (8) (9)   (10)         (11)
+
+	   (1) mount ID:  unique identifier of the mount (may be reused after umount)
+	   (2) parent ID:  ID of parent (or of self for the top of the mount tree)
+	   (3) major:minor:  value of st_dev for files on filesystem
+	   (4) root:  root of the mount within the filesystem
+	   (5) mount point:  mount point relative to the process's root
+	   (6) mount options:  per mount options
+	   (7) optional fields:  zero or more fields of the form "tag[:value]"
+	   (8) separator:  marks the end of the optional fields
+	   (9) filesystem type:  name of filesystem of the form "type[.subtype]"
+	   (10) mount source:  filesystem specific information or "none"
+	   (11) super options:  per super block options*/
+	// mountinfoFormat scans fields (1)-(6) plus exactly one further token,
+	// which is either the first optional field or, when there are no
+	// optional fields, the "-" separator itself.
+	mountinfoFormat = "%d %d %d:%d %s %s %s %s"
+)
+
+// parseMountTable opens /proc/self/mountinfo and parses it with
+// parseInfoFile. mountinfo is used because comparing Dev and ino does not
+// work for bind mounts.
+func parseMountTable() ([]*Info, error) {
+	mountinfo, err := os.Open("/proc/self/mountinfo")
+	if err != nil {
+		return nil, err
+	}
+	defer mountinfo.Close()
+
+	mounts, err := parseInfoFile(mountinfo)
+	return mounts, err
+}
+
+// parseInfoFile parses mountinfo-formatted text from r, one mount per line,
+// and returns one *Info per line in input order.
+//
+// It returns an error if a line does not match mountinfoFormat, if a line
+// has no " - " separator, if fewer than three fields follow the separator,
+// or if reading from r fails. An empty input yields an empty, non-nil slice.
+func parseInfoFile(r io.Reader) ([]*Info, error) {
+	var (
+		s   = bufio.NewScanner(r)
+		out = []*Info{}
+	)
+
+	for s.Scan() {
+		var (
+			p              = &Info{}
+			text           = s.Text()
+			optionalFields string
+		)
+
+		if _, err := fmt.Sscanf(text, mountinfoFormat,
+			&p.ID, &p.Parent, &p.Major, &p.Minor,
+			&p.Root, &p.Mountpoint, &p.Opts, &optionalFields); err != nil {
+			return nil, fmt.Errorf("Scanning '%s' failed: %s", text, err)
+		}
+		// Safe as mountinfo encodes mountpoints with spaces as \040.
+		index := strings.Index(text, " - ")
+		if index < 0 {
+			// Without this guard text[index+3:] would slice from offset 2
+			// and unrelated leading fields would be taken as fstype/source.
+			return nil, fmt.Errorf("Error found no ' - ' separator in %q", text)
+		}
+		postSeparatorFields := strings.Fields(text[index+3:])
+		if len(postSeparatorFields) < 3 {
+			return nil, fmt.Errorf("Error found less than 3 fields post '-' in %q", text)
+		}
+
+		// When a line has no optional fields the scanned token is the
+		// separator itself; leave Optional empty in that case.
+		if optionalFields != "-" {
+			p.Optional = optionalFields
+		}
+
+		p.Fstype = postSeparatorFields[0]
+		p.Source = postSeparatorFields[1]
+		p.VfsOpts = strings.Join(postSeparatorFields[2:], " ")
+		out = append(out, p)
+	}
+	// Scan returns false on both EOF and failure, so the scanner's error has
+	// to be inspected once the loop has ended.
+	if err := s.Err(); err != nil {
+		return nil, err
+	}
+	return out, nil
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/mount/mountinfo.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/mount/mountinfo.go
@@ -0,0 +1,40 @@
+package mount
+
+// Info reveals information about a particular mounted filesystem. This
+// struct is populated from the content in the /proc/<pid>/mountinfo file,
+// one Info per line.
+type Info struct {
+	// ID is a unique identifier of the mount (may be reused after umount).
+	ID int
+
+	// Parent indicates the ID of the mount parent (or of self for the top of the
+	// mount tree).
+	Parent int
+
+	// Major indicates one half of the device ID which identifies the device class.
+	Major int
+
+	// Minor indicates one half of the device ID which identifies a specific
+	// instance of device.
+	Minor int
+
+	// Root of the mount within the filesystem.
+	Root string
+
+	// Mountpoint indicates the mount point relative to the process's root.
+	Mountpoint string
+
+	// Opts represents mount-specific options.
+	Opts string
+
+	// Optional represents optional fields. It is left empty when the
+	// mountinfo line has none.
+	Optional string
+
+	// Fstype indicates the type of filesystem, such as EXT3.
+	Fstype string
+
+	// Source indicates filesystem specific information or "none".
+	Source string
+
+	// VfsOpts represents per super block options.
+	VfsOpts string
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/network_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/network_linux.go
@@ -0,0 +1,259 @@
+// +build linux
+
+package libcontainer
+
+import (
+	"fmt"
+	"io/ioutil"
+	"net"
+	"path/filepath"
+	"strconv"
+	"strings"
+
+	"github.com/opencontainers/runc/libcontainer/configs"
+	"github.com/opencontainers/runc/libcontainer/utils"
+	"github.com/vishvananda/netlink"
+)
+
+// strategies maps a network type name to the networkStrategy that handles
+// it. getStrategy looks types up here.
+var strategies = map[string]networkStrategy{
+	"veth":     &veth{},
+	"loopback": &loopback{},
+}
+
+// networkStrategy represents a specific network configuration for
+// a container's networking stack
+type networkStrategy interface {
+	// create sets up the network; the int is a process ID (the veth
+	// strategy moves the peer interface into that process's namespace).
+	create(*network, int) error
+	// initialize configures the interface described by the network.
+	// NOTE(review): presumably called from inside the container's
+	// network namespace — confirm against the caller.
+	initialize(*network) error
+	// detach undoes attach for the given network configuration.
+	detach(*configs.Network) error
+	// attach connects the network's host-side interface to the outside.
+	attach(*configs.Network) error
+}
+
+// getStrategy looks up the networkStrategy registered for tpe in
+// strategies. An unregistered type yields an "unknown strategy type" error.
+func getStrategy(tpe string) (networkStrategy, error) {
+	if strategy, ok := strategies[tpe]; ok {
+		return strategy, nil
+	}
+	return nil, fmt.Errorf("unknown strategy type %q", tpe)
+}
+
+// getNetworkInterfaceStats reads the sysfs statistics of the host interface
+// interfaceName and returns them from the container's point of view.
+// An empty interfaceName returns a NetworkInterface with all counters zero.
+func getNetworkInterfaceStats(interfaceName string) (*NetworkInterface, error) {
+	stats := &NetworkInterface{Name: interfaceName}
+	// The name can be empty when the network runtime information is
+	// missing - possible if the container was created by an old version of
+	// libcontainer.
+	if interfaceName == "" {
+		return stats, nil
+	}
+
+	// Ingress for host veth is from the container. Hence the tx_* stats on
+	// the host veth are what the container received, and vice versa.
+	sources := []struct {
+		file string  // stats file to read
+		dst  *uint64 // counter to fill in
+	}{
+		{"tx_bytes", &stats.RxBytes},
+		{"tx_packets", &stats.RxPackets},
+		{"tx_errors", &stats.RxErrors},
+		{"tx_dropped", &stats.RxDropped},
+
+		{"rx_bytes", &stats.TxBytes},
+		{"rx_packets", &stats.TxPackets},
+		{"rx_errors", &stats.TxErrors},
+		{"rx_dropped", &stats.TxDropped},
+	}
+	for i := range sources {
+		value, err := readSysfsNetworkStats(interfaceName, sources[i].file)
+		if err != nil {
+			return nil, err
+		}
+		*sources[i].dst = value
+	}
+	return stats, nil
+}
+
+// readSysfsNetworkStats reads the file
+// /sys/class/net/<ethInterface>/statistics/<statsFile>, trims surrounding
+// whitespace and parses the content as a base-10 unsigned 64-bit integer.
+func readSysfsNetworkStats(ethInterface, statsFile string) (uint64, error) {
+	statPath := filepath.Join("/sys/class/net", ethInterface, "statistics", statsFile)
+	raw, err := ioutil.ReadFile(statPath)
+	if err != nil {
+		return 0, err
+	}
+	value := strings.TrimSpace(string(raw))
+	return strconv.ParseUint(value, 10, 64)
+}
+
+// loopback is a network strategy that provides a basic loopback device
+type loopback struct{}
+
+// create is a no-op for the loopback strategy.
+func (l *loopback) create(n *network, nspid int) error {
+	return nil
+}
+
+// initialize brings up the interface named "lo".
+func (l *loopback) initialize(config *network) error {
+	lo := &netlink.Device{
+		LinkAttrs: netlink.LinkAttrs{Name: "lo"},
+	}
+	return netlink.LinkSetUp(lo)
+}
+
+// attach is a no-op for the loopback strategy.
+func (l *loopback) attach(n *configs.Network) (err error) {
+	return nil
+}
+
+// detach is a no-op for the loopback strategy.
+func (l *loopback) detach(n *configs.Network) (err error) {
+	return nil
+}
+
+// veth is a network strategy that uses a bridge and creates
+// a veth pair, one that is attached to the bridge on the host and the other
+// is placed inside the container's namespace
+type veth struct{}
+
+// detach clears the master of the host-side interface
+// n.HostInterfaceName by setting it to nil.
+func (v *veth) detach(n *configs.Network) (err error) {
+	host := &netlink.Device{
+		LinkAttrs: netlink.LinkAttrs{Name: n.HostInterfaceName},
+	}
+	return netlink.LinkSetMaster(host, nil)
+}
+
+// attach connects the host-side interface n.HostInterfaceName to the bridge
+// n.Bridge. In order, it sets the bridge as master, applies n.Mtu, enables
+// hairpin mode when n.HairpinMode is set, and brings the interface up.
+// It fails if n.Bridge does not resolve to a bridge device.
+func (v *veth) attach(n *configs.Network) (err error) {
+	bridgeLink, err := netlink.LinkByName(n.Bridge)
+	if err != nil {
+		return err
+	}
+	bridge, isBridge := bridgeLink.(*netlink.Bridge)
+	if !isBridge {
+		return fmt.Errorf("Wrong device type %T", bridgeLink)
+	}
+
+	hostLink, err := netlink.LinkByName(n.HostInterfaceName)
+	if err != nil {
+		return err
+	}
+
+	if err := netlink.LinkSetMaster(hostLink, bridge); err != nil {
+		return err
+	}
+	if err := netlink.LinkSetMTU(hostLink, n.Mtu); err != nil {
+		return err
+	}
+	if n.HairpinMode {
+		if err := netlink.LinkSetHairpin(hostLink, true); err != nil {
+			return err
+		}
+	}
+	return netlink.LinkSetUp(hostLink)
+}
+
+// create builds the veth pair for n: the host end is named
+// n.HostInterfaceName and the peer gets a freshly generated temporary name
+// that is stored in n.TempVethPeerName. The host end is then attached via
+// v.attach and the peer is moved into the namespace of process nspid.
+//
+// The result is named so the deferred cleanup can see it: if any step after
+// LinkAdd fails, the pair is deleted again.
+func (v *veth) create(n *network, nspid int) (err error) {
+	tmpName, err := v.generateTempPeerName()
+	if err != nil {
+		return err
+	}
+	n.TempVethPeerName = tmpName
+	if n.Bridge == "" {
+		return fmt.Errorf("bridge is not specified")
+	}
+	veth := &netlink.Veth{
+		LinkAttrs: netlink.LinkAttrs{
+			Name:   n.HostInterfaceName,
+			TxQLen: n.TxQueueLen,
+		},
+		PeerName: n.TempVethPeerName,
+	}
+	if err := netlink.LinkAdd(veth); err != nil {
+		return err
+	}
+	// Registered only after LinkAdd succeeded, so there is something to undo.
+	defer func() {
+		if err != nil {
+			netlink.LinkDel(veth)
+		}
+	}()
+	// "return err" with the block-scoped err still assigns the named result,
+	// which is what the deferred function inspects.
+	if err := v.attach(&n.Network); err != nil {
+		return err
+	}
+	child, err := netlink.LinkByName(n.TempVethPeerName)
+	if err != nil {
+		return err
+	}
+	return netlink.LinkSetNsPid(child, nspid)
+}
+
+// generateTempPeerName asks utils.GenerateRandomName for a name with the
+// prefix "veth" and size argument 7.
+func (v *veth) generateTempPeerName() (string, error) {
+	const (
+		prefix = "veth"
+		size   = 7
+	)
+	return utils.GenerateRandomName(prefix, size)
+}
+
+// initialize configures the peer interface config.TempVethPeerName. In
+// order, it:
+//   - brings the interface down and renames it to config.Name,
+//   - sets config.MacAddress if given,
+//   - adds config.Address and, if given, config.IPv6Address,
+//   - applies config.Mtu and brings the interface up,
+//   - adds a route via config.Gateway and via config.IPv6Gateway when set.
+//
+// It returns an error if the peer name is empty, if an address, MAC or
+// gateway cannot be parsed, or if any netlink operation fails.
+func (v *veth) initialize(config *network) error {
+	peer := config.TempVethPeerName
+	if peer == "" {
+		return fmt.Errorf("peer is not specified")
+	}
+	child, err := netlink.LinkByName(peer)
+	if err != nil {
+		return err
+	}
+	if err := netlink.LinkSetDown(child); err != nil {
+		return err
+	}
+	if err := netlink.LinkSetName(child, config.Name); err != nil {
+		return err
+	}
+	// get the interface again after we changed the name as the index also changes.
+	if child, err = netlink.LinkByName(config.Name); err != nil {
+		return err
+	}
+	if config.MacAddress != "" {
+		mac, err := net.ParseMAC(config.MacAddress)
+		if err != nil {
+			return err
+		}
+		if err := netlink.LinkSetHardwareAddr(child, mac); err != nil {
+			return err
+		}
+	}
+	ip, err := netlink.ParseAddr(config.Address)
+	if err != nil {
+		return err
+	}
+	if err := netlink.AddrAdd(child, ip); err != nil {
+		return err
+	}
+	if config.IPv6Address != "" {
+		ip6, err := netlink.ParseAddr(config.IPv6Address)
+		if err != nil {
+			return err
+		}
+		if err := netlink.AddrAdd(child, ip6); err != nil {
+			return err
+		}
+	}
+	if err := netlink.LinkSetMTU(child, config.Mtu); err != nil {
+		return err
+	}
+	if err := netlink.LinkSetUp(child); err != nil {
+		return err
+	}
+	// The IPv4 gateway is handled before the IPv6 one; unset ones are skipped.
+	for _, gateway := range []string{config.Gateway, config.IPv6Gateway} {
+		if gateway == "" {
+			continue
+		}
+		// net.ParseIP signals a malformed address by returning nil; reject
+		// it here rather than handing a route without a gateway to netlink.
+		gw := net.ParseIP(gateway)
+		if gw == nil {
+			return fmt.Errorf("invalid gateway address %q", gateway)
+		}
+		if err := netlink.RouteAdd(&netlink.Route{
+			Scope:     netlink.SCOPE_UNIVERSE,
+			LinkIndex: child.Attrs().Index,
+			Gw:        gw,
+		}); err != nil {
+			return err
+		}
+	}
+	return nil
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/notify_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/notify_linux.go
@@ -0,0 +1,90 @@
+// +build linux
+
+package libcontainer
+
+import (
+	"fmt"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+
+	"golang.org/x/sys/unix"
+)
+
+// oomCgroupName is the key under which notifyOnOOM and notifyMemoryPressure
+// look up the cgroup directory in their paths map.
+const oomCgroupName = "memory"
+
+// PressureLevel selects the memory pressure level to be notified about.
+type PressureLevel uint
+
+// The values index the level names "low", "medium" and "critical" used by
+// notifyMemoryPressure, in that order.
+const (
+	LowPressure PressureLevel = iota
+	MediumPressure
+	CriticalPressure
+)
+
+// registerMemoryEvent registers an eventfd for the cgroup file evName in
+// cgDir by writing "<eventfd> <evName fd> <arg>" to cgroup.event_control.
+//
+// The returned channel receives one value per event read from the eventfd.
+// It is closed, and both descriptors are released, when reading the eventfd
+// fails or when cgroup.event_control no longer exists.
+func registerMemoryEvent(cgDir string, evName string, arg string) (<-chan struct{}, error) {
+	evFile, err := os.Open(filepath.Join(cgDir, evName))
+	if err != nil {
+		return nil, err
+	}
+	fd, err := unix.Eventfd(0, unix.EFD_CLOEXEC)
+	if err != nil {
+		evFile.Close()
+		return nil, err
+	}
+
+	// Wrap the raw descriptor so it can be read and closed like a file.
+	eventfd := os.NewFile(uintptr(fd), "eventfd")
+
+	eventControlPath := filepath.Join(cgDir, "cgroup.event_control")
+	data := fmt.Sprintf("%d %d %s", eventfd.Fd(), evFile.Fd(), arg)
+	if err := ioutil.WriteFile(eventControlPath, []byte(data), 0700); err != nil {
+		eventfd.Close()
+		evFile.Close()
+		return nil, err
+	}
+	ch := make(chan struct{})
+	go func() {
+		// From here on the goroutine owns both files and the channel.
+		defer func() {
+			eventfd.Close()
+			evFile.Close()
+			close(ch)
+		}()
+		buf := make([]byte, 8)
+		for {
+			if _, err := eventfd.Read(buf); err != nil {
+				return
+			}
+			// When a cgroup is destroyed, an event is sent to eventfd.
+			// So if the control path is gone, return instead of notifying.
+			if _, err := os.Lstat(eventControlPath); os.IsNotExist(err) {
+				return
+			}
+			// The channel is unbuffered: this blocks until a receiver is ready.
+			ch <- struct{}{}
+		}
+	}()
+	return ch, nil
+}
+
+// notifyOnOOM returns a channel that delivers OOM events for the memory
+// cgroup found in paths. If the process dies without an OOM, the channel
+// is closed. A missing "memory" entry in paths is an error.
+func notifyOnOOM(paths map[string]string) (<-chan struct{}, error) {
+	memoryDir, ok := paths[oomCgroupName]
+	if !ok || memoryDir == "" {
+		return nil, fmt.Errorf("path %q missing", oomCgroupName)
+	}
+	return registerMemoryEvent(memoryDir, "memory.oom_control", "")
+}
+
+// notifyMemoryPressure returns a channel that delivers memory pressure
+// events of the given level for the memory cgroup found in paths. A missing
+// "memory" entry in paths or a level above CriticalPressure is an error.
+func notifyMemoryPressure(paths map[string]string, level PressureLevel) (<-chan struct{}, error) {
+	memoryDir := paths[oomCgroupName]
+	if memoryDir == "" {
+		return nil, fmt.Errorf("path %q missing", oomCgroupName)
+	}
+
+	var levelName string
+	switch level {
+	case LowPressure:
+		levelName = "low"
+	case MediumPressure:
+		levelName = "medium"
+	case CriticalPressure:
+		levelName = "critical"
+	default:
+		return nil, fmt.Errorf("invalid pressure level %d", level)
+	}
+	return registerMemoryEvent(memoryDir, "memory.pressure_level", levelName)
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/namespace.h
+++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/namespace.h
@@ -0,0 +1,32 @@
+#ifndef NSENTER_NAMESPACE_H
+#define NSENTER_NAMESPACE_H
+
+/*
+ * Fallback definitions of the CLONE_NEW* namespace flags. <sched.h> is
+ * included first; each flag is only defined here when that header did not
+ * already provide it.
+ */
+
+#ifndef _GNU_SOURCE
+#	define _GNU_SOURCE
+#endif
+#include <sched.h>
+
+/* All of these are taken from include/uapi/linux/sched.h */
+#ifndef CLONE_NEWNS
+#	define CLONE_NEWNS 0x00020000 /* New mount namespace group */
+#endif
+#ifndef CLONE_NEWCGROUP
+#	define CLONE_NEWCGROUP 0x02000000 /* New cgroup namespace */
+#endif
+#ifndef CLONE_NEWUTS
+#	define CLONE_NEWUTS 0x04000000 /* New utsname namespace */
+#endif
+#ifndef CLONE_NEWIPC
+#	define CLONE_NEWIPC 0x08000000 /* New ipc namespace */
+#endif
+#ifndef CLONE_NEWUSER
+#	define CLONE_NEWUSER 0x10000000 /* New user namespace */
+#endif
+#ifndef CLONE_NEWPID
+#	define CLONE_NEWPID 0x20000000 /* New pid namespace */
+#endif
+#ifndef CLONE_NEWNET
+#	define CLONE_NEWNET 0x40000000 /* New network namespace */
+#endif
+
+#endif /* NSENTER_NAMESPACE_H */
--- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter.go
@@ -0,0 +1,12 @@
+// +build linux,!gccgo
+
+package nsenter
+
+/*
+#cgo CFLAGS: -Wall
+// nsexec() is defined in nsexec.c.
+extern void nsexec();
+// init carries the constructor attribute, so it is invoked automatically
+// at program start-up, before main; all it does is call nsexec().
+void __attribute__((constructor)) init(void) {
+	nsexec();
+}
+*/
+import "C"
--- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_gccgo.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_gccgo.go
@@ -0,0 +1,25 @@
+// +build linux,gccgo
+
+package nsenter
+
+/*
+#cgo CFLAGS: -Wall
+// nsexec() is defined in nsexec.c.
+extern void nsexec();
+// init carries the constructor attribute, so it is invoked automatically
+// at program start-up, before main; all it does is call nsexec().
+void __attribute__((constructor)) init(void) {
+	nsexec();
+}
+*/
+import "C"
+
+// AlwaysFalse is here to stay false
+// (and be exported so the compiler doesn't optimize out its reference)
+var AlwaysFalse bool
+
+// init never actually calls C.init (AlwaysFalse stays false); the guarded
+// call exists only so that gccgo keeps a reference to the C function.
+func init() {
+	if AlwaysFalse {
+		// by referencing this C init() in a noop test, it will ensure the compiler
+		// links in the C function.
+		// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65134
+		C.init()
+	}
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_unsupported.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsenter_unsupported.go
@@ -0,0 +1,5 @@
+// +build !linux !cgo
+
+package nsenter
+
+import "C"
--- a/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c
+++ b/vendor/github.com/opencontainers/runc/libcontainer/nsenter/nsexec.c
@@ -0,0 +1,963 @@
+
+#define _GNU_SOURCE
+#include <endian.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <grp.h>
+#include <sched.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/ioctl.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include <linux/limits.h>
+#include <linux/netlink.h>
+#include <linux/types.h>
+
+/* Get all of the CLONE_NEW* flags. */
+#include "namespace.h"
+
+/*
+ * Synchronisation values. bail() reports a fatal error by writing SYNC_ERR
+ * to syncfd, followed by its integer exit code.
+ */
+enum sync_t {
+	SYNC_USERMAP_PLS = 0x40,	/* Request parent to map our users. */
+	SYNC_USERMAP_ACK = 0x41,	/* Mapping finished by the parent. */
+	SYNC_RECVPID_PLS = 0x42,	/* Tell parent we're sending the PID. */
+	SYNC_RECVPID_ACK = 0x43,	/* PID was correctly received by parent. */
+	SYNC_GRANDCHILD = 0x44,	/* The grandchild is ready to run. */
+	SYNC_CHILD_READY = 0x45,	/* The child or grandchild is ready to return. */
+
+	/* XXX: This doesn't help with segfaults and other such issues. */
+	SYNC_ERR = 0xFF,	/* Fatal error, no turning back. The error code follows. */
+};
+
+/*
+ * longjmp() arguments. clone_parent() takes one of these as jmpval and
+ * child_func() passes it to longjmp(), so it selects which code path the
+ * cloned process continues in.
+ */
+#define JUMP_PARENT 0x00
+#define JUMP_CHILD  0xA0
+#define JUMP_INIT   0xA1
+
+/* JSON buffer. NOTE(review): presumably a size in bytes - confirm at the use site. */
+#define JSON_MAX 4096
+
+/*
+ * Argument block for clone(). Assume the stack grows down, so arguments
+ * should be above it: stack_ptr is a zero-length member placed directly
+ * after stack, i.e. it marks the top of the 4096-byte child stack.
+ */
+struct clone_t {
+	/*
+	 * Reserve some space for clone() to locate arguments
+	 * and retcode in this place
+	 */
+	char stack[4096] __attribute__ ((aligned(16)));
+	char stack_ptr[0];
+
+	/* There's two children. This is used to execute the different code. */
+	jmp_buf *env;	/* jump buffer child_func() longjmp()s to */
+	int jmpval;	/* value handed to longjmp() */
+};
+
+/*
+ * Bootstrap configuration filled in by nl_parse(). Every pointer member
+ * other than data points into the buffer held by data, so they all become
+ * invalid once nl_free() has released it. Each *_len member is the payload
+ * length of the matching attribute.
+ */
+struct nlconfig_t {
+	char *data;	/* malloc()ed netlink payload; freed by nl_free() */
+
+	/* Process settings. */
+	uint32_t cloneflags;
+	char *oom_score_adj;
+	size_t oom_score_adj_len;
+
+	/* User namespace settings. */
+	char *uidmap;
+	size_t uidmap_len;
+	char *gidmap;
+	size_t gidmap_len;
+	char *namespaces;
+	size_t namespaces_len;
+	uint8_t is_setgroup;
+
+	/* Rootless container settings. */
+	uint8_t is_rootless;
+	char *uidmappath;
+	size_t uidmappath_len;
+	char *gidmappath;
+	size_t gidmappath_len;
+};
+
+/*
+ * List of netlink message types sent to us as part of bootstrapping the init.
+ * These constants are defined in libcontainer/message_linux.go.
+ * INIT_MSG is the message type nl_parse() expects in the netlink header;
+ * the *_ATTR values are the attribute types it accepts in the payload.
+ */
+#define INIT_MSG			62000
+#define CLONE_FLAGS_ATTR	27281
+#define NS_PATHS_ATTR		27282
+#define UIDMAP_ATTR			27283
+#define GIDMAP_ATTR			27284
+#define SETGROUP_ATTR		27285
+#define OOM_SCORE_ADJ_ATTR	27286
+#define ROOTLESS_ATTR	    27287
+#define UIDMAPPATH_ATTR	    27288
+#define GIDMAPPATH_ATTR	    27289
+
+/*
+ * Use the raw syscall for versions of glibc which don't include a function for
+ * it, namely (glibc 2.12). The wrapper below is only compiled for glibc 2.x
+ * with a minor version below 14, and the build fails if no syscall number
+ * for setns(2) is available.
+ */
+#if __GLIBC__ == 2 && __GLIBC_MINOR__ < 14
+#	define _GNU_SOURCE
+#	include "syscall.h"
+#	if !defined(SYS_setns) && defined(__NR_setns)
+#		define SYS_setns __NR_setns
+#	endif
+
+#ifndef SYS_setns
+#	error "setns(2) syscall not supported by glibc version"
+#endif
+
+/* Minimal setns(2) wrapper that forwards both arguments to the raw syscall. */
+int setns(int fd, int nstype)
+{
+	return syscall(SYS_setns, fd, nstype);
+}
+#endif
+
+/*
+ * XXX: This is ugly.
+ * File descriptor bail() reports errors on; -1 means "not set", in which
+ * case bail() only prints to stderr.
+ */
+static int syncfd = -1;
+
+/*
+ * bail(fmt, ...) prints "nsenter: <message>: <errno text>" to stderr, then,
+ * if syncfd is set, writes SYNC_ERR followed by the exit code to it, and
+ * finally exits the process with that code.
+ *
+ * The exit code is __COUNTER__ + 1, so every bail() call site gets its own
+ * value and the values depend on the order of the call sites in this file.
+ *
+ * TODO(cyphar): Fix this so it correctly deals with syncT.
+ */
+#define bail(fmt, ...)								\
+	do {									\
+		int ret = __COUNTER__ + 1;					\
+		fprintf(stderr, "nsenter: " fmt ": %m\n", ##__VA_ARGS__);	\
+		if (syncfd >= 0) {						\
+			enum sync_t s = SYNC_ERR;				\
+			if (write(syncfd, &s, sizeof(s)) != sizeof(s))		\
+				fprintf(stderr, "nsenter: failed: write(s)");	\
+			if (write(syncfd, &ret, sizeof(ret)) != sizeof(ret))	\
+				fprintf(stderr, "nsenter: failed: write(ret)");	\
+		}								\
+		exit(ret);							\
+	} while(0)
+
+/*
+ * Writes data_len bytes of data to the existing file whose path is built
+ * from the printf-style pathfmt and the following arguments. The file is
+ * opened O_RDWR and is never created.
+ *
+ * Returns 0 on success and -1 on failure. On failure errno describes the
+ * failing step: ENAMETOOLONG if the formatted path does not fit in
+ * PATH_MAX, otherwise the value left by open(2) or write(2). Callers
+ * inspect errno, so it is preserved across the final close(2).
+ */
+static int write_file(char *data, size_t data_len, char *pathfmt, ...)
+{
+	int fd, len, saved_errno;
+	ssize_t written;
+	char path[PATH_MAX];
+
+	va_list ap;
+	va_start(ap, pathfmt);
+	len = vsnprintf(path, PATH_MAX, pathfmt, ap);
+	va_end(ap);
+	if (len < 0)
+		return -1;
+	if (len >= PATH_MAX) {
+		/* The path was truncated; do not open some other file. */
+		errno = ENAMETOOLONG;
+		return -1;
+	}
+
+	fd = open(path, O_RDWR);
+	if (fd < 0)
+		return -1;
+
+	written = write(fd, data, data_len);
+	if (written < 0 || (size_t)written != data_len) {
+		saved_errno = errno;
+		close(fd);
+		errno = saved_errno;
+		return -1;
+	}
+
+	close(fd);
+	return 0;
+}
+
+/* setgroups policy selected for update_setgroups(). */
+enum policy_t {
+	SETGROUPS_DEFAULT = 0,	/* leave /proc/<pid>/setgroups untouched */
+	SETGROUPS_ALLOW,	/* write "allow" */
+	SETGROUPS_DENY,	/* write "deny" */
+};
+
+/*
+ * Writes the setgroups policy of process pid to /proc/<pid>/setgroups.
+ * SETGROUPS_DEFAULT (and any unknown value) writes nothing.
+ *
+ * This *must* be called before we touch gid_map.
+ */
+static void update_setgroups(int pid, enum policy_t setgroup)
+{
+	char *policy = NULL;
+
+	if (setgroup == SETGROUPS_ALLOW)
+		policy = "allow";
+	else if (setgroup == SETGROUPS_DENY)
+		policy = "deny";
+
+	/* Nothing to do. */
+	if (policy == NULL)
+		return;
+
+	if (write_file(policy, strlen(policy), "/proc/%d/setgroups", pid) >= 0)
+		return;
+
+	/*
+	 * If the kernel is too old to support /proc/pid/setgroups,
+	 * open(2) or write(2) will return ENOENT. This is fine.
+	 */
+	if (errno != ENOENT)
+		bail("failed to write '%s' to /proc/%d/setgroups", policy, pid);
+}
+
+/*
+ * Runs the mapping tool app as "<app> <pid> <map tokens...>" in a forked
+ * child with an empty environment and waits for it to finish.
+ *
+ * map is split in place on spaces and newlines; at most MAX_ARGV - 2
+ * tokens are passed on. map_len is unused.
+ *
+ * Returns the tool's exit status (0 on success), or -1 if the tool was
+ * killed by a signal. Bails out if app is NULL or if fork, execve or
+ * waitpid fail.
+ */
+static int try_mapping_tool(const char *app, int pid, char *map, size_t map_len)
+{
+	int child;
+
+	/*
+	 * If @app is NULL, execve will segfault. Just check it here and bail (if
+	 * we're in this path, the caller is already getting desparate and there
+	 * isn't a backup to this failing). This usually would be a configuration
+	 * or programming issue.
+	 */
+	if (!app)
+		bail("mapping tool not present");
+
+	child = fork();
+	if (child < 0)
+		bail("failed to fork");
+
+	if (!child) {
+#define MAX_ARGV 20
+		/*
+		 * Zero-initialised with one spare slot: however the loop below
+		 * ends, argv[argc] is NULL, which execve() requires.
+		 */
+		char *argv[MAX_ARGV + 1] = { NULL };
+		char *envp[] = { NULL };
+		char pid_fmt[16];
+		int argc = 0;
+		char *next;
+
+		snprintf(pid_fmt, 16, "%d", pid);
+
+		argv[argc++] = (char *)app;
+		argv[argc++] = pid_fmt;
+		/*
+		 * Convert the map string into a list of argument that
+		 * newuidmap/newgidmap can understand.
+		 */
+
+		while (argc < MAX_ARGV) {
+			if (*map == '\0')
+				break;
+			argv[argc++] = map;
+			next = strpbrk(map, "\n ");
+			if (next == NULL)
+				break;
+			*next++ = '\0';
+			map = next + strspn(next, "\n ");
+		}
+
+		execve(app, argv, envp);
+		bail("failed to execv");
+	} else {
+		int status;
+
+		while (true) {
+			if (waitpid(child, &status, 0) < 0) {
+				if (errno == EINTR)
+					continue;
+				bail("failed to waitpid");
+			}
+			if (WIFEXITED(status))
+				return WEXITSTATUS(status);
+			/* WEXITSTATUS() is meaningless for a signalled child. */
+			if (WIFSIGNALED(status))
+				return -1;
+		}
+	}
+
+	return -1;
+}
+
+/*
+ * Installs the uid mapping map for process pid. The mapping is first
+ * written straight to /proc/<pid>/uid_map; if that fails with EPERM the
+ * mapping tool at path is tried instead. Any other failure is fatal.
+ * A NULL or empty map is ignored.
+ */
+static void update_uidmap(const char *path, int pid, char *map, size_t map_len)
+{
+	if (!map || map_len == 0)
+		return;
+
+	if (write_file(map, map_len, "/proc/%d/uid_map", pid) >= 0)
+		return;
+
+	if (errno != EPERM)
+		bail("failed to update /proc/%d/uid_map", pid);
+	if (try_mapping_tool(path, pid, map, map_len) != 0)
+		bail("failed to use newuid map on %d", pid);
+}
+
+/*
+ * Installs the gid mapping map for process pid. The mapping is first
+ * written straight to /proc/<pid>/gid_map; if that fails with EPERM the
+ * mapping tool at path is tried instead. Any other failure is fatal.
+ * A NULL or empty map is ignored.
+ */
+static void update_gidmap(const char *path, int pid, char *map, size_t map_len)
+{
+	if (!map || map_len == 0)
+		return;
+
+	if (write_file(map, map_len, "/proc/%d/gid_map", pid) >= 0)
+		return;
+
+	if (errno != EPERM)
+		bail("failed to update /proc/%d/gid_map", pid);
+	if (try_mapping_tool(path, pid, map, map_len) != 0)
+		bail("failed to use newgid map on %d", pid);
+}
+
+/*
+ * Writes the len bytes at data to /proc/self/oom_score_adj and bails out
+ * if that fails. A NULL or empty value is ignored.
+ */
+static void update_oom_score_adj(char *data, size_t len)
+{
+	if (!data || len == 0)
+		return;
+
+	if (write_file(data, len, "/proc/self/oom_score_adj") >= 0)
+		return;
+
+	bail("failed to update /proc/self/oom_score_adj");
+}
+
+/*
+ * A dummy function that just jumps to the given jumpval.
+ * arg is the struct clone_t handed to clone(); the function never returns
+ * normally because it longjmp()s to ca->env with ca->jmpval.
+ */
+static int child_func(void *arg) __attribute__ ((noinline));
+static int child_func(void *arg)
+{
+	struct clone_t *ca = (struct clone_t *)arg;
+	longjmp(*ca->env, ca->jmpval);
+}
+
+/*
+ * Clones a new process with CLONE_PARENT | SIGCHLD. The new process starts
+ * in child_func(), which longjmp()s to env with jmpval. Returns whatever
+ * clone() returns to the caller.
+ *
+ * The child's stack is the stack member of the local clone_t; stack_ptr
+ * (its upper end) is what gets passed to clone().
+ */
+static int clone_parent(jmp_buf *env, int jmpval) __attribute__ ((noinline));
+static int clone_parent(jmp_buf *env, int jmpval)
+{
+	struct clone_t ca = {
+		.env = env,
+		.jmpval = jmpval,
+	};
+
+	return clone(child_func, ca.stack_ptr, CLONE_PARENT | SIGCHLD, &ca);
+}
+
+/*
+ * Gets the init pipe fd from the environment, which is used to read the
+ * bootstrap data and tell the parent what the new pid is after we finish
+ * setting up the environment.
+ *
+ * Returns -1 when _LIBCONTAINER_INITPIPE is unset or empty, and bails out
+ * when its value has trailing characters that are not part of a decimal
+ * number.
+ */
+static int initpipe(void)
+{
+	char *value, *end;
+	int fd;
+
+	value = getenv("_LIBCONTAINER_INITPIPE");
+	if (!value || value[0] == '\0')
+		return -1;
+
+	fd = strtol(value, &end, 10);
+	if (end[0] != '\0')
+		bail("unable to parse _LIBCONTAINER_INITPIPE");
+
+	return fd;
+}
+
+/*
+ * Returns the clone(2) flag for a namespace, given the name of a namespace.
+ * Names that are not recognised map to 0.
+ */
+static int nsflag(char *name)
+{
+	static const struct {
+		const char *name;
+		int flag;
+	} known[] = {
+		{ "cgroup", CLONE_NEWCGROUP },
+		{ "ipc", CLONE_NEWIPC },
+		{ "mnt", CLONE_NEWNS },
+		{ "net", CLONE_NEWNET },
+		{ "pid", CLONE_NEWPID },
+		{ "user", CLONE_NEWUSER },
+		{ "uts", CLONE_NEWUTS },
+	};
+	size_t i;
+
+	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
+		if (strcmp(name, known[i].name) == 0)
+			return known[i].flag;
+	}
+
+	/* If we don't recognise a name, fallback to 0. */
+	return 0;
+}
+
+/*
+ * Reads a native-endian 32-bit value from the first four bytes of buf.
+ * memcpy() is used so that buf needs no particular alignment and no
+ * char buffer is accessed through a uint32_t pointer.
+ */
+static uint32_t readint32(char *buf)
+{
+	uint32_t value;
+
+	memcpy(&value, buf, sizeof(value));
+	return value;
+}
+
+/* Returns the first byte of buf as an unsigned 8-bit value. */
+static uint8_t readint8(char *buf)
+{
+	return (uint8_t) buf[0];
+}
+
+/*
+ * Reads one netlink message of type INIT_MSG from fd and stores its
+ * attributes in config. The payload is read into a malloc()ed buffer that
+ * is kept in config->data; the string-like members of config point into
+ * that buffer rather than owning copies. Any malformed or unexpected input
+ * is fatal (bail()).
+ *
+ * NOTE(review): nla_len is not validated against NLA_HDRLEN or against the
+ * remaining buffer size, and read() results are stored in a size_t.
+ * Presumably the sender is trusted - confirm before reusing this parser.
+ */
+static void nl_parse(int fd, struct nlconfig_t *config)
+{
+	size_t len, size;
+	struct nlmsghdr hdr;
+	char *data, *current;
+
+	/* Retrieve the netlink header. */
+	len = read(fd, &hdr, NLMSG_HDRLEN);
+	if (len != NLMSG_HDRLEN)
+		bail("invalid netlink header length %zu", len);
+
+	if (hdr.nlmsg_type == NLMSG_ERROR)
+		bail("failed to read netlink message");
+
+	if (hdr.nlmsg_type != INIT_MSG)
+		bail("unexpected msg type %d", hdr.nlmsg_type);
+
+	/* Retrieve data. */
+	size = NLMSG_PAYLOAD(&hdr, 0);
+	current = data = malloc(size);
+	if (!data)
+		bail("failed to allocate %zu bytes of memory for nl_payload", size);
+
+	len = read(fd, data, size);
+	if (len != size)
+		bail("failed to read netlink payload, %zu != %zu", len, size);
+
+	/* Parse the netlink payload. */
+	config->data = data;
+	while (current < data + size) {
+		struct nlattr *nlattr = (struct nlattr *)current;
+		/* nla_len covers the attribute header plus the unpadded payload. */
+		size_t payload_len = nlattr->nla_len - NLA_HDRLEN;
+
+		/* Advance to payload. */
+		current += NLA_HDRLEN;
+
+		/* Handle payload. */
+		switch (nlattr->nla_type) {
+		case CLONE_FLAGS_ATTR:
+			config->cloneflags = readint32(current);
+			break;
+		case ROOTLESS_ATTR:
+			config->is_rootless = readint8(current);
+			break;
+		case OOM_SCORE_ADJ_ATTR:
+			config->oom_score_adj = current;
+			config->oom_score_adj_len = payload_len;
+			break;
+		case NS_PATHS_ATTR:
+			config->namespaces = current;
+			config->namespaces_len = payload_len;
+			break;
+		case UIDMAP_ATTR:
+			config->uidmap = current;
+			config->uidmap_len = payload_len;
+			break;
+		case GIDMAP_ATTR:
+			config->gidmap = current;
+			config->gidmap_len = payload_len;
+			break;
+		case UIDMAPPATH_ATTR:
+			config->uidmappath = current;
+			config->uidmappath_len = payload_len;
+			break;
+		case GIDMAPPATH_ATTR:
+			config->gidmappath = current;
+			config->gidmappath_len = payload_len;
+			break;
+		case SETGROUP_ATTR:
+			config->is_setgroup = readint8(current);
+			break;
+		default:
+			bail("unknown netlink message type %d", nlattr->nla_type);
+		}
+
+		/* Skip the payload including its alignment padding. */
+		current += NLA_ALIGN(payload_len);
+	}
+}
+
+/*
+ * Releases the payload buffer held in config->data. The other pointer
+ * members of config point into that buffer and must not be used afterwards.
+ */
+void nl_free(struct nlconfig_t *config)
+{
+	free(config->data);
+}
+
+/*
+ * Joins every namespace listed in nslist, a comma-separated list of
+ * "type:path" entries (type as understood by nsflag()). nslist is modified
+ * in place while it is being split.
+ *
+ * All paths are opened before the first setns() call, and the namespaces
+ * are then joined in the order given. Any failure is fatal (bail()).
+ */
+void join_namespaces(char *nslist)
+{
+	int num = 0, i;
+	char *saveptr = NULL;
+	char *namespace = strtok_r(nslist, ",", &saveptr);
+	struct namespace_t {
+		int fd;
+		int ns;
+		char type[PATH_MAX];
+		char path[PATH_MAX];
+	} *namespaces = NULL;
+
+	if (!namespace || !strlen(namespace) || !strlen(nslist))
+		bail("ns paths are empty");
+
+	/*
+	 * We have to open the file descriptors first, since after
+	 * we join the mnt namespace we might no longer be able to
+	 * access the paths.
+	 */
+	do {
+		int fd;
+		char *path;
+		struct namespace_t *ns;
+
+		/* Resize the namespace array. */
+		namespaces = realloc(namespaces, ++num * sizeof(struct namespace_t));
+		if (!namespaces)
+			bail("failed to reallocate namespace array");
+		ns = &namespaces[num - 1];
+
+		/* Split 'ns:path'. */
+		path = strstr(namespace, ":");
+		if (!path)
+			bail("failed to parse %s", namespace);
+		*path++ = '\0';
+
+		fd = open(path, O_RDONLY);
+		if (fd < 0)
+			bail("failed to open %s", path);
+
+		ns->fd = fd;
+		ns->ns = nsflag(namespace);
+		/*
+		 * realloc() memory is uninitialised and strncpy() does not
+		 * terminate a source of PATH_MAX or more bytes, so terminate
+		 * explicitly; the path is printed with %s on failure below.
+		 */
+		strncpy(ns->path, path, PATH_MAX - 1);
+		ns->path[PATH_MAX - 1] = '\0';
+	} while ((namespace = strtok_r(NULL, ",", &saveptr)) != NULL);
+
+	/*
+	 * The ordering in which we join namespaces is important. We should
+	 * always join the user namespace *first*. This is all guaranteed
+	 * from the container_linux.go side of this, so we're just going to
+	 * follow the order given to us.
+	 */
+
+	for (i = 0; i < num; i++) {
+		/* Use a pointer: each element holds two PATH_MAX buffers. */
+		struct namespace_t *ns = &namespaces[i];
+
+		if (setns(ns->fd, ns->ns) < 0)
+			bail("failed to setns to %s", ns->path);
+
+		close(ns->fd);
+	}
+
+	free(namespaces);
+}
+
+/* Bootstraps the container process in C, before the Go runtime runs, when an init pipe was passed. */
+void nsexec(void)
+{
+	int pipenum;
+	jmp_buf env;
+	int sync_child_pipe[2], sync_grandchild_pipe[2];
+	struct nlconfig_t config = { 0 };
+
+	/*
+	 * If we don't have an init pipe, just return to the go routine.
+	 * We'll only get an init pipe for start or exec.
+	 */
+	pipenum = initpipe();
+	if (pipenum == -1)
+		return;
+
+	/* Parse all of the netlink configuration. */
+	nl_parse(pipenum, &config);
+
+	/* Set oom_score_adj. This has to be done before !dumpable because
+	 * /proc/self/oom_score_adj is not writeable unless you're a privileged
+	 * user (if !dumpable is set). All children inherit their parent's
+	 * oom_score_adj value on fork(2) so this will always be propagated
+	 * properly.
+	 */
+	update_oom_score_adj(config.oom_score_adj, config.oom_score_adj_len);
+
+	/*
+	 * Make the process non-dumpable, to avoid various race conditions that
+	 * could cause processes in namespaces we're joining to access host
+	 * resources (or potentially execute code).
+	 *
+	 * However, if the number of namespaces we are joining is 0, we are not
+	 * going to be switching to a different security context. Thus setting
+	 * ourselves to be non-dumpable only breaks things (like rootless
+	 * containers), which is the recommendation from the kernel folks.
+	 */
+	if (config.namespaces) {
+		if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0)
+			bail("failed to set process as non-dumpable");
+	}
+
+	/* Pipe so we can tell the child when we've finished setting up. */
+	if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sync_child_pipe) < 0)
+		bail("failed to setup sync pipe between parent and child");
+
+	/*
+	 * We need a new socketpair to sync with grandchild so we don't have
+	 * race condition with child.
+	 */
+	if (socketpair(AF_LOCAL, SOCK_STREAM, 0, sync_grandchild_pipe) < 0)
+		bail("failed to setup sync pipe between parent and grandchild");
+
+	/* TODO: Currently we aren't dealing with child deaths properly. */
+
+	/*
+	 * Okay, so this is quite annoying.
+	 *
+	 * In order for this unsharing code to be more extensible we need to split
+	 * up unshare(CLONE_NEWUSER) and clone() in various ways. The ideal case
+	 * would be if we did clone(CLONE_NEWUSER) and the other namespaces
+	 * separately, but because of SELinux issues we cannot really do that. But
+	 * we cannot just dump the namespace flags into clone(...) because several
+	 * usecases (such as rootless containers) require more granularity around
+	 * the namespace setup. In addition, some older kernels had issues where
+	 * CLONE_NEWUSER wasn't handled before other namespaces (but we cannot
+	 * handle this while also dealing with SELinux so we choose SELinux support
+	 * over broken kernel support).
+	 *
+	 * However, if we unshare(2) the user namespace *before* we clone(2), then
+	 * all hell breaks loose.
+	 *
+	 * The parent no longer has permissions to do many things (unshare(2) drops
+	 * all capabilities in your old namespace), and the container cannot be set
+	 * up to have more than one {uid,gid} mapping. This is obviously less than
+	 * ideal. In order to fix this, we have to first clone(2) and then unshare.
+	 *
+	 * Unfortunately, it's not as simple as that. We have to fork to enter the
+	 * PID namespace (the PID namespace only applies to children). Since we'll
+	 * have to double-fork, this clone_parent() call won't be able to get the
+	 * PID of the _actual_ init process (without doing more synchronisation than
+	 * I can deal with at the moment). So we'll just get the parent to send it
+	 * for us, the only job of this process is to update
+	 * /proc/pid/{setgroups,uid_map,gid_map}.
+	 *
+	 * And as a result of the above, we also need to setns(2) in the first child
+	 * because if we join a PID namespace in the topmost parent then our child
+	 * will be in that namespace (and it will not be able to give us a PID value
+	 * that makes sense without resorting to sending things with cmsg).
+	 *
+	 * This also deals with an older issue caused by dumping cloneflags into
+	 * clone(2): On old kernels, CLONE_PARENT didn't work with CLONE_NEWPID, so
+	 * we have to unshare(2) before clone(2) in order to do this. This was fixed
+	 * in upstream commit 1f7f4dde5c945f41a7abc2285be43d918029ecc5, and was
+	 * introduced by 40a0d32d1eaffe6aac7324ca92604b6b3977eb0e. As far as we're
+	 * aware, the last mainline kernel which had this bug was Linux 3.12.
+	 * However, we cannot comment on which kernels the broken patch was
+	 * backported to.
+	 *
+	 * -- Aleksa "what has my life come to?" Sarai
+	 */
+
+	switch (setjmp(env)) {
+		/*
+		 * Stage 0: We're in the parent. Our job is just to create a new child
+		 *          (stage 1: JUMP_CHILD) process and write its uid_map and
+		 *          gid_map. That process will go on to create a new process, then
+		 *          it will send us its PID which we will send to the bootstrap
+		 *          process.
+		 */
+	case JUMP_PARENT:{
+			int len;
+			pid_t child, first_child = -1;
+			char buf[JSON_MAX];
+			bool ready = false;
+
+			/* For debugging. */
+			prctl(PR_SET_NAME, (unsigned long)"runc:[0:PARENT]", 0, 0, 0);
+
+			/* Start the process of getting a container. */
+			child = clone_parent(&env, JUMP_CHILD);
+			if (child < 0)
+				bail("unable to fork: child_func");
+
+			syncfd = sync_child_pipe[1];
+			close(sync_child_pipe[0]);	/* once, not on every loop iteration */
+
+			/*
+			 * State machine for synchronisation with the children.
+			 *
+			 * The parent only returns once both child and grandchild
+			 * are ready, so we can receive all possible error codes
+			 * generated by children.
+			 */
+			while (!ready) {
+				enum sync_t s;
+				int ret;
+
+				if (read(syncfd, &s, sizeof(s)) != sizeof(s))
+					bail("failed to sync with child: next state");
+
+				switch (s) {
+				case SYNC_ERR:
+					/* We have to mirror the error code of the child. */
+					if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret))
+						bail("failed to sync with child: read(error code)");
+
+					exit(ret);
+				case SYNC_USERMAP_PLS:
+					/*
+					 * Enable setgroups(2) if we've been asked to. But we also
+					 * have to explicitly disable setgroups(2) if we're
+					 * creating a rootless container (this is required since
+					 * Linux 3.19).
+					 */
+					if (config.is_rootless && config.is_setgroup) {
+						kill(child, SIGKILL);
+						bail("cannot allow setgroup in an unprivileged user namespace setup");
+					}
+
+					if (config.is_setgroup)
+						update_setgroups(child, SETGROUPS_ALLOW);
+					if (config.is_rootless)
+						update_setgroups(child, SETGROUPS_DENY);
+
+					/* Set up mappings. */
+					update_uidmap(config.uidmappath, child, config.uidmap, config.uidmap_len);
+					update_gidmap(config.gidmappath, child, config.gidmap, config.gidmap_len);
+
+					s = SYNC_USERMAP_ACK;
+					if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
+						kill(child, SIGKILL);
+						bail("failed to sync with child: write(SYNC_USERMAP_ACK)");
+					}
+					break;
+				case SYNC_RECVPID_PLS:{
+						first_child = child;
+
+						/* Get the init_func pid. */
+						if (read(syncfd, &child, sizeof(child)) != sizeof(child)) {
+							kill(first_child, SIGKILL);
+							bail("failed to sync with child: read(childpid)");
+						}
+
+						/* Send ACK. */
+						s = SYNC_RECVPID_ACK;
+						if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
+							kill(first_child, SIGKILL);
+							kill(child, SIGKILL);
+							bail("failed to sync with child: write(SYNC_RECVPID_ACK)");
+						}
+					}
+					break;
+				case SYNC_CHILD_READY:
+					ready = true;
+					break;
+				default:
+					bail("unexpected sync value: %u", s);
+				}
+			}
+
+			/* Now sync with grandchild. */
+			ready = false;
+			syncfd = sync_grandchild_pipe[1];
+			close(sync_grandchild_pipe[0]);	/* once, not on every loop iteration */
+
+			while (!ready) {
+				enum sync_t s;
+				int ret;
+
+				s = SYNC_GRANDCHILD;
+				if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
+					kill(child, SIGKILL);
+					bail("failed to sync with child: write(SYNC_GRANDCHILD)");
+				}
+
+				if (read(syncfd, &s, sizeof(s)) != sizeof(s))
+					bail("failed to sync with child: next state");
+
+				switch (s) {
+				case SYNC_ERR:
+					/* We have to mirror the error code of the child. */
+					if (read(syncfd, &ret, sizeof(ret)) != sizeof(ret))
+						bail("failed to sync with child: read(error code)");
+
+					exit(ret);
+				case SYNC_CHILD_READY:
+					ready = true;
+					break;
+				default:
+					bail("unexpected sync value: %u", s);
+				}
+			}
+
+			/*
+			 * Send the init_func pid and the pid of the first child back to our parent.
+			 *
+			 * We need to send both back because we can't reap the first child we created (CLONE_PARENT).
+			 * It becomes the responsibility of our parent to reap the first child.
+			 */
+			len = snprintf(buf, JSON_MAX, "{\"pid\": %d, \"pid_first\": %d}\n", child, first_child);
+			if (len < 0) {
+				kill(child, SIGKILL);
+				bail("unable to generate JSON for child pid");
+			}
+			if (write(pipenum, buf, len) != len) {
+				kill(child, SIGKILL);
+				bail("unable to send child pid to bootstrapper");
+			}
+
+			exit(0);
+		}
+
+		/*
+		 * Stage 1: We're in the first child process. Our job is to join any
+		 *          provided namespaces in the netlink payload and unshare all
+		 *          of the requested namespaces. If we've been asked to
+		 *          CLONE_NEWUSER, we will ask our parent (stage 0) to set up
+		 *          our user mappings for us. Then, we create a new child
+		 *          (stage 2: JUMP_INIT) for PID namespace. We then send the
+		 *          child's PID to our parent (stage 0).
+		 */
+	case JUMP_CHILD:{
+			pid_t child;
+			enum sync_t s;
+
+			/* We're in a child and thus need to tell the parent if we die. */
+			syncfd = sync_child_pipe[0];
+			close(sync_child_pipe[1]);
+
+			/* For debugging. */
+			prctl(PR_SET_NAME, (unsigned long)"runc:[1:CHILD]", 0, 0, 0);
+
+			/*
+			 * We need to setns first. We cannot do this earlier (in stage 0)
+			 * because of the fact that we forked to get here (the PID of
+			 * [stage 2: JUMP_INIT]) would be meaningless). We could send it
+			 * using cmsg(3) but that's just annoying.
+			 */
+			if (config.namespaces)
+				join_namespaces(config.namespaces);
+
+			/*
+			 * Unshare all of the namespaces. Now, it should be noted that this
+			 * ordering might break in the future (especially with rootless
+			 * containers). But for now, it's not possible to split this into
+			 * CLONE_NEWUSER + [the rest] because of some RHEL SELinux issues.
+			 *
+			 * Note that we don't merge this with clone() because there were
+			 * some old kernel versions where clone(CLONE_PARENT | CLONE_NEWPID)
+			 * was broken, so we'll just do it the long way anyway.
+			 */
+			if (unshare(config.cloneflags) < 0)
+				bail("failed to unshare namespaces");
+
+			/*
+			 * Deal with user namespaces first. They are quite special, as they
+			 * affect our ability to unshare other namespaces and are used as
+			 * context for privilege checks.
+			 */
+			if (config.cloneflags & CLONE_NEWUSER) {
+				/*
+				 * We don't have the privileges to do any mapping here (see the
+				 * clone_parent rant). So signal our parent to hook us up.
+				 */
+
+				/* Switching is only necessary if we joined namespaces. */
+				if (config.namespaces) {
+					if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0) < 0)
+						bail("failed to set process as dumpable");
+				}
+				s = SYNC_USERMAP_PLS;
+				if (write(syncfd, &s, sizeof(s)) != sizeof(s))
+					bail("failed to sync with parent: write(SYNC_USERMAP_PLS)");
+
+				/* ... wait for mapping ... */
+
+				if (read(syncfd, &s, sizeof(s)) != sizeof(s))
+					bail("failed to sync with parent: read(SYNC_USERMAP_ACK)");
+				if (s != SYNC_USERMAP_ACK)
+					bail("failed to sync with parent: SYNC_USERMAP_ACK: got %u", s);
+				/* Switching back is only necessary if we joined namespaces. */
+				if (config.namespaces) {
+					if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) < 0)
+						bail("failed to set process as non-dumpable");
+				}
+			}
+
+			/*
+			 * TODO: What about non-namespace clone flags that we're dropping here?
+			 *
+			 * We fork again because of PID namespace, setns(2) or unshare(2) don't
+			 * change the PID namespace of the calling process, because doing so
+			 * would change the caller's idea of its own PID (as reported by getpid()),
+			 * which would break many applications and libraries, so we must fork
+			 * to actually enter the new PID namespace.
+			 */
+			child = clone_parent(&env, JUMP_INIT);
+			if (child < 0)
+				bail("unable to fork: init_func");
+
+			/* Send the child to our parent, which knows what it's doing. */
+			s = SYNC_RECVPID_PLS;
+			if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
+				kill(child, SIGKILL);
+				bail("failed to sync with parent: write(SYNC_RECVPID_PLS)");
+			}
+			if (write(syncfd, &child, sizeof(child)) != sizeof(child)) {
+				kill(child, SIGKILL);
+				bail("failed to sync with parent: write(childpid)");
+			}
+
+			/* ... wait for parent to get the pid ... */
+
+			if (read(syncfd, &s, sizeof(s)) != sizeof(s)) {
+				kill(child, SIGKILL);
+				bail("failed to sync with parent: read(SYNC_RECVPID_ACK)");
+			}
+			if (s != SYNC_RECVPID_ACK) {
+				kill(child, SIGKILL);
+				bail("failed to sync with parent: SYNC_RECVPID_ACK: got %u", s);
+			}
+
+			s = SYNC_CHILD_READY;
+			if (write(syncfd, &s, sizeof(s)) != sizeof(s)) {
+				kill(child, SIGKILL);
+				bail("failed to sync with parent: write(SYNC_CHILD_READY)");
+			}
+
+			/* Our work is done. [Stage 2: JUMP_INIT] is doing the rest of the work. */
+			exit(0);
+		}
+
+		/*
+		 * Stage 2: We're the final child process, and the only process that will
+		 *          actually return to the Go runtime. Our job is to just do the
+		 *          final cleanup steps and then return to the Go runtime to allow
+		 *          init_linux.go to run.
+		 */
+	case JUMP_INIT:{
+			/*
+			 * We're inside the child now, having jumped from the
+			 * start_child() code after forking in the parent.
+			 */
+			enum sync_t s;
+
+			/* We're in a child and thus need to tell the parent if we die. */
+			syncfd = sync_grandchild_pipe[0];
+			close(sync_grandchild_pipe[1]);
+			close(sync_child_pipe[0]);
+			close(sync_child_pipe[1]);
+
+			/* For debugging. */
+			prctl(PR_SET_NAME, (unsigned long)"runc:[2:INIT]", 0, 0, 0);
+
+			if (read(syncfd, &s, sizeof(s)) != sizeof(s))
+				bail("failed to sync with parent: read(SYNC_GRANDCHILD)");
+			if (s != SYNC_GRANDCHILD)
+				bail("failed to sync with parent: SYNC_GRANDCHILD: got %u", s);
+
+			if (setsid() < 0)
+				bail("setsid failed");
+
+			if (setuid(0) < 0)
+				bail("setuid failed");
+
+			if (setgid(0) < 0)
+				bail("setgid failed");
+
+			if (!config.is_rootless && config.is_setgroup) {
+				if (setgroups(0, NULL) < 0)
+					bail("setgroups failed");
+			}
+
+			s = SYNC_CHILD_READY;
+			if (write(syncfd, &s, sizeof(s)) != sizeof(s))
+				bail("failed to sync with parent: write(SYNC_CHILD_READY)");
+
+			/* Close sync pipes. */
+			close(sync_grandchild_pipe[0]);
+
+			/* Free netlink data. */
+			nl_free(&config);
+
+			/* Finish executing, let the Go runtime take over. */
+			return;
+		}
+	default:
+		bail("unexpected jump value");
+	}
+
+	/* Should never be reached. */
+	bail("should never be reached");
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/process.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/process.go
@@ -0,0 +1,110 @@
+package libcontainer
+
+import (
+	"fmt"
+	"io"
+	"math"
+	"os"
+
+	"github.com/opencontainers/runc/libcontainer/configs"
+)
+
+type processOperations interface {
+	wait() (*os.ProcessState, error)
+	signal(sig os.Signal) error
+	pid() int
+}
+
+// Process specifies the configuration and IO for a process inside
+// a container.
+type Process struct {
+	// Args is the command to be run followed by any arguments.
+	Args []string
+
+	// Env specifies the environment variables for the process.
+	Env []string
+
+	// User will set the uid and gid of the executing process running inside the container
+	// local to the container's user and group configuration.
+	User string
+
+	// AdditionalGroups specifies the gids that should be added to supplementary groups
+	// in addition to those that the user belongs to.
+	AdditionalGroups []string
+
+	// Cwd will change the process's current working directory inside the container's rootfs.
+	Cwd string
+
+	// Stdin is the reader which provides the standard input stream.
+	Stdin io.Reader
+
+	// Stdout is the writer which receives the standard output stream.
+	Stdout io.Writer
+
+	// Stderr is the writer which receives the standard error stream.
+	Stderr io.Writer
+
+	// ExtraFiles specifies additional open files to be inherited by the container.
+	ExtraFiles []*os.File
+
+	// ConsoleWidth and ConsoleHeight are the initial sizings for the console.
+	ConsoleWidth  uint16
+	ConsoleHeight uint16
+
+	// Capabilities specify the capabilities to keep when executing the process inside the container.
+	// All capabilities not specified will be dropped from the process's capability mask.
+	Capabilities *configs.Capabilities
+
+	// AppArmorProfile specifies the profile to apply to the process and is
+	// changed at the time the process is execed.
+	AppArmorProfile string
+
+	// Label specifies the label to apply to the process. It is commonly used by SELinux.
+	Label string
+
+	// NoNewPrivileges controls whether processes can gain additional privileges.
+	NoNewPrivileges *bool
+
+	// Rlimits specifies the resource limits, such as max open files, to set in the container.
+	// If Rlimits are not set, the container will inherit rlimits from the parent process.
+	Rlimits []configs.Rlimit
+
+	// ConsoleSocket provides the masterfd console.
+	ConsoleSocket *os.File
+
+	ops processOperations
+}
+
+// Wait waits for the process to exit.
+// Wait releases any resources associated with the Process
+func (p Process) Wait() (*os.ProcessState, error) {
+	if p.ops == nil {
+		return nil, newGenericError(fmt.Errorf("invalid process"), NoProcessOps)
+	}
+	return p.ops.wait()
+}
+
+// Pid returns the process ID, or an error when no process operations are attached.
+func (p Process) Pid() (int, error) {
+	// math.MinInt32 is returned in the error case because it is an invalid
+	// value for the kill() system call.
+	if p.ops == nil {
+		return math.MinInt32, newGenericError(fmt.Errorf("invalid process"), NoProcessOps)
+	}
+	return p.ops.pid(), nil
+}
+
+// Signal sends a signal to the Process.
+func (p Process) Signal(sig os.Signal) error {
+	if p.ops == nil {
+		return newGenericError(fmt.Errorf("invalid process"), NoProcessOps)
+	}
+	return p.ops.signal(sig)
+}
+
+// IO holds the process's STDIO
+type IO struct {
+	Stdin  io.WriteCloser
+	Stdout io.ReadCloser
+	Stderr io.ReadCloser
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/process_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/process_linux.go
@@ -0,0 +1,547 @@
+// +build linux
+
+package libcontainer
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strconv"
+	"syscall" // only for Signal
+
+	"github.com/opencontainers/runc/libcontainer/cgroups"
+	"github.com/opencontainers/runc/libcontainer/configs"
+	"github.com/opencontainers/runc/libcontainer/intelrdt"
+	"github.com/opencontainers/runc/libcontainer/system"
+	"github.com/opencontainers/runc/libcontainer/utils"
+
+	"golang.org/x/sys/unix"
+)
+
+type parentProcess interface {
+	// pid returns the pid for the running process.
+	pid() int
+
+	// start starts the process execution.
+	start() error
+
+	// terminate sends a SIGKILL to the process and waits for it to exit.
+	terminate() error
+
+	// wait waits on the process, returning the process state.
+	wait() (*os.ProcessState, error)
+
+	// startTime returns the process start time.
+	startTime() (uint64, error)
+
+	signal(os.Signal) error
+
+	externalDescriptors() []string
+
+	setExternalDescriptors(fds []string)
+}
+
+type setnsProcess struct {
+	cmd           *exec.Cmd
+	parentPipe    *os.File
+	childPipe     *os.File
+	cgroupPaths   map[string]string
+	intelRdtPath  string
+	config        *initConfig
+	fds           []string
+	process       *Process
+	bootstrapData io.Reader
+}
+
+func (p *setnsProcess) startTime() (uint64, error) {
+	stat, err := system.Stat(p.pid())
+	return stat.StartTime, err
+}
+
+func (p *setnsProcess) signal(sig os.Signal) error {
+	s, ok := sig.(syscall.Signal)
+	if !ok {
+		return errors.New("os: unsupported signal type")
+	}
+	return unix.Kill(p.pid(), s)
+}
+
+func (p *setnsProcess) start() (err error) {
+	defer p.parentPipe.Close()
+	err = p.cmd.Start()
+	p.childPipe.Close()
+	if err != nil {
+		return newSystemErrorWithCause(err, "starting setns process")
+	}
+	if p.bootstrapData != nil {
+		if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
+			return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
+		}
+	}
+	if err = p.execSetns(); err != nil {
+		return newSystemErrorWithCause(err, "executing setns process")
+	}
+	if len(p.cgroupPaths) > 0 {
+		if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil {
+			return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid())
+		}
+	}
+	if p.intelRdtPath != "" {
+		// if Intel RDT "resource control" filesystem path exists
+		_, err := os.Stat(p.intelRdtPath)
+		if err == nil {
+			if err := intelrdt.WriteIntelRdtTasks(p.intelRdtPath, p.pid()); err != nil {
+				return newSystemErrorWithCausef(err, "adding pid %d to Intel RDT resource control filesystem", p.pid())
+			}
+		}
+	}
+	// set rlimits, this has to be done here because we lose permissions
+	// to raise the limits once we enter a user-namespace
+	if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
+		return newSystemErrorWithCause(err, "setting rlimits for process")
+	}
+	if err := utils.WriteJSON(p.parentPipe, p.config); err != nil {
+		return newSystemErrorWithCause(err, "writing config to pipe")
+	}
+
+	ierr := parseSync(p.parentPipe, func(sync *syncT) error {
+		switch sync.Type {
+		case procReady:
+			// This shouldn't happen.
+			panic("unexpected procReady in setns")
+		case procHooks:
+			// This shouldn't happen.
+			panic("unexpected procHooks in setns")
+		default:
+			return newSystemError(fmt.Errorf("invalid JSON payload from child"))
+		}
+	})
+
+	if err := unix.Shutdown(int(p.parentPipe.Fd()), unix.SHUT_WR); err != nil {
+		return newSystemErrorWithCause(err, "calling shutdown on init pipe")
+	}
+	// Must be done after Shutdown so the child will exit and we can wait for it.
+	if ierr != nil {
+		p.wait()
+		return ierr
+	}
+	return nil
+}
+
+// execSetns waits for the bootstrap process (the C code that performs the setns
+// calls before the Go runtime boots) to exit successfully, decodes the pids it
+// reported as JSON on parentPipe, reaps the intermediate first child, and then
+// rebinds p.cmd.Process and p.process.ops to the final child.
+func (p *setnsProcess) execSetns() error {
+	status, err := p.cmd.Process.Wait()
+	if err != nil {
+		p.cmd.Wait()
+		return newSystemErrorWithCause(err, "waiting on setns process to finish")
+	}
+	if !status.Success() {
+		p.cmd.Wait()
+		return newSystemError(&exec.ExitError{ProcessState: status})
+	}
+	var pid *pid
+	if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil {
+		p.cmd.Wait()
+		return newSystemErrorWithCause(err, "reading pid from init pipe")
+	}
+
+	// Clean up the zombie first child, which the bootstrap process could not reap itself
+	firstChildProcess, err := os.FindProcess(pid.PidFirstChild)
+	if err != nil {
+		return err
+	}
+
+	// Ignore the error in case the child has already been reaped for any reason
+	_, _ = firstChildProcess.Wait()
+
+	process, err := os.FindProcess(pid.Pid)
+	if err != nil {
+		return err
+	}
+	p.cmd.Process = process
+	p.process.ops = p
+	return nil
+}
+
+// terminate sends a SIGKILL to the forked process for the setns routine then waits to
+// avoid the process becoming a zombie.
+func (p *setnsProcess) terminate() error {
+	if p.cmd.Process == nil {
+		return nil
+	}
+	err := p.cmd.Process.Kill()
+	if _, werr := p.wait(); err == nil {
+		err = werr
+	}
+	return err
+}
+
+func (p *setnsProcess) wait() (*os.ProcessState, error) {
+	err := p.cmd.Wait()
+
+	// Return actual ProcessState even on Wait error
+	return p.cmd.ProcessState, err
+}
+
+func (p *setnsProcess) pid() int {
+	return p.cmd.Process.Pid
+}
+
+func (p *setnsProcess) externalDescriptors() []string {
+	return p.fds
+}
+
+func (p *setnsProcess) setExternalDescriptors(newFds []string) {
+	p.fds = newFds
+}
+
+type initProcess struct {
+	cmd             *exec.Cmd
+	parentPipe      *os.File
+	childPipe       *os.File
+	config          *initConfig
+	manager         cgroups.Manager
+	intelRdtManager intelrdt.Manager
+	container       *linuxContainer
+	fds             []string
+	process         *Process
+	bootstrapData   io.Reader
+	sharePidns      bool
+}
+
+func (p *initProcess) pid() int {
+	return p.cmd.Process.Pid
+}
+
+func (p *initProcess) externalDescriptors() []string {
+	return p.fds
+}
+
+// execSetns waits for the bootstrap process (the C code that performs the setns
+// calls before the Go runtime boots) to exit successfully, decodes the pids it
+// reported as JSON on parentPipe, reaps the intermediate first child, and then
+// rebinds p.cmd.Process and p.process.ops to the final child.
+// This is called by the initProcess.start function.
+func (p *initProcess) execSetns() error {
+	status, err := p.cmd.Process.Wait()
+	if err != nil {
+		p.cmd.Wait()
+		return err
+	}
+	if !status.Success() {
+		p.cmd.Wait()
+		return &exec.ExitError{ProcessState: status}
+	}
+	var pid *pid
+	if err := json.NewDecoder(p.parentPipe).Decode(&pid); err != nil {
+		p.cmd.Wait()
+		return err
+	}
+
+	// Clean up the zombie first child, which the bootstrap process could not reap itself
+	firstChildProcess, err := os.FindProcess(pid.PidFirstChild)
+	if err != nil {
+		return err
+	}
+
+	// Ignore the error in case the child has already been reaped for any reason
+	_, _ = firstChildProcess.Wait()
+
+	process, err := os.FindProcess(pid.Pid)
+	if err != nil {
+		return err
+	}
+	p.cmd.Process = process
+	p.process.ops = p
+	return nil
+}
+
+// start runs the init command, applies cgroups, bootstraps it over parentPipe and drives the sync handshake.
+func (p *initProcess) start() (retErr error) {
+	defer p.parentPipe.Close()
+	err := p.cmd.Start()
+	p.process.ops = p
+	p.childPipe.Close()
+	if err != nil {
+		p.process.ops = nil
+		return newSystemErrorWithCause(err, "starting init process command")
+	}
+	// Do this before syncing with child so that no children can escape the
+	// cgroup. We don't need to worry about not doing this and not being root
+	// because we'd be using the rootless cgroup manager in that case.
+	if err := p.manager.Apply(p.pid()); err != nil {
+		return newSystemErrorWithCause(err, "applying cgroup configuration for process")
+	}
+	if p.intelRdtManager != nil {
+		if err := p.intelRdtManager.Apply(p.pid()); err != nil {
+			return newSystemErrorWithCause(err, "applying Intel RDT configuration for process")
+		}
+	}
+	defer func() {
+		// retErr is the named result, so a failure in any later step triggers this cleanup.
+		if retErr != nil {
+			// TODO: should not be the responsibility to call here
+			p.manager.Destroy()
+			if p.intelRdtManager != nil {
+				p.intelRdtManager.Destroy()
+			}
+		}
+	}()
+
+	if _, err := io.Copy(p.parentPipe, p.bootstrapData); err != nil {
+		return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
+	}
+
+	if err := p.execSetns(); err != nil {
+		return newSystemErrorWithCause(err, "running exec setns process for init")
+	}
+
+	// Save the standard descriptor names before the container process
+	// can potentially move them (e.g., via dup2()).  If we don't do this now,
+	// we won't know at checkpoint time which file descriptor to look up.
+	fds, err := getPipeFds(p.pid())
+	if err != nil {
+		return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", p.pid())
+	}
+	p.setExternalDescriptors(fds)
+	if err := p.createNetworkInterfaces(); err != nil {
+		return newSystemErrorWithCause(err, "creating network interfaces")
+	}
+	if err := p.sendConfig(); err != nil {
+		return newSystemErrorWithCause(err, "sending config to init process")
+	}
+	var (
+		sentRun    bool
+		sentResume bool
+	)
+	ierr := parseSync(p.parentPipe, func(sync *syncT) error {
+		switch sync.Type {
+		case procReady:
+			// set rlimits, this has to be done here because we lose permissions
+			// to raise the limits once we enter a user-namespace
+			if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil {
+				return newSystemErrorWithCause(err, "setting rlimits for ready process")
+			}
+			// call prestart hooks
+			if !p.config.Config.Namespaces.Contains(configs.NEWNS) {
+				// Setup cgroup before prestart hook, so that the prestart hook could apply cgroup permissions.
+				if err := p.manager.Set(p.config.Config); err != nil {
+					return newSystemErrorWithCause(err, "setting cgroup config for ready process")
+				}
+				if p.intelRdtManager != nil {
+					if err := p.intelRdtManager.Set(p.config.Config); err != nil {
+						return newSystemErrorWithCause(err, "setting Intel RDT config for ready process")
+					}
+				}
+
+				if p.config.Config.Hooks != nil {
+					bundle, annotations := utils.Annotations(p.container.config.Labels)
+					s := configs.HookState{
+						Version:     p.container.config.Version,
+						ID:          p.container.id,
+						Pid:         p.pid(),
+						Bundle:      bundle,
+						Annotations: annotations,
+					}
+					for i, hook := range p.config.Config.Hooks.Prestart {
+						if err := hook.Run(s); err != nil {
+							return newSystemErrorWithCausef(err, "running prestart hook %d", i)
+						}
+					}
+				}
+			}
+			// Sync with child.
+			if err := writeSync(p.parentPipe, procRun); err != nil {
+				return newSystemErrorWithCause(err, "writing syncT 'run'")
+			}
+			sentRun = true
+		case procHooks:
+			// Setup cgroup before prestart hook, so that the prestart hook could apply cgroup permissions.
+			if err := p.manager.Set(p.config.Config); err != nil {
+				return newSystemErrorWithCause(err, "setting cgroup config for procHooks process")
+			}
+			if p.intelRdtManager != nil {
+				if err := p.intelRdtManager.Set(p.config.Config); err != nil {
+					return newSystemErrorWithCause(err, "setting Intel RDT config for procHooks process")
+				}
+			}
+			if p.config.Config.Hooks != nil {
+				bundle, annotations := utils.Annotations(p.container.config.Labels)
+				s := configs.HookState{
+					Version:     p.container.config.Version,
+					ID:          p.container.id,
+					Pid:         p.pid(),
+					Bundle:      bundle,
+					Annotations: annotations,
+				}
+				for i, hook := range p.config.Config.Hooks.Prestart {
+					if err := hook.Run(s); err != nil {
+						return newSystemErrorWithCausef(err, "running prestart hook %d", i)
+					}
+				}
+			}
+			// Sync with child.
+			if err := writeSync(p.parentPipe, procResume); err != nil {
+				return newSystemErrorWithCause(err, "writing syncT 'resume'")
+			}
+			sentResume = true
+		default:
+			return newSystemError(fmt.Errorf("invalid JSON payload from child"))
+		}
+		return nil
+	})
+
+	if !sentRun {
+		return newSystemErrorWithCause(ierr, "container init")
+	}
+	if p.config.Config.Namespaces.Contains(configs.NEWNS) && !sentResume {
+		return newSystemError(fmt.Errorf("could not synchronise after executing prestart hooks with container process"))
+	}
+	if err := unix.Shutdown(int(p.parentPipe.Fd()), unix.SHUT_WR); err != nil {
+		return newSystemErrorWithCause(err, "shutting down init pipe")
+	}
+
+	// Must be done after Shutdown so the child will exit and we can wait for it.
+	if ierr != nil {
+		p.wait()
+		return ierr
+	}
+	return nil
+}
+
+func (p *initProcess) wait() (*os.ProcessState, error) {
+	// cmd.Wait reports a non-zero exit status or a fatal signal as an error,
+	// so its error must not short-circuit the cleanup below.
+	err := p.cmd.Wait()
+	// When init has died and the container uses the host PID namespace, kill
+	// every process left in the cgroup, however init exited.
+	if p.sharePidns {
+		signalAllProcesses(p.manager, unix.SIGKILL)
+	}
+	return p.cmd.ProcessState, err
+}
+
+func (p *initProcess) terminate() error {
+	if p.cmd.Process == nil {
+		return nil
+	}
+	err := p.cmd.Process.Kill()
+	if _, werr := p.wait(); err == nil {
+		err = werr
+	}
+	return err
+}
+
+func (p *initProcess) startTime() (uint64, error) {
+	stat, err := system.Stat(p.pid())
+	return stat.StartTime, err
+}
+
+func (p *initProcess) sendConfig() error {
+	// send the config to the container's init process, we don't use JSON Encode
+	// here because there might be a problem in JSON decoder in some cases, see:
+	// https://github.com/docker/docker/issues/14203#issuecomment-174177790
+	return utils.WriteJSON(p.parentPipe, p.config)
+}
+
+func (p *initProcess) createNetworkInterfaces() error {
+	for _, config := range p.config.Config.Networks {
+		strategy, err := getStrategy(config.Type)
+		if err != nil {
+			return err
+		}
+		n := &network{
+			Network: *config,
+		}
+		if err := strategy.create(n, p.pid()); err != nil {
+			return err
+		}
+		p.config.Networks = append(p.config.Networks, n)
+	}
+	return nil
+}
+
+func (p *initProcess) signal(sig os.Signal) error {
+	s, ok := sig.(syscall.Signal)
+	if !ok {
+		return errors.New("os: unsupported signal type")
+	}
+	return unix.Kill(p.pid(), s)
+}
+
+func (p *initProcess) setExternalDescriptors(newFds []string) {
+	p.fds = newFds
+}
+
+func getPipeFds(pid int) ([]string, error) {
+	fds := make([]string, 3)
+
+	dirPath := filepath.Join("/proc", strconv.Itoa(pid), "/fd")
+	for i := 0; i < 3; i++ {
+		// XXX: This breaks if the path is not a valid symlink (which can
+		//      happen in certain particularly unlucky mount namespace setups).
+		f := filepath.Join(dirPath, strconv.Itoa(i))
+		target, err := os.Readlink(f)
+		if err != nil {
+			// Ignore permission errors, for rootless containers and other
+			// non-dumpable processes. if we can't get the fd for a particular
+			// file, there's not much we can do.
+			if os.IsPermission(err) {
+				continue
+			}
+			return fds, err
+		}
+		fds[i] = target
+	}
+	return fds, nil
+}
+
+// InitializeIO creates pipes for use with the process's stdio and returns the
+// opposite side for each. Do not use this if you want to have a pseudoterminal
+// set up for you by libcontainer (TODO: fix that too).
+// TODO: This is mostly unnecessary, and should be handled by clients.
+func (p *Process) InitializeIO(rootuid, rootgid int) (i *IO, err error) {
+	var fds []uintptr
+	i = &IO{}
+	// cleanup in case of an error
+	defer func() {
+		if err != nil {
+			for _, fd := range fds {
+				unix.Close(int(fd))
+			}
+		}
+	}()
+	// STDIN
+	r, w, err := os.Pipe()
+	if err != nil {
+		return nil, err
+	}
+	fds = append(fds, r.Fd(), w.Fd())
+	p.Stdin, i.Stdin = r, w // process gets the read end, the returned IO the write end
+	// STDOUT
+	if r, w, err = os.Pipe(); err != nil {
+		return nil, err
+	}
+	fds = append(fds, r.Fd(), w.Fd())
+	p.Stdout, i.Stdout = w, r // process gets the write end, the returned IO the read end
+	// STDERR
+	if r, w, err = os.Pipe(); err != nil {
+		return nil, err
+	}
+	fds = append(fds, r.Fd(), w.Fd())
+	p.Stderr, i.Stderr = w, r // process gets the write end, the returned IO the read end
+	// change ownership of the pipes in case we are in a user namespace
+	for _, fd := range fds {
+		if err := unix.Fchown(int(fd), rootuid, rootgid); err != nil {
+			return nil, err
+		}
+	}
+	return i, nil
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/restored_process.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/restored_process.go
@@ -0,0 +1,122 @@
+// +build linux
+
+package libcontainer
+
+import (
+	"fmt"
+	"os"
+
+	"github.com/opencontainers/runc/libcontainer/system"
+)
+
+func newRestoredProcess(pid int, fds []string) (*restoredProcess, error) {
+	var (
+		err error
+	)
+	proc, err := os.FindProcess(pid)
+	if err != nil {
+		return nil, err
+	}
+	stat, err := system.Stat(pid)
+	if err != nil {
+		return nil, err
+	}
+	return &restoredProcess{
+		proc:             proc,
+		processStartTime: stat.StartTime,
+		fds:              fds,
+	}, nil
+}
+
+type restoredProcess struct {
+	proc             *os.Process
+	processStartTime uint64
+	fds              []string
+}
+
+func (p *restoredProcess) start() error {
+	return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError)
+}
+
+func (p *restoredProcess) pid() int {
+	return p.proc.Pid
+}
+
+func (p *restoredProcess) terminate() error {
+	err := p.proc.Kill()
+	if _, werr := p.wait(); err == nil {
+		err = werr
+	}
+	return err
+}
+
+func (p *restoredProcess) wait() (*os.ProcessState, error) {
+	// TODO: how do we wait on the actual process?
+	// maybe use --exec-cmd in criu
+	st, err := p.proc.Wait()
+	if err != nil {
+		return nil, err
+	}
+	return st, nil
+}
+
+func (p *restoredProcess) startTime() (uint64, error) {
+	return p.processStartTime, nil
+}
+
+func (p *restoredProcess) signal(s os.Signal) error {
+	return p.proc.Signal(s)
+}
+
+func (p *restoredProcess) externalDescriptors() []string {
+	return p.fds
+}
+
+func (p *restoredProcess) setExternalDescriptors(newFds []string) {
+	p.fds = newFds
+}
+
+// nonChildProcess represents a process where the calling process is not
+// the parent process.  This process is created when a factory loads a container from
+// a persisted state.
+type nonChildProcess struct {
+	processPid       int
+	processStartTime uint64
+	fds              []string
+}
+
+func (p *nonChildProcess) start() error {
+	return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError)
+}
+
+func (p *nonChildProcess) pid() int {
+	return p.processPid
+}
+
+func (p *nonChildProcess) terminate() error {
+	return newGenericError(fmt.Errorf("restored process cannot be terminated"), SystemError)
+}
+
+func (p *nonChildProcess) wait() (*os.ProcessState, error) {
+	return nil, newGenericError(fmt.Errorf("restored process cannot be waited on"), SystemError)
+}
+
+func (p *nonChildProcess) startTime() (uint64, error) {
+	return p.processStartTime, nil
+}
+
+func (p *nonChildProcess) signal(s os.Signal) error {
+	proc, err := os.FindProcess(p.processPid)
+	if err != nil {
+		return err
+	}
+	return proc.Signal(s)
+}
+
+func (p *nonChildProcess) externalDescriptors() []string {
+	return p.fds
+}
+
+func (p *nonChildProcess) setExternalDescriptors(newFds []string) {
+	p.fds = newFds
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go
@@ -0,0 +1,838 @@
+// +build linux
+
+package libcontainer
+
+import (
+	"fmt"
+	"io"
+	"io/ioutil"
+	"os"
+	"os/exec"
+	"path"
+	"path/filepath"
+	"strings"
+	"time"
+
+	"github.com/cyphar/filepath-securejoin"
+	"github.com/mrunalp/fileutils"
+	"github.com/opencontainers/runc/libcontainer/cgroups"
+	"github.com/opencontainers/runc/libcontainer/configs"
+	"github.com/opencontainers/runc/libcontainer/mount"
+	"github.com/opencontainers/runc/libcontainer/system"
+	libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
+	"github.com/opencontainers/selinux/go-selinux/label"
+
+	"golang.org/x/sys/unix"
+)
+
+const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
+
+// needsSetupDev returns true if /dev needs to be set up.
+func needsSetupDev(config *configs.Config) bool {
+	for _, m := range config.Mounts {
+		if m.Device == "bind" && libcontainerUtils.CleanPath(m.Destination) == "/dev" {
+			return false
+		}
+	}
+	return true
+}
+
+// prepareRootfs sets up the devices, mount points, and filesystems for use
+// inside a new mount namespace. It doesn't set anything as ro. You must call
+// finalizeRootfs after this function to finish setting up the rootfs.
+func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) {
+	config := iConfig.Config
+	if err := prepareRoot(config); err != nil {
+		return newSystemErrorWithCause(err, "preparing rootfs")
+	}
+
+	setupDev := needsSetupDev(config)
+	for _, m := range config.Mounts {
+		for _, precmd := range m.PremountCmds {
+			if err := mountCmd(precmd); err != nil {
+				return newSystemErrorWithCause(err, "running premount command")
+			}
+		}
+
+		if err := mountToRootfs(m, config.Rootfs, config.MountLabel); err != nil {
+			return newSystemErrorWithCausef(err, "mounting %q to rootfs %q at %q", m.Source, config.Rootfs, m.Destination)
+		}
+
+		for _, postcmd := range m.PostmountCmds {
+			if err := mountCmd(postcmd); err != nil {
+				return newSystemErrorWithCause(err, "running postmount command")
+			}
+		}
+	}
+
+	if setupDev {
+		if err := createDevices(config); err != nil {
+			return newSystemErrorWithCause(err, "creating device nodes")
+		}
+		if err := setupPtmx(config); err != nil {
+			return newSystemErrorWithCause(err, "setting up ptmx")
+		}
+		if err := setupDevSymlinks(config.Rootfs); err != nil {
+			return newSystemErrorWithCause(err, "setting up /dev symlinks")
+		}
+	}
+
+	// Signal the parent to run the pre-start hooks.
+	// The hooks are run after the mounts are setup, but before we switch to the new
+	// root, so that the old root is still available in the hooks for any mount
+	// manipulations.
+	// Note that iConfig.Cwd is not guaranteed to exist here.
+	if err := syncParentHooks(pipe); err != nil {
+		return err
+	}
+
+	// The reason these operations are done here rather than in finalizeRootfs
+	// is because the console-handling code gets quite sticky if we have to set
+	// up the console before doing the pivot_root(2). This is because the
+	// Console API has to also work with the ExecIn case, which means that the
+	// API must be able to deal with being inside as well as outside the
+	// container. It's just cleaner to do this here (at the expense of the
+	// operation not being perfectly split).
+
+	if err := unix.Chdir(config.Rootfs); err != nil {
+		return newSystemErrorWithCausef(err, "changing dir to %q", config.Rootfs)
+	}
+
+	if config.NoPivotRoot {
+		err = msMoveRoot(config.Rootfs)
+	} else if config.Namespaces.Contains(configs.NEWNS) {
+		err = pivotRoot(config.Rootfs)
+	} else {
+		err = chroot(config.Rootfs)
+	}
+	if err != nil {
+		return newSystemErrorWithCause(err, "jailing process inside rootfs")
+	}
+
+	if setupDev {
+		if err := reOpenDevNull(); err != nil {
+			return newSystemErrorWithCause(err, "reopening /dev/null inside container")
+		}
+	}
+
+	if cwd := iConfig.Cwd; cwd != "" {
+		// Note that spec.Process.Cwd can contain unclean value like  "../../../../foo/bar...".
+		// However, we are safe to call MkDirAll directly because we are in the jail here.
+		if err := os.MkdirAll(cwd, 0755); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// finalizeRootfs sets anything to ro if necessary. You must call
+// prepareRootfs first.
+func finalizeRootfs(config *configs.Config) (err error) {
+	// remount dev as ro if specified
+	for _, m := range config.Mounts {
+		if libcontainerUtils.CleanPath(m.Destination) == "/dev" {
+			if m.Flags&unix.MS_RDONLY == unix.MS_RDONLY {
+				if err := remountReadonly(m); err != nil {
+					return newSystemErrorWithCausef(err, "remounting %q as readonly", m.Destination)
+				}
+			}
+			break
+		}
+	}
+
+	// set rootfs ( / ) as readonly
+	if config.Readonlyfs {
+		if err := setReadonly(); err != nil {
+			return newSystemErrorWithCause(err, "setting rootfs as readonly")
+		}
+	}
+
+	unix.Umask(0022)
+	return nil
+}
+
+func mountCmd(cmd configs.Command) error {
+	command := exec.Command(cmd.Path, cmd.Args[:]...)
+	command.Env = cmd.Env
+	command.Dir = cmd.Dir
+	if out, err := command.CombinedOutput(); err != nil {
+		return fmt.Errorf("%#v failed: %s: %v", cmd, string(out), err)
+	}
+	return nil
+}
+
+func mountToRootfs(m *configs.Mount, rootfs, mountLabel string) error {
+	var (
+		dest = m.Destination
+	)
+	if !strings.HasPrefix(dest, rootfs) {
+		dest = filepath.Join(rootfs, dest)
+	}
+
+	switch m.Device {
+	case "proc", "sysfs":
+		if err := os.MkdirAll(dest, 0755); err != nil {
+			return err
+		}
+		// Selinux kernels do not support labeling of /proc or /sys
+		return mountPropagate(m, rootfs, "")
+	case "mqueue":
+		if err := os.MkdirAll(dest, 0755); err != nil {
+			return err
+		}
+		if err := mountPropagate(m, rootfs, mountLabel); err != nil {
+			// older kernels do not support labeling of /dev/mqueue
+			if err := mountPropagate(m, rootfs, ""); err != nil {
+				return err
+			}
+			return label.SetFileLabel(dest, mountLabel)
+		}
+		return nil
+	case "tmpfs":
+		copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP
+		tmpDir := ""
+		stat, err := os.Stat(dest)
+		if err != nil {
+			if err := os.MkdirAll(dest, 0755); err != nil {
+				return err
+			}
+		}
+		if copyUp {
+			tmpDir, err = ioutil.TempDir("/tmp", "runctmpdir")
+			if err != nil {
+				return newSystemErrorWithCause(err, "tmpcopyup: failed to create tmpdir")
+			}
+			defer os.RemoveAll(tmpDir)
+			m.Destination = tmpDir
+		}
+		if err := mountPropagate(m, rootfs, mountLabel); err != nil {
+			return err
+		}
+		if copyUp {
+			if err := fileutils.CopyDirectory(dest, tmpDir); err != nil {
+				errMsg := fmt.Errorf("tmpcopyup: failed to copy %s to %s: %v", dest, tmpDir, err)
+				if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil {
+					return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg)
+				}
+				return errMsg
+			}
+			if err := unix.Mount(tmpDir, dest, "", unix.MS_MOVE, ""); err != nil {
+				errMsg := fmt.Errorf("tmpcopyup: failed to move mount %s to %s: %v", tmpDir, dest, err)
+				if err1 := unix.Unmount(tmpDir, unix.MNT_DETACH); err1 != nil {
+					return newSystemErrorWithCausef(err1, "tmpcopyup: %v: failed to unmount", errMsg)
+				}
+				return errMsg
+			}
+		}
+		if stat != nil {
+			if err = os.Chmod(dest, stat.Mode()); err != nil {
+				return err
+			}
+		}
+		return nil
+	case "bind":
+		stat, err := os.Stat(m.Source)
+		if err != nil {
+			// error out if the source of a bind mount does not exist as we will be
+			// unable to bind anything to it.
+			return err
+		}
+		// ensure that the destination of the bind mount is resolved of symlinks at mount time because
+		// any previous mounts can invalidate the next mount's destination.
+		// this can happen when a user specifies mounts within other mounts to cause breakouts or other
+		// evil stuff to try to escape the container's rootfs.
+		if dest, err = securejoin.SecureJoin(rootfs, m.Destination); err != nil {
+			return err
+		}
+		if err := checkMountDestination(rootfs, dest); err != nil {
+			return err
+		}
+		// update the mount with the correct dest after symlinks are resolved.
+		m.Destination = dest
+		if err := createIfNotExists(dest, stat.IsDir()); err != nil {
+			return err
+		}
+		if err := mountPropagate(m, rootfs, mountLabel); err != nil {
+			return err
+		}
+		// bind mount won't change mount options, we need remount to make mount options effective.
+		// first check that we have non-default options required before attempting a remount
+		if m.Flags&^(unix.MS_REC|unix.MS_REMOUNT|unix.MS_BIND) != 0 {
+			// only remount if unique mount options are set
+			if err := remount(m, rootfs); err != nil {
+				return err
+			}
+		}
+
+		if m.Relabel != "" {
+			if err := label.Validate(m.Relabel); err != nil {
+				return err
+			}
+			shared := label.IsShared(m.Relabel)
+			if err := label.Relabel(m.Source, mountLabel, shared); err != nil {
+				return err
+			}
+		}
+	case "cgroup":
+		binds, err := getCgroupMounts(m)
+		if err != nil {
+			return err
+		}
+		var merged []string
+		for _, b := range binds {
+			ss := filepath.Base(b.Destination)
+			if strings.Contains(ss, ",") {
+				merged = append(merged, ss)
+			}
+		}
+		tmpfs := &configs.Mount{
+			Source:           "tmpfs",
+			Device:           "tmpfs",
+			Destination:      m.Destination,
+			Flags:            defaultMountFlags,
+			Data:             "mode=755",
+			PropagationFlags: m.PropagationFlags,
+		}
+		if err := mountToRootfs(tmpfs, rootfs, mountLabel); err != nil {
+			return err
+		}
+		for _, b := range binds {
+			if err := mountToRootfs(b, rootfs, mountLabel); err != nil {
+				return err
+			}
+		}
+		for _, mc := range merged {
+			for _, ss := range strings.Split(mc, ",") {
+				// symlink(2) is very dumb, it will just shove the path into
+				// the link and doesn't do any checks or relative path
+				// conversion. Also, don't error out if the cgroup already exists.
+				if err := os.Symlink(mc, filepath.Join(rootfs, m.Destination, ss)); err != nil && !os.IsExist(err) {
+					return err
+				}
+			}
+		}
+		if m.Flags&unix.MS_RDONLY != 0 {
+			// remount cgroup root as readonly
+			mcgrouproot := &configs.Mount{
+				Source:      m.Destination,
+				Device:      "bind",
+				Destination: m.Destination,
+				Flags:       defaultMountFlags | unix.MS_RDONLY | unix.MS_BIND,
+			}
+			if err := remount(mcgrouproot, rootfs); err != nil {
+				return err
+			}
+		}
+	default:
+		// ensure that the destination of the mount is resolved of symlinks at mount time because
+		// any previous mounts can invalidate the next mount's destination.
+		// this can happen when a user specifies mounts within other mounts to cause breakouts or other
+		// evil stuff to try to escape the container's rootfs.
+		var err error
+		if dest, err = securejoin.SecureJoin(rootfs, m.Destination); err != nil {
+			return err
+		}
+		if err := checkMountDestination(rootfs, dest); err != nil {
+			return err
+		}
+		// update the mount with the correct dest after symlinks are resolved.
+		m.Destination = dest
+		if err := os.MkdirAll(dest, 0755); err != nil {
+			return err
+		}
+		return mountPropagate(m, rootfs, mountLabel)
+	}
+	return nil
+}
+
+func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) {
+	mounts, err := cgroups.GetCgroupMounts(false)
+	if err != nil {
+		return nil, err
+	}
+
+	cgroupPaths, err := cgroups.ParseCgroupFile("/proc/self/cgroup")
+	if err != nil {
+		return nil, err
+	}
+
+	var binds []*configs.Mount
+
+	for _, mm := range mounts {
+		dir, err := mm.GetOwnCgroup(cgroupPaths)
+		if err != nil {
+			return nil, err
+		}
+		relDir, err := filepath.Rel(mm.Root, dir)
+		if err != nil {
+			return nil, err
+		}
+		binds = append(binds, &configs.Mount{
+			Device:           "bind",
+			Source:           filepath.Join(mm.Mountpoint, relDir),
+			Destination:      filepath.Join(m.Destination, filepath.Base(mm.Mountpoint)),
+			Flags:            unix.MS_BIND | unix.MS_REC | m.Flags,
+			PropagationFlags: m.PropagationFlags,
+		})
+	}
+
+	return binds, nil
+}
+
+// checkMountDestination checks to ensure that the mount destination is not over the top of /proc.
+// dest is required to be an abs path and have any symlinks resolved before calling this function.
+func checkMountDestination(rootfs, dest string) error {
+	invalidDestinations := []string{
+		"/proc",
+	}
+	// Whitelist: exact paths below the invalid destinations that may still be mounted over.
+	validDestinations := []string{
+		// These entries can be bind mounted by files emulated by fuse,
+		// so commands like top, free displays stats in container.
+		"/proc/cpuinfo",
+		"/proc/diskstats",
+		"/proc/meminfo",
+		"/proc/stat",
+		"/proc/swaps",
+		"/proc/uptime",
+		"/proc/net/dev",
+	}
+	for _, valid := range validDestinations {
+		path, err := filepath.Rel(filepath.Join(rootfs, valid), dest)
+		if err != nil {
+			return err
+		}
+		if path == "." { // dest is exactly a whitelisted path
+			return nil
+		}
+	}
+	for _, invalid := range invalidDestinations {
+		path, err := filepath.Rel(filepath.Join(rootfs, invalid), dest)
+		if err != nil {
+			return err
+		}
+		if path == "." || !strings.HasPrefix(path, "..") { // dest is the invalid path or below it
+			return fmt.Errorf("%q cannot be mounted because it is located inside %q", dest, invalid)
+		}
+	}
+	return nil
+}
+
+func setupDevSymlinks(rootfs string) error {
+	var links = [][2]string{
+		{"/proc/self/fd", "/dev/fd"},
+		{"/proc/self/fd/0", "/dev/stdin"},
+		{"/proc/self/fd/1", "/dev/stdout"},
+		{"/proc/self/fd/2", "/dev/stderr"},
+	}
+	// kcore support can be toggled with CONFIG_PROC_KCORE; only create a symlink
+	// in /dev if it exists in /proc.
+	if _, err := os.Stat("/proc/kcore"); err == nil {
+		links = append(links, [2]string{"/proc/kcore", "/dev/core"})
+	}
+	for _, link := range links {
+		var (
+			src = link[0]
+			dst = filepath.Join(rootfs, link[1])
+		)
+		if err := os.Symlink(src, dst); err != nil && !os.IsExist(err) {
+			return fmt.Errorf("symlink %s %s %s", src, dst, err)
+		}
+	}
+	return nil
+}
+
+// If stdin, stdout, and/or stderr are pointing to `/dev/null` in the parent's rootfs
+// this method will make them point to `/dev/null` in this container's rootfs.  This
+// needs to be called after we chroot/pivot into the container's rootfs so that any
+// symlinks are resolved locally.
+func reOpenDevNull() error {
+	var stat, devNullStat unix.Stat_t
+	file, err := os.OpenFile("/dev/null", os.O_RDWR, 0)
+	if err != nil {
+		return fmt.Errorf("Failed to open /dev/null - %s", err)
+	}
+	defer file.Close()
+	if err := unix.Fstat(int(file.Fd()), &devNullStat); err != nil {
+		return err
+	}
+	for fd := 0; fd < 3; fd++ {
+		if err := unix.Fstat(fd, &stat); err != nil {
+			return err
+		}
+		if stat.Rdev == devNullStat.Rdev {
+			// Close and re-open the fd.
+			if err := unix.Dup3(int(file.Fd()), fd, 0); err != nil {
+				return err
+			}
+		}
+	}
+	return nil
+}
+
+// createDevices creates the device nodes in the container.
+func createDevices(config *configs.Config) error {
+	useBindMount := system.RunningInUserNS() || config.Namespaces.Contains(configs.NEWUSER)
+	oldMask := unix.Umask(0000) // clear the umask while creating nodes; restored on every return path
+	for _, node := range config.Devices {
+		// containers running in a user namespace are not allowed to mknod
+		// devices so we can just bind mount them from the host.
+		if err := createDeviceNode(config.Rootfs, node, useBindMount); err != nil {
+			unix.Umask(oldMask)
+			return err
+		}
+	}
+	unix.Umask(oldMask)
+	return nil
+}
+
+func bindMountDeviceNode(dest string, node *configs.Device) error {
+	f, err := os.Create(dest)
+	if err != nil && !os.IsExist(err) {
+		return err
+	}
+	if f != nil {
+		f.Close()
+	}
+	return unix.Mount(node.Path, dest, "bind", unix.MS_BIND, "")
+}
+
+// createDeviceNode creates the device node in the rootfs of the container.
+func createDeviceNode(rootfs string, node *configs.Device, bind bool) error {
+	dest := filepath.Join(rootfs, node.Path)
+	if err := os.MkdirAll(filepath.Dir(dest), 0755); err != nil {
+		return err
+	}
+
+	if bind {
+		return bindMountDeviceNode(dest, node)
+	}
+	if err := mknodDevice(dest, node); err != nil {
+		if os.IsExist(err) { // node already present: treated as success
+			return nil
+		} else if os.IsPermission(err) { // mknod not permitted: fall back to a bind mount
+			return bindMountDeviceNode(dest, node)
+		}
+		return err
+	}
+	return nil
+}
+
+func mknodDevice(dest string, node *configs.Device) error {
+	fileMode := node.FileMode
+	switch node.Type {
+	case 'c', 'u':
+		fileMode |= unix.S_IFCHR
+	case 'b':
+		fileMode |= unix.S_IFBLK
+	case 'p':
+		fileMode |= unix.S_IFIFO
+	default:
+		return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path)
+	}
+	if err := unix.Mknod(dest, uint32(fileMode), node.Mkdev()); err != nil {
+		return err
+	}
+	return unix.Chown(dest, int(node.Uid), int(node.Gid))
+}
+
+func getMountInfo(mountinfo []*mount.Info, dir string) *mount.Info {
+	for _, m := range mountinfo {
+		if m.Mountpoint == dir {
+			return m
+		}
+	}
+	return nil
+}
+
+// getParentMount returns the mount point containing rootfs (rootfs itself when it
+// is a mount point) together with that mount's optional fields.
+func getParentMount(rootfs string) (string, string, error) {
+	var path string
+
+	mountinfos, err := mount.GetMounts()
+	if err != nil {
+		return "", "", err
+	}
+
+	mountinfo := getMountInfo(mountinfos, rootfs)
+	if mountinfo != nil {
+		return rootfs, mountinfo.Optional, nil
+	}
+
+	path = rootfs
+	for { // walk up one directory at a time until a mount point is found
+		path = filepath.Dir(path)
+
+		mountinfo = getMountInfo(mountinfos, path)
+		if mountinfo != nil {
+			return path, mountinfo.Optional, nil
+		}
+
+		if path == "/" {
+			break
+		}
+	}
+
+	// If we are here, we did not find parent mount. Something is wrong.
+	return "", "", fmt.Errorf("Could not find parent mount of %s", rootfs)
+}
+
+// rootfsParentMountPrivate makes the parent mount of rootfs private if it was shared.
+func rootfsParentMountPrivate(rootfs string) error {
+	sharedMount := false
+
+	parentMount, optionalOpts, err := getParentMount(rootfs)
+	if err != nil {
+		return err
+	}
+
+	optsSplit := strings.Split(optionalOpts, " ")
+	for _, opt := range optsSplit {
+		if strings.HasPrefix(opt, "shared:") { // a "shared:" optional field marks a shared mount
+			sharedMount = true
+			break
+		}
+	}
+
+	// Make parent mount PRIVATE if it was shared. It is needed for two
+	// reasons. First of all pivot_root() will fail if parent mount is
+	// shared. Secondly when we bind mount rootfs it will propagate to
+	// parent namespace and we don't want that to happen.
+	if sharedMount {
+		return unix.Mount("", parentMount, "", unix.MS_PRIVATE, "")
+	}
+
+	return nil
+}
+
+func prepareRoot(config *configs.Config) error {
+	flag := unix.MS_SLAVE | unix.MS_REC
+	if config.RootPropagation != 0 {
+		flag = config.RootPropagation
+	}
+	if err := unix.Mount("", "/", "", uintptr(flag), ""); err != nil {
+		return err
+	}
+
+	// Make parent mount private to make sure following bind mount does
+	// not propagate in other namespaces. Also it will help with kernel
+	// check pass in pivot_root. (IS_SHARED(new_mnt->mnt_parent))
+	if err := rootfsParentMountPrivate(config.Rootfs); err != nil {
+		return err
+	}
+
+	return unix.Mount(config.Rootfs, config.Rootfs, "bind", unix.MS_BIND|unix.MS_REC, "")
+}
+
+func setReadonly() error {
+	return unix.Mount("/", "/", "bind", unix.MS_BIND|unix.MS_REMOUNT|unix.MS_RDONLY|unix.MS_REC, "")
+}
+
+func setupPtmx(config *configs.Config) error {
+	ptmx := filepath.Join(config.Rootfs, "dev/ptmx")
+	if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) {
+		return err
+	}
+	if err := os.Symlink("pts/ptmx", ptmx); err != nil {
+		return fmt.Errorf("symlink dev ptmx %s", err)
+	}
+	return nil
+}
+
+// pivotRoot will call pivot_root such that rootfs becomes the new root
+// filesystem, and everything else is cleaned up.
+func pivotRoot(rootfs string) error {
+	// While the documentation may claim otherwise, pivot_root(".", ".") is
+	// actually valid. What this results in is / being the new root but
+	// /proc/self/cwd being the old root. Since we can play around with the cwd
+	// with pivot_root this allows us to pivot without creating directories in
+	// the rootfs. Shout-outs to the LXC developers for giving us this idea.
+
+	oldroot, err := unix.Open("/", unix.O_DIRECTORY|unix.O_RDONLY, 0)
+	if err != nil {
+		return err
+	}
+	defer unix.Close(oldroot)
+
+	newroot, err := unix.Open(rootfs, unix.O_DIRECTORY|unix.O_RDONLY, 0)
+	if err != nil {
+		return err
+	}
+	defer unix.Close(newroot)
+
+	// Change to the new root so that the pivot_root actually acts on it.
+	if err := unix.Fchdir(newroot); err != nil {
+		return err
+	}
+
+	if err := unix.PivotRoot(".", "."); err != nil {
+		return fmt.Errorf("pivot_root %s", err)
+	}
+
+	// Currently our "." is oldroot (according to the current kernel code).
+	// However, purely for safety, we will fchdir(oldroot) since there isn't
+	// really any guarantee from the kernel what /proc/self/cwd will be after a
+	// pivot_root(2).
+
+	if err := unix.Fchdir(oldroot); err != nil {
+		return err
+	}
+
+	// Make oldroot rslave to make sure our unmounts don't propagate to the
+	// host (and thus bork the machine). We don't use rprivate because this is
+	// known to cause issues due to races where we still have a reference to a
+	// mount while a process in the host namespace is trying to operate on
+	// something it thinks has no mounts (devicemapper in particular).
+	if err := unix.Mount("", ".", "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil {
+		return err
+	}
+	// Perform the unmount. MNT_DETACH allows us to unmount /proc/self/cwd.
+	if err := unix.Unmount(".", unix.MNT_DETACH); err != nil {
+		return err
+	}
+
+	// Switch back to our shiny new root.
+	if err := unix.Chdir("/"); err != nil {
+		return fmt.Errorf("chdir / %s", err)
+	}
+	return nil
+}
+
+func msMoveRoot(rootfs string) error {
+	if err := unix.Mount(rootfs, "/", "", unix.MS_MOVE, ""); err != nil {
+		return err
+	}
+	return chroot(rootfs)
+}
+
+func chroot(rootfs string) error {
+	if err := unix.Chroot("."); err != nil {
+		return err
+	}
+	return unix.Chdir("/")
+}
+
+// createIfNotExists creates a file or a directory only if it does not already exist.
+func createIfNotExists(path string, isDir bool) error {
+	if _, err := os.Stat(path); err != nil {
+		if os.IsNotExist(err) {
+			if isDir {
+				return os.MkdirAll(path, 0755)
+			}
+			if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
+				return err
+			}
+			f, err := os.OpenFile(path, os.O_CREATE, 0755)
+			if err != nil {
+				return err
+			}
+			f.Close()
+		}
+	}
+	return nil
+}
+
+// readonlyPath will make a path read only.
+func readonlyPath(path string) error {
+	if err := unix.Mount(path, path, "", unix.MS_BIND|unix.MS_REC, ""); err != nil {
+		if os.IsNotExist(err) {
+			return nil
+		}
+		return err
+	}
+	return unix.Mount(path, path, "", unix.MS_BIND|unix.MS_REMOUNT|unix.MS_RDONLY|unix.MS_REC, "")
+}
+
+// remountReadonly will remount an existing mount point and ensure that it is read-only.
+func remountReadonly(m *configs.Mount) error {
+	var (
+		dest  = m.Destination
+		flags = m.Flags
+	)
+	for i := 0; i < 5; i++ {
+		// There is a special case in the kernel for
+		// MS_REMOUNT | MS_BIND, which allows us to change only the
+		// flags even as an unprivileged user (i.e. user namespace)
+		// assuming we don't drop any security related flags (nodev,
+		// nosuid, etc.). So, let's use that case so that we can do
+		// this re-mount without failing in a userns.
+		flags |= unix.MS_REMOUNT | unix.MS_BIND | unix.MS_RDONLY
+		if err := unix.Mount("", dest, "", uintptr(flags), ""); err != nil {
+			switch err {
+			case unix.EBUSY:
+				time.Sleep(100 * time.Millisecond)
+				continue
+			default:
+				return err
+			}
+		}
+		return nil
+	}
+	return fmt.Errorf("unable to mount %s as readonly max retries reached", dest)
+}
+
+// maskPath masks the top of the specified path inside a container to avoid
+// security issues from processes reading information from non-namespace aware
+// mounts ( proc/kcore ).
+// For files, maskPath bind mounts /dev/null over the top of the specified path.
+// For directories, maskPath mounts read-only tmpfs over the top of the specified path.
+func maskPath(path string) error {
+	if err := unix.Mount("/dev/null", path, "", unix.MS_BIND, ""); err != nil && !os.IsNotExist(err) {
+		if err == unix.ENOTDIR {
+			return unix.Mount("tmpfs", path, "tmpfs", unix.MS_RDONLY, "")
+		}
+		return err
+	}
+	return nil
+}
+
+// writeSystemProperty writes the value to a path under /proc/sys as determined from the key.
+// For e.g. net.ipv4.ip_forward translated to /proc/sys/net/ipv4/ip_forward.
+func writeSystemProperty(key, value string) error {
+	keyPath := strings.Replace(key, ".", "/", -1)
+	return ioutil.WriteFile(path.Join("/proc/sys", keyPath), []byte(value), 0644)
+}
+
+func remount(m *configs.Mount, rootfs string) error {
+	var (
+		dest = m.Destination
+	)
+	if !strings.HasPrefix(dest, rootfs) {
+		dest = filepath.Join(rootfs, dest)
+	}
+	if err := unix.Mount(m.Source, dest, m.Device, uintptr(m.Flags|unix.MS_REMOUNT), ""); err != nil {
+		return err
+	}
+	return nil
+}
+
+// mountPropagate does the mount operation followed by the additional mounts
+// required to take care of propagation flags.
+func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error {
+	var (
+		dest  = m.Destination
+		data  = label.FormatMountLabel(m.Data, mountLabel)
+		flags = m.Flags
+	)
+	if libcontainerUtils.CleanPath(dest) == "/dev" {
+		flags &= ^unix.MS_RDONLY // drop read-only for the initial mount of /dev
+	}
+
+	copyUp := m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP
+	if !(copyUp || strings.HasPrefix(dest, rootfs)) { // copy-up destinations are used as given
+		dest = filepath.Join(rootfs, dest)
+	}
+
+	if err := unix.Mount(m.Source, dest, m.Device, uintptr(flags), data); err != nil {
+		return err
+	}
+
+	for _, pflag := range m.PropagationFlags { // one extra mount call per propagation flag
+		if err := unix.Mount("", dest, "", uintptr(pflag), ""); err != nil {
+			return err
+		}
+	}
+	return nil
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/seccomp/config.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/seccomp/config.go
@@ -0,0 +1,76 @@
+package seccomp
+
+import (
+	"fmt"
+
+	"github.com/opencontainers/runc/libcontainer/configs"
+)
+
+var operators = map[string]configs.Operator{ // SCMP_CMP_* names -> configs operators, used by ConvertStringToOperator
+	"SCMP_CMP_NE":        configs.NotEqualTo,
+	"SCMP_CMP_LT":        configs.LessThan,
+	"SCMP_CMP_LE":        configs.LessThanOrEqualTo,
+	"SCMP_CMP_EQ":        configs.EqualTo,
+	"SCMP_CMP_GE":        configs.GreaterThanOrEqualTo,
+	"SCMP_CMP_GT":        configs.GreaterThan,
+	"SCMP_CMP_MASKED_EQ": configs.MaskEqualTo,
+}
+
+var actions = map[string]configs.Action{ // SCMP_ACT_* names -> configs actions, used by ConvertStringToAction
+	"SCMP_ACT_KILL":  configs.Kill,
+	"SCMP_ACT_ERRNO": configs.Errno,
+	"SCMP_ACT_TRAP":  configs.Trap,
+	"SCMP_ACT_ALLOW": configs.Allow,
+	"SCMP_ACT_TRACE": configs.Trace,
+}
+
+var archs = map[string]string{ // SCMP_ARCH_* names -> architecture strings, used by ConvertStringToArch
+	"SCMP_ARCH_X86":         "x86",
+	"SCMP_ARCH_X86_64":      "amd64",
+	"SCMP_ARCH_X32":         "x32",
+	"SCMP_ARCH_ARM":         "arm",
+	"SCMP_ARCH_AARCH64":     "arm64",
+	"SCMP_ARCH_MIPS":        "mips",
+	"SCMP_ARCH_MIPS64":      "mips64",
+	"SCMP_ARCH_MIPS64N32":   "mips64n32",
+	"SCMP_ARCH_MIPSEL":      "mipsel",
+	"SCMP_ARCH_MIPSEL64":    "mipsel64",
+	"SCMP_ARCH_MIPSEL64N32": "mipsel64n32",
+	"SCMP_ARCH_PPC":         "ppc",
+	"SCMP_ARCH_PPC64":       "ppc64",
+	"SCMP_ARCH_PPC64LE":     "ppc64le",
+	"SCMP_ARCH_S390":        "s390",
+	"SCMP_ARCH_S390X":       "s390x",
+}
+
+// ConvertStringToOperator converts a string into a Seccomp comparison operator.
+// Comparison operators use the names they are assigned by Libseccomp's header
+// (SCMP_CMP_*). A string that is not a valid operator name yields a zero
+// operator and an error.
+func ConvertStringToOperator(in string) (configs.Operator, error) {
+	if op, ok := operators[in]; ok {
+		return op, nil
+	}
+	return 0, fmt.Errorf("string %s is not a valid operator for seccomp", in)
+}
+
+// ConvertStringToAction converts a string into a Seccomp rule match action.
+// Actions use the names they are assigned in Libseccomp's header (SCMP_ACT_*).
+// Every name listed in the actions table, SCMP_ACT_TRACE included, converts
+// successfully.
+// Attempting to convert a string that is not a valid action results in a
+// zero action and an error.
+func ConvertStringToAction(in string) (configs.Action, error) {
+	if act, ok := actions[in]; ok {
+		return act, nil
+	}
+	return 0, fmt.Errorf("string %s is not a valid action for seccomp", in)
+}
+
+// ConvertStringToArch converts a SCMP_ARCH_* name into its architecture string; unknown names yield "" and an error.
+func ConvertStringToArch(in string) (string, error) {
+	if arch, ok := archs[in]; ok {
+		return arch, nil
+	}
+	return "", fmt.Errorf("string %s is not a valid arch for seccomp", in)
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go
@@ -0,0 +1,258 @@
+// +build linux,cgo,seccomp
+
+package seccomp
+
+import (
+	"bufio"
+	"fmt"
+	"os"
+	"strings"
+
+	"github.com/opencontainers/runc/libcontainer/configs"
+	libseccomp "github.com/seccomp/libseccomp-golang"
+
+	"golang.org/x/sys/unix"
+)
+
+var (
+	actAllow = libseccomp.ActAllow
+	actTrap  = libseccomp.ActTrap
+	actKill  = libseccomp.ActKill
+	actTrace = libseccomp.ActTrace.SetReturnCode(int16(unix.EPERM)) // trace action with EPERM set as its return code
+	actErrno = libseccomp.ActErrno.SetReturnCode(int16(unix.EPERM)) // errno action with EPERM set as its return code
+)
+
+const (
+	// Linux system calls can have at most 6 arguments; matchCall sizes its per-argument counters with this.
+	syscallMaxArguments int = 6
+)
+
+// InitSeccomp builds a libseccomp filter from config and loads it into the
+// kernel. The filter is installed in the container init process and carried
+// over to its children; setns processes need their own invocation because
+// they are not children of init until they join the namespace.
+func InitSeccomp(config *configs.Seccomp) error {
+	if config == nil {
+		return fmt.Errorf("cannot initialize Seccomp - nil config passed")
+	}
+
+	fallback, err := getAction(config.DefaultAction)
+	if err != nil {
+		return fmt.Errorf("error initializing seccomp - invalid default action")
+	}
+
+	scmpFilter, err := libseccomp.NewFilter(fallback)
+	if err != nil {
+		return fmt.Errorf("error creating filter: %s", err)
+	}
+
+	// Register every additional architecture named in the config.
+	for _, name := range config.Architectures {
+		arch, archErr := libseccomp.GetArchFromString(name)
+		if archErr != nil {
+			return fmt.Errorf("error validating Seccomp architecture: %s", archErr)
+		}
+
+		if addErr := scmpFilter.AddArch(arch); addErr != nil {
+			return fmt.Errorf("error adding architecture to seccomp filter: %s", addErr)
+		}
+	}
+
+	// Clear the filter's no-new-privs bit.
+	if err = scmpFilter.SetNoNewPrivsBit(false); err != nil {
+		return fmt.Errorf("error setting no new privileges: %s", err)
+	}
+
+	// Translate each configured syscall into filter rules.
+	for _, sc := range config.Syscalls {
+		if sc == nil {
+			return fmt.Errorf("encountered nil syscall while initializing Seccomp")
+		}
+
+		if err = matchCall(scmpFilter, sc); err != nil {
+			return err
+		}
+	}
+
+	if err = scmpFilter.Load(); err != nil {
+		return fmt.Errorf("error loading seccomp filter into kernel: %s", err)
+	}
+
+	return nil
+}
+
+// IsEnabled returns if the kernel has been configured to support seccomp.
+func IsEnabled() bool {
+	// Kernels > 3.8 report a Seccomp field in /proc/self/status.
+	if status, err := parseStatusFile("/proc/self/status"); err == nil {
+		_, present := status["Seccomp"]
+		return present
+	}
+
+	// The status file could not be parsed, so probe with prctl instead.
+	// EINVAL from PR_GET_SECCOMP is taken to mean CONFIG_SECCOMP is missing.
+	if err := unix.Prctl(unix.PR_GET_SECCOMP, 0, 0, 0, 0); err == unix.EINVAL {
+		return false
+	}
+	// EINVAL from PR_SET_SECCOMP is taken to mean CONFIG_SECCOMP_FILTER is missing.
+	err := unix.Prctl(unix.PR_SET_SECCOMP, unix.SECCOMP_MODE_FILTER, 0, 0, 0)
+	return err != unix.EINVAL
+}
+
+// getAction converts a libcontainer action into the libseccomp action used
+// by this package. An action with no entry in the table below is rejected
+// with ActInvalid and an error.
+func getAction(act configs.Action) (libseccomp.ScmpAction, error) {
+	byAction := map[configs.Action]libseccomp.ScmpAction{
+		configs.Kill:  actKill,
+		configs.Errno: actErrno,
+		configs.Trap:  actTrap,
+		configs.Allow: actAllow,
+		configs.Trace: actTrace,
+	}
+	if scmpAct, known := byAction[act]; known {
+		return scmpAct, nil
+	}
+
+	return libseccomp.ActInvalid, fmt.Errorf("invalid action, cannot use in rule")
+}
+
+// getOperator converts a libcontainer comparison operator into its
+// libseccomp counterpart. An operator with no entry in the table below is
+// rejected with CompareInvalid and an error.
+func getOperator(op configs.Operator) (libseccomp.ScmpCompareOp, error) {
+	byOperator := map[configs.Operator]libseccomp.ScmpCompareOp{
+		configs.EqualTo:              libseccomp.CompareEqual,
+		configs.NotEqualTo:           libseccomp.CompareNotEqual,
+		configs.GreaterThan:          libseccomp.CompareGreater,
+		configs.GreaterThanOrEqualTo: libseccomp.CompareGreaterEqual,
+		configs.LessThan:             libseccomp.CompareLess,
+		configs.LessThanOrEqualTo:    libseccomp.CompareLessOrEqual,
+		configs.MaskEqualTo:          libseccomp.CompareMaskedEqual,
+	}
+
+	scmpOp, known := byOperator[op]
+	if !known {
+		return libseccomp.CompareInvalid, fmt.Errorf("invalid operator, cannot use in rule")
+	}
+
+	return scmpOp, nil
+}
+
+// getCondition converts a libcontainer syscall argument rule into a
+// libseccomp condition. A nil arg or an invalid operator yields an error.
+func getCondition(arg *configs.Arg) (libseccomp.ScmpCondition, error) {
+	if arg == nil {
+		var none libseccomp.ScmpCondition
+		return none, fmt.Errorf("cannot convert nil to syscall condition")
+	}
+
+	compareOp, err := getOperator(arg.Op)
+	if err != nil {
+		return libseccomp.ScmpCondition{}, err
+	}
+
+	return libseccomp.MakeCondition(arg.Index, compareOp, arg.Value, arg.ValueTwo)
+}
+
+// matchCall adds the filter rule(s) for one configured syscall. A syscall
+// whose name cannot be resolved is skipped without error; an invalid action
+// or argument condition is reported as an error.
+func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall) error {
+	if filter == nil || call == nil {
+		return fmt.Errorf("cannot use nil as syscall to block")
+	}
+	if call.Name == "" {
+		return fmt.Errorf("empty string is not a valid syscall")
+	}
+
+	// A name that does not resolve is assumed to be unsupported on this
+	// kernel, which is not treated as an error.
+	sysNum, err := libseccomp.GetSyscallFromName(call.Name)
+	if err != nil {
+		return nil
+	}
+
+	// Map the configured action onto its libseccomp counterpart.
+	action, err := getAction(call.Action)
+	if err != nil {
+		return fmt.Errorf("action in seccomp profile is invalid: %s", err)
+	}
+
+	// Without argument conditions the rule matches unconditionally.
+	if len(call.Args) == 0 {
+		if err = filter.AddRule(sysNum, action); err != nil {
+			return fmt.Errorf("error adding seccomp filter rule for syscall %s: %s", call.Name, err)
+		}
+		return nil
+	}
+
+	// Convert every argument condition, counting how many conditions refer
+	// to each argument index.
+	perIndex := make([]uint, syscallMaxArguments)
+	var conds []libseccomp.ScmpCondition
+	for _, arg := range call.Args {
+		c, err := getCondition(arg)
+		if err != nil {
+			return fmt.Errorf("error creating seccomp syscall condition for syscall %s: %s", call.Name, err)
+		}
+
+		perIndex[arg.Index]++
+		conds = append(conds, c)
+	}
+
+	// Detect whether any argument index is constrained more than once.
+	repeated := false
+	for _, n := range perIndex {
+		if n > 1 {
+			repeated = true
+			break
+		}
+	}
+
+	if !repeated {
+		// Each argument has at most one condition: add them together as a
+		// single rule.
+		if err = filter.AddRuleConditional(sysNum, action, conds); err != nil {
+			return fmt.Errorf("error adding seccomp rule for syscall %s: %s", call.Name, err)
+		}
+		return nil
+	}
+
+	// Some argument carries several conditions: revert to the old behavior
+	// of adding each condition as a separate rule.
+	for _, c := range conds {
+		single := []libseccomp.ScmpCondition{c}
+		if err = filter.AddRuleConditional(sysNum, action, single); err != nil {
+			return fmt.Errorf("error adding seccomp rule for syscall %s: %s", call.Name, err)
+		}
+	}
+	return nil
+}
+
+// parseStatusFile reads a "key:value" formatted file such as
+// /proc/self/status and returns its fields as a map. Each line is split at
+// its first colon only, so a value that itself contains colons is kept
+// whole; lines without a colon are skipped. Values are stored verbatim,
+// including any leading whitespace. Open and scan errors are returned.
+func parseStatusFile(path string) (map[string]string, error) {
+	f, err := os.Open(path)
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+
+	status := make(map[string]string)
+	s := bufio.NewScanner(f)
+	for s.Scan() {
+		parts := strings.SplitN(s.Text(), ":", 2)
+		if len(parts) < 2 {
+			continue
+		}
+		status[parts[0]] = parts[1]
+	}
+	if err := s.Err(); err != nil {
+		return nil, err
+	}
+	return status, nil
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go
@@ -0,0 +1,24 @@
+// +build !linux !cgo !seccomp
+
+package seccomp
+
+import (
+	"errors"
+
+	"github.com/opencontainers/runc/libcontainer/configs"
+)
+
+var ErrSeccompNotEnabled = errors.New("seccomp: config provided but seccomp not supported") // returned by InitSeccomp for any non-nil config
+
+// InitSeccomp accepts a nil config as a no-op and rejects any other config with ErrSeccompNotEnabled.
+func InitSeccomp(config *configs.Seccomp) error {
+	if config == nil {
+		return nil
+	}
+	return ErrSeccompNotEnabled
+}
+
+// IsEnabled always reports false: this file is built only when seccomp support is not compiled in.
+func IsEnabled() bool {
+	return false
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go
@@ -0,0 +1,76 @@
+// +build linux
+
+package libcontainer
+
+import (
+	"fmt"
+	"os"
+
+	"github.com/opencontainers/runc/libcontainer/apparmor"
+	"github.com/opencontainers/runc/libcontainer/keys"
+	"github.com/opencontainers/runc/libcontainer/seccomp"
+	"github.com/opencontainers/runc/libcontainer/system"
+	"github.com/opencontainers/selinux/go-selinux/label"
+
+	"golang.org/x/sys/unix"
+)
+
+// linuxSetnsInit performs the container's initialization for running a new process
+// inside an existing container.
+type linuxSetnsInit struct {
+	pipe          *os.File
+	consoleSocket *os.File    // passed to setupConsole when config.CreateConsole is set
+	config        *initConfig // settings read by Init
+}
+
+func (l *linuxSetnsInit) getSessionRingName() string {
+	return fmt.Sprintf("_ses.%s", l.config.ContainerId) // "_ses." followed by the container ID; Init joins a keyring by this name
+}
+
+// Init prepares the calling process for running inside an existing container
+// and then executes l.config.Args with the current environment.
+func (l *linuxSetnsInit) Init() error {
+	if !l.config.Config.NoNewKeyring {
+		// do not inherit the parent's session keyring
+		if _, err := keys.JoinSessionKeyring(l.getSessionRingName()); err != nil {
+			return err
+		}
+	}
+	if l.config.CreateConsole {
+		if err := setupConsole(l.consoleSocket, l.config, false); err != nil {
+			return err
+		}
+		if err := system.Setctty(); err != nil {
+			return err
+		}
+	}
+	if l.config.NoNewPrivileges {
+		if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
+			return err
+		}
+	}
+	// Without NoNewPrivileges seccomp is a privileged operation, so it has to be
+	// set up before capabilities are dropped rather than right before execve.
+	if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges {
+		if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
+			return err
+		}
+	}
+	if err := finalizeNamespace(l.config); err != nil {
+		return err
+	}
+	if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
+		return err
+	}
+	if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
+		return err
+	}
+	// With NoNewPrivileges, set seccomp as close to execve as possible so that
+	// users need to enable fewer syscalls in their seccomp profiles.
+	if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges {
+		if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
+			return newSystemErrorWithCause(err, "init seccomp")
+		}
+	}
+	return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/specconv/example.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/specconv/example.go
@@ -0,0 +1,221 @@
+package specconv
+
+import (
+	"os"
+	"strings"
+
+	"github.com/opencontainers/runtime-spec/specs-go"
+)
+
+// Example returns an example spec: a read-only "rootfs" root, an "sh" process
+// on a terminal, and the mounts, masked paths and namespaces listed below.
+func Example() *specs.Spec {
+	return &specs.Spec{
+		Version: specs.Version,
+		Root: &specs.Root{
+			Path:     "rootfs",
+			Readonly: true,
+		},
+		Process: &specs.Process{
+			Terminal: true,
+			User:     specs.User{},
+			Args: []string{
+				"sh",
+			},
+			Env: []string{
+				"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
+				"TERM=xterm",
+			},
+			Cwd:             "/",
+			NoNewPrivileges: true,
+			Capabilities: &specs.LinuxCapabilities{
+				Bounding: []string{
+					"CAP_AUDIT_WRITE",
+					"CAP_KILL",
+					"CAP_NET_BIND_SERVICE",
+				},
+				Permitted: []string{
+					"CAP_AUDIT_WRITE",
+					"CAP_KILL",
+					"CAP_NET_BIND_SERVICE",
+				},
+				Inheritable: []string{
+					"CAP_AUDIT_WRITE",
+					"CAP_KILL",
+					"CAP_NET_BIND_SERVICE",
+				},
+				Ambient: []string{
+					"CAP_AUDIT_WRITE",
+					"CAP_KILL",
+					"CAP_NET_BIND_SERVICE",
+				},
+				Effective: []string{
+					"CAP_AUDIT_WRITE",
+					"CAP_KILL",
+					"CAP_NET_BIND_SERVICE",
+				},
+			},
+			Rlimits: []specs.POSIXRlimit{
+				{
+					Type: "RLIMIT_NOFILE",
+					Hard: uint64(1024),
+					Soft: uint64(1024),
+				},
+			},
+		},
+		Hostname: "runc",
+		Mounts: []specs.Mount{
+			{
+				Destination: "/proc",
+				Type:        "proc",
+				Source:      "proc",
+				Options:     nil,
+			},
+			{
+				Destination: "/dev",
+				Type:        "tmpfs",
+				Source:      "tmpfs",
+				Options:     []string{"nosuid", "strictatime", "mode=755", "size=65536k"},
+			},
+			{
+				Destination: "/dev/pts",
+				Type:        "devpts",
+				Source:      "devpts",
+				Options:     []string{"nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620", "gid=5"},
+			},
+			{
+				Destination: "/dev/shm",
+				Type:        "tmpfs",
+				Source:      "shm",
+				Options:     []string{"nosuid", "noexec", "nodev", "mode=1777", "size=65536k"},
+			},
+			{
+				Destination: "/dev/mqueue",
+				Type:        "mqueue",
+				Source:      "mqueue",
+				Options:     []string{"nosuid", "noexec", "nodev"},
+			},
+			{
+				Destination: "/sys",
+				Type:        "sysfs",
+				Source:      "sysfs",
+				Options:     []string{"nosuid", "noexec", "nodev", "ro"},
+			},
+			{
+				Destination: "/sys/fs/cgroup",
+				Type:        "cgroup",
+				Source:      "cgroup",
+				Options:     []string{"nosuid", "noexec", "nodev", "relatime", "ro"},
+			},
+		},
+		Linux: &specs.Linux{
+			MaskedPaths: []string{
+				"/proc/kcore",
+				"/proc/latency_stats",
+				"/proc/timer_list",
+				"/proc/timer_stats",
+				"/proc/sched_debug",
+				"/sys/firmware",
+				"/proc/scsi",
+			},
+			ReadonlyPaths: []string{
+				"/proc/asound",
+				"/proc/bus",
+				"/proc/fs",
+				"/proc/irq",
+				"/proc/sys",
+				"/proc/sysrq-trigger",
+			},
+			Resources: &specs.LinuxResources{
+				Devices: []specs.LinuxDeviceCgroup{
+					{
+						Allow:  false,
+						Access: "rwm",
+					},
+				},
+			},
+			Namespaces: []specs.LinuxNamespace{
+				{
+					Type: "pid",
+				},
+				{
+					Type: "network",
+				},
+				{
+					Type: "ipc",
+				},
+				{
+					Type: "uts",
+				},
+				{
+					Type: "mount",
+				},
+			},
+		},
+	}
+}
+
+// ToRootless converts the given spec file into one that should work with
+// rootless containers, by removing incompatible options and adding others that
+// are needed. spec is modified in place and spec.Linux must not be nil.
+func ToRootless(spec *specs.Spec) {
+	var namespaces []specs.LinuxNamespace
+
+	// Drop the network namespace, and any user namespace (re-added below).
+	for _, ns := range spec.Linux.Namespaces {
+		switch ns.Type {
+		case specs.NetworkNamespace, specs.UserNamespace:
+			// Do nothing.
+		default:
+			namespaces = append(namespaces, ns)
+		}
+	}
+	// Add exactly one userns entry to the spec.
+	namespaces = append(namespaces, specs.LinuxNamespace{
+		Type: specs.UserNamespace,
+	})
+	spec.Linux.Namespaces = namespaces
+
+	// Map container uid/gid 0 to the current effective user and group.
+	spec.Linux.UIDMappings = []specs.LinuxIDMapping{{
+		HostID:      uint32(os.Geteuid()),
+		ContainerID: 0,
+		Size:        1,
+	}}
+	spec.Linux.GIDMappings = []specs.LinuxIDMapping{{
+		HostID:      uint32(os.Getegid()),
+		ContainerID: 0,
+		Size:        1,
+	}}
+
+	// Fix up mounts.
+	var mounts []specs.Mount
+	for _, mount := range spec.Mounts {
+		// Drop /sys itself and everything below it; "/sysroot" and the like are kept.
+		if mount.Destination == "/sys" || strings.HasPrefix(mount.Destination, "/sys/") {
+			continue
+		}
+
+		// Remove all gid= and uid= mappings.
+		var options []string
+		for _, option := range mount.Options {
+			if !strings.HasPrefix(option, "gid=") && !strings.HasPrefix(option, "uid=") {
+				options = append(options, option)
+			}
+		}
+
+		mount.Options = options
+		mounts = append(mounts, mount)
+	}
+	// Add the sysfs mount as a read-only rbind of the host's /sys.
+	mounts = append(mounts, specs.Mount{
+		Source:      "/sys",
+		Destination: "/sys",
+		Type:        "none",
+		Options:     []string{"rbind", "nosuid", "noexec", "nodev", "ro"},
+	})
+	spec.Mounts = mounts
+
+	// Remove cgroup settings.
+	spec.Linux.Resources = nil
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/specconv/spec_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/specconv/spec_linux.go
@@ -0,0 +1,832 @@
+// +build linux
+
+// Package specconv implements conversion of specifications to libcontainer
+// configurations
+package specconv
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"strings"
+	"time"
+
+	"github.com/opencontainers/runc/libcontainer/configs"
+	"github.com/opencontainers/runc/libcontainer/seccomp"
+	libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils"
+	"github.com/opencontainers/runtime-spec/specs-go"
+
+	"golang.org/x/sys/unix"
+)
+
+const wildcard = -1 // used as Major/Minor in allowedDevices rules that are not tied to one device number
+
+var namespaceMapping = map[specs.LinuxNamespaceType]configs.NamespaceType{ // spec namespace type -> libcontainer namespace type
+	specs.PIDNamespace:     configs.NEWPID,
+	specs.NetworkNamespace: configs.NEWNET,
+	specs.MountNamespace:   configs.NEWNS,
+	specs.UserNamespace:    configs.NEWUSER,
+	specs.IPCNamespace:     configs.NEWIPC,
+	specs.UTSNamespace:     configs.NEWUTS,
+}
+
+var mountPropagationMapping = map[string]int{ // rootfsPropagation value -> mount propagation flags
+	"rprivate":    unix.MS_PRIVATE | unix.MS_REC,
+	"private":     unix.MS_PRIVATE,
+	"rslave":      unix.MS_SLAVE | unix.MS_REC,
+	"slave":       unix.MS_SLAVE,
+	"rshared":     unix.MS_SHARED | unix.MS_REC,
+	"shared":      unix.MS_SHARED,
+	"runbindable": unix.MS_UNBINDABLE | unix.MS_REC,
+	"unbindable":  unix.MS_UNBINDABLE,
+	"":            0, // an unset rootfsPropagation maps to no flags
+}
+
+var allowedDevices = []*configs.Device{ // default allow rules that createCgroupConfig adds for non-rootless containers
+	// allow mknod ("m") for any character or block device
+	{
+		Type:        'c',
+		Major:       wildcard,
+		Minor:       wildcard,
+		Permissions: "m",
+		Allow:       true,
+	},
+	{
+		Type:        'b',
+		Major:       wildcard,
+		Minor:       wildcard,
+		Permissions: "m",
+		Allow:       true,
+	},
+	{
+		Type:        'c',
+		Path:        "/dev/null",
+		Major:       1,
+		Minor:       3,
+		Permissions: "rwm",
+		Allow:       true,
+	},
+	{
+		Type:        'c',
+		Path:        "/dev/random",
+		Major:       1,
+		Minor:       8,
+		Permissions: "rwm",
+		Allow:       true,
+	},
+	{
+		Type:        'c',
+		Path:        "/dev/full",
+		Major:       1,
+		Minor:       7,
+		Permissions: "rwm",
+		Allow:       true,
+	},
+	{
+		Type:        'c',
+		Path:        "/dev/tty",
+		Major:       5,
+		Minor:       0,
+		Permissions: "rwm",
+		Allow:       true,
+	},
+	{
+		Type:        'c',
+		Path:        "/dev/zero",
+		Major:       1,
+		Minor:       5,
+		Permissions: "rwm",
+		Allow:       true,
+	},
+	{
+		Type:        'c',
+		Path:        "/dev/urandom",
+		Major:       1,
+		Minor:       9,
+		Permissions: "rwm",
+		Allow:       true,
+	},
+	{
+		Path:        "/dev/console",
+		Type:        'c',
+		Major:       5,
+		Minor:       1,
+		Permissions: "rwm",
+		Allow:       true,
+	},
+	// /dev/pts/ (char major 136, any minor) - pts namespaces are "coming soon"
+	{
+		Path:        "",
+		Type:        'c',
+		Major:       136,
+		Minor:       wildcard,
+		Permissions: "rwm",
+		Allow:       true,
+	},
+	{
+		Path:        "", // NOTE(review): char 5:2 is presumably /dev/ptmx - confirm
+		Type:        'c',
+		Major:       5,
+		Minor:       2,
+		Permissions: "rwm",
+		Allow:       true,
+	},
+	// tuntap (char 10:200)
+	{
+		Path:        "",
+		Type:        'c',
+		Major:       10,
+		Minor:       200,
+		Permissions: "rwm",
+		Allow:       true,
+	},
+}
+
+type CreateOpts struct { // CreateOpts carries the inputs of CreateLibcontainerConfig.
+	CgroupName       string      // cgroup name used when the spec sets no cgroupsPath
+	UseSystemdCgroup bool        // interpret cgroupsPath in the "slice:prefix:name" systemd form
+	NoPivotRoot      bool        // copied to Config.NoPivotRoot
+	NoNewKeyring     bool        // copied to Config.NoNewKeyring
+	Spec             *specs.Spec // the runtime spec to convert
+	Rootless         bool        // copied to Config.Rootless; also skips the default allowed devices
+}
+
+// CreateLibcontainerConfig creates a new libcontainer configuration from a
+// given specification and a cgroup name. The spec's Root section is
+// required; a nil Process or Linux section is tolerated by this function.
+func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
+	// runc's cwd will always be the bundle path
+	rcwd, err := os.Getwd()
+	if err != nil {
+		return nil, err
+	}
+	cwd, err := filepath.Abs(rcwd)
+	if err != nil {
+		return nil, err
+	}
+	spec := opts.Spec
+	if spec.Root == nil {
+		return nil, fmt.Errorf("Root must be specified")
+	}
+	rootfsPath := spec.Root.Path
+	if !filepath.IsAbs(rootfsPath) {
+		rootfsPath = filepath.Join(cwd, rootfsPath)
+	}
+	labels := []string{}
+	for k, v := range spec.Annotations {
+		labels = append(labels, fmt.Sprintf("%s=%s", k, v))
+	}
+	config := &configs.Config{
+		Rootfs:       rootfsPath,
+		NoPivotRoot:  opts.NoPivotRoot,
+		Readonlyfs:   spec.Root.Readonly,
+		Hostname:     spec.Hostname,
+		Labels:       append(labels, fmt.Sprintf("bundle=%s", cwd)),
+		NoNewKeyring: opts.NoNewKeyring,
+		Rootless:     opts.Rootless,
+	}
+
+	exists := false
+	for _, m := range spec.Mounts {
+		config.Mounts = append(config.Mounts, createLibcontainerMount(cwd, m))
+	}
+	if err := createDevices(spec, config); err != nil {
+		return nil, err
+	}
+	c, err := createCgroupConfig(opts)
+	if err != nil {
+		return nil, err
+	}
+	config.Cgroups = c
+	// set linux-specific config
+	if spec.Linux != nil {
+		if config.RootPropagation, exists = mountPropagationMapping[spec.Linux.RootfsPropagation]; !exists {
+			return nil, fmt.Errorf("rootfsPropagation=%v is not supported", spec.Linux.RootfsPropagation)
+		}
+		if config.NoPivotRoot && (config.RootPropagation&unix.MS_PRIVATE != 0) {
+			return nil, fmt.Errorf("rootfsPropagation of [r]private is not safe without pivot_root")
+		}
+
+		for _, ns := range spec.Linux.Namespaces {
+			t, exists := namespaceMapping[ns.Type]
+			if !exists {
+				return nil, fmt.Errorf("namespace %q does not exist", ns)
+			}
+			if config.Namespaces.Contains(t) {
+				return nil, fmt.Errorf("malformed spec file: duplicated ns %q", ns)
+			}
+			config.Namespaces.Add(t, ns.Path)
+		}
+		if config.Namespaces.Contains(configs.NEWNET) {
+			config.Networks = []*configs.Network{
+				{
+					Type: "loopback",
+				},
+			}
+		}
+		if config.Namespaces.Contains(configs.NEWUSER) {
+			if err := setupUserNamespace(spec, config); err != nil {
+				return nil, err
+			}
+		}
+		config.MaskPaths = spec.Linux.MaskedPaths
+		config.ReadonlyPaths = spec.Linux.ReadonlyPaths
+		config.MountLabel = spec.Linux.MountLabel
+		config.Sysctl = spec.Linux.Sysctl
+		if spec.Linux.Seccomp != nil {
+			seccomp, err := setupSeccomp(spec.Linux.Seccomp)
+			if err != nil {
+				return nil, err
+			}
+			config.Seccomp = seccomp
+		}
+	}
+	// Process may be nil, so every Process-derived setting sits behind one guard.
+	if p := spec.Process; p != nil {
+		config.ProcessLabel = p.SelinuxLabel
+		if p.OOMScoreAdj != nil {
+			config.OomScoreAdj = *p.OOMScoreAdj
+		}
+		if caps := p.Capabilities; caps != nil {
+			config.Capabilities = &configs.Capabilities{
+				Bounding:    caps.Bounding,
+				Effective:   caps.Effective,
+				Permitted:   caps.Permitted,
+				Inheritable: caps.Inheritable,
+				Ambient:     caps.Ambient,
+			}
+		}
+	}
+	createHooks(spec, config)
+	config.Version = specs.Version
+	// Linux may be nil as well; guard the IntelRdt lookup against that.
+	if spec.Linux != nil && spec.Linux.IntelRdt != nil {
+		config.IntelRdt = &configs.IntelRdt{L3CacheSchema: spec.Linux.IntelRdt.L3CacheSchema}
+	}
+	return config, nil
+}
+
+// createLibcontainerMount converts a spec mount into a libcontainer mount.
+// A relative source of a "bind" mount is resolved against cwd.
+func createLibcontainerMount(cwd string, m specs.Mount) *configs.Mount {
+	mountFlags, propagation, data, extensions := parseMountOptions(m.Options)
+	mnt := &configs.Mount{
+		Device:           m.Type,
+		Source:           m.Source,
+		Destination:      m.Destination,
+		Data:             data,
+		Flags:            mountFlags,
+		PropagationFlags: propagation,
+		Extensions:       extensions,
+	}
+	if m.Type == "bind" && !filepath.IsAbs(mnt.Source) {
+		mnt.Source = filepath.Join(cwd, m.Source)
+	}
+	return mnt
+}
+
+// createCgroupConfig builds the cgroup configuration from opts: the cgroup
+// name/path plus the resource limits and device rules in opts.Spec.Linux.
+func createCgroupConfig(opts *CreateOpts) (*configs.Cgroup, error) {
+	var (
+		myCgroupPath string
+
+		spec             = opts.Spec
+		useSystemdCgroup = opts.UseSystemdCgroup
+		name             = opts.CgroupName
+	)
+
+	c := &configs.Cgroup{
+		Resources: &configs.Resources{},
+	}
+
+	if spec.Linux != nil && spec.Linux.CgroupsPath != "" {
+		myCgroupPath = libcontainerUtils.CleanPath(spec.Linux.CgroupsPath)
+		if useSystemdCgroup {
+			myCgroupPath = spec.Linux.CgroupsPath
+		}
+	}
+
+	if useSystemdCgroup {
+		if myCgroupPath == "" {
+			c.Parent = "system.slice"
+			c.ScopePrefix = "runc"
+			c.Name = name
+		} else {
+			// Parse the path from the expected "slice:prefix:name", e.g. "system.slice:docker:1234"
+			parts := strings.Split(myCgroupPath, ":")
+			if len(parts) != 3 {
+				return nil, fmt.Errorf("expected cgroupsPath to be of format \"slice:prefix:name\" for systemd cgroups")
+			}
+			c.Parent = parts[0]
+			c.ScopePrefix = parts[1]
+			c.Name = parts[2]
+		}
+	} else {
+		if myCgroupPath == "" {
+			c.Name = name
+		}
+		c.Path = myCgroupPath
+	}
+
+	// In rootless containers, any attempt to make cgroup changes will fail. libcontainer
+	// will validate this and we shouldn't add any cgroup options the user didn't specify.
+	if !opts.Rootless {
+		c.Resources.AllowedDevices = allowedDevices
+	}
+	if spec.Linux != nil {
+		r := spec.Linux.Resources
+		if r == nil {
+			return c, nil
+		}
+		for i, d := range spec.Linux.Resources.Devices {
+			var (
+				t     = "a"
+				major = int64(-1)
+				minor = int64(-1)
+			)
+			if d.Type != "" {
+				t = d.Type
+			}
+			if d.Major != nil {
+				major = *d.Major
+			}
+			if d.Minor != nil {
+				minor = *d.Minor
+			}
+			if d.Access == "" {
+				return nil, fmt.Errorf("device access at %d field cannot be empty", i)
+			}
+			dt, err := stringToCgroupDeviceRune(t)
+			if err != nil {
+				return nil, err
+			}
+			dd := &configs.Device{
+				Type:        dt,
+				Major:       major,
+				Minor:       minor,
+				Permissions: d.Access,
+				Allow:       d.Allow,
+			}
+			c.Resources.Devices = append(c.Resources.Devices, dd)
+		}
+		if r.Memory != nil {
+			if r.Memory.Limit != nil {
+				c.Resources.Memory = *r.Memory.Limit
+			}
+			if r.Memory.Reservation != nil {
+				c.Resources.MemoryReservation = *r.Memory.Reservation
+			}
+			if r.Memory.Swap != nil {
+				c.Resources.MemorySwap = *r.Memory.Swap
+			}
+			if r.Memory.Kernel != nil {
+				c.Resources.KernelMemory = *r.Memory.Kernel
+			}
+			if r.Memory.KernelTCP != nil {
+				c.Resources.KernelMemoryTCP = *r.Memory.KernelTCP
+			}
+			if r.Memory.Swappiness != nil {
+				c.Resources.MemorySwappiness = r.Memory.Swappiness
+			}
+			if r.Memory.DisableOOMKiller != nil {
+				c.Resources.OomKillDisable = *r.Memory.DisableOOMKiller
+			}
+		}
+		if r.CPU != nil {
+			if r.CPU.Shares != nil {
+				c.Resources.CpuShares = *r.CPU.Shares
+			}
+			if r.CPU.Quota != nil {
+				c.Resources.CpuQuota = *r.CPU.Quota
+			}
+			if r.CPU.Period != nil {
+				c.Resources.CpuPeriod = *r.CPU.Period
+			}
+			if r.CPU.RealtimeRuntime != nil {
+				c.Resources.CpuRtRuntime = *r.CPU.RealtimeRuntime
+			}
+			if r.CPU.RealtimePeriod != nil {
+				c.Resources.CpuRtPeriod = *r.CPU.RealtimePeriod
+			}
+			if r.CPU.Cpus != "" {
+				c.Resources.CpusetCpus = r.CPU.Cpus
+			}
+			if r.CPU.Mems != "" {
+				c.Resources.CpusetMems = r.CPU.Mems
+			}
+		}
+		if r.Pids != nil {
+			c.Resources.PidsLimit = r.Pids.Limit
+		}
+		if r.BlockIO != nil {
+			if r.BlockIO.Weight != nil {
+				c.Resources.BlkioWeight = *r.BlockIO.Weight
+			}
+			if r.BlockIO.LeafWeight != nil {
+				c.Resources.BlkioLeafWeight = *r.BlockIO.LeafWeight
+			}
+			if r.BlockIO.WeightDevice != nil {
+				for _, wd := range r.BlockIO.WeightDevice {
+					var weight, leafWeight uint16
+					if wd.Weight != nil {
+						weight = *wd.Weight
+					}
+					if wd.LeafWeight != nil {
+						leafWeight = *wd.LeafWeight
+					}
+					weightDevice := configs.NewWeightDevice(wd.Major, wd.Minor, weight, leafWeight)
+					c.Resources.BlkioWeightDevice = append(c.Resources.BlkioWeightDevice, weightDevice)
+				}
+			}
+			if r.BlockIO.ThrottleReadBpsDevice != nil {
+				for _, td := range r.BlockIO.ThrottleReadBpsDevice {
+					rate := td.Rate
+					throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate)
+					c.Resources.BlkioThrottleReadBpsDevice = append(c.Resources.BlkioThrottleReadBpsDevice, throttleDevice)
+				}
+			}
+			if r.BlockIO.ThrottleWriteBpsDevice != nil {
+				for _, td := range r.BlockIO.ThrottleWriteBpsDevice {
+					rate := td.Rate
+					throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate)
+					c.Resources.BlkioThrottleWriteBpsDevice = append(c.Resources.BlkioThrottleWriteBpsDevice, throttleDevice)
+				}
+			}
+			if r.BlockIO.ThrottleReadIOPSDevice != nil {
+				for _, td := range r.BlockIO.ThrottleReadIOPSDevice {
+					rate := td.Rate
+					throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate)
+					c.Resources.BlkioThrottleReadIOPSDevice = append(c.Resources.BlkioThrottleReadIOPSDevice, throttleDevice)
+				}
+			}
+			if r.BlockIO.ThrottleWriteIOPSDevice != nil {
+				for _, td := range r.BlockIO.ThrottleWriteIOPSDevice {
+					rate := td.Rate
+					throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate)
+					c.Resources.BlkioThrottleWriteIOPSDevice = append(c.Resources.BlkioThrottleWriteIOPSDevice, throttleDevice)
+				}
+			}
+		}
+		for _, l := range r.HugepageLimits {
+			c.Resources.HugetlbLimit = append(c.Resources.HugetlbLimit, &configs.HugepageLimit{
+				Pagesize: l.Pagesize,
+				Limit:    l.Limit,
+			})
+		}
+		if r.Network != nil {
+			if r.Network.ClassID != nil {
+				c.Resources.NetClsClassid = *r.Network.ClassID
+			}
+			for _, m := range r.Network.Priorities {
+				c.Resources.NetPrioIfpriomap = append(c.Resources.NetPrioIfpriomap, &configs.IfPrioMap{
+					Interface: m.Name,
+					Priority:  int64(m.Priority),
+				})
+			}
+		}
+	}
+	if !opts.Rootless {
+		// append the default allowed devices to the end of the list (not reached when Linux.Resources is nil)
+		c.Resources.Devices = append(c.Resources.Devices, allowedDevices...)
+	}
+	return c, nil
+}
+
+// stringToCgroupDeviceRune converts a cgroup device type string into its rune.
+// Only "a", "b" and "c" are accepted; any other value, including the empty
+// string, produces an "invalid cgroup device type" error.
+func stringToCgroupDeviceRune(s string) (rune, error) {
+	switch s {
+	case "a", "b", "c":
+		// Each accepted string is one ASCII byte, which is the rune itself.
+		return rune(s[0]), nil
+	}
+
+	return 0, fmt.Errorf("invalid cgroup device type %q", s)
+}
+
+// stringToDeviceRune maps a device type string from the spec to the rune
+// stored in configs.Device.Type. Only the single-character strings "p", "u",
+// "b" and "c" are accepted; every other value, including the empty string,
+// produces an "invalid device type" error.
+func stringToDeviceRune(s string) (rune, error) {
+	switch s {
+	case "p", "u", "b", "c":
+		// Each accepted string is exactly one ASCII byte, so that byte is
+		// the rune to return.
+		return rune(s[0]), nil
+	}
+
+	return 0, fmt.Errorf("invalid device type %q", s)
+}
+
+func createDevices(spec *specs.Spec, config *configs.Config) error {
+	// Start from the built-in list of default device nodes.
+	config.Devices = []*configs.Device{
+		{
+			Type:     'c',
+			Path:     "/dev/null",
+			Major:    1,
+			Minor:    3,
+			FileMode: 0666,
+			Uid:      0,
+			Gid:      0,
+		},
+		{
+			Type:     'c',
+			Path:     "/dev/random",
+			Major:    1,
+			Minor:    8,
+			FileMode: 0666,
+			Uid:      0,
+			Gid:      0,
+		},
+		{
+			Type:     'c',
+			Path:     "/dev/full",
+			Major:    1,
+			Minor:    7,
+			FileMode: 0666,
+			Uid:      0,
+			Gid:      0,
+		},
+		{
+			Type:     'c',
+			Path:     "/dev/tty",
+			Major:    5,
+			Minor:    0,
+			FileMode: 0666,
+			Uid:      0,
+			Gid:      0,
+		},
+		{
+			Type:     'c',
+			Path:     "/dev/zero",
+			Major:    1,
+			Minor:    5,
+			FileMode: 0666,
+			Uid:      0,
+			Gid:      0,
+		},
+		{
+			Type:     'c',
+			Path:     "/dev/urandom",
+			Major:    1,
+			Minor:    9,
+			FileMode: 0666,
+			Uid:      0,
+			Gid:      0,
+		},
+	}
+	// Append the spec's devices; unset UID/GID default to 0, FileMode to 0666.
+	if spec.Linux != nil {
+		for _, d := range spec.Linux.Devices {
+			var uid, gid uint32
+			var filemode os.FileMode = 0666
+
+			if d.UID != nil {
+				uid = *d.UID
+			}
+			if d.GID != nil {
+				gid = *d.GID
+			}
+			dt, err := stringToDeviceRune(d.Type)
+			if err != nil {
+				return err
+			}
+			if d.FileMode != nil {
+				filemode = *d.FileMode
+			}
+			device := &configs.Device{
+				Type:     dt,
+				Path:     d.Path,
+				Major:    d.Major,
+				Minor:    d.Minor,
+				FileMode: filemode,
+				Uid:      uid,
+				Gid:      gid,
+			}
+			config.Devices = append(config.Devices, device)
+		}
+	}
+	return nil
+}
+
+// setupUserNamespace copies the UID and GID mappings of the spec into config
+// and then assigns every configured device node the owner IDs reported by
+// config.HostRootUID and config.HostRootGID. An error from either lookup is
+// returned unchanged.
+func setupUserNamespace(spec *specs.Spec, config *configs.Config) error {
+	toIDMap := func(m specs.LinuxIDMapping) configs.IDMap {
+		return configs.IDMap{HostID: int(m.HostID), ContainerID: int(m.ContainerID), Size: int(m.Size)}
+	}
+	if spec.Linux != nil {
+		for _, uidMap := range spec.Linux.UIDMappings {
+			config.UidMappings = append(config.UidMappings, toIDMap(uidMap))
+		}
+		for _, gidMap := range spec.Linux.GIDMappings {
+			config.GidMappings = append(config.GidMappings, toIDMap(gidMap))
+		}
+	}
+	hostUID, err := config.HostRootUID()
+	if err != nil {
+		return err
+	}
+	hostGID, err := config.HostRootGID()
+	if err != nil {
+		return err
+	}
+	// Every device node gets the same owner.
+	for _, dev := range config.Devices {
+		dev.Uid, dev.Gid = uint32(hostUID), uint32(hostGID)
+	}
+	return nil
+}
+
+// parseMountOptions parses the mount options and returns the mount flags, the
+// propagation flags, the comma-joined mount data and the extension flags.
+func parseMountOptions(options []string) (int, []int, string, int) {
+	var (
+		flag     int
+		pgflag   []int
+		data     []string
+		extFlags int
+	)
+	flags := map[string]struct {
+		clear bool
+		flag  int
+	}{
+		"acl":           {false, unix.MS_POSIXACL},
+		"async":         {true, unix.MS_SYNCHRONOUS},
+		"atime":         {true, unix.MS_NOATIME},
+		"bind":          {false, unix.MS_BIND},
+		"defaults":      {false, 0},
+		"dev":           {true, unix.MS_NODEV},
+		"diratime":      {true, unix.MS_NODIRATIME},
+		"dirsync":       {false, unix.MS_DIRSYNC},
+		"exec":          {true, unix.MS_NOEXEC},
+		"iversion":      {false, unix.MS_I_VERSION},
+		"lazytime":      {false, unix.MS_LAZYTIME},
+		"loud":          {true, unix.MS_SILENT},
+		"mand":          {false, unix.MS_MANDLOCK},
+		"noacl":         {true, unix.MS_POSIXACL},
+		"noatime":       {false, unix.MS_NOATIME},
+		"nodev":         {false, unix.MS_NODEV},
+		"nodiratime":    {false, unix.MS_NODIRATIME},
+		"noexec":        {false, unix.MS_NOEXEC},
+		"noiversion":    {true, unix.MS_I_VERSION},
+		"nolazytime":    {true, unix.MS_LAZYTIME},
+		"nomand":        {true, unix.MS_MANDLOCK},
+		"norelatime":    {true, unix.MS_RELATIME},
+		"nostrictatime": {true, unix.MS_STRICTATIME},
+		"nosuid":        {false, unix.MS_NOSUID},
+		"rbind":         {false, unix.MS_BIND | unix.MS_REC},
+		"relatime":      {false, unix.MS_RELATIME},
+		"remount":       {false, unix.MS_REMOUNT},
+		"ro":            {false, unix.MS_RDONLY},
+		"rw":            {true, unix.MS_RDONLY},
+		"silent":        {false, unix.MS_SILENT},
+		"strictatime":   {false, unix.MS_STRICTATIME},
+		"suid":          {true, unix.MS_NOSUID},
+		"sync":          {false, unix.MS_SYNCHRONOUS},
+	}
+	propagationFlags := map[string]int{
+		"private":     unix.MS_PRIVATE,
+		"shared":      unix.MS_SHARED,
+		"slave":       unix.MS_SLAVE,
+		"unbindable":  unix.MS_UNBINDABLE,
+		"rprivate":    unix.MS_PRIVATE | unix.MS_REC,
+		"rshared":     unix.MS_SHARED | unix.MS_REC,
+		"rslave":      unix.MS_SLAVE | unix.MS_REC,
+		"runbindable": unix.MS_UNBINDABLE | unix.MS_REC,
+	}
+	extensionFlags := map[string]struct {
+		clear bool
+		flag  int
+	}{
+		"tmpcopyup": {false, configs.EXT_COPYUP},
+	}
+	for _, o := range options {
+		// An option found in none of the three tables, or whose table entry
+		// has a zero flag value (such as "defaults"), is not a flag at all:
+		// it is kept as a data value for a specific fs type.
+		if f, exists := flags[o]; exists && f.flag != 0 {
+			if f.clear {
+				flag &= ^f.flag
+			} else {
+				flag |= f.flag
+			}
+		} else if f, exists := propagationFlags[o]; exists && f != 0 {
+			pgflag = append(pgflag, f)
+		} else if f, exists := extensionFlags[o]; exists && f.flag != 0 {
+			if f.clear {
+				extFlags &= ^f.flag
+			} else {
+				extFlags |= f.flag
+			}
+		} else {
+			data = append(data, o)
+		}
+	}
+	return flag, pgflag, strings.Join(data, ","), extFlags
+}
+
+// setupSeccomp converts the seccomp section of the spec into the libcontainer
+// representation. It returns (nil, nil) when config is nil or when config
+// names neither a default action nor any syscall rule. The first conversion
+// error for an architecture, action or operator is returned unchanged.
+func setupSeccomp(config *specs.LinuxSeccomp) (*configs.Seccomp, error) {
+	if config == nil {
+		return nil, nil
+	}
+
+	// No default action specified, no syscalls listed, assume seccomp disabled
+	if config.DefaultAction == "" && len(config.Syscalls) == 0 {
+		return nil, nil
+	}
+
+	out := &configs.Seccomp{Syscalls: []*configs.Syscall{}}
+	if len(config.Architectures) > 0 {
+		out.Architectures = []string{}
+	}
+	for _, arch := range config.Architectures {
+		converted, err := seccomp.ConvertStringToArch(string(arch))
+		if err != nil {
+			return nil, err
+		}
+		out.Architectures = append(out.Architectures, converted)
+	}
+
+	// Convert default action from string representation
+	defaultAction, err := seccomp.ConvertStringToAction(string(config.DefaultAction))
+	if err != nil {
+		return nil, err
+	}
+	out.DefaultAction = defaultAction
+
+	// Each rule may name several syscalls; emit one libcontainer entry per
+	// name, each with the rule's action and its own copy of the arguments.
+	for _, rule := range config.Syscalls {
+		action, err := seccomp.ConvertStringToAction(string(rule.Action))
+		if err != nil {
+			return nil, err
+		}
+
+		for _, name := range rule.Names {
+			entry := &configs.Syscall{
+				Name:   name,
+				Action: action,
+				Args:   []*configs.Arg{},
+			}
+			for _, arg := range rule.Args {
+				op, err := seccomp.ConvertStringToOperator(string(arg.Op))
+				if err != nil {
+					return nil, err
+				}
+
+				entry.Args = append(entry.Args, &configs.Arg{
+					Index:    arg.Index,
+					Value:    arg.Value,
+					ValueTwo: arg.ValueTwo,
+					Op:       op,
+				})
+			}
+			out.Syscalls = append(out.Syscalls, entry)
+		}
+	}
+
+	return out, nil
+}
+
+// createHooks resets config.Hooks and, when the spec declares hooks, appends
+// one command hook per prestart, poststart and poststop entry of the spec.
+func createHooks(rspec *specs.Spec, config *configs.Config) {
+	config.Hooks = &configs.Hooks{}
+	if rspec.Hooks == nil {
+		return
+	}
+	hooks := rspec.Hooks
+	for _, h := range hooks.Prestart {
+		config.Hooks.Prestart = append(config.Hooks.Prestart, configs.NewCommandHook(createCommandHook(h)))
+	}
+	for _, h := range hooks.Poststart {
+		config.Hooks.Poststart = append(config.Hooks.Poststart, configs.NewCommandHook(createCommandHook(h)))
+	}
+	for _, h := range hooks.Poststop {
+		config.Hooks.Poststop = append(config.Hooks.Poststop, configs.NewCommandHook(createCommandHook(h)))
+	}
+}
+
+// createCommandHook copies the path, arguments and environment of a spec hook
+// into a configs.Command. A timeout given by the spec is interpreted as a
+// number of seconds; without one the command's Timeout stays nil.
+func createCommandHook(h specs.Hook) configs.Command {
+	cmd := configs.Command{Path: h.Path, Args: h.Args, Env: h.Env}
+	if h.Timeout == nil {
+		return cmd
+	}
+	timeout := time.Duration(*h.Timeout) * time.Second
+	cmd.Timeout = &timeout
+	return cmd
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/capture.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/capture.go
@@ -0,0 +1,27 @@
+package stacktrace
+
+import "runtime"
+
+// Capture captures a stacktrace for the current calling go program
+//
+// userSkip is the number of caller frames to leave out of the trace
+func Capture(userSkip int) Stacktrace {
+	var (
+		frames []Frame
+		lastPC uintptr
+	)
+	// Start one frame above userSkip so Capture itself is never recorded.
+	for depth := userSkip + 1; ; depth++ {
+		pc, file, line, ok := runtime.Caller(depth)
+		// A repeated pc means the walk is no longer advancing; gccgo
+		// currently runs into a loop without this check.
+		if !ok || pc == lastPC {
+			break
+		}
+		lastPC = pc
+		frames = append(frames, NewFrame(pc, file, line))
+	}
+	return Stacktrace{
+		Frames: frames,
+	}
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/frame.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/frame.go
@@ -0,0 +1,38 @@
+package stacktrace
+
+import (
+	"path/filepath"
+	"runtime"
+	"strings"
+)
+
+// NewFrame returns a new stack frame for the provided information
+func NewFrame(pc uintptr, file string, line int) Frame {
+	fn := runtime.FuncForPC(pc)
+	if fn == nil {
+		return Frame{}
+	}
+	pack, name := parseFunctionName(fn.Name())
+	return Frame{
+		Line:     line,
+		File:     filepath.Base(file),
+		Package:  pack,
+		Function: name,
+	}
+}
+
+// parseFunctionName splits name at its last "." and returns the text before
+// and after it; without a "." the first result is empty.
+func parseFunctionName(name string) (string, string) {
+	if dot := strings.LastIndex(name, "."); dot >= 0 {
+		return name[:dot], name[dot+1:]
+	}
+	return "", name
+}
+
+// Frame contains all the information for a stack frame within a go program
+type Frame struct {
+	File     string
+	Function string
+	Package  string
+	Line     int
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/stacktrace.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/stacktrace.go
@@ -0,0 +1,5 @@
+package stacktrace
+
+type Stacktrace struct {
+	Frames []Frame
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go
@@ -0,0 +1,193 @@
+// +build linux
+
+package libcontainer
+
+import (
+	"fmt"
+	"os"
+	"os/exec"
+	"syscall" //only for Exec
+
+	"github.com/opencontainers/runc/libcontainer/apparmor"
+	"github.com/opencontainers/runc/libcontainer/configs"
+	"github.com/opencontainers/runc/libcontainer/keys"
+	"github.com/opencontainers/runc/libcontainer/seccomp"
+	"github.com/opencontainers/runc/libcontainer/system"
+	"github.com/opencontainers/selinux/go-selinux/label"
+
+	"golang.org/x/sys/unix"
+)
+
+type linuxStandardInit struct {
+	pipe          *os.File
+	consoleSocket *os.File
+	parentPid     int
+	fifoFd        int
+	config        *initConfig
+}
+
+// getSessionRingParams returns the session keyring name for this container
+// together with the permission mask to keep and the permission bits to add.
+func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) {
+	// Without user ns we need 'UID' search permissions.
+	var newperms uint32 = 0x80000
+	if l.config.Config.Namespaces.Contains(configs.NEWUSER) {
+		// With user ns we need 'other' search permissions.
+		newperms = 0x8
+	}
+
+	// Create a unique per session container name that we can join in setns;
+	// However, other containers can also join it.
+	ringname := fmt.Sprintf("_ses.%s", l.config.ContainerId)
+	return ringname, 0xffffffff, newperms
+}
+
+func (l *linuxStandardInit) Init() error {
+	if !l.config.Config.NoNewKeyring {
+		ringname, keepperms, newperms := l.getSessionRingParams()
+
+		// Do not inherit the parent's session keyring.
+		sessKeyId, err := keys.JoinSessionKeyring(ringname)
+		if err != nil {
+			return err
+		}
+		// Make session keyring searchable.
+		if err := keys.ModKeyringPerm(sessKeyId, keepperms, newperms); err != nil {
+			return err
+		}
+	}
+
+	if err := setupNetwork(l.config); err != nil {
+		return err
+	}
+	if err := setupRoute(l.config.Config); err != nil {
+		return err
+	}
+
+	label.Init()
+	if err := prepareRootfs(l.pipe, l.config); err != nil {
+		return err
+	}
+	// Set up the console. This has to be done *before* we finalize the rootfs,
+	// but *after* we've given the user the chance to set up all of the mounts
+	// they wanted.
+	if l.config.CreateConsole {
+		if err := setupConsole(l.consoleSocket, l.config, true); err != nil {
+			return err
+		}
+		if err := system.Setctty(); err != nil {
+			return err
+		}
+	}
+
+	// Finish the rootfs setup.
+	if l.config.Config.Namespaces.Contains(configs.NEWNS) {
+		if err := finalizeRootfs(l.config.Config); err != nil {
+			return err
+		}
+	}
+
+	if hostname := l.config.Config.Hostname; hostname != "" {
+		if err := unix.Sethostname([]byte(hostname)); err != nil {
+			return err
+		}
+	}
+	if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
+		return err
+	}
+	if err := label.SetProcessLabel(l.config.ProcessLabel); err != nil {
+		return err
+	}
+
+	for key, value := range l.config.Config.Sysctl {
+		if err := writeSystemProperty(key, value); err != nil {
+			return err
+		}
+	}
+	for _, path := range l.config.Config.ReadonlyPaths {
+		if err := readonlyPath(path); err != nil {
+			return err
+		}
+	}
+	for _, path := range l.config.Config.MaskPaths {
+		if err := maskPath(path); err != nil {
+			return err
+		}
+	}
+	pdeath, err := system.GetParentDeathSignal()
+	if err != nil {
+		return err
+	}
+	if l.config.NoNewPrivileges {
+		if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
+			return err
+		}
+	}
+	// Tell our parent that we're ready to Execv. This must be done before the
+	// Seccomp rules have been applied, because we need to be able to read and
+	// write to a socket.
+	if err := syncParentReady(l.pipe); err != nil {
+		return err
+	}
+	// Without NoNewPrivileges seccomp is a privileged operation, so we need to
+	// do this before dropping capabilities; otherwise do it as late as possible
+	// just before execve so as few syscalls take place after it as possible.
+	if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges {
+		if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
+			return err
+		}
+	}
+	if err := finalizeNamespace(l.config); err != nil {
+		return err
+	}
+	// finalizeNamespace can change user/group which clears the parent death
+	// signal, so we restore it here.
+	if err := pdeath.Restore(); err != nil {
+		return err
+	}
+	// Compare the parent from the initial start of the init process and make
+	// sure that it did not change. If the parent changed, that means it died
+	// and we were reparented to something else so we should just kill ourself
+	// and not cause problems for someone else.
+	if unix.Getppid() != l.parentPid {
+		return unix.Kill(unix.Getpid(), unix.SIGKILL)
+	}
+	// Check for the arg before waiting to make sure it exists and it is
+	// returned as a create time error.
+	name, err := exec.LookPath(l.config.Args[0])
+	if err != nil {
+		return err
+	}
+	// Close the pipe to signal that we have completed our init.
+	l.pipe.Close()
+	// Wait for the FIFO to be opened on the other side before exec-ing the
+	// user process. We open it through /proc/self/fd/$fd, because the fd that
+	// was given to us was an O_PATH fd to the fifo itself. Linux allows us to
+	// re-open an O_PATH fd through /proc.
+	fd, err := unix.Open(fmt.Sprintf("/proc/self/fd/%d", l.fifoFd), unix.O_WRONLY|unix.O_CLOEXEC, 0)
+	if err != nil {
+		return newSystemErrorWithCause(err, "open exec fifo")
+	}
+	if _, err := unix.Write(fd, []byte("0")); err != nil {
+		return newSystemErrorWithCause(err, "write 0 exec fifo")
+	}
+	// Close the O_PATH fifofd fd before exec because the kernel resets
+	// dumpable in the wrong order. This has been fixed in newer kernels, but
+	// we keep this to ensure CVE-2016-9962 doesn't re-emerge on older kernels.
+	// N.B. the core issue itself (passing dirfds to the host filesystem) has
+	// since been resolved.
+	// https://github.com/torvalds/linux/blob/v4.9/fs/exec.c#L1290-L1318
+	unix.Close(l.fifoFd)
+	// Set seccomp as close to execve as possible, so as few syscalls take
+	// place afterward (reducing the amount of syscalls that users need to
+	// enable in their seccomp profiles).
+	if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges {
+		if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil {
+			return newSystemErrorWithCause(err, "init seccomp")
+		}
+	}
+	if err := syscall.Exec(name, l.config.Args[0:], os.Environ()); err != nil {
+		return newSystemErrorWithCause(err, "exec user process")
+	}
+	return nil
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/state_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/state_linux.go
@@ -0,0 +1,255 @@
+// +build linux
+
+package libcontainer
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+
+	"github.com/opencontainers/runc/libcontainer/configs"
+	"github.com/opencontainers/runc/libcontainer/utils"
+
+	"github.com/sirupsen/logrus"
+	"golang.org/x/sys/unix"
+)
+
+// newStateTransitionError builds the error for a rejected transition, naming
+// both states by the string form of their status.
+func newStateTransitionError(from, to containerState) error {
+	fromName, toName := from.status().String(), to.status().String()
+	return &stateTransitionError{From: fromName, To: toName}
+}
+
+// stateTransitionError is the error for an invalid state transition; it
+// records the names of the state left and of the state asked for.
+type stateTransitionError struct {
+	From, To string
+}
+
+// Error implements the error interface.
+func (s *stateTransitionError) Error() string {
+	return "invalid state transition from " + s.From + " to " + s.To
+}
+
+type containerState interface {
+	transition(containerState) error
+	destroy() error
+	status() Status
+}
+
+// destroy runs every cleanup step and returns the first error among them.
+func destroy(c *linuxContainer) error {
+	if !c.config.Namespaces.Contains(configs.NEWPID) {
+		if err := signalAllProcesses(c.cgroupManager, unix.SIGKILL); err != nil {
+			logrus.Warn(err)
+		}
+	}
+	firstErr := c.cgroupManager.Destroy()
+	keepFirst := func(e error) {
+		if firstErr == nil {
+			firstErr = e
+		}
+	}
+	if c.intelRdtManager != nil {
+		keepFirst(c.intelRdtManager.Destroy())
+	}
+	keepFirst(os.RemoveAll(c.root))
+	c.initProcess = nil
+	keepFirst(runPoststopHooks(c))
+	c.state = &stoppedState{c: c}
+	return firstErr
+}
+
+// runPoststopHooks runs the container's poststop hooks in order and returns
+// the error of the first hook that fails; later hooks are then not run.
+func runPoststopHooks(c *linuxContainer) error {
+	if c.config.Hooks == nil {
+		return nil
+	}
+
+	// Every hook is handed the same state description.
+	bundle, annotations := utils.Annotations(c.config.Labels)
+	state := configs.HookState{Version: c.config.Version, ID: c.id, Bundle: bundle, Annotations: annotations}
+	for _, hook := range c.config.Hooks.Poststop {
+		if err := hook.Run(state); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// stoppedState represents a container in a stopped/destroyed state.
+type stoppedState struct {
+	c *linuxContainer
+}
+
+func (b *stoppedState) status() Status {
+	return Stopped
+}
+
+func (b *stoppedState) transition(s containerState) error {
+	switch s.(type) {
+	case *runningState, *restoredState:
+		b.c.state = s
+		return nil
+	case *stoppedState:
+		return nil
+	}
+	return newStateTransitionError(b, s)
+}
+
+func (b *stoppedState) destroy() error {
+	return destroy(b.c)
+}
+
+// runningState represents a container that is currently running.
+type runningState struct {
+	c *linuxContainer
+}
+
+func (r *runningState) status() Status {
+	return Running
+}
+
+func (r *runningState) transition(s containerState) error {
+	switch s.(type) {
+	case *stoppedState:
+		t, err := r.c.runType()
+		if err != nil {
+			return err
+		}
+		if t == Running {
+			return newGenericError(fmt.Errorf("container still running"), ContainerNotStopped)
+		}
+		r.c.state = s
+		return nil
+	case *pausedState:
+		r.c.state = s
+		return nil
+	case *runningState:
+		return nil
+	}
+	return newStateTransitionError(r, s)
+}
+
+// destroy refuses to destroy a container that runType still reports as Running.
+func (r *runningState) destroy() error {
+	switch t, err := r.c.runType(); {
+	case err != nil:
+		return err
+	case t == Running:
+		return newGenericError(fmt.Errorf("container is not destroyed"), ContainerNotStopped)
+	}
+	return destroy(r.c)
+}
+
+type createdState struct {
+	c *linuxContainer
+}
+
+func (i *createdState) status() Status {
+	return Created
+}
+
+func (i *createdState) transition(s containerState) error {
+	switch s.(type) {
+	case *runningState, *pausedState, *stoppedState:
+		i.c.state = s
+		return nil
+	case *createdState:
+		return nil
+	}
+	return newStateTransitionError(i, s)
+}
+
+func (i *createdState) destroy() error {
+	i.c.initProcess.signal(unix.SIGKILL)
+	return destroy(i.c)
+}
+
+// pausedState represents a container that is currently paused.  It cannot be
+// destroyed in a paused state and must transition back to running first.
+type pausedState struct {
+	c *linuxContainer
+}
+
+func (p *pausedState) status() Status {
+	return Paused
+}
+
+func (p *pausedState) transition(s containerState) error {
+	switch s.(type) {
+	case *runningState, *stoppedState:
+		p.c.state = s
+		return nil
+	case *pausedState:
+		return nil
+	}
+	return newStateTransitionError(p, s)
+}
+
+// destroy thaws and destroys the container unless it is Running or Created.
+func (p *pausedState) destroy() error {
+	switch t, err := p.c.runType(); {
+	case err != nil:
+		return err
+	case t == Running || t == Created:
+		return newGenericError(fmt.Errorf("container is paused"), ContainerPaused)
+	}
+	if err := p.c.cgroupManager.Freeze(configs.Thawed); err != nil {
+		return err
+	}
+	return destroy(p.c)
+}
+
+// restoredState is the same as the running state but also has associated checkpoint
+// information that may need to be destroyed when the container is stopped and destroy is called.
+type restoredState struct {
+	imageDir string
+	c        *linuxContainer
+}
+
+func (r *restoredState) status() Status {
+	return Running
+}
+
+func (r *restoredState) transition(s containerState) error {
+	switch s.(type) {
+	case *stoppedState, *runningState:
+		return nil
+	}
+	return newStateTransitionError(r, s)
+}
+
+func (r *restoredState) destroy() error {
+	if _, err := os.Stat(filepath.Join(r.c.root, "checkpoint")); err != nil {
+		if !os.IsNotExist(err) {
+			return err
+		}
+	}
+	return destroy(r.c)
+}
+
+// loadedState is used whenever a container is restored, loaded, or setting additional
+// processes inside and it should not be destroyed when it is exiting.
+type loadedState struct {
+	c *linuxContainer
+	s Status
+}
+
+func (n *loadedState) status() Status {
+	return n.s
+}
+
+func (n *loadedState) transition(s containerState) error {
+	n.c.state = s
+	return nil
+}
+
+func (n *loadedState) destroy() error {
+	if err := n.c.refreshState(); err != nil {
+		return err
+	}
+	return n.c.state.destroy()
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/stats.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/stats.go
@@ -0,0 +1,15 @@
+package libcontainer
+
+type NetworkInterface struct {
+	// Name is the name of the network interface.
+	Name string
+
+	RxBytes   uint64
+	RxPackets uint64
+	RxErrors  uint64
+	RxDropped uint64
+	TxBytes   uint64
+	TxPackets uint64
+	TxErrors  uint64
+	TxDropped uint64
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/stats_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/stats_linux.go
@@ -0,0 +1,10 @@
+package libcontainer
+
+import "github.com/opencontainers/runc/libcontainer/cgroups"
+import "github.com/opencontainers/runc/libcontainer/intelrdt"
+
+type Stats struct {
+	Interfaces    []*NetworkInterface
+	CgroupStats   *cgroups.Stats
+	IntelRdtStats *intelrdt.Stats
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/sync.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/sync.go
@@ -0,0 +1,107 @@
+package libcontainer
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+
+	"github.com/opencontainers/runc/libcontainer/utils"
+)
+
+type syncType string
+
+// Constants that are used for synchronisation between the parent and child
+// during container setup. They come in pairs (with procError being a generic
+// response which is followed by a &genericError).
+//
+// [  child  ] <-> [   parent   ]
+//
+// procHooks   --> [run hooks]
+//             <-- procResume
+//
+// procConsole -->
+//             <-- procConsoleReq
+//  [send(fd)] --> [recv(fd)]
+//             <-- procConsoleAck
+//
+// procReady   --> [final setup]
+//             <-- procRun
+const (
+	procError  syncType = "procError"
+	procReady  syncType = "procReady"
+	procRun    syncType = "procRun"
+	procHooks  syncType = "procHooks"
+	procResume syncType = "procResume"
+)
+
+type syncT struct {
+	Type syncType `json:"type"`
+}
+
+// writeSync is used to write to a synchronisation pipe: the given sync type
+// is wrapped in a syncT message and handed to utils.WriteJSON together with
+// the pipe. An error is returned if there was a problem writing the
+// payload.
+func writeSync(pipe io.Writer, sync syncType) error {
+	msg := syncT{Type: sync}
+	return utils.WriteJSON(pipe, msg)
+}
+
+// readSync is used to read from a synchronisation pipe. An error is returned
+// if we got a genericError, the pipe was closed, or we got an unexpected flag.
+func readSync(pipe io.Reader, expected syncType) error {
+	// One decoder for both messages: it may buffer bytes past the first value.
+	dec := json.NewDecoder(pipe)
+	var procSync syncT
+	if err := dec.Decode(&procSync); err != nil {
+		if err == io.EOF {
+			return fmt.Errorf("parent closed synchronisation channel")
+		}
+		return fmt.Errorf("failed reading sync message from parent: %v", err)
+	}
+	// The flag is only meaningful once the message has decoded successfully.
+	if procSync.Type == procError {
+		var ierr genericError
+		if err := dec.Decode(&ierr); err != nil {
+			return fmt.Errorf("failed reading error from parent: %v", err)
+		}
+		return &ierr
+	}
+	if procSync.Type != expected {
+		return fmt.Errorf("invalid synchronisation flag from parent")
+	}
+	return nil
+}
+
+// parseSync runs fn on each syncT decoded from pipe until io.EOF. A procError
+// message is not passed to fn; the genericError following it is returned.
+func parseSync(pipe io.Reader, fn func(*syncT) error) error {
+	dec := json.NewDecoder(pipe)
+	for {
+		var sync syncT
+		if err := dec.Decode(&sync); err != nil {
+			if err == io.EOF {
+				break
+			}
+			return err
+		}
+
+		// We handle this case outside fn for cleanliness reasons.
+		var ierr *genericError
+		if sync.Type == procError {
+			if err := dec.Decode(&ierr); err != nil && err != io.EOF {
+				return newSystemErrorWithCause(err, "decoding proc error from init")
+			}
+			if ierr != nil {
+				return ierr
+			}
+			// Programmer error.
+			panic("No error following JSON procError payload.")
+		}
+
+		if err := fn(&sync); err != nil {
+			return err
+		}
+	}
+	return nil
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go
@@ -7,10 +7,25 @@ import (
 	"fmt"
 	"os"
 	"os/exec"
-	"syscall"
+	"syscall" // only for exec
 	"unsafe"
+
+	"golang.org/x/sys/unix"
 )

+// PR_SET_CHILD_SUBREAPER is the prctl(2) option number for the "child
+// subreaper" attribute of the calling process: a nonzero arg2 sets the
+// attribute and a zero arg2 unsets it.  When a process is marked as a
+// child subreaper, all of the children that it creates, and their
+// descendants, will be marked as having a subreaper.  In effect, a
+// subreaper fulfills the role of init(1) for its descendant processes.
+// Upon termination of a process that is orphaned (i.e., its immediate
+// parent has already terminated) and marked as having a subreaper, the
+// nearest still living ancestor subreaper will receive a SIGCHLD signal
+// and be able to wait(2) on the process to discover its termination
+// status.
+const PR_SET_CHILD_SUBREAPER = 36
+
 type ParentDeathSignal int

 func (p ParentDeathSignal) Restore() error {
@@ -40,8 +55,16 @@ func Execv(cmd string, args []string, env []string) error {
 	return syscall.Exec(name, args, env)
 }

+// Prlimit issues the prlimit64 syscall for pid and resource, passing limit.
+func Prlimit(pid, resource int, limit unix.Rlimit) error {
+	if _, _, errno := unix.RawSyscall6(unix.SYS_PRLIMIT64, uintptr(pid), uintptr(resource), uintptr(unsafe.Pointer(&limit)), uintptr(unsafe.Pointer(&limit)), 0, 0); errno != 0 {
+		return errno
+	}
+	return nil
+}
+
 func SetParentDeathSignal(sig uintptr) error {
-	if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_PDEATHSIG, sig, 0); err != 0 {
+	if err := unix.Prctl(unix.PR_SET_PDEATHSIG, sig, 0, 0, 0); err != nil {
 		return err
 	}
 	return nil
@@ -49,15 +72,14 @@ func SetParentDeathSignal(sig uintptr) error {

 func GetParentDeathSignal() (ParentDeathSignal, error) {
 	var sig int
-	_, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0)
-	if err != 0 {
+	if err := unix.Prctl(unix.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0, 0, 0); err != nil {
 		return -1, err
 	}
 	return ParentDeathSignal(sig), nil
 }

 func SetKeepCaps() error {
-	if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_KEEPCAPS, 1, 0); err != 0 {
+	if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 1, 0, 0, 0); err != nil {
 		return err
 	}

@@ -65,7 +87,7 @@ func SetKeepCaps() error {
 }

 func ClearKeepCaps() error {
-	if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_KEEPCAPS, 0, 0); err != 0 {
+	if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 0, 0, 0, 0); err != nil {
 		return err
 	}

@@ -73,23 +95,18 @@ func ClearKeepCaps() error {
 }

 func Setctty() error {
-	if _, _, err := syscall.RawSyscall(syscall.SYS_IOCTL, 0, uintptr(syscall.TIOCSCTTY), 0); err != 0 {
+	if err := unix.IoctlSetInt(0, unix.TIOCSCTTY, 0); err != nil {
 		return err
 	}
 	return nil
 }

-/*
- * Detect whether we are currently running in a user namespace.
- * Copied from github.com/lxc/lxd/shared/util.go
- */
+// RunningInUserNS detects whether we are currently running in a user namespace.
+// Copied from github.com/lxc/lxd/shared/util.go
 func RunningInUserNS() bool {
 	file, err := os.Open("/proc/self/uid_map")
 	if err != nil {
-		/*
-		 * This kernel-provided file only exists if user namespaces are
-		 * supported
-		 */
+		// This kernel-provided file only exists if user namespaces are supported
 		return false
 	}
 	defer file.Close()
@@ -112,3 +129,19 @@ func RunningInUserNS() bool {
 	}
 	return true
 }
+
+// SetSubreaper sets the value i as the subreaper setting for the calling process
+func SetSubreaper(i int) error {
+	return unix.Prctl(PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0)
+}
+
+// GetSubreaper returns the subreaper setting for the calling process
+func GetSubreaper() (int, error) {
+	// prctl stores a C int here; a wider variable breaks on big-endian.
+	var i int32
+	if err := unix.Prctl(unix.PR_GET_CHILD_SUBREAPER, uintptr(unsafe.Pointer(&i)), 0, 0, 0); err != nil {
+		return -1, err
+	}
+
+	return int(i), nil
+}
--- a/vendor/github.com/opencontainers/runc/libcontainer/system/proc.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/system/proc.go
@@ -1,27 +1,113 @@
 package system

 import (
+	"fmt"
 	"io/ioutil"
 	"path/filepath"
 	"strconv"
 	"strings"
 )

-// look in /proc to find the process start time so that we can verify
-// that this pid has started after ourself
+// State is the status of a process.
+type State rune
+
+const ( // Only values for Linux 3.14 and later are listed here
+	Dead        State = 'X'
+	DiskSleep   State = 'D'
+	Running     State = 'R'
+	Sleeping    State = 'S'
+	Stopped     State = 'T'
+	TracingStop State = 't'
+	Zombie      State = 'Z'
+)
+
+// String forms of the state from proc(5)'s documentation for
+// /proc/[pid]/status' "State" field.
+func (s State) String() string {
+	switch s {
+	case Dead:
+		return "dead"
+	case DiskSleep:
+		return "disk sleep"
+	case Running:
+		return "running"
+	case Sleeping:
+		return "sleeping"
+	case Stopped:
+		return "stopped"
+	case TracingStop:
+		return "tracing stop"
+	case Zombie:
+		return "zombie"
+	default:
+		return fmt.Sprintf("unknown (%c)", s)
+	}
+}
+
+// Stat_t represents the information from /proc/[pid]/stat, as
+// described in proc(5) with names based on the /proc/[pid]/status
+// fields.
+type Stat_t struct {
+	// PID is the process ID.
+	PID uint
+
+	// Name is the command run by the process.
+	Name string
+
+	// State is the state of the process.
+	State State
+
+	// StartTime is the number of clock ticks after system boot (since
+	// Linux 2.6).
+	StartTime uint64
+}
+
+// Stat returns a Stat_t instance for the specified process.
+func Stat(pid int) (stat Stat_t, err error) {
+	bytes, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat"))
+	if err != nil {
+		return stat, err
+	}
+	return parseStat(string(bytes))
+}
+
+// GetProcessStartTime is deprecated.  Use Stat(pid) and
+// Stat_t.StartTime instead.
 func GetProcessStartTime(pid int) (string, error) {
-	data, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat"))
+	stat, err := Stat(pid)
 	if err != nil {
 		return "", err
 	}
-
-	parts := strings.Split(string(data), " ")
-	// the starttime is located at pos 22
-	// from the man page
-	//
-	// starttime %llu (was %lu before Linux 2.6)
-	// (22)  The  time the process started after system boot.  In kernels before Linux 2.6, this
-	// value was expressed in jiffies.  Since Linux 2.6, the value is expressed in  clock  ticks
-	// (divide by sysconf(_SC_CLK_TCK)).
-	return parts[22-1], nil // starts at 1
+	return fmt.Sprintf("%d", stat.StartTime), nil
+}
+
+func parseStat(data string) (stat Stat_t, err error) {
+	// From proc(5), field 2 could contain space and is inside `(` and `)`.
+	// The following is an example:
+	// 89653 (gunicorn: maste) S 89630 89653 89653 0 -1 4194560 29689 28896 0 3 146 32 76 19 20 0 1 0 2971844 52965376 3920 18446744073709551615 1 1 0 0 0 0 0 16781312 137447943 0 0 0 17 1 0 0 0 0 0 0 0 0 0 0 0 0 0
+	i := strings.LastIndex(data, ")")
+	if i <= 2 || i >= len(data)-1 {
+		return stat, fmt.Errorf("invalid stat data: %q", data)
+	}
+
+	parts := strings.SplitN(data[:i], "(", 2)
+	if len(parts) != 2 {
+		return stat, fmt.Errorf("invalid stat data: %q", data)
+	}
+
+	stat.Name = parts[1]
+	_, err = fmt.Sscanf(parts[0], "%d", &stat.PID)
+	if err != nil {
+		return stat, err
+	}
+
+	// parts indexes should be offset by 3 from the field number given
+	// proc(5), because parts is zero-indexed and we've removed fields
+	// one (PID) and two (Name) in the paren-split.
+	parts = strings.Split(data[i+2:], " ")
+	var state int
+	fmt.Sscanf(parts[3-3], "%c", &state)
+	stat.State = State(state)
+	fmt.Sscanf(parts[22-3], "%d", &stat.StartTime)
+	return stat, nil
 }
--- a/vendor/github.com/opencontainers/runc/libcontainer/system/setns_linux.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/system/setns_linux.go
@@ -1,40 +0,0 @@
-package system
-
-import (
-	"fmt"
-	"runtime"
-	"syscall"
-)
-
-// Via http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=7b21fddd087678a70ad64afc0f632e0f1071b092
-//
-// We need different setns values for the different platforms and arch
-// We are declaring the macro here because the SETNS syscall does not exist in th stdlib
-var setNsMap = map[string]uintptr{
-	"linux/386":     346,
-	"linux/arm64":   268,
-	"linux/amd64":   308,
-	"linux/arm":     375,
-	"linux/ppc":     350,
-	"linux/ppc64":   350,
-	"linux/ppc64le": 350,
-	"linux/s390x":   339,
-}
-
-var sysSetns = setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)]
-
-func SysSetns() uint32 {
-	return uint32(sysSetns)
-}
-
-func Setns(fd uintptr, flags uintptr) error {
-	ns, exists := setNsMap[fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH)]
-	if !exists {
-		return fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH)
-	}
-	_, _, err := syscall.RawSyscall(ns, fd, flags, 0)
-	if err != 0 {
-		return err
-	}
-	return nil
-}
--- a/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_arm.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_arm.go
@@ -1,14 +1,15 @@
-// +build linux,arm
+// +build linux
+// +build 386 arm

 package system

 import (
-	"syscall"
+	"golang.org/x/sys/unix"
 )

 // Setuid sets the uid of the calling thread to the specified uid.
 func Setuid(uid int) (err error) {
-	_, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID32, uintptr(uid), 0, 0)
+	_, _, e1 := unix.RawSyscall(unix.SYS_SETUID32, uintptr(uid), 0, 0)
 	if e1 != 0 {
 		err = e1
 	}
@@ -17,7 +18,7 @@ func Setuid(uid int) (err error) {

 // Setgid sets the gid of the calling thread to the specified gid.
 func Setgid(gid int) (err error) {
-	_, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID32, uintptr(gid), 0, 0)
+	_, _, e1 := unix.RawSyscall(unix.SYS_SETGID32, uintptr(gid), 0, 0)
 	if e1 != 0 {
 		err = e1
 	}
--- a/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_386.go
+++ b/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_386.go
@@ -1,25 +0,0 @@
-// +build linux,386
-
-package system
-
-import (
-	"syscall"
-)
-
-// Setuid sets the uid of the calling thread to the specified uid.
-func Setuid(uid int) (err error) {
-	_, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID, uintptr(uid), 0, 0)
-	if e1 != 0 {
-		err = e1
-	}
-	return
-}
-
-// Setgid sets the gid of the calling thread to the specified gid.
-func Setgid(gid int) (err error) {
-	_, _, e1 := syscall.RawSyscall(syscall.SYS_SETGID32, uintptr(gid), 0, 0)
-	if e1 != 0 {
-		err = e1
-	}
-	return
-}
--- a/Show More
+++ b/Show More