This article analyzes kubelet's sandbox creation and CNI network configuration flow, based on the Kubernetes 1.9 release.
Let's start with Docker's four network modes (a quick docker run illustration follows the list):

  • bridge mode: selected with --net=bridge; this is Docker's default. Docker allocates a network namespace for each container, assigns it an IP, and attaches the container to a virtual bridge on the host.
  • host mode: selected with --net=host. A container started in host mode does not get its own network namespace; it shares the host's. The container has no virtual NIC and no IP of its own, and uses the host's IP and ports directly.
  • none mode: selected with --net=none. The container gets its own network namespace, but Docker performs no network configuration at all; the container has no NIC, IP, or routes, and any networking must be configured externally.
  • container mode: selected with --net=container:<id|name>. The new container shares the network namespace of the specified existing container rather than the host's. It does not create its own NIC or configure its own IP; it shares the other container's IP, port range, and so on.
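
As a quick illustration of the four modes (the image and container names here are arbitrary):

[root@localhost ~]# docker run --net=bridge busybox ip addr           # default: own netns behind docker0
[root@localhost ~]# docker run --net=host busybox ip addr             # shares the host's netns
[root@localhost ~]# docker run --net=none busybox ip addr             # own netns, only a loopback device
[root@localhost ~]# docker run --net=container:web busybox ip addr    # shares container "web"'s netns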

For example, in my cluster the dns pod has three application containers — dns, dnsmasq, and sidecar — plus the pause container, which is the network container. Looking up these container IDs and inspecting each container's NetworkMode shows that, apart from the pause container whose NetworkMode is none, every container's mode is container:7bae079c4926921a1ac8934362331f31eeb2fdb5c9fc0e6d06fcaabd9ef16c29. Given Docker's network modes described above, this tells us that dns, dnsmasq, and sidecar all use the pause container's network namespace. The pause container's mode is none because this cluster uses CNI networking: the container's NIC, IP, routes, and so on are configured by Calico (the CNI plugin deployed in this cluster).
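
This can be verified directly with docker inspect (the dns container ID below is a placeholder for one of the three application containers):

[root@localhost ~]# docker inspect -f '{{.HostConfig.NetworkMode}}' 7bae079c4926
none
[root@localhost ~]# docker inspect -f '{{.HostConfig.NetworkMode}}' <dns-container-id>
container:7bae079c4926921a1ac8934362331f31eeb2fdb5c9fc0e6d06fcaabd9ef16c29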

We trace the sandbox creation flow through kubelet's SyncPod function:

// SyncPod syncs the running pod into the desired pod by executing following steps:
//
//  1. Compute sandbox and container changes.
//  2. Kill pod sandbox if necessary.
//  3. Kill any containers that should not be running.
//  4. Create sandbox if necessary.
//  5. Create init containers.
//  6. Create normal containers.
func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, _ v1.PodStatus, podStatus *kubecontainer.PodStatus, pullSecrets []v1.Secret, backOff *flowcontrol.Backoff) (result kubecontainer.PodSyncResult) {
    // Step 1: Compute sandbox and container changes.
    podContainerChanges := m.computePodActions(pod, podStatus)
    glog.V(3).Infof("computePodActions got %+v for pod %q", podContainerChanges, format.Pod(pod))
    if podContainerChanges.CreateSandbox {
        ref, err := ref.GetReference(legacyscheme.Scheme, pod)
        if err != nil {
            glog.Errorf("Couldn't make a ref to pod %q: '%v'", format.Pod(pod), err)
        }
        if podContainerChanges.SandboxID != "" {
            m.recorder.Eventf(ref, v1.EventTypeNormal, events.SandboxChanged, "Pod sandbox changed, it will be killed and re-created.")
        } else {
            glog.V(4).Infof("SyncPod received new pod %q, will create a sandbox for it", format.Pod(pod))
        }
    }

    // Step 2: Kill the pod if the sandbox has changed.
    if podContainerChanges.KillPod {
        if !podContainerChanges.CreateSandbox {
            glog.V(4).Infof("Stopping PodSandbox for %q because all other containers are dead.", format.Pod(pod))
        } else {
            glog.V(4).Infof("Stopping PodSandbox for %q, will start new one", format.Pod(pod))
        }

        killResult := m.killPodWithSyncResult(pod, kubecontainer.ConvertPodStatusToRunningPod(m.runtimeName, podStatus), nil)
        result.AddPodSyncResult(killResult)
        if killResult.Error() != nil {
            glog.Errorf("killPodWithSyncResult failed: %v", killResult.Error())
            return
        }

        if podContainerChanges.CreateSandbox {
            m.purgeInitContainers(pod, podStatus)
        }
    } else {
        // Step 3: kill any running containers in this pod which are not to keep.
        for containerID, containerInfo := range podContainerChanges.ContainersToKill {
            glog.V(3).Infof("Killing unwanted container %q(id=%q) for pod %q", containerInfo.name, containerID, format.Pod(pod))
            killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, containerInfo.name)
            result.AddSyncResult(killContainerResult)
            if err := m.killContainer(pod, containerID, containerInfo.name, containerInfo.message, nil); err != nil {
                killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error())
                glog.Errorf("killContainer %q(id=%q) for pod %q failed: %v", containerInfo.name, containerID, format.Pod(pod), err)
                return
            }
        }
    }

    // Keep terminated init containers fairly aggressively controlled
    // This is an optimization because container removals are typically handled
    // by container garbage collector.
    m.pruneInitContainersBeforeStart(pod, podStatus)

    // We pass the value of the podIP down to generatePodSandboxConfig and
    // generateContainerConfig, which in turn passes it to various other
    // functions, in order to facilitate functionality that requires this
    // value (hosts file and downward API) and avoid races determining
    // the pod IP in cases where a container requires restart but the
    // podIP isn't in the status manager yet.
    //
    // We default to the IP in the passed-in pod status, and overwrite it if the
    // sandbox needs to be (re)started.
    podIP := ""
    if podStatus != nil {
        podIP = podStatus.IP
    }

    // Step 4: Create a sandbox for the pod if necessary.
    podSandboxID := podContainerChanges.SandboxID
    if podContainerChanges.CreateSandbox {
        var msg string
        var err error

        glog.V(4).Infof("Creating sandbox for pod %q", format.Pod(pod))
        createSandboxResult := kubecontainer.NewSyncResult(kubecontainer.CreatePodSandbox, format.Pod(pod))
        result.AddSyncResult(createSandboxResult)
        podSandboxID, msg, err = m.createPodSandbox(pod, podContainerChanges.Attempt)
        if err != nil {
            createSandboxResult.Fail(kubecontainer.ErrCreatePodSandbox, msg)
            glog.Errorf("createPodSandbox for pod %q failed: %v", format.Pod(pod), err)
            ref, err := ref.GetReference(legacyscheme.Scheme, pod)
            if err != nil {
                glog.Errorf("Couldn't make a ref to pod %q: '%v'", format.Pod(pod), err)
            }
            m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedCreatePodSandBox, "Failed create pod sandbox.")
            return
        }
        glog.V(4).Infof("Created PodSandbox %q for pod %q", podSandboxID, format.Pod(pod))

        podSandboxStatus, err := m.runtimeService.PodSandboxStatus(podSandboxID)
        if err != nil {
            ref, err := ref.GetReference(legacyscheme.Scheme, pod)
            if err != nil {
                glog.Errorf("Couldn't make a ref to pod %q: '%v'", format.Pod(pod), err)
            }
            m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedStatusPodSandBox, "Unable to get pod sandbox status: %v", err)
            glog.Errorf("Failed to get pod sandbox status: %v; Skipping pod %q", err, format.Pod(pod))
            result.Fail(err)
            return
        }

        // If we ever allow updating a pod from non-host-network to
        // host-network, we may use a stale IP.
        if !kubecontainer.IsHostNetworkPod(pod) {
            // Overwrite the podIP passed in the pod status, since we just started the pod sandbox.
            podIP = m.determinePodSandboxIP(pod.Namespace, pod.Name, podSandboxStatus)
            glog.V(4).Infof("Determined the ip %q for pod %q after sandbox changed", podIP, format.Pod(pod))
        }
    }

    // Get podSandboxConfig for containers to start.
    configPodSandboxResult := kubecontainer.NewSyncResult(kubecontainer.ConfigPodSandbox, podSandboxID)
    result.AddSyncResult(configPodSandboxResult)
    podSandboxConfig, err := m.generatePodSandboxConfig(pod, podContainerChanges.Attempt)
    if err != nil {
        message := fmt.Sprintf("GeneratePodSandboxConfig for pod %q failed: %v", format.Pod(pod), err)
        glog.Error(message)
        configPodSandboxResult.Fail(kubecontainer.ErrConfigPodSandbox, message)
        return
    }

    // Step 5: start the init container.
    if container := podContainerChanges.NextInitContainerToStart; container != nil {
        // Start the next init container.
        startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, container.Name)
        result.AddSyncResult(startContainerResult)
        isInBackOff, msg, err := m.doBackOff(pod, container, podStatus, backOff)
        if isInBackOff {
            startContainerResult.Fail(err, msg)
            glog.V(4).Infof("Backing Off restarting init container %+v in pod %v", container, format.Pod(pod))
            return
        }

        glog.V(4).Infof("Creating init container %+v in pod %v", container, format.Pod(pod))
        if msg, err := m.startContainer(podSandboxID, podSandboxConfig, container, pod, podStatus, pullSecrets, podIP); err != nil {
            startContainerResult.Fail(err, msg)
            utilruntime.HandleError(fmt.Errorf("init container start failed: %v: %s", err, msg))
            return
        }

        // Successfully started the container; clear the entry in the failure
        glog.V(4).Infof("Completed init container %q for pod %q", container.Name, format.Pod(pod))
    }

    // Step 6: start containers in podContainerChanges.ContainersToStart.
    for _, idx := range podContainerChanges.ContainersToStart {
        container := &pod.Spec.Containers[idx]
        startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, container.Name)
        result.AddSyncResult(startContainerResult)

        isInBackOff, msg, err := m.doBackOff(pod, container, podStatus, backOff)
        if isInBackOff {
            startContainerResult.Fail(err, msg)
            glog.V(4).Infof("Backing Off restarting container %+v in pod %v", container, format.Pod(pod))
            continue
        }

        glog.V(4).Infof("Creating container %+v in pod %v", container, format.Pod(pod))
        if msg, err := m.startContainer(podSandboxID, podSandboxConfig, container, pod, podStatus, pullSecrets, podIP); err != nil {
            startContainerResult.Fail(err, msg)
            // known errors that are logged in other places are logged at higher levels here to avoid
            // repetitive log spam
            switch {
            case err == images.ErrImagePullBackOff:
                glog.V(3).Infof("container start failed: %v: %s", err, msg)
            default:
                utilruntime.HandleError(fmt.Errorf("container start failed: %v: %s", err, msg))
            }
            continue
        }
    }

    return
}

If the pod is newly created, or its spec has changed in a way that invalidates the existing sandbox, a new sandbox must be created. The key function here is createPodSandbox:

// createPodSandbox creates a pod sandbox and returns (podSandBoxID, message, error).
func (m *kubeGenericRuntimeManager) createPodSandbox(pod *v1.Pod, attempt uint32) (string, string, error) {
    // The config mainly contains the pod's name, namespace, UID, labels,
    // annotations, DNS settings and container port mappings, plus the Linux
    // sandbox config derived from the security.alpha.kubernetes.io/sysctls
    // and security.alpha.kubernetes.io/unsafe-sysctls annotations.
    podSandboxConfig, err := m.generatePodSandboxConfig(pod, attempt)
    if err != nil {
        message := fmt.Sprintf("GeneratePodSandboxConfig for pod %q failed: %v", format.Pod(pod), err)
        glog.Error(message)
        return "", message, err
    }

    // Create pod logs directory
    err = m.osInterface.MkdirAll(podSandboxConfig.LogDirectory, 0755)
    if err != nil {
        message := fmt.Sprintf("Create pod log directory for pod %q failed: %v", format.Pod(pod), err)
        glog.Errorf(message)
        return "", message, err
    }

    podSandBoxID, err := m.runtimeService.RunPodSandbox(podSandboxConfig)
    if err != nil {
        message := fmt.Sprintf("CreatePodSandbox for pod %q failed: %v", format.Pod(pod), err)
        glog.Error(message)
        return "", message, err
    }

    return podSandBoxID, "", nil
}

The function first calls generatePodSandboxConfig to build the sandbox configuration, then MkdirAll to create the pod's log directory, and finally RunPodSandbox to create and start the pause container. RunPodSandbox is shown below:

// RunPodSandbox creates and starts a pod-level sandbox. Runtimes should ensure
// the sandbox is in ready state.
// For docker, PodSandbox is implemented by a container holding the network
// namespace for the pod.
// Note: docker doesn't use LogDirectory (yet).
func (ds *dockerService) RunPodSandbox(config *runtimeapi.PodSandboxConfig) (id string, err error) {
    // Step 1: Pull the image for the sandbox.
    image := defaultSandboxImage
    podSandboxImage := ds.podSandboxImage
    if len(podSandboxImage) != 0 {
        image = podSandboxImage
    }

    // NOTE: To use a custom sandbox image in a private repository, users need to configure the nodes with credentials properly.
    // see: http://kubernetes.io/docs/user-guide/images/#configuring-nodes-to-authenticate-to-a-private-repository
    // Only pull sandbox image when it's not present - v1.PullIfNotPresent.
    if err := ensureSandboxImageExists(ds.client, image); err != nil {
        return "", err
    }

    // Step 2: Create the sandbox container.
    createConfig, err := ds.makeSandboxDockerConfig(config, image)
    if err != nil {
        return "", fmt.Errorf("failed to make sandbox docker config for pod %q: %v", config.Metadata.Name, err)
    }
    createResp, err := ds.client.CreateContainer(*createConfig)
    if err != nil {
        createResp, err = recoverFromCreationConflictIfNeeded(ds.client, *createConfig, err)
    }

    if err != nil || createResp == nil {
        return "", fmt.Errorf("failed to create a sandbox for pod %q: %v", config.Metadata.Name, err)
    }

    ds.setNetworkReady(createResp.ID, false)
    defer func(e *error) {
        // Set networking ready depending on the error return of
        // the parent function
        if *e == nil {
            ds.setNetworkReady(createResp.ID, true)
        }
    }(&err)

    // Step 3: Create Sandbox Checkpoint.
    if err = ds.checkpointHandler.CreateCheckpoint(createResp.ID, constructPodSandboxCheckpoint(config)); err != nil {
        return createResp.ID, err
    }

    // Step 4: Start the sandbox container.
    // Assume kubelet's garbage collector would remove the sandbox later, if
    // startContainer failed.
    err = ds.client.StartContainer(createResp.ID)
    if err != nil {
        return createResp.ID, fmt.Errorf("failed to start sandbox container for pod %q: %v", config.Metadata.Name, err)
    }

    // Rewrite resolv.conf file generated by docker.
    // NOTE: cluster dns settings aren't passed anymore to docker api in all cases,
    // not only for pods with host network: the resolver conf will be overwritten
    // after sandbox creation to override docker's behaviour. This resolv.conf
    // file is shared by all containers of the same pod, and needs to be modified
    // only once per pod.
    if dnsConfig := config.GetDnsConfig(); dnsConfig != nil {
        containerInfo, err := ds.client.InspectContainer(createResp.ID)
        if err != nil {
            return createResp.ID, fmt.Errorf("failed to inspect sandbox container for pod %q: %v", config.Metadata.Name, err)
        }

        if err := rewriteResolvFile(containerInfo.ResolvConfPath, dnsConfig.Servers, dnsConfig.Searches, dnsConfig.Options); err != nil {
            return createResp.ID, fmt.Errorf("rewrite resolv.conf failed for pod %q: %v", config.Metadata.Name, err)
        }
    }

    // Do not invoke network plugins if in hostNetwork mode.
    if nsOptions := config.GetLinux().GetSecurityContext().GetNamespaceOptions(); nsOptions != nil && nsOptions.HostNetwork {
        return createResp.ID, nil
    }

    // Step 5: Setup networking for the sandbox.
    // All pod networking is setup by a CNI plugin discovered at startup time.
    // This plugin assigns the pod ip, sets up routes inside the sandbox,
    // creates interfaces etc. In theory, its jurisdiction ends with pod
    // sandbox networking, but it might insert iptables rules or open ports
    // on the host as well, to satisfy parts of the pod spec that aren't
    // recognized by the CNI standard yet.
    cID := kubecontainer.BuildContainerID(runtimeName, createResp.ID)
    err = ds.network.SetUpPod(config.GetMetadata().Namespace, config.GetMetadata().Name, cID, config.Annotations)
    if err != nil {
        // TODO(random-liu): Do we need to teardown network here?
        if err := ds.client.StopContainer(createResp.ID, defaultSandboxGracePeriod); err != nil {
            glog.Warningf("Failed to stop sandbox container %q for pod %q: %v", createResp.ID, config.Metadata.Name, err)
        }
    }
    return createResp.ID, err
}

The flow of RunPodSandbox is fairly clear. Remember the pause image configured in the kubelet startup flags (--pod-infra-container-image)? ensureSandboxImageExists makes sure that image is present on the node, pulling it first if it is not. makeSandboxDockerConfig then converts the PodSandboxConfig into Docker's ContainerCreateConfig, the pause container is created from that config, and finally it is started. Once the container is up, DNS is configured, and this part is interesting: the code inspects the container with docker inspect and then rewrites the container's resolv.conf file, which lives on the host:

[root@localhost ~]# docker inspect 7bae079c4926 -f={{.ResolvConfPath}}
/var/lib/docker/containers/7bae079c4926921a1ac8934362331f31eeb2fdb5c9fc0e6d06fcaabd9ef16c29/resolv.conf

Now look at the rewriteResolvFile function:

// rewriteResolvFile rewrites resolv.conf file generated by docker.
func rewriteResolvFile(resolvFilePath string, dns []string, dnsSearch []string, dnsOptions []string) error {
    if len(resolvFilePath) == 0 {
        glog.Errorf("ResolvConfPath is empty.")
        return nil
    }

    if _, err := os.Stat(resolvFilePath); os.IsNotExist(err) {
        return fmt.Errorf("ResolvConfPath %q does not exist", resolvFilePath)
    }

    var resolvFileContent []string
    for _, srv := range dns {
        resolvFileContent = append(resolvFileContent, "nameserver "+srv)
    }

    if len(dnsSearch) > 0 {
        resolvFileContent = append(resolvFileContent, "search "+strings.Join(dnsSearch, " "))
    }

    if len(dnsOptions) > 0 {
        resolvFileContent = append(resolvFileContent, "options "+strings.Join(dnsOptions, " "))
    }

    if len(resolvFileContent) > 0 {
        resolvFileContentStr := strings.Join(resolvFileContent, "\n")
        resolvFileContentStr += "\n"

        glog.V(4).Infof("Will attempt to re-write config file %s with: \n%s", resolvFilePath, resolvFileContent)
        if err := rewriteFile(resolvFilePath, resolvFileContentStr); err != nil {
            glog.Errorf("resolv.conf could not be updated: %v", err)
            return err
        }
    }

    return nil
}

See what happened? The function simply rewrites /var/lib/docker/containers/7bae079c4926921a1ac8934362331f31eeb2fdb5c9fc0e6d06fcaabd9ef16c29/resolv.conf on the host. If you then log into the container and cat resolv.conf, you see exactly the output of rewriteResolvFile:

nameserver 10.96.0.10
search default.svc.cluster.local svc.cluster.local cluster.local
options ndots:5

At this point, if the pod uses host network mode, no CNI plugin needs to be invoked; otherwise SetUpPod is called to set up the network of the pod's pause container.

func (pm *PluginManager) SetUpPod(podNamespace, podName string, id kubecontainer.ContainerID, annotations map[string]string) error {
    defer recordOperation("set_up_pod", time.Now())
    fullPodName := kubecontainer.BuildPodFullName(podName, podNamespace)
    pm.podLock(fullPodName).Lock()
    defer pm.podUnlock(fullPodName)

    glog.V(3).Infof("Calling network plugin %s to set up pod %q", pm.plugin.Name(), fullPodName)
    if err := pm.plugin.SetUpPod(podNamespace, podName, id, annotations); err != nil {
        return fmt.Errorf("NetworkPlugin %s failed to set up pod %q network: %v", pm.plugin.Name(), fullPodName, err)
    }

    return nil
}

This delegates to the plugin's SetUpPod method. Here plugin is an interface; which implementation is used is decided by the kubelet's --network-plugin startup flag.
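
For reference, the interface is defined in pkg/kubelet/network/plugins.go; abridged roughly as follows (some methods omitted for brevity):

type NetworkPlugin interface {
    // Init is called exactly once before any other method.
    Init(host Host, hairpinMode kubeletconfig.HairpinMode, nonMasqueradeCIDR string, mtu int) error
    // Name returns the plugin's name, matched against --network-plugin.
    Name() string
    // Capabilities returns the set of capabilities the plugin advertises.
    Capabilities() utilsets.Int
    // SetUpPod is called after the pod's infra (pause) container is created.
    SetUpPod(namespace string, name string, podSandboxID kubecontainer.ContainerID, annotations map[string]string) error
    // TearDownPod is called before the pod's infra container is deleted.
    TearDownPod(namespace string, name string, podSandboxID kubecontainer.ContainerID) error
    // GetPodNetworkStatus reports the pod's primary IP.
    GetPodNetworkStatus(namespace string, name string, podSandboxID kubecontainer.ContainerID) (*PodNetworkStatus, error)
    // Status returns nil iff the plugin is healthy.
    Status() error
}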

func (plugin *cniNetworkPlugin) SetUpPod(namespace string, name string, id kubecontainer.ContainerID, annotations map[string]string) error {
    if err := plugin.checkInitialized(); err != nil {
        return err
    }
    netnsPath, err := plugin.host.GetNetNS(id.ID)
    if err != nil {
        return fmt.Errorf("CNI failed to retrieve network namespace path: %v", err)
    }

    // Windows doesn't have loNetwork. It comes only with Linux
    if plugin.loNetwork != nil {
        if _, err = plugin.addToNetwork(plugin.loNetwork, name, namespace, id, netnsPath); err != nil {
            glog.Errorf("Error while adding to cni lo network: %s", err)
            return err
        }
    }

    _, err = plugin.addToNetwork(plugin.getDefaultNetwork(), name, namespace, id, netnsPath)
    if err != nil {
        glog.Errorf("Error while adding to cni network: %s", err)
        return err
    }

    return err
}

GetNetNS returns the path of the container's network namespace. On non-Windows systems, addToNetwork is first invoked for the loNetwork to configure the loopback device, and then invoked again for the default network to configure the pause container's eth0 interface.
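
In dockershim, GetNetNS builds that path from the sandbox container's PID (/proc/<pid>/ns/net), so it can be reproduced by hand on a node (the PID and inode below are illustrative):

[root@localhost ~]# docker inspect -f '{{.State.Pid}}' 7bae079c4926
12345
[root@localhost ~]# ls -l /proc/12345/ns/net
lrwxrwxrwx. 1 root root 0 Jan  1 00:00 /proc/12345/ns/net -> net:[4026532441]

Now let's look at the plugin's getDefaultNetwork function: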

func (plugin *cniNetworkPlugin) getDefaultNetwork() *cniNetwork {
    plugin.RLock()
    defer plugin.RUnlock()
    return plugin.defaultNetwork
}

getDefaultNetwork itself just returns the cached plugin.defaultNetwork; that field is populated by getDefaultCNINetwork when the plugin syncs its network configuration:

func getDefaultCNINetwork(pluginDir, binDir, vendorCNIDirPrefix string) (*cniNetwork, error) {
    if pluginDir == "" {
        pluginDir = DefaultNetDir
    }
    files, err := libcni.ConfFiles(pluginDir, []string{".conf", ".conflist", ".json"})
    switch {
    case err != nil:
        return nil, err
    case len(files) == 0:
        return nil, fmt.Errorf("No networks found in %s", pluginDir)
    }

    sort.Strings(files)
    for _, confFile := range files {
        var confList *libcni.NetworkConfigList
        if strings.HasSuffix(confFile, ".conflist") {
            confList, err = libcni.ConfListFromFile(confFile)
            if err != nil {
                glog.Warningf("Error loading CNI config list file %s: %v", confFile, err)
                continue
            }
        } else {
            conf, err := libcni.ConfFromFile(confFile)
            if err != nil {
                glog.Warningf("Error loading CNI config file %s: %v", confFile, err)
                continue
            }
            // Ensure the config has a "type" so we know what plugin to run.
            // Also catches the case where somebody put a conflist into a conf file.
            if conf.Network.Type == "" {
                glog.Warningf("Error loading CNI config file %s: no 'type'; perhaps this is a .conflist?", confFile)
                continue
            }

            confList, err = libcni.ConfListFromConf(conf)
            if err != nil {
                glog.Warningf("Error converting CNI config file %s to list: %v", confFile, err)
                continue
            }
        }
        if len(confList.Plugins) == 0 {
            glog.Warningf("CNI config list %s has no networks, skipping", confFile)
            continue
        }
        confType := confList.Plugins[0].Network.Type

        // Search for vendor-specific plugins as well as default plugins in the CNI codebase.
        vendorDir := vendorCNIDir(vendorCNIDirPrefix, confType)
        cninet := &libcni.CNIConfig{
            Path: []string{vendorDir, binDir},
        }
        network := &cniNetwork{name: confList.Name, NetworkConfig: confList, CNIConfig: cninet}
        return network, nil
    }
    return nil, fmt.Errorf("No valid networks found in %s", pluginDir)
}

getDefaultCNINetwork locates the CNI binaries and configuration directory from the kubelet's CNI flags; if they are not set, the default netDir is /etc/cni/net.d and the default binDir is /opt/cni/bin. The function scans netDir for files with the .conf, .conflist, or .json suffix, sorts them, and builds a cniNetwork object from the first valid configuration; that object contains the CNI plugin name, the parsed network config, and the search paths for the plugin binaries.
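
For instance, a Calico install typically drops a file such as 10-calico.conflist into /etc/cni/net.d; the following is a simplified, hypothetical example (field names and values vary by installation):

{
    "name": "k8s-pod-network",
    "cniVersion": "0.3.0",
    "plugins": [
        {
            "type": "calico",
            "ipam": { "type": "calico-ipam" },
            "kubernetes": { "kubeconfig": "/etc/cni/net.d/calico-kubeconfig" }
        },
        {
            "type": "portmap",
            "capabilities": { "portMappings": true }
        }
    ]
}

Because the file ends in .conflist it is parsed with libcni.ConfListFromFile, and since the first plugin's type is calico, that is the binary name the kubelet will later resolve in binDir.
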
Next, look at the plugin's addToNetwork function:

func (plugin *cniNetworkPlugin) addToNetwork(network *cniNetwork, podName string, podNamespace string, podSandboxID kubecontainer.ContainerID, podNetnsPath string) (cnitypes.Result, error) {
    rt, err := plugin.buildCNIRuntimeConf(podName, podNamespace, podSandboxID, podNetnsPath)
    if err != nil {
        glog.Errorf("Error adding network when building cni runtime conf: %v", err)
        return nil, err
    }

    netConf, cniNet := network.NetworkConfig, network.CNIConfig
    glog.V(4).Infof("About to add CNI network %v (type=%v)", netConf.Name, netConf.Plugins[0].Network.Type)
    res, err := cniNet.AddNetworkList(netConf, rt)
    if err != nil {
        glog.Errorf("Error adding network: %v", err)
        return nil, err
    }

    return res, nil
}

buildCNIRuntimeConf assembles the CNI runtime configuration from the pause container ID, its network namespace path, and the pod's name and namespace. Note that the container-side interface name is fixed to eth0 (network.DefaultInterfaceName). The function is shown below:

func (plugin *cniNetworkPlugin) buildCNIRuntimeConf(podName string, podNs string, podSandboxID kubecontainer.ContainerID, podNetnsPath string) (*libcni.RuntimeConf, error) {
    glog.V(4).Infof("Got netns path %v", podNetnsPath)
    glog.V(4).Infof("Using podns path %v", podNs)

    rt := &libcni.RuntimeConf{
        ContainerID: podSandboxID.ID,
        NetNS:       podNetnsPath,
        IfName:      network.DefaultInterfaceName,
        Args: [][2]string{
            {"IgnoreUnknown", "1"},
            {"K8S_POD_NAMESPACE", podNs},
            {"K8S_POD_NAME", podName},
            {"K8S_POD_INFRA_CONTAINER_ID", podSandboxID.ID},
        },
    }

    // port mappings are a cni capability-based args, rather than parameters
    // to a specific plugin
    portMappings, err := plugin.host.GetPodPortMappings(podSandboxID.ID)
    if err != nil {
        return nil, fmt.Errorf("could not retrieve port mappings: %v", err)
    }
    portMappingsParam := make([]cniPortMapping, 0, len(portMappings))
    for _, p := range portMappings {
        if p.HostPort <= 0 {
            continue
        }
        portMappingsParam = append(portMappingsParam, cniPortMapping{
            HostPort:      p.HostPort,
            ContainerPort: p.ContainerPort,
            Protocol:      strings.ToLower(string(p.Protocol)),
            HostIP:        p.HostIP,
        })
    }
    rt.CapabilityArgs = map[string]interface{}{
        "portMappings": portMappingsParam,
    }

    return rt, nil
}
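
When the plugin binary is eventually exec'ed, libcni hands this RuntimeConf over as environment variables, as defined by the CNI spec, with the network configuration JSON fed in on stdin. For the dns pod above, the invocation environment would look roughly like this (values illustrative):

CNI_COMMAND=ADD
CNI_CONTAINERID=7bae079c4926921a1ac8934362331f31eeb2fdb5c9fc0e6d06fcaabd9ef16c29
CNI_NETNS=/proc/12345/ns/net
CNI_IFNAME=eth0
CNI_ARGS=IgnoreUnknown=1;K8S_POD_NAMESPACE=kube-system;K8S_POD_NAME=kube-dns-598d7bf7d4-abcde;K8S_POD_INFRA_CONTAINER_ID=7bae079c4926921a1ac8934362331f31eeb2fdb5c9fc0e6d06fcaabd9ef16c29
CNI_PATH=/opt/cni/bin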

Next, AddNetworkList is executed:

// AddNetworkList executes a sequence of plugins with the ADD command
func (c *CNIConfig) AddNetworkList(list *NetworkConfigList, rt *RuntimeConf) (types.Result, error) {
    var prevResult types.Result
    for _, net := range list.Plugins {
        pluginPath, err := invoke.FindInPath(net.Network.Type, c.Path)
        if err != nil {
            return nil, err
        }

        newConf, err := buildOneConfig(list, net, prevResult, rt)
        if err != nil {
            return nil, err
        }

        prevResult, err = invoke.ExecPluginWithResult(pluginPath, newConf.Bytes, c.args("ADD", rt))
        if err != nil {
            return nil, err
        }
    }

    return prevResult, nil
}

This function iterates over the plugins in the list; for each one, invoke.FindInPath looks up a binary named after the CNI type in the configured paths and returns its full path. buildOneConfig then merges the previous plugin's result into the next plugin's config, and ExecPluginWithResult executes the CNI binary with that config, the RuntimeConf, and the ADD command, where ADD means attaching the container to the network.
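
To make the hand-off concrete, below is a minimal sketch of what such a plugin binary can look like, written against the containernetworking/cni skel helper of that era (v0.6.x); all actual network plumbing is elided:

package main

import (
    "github.com/containernetworking/cni/pkg/skel"
    "github.com/containernetworking/cni/pkg/types/current"
    "github.com/containernetworking/cni/pkg/version"
)

// cmdAdd handles CNI_COMMAND=ADD. args carries what kubelet put into
// libcni.RuntimeConf: ContainerID, Netns, IfName ("eth0") and Args
// (K8S_POD_NAMESPACE, K8S_POD_NAME and so on); args.StdinData is the
// network configuration JSON.
func cmdAdd(args *skel.CmdArgs) error {
    // A real plugin would create a veth pair here, move one end into
    // args.Netns, assign an IP via IPAM and install routes.
    result := &current.Result{}
    // Print writes the result JSON to stdout; libcni parses it into the
    // prevResult seen in AddNetworkList above.
    return result.Print()
}

// cmdDel handles CNI_COMMAND=DEL: release the IP and remove the interface.
func cmdDel(args *skel.CmdArgs) error {
    return nil
}

func main() {
    skel.PluginMain(cmdAdd, cmdDel, version.All)
}
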
That completes kubelet's part of the network setup. In summary: when creating a new pod, kubelet first creates a sandbox (pause) container, builds a CNI runtime configuration from the pod's spec and the kubelet's CNI flags, and finally invokes the CNI plugin binary to configure the container's network. (The internals of the CNI plugin invocation are analyzed in the next section.)
