kubelet Sandbox Creation and Calico CNI Network Configuration Flow (Part 1)
This article analyzes kubelet sandbox creation and the CNI network configuration flow, based on Kubernetes release 1.9.
First, a quick review of Docker's four network modes:
- bridge mode: selected with --net=bridge; this is Docker's default. In bridge mode Docker allocates a network namespace for each container, assigns it an IP, and connects the container to a virtual bridge on the host.
- host mode: selected with --net=host. A container started in host mode does not get its own network namespace; it shares the host's. The container gets no virtual NIC or IP configuration of its own and uses the host's IP and ports directly.
- none mode: selected with --net=none. The container gets its own network namespace, but Docker performs no network configuration at all: the container has no NIC, IP, or routes, and you have to add a NIC, configure an IP, and so on yourself.
- container mode: selected with --net=container:<name|id>. The newly created container shares the network namespace of an existing container rather than the host's. It does not create its own NIC or configure its own IP; it shares the specified container's IP and port range.
For example, in my cluster the dns pod has three business containers (dns, dnsmasq and sidecar) plus the pause container, which holds the pod's network. Looking up these container IDs and checking each container's NetworkMode shows that the pause container's NetworkMode is none, while all the others are container:7bae079c4926921a1ac8934362331f31eeb2fdb5c9fc0e6d06fcaabd9ef16c29. Given Docker's network modes, this tells us that dns, dnsmasq and sidecar all use the pause container's network namespace. The pause container's mode is none because this cluster uses a CNI network configuration: the container's NIC, IP, routes and so on are set up by calico, the CNI plugin used in the cluster.
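This is easy to verify on the node with docker inspect. The first ID below is the pause container from my cluster; <dns-container-id> is a placeholder for the ID of one of the three business containers, which I have not listed here:
[root@localhost ~]# docker inspect 7bae079c4926 -f={{.HostConfig.NetworkMode}}
none
[root@localhost ~]# docker inspect <dns-container-id> -f={{.HostConfig.NetworkMode}}
container:7bae079c4926921a1ac8934362331f31eeb2fdb5c9fc0e6d06fcaabd9ef16c29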
Let's walk through sandbox creation starting from kubelet's SyncPod function:
// SyncPod syncs the running pod into the desired pod by executing following steps:
//
// 1. Compute sandbox and container changes.
// 2. Kill pod sandbox if necessary.
// 3. Kill any containers that should not be running.
// 4. Create sandbox if necessary.
// 5. Create init containers.
// 6. Create normal containers.
func (m *kubeGenericRuntimeManager) SyncPod(pod *v1.Pod, _ v1.PodStatus, podStatus *kubecontainer.PodStatus, pullSecrets []v1.Secret, backOff *flowcontrol.Backoff) (result kubecontainer.PodSyncResult) {
    // Step 1: Compute sandbox and container changes.
    podContainerChanges := m.computePodActions(pod, podStatus)
    glog.V(3).Infof("computePodActions got %+v for pod %q", podContainerChanges, format.Pod(pod))
    if podContainerChanges.CreateSandbox {
        ref, err := ref.GetReference(legacyscheme.Scheme, pod)
        if err != nil {
            glog.Errorf("Couldn't make a ref to pod %q: '%v'", format.Pod(pod), err)
        }
        if podContainerChanges.SandboxID != "" {
            m.recorder.Eventf(ref, v1.EventTypeNormal, events.SandboxChanged, "Pod sandbox changed, it will be killed and re-created.")
        } else {
            glog.V(4).Infof("SyncPod received new pod %q, will create a sandbox for it", format.Pod(pod))
        }
    }

    // Step 2: Kill the pod if the sandbox has changed.
    if podContainerChanges.KillPod {
        if !podContainerChanges.CreateSandbox {
            glog.V(4).Infof("Stopping PodSandbox for %q because all other containers are dead.", format.Pod(pod))
        } else {
            glog.V(4).Infof("Stopping PodSandbox for %q, will start new one", format.Pod(pod))
        }

        killResult := m.killPodWithSyncResult(pod, kubecontainer.ConvertPodStatusToRunningPod(m.runtimeName, podStatus), nil)
        result.AddPodSyncResult(killResult)
        if killResult.Error() != nil {
            glog.Errorf("killPodWithSyncResult failed: %v", killResult.Error())
            return
        }

        if podContainerChanges.CreateSandbox {
            m.purgeInitContainers(pod, podStatus)
        }
    } else {
        // Step 3: kill any running containers in this pod which are not to keep.
        for containerID, containerInfo := range podContainerChanges.ContainersToKill {
            glog.V(3).Infof("Killing unwanted container %q(id=%q) for pod %q", containerInfo.name, containerID, format.Pod(pod))
            killContainerResult := kubecontainer.NewSyncResult(kubecontainer.KillContainer, containerInfo.name)
            result.AddSyncResult(killContainerResult)
            if err := m.killContainer(pod, containerID, containerInfo.name, containerInfo.message, nil); err != nil {
                killContainerResult.Fail(kubecontainer.ErrKillContainer, err.Error())
                glog.Errorf("killContainer %q(id=%q) for pod %q failed: %v", containerInfo.name, containerID, format.Pod(pod), err)
                return
            }
        }
    }

    // Keep terminated init containers fairly aggressively controlled
    // This is an optimization because container removals are typically handled
    // by container garbage collector.
    m.pruneInitContainersBeforeStart(pod, podStatus)

    // We pass the value of the podIP down to generatePodSandboxConfig and
    // generateContainerConfig, which in turn passes it to various other
    // functions, in order to facilitate functionality that requires this
    // value (hosts file and downward API) and avoid races determining
    // the pod IP in cases where a container requires restart but the
    // podIP isn't in the status manager yet.
    //
    // We default to the IP in the passed-in pod status, and overwrite it if the
    // sandbox needs to be (re)started.
    podIP := ""
    if podStatus != nil {
        podIP = podStatus.IP
    }

    // Step 4: Create a sandbox for the pod if necessary.
    podSandboxID := podContainerChanges.SandboxID
    if podContainerChanges.CreateSandbox {
        var msg string
        var err error

        glog.V(4).Infof("Creating sandbox for pod %q", format.Pod(pod))
        createSandboxResult := kubecontainer.NewSyncResult(kubecontainer.CreatePodSandbox, format.Pod(pod))
        result.AddSyncResult(createSandboxResult)
        podSandboxID, msg, err = m.createPodSandbox(pod, podContainerChanges.Attempt)
        if err != nil {
            createSandboxResult.Fail(kubecontainer.ErrCreatePodSandbox, msg)
            glog.Errorf("createPodSandbox for pod %q failed: %v", format.Pod(pod), err)
            ref, err := ref.GetReference(legacyscheme.Scheme, pod)
            if err != nil {
                glog.Errorf("Couldn't make a ref to pod %q: '%v'", format.Pod(pod), err)
            }
            m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedCreatePodSandBox, "Failed create pod sandbox.")
            return
        }
        glog.V(4).Infof("Created PodSandbox %q for pod %q", podSandboxID, format.Pod(pod))

        podSandboxStatus, err := m.runtimeService.PodSandboxStatus(podSandboxID)
        if err != nil {
            ref, err := ref.GetReference(legacyscheme.Scheme, pod)
            if err != nil {
                glog.Errorf("Couldn't make a ref to pod %q: '%v'", format.Pod(pod), err)
            }
            m.recorder.Eventf(ref, v1.EventTypeWarning, events.FailedStatusPodSandBox, "Unable to get pod sandbox status: %v", err)
            glog.Errorf("Failed to get pod sandbox status: %v; Skipping pod %q", err, format.Pod(pod))
            result.Fail(err)
            return
        }

        // If we ever allow updating a pod from non-host-network to
        // host-network, we may use a stale IP.
        if !kubecontainer.IsHostNetworkPod(pod) {
            // Overwrite the podIP passed in the pod status, since we just started the pod sandbox.
            podIP = m.determinePodSandboxIP(pod.Namespace, pod.Name, podSandboxStatus)
            glog.V(4).Infof("Determined the ip %q for pod %q after sandbox changed", podIP, format.Pod(pod))
        }
    }

    // Get podSandboxConfig for containers to start.
    configPodSandboxResult := kubecontainer.NewSyncResult(kubecontainer.ConfigPodSandbox, podSandboxID)
    result.AddSyncResult(configPodSandboxResult)
    podSandboxConfig, err := m.generatePodSandboxConfig(pod, podContainerChanges.Attempt)
    if err != nil {
        message := fmt.Sprintf("GeneratePodSandboxConfig for pod %q failed: %v", format.Pod(pod), err)
        glog.Error(message)
        configPodSandboxResult.Fail(kubecontainer.ErrConfigPodSandbox, message)
        return
    }

    // Step 5: start the init container.
    if container := podContainerChanges.NextInitContainerToStart; container != nil {
        // Start the next init container.
        startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, container.Name)
        result.AddSyncResult(startContainerResult)
        isInBackOff, msg, err := m.doBackOff(pod, container, podStatus, backOff)
        if isInBackOff {
            startContainerResult.Fail(err, msg)
            glog.V(4).Infof("Backing Off restarting init container %+v in pod %v", container, format.Pod(pod))
            return
        }

        glog.V(4).Infof("Creating init container %+v in pod %v", container, format.Pod(pod))
        if msg, err := m.startContainer(podSandboxID, podSandboxConfig, container, pod, podStatus, pullSecrets, podIP); err != nil {
            startContainerResult.Fail(err, msg)
            utilruntime.HandleError(fmt.Errorf("init container start failed: %v: %s", err, msg))
            return
        }

        // Successfully started the container; clear the entry in the failure
        glog.V(4).Infof("Completed init container %q for pod %q", container.Name, format.Pod(pod))
    }

    // Step 6: start containers in podContainerChanges.ContainersToStart.
    for _, idx := range podContainerChanges.ContainersToStart {
        container := &pod.Spec.Containers[idx]
        startContainerResult := kubecontainer.NewSyncResult(kubecontainer.StartContainer, container.Name)
        result.AddSyncResult(startContainerResult)

        isInBackOff, msg, err := m.doBackOff(pod, container, podStatus, backOff)
        if isInBackOff {
            startContainerResult.Fail(err, msg)
            glog.V(4).Infof("Backing Off restarting container %+v in pod %v", container, format.Pod(pod))
            continue
        }

        glog.V(4).Infof("Creating container %+v in pod %v", container, format.Pod(pod))
        if msg, err := m.startContainer(podSandboxID, podSandboxConfig, container, pod, podStatus, pullSecrets, podIP); err != nil {
            startContainerResult.Fail(err, msg)
            // known errors that are logged in other places are logged at higher levels here to avoid
            // repetitive log spam
            switch {
            case err == images.ErrImagePullBackOff:
                glog.V(3).Infof("container start failed: %v: %s", err, msg)
            default:
                utilruntime.HandleError(fmt.Errorf("container start failed: %v: %s", err, msg))
            }
            continue
        }
    }

    return
}
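For reference, the podContainerChanges value that drives all of these steps is a podActions struct; abridged from kuberuntime_manager.go, it looks like this:
// podActions keeps information on what to do for a pod (abridged).
type podActions struct {
    KillPod       bool   // stop all running containers and the sandbox
    CreateSandbox bool   // whether a new sandbox must be created
    SandboxID     string // id of the existing sandbox, if any
    Attempt       uint32 // attempt number of creating sandboxes for this pod

    // The next init container to start, the indexes (into pod.Spec.Containers)
    // of the containers to start, and the containers that must be killed.
    NextInitContainerToStart *v1.Container
    ContainersToStart        []int
    ContainersToKill         map[kubecontainer.ContainerID]containerToKillInfo
}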
If the pod is newly created, or its spec has changed in a way that invalidates the existing sandbox, a new sandbox has to be generated. The interesting part is the createPodSandbox function:
// createPodSandbox creates a pod sandbox and returns (podSandBoxID, message, error).
func (m *kubeGenericRuntimeManager) createPodSandbox(pod *v1.Pod, attempt uint32) (string, string, error) {
    // The config mainly contains the pod name, namespace, UID, labels,
    // annotations, DNS settings and container port mappings, plus the Linux
    // config derived from the security.alpha.kubernetes.io/sysctls and
    // security.alpha.kubernetes.io/unsafe-sysctls annotations.
    podSandboxConfig, err := m.generatePodSandboxConfig(pod, attempt)
    if err != nil {
        message := fmt.Sprintf("GeneratePodSandboxConfig for pod %q failed: %v", format.Pod(pod), err)
        glog.Error(message)
        return "", message, err
    }

    // Create pod logs directory
    err = m.osInterface.MkdirAll(podSandboxConfig.LogDirectory, 0755)
    if err != nil {
        message := fmt.Sprintf("Create pod log directory for pod %q failed: %v", format.Pod(pod), err)
        glog.Errorf(message)
        return "", message, err
    }

    podSandBoxID, err := m.runtimeService.RunPodSandbox(podSandboxConfig)
    if err != nil {
        message := fmt.Sprintf("CreatePodSandbox for pod %q failed: %v", format.Pod(pod), err)
        glog.Error(message)
        return "", message, err
    }

    return podSandBoxID, "", nil
}
This function first calls generatePodSandboxConfig to build the sandbox configuration, then calls MkdirAll to create the pod's log directory, and finally calls RunPodSandbox to create the pause container. The dockershim implementation of RunPodSandbox is:
// RunPodSandbox creates and starts a pod-level sandbox. Runtimes should ensure
// the sandbox is in ready state.
// For docker, PodSandbox is implemented by a container holding the network
// namespace for the pod.
// Note: docker doesn't use LogDirectory (yet).
func (ds *dockerService) RunPodSandbox(config *runtimeapi.PodSandboxConfig) (id string, err error) {
    // Step 1: Pull the image for the sandbox.
    image := defaultSandboxImage
    podSandboxImage := ds.podSandboxImage
    if len(podSandboxImage) != 0 {
        image = podSandboxImage
    }

    // NOTE: To use a custom sandbox image in a private repository, users need to configure the nodes with credentials properly.
    // see: http://kubernetes.io/docs/user-guide/images/#configuring-nodes-to-authenticate-to-a-private-repository
    // Only pull sandbox image when it's not present - v1.PullIfNotPresent.
    if err := ensureSandboxImageExists(ds.client, image); err != nil {
        return "", err
    }

    // Step 2: Create the sandbox container.
    createConfig, err := ds.makeSandboxDockerConfig(config, image)
    if err != nil {
        return "", fmt.Errorf("failed to make sandbox docker config for pod %q: %v", config.Metadata.Name, err)
    }
    createResp, err := ds.client.CreateContainer(*createConfig)
    if err != nil {
        createResp, err = recoverFromCreationConflictIfNeeded(ds.client, *createConfig, err)
    }

    if err != nil || createResp == nil {
        return "", fmt.Errorf("failed to create a sandbox for pod %q: %v", config.Metadata.Name, err)
    }

    ds.setNetworkReady(createResp.ID, false)
    defer func(e *error) {
        // Set networking ready depending on the error return of
        // the parent function
        if *e == nil {
            ds.setNetworkReady(createResp.ID, true)
        }
    }(&err)

    // Step 3: Create Sandbox Checkpoint.
    if err = ds.checkpointHandler.CreateCheckpoint(createResp.ID, constructPodSandboxCheckpoint(config)); err != nil {
        return createResp.ID, err
    }

    // Step 4: Start the sandbox container.
    // Assume kubelet's garbage collector would remove the sandbox later, if
    // startContainer failed.
    err = ds.client.StartContainer(createResp.ID)
    if err != nil {
        return createResp.ID, fmt.Errorf("failed to start sandbox container for pod %q: %v", config.Metadata.Name, err)
    }

    // Rewrite resolv.conf file generated by docker.
    // NOTE: cluster dns settings aren't passed anymore to docker api in all cases,
    // not only for pods with host network: the resolver conf will be overwritten
    // after sandbox creation to override docker's behaviour. This resolv.conf
    // file is shared by all containers of the same pod, and needs to be modified
    // only once per pod.
    if dnsConfig := config.GetDnsConfig(); dnsConfig != nil {
        containerInfo, err := ds.client.InspectContainer(createResp.ID)
        if err != nil {
            return createResp.ID, fmt.Errorf("failed to inspect sandbox container for pod %q: %v", config.Metadata.Name, err)
        }

        if err := rewriteResolvFile(containerInfo.ResolvConfPath, dnsConfig.Servers, dnsConfig.Searches, dnsConfig.Options); err != nil {
            return createResp.ID, fmt.Errorf("rewrite resolv.conf failed for pod %q: %v", config.Metadata.Name, err)
        }
    }

    // Do not invoke network plugins if in hostNetwork mode.
    if nsOptions := config.GetLinux().GetSecurityContext().GetNamespaceOptions(); nsOptions != nil && nsOptions.HostNetwork {
        return createResp.ID, nil
    }

    // Step 5: Setup networking for the sandbox.
    // All pod networking is setup by a CNI plugin discovered at startup time.
    // This plugin assigns the pod ip, sets up routes inside the sandbox,
    // creates interfaces etc. In theory, its jurisdiction ends with pod
    // sandbox networking, but it might insert iptables rules or open ports
    // on the host as well, to satisfy parts of the pod spec that aren't
    // recognized by the CNI standard yet.
    cID := kubecontainer.BuildContainerID(runtimeName, createResp.ID)
    err = ds.network.SetUpPod(config.GetMetadata().Namespace, config.GetMetadata().Name, cID, config.Annotations)
    if err != nil {
        // TODO(random-liu): Do we need to teardown network here?
        if err := ds.client.StopContainer(createResp.ID, defaultSandboxGracePeriod); err != nil {
            glog.Warningf("Failed to stop sandbox container %q for pod %q: %v", createResp.ID, config.Metadata.Name, err)
        }
    }
    return createResp.ID, err
}
The flow of RunPodSandbox is fairly clear. Remember the pause container image flag in kubelet's startup parameters (--pod-infra-container-image)? Here ensureSandboxImageExists makes sure the pause image is present on the node and pulls it first if it is not. makeSandboxDockerConfig converts the PodSandboxConfig into a docker ContainerCreateConfig, the pause container is created from that config, and then started. Once the container is up, DNS is configured, and this part is interesting: the sandbox container is inspected via docker inspect, and then the container's resolv.conf file, which lives on the host, is rewritten:
[root@localhost ~]# docker inspect 7bae079c4926 -f={{.ResolvConfPath}}
/var/lib/docker/containers/7bae079c4926921a1ac8934362331f31eeb2fdb5c9fc0e6d06fcaabd9ef16c29/resolv.conf
Now look at the rewriteResolvFile function:
// rewriteResolvFile rewrites resolv.conf file generated by docker.
func rewriteResolvFile(resolvFilePath string, dns []string, dnsSearch []string, dnsOptions []string) error {
    if len(resolvFilePath) == 0 {
        glog.Errorf("ResolvConfPath is empty.")
        return nil
    }

    if _, err := os.Stat(resolvFilePath); os.IsNotExist(err) {
        return fmt.Errorf("ResolvConfPath %q does not exist", resolvFilePath)
    }

    var resolvFileContent []string
    for _, srv := range dns {
        resolvFileContent = append(resolvFileContent, "nameserver "+srv)
    }

    if len(dnsSearch) > 0 {
        resolvFileContent = append(resolvFileContent, "search "+strings.Join(dnsSearch, " "))
    }

    if len(dnsOptions) > 0 {
        resolvFileContent = append(resolvFileContent, "options "+strings.Join(dnsOptions, " "))
    }

    if len(resolvFileContent) > 0 {
        resolvFileContentStr := strings.Join(resolvFileContent, "\n")
        resolvFileContentStr += "\n"

        glog.V(4).Infof("Will attempt to re-write config file %s with: \n%s", resolvFilePath, resolvFileContent)
        if err := rewriteFile(resolvFilePath, resolvFileContentStr); err != nil {
            glog.Errorf("resolv.conf could not be updated: %v", err)
            return err
        }
    }

    return nil
}
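The rewriteFile helper it calls is not shown here; it simply truncates the file and rewrites it in place, essentially:
// rewriteFile truncates the target file and writes the new content in place
// (sketch of the small helper that lives next to rewriteResolvFile in dockershim).
func rewriteFile(filePath, stringToWrite string) error {
    f, err := os.OpenFile(filePath, os.O_TRUNC|os.O_WRONLY, 0644)
    if err != nil {
        return err
    }
    defer f.Close()

    _, err = f.WriteString(stringToWrite)
    return err
}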
See what is going on? The kubelet is simply rewriting /var/lib/docker/containers/7bae079c4926921a1ac8934362331f31eeb2fdb5c9fc0e6d06fcaabd9ef16c29/resolv.conf on the host (docker bind-mounts this host file into the container as /etc/resolv.conf). If you then exec into the container and cat /etc/resolv.conf, what you see is exactly the output of rewriteResolvFile:
nameserver 10.96.0.10
search default.svc.cluster.local svc.cluster.local cluster.local
options ndots:5
At this point, if the pod uses host networking, no CNI plugin needs to be invoked to configure the pod's network; otherwise SetUpPod is called to set up the network of the pod's pause container.
func (pm *PluginManager) SetUpPod(podNamespace, podName string, id kubecontainer.ContainerID, annotations map[string]string) error {
    defer recordOperation("set_up_pod", time.Now())
    fullPodName := kubecontainer.BuildPodFullName(podName, podNamespace)
    pm.podLock(fullPodName).Lock()
    defer pm.podUnlock(fullPodName)

    glog.V(3).Infof("Calling network plugin %s to set up pod %q", pm.plugin.Name(), fullPodName)
    if err := pm.plugin.SetUpPod(podNamespace, podName, id, annotations); err != nil {
        return fmt.Errorf("NetworkPlugin %s failed to set up pod %q network: %v", pm.plugin.Name(), fullPodName, err)
    }

    return nil
}
This calls the plugin's SetUpPod method. Here plugin is an interface; which implementation is used is determined by kubelet's --network-plugin startup flag (cni in our case).
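For reference, the NetworkPlugin interface looks roughly like this (abridged from pkg/kubelet/network/plugins.go; the Event and Capabilities methods are omitted):
// NetworkPlugin is the interface the kubelet programs against; the cni and
// kubenet plugins both implement it (abridged).
type NetworkPlugin interface {
    // Init is called exactly once, before any other method.
    Init(host Host, hairpinMode kubeletconfig.HairpinMode, nonMasqueradeCIDR string, mtu int) error
    // Name returns the plugin's name, matched against --network-plugin.
    Name() string
    // SetUpPod is called after the pod's infra (pause) container is created
    // and before its other containers are launched.
    SetUpPod(namespace string, name string, podSandboxID kubecontainer.ContainerID, annotations map[string]string) error
    // TearDownPod is called before the pod's infra container is deleted.
    TearDownPod(namespace string, name string, podSandboxID kubecontainer.ContainerID) error
    // GetPodNetworkStatus reports the pod's IP address.
    GetPodNetworkStatus(namespace string, name string, podSandboxID kubecontainer.ContainerID) (*PodNetworkStatus, error)
    // Status returns an error if the network plugin is not ready.
    Status() error
}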
func (plugin *cniNetworkPlugin) SetUpPod(namespace string, name string, id kubecontainer.ContainerID, annotations map[string]string) error {
    if err := plugin.checkInitialized(); err != nil {
        return err
    }
    netnsPath, err := plugin.host.GetNetNS(id.ID)
    if err != nil {
        return fmt.Errorf("CNI failed to retrieve network namespace path: %v", err)
    }

    // Windows doesn't have loNetwork. It comes only with Linux
    if plugin.loNetwork != nil {
        if _, err = plugin.addToNetwork(plugin.loNetwork, name, namespace, id, netnsPath); err != nil {
            glog.Errorf("Error while adding to cni lo network: %s", err)
            return err
        }
    }

    _, err = plugin.addToNetwork(plugin.getDefaultNetwork(), name, namespace, id, netnsPath)
    if err != nil {
        glog.Errorf("Error while adding to cni network: %s", err)
        return err
    }

    return err
}
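A side note on plugin.host.GetNetNS: for docker it lands in dockershim, which derives the namespace path from the pause container's PID as reported by docker inspect. A simplified sketch of that helper:
// getNetworkNamespace builds the netns path of a (running) container from the
// PID reported by docker inspect (simplified sketch of the dockershim helper).
func getNetworkNamespace(c *dockertypes.ContainerJSON) (string, error) {
    if c.State.Pid == 0 {
        // Docker reports pid 0 for an exited container; there is no netns to join.
        return "", fmt.Errorf("cannot find network namespace for the terminated container %q", c.ID)
    }
    return fmt.Sprintf("/proc/%v/ns/net", c.State.Pid), nil
}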
GetNetNS returns the path of the container's network namespace. On non-Windows systems, addToNetwork is first called with the lo network to configure the loopback device, and then called again with the default network to configure the pause container's eth0 interface. Let's look at the plugin's getDefaultNetwork function:
func (plugin *cniNetworkPlugin) getDefaultNetwork() *cniNetwork {
    plugin.RLock()
    defer plugin.RUnlock()
    return plugin.defaultNetwork
}
It simply returns plugin.defaultNetwork, which was populated by the getDefaultCNINetwork function:
func getDefaultCNINetwork(pluginDir, binDir, vendorCNIDirPrefix string) (*cniNetwork, error) {
    if pluginDir == "" {
        pluginDir = DefaultNetDir
    }
    files, err := libcni.ConfFiles(pluginDir, []string{".conf", ".conflist", ".json"})
    switch {
    case err != nil:
        return nil, err
    case len(files) == 0:
        return nil, fmt.Errorf("No networks found in %s", pluginDir)
    }

    sort.Strings(files)
    for _, confFile := range files {
        var confList *libcni.NetworkConfigList
        if strings.HasSuffix(confFile, ".conflist") {
            confList, err = libcni.ConfListFromFile(confFile)
            if err != nil {
                glog.Warningf("Error loading CNI config list file %s: %v", confFile, err)
                continue
            }
        } else {
            conf, err := libcni.ConfFromFile(confFile)
            if err != nil {
                glog.Warningf("Error loading CNI config file %s: %v", confFile, err)
                continue
            }
            // Ensure the config has a "type" so we know what plugin to run.
            // Also catches the case where somebody put a conflist into a conf file.
            if conf.Network.Type == "" {
                glog.Warningf("Error loading CNI config file %s: no 'type'; perhaps this is a .conflist?", confFile)
                continue
            }

            confList, err = libcni.ConfListFromConf(conf)
            if err != nil {
                glog.Warningf("Error converting CNI config file %s to list: %v", confFile, err)
                continue
            }
        }
        if len(confList.Plugins) == 0 {
            glog.Warningf("CNI config list %s has no networks, skipping", confFile)
            continue
        }
        confType := confList.Plugins[0].Network.Type

        // Search for vendor-specific plugins as well as default plugins in the CNI codebase.
        vendorDir := vendorCNIDir(vendorCNIDirPrefix, confType)
        cninet := &libcni.CNIConfig{
            Path: []string{vendorDir, binDir},
        }
        network := &cniNetwork{name: confList.Name, NetworkConfig: confList, CNIConfig: cninet}
        return network, nil
    }
    return nil, fmt.Errorf("No valid networks found in %s", pluginDir)
}
getDefaultCNINetwork locates the CNI binaries and configuration directory based on kubelet's CNI flags (--cni-conf-dir and --cni-bin-dir); when they are not set, the network config directory defaults to /etc/cni/net.d and the binary directory to /opt/cni/bin. The function scans the config directory for files with a .conf, .conflist or .json suffix and, from the first valid config file (in sorted filename order), builds and returns a cniNetwork object containing the plugin name and its configuration.
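With calico, the file picked up here is typically a single conf such as /etc/cni/net.d/10-calico.conf. The exact contents vary by installation, but an illustrative example (endpoint and kubeconfig values are placeholders) looks like this:
{
    "name": "k8s-pod-network",
    "cniVersion": "0.1.0",
    "type": "calico",
    "etcd_endpoints": "http://127.0.0.1:2379",
    "log_level": "info",
    "ipam": {
        "type": "calico-ipam"
    },
    "policy": {
        "type": "k8s"
    },
    "kubernetes": {
        "kubeconfig": "/etc/cni/net.d/calico-kubeconfig"
    }
}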
Now on to the plugin's addToNetwork function:
func (plugin *cniNetworkPlugin) addToNetwork(network *cniNetwork, podName string, podNamespace string, podSandboxID kubecontainer.ContainerID, podNetnsPath string) (cnitypes.Result, error) {
    rt, err := plugin.buildCNIRuntimeConf(podName, podNamespace, podSandboxID, podNetnsPath)
    if err != nil {
        glog.Errorf("Error adding network when building cni runtime conf: %v", err)
        return nil, err
    }

    netConf, cniNet := network.NetworkConfig, network.CNIConfig
    glog.V(4).Infof("About to add CNI network %v (type=%v)", netConf.Name, netConf.Plugins[0].Network.Type)
    res, err := cniNet.AddNetworkList(netConf, rt)
    if err != nil {
        glog.Errorf("Error adding network: %v", err)
        return nil, err
    }

    return res, nil
}
buildCNIRuntimeConf builds the CNI runtime configuration from the pause container ID, its network namespace path, and the pod's name and namespace. The interface name inside the container is eth0 (network.DefaultInterfaceName). The function looks like this:
func (plugin *cniNetworkPlugin) buildCNIRuntimeConf(podName string, podNs string, podSandboxID kubecontainer.ContainerID, podNetnsPath string) (*libcni.RuntimeConf, error) {
    glog.V(4).Infof("Got netns path %v", podNetnsPath)
    glog.V(4).Infof("Using podns path %v", podNs)

    rt := &libcni.RuntimeConf{
        ContainerID: podSandboxID.ID,
        NetNS:       podNetnsPath,
        IfName:      network.DefaultInterfaceName,
        Args: [][2]string{
            {"IgnoreUnknown", "1"},
            {"K8S_POD_NAMESPACE", podNs},
            {"K8S_POD_NAME", podName},
            {"K8S_POD_INFRA_CONTAINER_ID", podSandboxID.ID},
        },
    }

    // port mappings are a cni capability-based args, rather than parameters
    // to a specific plugin
    portMappings, err := plugin.host.GetPodPortMappings(podSandboxID.ID)
    if err != nil {
        return nil, fmt.Errorf("could not retrieve port mappings: %v", err)
    }
    portMappingsParam := make([]cniPortMapping, 0, len(portMappings))
    for _, p := range portMappings {
        if p.HostPort <= 0 {
            continue
        }
        portMappingsParam = append(portMappingsParam, cniPortMapping{
            HostPort:      p.HostPort,
            ContainerPort: p.ContainerPort,
            Protocol:      strings.ToLower(string(p.Protocol)),
            HostIP:        p.HostIP,
        })
    }
    rt.CapabilityArgs = map[string]interface{}{
        "portMappings": portMappingsParam,
    }

    return rt, nil
}
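Putting the pieces together: for a hypothetical pod default/nginx whose pause container is the one from our earlier example, the resulting RuntimeConf handed to libcni would look roughly like this (the netns PID is made up for illustration):
rt := &libcni.RuntimeConf{
    ContainerID: "7bae079c4926921a1ac8934362331f31eeb2fdb5c9fc0e6d06fcaabd9ef16c29",
    NetNS:       "/proc/12345/ns/net", // pause container's PID; illustrative value
    IfName:      "eth0",               // network.DefaultInterfaceName
    Args: [][2]string{
        {"IgnoreUnknown", "1"},
        {"K8S_POD_NAMESPACE", "default"},
        {"K8S_POD_NAME", "nginx"},
        {"K8S_POD_INFRA_CONTAINER_ID", "7bae079c4926921a1ac8934362331f31eeb2fdb5c9fc0e6d06fcaabd9ef16c29"},
    },
}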
Next, AddNetworkList is executed:
// AddNetworkList executes a sequence of plugins with the ADD command
func (c *CNIConfig) AddNetworkList(list *NetworkConfigList, rt *RuntimeConf) (types.Result, error) {
    var prevResult types.Result
    for _, net := range list.Plugins {
        pluginPath, err := invoke.FindInPath(net.Network.Type, c.Path)
        if err != nil {
            return nil, err
        }

        newConf, err := buildOneConfig(list, net, prevResult, rt)
        if err != nil {
            return nil, err
        }

        prevResult, err = invoke.ExecPluginWithResult(pluginPath, newConf.Bytes, c.args("ADD", rt))
        if err != nil {
            return nil, err
        }
    }

    return prevResult, nil
}
This function iterates over the plugins in the config list; for each one it looks up a binary named after the plugin's type in the binary search path and resolves its full path. It then calls ExecPluginWithResult, which executes that CNI binary, passing in the newConf configuration along with the RuntimeConf and the ADD command, where ADD means "add this container to the network".
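Under the hood, invoke.ExecPluginWithResult execs the plugin binary with the network config JSON on stdin and the runtime parameters as CNI_* environment variables defined by the CNI spec. Conceptually it boils down to a sketch like the following (simplified; the real logic lives in libcni's invoke package, and the function name here is illustrative):
package cni

import (
    "bytes"
    "os"
    "os/exec"
)

// execPluginADD sketches how a CNI binary is invoked with the ADD command:
// the network config JSON arrives on stdin, the runtime parameters travel as
// CNI_* environment variables, and the plugin prints its result (IP address,
// routes, ...) as JSON on stdout.
func execPluginADD(pluginPath string, netConfJSON []byte, containerID, netnsPath, ifName, cniArgs string) ([]byte, error) {
    cmd := exec.Command(pluginPath)
    cmd.Stdin = bytes.NewReader(netConfJSON)
    cmd.Env = append(os.Environ(),
        "CNI_COMMAND=ADD", // "add this container to the network"
        "CNI_CONTAINERID="+containerID,
        "CNI_NETNS="+netnsPath,
        "CNI_IFNAME="+ifName, // eth0 in our case
        "CNI_ARGS="+cniArgs,  // e.g. "IgnoreUnknown=1;K8S_POD_NAMESPACE=default;K8S_POD_NAME=nginx"
        "CNI_PATH=/opt/cni/bin",
    )
    var stdout bytes.Buffer
    cmd.Stdout = &stdout
    if err := cmd.Run(); err != nil {
        return nil, err
    }
    return stdout.Bytes(), nil
}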
This completes kubelet's part of the network setup. To summarize: when kubelet creates a new pod, it first creates a sandbox (pause) container, builds a CNI runtime configuration from the pod's YAML and kubelet's CNI flags, and finally invokes the CNI plugin binary to configure the docker container's network. (The CNI plugin's own invocation flow is analyzed in the next part.)