一. 网络基础

  1.1 网络命名空间的操作

  • 创建网络命名空间: ip netns add <name>
  • 命名空间内执行命令: ip netns exec <name> <command>
  • 进入命名空间: ip netns exec <name> bash

  1.2 bridge-nf-call-iptables

  数据包进入网卡,协议栈代码就能“看到”整个数据包,剩下的问题就是如何来解析和过滤的问题了

  由于网桥工作于数据链路层,在iptables没有开启 bridge-nf时,数据会直接经过网桥转发,结果就是对FORWARD的设置失效;
centos默认不开启 bridge-nf

启动bridge-nf方式:编辑文件vim /etc/sysctl.conf 添加:

net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-arptables = 1

二. docker网络基础

  • 网络的命名空间:Linux在网络栈中引入网络命名空间,将独立的网络协议栈隔离到不同的命名空间中,彼此间无法通信;docker利用这一特性,实现不同容器间的网络隔离。
  • Veth设备对:Veth设备对的引入是为了实现在不同网络命名空间的通信。
  • Iptables/Netfilter:Netfilter负责在内核中执行各种规则(过滤、修改、丢弃等),运行在内核模式中;Iptables是在用户模式下运行的进程,负责协助维护内核中Netfilter的各种规则表;通过二者的配合来实现整个Linux网络协议栈中灵活的数据包处理机制。
  • 网桥:网桥是二层网络设备,通过网桥可以将linux支持的不同的端口连接起来,并实现类似交换机那样的多对多的通信。
  • 路由:当IP层在处理数据发送或转发的时候,会使用路由表来决定去向

  别人特别好的图片,引用

三. kubernetes网络基础

kubernetes网络基础原则

  • 每个Pod都拥有一个独立的IP地址,所有Pod都在一个可以直接连通的、扁平的网络空间中,集群内所有Pod可以使用Pod的IP来访问。
  • 同一个Pod内所有的容器共享一个网络堆栈,该模型称为IP-per-Pod模型。

kubernetes对集群的网络要求

  • 所有容器都可以不用NAT的方式同别的容器通信。
  • 所有节点都可以在不用NAT的方式下同所有容器通信,反之亦然。
  • 容器的地址和别人看到的地址是同一个地址。

0. 数据流

  初始化: 

    NewMainKubelet -> NewDockerService

  SyncPod -> createPodSandbox -> RunPodSandbox -> SetUpPod

1. NewMainKubelet 函数

  路径: pkg/kubelet/kubelet.go

--hairpin-mode string     Default: "promiscuous-bridge" How should the kubelet setup hairpin NAT. This allows endpoints of a Service to loadbalance back to themselves if they should try to access their own Service. Valid values are "promiscuous-bridge", "hairpin-veth" and "none".
	// Gather the network plugin settings from the kubelet configuration
	// (kubeCfg) and the container runtime options (crOptions). A pointer to
	// this value is later handed to dockershim.NewDockerService.
	pluginSettings := dockershim.NetworkPluginSettings{
		HairpinMode:        kubeletconfiginternal.HairpinMode(kubeCfg.HairpinMode),
		NonMasqueradeCIDR:  nonMasqueradeCIDR,
		PluginName:         crOptions.NetworkPluginName,
		PluginConfDir:      crOptions.CNIConfDir,
		PluginBinDirString: crOptions.CNIBinDir,
		MTU:                int(crOptions.NetworkPluginMTU),
	}

  根据containerRuntime为docker,执行NewDockerService

	switch containerRuntime {
	case kubetypes.DockerContainerRuntime:
		// Create and start the CRI shim running as a grpc server.
		streamingConfig := getStreamingConfig(kubeCfg, kubeDeps, crOptions)
		ds, err := dockershim.NewDockerService(kubeDeps.DockerClientConfig, crOptions.PodSandboxImage, streamingConfig,
			&pluginSettings, runtimeCgroups, kubeCfg.CgroupDriver, crOptions.DockershimRootDirectory, !crOptions.RedirectContainerStreaming)
		if err != nil {
			return nil, err
		}
		if crOptions.RedirectContainerStreaming {
			klet.criHandler = ds
		}

		// The unix socket for kubelet <-> dockershim communication.
		glog.V(5).Infof("RemoteRuntimeEndpoint: %q, RemoteImageEndpoint: %q",
			remoteRuntimeEndpoint,
			remoteImageEndpoint)
		glog.V(2).Infof("Starting the GRPC server for the docker CRI shim.")
		server := dockerremote.NewDockerServer(remoteRuntimeEndpoint, ds)
		if err := server.Start(); err != nil {
			return nil, err
		}

		// Create dockerLegacyService when the logging driver is not supported.
		supported, err := ds.IsCRISupportedLogDriver()
		if err != nil {
			return nil, err
		}
		if !supported {
			klet.dockerLegacyService = ds
			legacyLogProvider = ds
		}

2. NewDockerService函数

  路径: pkg/kubelet/dockershim/docker_service.go

  初始化CNI网络插件,本文使用calico plugin,有前面文章讲解,调用network.InitNetworkPlugin进行初始化

  InitNetworkPlugin函数调用Init进行初始化网络设置

	// dockershim currently only supports CNI plugins.
	// Derive the bin-dir list from the single configured string
	// (NOTE(review): presumably a separator-delimited list — confirm in cni.SplitDirs).
	pluginSettings.PluginBinDirs = cni.SplitDirs(pluginSettings.PluginBinDirString)
	// Candidate plugins: whatever cni.ProbeNetworkPlugins returns for the
	// conf/bin dirs, with the kubenet plugin appended after them.
	cniPlugins := cni.ProbeNetworkPlugins(pluginSettings.PluginConfDir, pluginSettings.PluginBinDirs)
	cniPlugins = append(cniPlugins, kubenet.NewPlugin(pluginSettings.PluginBinDirs))
	// netHost is the host object handed to the network plugin; both getters
	// are backed by this docker service (ds).
	netHost := &dockerNetworkHost{
		&namespaceGetter{ds},
		&portMappingGetter{ds},
	}
	// Initialize the plugin identified by PluginName from the candidates;
	// an error here aborts construction of the docker service.
	plug, err := network.InitNetworkPlugin(cniPlugins, pluginSettings.PluginName, netHost, pluginSettings.HairpinMode, pluginSettings.NonMasqueradeCIDR, pluginSettings.MTU)
	if err != nil {
		return nil, fmt.Errorf("didn't find compatible CNI plugin with given settings %+v: %v", pluginSettings, err)
	}
	// Wrap the initialized plugin in a plugin manager and keep it on the service.
	ds.network = network.NewPluginManager(plug)

3. Init函数

  路径 pkg/kubelet/dockershim/network/plugins.go

  前面讲解bridge-nf-call-iptables,路径在/proc/sys/net/bridge/bridge-nf-call-iptables

// Init prepares the host for bridge-based pod networking. It makes a
// best-effort attempt to load the br-netfilter kernel module and to switch on
// the bridge-nf-call-iptables and bridge-nf-call-ip6tables sysctls. Failures
// are logged as warnings and never returned, so Init always returns nil.
//
// The host, hairpinMode, nonMasqueradeCIDR and mtu arguments are not used by
// this implementation.
func (plugin *NoopNetworkPlugin) Init(host Host, hairpinMode kubeletconfig.HairpinMode, nonMasqueradeCIDR string, mtu int) error {
	// Set bridge-nf-call-iptables=1 to maintain compatibility with older
	// kubernetes versions to ensure the iptables-based kube proxy functions
	// correctly.  Other plugins are responsible for setting this correctly
	// depending on whether or not they connect containers to Linux bridges
	// or use some other mechanism (ie, SDN vswitch).

	// Ensure the netfilter module is loaded on kernel >= 3.18; previously
	// it was built-in. Loading stays best-effort, but a failure is logged so
	// that a subsequent sysctl error can be traced back to the missing module.
	if out, err := utilexec.New().Command("modprobe", "br-netfilter").CombinedOutput(); err != nil {
		glog.Warningf("can't load kernel module br-netfilter: %v (output: %q)", err, out)
	}

	if err := plugin.Sysctl.SetSysctl(sysctlBridgeCallIPTables, 1); err != nil {
		glog.Warningf("can't set sysctl %s: %v", sysctlBridgeCallIPTables, err)
	}

	// Only write the ip6tables sysctl when it can be read and is not already 1.
	// A read error is deliberately left unreported, as in the original code
	// (NOTE(review): presumably because the sysctl may be absent — confirm).
	if val, err := plugin.Sysctl.GetSysctl(sysctlBridgeCallIP6Tables); err == nil && val != 1 {
		if err := plugin.Sysctl.SetSysctl(sysctlBridgeCallIP6Tables, 1); err != nil {
			glog.Warningf("can't set sysctl %s: %v", sysctlBridgeCallIP6Tables, err)
		}
	}

	return nil
}

4.  cniNetworkPlugin结构体

  路径 pkg/kubelet/dockershim/network/cni/cni.go

  如果指定的网络插件名不为空,比如Kubelet启动参数--network-plugin=cni,则执行这个目录下的Init操作

const (
	// CNIPluginName is the name of the CNI network plugin.
	// NOTE(review): presumably matched against kubelet's --network-plugin
	// value when the plugin is selected — confirm against the caller.
	CNIPluginName = "cni"
)

// cniNetworkPlugin is the network plugin that wires pods into CNI networks.
// It embeds network.NoopNetworkPlugin and defines its own Init and SetUpPod.
type cniNetworkPlugin struct {
	network.NoopNetworkPlugin

	// loNetwork is the loopback CNI network that SetUpPod adds before the
	// default network when it is non-nil; it is nil on Windows.
	loNetwork *cniNetwork

	// NOTE(review): the embedded RWMutex presumably guards defaultNetwork,
	// which is reached through setDefaultNetwork/getDefaultNetwork — confirm.
	sync.RWMutex
	// defaultNetwork is the network that pods are attached to in SetUpPod;
	// syncNetworkConfig replaces it with the result of getDefaultCNINetwork.
	defaultNetwork *cniNetwork

	// host is stored by Init and used by SetUpPod to resolve a container's
	// network namespace path. execer is used by platformInit to look up the
	// nsenter binary, whose path is kept in nsenterPath. confDir and binDirs
	// are the inputs to getDefaultCNINetwork in syncNetworkConfig.
	// NOTE(review): podCidr is not referenced by the code visible here —
	// confirm its use elsewhere.
	host        network.Host
	execer      utilexec.Interface
	nsenterPath string
	confDir     string
	binDirs     []string
	podCidr     string
}

5. Init函数

// Init runs the platform-specific setup, remembers the network host for later
// lookups and then loads the default CNI network configuration once. An error
// from platformInit is returned unchanged; otherwise Init returns nil.
//
// hairpinMode, nonMasqueradeCIDR and mtu are not used by this implementation.
func (plugin *cniNetworkPlugin) Init(host network.Host, hairpinMode kubeletconfig.HairpinMode, nonMasqueradeCIDR string, mtu int) error {
	if initErr := plugin.platformInit(); initErr != nil {
		return initErr
	}

	plugin.host = host
	plugin.syncNetworkConfig()
	return nil
}

  5.1 platformInit函数

    主要使用nsenter

// platformInit looks up the nsenter binary through the plugin's execer and
// stores whatever path the lookup yields in nsenterPath. The lookup error, if
// any, is returned to the caller as-is.
func (plugin *cniNetworkPlugin) platformInit() error {
	path, lookupErr := plugin.execer.LookPath("nsenter")
	// The path is stored even when the lookup fails, exactly as before.
	plugin.nsenterPath = path
	return lookupErr
}

  5.2 syncNetworkConfig函数

  读取配置文件比如kubelet参数路径--cni-conf-dir=/etc/cni/net.d

// syncNetworkConfig reloads the default CNI network from confDir/binDirs and
// installs it on the plugin. When loading fails, the failure is logged as a
// warning and the currently installed default network is left untouched.
func (plugin *cniNetworkPlugin) syncNetworkConfig() {
	loaded, loadErr := getDefaultCNINetwork(plugin.confDir, plugin.binDirs)
	if loadErr != nil {
		glog.Warningf("Unable to update cni config: %s", loadErr)
		return
	}

	plugin.setDefaultNetwork(loaded)
}

6. SetUpPod

 根据数据流   SyncPod -> createPodSandbox -> RunPodSandbox -> SetUpPod

 主要函数为addToNetwork

// SetUpPod attaches the pod sandbox identified by id to the plugin's networks.
// It verifies the plugin is initialized, resolves the sandbox's network
// namespace path through the host, adds the loopback network when one is
// configured, and finally adds the default network.
//
// It returns the first error encountered, or nil when every step succeeds.
func (plugin *cniNetworkPlugin) SetUpPod(namespace string, name string, id kubecontainer.ContainerID, annotations map[string]string) error {
	if initErr := plugin.checkInitialized(); initErr != nil {
		return initErr
	}

	netnsPath, nsErr := plugin.host.GetNetNS(id.ID)
	if nsErr != nil {
		return fmt.Errorf("CNI failed to retrieve network namespace path: %v", nsErr)
	}

	// Windows doesn't have loNetwork. It comes only with Linux
	if plugin.loNetwork != nil {
		if _, loErr := plugin.addToNetwork(plugin.loNetwork, name, namespace, id, netnsPath, annotations); loErr != nil {
			glog.Errorf("Error while adding to cni lo network: %s", loErr)
			return loErr
		}
	}

	if _, addErr := plugin.addToNetwork(plugin.getDefaultNetwork(), name, namespace, id, netnsPath, annotations); addErr != nil {
		glog.Errorf("Error while adding to cni network: %s", addErr)
		return addErr
	}

	return nil
}

7. addToNetwork函数

  发现最终调用AddNetworkList函数,这个就是根据具体的网络插件进行调用

// addToNetwork adds the pod sandbox to the given CNI network. It builds the
// CNI runtime configuration for the pod, logs the network about to be added,
// and calls AddNetworkList on the network's CNI config.
//
// It returns the CNI result on success. If building the runtime configuration
// or adding the network fails, the error is logged and returned with a nil
// result.
func (plugin *cniNetworkPlugin) addToNetwork(network *cniNetwork, podName string, podNamespace string, podSandboxID kubecontainer.ContainerID, podNetnsPath string, annotations map[string]string) (cnitypes.Result, error) {
	runtimeConf, err := plugin.buildCNIRuntimeConf(podName, podNamespace, podSandboxID, podNetnsPath, annotations)
	if err != nil {
		glog.Errorf("Error adding network when building cni runtime conf: %v", err)
		return nil, err
	}

	confList := network.NetworkConfig
	// The log line reports the type of the first plugin in the list.
	glog.V(4).Infof("About to add CNI network %v (type=%v)", confList.Name, confList.Plugins[0].Network.Type)

	result, err := network.CNIConfig.AddNetworkList(confList, runtimeConf)
	if err != nil {
		glog.Errorf("Error adding network: %v", err)
		return nil, err
	}
	return result, nil
}

8. CNI接口

  各种插件主要就是实现了两类方法:添加网络(Add)和删除网络(Del)

// CNI is the interface used to add a container to, or delete it from, a
// network described either by a configuration list or by a single
// configuration.
type CNI interface {
	// AddNetworkList adds the networks described by the configuration list
	// net using the runtime parameters rt, returning a result or an error.
	AddNetworkList(net *NetworkConfigList, rt *RuntimeConf) (types.Result, error)
	// DelNetworkList is the deletion counterpart of AddNetworkList.
	DelNetworkList(net *NetworkConfigList, rt *RuntimeConf) error

	// AddNetwork adds the single network described by net using the runtime
	// parameters rt, returning a result or an error.
	AddNetwork(net *NetworkConfig, rt *RuntimeConf) (types.Result, error)
	// DelNetwork is the deletion counterpart of AddNetwork.
	DelNetwork(net *NetworkConfig, rt *RuntimeConf) error
}

9. 例如使用calico插件

{
    "name": "calico-k8s-network",
    "cniVersion": "0.1.0",
    "type": "calico",
    "etcd_endpoints": "XXXXXX",
    "etcd_key_file": "/etc/calico/ssl/calico-key.pem",
    "etcd_cert_file": "/etc/calico/ssl/calico.pem",
    "etcd_ca_cert_file": "/etc/calico/ssl/ca.pem",
    "log_level": "info",
    "mtu": 1500,
    "ipam": {
        "type": "calico-ipam"
    },
    "policy": {
        "type": "k8s"
    },
    "kubernetes": {
        "kubeconfig": "/root/.kube/config"
    }
}

 

Logo

开源、云原生的融合云平台

更多推荐