【kubernetes/k8s源码分析】kubelet源码分析之容器网络初始化源码分析
一. 网络基础1.1 网络命名空间的操作创建网络命名空间: ip netns add命名空间内执行命令: ip netns exec进入命名空间: ip netns exec bash1.2bridge-nf-call-iptables数据包进入网卡,协议栈代码就能“看到”整个数据包,剩下的问题就是如何来解析和过滤的问题了由于网桥工作于数据链路层,在ip...
一. 网络基础
1.1 网络命名空间的操作
- 创建网络命名空间: ip netns add
- 命名空间内执行命令: ip netns exec
- 进入命名空间: ip netns exec bash
1.2 bridge-nf-call-iptables
数据包进入网卡,协议栈代码就能“看到”整个数据包,剩下的问题就是如何来解析和过滤的问题了
由于网桥工作于数据链路层,在iptables没有开启 bridge-nf时,数据会直接经过网桥转发,结果就是对FORWARD的设置失效;
centos默认不开启 bridge-nf
启动bridge-nf方式:编辑文件vim /etc/sysctl.conf 添加:
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
net.bridge.bridge-nf-call-arptables = 1
二. docker网络基础
- 网络的命名空间:Linux在网络栈中引入网络命名空间,将独立的网络协议栈隔离到不同的命令空间中,彼此间无法通信;docker利用这一特性,实现不容器间的网络隔离。
- Veth设备对:Veth设备对的引入是为了实现在不同网络命名空间的通信。
- Iptables/Netfilter:Netfilter负责在内核中执行各种规则(过滤、修改、丢弃等),运行在内核模式中;Iptables模式是在用户模式下运行的进程,负责协助维护内核中Netfilter的各种规则表;通过二者的配合来实现整个Linux网络协议栈中灵活的数据包处理机制。
- 网桥:网桥是二层网络设备,通过网桥可以将linux支持的不同的端口连接起来,并实现类似交换机那样的多对多的通信。
- 路由:当IP层在处理数据发送或转发的时候,会使用路由表来决定去向
别人特别好的图片,引用
三. kubernetes网络基础
kubernetes网络基础原则
- 每个Pod都拥有一个独立的IP地址,所有Pod都在一个可以直接连通的、扁平的网络空间中,集群内所有Pod可以使用Pod的IP来访问。
- 同一个Pod内所有的容器共享一个网络堆栈,该模型称为IP-per-Pod模型。
kubernetes对集群的网络要求
- 所有容器都可以不用NAT的方式同别的容器通信。
- 所有节点都可以在不同NAT的方式下同所有容器通信,反之亦然。
- 容器的地址和别人看到的地址是同一个地址。
0. 数据流
初始化:
NewMainKubelet -> NewDockerService
SyncPod -> createPodSandbox -> RunPodSandbox -> SetUpPod
1. NewMainKubelet 函数
路径: pkg/kubelet/kubelet.go
--hairpin-mode string Default: "promiscuous-bridge" How should the kubelet setup hairpin NAT. This allows endpoints of a Service to loadbalance back to themselves if they should try to access their own Service. Valid values are "promiscuous-bridge", "hairpin-veth" and "none".
pluginSettings := dockershim.NetworkPluginSettings{
HairpinMode: kubeletconfiginternal.HairpinMode(kubeCfg.HairpinMode),
NonMasqueradeCIDR: nonMasqueradeCIDR,
PluginName: crOptions.NetworkPluginName,
PluginConfDir: crOptions.CNIConfDir,
PluginBinDirString: crOptions.CNIBinDir,
MTU: int(crOptions.NetworkPluginMTU),
}
根据containerRuntime为docker,执行NewDockerService
switch containerRuntime {
case kubetypes.DockerContainerRuntime:
// Create and start the CRI shim running as a grpc server.
streamingConfig := getStreamingConfig(kubeCfg, kubeDeps, crOptions)
ds, err := dockershim.NewDockerService(kubeDeps.DockerClientConfig, crOptions.PodSandboxImage, streamingConfig,
&pluginSettings, runtimeCgroups, kubeCfg.CgroupDriver, crOptions.DockershimRootDirectory, !crOptions.RedirectContainerStreaming)
if err != nil {
return nil, err
}
if crOptions.RedirectContainerStreaming {
klet.criHandler = ds
}
// The unix socket for kubelet <-> dockershim communication.
glog.V(5).Infof("RemoteRuntimeEndpoint: %q, RemoteImageEndpoint: %q",
remoteRuntimeEndpoint,
remoteImageEndpoint)
glog.V(2).Infof("Starting the GRPC server for the docker CRI shim.")
server := dockerremote.NewDockerServer(remoteRuntimeEndpoint, ds)
if err := server.Start(); err != nil {
return nil, err
}
// Create dockerLegacyService when the logging driver is not supported.
supported, err := ds.IsCRISupportedLogDriver()
if err != nil {
return nil, err
}
if !supported {
klet.dockerLegacyService = ds
legacyLogProvider = ds
}
2. NewDockerService函数
路径: pkg/kubelet/dockershim/docker_service.go
初始化CNI网络插件,本文使用calico plugin,有前面文章讲解,调用network.InitNetworkPlugin进行初始化
InitNetworkPlugin函数调用Init进行初始化网络设置
// dockershim currently only supports CNI plugins.
pluginSettings.PluginBinDirs = cni.SplitDirs(pluginSettings.PluginBinDirString)
cniPlugins := cni.ProbeNetworkPlugins(pluginSettings.PluginConfDir, pluginSettings.PluginBinDirs)
cniPlugins = append(cniPlugins, kubenet.NewPlugin(pluginSettings.PluginBinDirs))
netHost := &dockerNetworkHost{
&namespaceGetter{ds},
&portMappingGetter{ds},
}
plug, err := network.InitNetworkPlugin(cniPlugins, pluginSettings.PluginName, netHost, pluginSettings.HairpinMode, pluginSettings.NonMasqueradeCIDR, pluginSettings.MTU)
if err != nil {
return nil, fmt.Errorf("didn't find compatible CNI plugin with given settings %+v: %v", pluginSettings, err)
}
ds.network = network.NewPluginManager(plug)
3. Init函数
路径 pkg/kubelet/dockershim/network/plugins.go
前面讲解bridge-nf-call-iptables,路径在/proc/sys/net/bridge/bridge-nf-call-iptables
func (plugin *NoopNetworkPlugin) Init(host Host, hairpinMode kubeletconfig.HairpinMode, nonMasqueradeCIDR string, mtu int) error {
// Set bridge-nf-call-iptables=1 to maintain compatibility with older
// kubernetes versions to ensure the iptables-based kube proxy functions
// correctly. Other plugins are responsible for setting this correctly
// depending on whether or not they connect containers to Linux bridges
// or use some other mechanism (ie, SDN vswitch).
// Ensure the netfilter module is loaded on kernel >= 3.18; previously
// it was built-in.
utilexec.New().Command("modprobe", "br-netfilter").CombinedOutput()
if err := plugin.Sysctl.SetSysctl(sysctlBridgeCallIPTables, 1); err != nil {
glog.Warningf("can't set sysctl %s: %v", sysctlBridgeCallIPTables, err)
}
if val, err := plugin.Sysctl.GetSysctl(sysctlBridgeCallIP6Tables); err == nil {
if val != 1 {
if err = plugin.Sysctl.SetSysctl(sysctlBridgeCallIP6Tables, 1); err != nil {
glog.Warningf("can't set sysctl %s: %v", sysctlBridgeCallIP6Tables, err)
}
}
}
return nil
}
4. cniNetworkPlugin结构体
路径 pkg/kubelet/dockershim/.network/cni/cni.go
如果启动不为空的话,比如Kubelet启动参数--network-plugin=cni,则执行这个目录下init操作
const (
CNIPluginName = "cni"
)
type cniNetworkPlugin struct {
network.NoopNetworkPlugin
loNetwork *cniNetwork
sync.RWMutex
defaultNetwork *cniNetwork
host network.Host
execer utilexec.Interface
nsenterPath string
confDir string
binDirs []string
podCidr string
}
5. Ini函数
func (plugin *cniNetworkPlugin) Init(host network.Host, hairpinMode kubeletconfig.HairpinMode, nonMasqueradeCIDR string, mtu int) error {
err := plugin.platformInit()
if err != nil {
return err
}
plugin.host = host
plugin.syncNetworkConfig()
return nil
}
5.1 platformInit函数
主要使用nsenter
func (plugin *cniNetworkPlugin) platformInit() error {
var err error
plugin.nsenterPath, err = plugin.execer.LookPath("nsenter")
if err != nil {
return err
}
return nil
}
5.2 syncNetworkConfig函数
读取配置文件比如kubelet参数路径--cni-conf-dir=/etc/cni/net.d
func (plugin *cniNetworkPlugin) syncNetworkConfig() {
network, err := getDefaultCNINetwork(plugin.confDir, plugin.binDirs)
if err != nil {
glog.Warningf("Unable to update cni config: %s", err)
return
}
plugin.setDefaultNetwork(network)
}
6. SetUpPod
根据数据流 SyncPod -> createPodSandbox -> RunPodSandbox -> SetUpPod
主要函数为addToNetwork
func (plugin *cniNetworkPlugin) SetUpPod(namespace string, name string, id kubecontainer.ContainerID, annotations map[string]string) error {
if err := plugin.checkInitialized(); err != nil {
return err
}
netnsPath, err := plugin.host.GetNetNS(id.ID)
if err != nil {
return fmt.Errorf("CNI failed to retrieve network namespace path: %v", err)
}
// Windows doesn't have loNetwork. It comes only with Linux
if plugin.loNetwork != nil {
if _, err = plugin.addToNetwork(plugin.loNetwork, name, namespace, id, netnsPath, annotations); err != nil {
glog.Errorf("Error while adding to cni lo network: %s", err)
return err
}
}
_, err = plugin.addToNetwork(plugin.getDefaultNetwork(), name, namespace, id, netnsPath, annotations)
if err != nil {
glog.Errorf("Error while adding to cni network: %s", err)
return err
}
return err
}
7. addToNetwork函数
发现最终调用AddNetworkList函数这个就是根据具体的网络插件进行调用
func (plugin *cniNetworkPlugin) addToNetwork(network *cniNetwork, podName string, podNamespace string, podSandboxID kubecontainer.ContainerID, podNetnsPath string, annotations map[string]string) (cnitypes.Result, error) {
rt, err := plugin.buildCNIRuntimeConf(podName, podNamespace, podSandboxID, podNetnsPath, annotations)
if err != nil {
glog.Errorf("Error adding network when building cni runtime conf: %v", err)
return nil, err
}
netConf, cniNet := network.NetworkConfig, network.CNIConfig
glog.V(4).Infof("About to add CNI network %v (type=%v)", netConf.Name, netConf.Plugins[0].Network.Type)
res, err := cniNet.AddNetworkList(netConf, rt)
if err != nil {
glog.Errorf("Error adding network: %v", err)
return nil, err
}
return res, nil
}
8. CNI接口
各种插件主要就是实现了两个方法
type CNI interface {
AddNetworkList(net *NetworkConfigList, rt *RuntimeConf) (types.Result, error)
DelNetworkList(net *NetworkConfigList, rt *RuntimeConf) error
AddNetwork(net *NetworkConfig, rt *RuntimeConf) (types.Result, error)
DelNetwork(net *NetworkConfig, rt *RuntimeConf) error
}
9. 例如使用calico插件
{
"name": "calico-k8s-network",
"cniVersion": "0.1.0",
"type": "calico",
"etcd_endpoints": "XXXXXX",
"etcd_key_file": "/etc/calico/ssl/calico-key.pem",
"etcd_cert_file": "/etc/calico/ssl/calico.pem",
"etcd_ca_cert_file": "/etc/calico/ssl/ca.pem",
"log_level": "info",
"mtu": 1500,
"ipam": {
"type": "calico-ipam"
},
"policy": {
"type": "k8s"
},
"kubernetes": {
"kubeconfig": "/root/.kube/config"
}
}
更多推荐
所有评论(0)