Based on k8s release-1.9

Source directory layout

cmd/kube-controller-manager/app/core.go    // startup code for the Endpoint Controller

/pkg/controller/endpoint
.
├── BUILD
├── OWNERS
├── doc.go
├── endpoints_controller.go    // the controller's core code: construction and the run/sync loop
└── endpoints_controller_test.go

The EndpointController is constructed as follows:

// NewEndpointController returns a new *EndpointController.
func NewEndpointController(podInformer coreinformers.PodInformer, serviceInformer coreinformers.ServiceInformer,
    endpointsInformer coreinformers.EndpointsInformer, client clientset.Interface) *EndpointController {
    if client != nil && client.CoreV1().RESTClient().GetRateLimiter() != nil {
        metrics.RegisterMetricAndTrackRateLimiterUsage("endpoint_controller", client.CoreV1().RESTClient().GetRateLimiter())
    }
    e := &EndpointController{
        client:           client,
        queue:            workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "endpoint"),
        workerLoopPeriod: time.Second,
    }

    serviceInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
        AddFunc: e.enqueueService,
        UpdateFunc: func(old, cur interface{}) {
            e.enqueueService(cur)
        },
        DeleteFunc: e.enqueueService,
    })
    e.serviceLister = serviceInformer.Lister()
    e.servicesSynced = serviceInformer.Informer().HasSynced

    podInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
        AddFunc:    e.addPod,
        UpdateFunc: e.updatePod,
        DeleteFunc: e.deletePod,
    })
    e.podLister = podInformer.Lister()
    e.podsSynced = podInformer.Informer().HasSynced

    e.endpointsLister = endpointsInformer.Lister()
    e.endpointsSynced = endpointsInformer.Informer().HasSynced

    return e
}

The endpoint controller registers event handlers on three informers: podInformer, serviceInformer, and endpointsInformer.
The watch handlers for services are:
1. add: enqueue the added service under a key of the form namespace/name;
2. update: enqueue the updated service (the new object) under a key of the form namespace/name;
3. delete: enqueue the deleted service under a key of the form namespace/name.
All three paths funnel into enqueueService, sketched below.
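
enqueueService is a thin wrapper: it derives the namespace/name key and adds it to the rate-limited work queue. A sketch consistent with the release-1.9 source (keyFunc is the package-level alias for cache.DeletionHandlingMetaNamespaceKeyFunc):

func (e *EndpointController) enqueueService(obj interface{}) {
    // keyFunc handles DeletedFinalStateUnknown tombstones as well as live objects
    key, err := keyFunc(obj)
    if err != nil {
        utilruntime.HandleError(fmt.Errorf("Couldn't get key for object %+v: %v", obj, err))
        return
    }
    e.queue.Add(key)
}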
The watch handlers for pods are:
1. add: for an added pod, getPodServiceMemberships computes which services the pod belongs to by comparing each service's selector against the pod's labels; if a service's selector is a subset of the pod's labels, the pod belongs to that service. Every matching service is then enqueued under its namespace/name key.
2. update: the old and new pods are compared in three steps:
(a) Compare their ResourceVersions: if the updated pod and the original have equal resource versions, return immediately without enqueueing anything.
(b) Check whether endpoint-relevant pod information changed: podChanged returns true if the two pods' DeletionTimestamp fields differ; otherwise it compares their readiness and returns true if that differs; finally it checks whether the two pods' IP, node name, namespace, and UID are equal, returning false if they all match and true otherwise. The result is stored in podChangedFlag.
(c) Check whether the labels differ, or the hostname or subdomain has changed; if so, labelsChanged is set to true.
If the flags from (b) and (c) are both false, this pod update does not enqueue any service. Otherwise the new pod's service list is obtained via getPodServiceMemberships; when labelsChanged is false (but podChangedFlag is true), this new list is used as-is. When labelsChanged is true, the old pod's service list is obtained via the same function and the final set is derived from podChangedFlag: if podChangedFlag is true it is the union of the new and old lists, and if podChangedFlag is false it is their symmetric difference, i.e. the union of (new minus old) and (old minus new). Each service in the resulting set is enqueued under its namespace/name key (see the determineNeededServiceUpdates sketch below).
3. delete: handled the same way as an added pod.
The relevant helper functions are shown below.
getPodServiceMemberships:

func (e *EndpointController) getPodServiceMemberships(pod *v1.Pod) (sets.String, error) {
    set := sets.String{}
    services, err := e.serviceLister.GetPodServices(pod)
    if err != nil {
        // don't log this error because this function makes pointless
        // errors when no services match.
        return set, nil
    }
    for i := range services {
        key, err := keyFunc(services[i])
        if err != nil {
            return nil, err
        }
        set.Insert(key)
    }
    return set, nil
}
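
GetPodServices returns every service in the pod's namespace whose selector matches the pod's labels. The match is the subset check described earlier, and it can be reproduced with the same apimachinery labels package. A minimal, self-contained illustration (the label values are made up for the example):

package main

import (
    "fmt"

    "k8s.io/apimachinery/pkg/labels"
)

func main() {
    // A service selects a pod when its selector is a subset of the pod's
    // labels; extra pod labels do not prevent a match.
    svcSelector := map[string]string{"app": "nginx"}
    podLabels := map[string]string{"app": "nginx", "pod-template-hash": "12345"}

    sel := labels.Set(svcSelector).AsSelectorPreValidated()
    fmt.Println(sel.Matches(labels.Set(podLabels))) // true

    // A pod that lacks one of the selector's key/value pairs does not match.
    fmt.Println(sel.Matches(labels.Set(map[string]string{"app": "redis"}))) // false
}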

podChanged:

func podChanged(oldPod, newPod *v1.Pod) bool {
    // If the pod's deletion timestamp is set, remove endpoint from ready address.
    if newPod.DeletionTimestamp != oldPod.DeletionTimestamp {
        return true
    }
    // If the pod's readiness has changed, the associated endpoint address
    // will move from the unready endpoints set to the ready endpoints.
    // So for the purposes of an endpoint, a readiness change on a pod
    // means we have a changed pod.
    if podutil.IsPodReady(oldPod) != podutil.IsPodReady(newPod) {
        return true
    }
    // Convert the pod to an EndpointAddress, clear inert fields,
    // and see if they are the same.
    newEndpointAddress := podToEndpointAddress(newPod)
    oldEndpointAddress := podToEndpointAddress(oldPod)
    // Ignore the ResourceVersion because it changes
    // with every pod update. This allows the comparison to
    // show equality if all other relevant fields match.
    newEndpointAddress.TargetRef.ResourceVersion = ""
    oldEndpointAddress.TargetRef.ResourceVersion = ""
    if reflect.DeepEqual(newEndpointAddress, oldEndpointAddress) {
        // The pod has not changed in any way that impacts the endpoints
        return false
    }
    return true
}
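
podChanged compares the two pods through podToEndpointAddress, which maps a pod onto the EndpointAddress shape used in Endpoints objects. A sketch consistent with the release-1.9 source (note that Hostname is not set here; syncService fills it in later):

func podToEndpointAddress(pod *v1.Pod) *v1.EndpointAddress {
    return &v1.EndpointAddress{
        IP:       pod.Status.PodIP,
        NodeName: &pod.Spec.NodeName,
        TargetRef: &v1.ObjectReference{
            Kind:            "Pod",
            Namespace:       pod.ObjectMeta.Namespace,
            Name:            pod.ObjectMeta.Name,
            UID:             pod.ObjectMeta.UID,
            ResourceVersion: pod.ObjectMeta.ResourceVersion,
        },
    }
}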

// Excerpt from updatePod: setting the labelsChanged flag

    // Check if the pod labels have changed, indicating a possible
    // change in the service membership
    labelsChanged := false
    if !reflect.DeepEqual(newPod.Labels, oldPod.Labels) ||
        !hostNameAndDomainAreEqual(newPod, oldPod) {
        labelsChanged = true
    }
...
// Excerpt from updatePod: derive the final service set from labelsChanged and
// podChangedFlag, then enqueue every service in it
    if labelsChanged {
        oldServices, err := e.getPodServiceMemberships(oldPod)
        if err != nil {
            utilruntime.HandleError(fmt.Errorf("Unable to get pod %v/%v's service memberships: %v", oldPod.Namespace, oldPod.Name, err))
            return
        }
        services = determineNeededServiceUpdates(oldServices, services, podChangedFlag)
    }
    for key := range services {
        e.queue.Add(key)
    }
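
This excerpt references two helpers not shown above. hostNameAndDomainAreEqual simply compares the pods' Spec.Hostname and Spec.Subdomain, and determineNeededServiceUpdates implements the union/symmetric-difference logic described earlier. Sketches consistent with the release-1.9 source (sets is k8s.io/apimachinery/pkg/util/sets):

func hostNameAndDomainAreEqual(pod1, pod2 *v1.Pod) bool {
    return pod1.Spec.Hostname == pod2.Spec.Hostname &&
        pod1.Spec.Subdomain == pod2.Spec.Subdomain
}

func determineNeededServiceUpdates(oldServices, services sets.String, podChanged bool) sets.String {
    if podChanged {
        // the pod itself changed: every service in either the old or the
        // new membership needs a resync
        services = services.Union(oldServices)
    } else {
        // only the labels changed: only services the pod joined or left
        // (the symmetric difference) need a resync
        services = services.Difference(oldServices).Union(oldServices.Difference(services))
    }
    return services
}

For example, with oldServices = {default/a, default/b} and services = {default/b, default/c}, podChanged=true yields {default/a, default/b, default/c}, while podChanged=false yields {default/a, default/c}.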

The preceding sections spent a fair amount of space on the service and pod watch handlers. Although we have not yet reached the EndpointController's real business logic, these handlers are what filter out unrelated services, so that only the affected ones are enqueued for actual processing.
The business logic is driven by the following functions:

// Run will not return until stopCh is closed. workers determines how many
// endpoints will be handled in parallel.
func (e *EndpointController) Run(workers int, stopCh <-chan struct{}) {
    defer utilruntime.HandleCrash()
    defer e.queue.ShutDown()

    glog.Infof("Starting endpoint controller")
    defer glog.Infof("Shutting down endpoint controller")
    // wait for the pod, service, and endpoints caches to sync
    if !controller.WaitForCacheSync("endpoint", stopCh, e.podsSynced, e.servicesSynced, e.endpointsSynced) {
        return
    }
    // workers is set by the kube-controller-manager flag --concurrent-endpoint-syncs (default 5); workerLoopPeriod is one second
    for i := 0; i < workers; i++ {
        // run the worker function, which loops forever processing keys from the queue
        go wait.Until(e.worker, e.workerLoopPeriod, stopCh)
    }

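    // checkLeftoverEndpoints runs once at startup: it lists all existing
    // Endpoints objects and enqueues their keys, so that Endpoints whose
    // service was deleted while the controller was down get cleaned up.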
    go func() {
        defer utilruntime.HandleCrash()
        e.checkLeftoverEndpoints()
    }()

    <-stopCh
}

// worker runs a worker thread that just dequeues items, processes them, and
// marks them done. You may run as many of these in parallel as you wish; the
// workqueue guarantees that they will not end up processing the same service
// at the same time.
func (e *EndpointController) worker() {
    for e.processNextWorkItem() {
    }
}

func (e *EndpointController) processNextWorkItem() bool {
    eKey, quit := e.queue.Get()
    if quit {
        return false
    }
    defer e.queue.Done(eKey)

    err := e.syncService(eKey.(string))
    e.handleErr(err, eKey)

    return true
}
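
When syncService fails, handleErr requeues the key with rate limiting until a retry budget is exhausted, then drops it. A sketch consistent with the release-1.9 source (maxRetries is a package constant, 15 in that release):

func (e *EndpointController) handleErr(err error, key interface{}) {
    if err == nil {
        // success: clear the key's rate-limiting history
        e.queue.Forget(key)
        return
    }

    if e.queue.NumRequeues(key) < maxRetries {
        // transient failure: requeue with exponential backoff
        glog.V(2).Infof("Error syncing endpoints for service %q: %v", key, err)
        e.queue.AddRateLimited(key)
        return
    }

    // retry budget exhausted: give up on this key
    glog.Warningf("Dropping service %q out of the queue: %v", key, err)
    e.queue.Forget(key)
    utilruntime.HandleError(err)
}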

The EndpointController's main logic lives in syncService:

func (e *EndpointController) syncService(key string) error {
    startTime := time.Now()
    defer func() {
        glog.V(4).Infof("Finished syncing service %q endpoints. (%v)", key, time.Now().Sub(startTime))
    }()
    // split the key into the service's namespace and name
    namespace, name, err := cache.SplitMetaNamespaceKey(key)
    if err != nil {...}
    service, err := e.serviceLister.Services(namespace).Get(name)
    if err != nil {
        // the service has been deleted, so delete the corresponding Endpoints object as well
        err = e.client.CoreV1().Endpoints(namespace).Delete(name, nil)
        if err != nil && !errors.IsNotFound(err) {
            return err
        }
        return nil
    }
    // if the service's .spec.selector is nil, return immediately; the EndpointController does not manage such services
    if service.Spec.Selector == nil {...}

    glog.V(5).Infof("About to update endpoints for service %q", key)
    pods, err := e.podLister.Pods(service.Namespace).List(labels.Set(service.Spec.Selector).AsSelectorPreValidated())
    if err != nil {...}

    var tolerateUnreadyEndpoints bool
    // if the service has an annotation with key service.alpha.kubernetes.io/tolerate-unready-endpoints, parse its boolean value; tolerateUnreadyEndpoints defaults to false
    if v, ok := service.Annotations[TolerateUnreadyEndpointsAnnotation]; ok {...}

    subsets := []v1.EndpointSubset{}
    var totalReadyEps int = 0
    var totalNotReadyEps int = 0

    // iterate over the pods matching the service's selector
    for _, pod := range pods {
        // skip pods that do not have a PodIP yet
        if len(pod.Status.PodIP) == 0 {...}
        // unless unready endpoints are tolerated, skip pods that are being deleted
        if !tolerateUnreadyEndpoints && pod.DeletionTimestamp != nil {...}
        // convert the pod into an EndpointAddress:
        // EndpointAddress{
        //     IP string            the pod's IP
        //     Hostname string
        //     NodeName *string     the name of the pod's node
        //     TargetRef *ObjectReference}  the pod's namespace, name, UID, resource version, etc.
        epa := *podToEndpointAddress(pod)

        hostname := pod.Spec.Hostname
        // if the pod declares a hostname and its subdomain matches the service name (in the same namespace), the endpoint's FQDN becomes hostname.subdomain.namespace.svc.cluster.local
        if len(hostname) > 0 && pod.Spec.Subdomain == service.Name && service.Namespace == pod.Namespace {
            epa.Hostname = hostname
        }

        // headless services are allowed to have no ports
        if len(service.Spec.Ports) == 0 {
            if service.Spec.ClusterIP == api.ClusterIPNone {
                epp := v1.EndpointPort{Port: 0, Protocol: v1.ProtocolTCP}
                // 1. If tolerateUnreadyEndpoints is true, even unready pods are added to the Addresses list; if it is false, only pods that are ready go into Addresses.
                // 2. Otherwise the pod's restart policy is checked: with Never, the pod goes into NotReadyAddresses as long as its phase is neither Failed nor Succeeded; with OnFailure, as long as its phase is not Succeeded; with Always (the default), it always goes into NotReadyAddresses. (See the addEndpointSubset sketch after this function.)
                subsets, totalReadyEps, totalNotReadyEps = addEndpointSubset(subsets, pod, epa, epp, tolerateUnreadyEndpoints)
            }
        } else {
            // iterate over the service's ports
            for i := range service.Spec.Ports {
                servicePort := &service.Spec.Ports[i]

                portName := servicePort.Name
                portProto := servicePort.Protocol
                portNum, err := podutil.FindPort(pod, servicePort)
                // if this service port cannot be resolved to a port on the pod, skip it
                if err != nil {...}

                var readyEps, notReadyEps int
                epp := v1.EndpointPort{Name: portName, Port: int32(portNum), Protocol: portProto}
                // the same Addresses/NotReadyAddresses classification as described above
                // (see the addEndpointSubset sketch after this function)
                subsets, readyEps, notReadyEps = addEndpointSubset(subsets, pod, epa, epp, tolerateUnreadyEndpoints)
                totalReadyEps = totalReadyEps + readyEps
                totalNotReadyEps = totalNotReadyEps + notReadyEps
            }
        }
    }
    // repack the subsets into a canonical form, merging addresses that share the same port set
    subsets = endpoints.RepackSubsets(subsets)

    // fetch the current Endpoints object; if it does not exist (typically because the service was just created), initialize a fresh one; any other error is returned
    currentEndpoints, err := e.endpointsLister.Endpoints(service.Namespace).Get(service.Name)
    if err != nil {...}

    // an empty ResourceVersion on currentEndpoints means the Endpoints object still has to be created
    createEndpoints := len(currentEndpoints.ResourceVersion) == 0

    // if the current subsets equal the repacked subsets and the labels equal the service's labels, skip this update
    if !createEndpoints &&
        apiequality.Semantic.DeepEqual(currentEndpoints.Subsets, subsets) &&
        apiequality.Semantic.DeepEqual(currentEndpoints.Labels, service.Labels) {
        glog.V(5).Infof("endpoints are equal for %s/%s, skipping update", service.Namespace, service.Name)
        return nil
    }
    newEndpoints := currentEndpoints.DeepCopy()
    newEndpoints.Subsets = subsets
    newEndpoints.Labels = service.Labels
    if newEndpoints.Annotations == nil {
        newEndpoints.Annotations = make(map[string]string)
    }

    glog.V(4).Infof("Update endpoints for %v/%v, ready: %d not ready: %d", service.Namespace, service.Name, totalReadyEps, totalNotReadyEps)
    if createEndpoints {
        // no Endpoints object with the service's namespace/name exists yet, so create one
        _, err = e.client.CoreV1().Endpoints(service.Namespace).Create(newEndpoints)
    } else {
        // an Endpoints object with the service's namespace/name already exists, so update it
        _, err = e.client.CoreV1().Endpoints(service.Namespace).Update(newEndpoints)
    }
    if err != nil {
        if createEndpoints && errors.IsForbidden(err) {
            // A request is forbidden primarily for two reasons:
            // 1. namespace is terminating, endpoint creation is not allowed by default.
            // 2. policy is misconfigured, in which case no service would function anywhere.
            // Given the frequency of 1, we log at a lower level.
            glog.V(5).Infof("Forbidden from creating endpoints: %v", err)
        }
        return err
    }
    return nil
}
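
The Addresses/NotReadyAddresses classification referenced in the comments above is implemented by addEndpointSubset together with shouldPodBeInEndpoints. Sketches consistent with the release-1.9 source:

// addEndpointSubset puts the address into Addresses when the pod is ready
// (or unready endpoints are tolerated), and into NotReadyAddresses when the
// pod is unready but may still become ready given its restart policy.
func addEndpointSubset(subsets []v1.EndpointSubset, pod *v1.Pod, epa v1.EndpointAddress,
    epp v1.EndpointPort, tolerateUnreadyEndpoints bool) ([]v1.EndpointSubset, int, int) {
    var readyEps, notReadyEps int
    if tolerateUnreadyEndpoints || podutil.IsPodReady(pod) {
        subsets = append(subsets, v1.EndpointSubset{
            Addresses: []v1.EndpointAddress{epa},
            Ports:     []v1.EndpointPort{epp},
        })
        readyEps++
    } else if shouldPodBeInEndpoints(pod) {
        glog.V(5).Infof("Pod is out of service: %v/%v", pod.Namespace, pod.Name)
        subsets = append(subsets, v1.EndpointSubset{
            NotReadyAddresses: []v1.EndpointAddress{epa},
            Ports:             []v1.EndpointPort{epp},
        })
        notReadyEps++
    }
    return subsets, readyEps, notReadyEps
}

// shouldPodBeInEndpoints: with RestartPolicyNever, a pod whose phase is
// Failed or Succeeded will never serve traffic again; with OnFailure only
// Succeeded is terminal; with Always (the default) the pod is always listed.
func shouldPodBeInEndpoints(pod *v1.Pod) bool {
    switch pod.Spec.RestartPolicy {
    case v1.RestartPolicyNever:
        return pod.Status.Phase != v1.PodFailed && pod.Status.Phase != v1.PodSucceeded
    case v1.RestartPolicyOnFailure:
        return pod.Status.Phase != v1.PodSucceeded
    default:
        return true
    }
}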