Swarmkit笔记(7)——exec.Executor interface

exec.Executor interface定义(位于agent/exec/executor.go):

// Executor provides controllers for tasks.
type Executor interface {
    // Describe returns the underlying node description.
    Describe(ctx context.Context) (*api.NodeDescription, error)

    // Configure uses the node object state to propagate node
    // state to the underlying executor.
    Configure(ctx context.Context, node *api.Node) error

    // Controller provides a controller for the given task.
    Controller(t *api.Task) (Controller, error)

    // SetNetworkBootstrapKeys passes the symmetric keys from the
    // manager to the executor.
    SetNetworkBootstrapKeys([]*api.EncryptionKey) error
}

container Package实现了executor结构体(位于agent/exec/container/executor.go

import engineapi "github.com/docker/engine-api/client"
type executor struct {
    client engineapi.APIClient
}

里面只有一个成员:一个Docker APIClientexecutor结构体实际只实现了下面两个方法:

// Describe returns the underlying node description from the docker client.
func (e *executor) Describe(ctx context.Context) (*api.NodeDescription, error) {
    info, err := e.client.Info(ctx)
    if err != nil {
        return nil, err
    }

    plugins := map[api.PluginDescription]struct{}{}
    addPlugins := func(typ string, names []string) {
        for _, name := range names {
            plugins[api.PluginDescription{
                Type: typ,
                Name: name,
            }] = struct{}{}
        }
    }

    addPlugins("Volume", info.Plugins.Volume)
    // Add builtin driver "overlay" (the only builtin multi-host driver) to
    // the plugin list by default.
    addPlugins("Network", append([]string{"overlay"}, info.Plugins.Network...))
    addPlugins("Authorization", info.Plugins.Authorization)

    pluginFields := make([]api.PluginDescription, 0, len(plugins))
    for k := range plugins {
        pluginFields = append(pluginFields, k)
    }
    sort.Sort(sortedPlugins(pluginFields))

    // parse []string labels into a map[string]string
    labels := map[string]string{}
    for _, l := range info.Labels {
        stringSlice := strings.SplitN(l, "=", 2)
        // this will take the last value in the list for a given key
        // ideally, one shouldn't assign multiple values to the same key
        if len(stringSlice) > 1 {
            labels[stringSlice[0]] = stringSlice[1]
        }
    }

    description := &api.NodeDescription{
        Hostname: info.Name,
        Platform: &api.Platform{
            Architecture: info.Architecture,
            OS:           info.OSType,
        },
        Engine: &api.EngineDescription{
            EngineVersion: info.ServerVersion,
            Labels:        labels,
            Plugins:       pluginFields,
        },
        Resources: &api.Resources{
            NanoCPUs:    int64(info.NCPU) * 1e9,
            MemoryBytes: info.MemTotal,
        },
    }

    return description, nil
}

// Controller returns a docker container controller.
func (e *executor) Controller(t *api.Task) (exec.Controller, error) {
    ctlr, err := newController(e.client, t)
    if err != nil {
        return nil, err
    }

    return ctlr, nil
}

Describe()方法返回当前Docker engine的资源配置信息,而Controller()则返回一个container.controller结构体。

其中关于client的初始化位于:

        client, err := engineapi.NewClient(engineAddr, "", nil, nil)
        if err != nil {
            return err
        }

        executor := container.NewExecutor(client)

如果没有对engineAddr做特殊设置,就会使用其默认值:unix:///var/run/docker.sockclient值默认如下:

(dlv) p client
*github.com/docker/swarmkit/vendor/github.com/docker/engine-api/client.Client {
        proto: "unix",
        addr: "/var/run/docker.sock",
        basePath: "",
        transport: (unreadable interface type "*transport.apiTransport" not found for 0xc8202ecbd8: no type entry found, use 'types' for a list of valid types),
        version: "",
        customHTTPHeaders: map[string]string [],}

Swarmkit笔记(6)——Agent运行

Node.runAgent()函数实现如下:

func (n *Node) runAgent(ctx context.Context, db *bolt.DB, creds credentials.TransportAuthenticator, ready chan<- struct{}) error {
    var manager api.Peer
    select {
    case <-ctx.Done():
    case manager = <-n.remotes.WaitSelect(ctx):
    }
    if ctx.Err() != nil {
        return ctx.Err()
    }
    picker := picker.NewPicker(n.remotes, manager.Addr)
    conn, err := grpc.Dial(manager.Addr,
        grpc.WithPicker(picker),
        grpc.WithTransportCredentials(creds),
        grpc.WithBackoffMaxDelay(maxSessionFailureBackoff))
    if err != nil {
        return err
    }

    agent, err := New(&Config{
        Hostname:         n.config.Hostname,
        Managers:         n.remotes,
        Executor:         n.config.Executor,
        DB:               db,
        Conn:             conn,
        Picker:           picker,
        NotifyRoleChange: n.roleChangeReq,
    })
    if err != nil {
        return err
    }
    if err := agent.Start(ctx); err != nil {
        return err
    }

    n.Lock()
    n.agent = agent
    n.Unlock()

    defer func() {
        n.Lock()
        n.agent = nil
        n.Unlock()
    }()

    go func() {
        <-agent.Ready()
        close(ready)
    }()

    // todo: manually call stop on context cancellation?

    return agent.Err(context.Background())
}

上面函数解释如下:

(1)case manager = <-n.remotes.WaitSelect(ctx):首先获得manager
(2)接下来调用grpc.Dial()去连接这个manager

    picker := picker.NewPicker(n.remotes, manager.Addr)
    conn, err := grpc.Dial(manager.Addr,
        grpc.WithPicker(picker),
        grpc.WithTransportCredentials(creds),
        grpc.WithBackoffMaxDelay(maxSessionFailureBackoff))
    if err != nil {
        return err
    }

(3)生成并运行一个Agent

    agent, err := New(&Config{
        Hostname:         n.config.Hostname,
        Managers:         n.remotes,
        Executor:         n.config.Executor,
        DB:               db,
        Conn:             conn,
        Picker:           picker,
        NotifyRoleChange: n.roleChangeReq,
    })
    if err != nil {
        return err
    }
    if err := agent.Start(ctx); err != nil {
        return err
    }

关于Agent结构体定义:

// Agent implements the primary node functionality for a member of a swarm
// cluster. The primary functionality is to run and report on the status of
// tasks assigned to the node.
type Agent struct {
    config *Config

    // The latest node object state from manager
    // for this node known to the agent.
    node *api.Node

    keys []*api.EncryptionKey

    sessionq chan sessionOperation
    worker   Worker

    started chan struct{}
    ready   chan struct{}
    stopped chan struct{} // requests shutdown
    closed  chan struct{} // only closed in run
    err     error         // read only after closed is closed
}

其中Config结构体定义:

// Config provides values for an Agent.
type Config struct {
    // Hostname the name of host for agent instance.
    Hostname string

    // Managers provides the manager backend used by the agent. It will be
    // updated with managers weights as observed by the agent.
    Managers picker.Remotes

    // Conn specifies the client connection Agent will use.
    Conn *grpc.ClientConn

    // Picker is the picker used by Conn.
    // TODO(aaronl): This is only part of the config to allow resetting the
    // GRPC connection. This should be refactored to address the coupling
    // between Conn and Picker.
    Picker *picker.Picker

    // Executor specifies the executor to use for the agent.
    Executor exec.Executor

    // DB used for task storage. Must be open for the lifetime of the agent.
    DB *bolt.DB

    // NotifyRoleChange channel receives new roles from session messages.
    NotifyRoleChange chan<- api.NodeRole
}

注释都很清楚,不必赘述。

Agent.Start()会调到Agent.Run(),实现如下:

func (a *Agent) run(ctx context.Context) {
    ctx, cancel := context.WithCancel(ctx)
    defer cancel()
    defer close(a.closed) // full shutdown.

    ctx = log.WithLogger(ctx, log.G(ctx).WithField("module", "agent"))

    log.G(ctx).Debugf("(*Agent).run")
    defer log.G(ctx).Debugf("(*Agent).run exited")

    var (
        backoff    time.Duration
        session    = newSession(ctx, a, backoff) // start the initial session
        registered = session.registered
        ready      = a.ready // first session ready
        sessionq   chan sessionOperation
    )

    if err := a.worker.Init(ctx); err != nil {
        log.G(ctx).WithError(err).Error("worker initialization failed")
        a.err = err
        return // fatal?
    }

    // setup a reliable reporter to call back to us.
    reporter := newStatusReporter(ctx, a)
    defer reporter.Close()

    a.worker.Listen(ctx, reporter)

    for {
        select {
        case operation := <-sessionq:
            operation.response <- operation.fn(session)
        case msg := <-session.tasks:
            if err := a.worker.Assign(ctx, msg.Tasks); err != nil {
                log.G(ctx).WithError(err).Error("task assignment failed")
            }
        case msg := <-session.messages:
            if err := a.handleSessionMessage(ctx, msg); err != nil {
                log.G(ctx).WithError(err).Error("session message handler failed")
            }
        case <-registered:
            log.G(ctx).Debugln("agent: registered")
            if ready != nil {
                close(ready)
            }
            ready = nil
            registered = nil // we only care about this once per session
            backoff = 0      // reset backoff
            sessionq = a.sessionq
        case err := <-session.errs:
            // TODO(stevvooe): This may actually block if a session is closed
            // but no error was sent. Session.close must only be called here
            // for this to work.
            if err != nil {
                log.G(ctx).WithError(err).Error("agent: session failed")
                backoff = initialSessionFailureBackoff + 2*backoff
                if backoff > maxSessionFailureBackoff {
                    backoff = maxSessionFailureBackoff
                }
            }

            if err := session.close(); err != nil {
                log.G(ctx).WithError(err).Error("agent: closing session failed")
            }
            sessionq = nil
            // if we're here before <-registered, do nothing for that event
            registered = nil

            // Bounce the connection.
            if a.config.Picker != nil {
                a.config.Picker.Reset()
            }
        case <-session.closed:
            log.G(ctx).Debugf("agent: rebuild session")

            // select a session registration delay from backoff range.
            delay := time.Duration(rand.Int63n(int64(backoff)))
            session = newSession(ctx, a, delay)
            registered = session.registered
            sessionq = a.sessionq
        case <-a.stopped:
            // TODO(stevvooe): Wait on shutdown and cleanup. May need to pump
            // this loop a few times.
            return
        case <-ctx.Done():
            if a.err == nil {
                a.err = ctx.Err()
            }

            return
        }
    }
}

其中重要的是session这个概念,通过“session = newSession(ctx, a, backoff)”这行代码将sessionAgent关联起来。

FreeBSD kernel 笔记(4)——UIO

UIO相关的结构体和函数定义:

 #include <sys/types.h>
 #include <sys/uio.h>

 struct uio {
     struct  iovec *uio_iov;         /* scatter/gather list */
     int     uio_iovcnt;         /* length of scatter/gather list */
     off_t   uio_offset;         /* offset in target object */
     ssize_t uio_resid;          /* remaining bytes to copy */
     enum    uio_seg uio_segflg;     /* address space */
     enum    uio_rw uio_rw;      /* operation */
     struct  thread *uio_td;         /* owner */
 };

 int
 uiomove(void *buf, int howmuch, struct uio *uiop);

 int
 uiomove_nofault(void *buf, int howmuch, struct uio *uiop);

关于uio结构体需要注意的是:如果uio_iovcnt不为1,可以把uio_iov所指向的struct iovec看成一个连接起来的大bufferuio_offset指向这个bufferoffest,而uio_resid表明还有多少字节需要copy。在执行read操作时,uio_offset表明已经填充的buffer大小,而uio_resid表明buffer剩余的空间。可以参考这个程序

uiomoveuiomove_nofault本质上调用的都是uiomove_faultflag函数:

static int
uiomove_faultflag(void *cp, int n, struct uio *uio, int nofault)
{
    struct thread *td;
    struct iovec *iov;
    size_t cnt;
    int error, newflags, save;

    td = curthread;
    error = 0;

    KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
    ("uiomove: mode"));
    KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == td,
    ("uiomove proc"));
    if (!nofault)
        WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
        "Calling uiomove()");

    /* XXX does it make a sense to set TDP_DEADLKTREAT for UIO_SYSSPACE ? */
    newflags = TDP_DEADLKTREAT;
    if (uio->uio_segflg == UIO_USERSPACE && nofault) {
        /*
         * Fail if a non-spurious page fault occurs.
         */
        newflags |= TDP_NOFAULTING | TDP_RESETSPUR;
    }
    save = curthread_pflags_set(newflags);

    while (n > 0 && uio->uio_resid) {
        iov = uio->uio_iov;
        cnt = iov->iov_len;
        if (cnt == 0) {
            uio->uio_iov++;
            uio->uio_iovcnt--;
            continue;
        }
        if (cnt > n)
            cnt = n;

        switch (uio->uio_segflg) {

        case UIO_USERSPACE:
            maybe_yield();
            if (uio->uio_rw == UIO_READ)
                error = copyout(cp, iov->iov_base, cnt);
            else
                error = copyin(iov->iov_base, cp, cnt);
            if (error)
                goto out;
            break;

        case UIO_SYSSPACE:
            if (uio->uio_rw == UIO_READ)
                bcopy(cp, iov->iov_base, cnt);
            else
                bcopy(iov->iov_base, cp, cnt);
            break;
        case UIO_NOCOPY:
            break;
        }
        iov->iov_base = (char *)iov->iov_base + cnt;
        iov->iov_len -= cnt;
        uio->uio_resid -= cnt;
        uio->uio_offset += cnt;
        cp = (char *)cp + cnt;
        n -= cnt;
    }
out:
    curthread_pflags_restore(save);
    return (error);
}

可以看到这个函数会对传入的uio结构体的内容进行修改。

关于uiomove_nofault()函数,参考如下定义:

The function uiomovenofault() requires that the buffer and I/O vectors be accessible without incurring a page fault. The source and destination addresses must be physically mapped for read and write access, respec- tively, and neither the source nor destination addresses may be pageable. Thus, the function uiomovenofault() can be called from contexts where acquiring virtual memory system locks or sleeping are prohibited.

参考资料:
UIO