FreeBSD kernel 笔记(5)——分配内存

FreeBSD kernel编程分配内存可以参考这两篇文档:MALLOC(9)CONTIGMALLOC(9)。需要注意以下几点:

(1)在中断上下文中使用malloc系列分配内存函数时,要使用M_NOWAIT标记;

(2)contigmalloc有一个boundary参数:

If the given value “boundary” is non-zero, then the set of physical pages cannot cross any physical address boundary that is a multiple of that value.

举个例子,如果boundary设置为1M,则实际分配的物理内存页面可以位于0~1M1M~2M,而不能位于1.9M~2.1M

FreeBSD kernel 笔记(4)——UIO

UIO相关的结构体和函数定义:

 #include <sys/types.h>
 #include <sys/uio.h>

 struct uio {
     struct  iovec *uio_iov;         /* scatter/gather list */
     int     uio_iovcnt;         /* length of scatter/gather list */
     off_t   uio_offset;         /* offset in target object */
     ssize_t uio_resid;          /* remaining bytes to copy */
     enum    uio_seg uio_segflg;     /* address space */
     enum    uio_rw uio_rw;      /* operation */
     struct  thread *uio_td;         /* owner */
 };

 int
 uiomove(void *buf, int howmuch, struct uio *uiop);

 int
 uiomove_nofault(void *buf, int howmuch, struct uio *uiop);

关于uio结构体需要注意的是:如果uio_iovcnt不为1,可以把uio_iov所指向的struct iovec看成一个连接起来的大bufferuio_offset指向这个bufferoffest,而uio_resid表明还有多少字节需要copy。在执行read操作时,uio_offset表明已经填充的buffer大小,而uio_resid表明buffer剩余的空间。可以参考这个程序

uiomoveuiomove_nofault本质上调用的都是uiomove_faultflag函数:

static int
uiomove_faultflag(void *cp, int n, struct uio *uio, int nofault)
{
    struct thread *td;
    struct iovec *iov;
    size_t cnt;
    int error, newflags, save;

    td = curthread;
    error = 0;

    KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
    ("uiomove: mode"));
    KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == td,
    ("uiomove proc"));
    if (!nofault)
        WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
        "Calling uiomove()");

    /* XXX does it make a sense to set TDP_DEADLKTREAT for UIO_SYSSPACE ? */
    newflags = TDP_DEADLKTREAT;
    if (uio->uio_segflg == UIO_USERSPACE && nofault) {
        /*
         * Fail if a non-spurious page fault occurs.
         */
        newflags |= TDP_NOFAULTING | TDP_RESETSPUR;
    }
    save = curthread_pflags_set(newflags);

    while (n > 0 && uio->uio_resid) {
        iov = uio->uio_iov;
        cnt = iov->iov_len;
        if (cnt == 0) {
            uio->uio_iov++;
            uio->uio_iovcnt--;
            continue;
        }
        if (cnt > n)
            cnt = n;

        switch (uio->uio_segflg) {

        case UIO_USERSPACE:
            maybe_yield();
            if (uio->uio_rw == UIO_READ)
                error = copyout(cp, iov->iov_base, cnt);
            else
                error = copyin(iov->iov_base, cp, cnt);
            if (error)
                goto out;
            break;

        case UIO_SYSSPACE:
            if (uio->uio_rw == UIO_READ)
                bcopy(cp, iov->iov_base, cnt);
            else
                bcopy(iov->iov_base, cp, cnt);
            break;
        case UIO_NOCOPY:
            break;
        }
        iov->iov_base = (char *)iov->iov_base + cnt;
        iov->iov_len -= cnt;
        uio->uio_resid -= cnt;
        uio->uio_offset += cnt;
        cp = (char *)cp + cnt;
        n -= cnt;
    }
out:
    curthread_pflags_restore(save);
    return (error);
}

可以看到这个函数会对传入的uio结构体的内容进行修改。

关于uiomove_nofault()函数,参考如下定义:

The function uiomovenofault() requires that the buffer and I/O vectors be accessible without incurring a page fault. The source and destination addresses must be physically mapped for read and write access, respec- tively, and neither the source nor destination addresses may be pageable. Thus, the function uiomovenofault() can be called from contexts where acquiring virtual memory system locks or sleeping are prohibited.

参考资料:
UIO

FreeBSD kernel 笔记(3)——设备名字

在使用下列函数

int makedevs(struct makedevargs *args, struct cdev **cdev, const char *fmt, …);

struct cdev * make_dev(struct cdevsw *cdevsw, int unit, uidt uid, gidt gid, int perms, const char *fmt, …);

为设备创建cdev结构体时,fmt用来指定设备的名字:

The name is the expansion of fmt and following arguments as printf(9) would print it. The name determines its path under /dev or other devfs(5) mount point and may contain slash `/’ char- acters to denote subdirectories.

也就是/dev下面节点的名字。

cdevsw结构体中的d_name指定的是driver的名字:

struct cdevsw {
    ......
    const char      *d_name;
    ......
}

一个driver可以用来操作多个设备。

参考资料:
MAKE_DEV

FreeBSD kernel 笔记(2)——“preparing a device”和“preparing a device for I/O”

preparing (or initializing) a device”通常发生在加载设备驱动模块时,举例如下:

static int hello_modevent(module_t mod __unused, int /* modeventtype_t */ event, void *arg __unused)
{
    ......
    switch (event) 
    {
        case MOD_LOAD:
        {
            make_dev_args_init(&args);
            args.mda_devsw = &hello_cdevsw;
            args.mda_uid = UID_ROOT;
            args.mda_gid = GID_WHEEL;
            args.mda_mode = 0600;
            uprintf("Hello is loaded:%d\n", make_dev_s(&args, &hello_dev, "hello"));
            break;
        }
        ......
    }
    return error;
}

preparing a device for I/O”则是发生在open这个设备时,比如cdevsw结构体的d_open函数:

struct cdevsw {
    ......
    d_open_t        *d_open;  
    ......
}

FreeBSD中的sysctl函数

FreeBSDsysctl家族的函数定义:

#include <sys/types.h>
 #include <sys/sysctl.h>

 int
 sysctl(const int *name, u_int namelen, void *oldp, size_t *oldlenp,
 const void *newp, size_t newlen);

 int
 sysctlbyname(const char *name, void *oldp, size_t *oldlenp,
 const void *newp, size_t newlen);

 int
 sysctlnametomib(const char *name, int *mibp, size_t *sizep);

sysctl函数参数中,namenamelen用来表明内核参数IDoldpoldlenp用来存储当前内核参数的值;而newpnewlen则用来设置新的内核参数值。如果不需要的话,可以把相应的值置成NULL
看一下sysctlbyname的实现:

int
sysctlbyname(const char *name, void *oldp, size_t *oldlenp,
    const void *newp, size_t newlen)
{
    int real_oid[CTL_MAXNAME+2];
    size_t oidlen;

    oidlen = sizeof(real_oid) / sizeof(int);
    if (sysctlnametomib(name, real_oid, &oidlen) < 0)
        return (-1);
    return (sysctl(real_oid, oidlen, oldp, oldlenp, newp, newlen));
}

可以看到,sysctlbyname首先通过sysctlnametomib获得真正的ID,接着调用sysctl完成想要的工作。

参考资料:
SYSCTL(3)
Grokking SYSCTL and the Art of Smashing Kernel Variables