Linux kernel 笔记 (41)——“inode”结构体中的“i_rdev”成员

inode结构体中有一个i_rdev成员(定义在<linux/fs.h>中):

struct inode {
    ......
    dev_t           i_rdev;
    ......
}

如果inode代表一个设备,则i_rdev的值为设备号。为了代码更好地可移植性,获取inodemajorminor号应该使用imajoriminor函数:

static inline unsigned iminor(const struct inode *inode)
{
    return MINOR(inode->i_rdev);
}

static inline unsigned imajor(const struct inode *inode)
{
    return MAJOR(inode->i_rdev);
}

 

Linux kernel 笔记 (40)——”file”和“inode”结构体的比较

LDD中对file结构体的描述:

struct file, defined in <linux/fs.h>, is the second most important data structure used in device drivers. Note that a file has nothing to do with the FILE pointers of user-space programs. A FILE is defined in the C library and never appears in kernel code. A struct file, on the other hand, is a kernel structure that never appears in user programs.

The file structure represents an open file . (It is not specific to device drivers; every open file in the system has an associated struct file in kernel space.) It is created by the kernel on open and is passed to any function that operates on the file, until the last close. After all instances of the file are closed, the kernel releases the data structure.

In the kernel sources, a pointer to struct file is usually called either file or filp (“file pointer”). We’ll consistently call the pointer filp to prevent ambiguities with the structure itself. Thus, file refers to the structure and filp to a pointer to the structure.

inode结构体的描述:

The inode structure is used by the kernel internally to represent files. Therefore, it is different from the file structure that represents an open file descriptor. There can be numerous file structures representing multiple open descriptors on a single file, but they all point to a single inode structure.

总结如下:在kernel中,每一个文件都有一个inode结构体来表示,而file结构体是和打开的文件描述符关联的。如果一个文件被打开多次,有多个文件描述符,也就相应地有多个file结构体与这个文件关联。而inode却永远只有一个。

 

libvirt笔记 (3) —— 得到virtualization host的能力信息

getCapabilities方法得到一个字符串,用来描述virtualization host的能力,以及能创建什么样的Guest OS。请看下面代码:

#!/usr/bin/python

from __future__ import print_function
import sys
import libvirt

conn = libvirt.open('xen:///')
if conn == None:
    print('Failed to open connection to xen:///', file=sys.stderr)
    exit(1)

caps = conn.getCapabilities() # caps will be a string of XML
print('Capabilities:\n'+caps)

conn.close()
exit(0)

执行如下:

Capabilities:
<capabilities>

  <host>
    <cpu>
      <arch>x86_64</arch>
      <features>
        <pae/>
      </features>
    </cpu>
    <power_management/>
    <migration_features>
      <live/>
    </migration_features>
    <topology>
      <cells num='1'>
        <cell id='0'>
          <memory unit='KiB'>1048512</memory>
          <cpus num='0'>
          </cpus>
        </cell>
      </cells>
    </topology>
  </host>

  <guest>
    <os_type>xen</os_type>
    <arch name='x86_64'>
      <wordsize>64</wordsize>
      <emulator>/usr/lib/xen/bin/qemu-system-i386</emulator>
      <machine>xenpv</machine>
      <domain type='xen'/>
    </arch>
  </guest>

  <guest>
    <os_type>xen</os_type>
    <arch name='i686'>
      <wordsize>32</wordsize>
      <emulator>/usr/lib/xen/bin/qemu-system-i386</emulator>
      <machine>xenpv</machine>
      <domain type='xen'/>
    </arch>
    <features>
      <pae/>
    </features>
  </guest>

</capabilities>

参考资料:
Capability information

 

SystemTap 笔记 (4)—— timer event

timer event会周期性执行handler。举个例子:

# stap -e 'probe timer.s(1) { printf("Hello world!\n");}'
Hello world!
Hello world!
Hello world!
Hello world!

上面脚本每隔1秒打印一次Hello world!

timer event定义如下:

timer.ms(milliseconds)
timer.us(microseconds)
timer.ns(nanoseconds)
timer.hz(hertz)
timer.jiffies(jiffies)

另外,还有一种randomize表示方式(参考自这里):

timer.jiffies(N).randomize(M)

The probe handler is run every N jiffies (a kernel-defined unit of time, typically between 1 and 60 ms). If the “randomize” component is given, a linearly distributed random value in the range [-M..+M] is added to N every time the handler is run. N is restricted to a reasonable range (1 to around a million), and M is restricted to be smaller than N.

Alternatively, intervals may be specified in units of time. There are two probe point variants similar to the jiffies timer:

timer.ms(N)

timer.ms(N).randomize(M)

Here, N and M are specified in milliseconds, but the full options for units are seconds (s/sec), milliseconds (ms/msec), microseconds (us/usec), nanoseconds (ns/nsec), and hertz (hz). Randomization is not supported for hertz timers.

最后结合一个例子看一下如何使用timer event(选自这里):

global count_jiffies, count_ms
probe timer.jiffies(100) { count_jiffies ++ }
probe timer.ms(100) { count_ms ++ }
probe timer.ms(12345)
{
  hz=(1000*count_jiffies) / count_ms
  printf ("jiffies:ms ratio %d:%d => CONFIG_HZ=%d\n",
    count_jiffies, count_ms, hz)
  exit ()
}

首先要知道,每秒发生HZjiffies

其次,每发生100jiffiescount_jiffies计数加1,所以脚本退出时,一共发生100 * count_jiffiesHZ。一共经历了count_ms / 10秒。

最后计算CONFIG_HZ(100 * count_jiffies) / (count_ms / 10) = (1000 * count_jiffies) / count_ms

 

Linux kernel 笔记 (39)——”THIS_MODULE”

THIS_MODULE是一个macro,定义在<linux/module.h>中:

#ifdef MODULE
#define MODULE_GENERIC_TABLE(gtype,name)            \
extern const struct gtype##_id __mod_##gtype##_table        \
  __attribute__ ((unused, alias(__stringify(name))))

extern struct module __this_module;
#define THIS_MODULE (&__this_module)
#else  /* !MODULE */
#define MODULE_GENERIC_TABLE(gtype,name)
#define THIS_MODULE ((struct module *)0)
#endif

THIS_MODULE即是__this_module这个变量的地址。__this_module会指向这个模块起始的地址空间,恰好是struct module变量定义的位置。

file_operations结构体的第一个成员是struct module类型的指针,定义在<linux/fs.h>中:

struct file_operations {
    struct module *owner;
    ......
}

LDD对其的解释:

struct module *owner

The first file_operations field is not an operation at all; it is a pointer to the module that “owns” the structure. This field is used to prevent the module from being unloaded while its operations are in use. Almost all the time, it is simply initialized to THIS_MODULE , a macro defined in <linux/module.h>.

owner指向绑定file_operations的模块。在大多时候,只需把THIS_MODULE赋给它即可。

参考资料:
Where is the memory allocation of “_thismodule” variable?
深入淺出 insmod, #1

 

Linux kernel 笔记 (38)——”__user”修饰符

kernel代码中,有时会看到函数声明中有的参数带有__user修饰符:

ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);

LDD给出的解释:

This annotation is a form of documentation, noting that a pointer is a user-space address that cannot be directly dereferenced. For normal compilation, __user has no effect, but it can be used by external checking software to find misuse of user-space addresses.

__user表明参数是一个用户空间的指针,不能在kernel代码中直接访问。也方便其它工具对代码进行检查。

 

调试Makefile的工具——remake

这两天在调试开源项目的一个Makefile,发现了remake这个工具(项目主页:http://bashdb.sourceforge.net/remake/),真的很好用。稍微大型点的开源项目,Makefile都很复杂,一旦出了错,很令人抓狂。而这个remake工具产生的输出能把整个编译过程的来龙去脉打印的很清楚。举例如下,一个简单的编译Linux模块的Makefile

ifneq ($(KERNELRELEASE),)
        obj-m := hello.o
else
        KDIR ?= /lib/modules/`uname -r`/build
default:
        $(MAKE) -C $(KDIR) M=$$PWD
endif

执行remake -x命令:

# remake -x
Reading makefiles...
Updating goal targets....
 File 'default' does not exist.
Must remake target 'default'.
Makefile:8: target 'default' does not exist
##>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
remake -C /lib/modules/`uname -r`/build M=$PWD
##<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
Reading makefiles...
Updating goal targets....
 File 'all' does not exist.
Must remake target 'all'.
remake[1]: Entering directory '/usr/src/linux-3.12.49-6-obj/x86_64/default'
Makefile:26: target 'all' does not exist
##>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
remake -C ../../../linux-3.12.49-6 O=/usr/src/linux-3.12.49-6-obj/x86_64/default/.
##<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
Reading makefiles...
Updating goal targets....
 File '_all' does not exist.
   File 'sub-make' does not exist.
     File 'FORCE' does not exist.
    Must remake target 'FORCE'.
    Successfully remade target file 'FORCE'.
  Must remake target 'sub-make'.
Makefile:195: update target 'sub-make' due to: FORCE
##>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
echo "make[1]: Entering directory \`/usr/src/linux-3.12.49-6-obj/x86_64/default'"
##<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
make[1]: Entering directory `/usr/src/linux-3.12.49-6-obj/x86_64/default'
##>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
remake -C /usr/src/linux-3.12.49-6-obj/x86_64/default \
KBUILD_SRC=/usr/src/linux-3.12.49-6 \
KBUILD_EXTMOD="/root/Documents/test" -f /usr/src/linux-3.12.49-6/Makefile \

##<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
......

可以看到,输出了非常详细的日志信息,很方便debug

 

libvirt笔记 (2) —— Hypervisor connections

Hypervisor connectionlibvirt一个核心概念(以下内容摘自这里):

A connection is the primary or top level object in the libvirt API and Python libvirt module. An instance of this object is required before attempting to use almost any of the classes or methods. A connection is associated with a particular hypervisor, which may be running locally on the same machine as the libvirt client application, or on a remote machine over the network. In all cases, the connection is represented by an instance of the virConnect class and identified by a URI. The URI scheme and path defines the hypervisor to connect to, while the host part of the URI determines where it is located.

An application is permitted to open multiple connections at the same time, even when using more than one type of hypervisor on a single machine. For example, a host may provide both KVM full machine virtualization and LXC container virtualization. A connection object may be used concurrently across multiple threads. Once a connection has been established, it is possible to obtain handles to other managed objects or create new managed objects.

以下代码测试Xen连接:

#!/usr/bin/python
from __future__ import print_function
import sys
import libvirt

conn = libvirt.open('xen:///')
if conn == None:
        print('Failed to open connection to xen:///', file=sys.stderr)
        exit(1)
else:
        print('Open connection success', file=sys.stdout)
        conn.close()
        exit(0)

 

libvirt笔记 (1) —— 术语

以下内容摘自Libvert terminology and goals

a node is a single physical machine

an hypervisor is a layer of software allowing to virtualize a node in a set of virtual machines with possibly different configurations than the node itself

a domain is an instance of an operating system (or subsystem in the case of container virtualization) running on a virtualized machine provided by the hypervisor Hypervisor and domains running on a node

 

node

Now we can define the goal of libvirt: to provide a common and stable layer sufficient to securely manage domains on a node, possibly remote.

libvirt中,node即指物理机器,domain可以理解为虚拟机。

 

ktap代码分析笔记 (1) —— ktap.c

ktap.c源码如下(省去版权信息):

#include <linux/version.h>
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0)
#error "Currently ktap don't support kernel older than 3.1"
#endif

#if !CONFIG_EVENT_TRACING
#error "Please enable CONFIG_EVENT_TRACING before compile ktap"
#endif

#if !CONFIG_PERF_EVENTS
#error "Please enable CONFIG_PERF_EVENTS before compile ktap"
#endif

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/fcntl.h>
#include <linux/sched.h>
#include <linux/poll.h>
#include <linux/anon_inodes.h>
#include <linux/debugfs.h>
#include <linux/vmalloc.h>
#include "../include/ktap_types.h"
#include "ktap.h"
#include "kp_bcread.h"
#include "kp_vm.h"

/* common helper function */
long gettimeofday_ns(void)
{
    struct timespec now;

    getnstimeofday(&now);
    return now.tv_sec * NSEC_PER_SEC + now.tv_nsec;
}

static int load_trunk(ktap_option_t *parm, unsigned long **buff)
{
    int ret;
    unsigned long *vmstart;

    vmstart = vmalloc(parm->trunk_len);
    if (!vmstart)
        return -ENOMEM;

    ret = copy_from_user(vmstart, (void __user *)parm->trunk,
                 parm->trunk_len);
    if (ret < 0) {
        vfree(vmstart);
        return -EFAULT;
    }

    *buff = vmstart;
    return 0;
}

static struct dentry *kp_dir_dentry;

/* Ktap Main Entry */
static int ktap_main(struct file *file, ktap_option_t *parm)
{
    unsigned long *buff = NULL;
    ktap_state_t *ks;
    ktap_proto_t *pt;
    long start_time, delta_time;
    int ret;

    start_time = gettimeofday_ns();

    ks = kp_vm_new_state(parm, kp_dir_dentry);
    if (unlikely(!ks))
        return -ENOEXEC;

    file->private_data = ks;

    ret = load_trunk(parm, &buff);
    if (ret) {
        kp_error(ks, "cannot load file\n");
        goto out;
    }

    pt = kp_bcread(ks, (unsigned char *)buff, parm->trunk_len);

    vfree(buff);

    if (pt) {
        /* validate byte code */
        if (kp_vm_validate_code(ks, pt, ks->stack))
            goto out;

        delta_time = (gettimeofday_ns() - start_time) / NSEC_PER_USEC;
        kp_verbose_printf(ks, "booting time: %d (us)\n", delta_time);

        /* enter vm */
        kp_vm_call_proto(ks, pt);
    }

 out:
    kp_vm_exit(ks);
    return ret;
}


static void print_version(void)
{
}

static long ktap_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
    ktap_option_t parm;
    int ret;

    switch (cmd) {
    case KTAP_CMD_IOC_VERSION:
        print_version();
        return 0;
    case KTAP_CMD_IOC_RUN:
        /*
         * must be root to run ktap script (at least for now)
         *
         * TODO: check perf_paranoid sysctl and allow non-root user
         * to use ktap for tracing process(like uprobe) ?
         */
        if (!capable(CAP_SYS_ADMIN))
            return -EACCES;

        ret = copy_from_user(&parm, (void __user *)arg,
                     sizeof(ktap_option_t));
        if (ret < 0)
            return -EFAULT;

        return ktap_main(file, &parm);
    default:
        return -EINVAL;
    };

        return 0;
}

static const struct file_operations ktap_fops = {
    .llseek                 = no_llseek,
    .unlocked_ioctl         = ktap_ioctl,
};

static long ktapvm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
    int new_fd, err;
    struct file *new_file;

    new_fd = get_unused_fd_flags(0);
    if (new_fd < 0)
        return new_fd;

    new_file = anon_inode_getfile("[ktap]", &ktap_fops, NULL, O_RDWR);
    if (IS_ERR(new_file)) {
        err = PTR_ERR(new_file);
        put_unused_fd(new_fd);
        return err;
    }

    file->private_data = NULL;
    fd_install(new_fd, new_file);
    return new_fd;
}

static const struct file_operations ktapvm_fops = {
    .owner  = THIS_MODULE,
    .unlocked_ioctl         = ktapvm_ioctl,
};

int (*kp_ftrace_profile_set_filter)(struct perf_event *event, int event_id,
                    const char *filter_str);

struct syscall_metadata **syscalls_metadata;

/*TODO: kill this function in future */
static int __init init_dummy_kernel_functions(void)
{
    unsigned long *addr;

    /*
     * ktap need symbol ftrace_profile_set_filter to set event filter, 
     * export it in future. 
     */
#ifdef CONFIG_PPC64
    kp_ftrace_profile_set_filter =
        (void *)kallsyms_lookup_name(".ftrace_profile_set_filter");
#else
    kp_ftrace_profile_set_filter =
        (void *)kallsyms_lookup_name("ftrace_profile_set_filter");
#endif
    if (!kp_ftrace_profile_set_filter) {
        pr_err("ktap: cannot lookup ftrace_profile_set_filter "
            "in kallsyms\n");
        return -1;
    }

    /* use syscalls_metadata for syscall event handling */
    addr = (void *)kallsyms_lookup_name("syscalls_metadata");
    if (!addr) {
        pr_err("ktap: cannot lookup syscalls_metadata in kallsyms\n");
        return -1;
    }

    syscalls_metadata = (struct syscall_metadata **)*addr;
    return 0;
}

static int __init init_ktap(void)
{
    struct dentry *ktapvm_dentry;

    if (init_dummy_kernel_functions())
        return -1;

    kp_dir_dentry = debugfs_create_dir("ktap", NULL);
    if (!kp_dir_dentry) {
        pr_err("ktap: debugfs_create_dir failed\n");
        return -1;
    }

    ktapvm_dentry = debugfs_create_file("ktapvm", 0444, kp_dir_dentry, NULL,
                        &ktapvm_fops);

    if (!ktapvm_dentry) {
        pr_err("ktapvm: cannot create ktapvm file\n");
        debugfs_remove_recursive(kp_dir_dentry);
        return -1;
    }

    return 0;
}

static void __exit exit_ktap(void)
{
    debugfs_remove_recursive(kp_dir_dentry);
}

module_init(init_ktap);
module_exit(exit_ktap);

MODULE_AUTHOR("Jovi Zhangwei <jovi.zhangwei@gmail.com>");
MODULE_DESCRIPTION("ktap");
MODULE_LICENSE("GPL");

int kp_max_loop_count = 100000;
module_param_named(max_loop_count, kp_max_loop_count, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(max_loop_count, "max loop execution count"); 

(1)

#include <linux/version.h>
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0)
#error "Currently ktap don't support kernel older than 3.1"
#endif

#if !CONFIG_EVENT_TRACING
#error "Please enable CONFIG_EVENT_TRACING before compile ktap"
#endif

#if !CONFIG_PERF_EVENTS
#error "Please enable CONFIG_PERF_EVENTS before compile ktap"
#endif

ktap目前只能在3.1.0以上的kernel版本上运行,并且要有perf的支持。

(2)

module_init(init_ktap);
module_exit(exit_ktap);

MODULE_AUTHOR("Jovi Zhangwei <jovi.zhangwei@gmail.com>");
MODULE_DESCRIPTION("ktap");
MODULE_LICENSE("GPL");

int kp_max_loop_count = 100000;
module_param_named(max_loop_count, kp_max_loop_count, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(max_loop_count, "max loop execution count"); 

以上定义了一些ktapvm.ko这个内核模块的一些信息。

(3)

static int __init init_dummy_kernel_functions(void)
{
    unsigned long *addr;

    /*
     * ktap need symbol ftrace_profile_set_filter to set event filter, 
     * export it in future. 
     */
#ifdef CONFIG_PPC64
    kp_ftrace_profile_set_filter =
        (void *)kallsyms_lookup_name(".ftrace_profile_set_filter");
#else
    kp_ftrace_profile_set_filter =
        (void *)kallsyms_lookup_name("ftrace_profile_set_filter");
#endif
    if (!kp_ftrace_profile_set_filter) {
        pr_err("ktap: cannot lookup ftrace_profile_set_filter "
            "in kallsyms\n");
        return -1;
    }

    /* use syscalls_metadata for syscall event handling */
    addr = (void *)kallsyms_lookup_name("syscalls_metadata");
    if (!addr) {
        pr_err("ktap: cannot lookup syscalls_metadata in kallsyms\n");
        return -1;
    }

    syscalls_metadata = (struct syscall_metadata **)*addr;
    return 0;
}

static int __init init_ktap(void)
{
    struct dentry *ktapvm_dentry;

    if (init_dummy_kernel_functions())
        return -1;

    kp_dir_dentry = debugfs_create_dir("ktap", NULL);
    if (!kp_dir_dentry) {
        pr_err("ktap: debugfs_create_dir failed\n");
        return -1;
    }

    ktapvm_dentry = debugfs_create_file("ktapvm", 0444, kp_dir_dentry, NULL,
                        &ktapvm_fops);

    if (!ktapvm_dentry) {
        pr_err("ktapvm: cannot create ktapvm file\n");
        debugfs_remove_recursive(kp_dir_dentry);
        return -1;
    }

    return 0;
}

static void __exit exit_ktap(void)
{
    debugfs_remove_recursive(kp_dir_dentry);
}

module_init(init_ktap);
module_exit(exit_ktap);

ktapvm.ko的加载函数init_ktap会创建/sys/kernel/debug/ktap/这个文件夹,并在下面创建ktapvm文件。kallsyms_lookup_name函数则用来查找指定symbol的内存地址,而syscalls_metadata在目前代码中并没有使用。卸载模块函数exit_ktap则会删除/sys/kernel/debug/ktap/这个文件夹以及文件夹下的所有文件。

(4)

static const struct file_operations ktapvm_fops = {
    .owner  = THIS_MODULE,
    .unlocked_ioctl         = ktapvm_ioctl,
};

以上代码定义了ktapvm.ko的核心函数:ktapvm_ioctl,对这个模块的所有的操作都是通过ktapvm_ioctl函数完成。

(5)

static long ktapvm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
    int new_fd, err;
    struct file *new_file;

    new_fd = get_unused_fd_flags(0);
    if (new_fd < 0)
        return new_fd;

    new_file = anon_inode_getfile("[ktap]", &ktap_fops, NULL, O_RDWR);
    if (IS_ERR(new_file)) {
        err = PTR_ERR(new_file);
        put_unused_fd(new_fd);
        return err;
    }

    file->private_data = NULL;
    fd_install(new_fd, new_file);
    return new_fd;
}

ktapvm模块只提供了一个ioctl函数:ktapvm_ioctl,而这个函数的作用就是生成一个[ktap]的文件描述符。

(6)

static long ktap_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
    ktap_option_t parm;
    int ret;

    switch (cmd) {
    case KTAP_CMD_IOC_VERSION:
        print_version();
        return 0;
    case KTAP_CMD_IOC_RUN:
        /*
         * must be root to run ktap script (at least for now)
         *
         * TODO: check perf_paranoid sysctl and allow non-root user
         * to use ktap for tracing process(like uprobe) ?
         */
        if (!capable(CAP_SYS_ADMIN))
            return -EACCES;

        ret = copy_from_user(&parm, (void __user *)arg,
                     sizeof(ktap_option_t));
        if (ret < 0)
            return -EFAULT;

        return ktap_main(file, &parm);
    default:
        return -EINVAL;
    };

        return 0;
}

static const struct file_operations ktap_fops = {
    .llseek                 = no_llseek,
    .unlocked_ioctl         = ktap_ioctl,
};

[ktap]文件的ioctl操作函数是ktap_ioctl,它的核心功能就是从user-space空间拷贝ktap_option_t,然后调用ktap_main函数执行ktap命令。

(7)

/* common helper function */
long gettimeofday_ns(void)
{
    struct timespec now;

    getnstimeofday(&now);
    return now.tv_sec * NSEC_PER_SEC + now.tv_nsec;
}

static int load_trunk(ktap_option_t *parm, unsigned long **buff)
{
    int ret;
    unsigned long *vmstart;

    vmstart = vmalloc(parm->trunk_len);
    if (!vmstart)
        return -ENOMEM;

    ret = copy_from_user(vmstart, (void __user *)parm->trunk,
                 parm->trunk_len);
    if (ret < 0) {
        vfree(vmstart);
        return -EFAULT;
    }

    *buff = vmstart;
    return 0;
}

gettimeofday_ns返回当前的纳秒值,而load_trunk则从user-space空间拷贝ktap命令。

(8)

static struct dentry *kp_dir_dentry;

/* Ktap Main Entry */
static int ktap_main(struct file *file, ktap_option_t *parm)
{
    unsigned long *buff = NULL;
    ktap_state_t *ks;
    ktap_proto_t *pt;
    long start_time, delta_time;
    int ret;

    start_time = gettimeofday_ns();

    ks = kp_vm_new_state(parm, kp_dir_dentry);
    if (unlikely(!ks))
        return -ENOEXEC;

    file->private_data = ks;

    ret = load_trunk(parm, &buff);
    if (ret) {
        kp_error(ks, "cannot load file\n");
        goto out;
    }

    pt = kp_bcread(ks, (unsigned char *)buff, parm->trunk_len);

    vfree(buff);

    if (pt) {
        /* validate byte code */
        if (kp_vm_validate_code(ks, pt, ks->stack))
            goto out;

        delta_time = (gettimeofday_ns() - start_time) / NSEC_PER_USEC;
        kp_verbose_printf(ks, "booting time: %d (us)\n", delta_time);

        /* enter vm */
        kp_vm_call_proto(ks, pt);
    }

 out:
    kp_vm_exit(ks);
    return ret;
}

ktap_main是运行ktap命令的函数,关于其中调用其它函数的细节,会在其它部分讨论,此处不再赘述。