ktap代码分析笔记 (1) —— ktap.c

ktap.c源码如下(省去版权信息):

#include <linux/version.h>
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0)
#error "Currently ktap don't support kernel older than 3.1"
#endif

#if !CONFIG_EVENT_TRACING
#error "Please enable CONFIG_EVENT_TRACING before compile ktap"
#endif

#if !CONFIG_PERF_EVENTS
#error "Please enable CONFIG_PERF_EVENTS before compile ktap"
#endif

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/fcntl.h>
#include <linux/sched.h>
#include <linux/poll.h>
#include <linux/anon_inodes.h>
#include <linux/debugfs.h>
#include <linux/vmalloc.h>
#include "../include/ktap_types.h"
#include "ktap.h"
#include "kp_bcread.h"
#include "kp_vm.h"

/* common helper function */
long gettimeofday_ns(void)
{
    struct timespec now;

    getnstimeofday(&now);
    return now.tv_sec * NSEC_PER_SEC + now.tv_nsec;
}

/*
 * Copy the user-supplied ktap trunk into a freshly vmalloc'ed kernel buffer.
 *
 * @parm: run options; parm->trunk is the user-space address of the trunk and
 *        parm->trunk_len its length in bytes.
 * @buff: on success, receives the kernel buffer; the caller owns it and must
 *        release it with vfree().
 *
 * Returns 0 on success, -ENOMEM if the buffer cannot be allocated, or
 * -EFAULT if the user memory cannot be read in full.
 */
static int load_trunk(ktap_option_t *parm, unsigned long **buff)
{
    unsigned long *vmstart;

    vmstart = vmalloc(parm->trunk_len);
    if (!vmstart)
        return -ENOMEM;

    /*
     * copy_from_user() returns the number of bytes it could NOT copy and
     * never a negative value, so any non-zero result is a failure.
     */
    if (copy_from_user(vmstart, (void __user *)parm->trunk,
                       parm->trunk_len)) {
        vfree(vmstart);
        return -EFAULT;
    }

    *buff = vmstart;
    return 0;
}

static struct dentry *kp_dir_dentry;

/*
 * Ktap Main Entry
 *
 * Create a VM state for this run, load the trunk from user space, read it
 * with kp_bcread(), validate the resulting proto and, if that succeeds, run
 * it in the VM.  Once the VM state exists it is always released with
 * kp_vm_exit() before returning.
 *
 * @file: the file the ioctl was issued on; its private_data is set to the
 *        new VM state.
 * @parm: run options already copied in from user space.
 *
 * Returns -ENOEXEC if the VM state cannot be created, the load_trunk() error
 * if the trunk cannot be loaded, and 0 otherwise.
 *
 * NOTE(review): a kp_bcread() or kp_vm_validate_code() failure still returns
 * 0 because ret is only ever assigned from load_trunk() -- confirm whether
 * callers should see an error in those cases.
 */
static int ktap_main(struct file *file, ktap_option_t *parm)
{
    unsigned long *buff = NULL;
    ktap_state_t *ks;
    ktap_proto_t *pt;
    long start_time, delta_time;
    int ret;

    start_time = gettimeofday_ns();

    ks = kp_vm_new_state(parm, kp_dir_dentry);
    if (unlikely(!ks))
        return -ENOEXEC;

    file->private_data = ks;

    ret = load_trunk(parm, &buff);
    if (ret) {
        kp_error(ks, "cannot load file\n");
        goto out;
    }

    pt = kp_bcread(ks, (unsigned char *)buff, parm->trunk_len);

    /* the raw trunk buffer is released as soon as kp_bcread() returns */
    vfree(buff);

    if (pt) {
        /* validate byte code */
        if (kp_vm_validate_code(ks, pt, ks->stack))
            goto out;

        /* delta_time is a long, so it has to be printed with %ld */
        delta_time = (gettimeofday_ns() - start_time) / NSEC_PER_USEC;
        kp_verbose_printf(ks, "booting time: %ld (us)\n", delta_time);

        /* enter vm */
        kp_vm_call_proto(ks, pt);
    }

 out:
    kp_vm_exit(ks);
    return ret;
}


/* Handler for KTAP_CMD_IOC_VERSION; currently an empty placeholder. */
static void print_version(void)
{
}

/*
 * ioctl handler of the "[ktap]" anonymous file.
 *
 * @file: the file the ioctl was issued on.
 * @cmd:  KTAP_CMD_IOC_VERSION or KTAP_CMD_IOC_RUN.
 * @arg:  for KTAP_CMD_IOC_RUN, the user-space address of a ktap_option_t.
 *
 * Returns 0 for KTAP_CMD_IOC_VERSION; for KTAP_CMD_IOC_RUN the result of
 * ktap_main(), or -EACCES without CAP_SYS_ADMIN, or -EFAULT if the options
 * cannot be read from user space; -EINVAL for any other command.
 */
static long ktap_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
    ktap_option_t parm;

    switch (cmd) {
    case KTAP_CMD_IOC_VERSION:
        print_version();
        return 0;
    case KTAP_CMD_IOC_RUN:
        /*
         * must be root to run ktap script (at least for now)
         *
         * TODO: check perf_paranoid sysctl and allow non-root user
         * to use ktap for tracing process(like uprobe) ?
         */
        if (!capable(CAP_SYS_ADMIN))
            return -EACCES;

        /*
         * copy_from_user() returns the number of bytes left uncopied,
         * never a negative value: any non-zero result is a fault.
         */
        if (copy_from_user(&parm, (void __user *)arg, sizeof(parm)))
            return -EFAULT;

        return ktap_main(file, &parm);
    default:
        return -EINVAL;
    }
}

static const struct file_operations ktap_fops = {
    .llseek                 = no_llseek,
    .unlocked_ioctl         = ktap_ioctl,
};

static long ktapvm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
    int new_fd, err;
    struct file *new_file;

    new_fd = get_unused_fd_flags(0);
    if (new_fd < 0)
        return new_fd;

    new_file = anon_inode_getfile("[ktap]", &ktap_fops, NULL, O_RDWR);
    if (IS_ERR(new_file)) {
        err = PTR_ERR(new_file);
        put_unused_fd(new_fd);
        return err;
    }

    file->private_data = NULL;
    fd_install(new_fd, new_file);
    return new_fd;
}

/*
 * File operations of the debugfs "ktapvm" file: the only operation provided
 * is the ioctl handled by ktapvm_ioctl().
 */
static const struct file_operations ktapvm_fops = {
    .owner  = THIS_MODULE,
    .unlocked_ioctl         = ktapvm_ioctl,
};

/*
 * Pointer to the kernel's ftrace_profile_set_filter(), resolved through
 * kallsyms by init_dummy_kernel_functions().
 */
int (*kp_ftrace_profile_set_filter)(struct perf_event *event, int event_id,
                    const char *filter_str);

/*
 * Value read from the kernel's syscalls_metadata symbol by
 * init_dummy_kernel_functions().
 */
struct syscall_metadata **syscalls_metadata;

/*
 * Resolve, through kallsyms, the kernel symbols ktap needs and store them in
 * kp_ftrace_profile_set_filter and syscalls_metadata.
 *
 * Returns 0 on success, or -ENOENT if either symbol cannot be found.
 *
 * TODO: kill this function in future
 */
static int __init init_dummy_kernel_functions(void)
{
    unsigned long *addr;

    /*
     * ktap need symbol ftrace_profile_set_filter to set event filter,
     * export it in future.
     */
#ifdef CONFIG_PPC64
    kp_ftrace_profile_set_filter =
        (void *)kallsyms_lookup_name(".ftrace_profile_set_filter");
#else
    kp_ftrace_profile_set_filter =
        (void *)kallsyms_lookup_name("ftrace_profile_set_filter");
#endif
    if (!kp_ftrace_profile_set_filter) {
        pr_err("ktap: cannot lookup ftrace_profile_set_filter "
            "in kallsyms\n");
        /* a bare -1 would be reported as -EPERM; say what really failed */
        return -ENOENT;
    }

    /* use syscalls_metadata for syscall event handling */
    addr = (void *)kallsyms_lookup_name("syscalls_metadata");
    if (!addr) {
        pr_err("ktap: cannot lookup syscalls_metadata in kallsyms\n");
        return -ENOENT;
    }

    /* addr is the address of the kernel variable: read the pointer it holds */
    syscalls_metadata = (struct syscall_metadata **)*addr;
    return 0;
}

/*
 * Module load entry point.
 *
 * Resolves the required kernel symbols, then creates the debugfs directory
 * "ktap" and the file "ktapvm" inside it.
 *
 * Returns 0 on success or a negative error code: the error reported by
 * init_dummy_kernel_functions(), or the debugfs error (-ENOMEM when debugfs
 * only reports failure as NULL).
 */
static int __init init_ktap(void)
{
    struct dentry *ktapvm_dentry;
    int ret;

    ret = init_dummy_kernel_functions();
    if (ret)
        return ret;

    /*
     * Depending on kernel version and configuration, debugfs reports
     * failure either as NULL or as an ERR_PTR(); handle both.
     */
    kp_dir_dentry = debugfs_create_dir("ktap", NULL);
    if (IS_ERR_OR_NULL(kp_dir_dentry)) {
        pr_err("ktap: debugfs_create_dir failed\n");
        return kp_dir_dentry ? PTR_ERR(kp_dir_dentry) : -ENOMEM;
    }

    ktapvm_dentry = debugfs_create_file("ktapvm", 0444, kp_dir_dentry, NULL,
                        &ktapvm_fops);
    if (IS_ERR_OR_NULL(ktapvm_dentry)) {
        pr_err("ktapvm: cannot create ktapvm file\n");
        ret = ktapvm_dentry ? PTR_ERR(ktapvm_dentry) : -ENOMEM;
        debugfs_remove_recursive(kp_dir_dentry);
        return ret;
    }

    return 0;
}

/*
 * Module unload entry point: remove the debugfs directory created by
 * init_ktap() together with everything below it.
 */
static void __exit exit_ktap(void)
{
    debugfs_remove_recursive(kp_dir_dentry);
}

/* register the module load and unload entry points */
module_init(init_ktap);
module_exit(exit_ktap);

MODULE_AUTHOR("Jovi Zhangwei <jovi.zhangwei@gmail.com>");
MODULE_DESCRIPTION("ktap");
MODULE_LICENSE("GPL");

/*
 * Exposed as the module parameter "max_loop_count" (default 100000) with
 * permissions S_IRUGO | S_IWUSR: readable by everyone, writable by the owner.
 */
int kp_max_loop_count = 100000;
module_param_named(max_loop_count, kp_max_loop_count, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(max_loop_count, "max loop execution count"); 

(1)

#include <linux/version.h>
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0)
#error "Currently ktap don't support kernel older than 3.1"
#endif

#if !CONFIG_EVENT_TRACING
#error "Please enable CONFIG_EVENT_TRACING before compile ktap"
#endif

#if !CONFIG_PERF_EVENTS
#error "Please enable CONFIG_PERF_EVENTS before compile ktap"
#endif

ktap目前只能在3.1.0及以上版本的kernel上编译运行,并且内核必须开启CONFIG_EVENT_TRACING和CONFIG_PERF_EVENTS,即需要事件跟踪和perf的支持。

(2)

/* register the module load and unload entry points */
module_init(init_ktap);
module_exit(exit_ktap);

MODULE_AUTHOR("Jovi Zhangwei <jovi.zhangwei@gmail.com>");
MODULE_DESCRIPTION("ktap");
MODULE_LICENSE("GPL");

/*
 * Exposed as the module parameter "max_loop_count" (default 100000) with
 * permissions S_IRUGO | S_IWUSR: readable by everyone, writable by the owner.
 */
int kp_max_loop_count = 100000;
module_param_named(max_loop_count, kp_max_loop_count, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(max_loop_count, "max loop execution count"); 

以上代码指定了ktapvm.ko这个内核模块的加载/卸载函数,定义了模块的作者、描述、许可证等信息,并声明了模块参数max_loop_count。

(3)

/*
 * Resolve, through kallsyms, the kernel symbols ktap needs and store them in
 * kp_ftrace_profile_set_filter and syscalls_metadata.
 *
 * Returns 0 on success, or -ENOENT if either symbol cannot be found.
 */
static int __init init_dummy_kernel_functions(void)
{
    unsigned long *addr;

    /*
     * ktap need symbol ftrace_profile_set_filter to set event filter,
     * export it in future.
     */
#ifdef CONFIG_PPC64
    kp_ftrace_profile_set_filter =
        (void *)kallsyms_lookup_name(".ftrace_profile_set_filter");
#else
    kp_ftrace_profile_set_filter =
        (void *)kallsyms_lookup_name("ftrace_profile_set_filter");
#endif
    if (!kp_ftrace_profile_set_filter) {
        pr_err("ktap: cannot lookup ftrace_profile_set_filter "
            "in kallsyms\n");
        /* a bare -1 would be reported as -EPERM; say what really failed */
        return -ENOENT;
    }

    /* use syscalls_metadata for syscall event handling */
    addr = (void *)kallsyms_lookup_name("syscalls_metadata");
    if (!addr) {
        pr_err("ktap: cannot lookup syscalls_metadata in kallsyms\n");
        return -ENOENT;
    }

    /* addr is the address of the kernel variable: read the pointer it holds */
    syscalls_metadata = (struct syscall_metadata **)*addr;
    return 0;
}

/*
 * Module load entry point.
 *
 * Resolves the required kernel symbols, then creates the debugfs directory
 * "ktap" and the file "ktapvm" inside it.
 *
 * Returns 0 on success or a negative error code: the error reported by
 * init_dummy_kernel_functions(), or the debugfs error (-ENOMEM when debugfs
 * only reports failure as NULL).
 */
static int __init init_ktap(void)
{
    struct dentry *ktapvm_dentry;
    int ret;

    ret = init_dummy_kernel_functions();
    if (ret)
        return ret;

    /*
     * Depending on kernel version and configuration, debugfs reports
     * failure either as NULL or as an ERR_PTR(); handle both.
     */
    kp_dir_dentry = debugfs_create_dir("ktap", NULL);
    if (IS_ERR_OR_NULL(kp_dir_dentry)) {
        pr_err("ktap: debugfs_create_dir failed\n");
        return kp_dir_dentry ? PTR_ERR(kp_dir_dentry) : -ENOMEM;
    }

    ktapvm_dentry = debugfs_create_file("ktapvm", 0444, kp_dir_dentry, NULL,
                        &ktapvm_fops);
    if (IS_ERR_OR_NULL(ktapvm_dentry)) {
        pr_err("ktapvm: cannot create ktapvm file\n");
        ret = ktapvm_dentry ? PTR_ERR(ktapvm_dentry) : -ENOMEM;
        debugfs_remove_recursive(kp_dir_dentry);
        return ret;
    }

    return 0;
}

/*
 * Module unload entry point: remove the debugfs directory created by
 * init_ktap() together with everything below it.
 */
static void __exit exit_ktap(void)
{
    debugfs_remove_recursive(kp_dir_dentry);
}

/* register the module load and unload entry points */
module_init(init_ktap);
module_exit(exit_ktap);

ktapvm.ko的加载函数init_ktap会创建/sys/kernel/debug/ktap/这个文件夹,并在下面创建ktapvm文件。kallsyms_lookup_name函数则用来查找指定symbol的内存地址,而syscalls_metadata在目前代码中并没有使用。卸载模块函数exit_ktap则会删除/sys/kernel/debug/ktap/这个文件夹以及文件夹下的所有文件。

(4)

/*
 * File operations of the debugfs "ktapvm" file: the only operation provided
 * is the ioctl handled by ktapvm_ioctl().
 */
static const struct file_operations ktapvm_fops = {
    .owner  = THIS_MODULE,
    .unlocked_ioctl         = ktapvm_ioctl,
};

以上代码定义了ktapvm.ko的核心函数:ktapvm_ioctl,对这个模块的所有的操作都是通过ktapvm_ioctl函数完成。

(5)

static long ktapvm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
    int new_fd, err;
    struct file *new_file;

    new_fd = get_unused_fd_flags(0);
    if (new_fd < 0)
        return new_fd;

    new_file = anon_inode_getfile("[ktap]", &ktap_fops, NULL, O_RDWR);
    if (IS_ERR(new_file)) {
        err = PTR_ERR(new_file);
        put_unused_fd(new_fd);
        return err;
    }

    file->private_data = NULL;
    fd_install(new_fd, new_file);
    return new_fd;
}

ktapvm模块只提供了一个ioctl函数:ktapvm_ioctl,而这个函数的作用就是生成一个[ktap]的文件描述符。

(6)

/*
 * ioctl handler of the "[ktap]" anonymous file.
 *
 * @file: the file the ioctl was issued on.
 * @cmd:  KTAP_CMD_IOC_VERSION or KTAP_CMD_IOC_RUN.
 * @arg:  for KTAP_CMD_IOC_RUN, the user-space address of a ktap_option_t.
 *
 * Returns 0 for KTAP_CMD_IOC_VERSION; for KTAP_CMD_IOC_RUN the result of
 * ktap_main(), or -EACCES without CAP_SYS_ADMIN, or -EFAULT if the options
 * cannot be read from user space; -EINVAL for any other command.
 */
static long ktap_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
    ktap_option_t parm;

    switch (cmd) {
    case KTAP_CMD_IOC_VERSION:
        print_version();
        return 0;
    case KTAP_CMD_IOC_RUN:
        /*
         * must be root to run ktap script (at least for now)
         *
         * TODO: check perf_paranoid sysctl and allow non-root user
         * to use ktap for tracing process(like uprobe) ?
         */
        if (!capable(CAP_SYS_ADMIN))
            return -EACCES;

        /*
         * copy_from_user() returns the number of bytes left uncopied,
         * never a negative value: any non-zero result is a fault.
         */
        if (copy_from_user(&parm, (void __user *)arg, sizeof(parm)))
            return -EFAULT;

        return ktap_main(file, &parm);
    default:
        return -EINVAL;
    }
}

static const struct file_operations ktap_fops = {
    .llseek                 = no_llseek,
    .unlocked_ioctl         = ktap_ioctl,
};

[ktap]文件的ioctl操作函数是ktap_ioctl,它的核心功能就是从user-space空间拷贝ktap_option_t,然后调用ktap_main函数执行ktap命令。

(7)

/* common helper function */
long gettimeofday_ns(void)
{
    struct timespec now;

    getnstimeofday(&now);
    return now.tv_sec * NSEC_PER_SEC + now.tv_nsec;
}

/*
 * Copy the user-supplied ktap trunk into a freshly vmalloc'ed kernel buffer.
 *
 * @parm: run options; parm->trunk is the user-space address of the trunk and
 *        parm->trunk_len its length in bytes.
 * @buff: on success, receives the kernel buffer; the caller owns it and must
 *        release it with vfree().
 *
 * Returns 0 on success, -ENOMEM if the buffer cannot be allocated, or
 * -EFAULT if the user memory cannot be read in full.
 */
static int load_trunk(ktap_option_t *parm, unsigned long **buff)
{
    unsigned long *vmstart;

    vmstart = vmalloc(parm->trunk_len);
    if (!vmstart)
        return -ENOMEM;

    /*
     * copy_from_user() returns the number of bytes it could NOT copy and
     * never a negative value, so any non-zero result is a failure.
     */
    if (copy_from_user(vmstart, (void __user *)parm->trunk,
                       parm->trunk_len)) {
        vfree(vmstart);
        return -EFAULT;
    }

    *buff = vmstart;
    return 0;
}

gettimeofday_ns返回以纳秒为单位的当前时间,而load_trunk则把用户传入的ktap字节码(trunk)从user-space拷贝到内核空间中新分配的缓冲区。

(8)

static struct dentry *kp_dir_dentry;

/*
 * Ktap Main Entry
 *
 * Create a VM state for this run, load the trunk from user space, read it
 * with kp_bcread(), validate the resulting proto and, if that succeeds, run
 * it in the VM.  Once the VM state exists it is always released with
 * kp_vm_exit() before returning.
 *
 * @file: the file the ioctl was issued on; its private_data is set to the
 *        new VM state.
 * @parm: run options already copied in from user space.
 *
 * Returns -ENOEXEC if the VM state cannot be created, the load_trunk() error
 * if the trunk cannot be loaded, and 0 otherwise.
 *
 * NOTE(review): a kp_bcread() or kp_vm_validate_code() failure still returns
 * 0 because ret is only ever assigned from load_trunk() -- confirm whether
 * callers should see an error in those cases.
 */
static int ktap_main(struct file *file, ktap_option_t *parm)
{
    unsigned long *buff = NULL;
    ktap_state_t *ks;
    ktap_proto_t *pt;
    long start_time, delta_time;
    int ret;

    start_time = gettimeofday_ns();

    ks = kp_vm_new_state(parm, kp_dir_dentry);
    if (unlikely(!ks))
        return -ENOEXEC;

    file->private_data = ks;

    ret = load_trunk(parm, &buff);
    if (ret) {
        kp_error(ks, "cannot load file\n");
        goto out;
    }

    pt = kp_bcread(ks, (unsigned char *)buff, parm->trunk_len);

    /* the raw trunk buffer is released as soon as kp_bcread() returns */
    vfree(buff);

    if (pt) {
        /* validate byte code */
        if (kp_vm_validate_code(ks, pt, ks->stack))
            goto out;

        /* delta_time is a long, so it has to be printed with %ld */
        delta_time = (gettimeofday_ns() - start_time) / NSEC_PER_USEC;
        kp_verbose_printf(ks, "booting time: %ld (us)\n", delta_time);

        /* enter vm */
        kp_vm_call_proto(ks, pt);
    }

 out:
    kp_vm_exit(ks);
    return ret;
}

ktap_main是运行ktap命令的函数,关于其中调用其它函数的细节,会在其它部分讨论,此处不再赘述。

 

Linux kernel 笔记 (34)——模块参数

module_param和module_param_named定义在<linux/moduleparam.h>文件:

/**
 * module_param - typesafe helper for a module/cmdline parameter
 * @value: the variable to alter, and exposed parameter name.
 * @type: the type of the parameter
 * @perm: visibility in sysfs.
 *
 * @value becomes the module parameter, or (prefixed by KBUILD_MODNAME and a
 * ".") the kernel commandline parameter.  Note that - is changed to _, so
 * the user can use "foo-bar=1" even for variable "foo_bar".
 *
 * @perm is 0 if the variable is not to appear in sysfs, or 0444
 * for world-readable, 0644 for root-writable, etc.  Note that if it
 * is writable, you may need to use kparam_block_sysfs_write() around
 * accesses (esp. charp, which can be kfreed when it changes).
 *
 * The @type is simply pasted to refer to a param_ops_##type and a
 * param_check_##type: for convenience many standard types are provided but
 * you can create your own by defining those variables.
 *
 * Standard types are:
 *  byte, short, ushort, int, uint, long, ulong
 *  charp: a character pointer
 *  bool: a bool, values 0/1, y/n, Y/N.
 *  invbool: the above, only sense-reversed (N = true).
 */
#define module_param(name, type, perm)              \
    module_param_named(name, name, type, perm)

/**
 * module_param_named - typesafe helper for a renamed module/cmdline parameter
 * @name: a valid C identifier which is the parameter name.
 * @value: the actual lvalue to alter.
 * @type: the type of the parameter
 * @perm: visibility in sysfs.
 *
 * Usually it's a good idea to have variable names and user-exposed names the
 * same, but that's harder if the variable must be non-static or is inside a
 * structure.  This allows exposure under a different name.
 *
 * Expands to three pieces: param_check_##type() on @name and the address of
 * @value, module_param_cb() with param_ops_##type, and
 * __MODULE_PARM_TYPE() with the stringified @type.
 */
#define module_param_named(name, value, type, perm)            \
    param_check_##type(name, &(value));                \
    module_param_cb(name, &param_ops_##type, &value, perm);        \
    __MODULE_PARM_TYPE(name, #type)

module_param用来定义一个模块参数,type指定类型(int、bool等等),perm指定用户访问权限,取值如下(<linux/stat.h>):

/* owner (user) permission bits: rwx mask, read, write, execute */
#define S_IRWXU 00700
#define S_IRUSR 00400
#define S_IWUSR 00200
#define S_IXUSR 00100

/* group permission bits: rwx mask, read, write, execute */
#define S_IRWXG 00070
#define S_IRGRP 00040
#define S_IWGRP 00020
#define S_IXGRP 00010

/* other (world) permission bits: rwx mask, read, write, execute */
#define S_IRWXO 00007
#define S_IROTH 00004
#define S_IWOTH 00002
#define S_IXOTH 00001

/* combinations that cover user, group and other at once */
#define S_IRWXUGO   (S_IRWXU|S_IRWXG|S_IRWXO)
#define S_IALLUGO   (S_ISUID|S_ISGID|S_ISVTX|S_IRWXUGO)
#define S_IRUGO     (S_IRUSR|S_IRGRP|S_IROTH)
#define S_IWUGO     (S_IWUSR|S_IWGRP|S_IWOTH)
#define S_IXUGO     (S_IXUSR|S_IXGRP|S_IXOTH)

module_param_named则是为变量取一个可读性更好的名字。

以ktap源码为例:

/*
 * Exposed as the module parameter "max_loop_count" (default 100000) with
 * permissions S_IRUGO | S_IWUSR: readable by everyone, writable by the owner.
 */
int kp_max_loop_count = 100000;
module_param_named(max_loop_count, kp_max_loop_count, int, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(max_loop_count, "max loop execution count");

加载ktapvm模块,读取kp_max_loop_count的值:

[root@Linux ~]# cat /sys/module/ktapvm/parameters/max_loop_count
100000
[root@Linux ~]# ls -lt /sys/module/ktapvm/parameters/max_loop_count
-rw-r--r--. 1 root root 4096 Oct 22 22:51 /sys/module/ktapvm/parameters/max_loop_count

可以看到kp_max_loop_count变量在/sys/module/ktapvm/parameters文件夹下的名字是max_loop_count,值是100000,只有root用户拥有写权限。可以通过修改这个文件达到改变kp_max_loop_count变量的目的:

[root@Linux ~]# echo 200000 > /sys/module/ktapvm/parameters/max_loop_count
[root@Linux ~]# cat /sys/module/ktapvm/parameters/max_loop_count
200000

MODULE_PARM_DESC用来定义参数的描述信息,使用modinfo命令可以查看:

[root@Linux ~]# modinfo ktapvm.ko
.....
parm:           max_loop_count:max loop execution count (int)

参考资料:
Everything You Wanted to Know About Module Parameters