Linux kernel IOMMU代码分析笔记(5)——Interrupt Remapping初始化相关部分

irq_remap_ops定义在drivers\iommu\irq_remapping.h中:

/*
 * Table of callbacks implemented by an interrupt-remapping backend.
 * Each member is a function pointer; the comment above it states its role.
 */
struct irq_remap_ops {
    /* Report whether Interrupt Remapping is supported. */
    int (*supported)(void);

    /* Initialize the hardware so it is ready to remap interrupts. */
    int (*prepare)(void);

    /* Turn the remapping hardware on. */
    int (*enable)(void);

    /* Turn the remapping hardware off. */
    void (*disable)(void);

    /* Turn the remapping hardware back on. */
    int (*reenable)(int);

    /* Switch on fault handling. */
    int (*enable_faulting)(void);

    /* Routine that sets up an IO-APIC entry. */
    int (*setup_ioapic_entry)(int irq, struct IO_APIC_route_entry *,
                              unsigned int, int,
                              struct io_apic_irq_attr *);

    /* Change the CPU affinity of a remapped interrupt. */
    int (*set_affinity)(struct irq_data *data, const struct cpumask *mask,
                        bool force);

    /* Release an IRQ. */
    int (*free_irq)(int);

    /* Build the MSI message used for interrupt remapping. */
    void (*compose_msi_msg)(struct pci_dev *,
                            unsigned int, unsigned int,
                            struct msi_msg *, u8);

    /* Reserve remapping resources for MSI. */
    int (*msi_alloc_irq)(struct pci_dev *, int, int);

    /* Configure the remapped MSI irq. */
    int (*msi_setup_irq)(struct pci_dev *, unsigned int, int, int);

    /* Configure interrupt remapping for an HPET MSI. */
    int (*setup_hpet_msi)(unsigned int, unsigned int);
};

extern struct irq_remap_ops intel_irq_remap_ops;
extern struct irq_remap_ops amd_iommu_irq_ops;

可以看到,在结构体中定义了一系列的函数指针,每个函数的作用都写得很清楚。

针对intel处理器的结构体定义在drivers\iommu\intel_irq_remapping.c

/* Callback table that wires the Intel routines into irq_remap_ops. */
struct irq_remap_ops intel_irq_remap_ops = {
    .supported          = intel_irq_remapping_supported,
    .prepare            = dmar_table_init,
    .enable             = intel_enable_irq_remapping,
    .disable            = disable_irq_remapping,
    .reenable           = reenable_irq_remapping,
    .enable_faulting    = enable_drhd_fault_handling,
    .setup_ioapic_entry = intel_setup_ioapic_entry,
    .set_affinity       = intel_ioapic_set_affinity,
    .free_irq           = free_irte,
    .compose_msi_msg    = intel_compose_msi_msg,
    .msi_alloc_irq      = intel_msi_alloc_irq,
    .msi_setup_irq      = intel_msi_setup_irq,
    .setup_hpet_msi     = intel_setup_hpet_msi,
};

AMD相关的暂不讨论。

enable_IR_x2apic函数(arch\x86\kernel\apic)中,包含着irq_remap_ops中初始化相关函数的调用:

/*
 * Abridged listing: "......" marks code omitted from this excerpt.
 * The visible part selects the remapping ops, prepares the hardware and,
 * if preparation succeeded, tries to enable interrupt remapping.
 */
void __init enable_IR_x2apic(void)
{
    ......
    /* Make sure irq_remap_ops are initialized */
    setup_irq_remapping_ops();

    /* A non-zero result is treated as a failed preparation below. */
    hardware_init_ret = irq_remapping_prepare();
    /* Preparation failed and x2apic is not supported: nothing more to do. */
    if (hardware_init_ret && !x2apic_supported())
        return;

    ......   
    /* Skip enable_IR() entirely when preparation failed. */
    if (hardware_init_ret)
        ret = -1;
    else
        ret = enable_IR();

    ......
}

(1)

/*
 * Choose which callback table the global remap_ops pointer refers to.
 */
void __init setup_irq_remapping_ops(void)
{
    /* Start with the Intel table. */
    remap_ops = &intel_irq_remap_ops;

#ifdef CONFIG_AMD_IOMMU
    /* Switch to the AMD table when its prepare() callback returns 0. */
    if (!amd_iommu_irq_ops.prepare())
        remap_ops = &amd_iommu_irq_ops;
#endif
}

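这个函数会让全局变量remap_ops这个指针指向intel_irq_remap_ops或amd_iommu_irq_ops:默认指向intel_irq_remap_ops;如果定义了CONFIG_AMD_IOMMU并且amd_iommu_irq_ops.prepare()返回0,则改为指向amd_iommu_irq_ops。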

(2)

/*
 * Run the prepare() callback of the selected ops table.
 *
 * Returns the callback's result, or -ENODEV when no ops table is set or
 * the table has no prepare() callback.
 */
int __init irq_remapping_prepare(void)
{
    if (remap_ops && remap_ops->prepare)
        return remap_ops->prepare();

    return -ENODEV;
}

irq_remapping_prepare会调用prepare函数(Intel平台即dmar_table_init)。

(3)

/*
 * Try to enable interrupt remapping.
 *
 * Returns the result of irq_remapping_enable(), or -1 when remapping is
 * unsupported, when IO-APIC setup is being skipped without x2apic being
 * pre-enabled, or when CONFIG_IRQ_REMAP is not defined.
 */
int __init enable_IR(void)
{
#ifdef CONFIG_IRQ_REMAP
    if (!irq_remapping_supported()) {
        pr_debug("intr-remapping not supported\n");
        return -1;
    }

    if (skip_ioapic_setup && !x2apic_preenabled) {
        pr_info("Skipped enabling intr-remap because of skipping io-apic setup\n");
        return -1;
    }

    return irq_remapping_enable();
#else
    return -1;
#endif
}

enable_IR中会调用irq_remapping_supported和irq_remapping_enable:

/*
 * Ask the selected ops table whether interrupt remapping is supported.
 *
 * Returns 0 when disable_irq_remap is set, when no ops table is selected,
 * or when the table has no supported() callback; otherwise returns the
 * callback's result.
 */
int irq_remapping_supported(void)
{
    if (disable_irq_remap || !remap_ops || !remap_ops->supported)
        return 0;

    return remap_ops->supported();
}

/*
 * Run the enable() callback of the selected ops table.
 *
 * Returns -ENODEV when no ops table or no enable() callback is available,
 * otherwise the callback's result.
 */
int __init irq_remapping_enable(void)
{
    int status;

    if (!(remap_ops && remap_ops->enable))
        return -ENODEV;

    status = remap_ops->enable();

    /* Install the x86 hooks only if remapping is flagged as enabled. */
    if (irq_remapping_enabled)
        irq_remapping_modify_x86_ops();

    return status;
}

会先后调用supported函数(Intel平台即为intel_irq_remapping_supported)和enable函数(Intel平台即为intel_enable_irq_remapping)。

irq_remapping_modify_x86_ops会把其它函数赋值给相应的全局函数指针:

/*
 * Point the global x86 IO-APIC and MSI function pointers at the
 * routines listed below.
 */
static void __init irq_remapping_modify_x86_ops(void)
{
    /* IO-APIC hooks */
    x86_io_apic_ops.disable = irq_remapping_disable_io_apic;
    x86_io_apic_ops.set_affinity = set_remapped_irq_affinity;
    x86_io_apic_ops.setup_entry = setup_ioapic_remapped_entry;
    x86_io_apic_ops.eoi_ioapic_pin = eoi_ioapic_pin_remapped;

    /* MSI hooks */
    x86_msi.setup_msi_irqs = irq_remapping_setup_msi_irqs;
    x86_msi.setup_hpet_msi = setup_hpet_msi_remapped;
    x86_msi.compose_msi_msg = compose_remapped_msi_msg;
}

Shark代码分析笔记(2)——main函数

看一下main函数:

/*
 * Program entry point.
 *
 * Creates the Lua state, parses the command line, registers the built-in
 * "uv" and "shark" modules, publishes the command line as the global
 * table "arg", runs the embedded shark_init chunk, then loads and runs the
 * user script and finally drives the event loop.
 *
 * Returns 0 on success or after printing the usage text, EXIT_FAILURE when
 * the Lua state cannot be created, and the Lua status code reported by
 * lua_report() when the init chunk or the script fails to load.  A runtime
 * error in the script terminates the process with EXIT_FAILURE.
 */
int main(int argc, char **argv)
{
    int ret = EXIT_FAILURE;
    int flags = 0, script;
    int base;

    if (argv[0] && argv[0][0]) progname = argv[0];

    lua_State *ls = lua_open();
    if (!ls) {
        l_message(progname, "cannot create state: not enough memory");
        return ret;
    }

    g_ls = ls;

    script = collectargs(argv, &flags);
    if (script <= 0) {  /* invalid args? */
        print_usage();
        lua_close(ls);  /* release the state instead of leaking it */
        return 0;
    }

    if (flags & FLAGS_VERSION)
        print_version();

    luaL_openlibs(ls);  /* open libraries */

    /* Get package.preload so we can store builtins in it. */
    lua_getglobal(ls, "package");
    lua_getfield(ls, -1, "preload");
    lua_remove(ls, -2);  /* remove package, keep package.preload */

    /* Store uv module definition at preload.uv */
    lua_pushcfunction(ls, luaopen_luv);
    lua_setfield(ls, -2, "uv");

    luaL_openlib(ls, "shark", ll_shark, 0);

    /* shark.verbose = shark_verbose */
    lua_getglobal(ls, "shark");
    lua_pushboolean(ls, shark_verbose);
    lua_setfield(ls, -2, "verbose");
    lua_pop(ls, 1);

    /*
     * getargs() pushes the script arguments followed by the argument
     * table; the table is popped into the global "arg".  The argument
     * count it returns is not needed because the script is called with
     * zero arguments below.
     */
    getargs(ls, argv, script);
    lua_setglobal(ls, "arg");

#include "shark_init.h"
    /* Report a load failure directly instead of trying to call the error. */
    ret = luaL_loadbuffer(ls, luaJIT_BC_shark_init, luaJIT_BC_shark_init_SIZE,
            NULL);
    if (ret == 0)
        ret = lua_pcall(ls, 0, 0, 0);
    if (ret) {
        ret = lua_report(ls, ret);
        goto out;
    }

    g_event_loop = luv_loop(ls);

    if ((ret = luaL_loadfile(ls, argv[script]))) {
        ret = lua_report(ls, ret);
        goto out;
    }

    /*
     * Place the traceback handler directly below the loaded chunk so that
     * "base" is the handler's index for lua_pcall().
     */
    base = lua_gettop(ls);
    lua_pushcfunction(ls, lua_traceback);
    lua_insert(ls, base);

    if (lua_pcall(ls, 0, 0, base)) {
        /* lua_tostring() yields NULL for a non-string error object. */
        const char *msg = lua_tostring(ls, -1);

        fprintf(stderr, "%s\n", msg ? msg : "(error object is not a string)");
        exit(EXIT_FAILURE);
    }

    lua_pop(ls, 1);  /* drop the traceback handler */

    //TODO: move to lua init code
    uv_run(g_event_loop, UV_RUN_DEFAULT);

    ret = 0;
 out:
    lua_close(ls);
    return ret;
}

(1)

if (argv[0] && argv[0][0]) progname = argv[0];

progname存的是运行程序名字:/path/to/shark

(2)

    lua_State *ls = lua_open();
    if (!ls) {
        l_message(progname, "cannot create state: not enough memory");
        return ret;
    }

    g_ls = ls;

创建一个新的Lua运行环境,为了后续使用。

(3)

script = collectargs(argv, &flags);
if (script <= 0) {  /* invalid args? */
    print_usage();
    return 0;
}

if (flags & FLAGS_VERSION)
    print_version();

先看一下print_version

static void print_version(void)
{
    fputs(SHARK_VERSION " -- " SHARK_COPYRIGHT ". " SHARK_URL "\n", stdout);
    exit(0);
}

比较简单,打印出版本就退出了。

再看一下print_usage

/* Write the usage text to stderr and flush the stream. */
static void print_usage(void)
{
    static const char usage[] =
        "usage: shark [options]... [script [args]...].\n"
        "Available options are:\n"
        "  -v        Show version information.\n";

    /* The text has no conversion specifiers, so it is written verbatim. */
    fputs(usage, stderr);
    fflush(stderr);
}

可以看到shark的使用方法:

shark [options]... [script [args]...]

shark程序后面跟着可选参数,还有执行脚本。

(4)

luaL_openlibs(ls);  /* open libraries */

加载Luajit提供的函数库。

(5)

// Get package.preload so we can store builtins in it.
lua_getglobal(ls, "package");
lua_getfield(ls, -1, "preload");
lua_remove(ls, -2); // Remove package

lua_getglobal(ls, "package");用来把package这个table压入堆栈:

 ___________________________
|____ package(类型:table)___|  (-1)

lua_getfield(ls, -1, "preload");用来从index为-1处的table(即package)中取出key为preload的值,也就是package.preload这个table,并压入堆栈:

 ___________________________
|package.preload(类型:table)|  (-1)
|____ package(类型:table)___|  (-2)

lua_remove(ls, -2);把package从堆栈中移除:

 ___________________________
|package.preload(类型:table)|  (-1)

(6)

// Store uv module definition at preload.uv
lua_pushcfunction(ls, luaopen_luv);
lua_setfield(ls, -2, "uv");

lua_pushcfunction(ls, luaopen_luv);把luaopen_luv函数压入堆栈:

 ___________________________
|__luaopen_luv(类型:函数)___|  (-1)
|package.preload(类型:table)|  (-2)

lua_setfield(ls, -2, "uv");执行效果是:package.preload.uv = luaopen_luv,然后把luaopen_luv弹出堆栈:

 ____________________________________________
|package.preload(类型:table,uv = luaopen_luv)|  (-1)

(7)

luaL_openlib(ls, "shark", ll_shark, 0);

ll_shark数组定义如下:

/*
 * Name/function pairs registered under the "shark" library name.
 * The {NULL, NULL} entry terminates the list.
 */
static const struct luaL_reg ll_shark[] = {
    { "debuginfo_set",   shark_api_debuginfo_set },
    { "lua_ref",         shark_api_lua_ref },
    { "get_ref",         shark_api_get_ref },
    { "stats",           shark_api_stats },
    { "set_binary",      shark_api_set_binary },
    { "exec",            shark_api_exec },
/* TODO: move to sock library */
#ifndef BPF_DISABLE
    { "open_raw_sock",   shark_api_open_raw_sock },
    { "sock_attach_bpf", shark_api_sock_attach_bpf },
    { "iptos",           shark_api_iptos },
#endif
    { NULL, NULL }
};

luaL_openlib(ls, "shark", ll_shark, 0);函数的作用是创建一个table然后将这个table赋给package.loaded[shark]和全局变量shark,并将ll_shark数组中的函数注册到这个table,最后把table压入堆栈:

 ————————————————————————————————————————————
|___一个注册ll_shark函数数组的table___________|  (-1)
|package.preload(类型:table,uv = luaopen_luv)|  (-2)

(8)

    lua_getglobal(ls, "shark");
    lua_pushboolean(ls, shark_verbose);
    lua_setfield(ls, -2, "verbose");
    lua_pop(ls, 1);

lua_getglobal(ls, "shark");把shark这个table压入堆栈:

 ____________________________________________
|       shark(类型:table)                    |  (-1)
 ————————————————————————————————————————————
|___一个注册ll_shark函数数组的table___________|  (-2)
|package.preload(类型:table,uv = luaopen_luv)|  (-3)

lua_pushboolean(ls, shark_verbose);把shark_verbose这个布尔值压入堆栈:

_____________________________________________
| ______shark_verbose(类型:bool)_____________|  (-1)
|       shark(类型:table)                    |  (-2)
 ————————————————————————————————————————————
|___一个注册ll_shark函数数组的table___________|  (-3)
|package.preload(类型:table,uv = luaopen_luv)|  (-4)

lua_setfield(ls, -2, "verbose");执行效果是:shark.verbose = shark_verbose,然后把shark_verbose弹出堆栈:

 ____________________________________________
| shark(类型:table, verbose = shark_verbose) |  (-1)
 ————————————————————————————————————————————
|___一个注册ll_shark函数数组的table___________|  (-2)
|package.preload(类型:table,uv = luaopen_luv)|  (-3)

lua_pop(ls, 1);把栈顶元素弹出堆栈:

 ————————————————————————————————————————————
|___一个注册ll_shark函数数组的table___________|  (-1)
|package.preload(类型:table,uv = luaopen_luv)|  (-2)

(9)

int narg = getargs(ls, argv, script);  /* collect arguments */
lua_setglobal(ls, "arg");

getargs函数实现如下:

/*
 * Push the script's arguments and build the argument table.
 *
 * ls:   Lua state to push onto.
 * argv: NULL-terminated command-line vector.
 * n:    index of the script name inside argv.
 *
 * On return the stack holds every argv entry after the script (in order)
 * with a new table on top.  The table maps index (i - n) to argv[i] for
 * every i, so the script name sits at index 0.
 *
 * Returns the number of arguments that follow the script name.
 */
static int getargs(lua_State *ls, char **argv, int n)
{
    int total;
    int narg;
    int idx;

    /* Count all entries up to the terminating NULL. */
    for (total = 0; argv[total]; total++)
        ;

    narg = total - (n + 1);
    luaL_checkstack(ls, narg + 3, "too many arguments to script");

    /* Everything after the script name goes onto the stack first. */
    idx = n + 1;
    while (idx < total) {
        lua_pushstring(ls, argv[idx]);
        idx++;
    }

    lua_createtable(ls, narg, n + 1);

    /* Fill the table; the offset -n places the script name at index 0. */
    for (idx = 0; idx < total; idx++) {
        lua_pushstring(ls, argv[idx]);
        lua_rawseti(ls, -2, idx - n);
    }

    return narg;
}

getargs函数中,传入参数n是Lua脚本在命令行参数中的索引,而narg则是传给脚本的参数个数。举个例子:

/root/shark/shark trace.lua 1 2

此时argc是4,n是1,narg是2。

luaL_checkstack(ls, narg + 3, "too many arguments to script");检查堆栈是否有足够的空间。

    for (i = n + 1; i < argc; i++)
        lua_pushstring(ls, argv[i]);

把参数1和2压入堆栈:

 ___________________________________________
|__________参数: 2__________________________|  (-1)
|          参数: 1                          |  (-2)
|———————————————————————————————————————————
|___一个注册ll_shark函数数组的table___________|  (-3)
|package.preload(类型:table,uv = luaopen_luv)|  (-4)  

lua_createtable(ls, narg, n + 1);创建一个table(包含narg数组元素,n + 1非数组元素)并压入堆栈:

 ———————————————————————————————————————————
|______table________________________________|  (-1)
|__________参数: 2__________________________|  (-2)
|          参数: 1                          |  (-3)
|———————————————————————————————————————————
|___一个注册ll_shark函数数组的table___________|  (-4)
|package.preload(类型:table,uv = luaopen_luv)|  (-5)  

看最后一个循环:

    for (i = 0; i < argc; i++) {
        lua_pushstring(ls, argv[i]);
        lua_rawseti(ls, -2, i - n);
    }

lua_pushstring(ls, argv[i]);依次把参数压入堆栈,lua_rawseti(ls, -2, i - n);则是把参数传入table

 —————————————————————————————————————————————————
|_table(index:value -> -1:shark;0:trace.lua;1:1;2:2)_|  (-1)
|__________参数: 2_______________________________|  (-2)
|          参数: 1                               |  (-3)
|————————————————————————————————————————|
|___一个注册ll_shark函数数组的table________________|  (-4)
|package.preload(类型:table,uv = luaopen_luv)     |  (-5)  

lua_setglobal(ls, "arg");作用是把栈顶table弹出,并赋值给全局变量arg。所以arg就指向了这个table(index:value -> -1:shark;0:trace.lua;1:1;2:2)。堆栈变为:

____________________________________________
|__________参数: 2__________________________|  (-1)
|          参数: 1                          |  (-2)
|———————————————————————————————————————————
|___一个注册ll_shark函数数组的table___________|  (-3)
|package.preload(类型:table,uv = luaopen_luv)|  (-4) 

(10)

#include "shark_init.h"
luaL_loadbuffer(ls, luaJIT_BC_shark_init, luaJIT_BC_shark_init_SIZE,
        NULL);

shark_init.h是由shark_init.lua生成的(以后再详细介绍shark_init.lua),luaJIT_BC_shark_init和luaJIT_BC_shark_init_SIZE也定义在shark_init.h文件中。

luaL_loadbuffer(ls, luaJIT_BC_shark_init, luaJIT_BC_shark_init_SIZE, NULL);把luaJIT_BC_shark_init这个chunk压入堆栈:

____________________________________________
|_luaJIT_BC_shark_init chunk(类型:函数)______| (-1)
|__________参数: 2__________________________|  (-2)
|          参数: 1                          |  (-3)
|———————————————————————————————————————————
|___一个注册ll_shark函数数组的table___________|  (-4)
|package.preload(类型:table,uv = luaopen_luv)|  (-5)  

(11)

if((ret = lua_pcall(ls, 0, 0, 0))) {
        ret = lua_report(ls, ret);
        goto out;
}

接下来lua_pcall(ls, 0, 0, 0)会运行luaJIT_BC_shark_init这个chunk。运行完后,把chunk弹出堆栈:

————————————————————————————————————————————
|__________参数: 2__________________________|  (-1)
|          参数: 1                          |  (-2)
|———————————————————————————————————————————
|___一个注册ll_shark函数数组的table___________|  (-3)
|package.preload(类型:table,uv = luaopen_luv)|  (-4)  

lua_report比较简单,就是如果出错的话,就从栈顶取出错误信息,打印完以后再弹栈:

/*
 * Print and discard the error value on top of the stack.
 *
 * Nothing is printed or popped when status is 0 or the top of the stack
 * is nil.  A value that lua_tostring() cannot convert is replaced by a
 * fixed placeholder message.
 *
 * Returns status unchanged.
 */
int lua_report(lua_State *ls, int status)
{
    const char *msg;

    if (!status || lua_isnil(ls, -1))
        return status;

    msg = lua_tostring(ls, -1);
    if (!msg)
        msg = "(error object is not a string)";

    l_message(progname, msg);
    lua_pop(ls, 1);

    return status;
}

(12)

    g_event_loop = luv_loop(ls);

    if((ret = luaL_loadfile(ls, argv[script]))) {
        ret = lua_report(ls, ret);
        goto out;
    }

    base = lua_gettop(ls) - 1;
    lua_pushcfunction(ls, lua_traceback);
    lua_insert(ls, base);

    if (lua_pcall(ls, 0, 0, base)) {
        fprintf(stderr, "%s\n", lua_tostring(ls, -1));
        exit(EXIT_FAILURE);
    }

    lua_pop(ls, 1);

    //TODO: move to lua init code
    uv_run(g_event_loop, UV_RUN_DEFAULT);

    ret = 0;
 out:
    lua_close(ls);
    return ret;

剩下这段代码就是运行脚本,其中lua_traceback是脚本出错时的处理函数。个人觉得细节上还有些问题,需要和作者沟通一下,这块代码暂时留个小尾巴。