老友聚会

7月17日,臣一家来北京玩。下午参观了天文馆,晚上吃了“东来顺”。
7月18日晚上,四家人在“便宜坊”聚会,并合影留恋。
7月19日,送他们一家人离京。
时间好快,上次四个人聚会还是10年前的事。希望以后这样的机会能多一点吧。。。

Linux kernel 笔记 (7) ——内存地址类型

Linux中有以下几种内存地址类型:

(1)User virtual addresses
用户空间程序(user-space program)所看到和使用的地址。这些都是虚拟地址(virtual address),需要经过转化后,才能变成实际的内存物理地址。每个进程都拥有自己的虚拟地址空间。

(2)Physical addresses
实际的内存物理地址,也即是CPU用来访问物理内存的地址。

(3)Bus addresses
总线地址。外围设备所看到的物理内存地址。通常情况下,这个地址就是实际的内存物理地址。但是如果系统支持IOMMU的话,总线地址和实际的内存物理地址之间需要做一个映射。

(4)Kernel logic addresses
内核逻辑地址。内核逻辑地址会部分或全部地映射到物理内存地址,所以看起来就像实际的物理内存地址一样,并且这种映射是线性的,一对一的。在许多体系结构上,内核逻辑地址和物理内存地址只差一个常量偏移。内核虚拟地址的值通常存储在指针类型或unsigned long类型的变量里。kmalloc函数返回的就是内核逻辑地址。

(5)Kernel virtual addresses
内核虚拟地址。内核虚拟地址同内核逻辑地址相似的地方是都是把内核地址空间和实际物理内存的地址空间做映射,不同之处在于内核虚拟地址不要求这种映射是线性的,一对一的。内核逻辑地址都是内核虚拟地址,但是反过来则不成立。内核虚拟地址的值通常存储在指针类型变量里。vmallockmap返回的都是内核虚拟地址。

__pa(定义在<asm/page.h>)宏用来把内核逻辑地址映射为实际的内存物理地址;__va则用来把实际的内存物理地址映射为内核逻辑地址(仅限于low memory)。

Linux kernel IOMMU代码分析笔记(3)

看一下dmar_table_init这个函数:

int __init dmar_table_init(void)
{
    static int dmar_table_initialized;
    int ret;

    if (dmar_table_initialized == 0) {
        ret = parse_dmar_table();
        if (ret < 0) {
            if (ret != -ENODEV)
                pr_info("parse DMAR table failure.\n");
        } else  if (list_empty(&dmar_drhd_units)) {
            pr_info("No DMAR devices found\n");
            ret = -ENODEV;
        }

        if (ret < 0)
            dmar_table_initialized = ret;
        else
            dmar_table_initialized = 1;
    }

    return dmar_table_initialized < 0 ? dmar_table_initialized : 0;
}

dmar_table_init其实就是调用了parse_dmar_table这个函数来解析 DMAR ACPI tableparse_dmar_table代码如下:

static int __init
parse_dmar_table(void)
{
    struct acpi_table_dmar *dmar;
    struct acpi_dmar_header *entry_header;
    int ret = 0;
    int drhd_count = 0;

    /*
     * Do it again, earlier dmar_tbl mapping could be mapped with
     * fixed map.
     */
    dmar_table_detect();

    /*
     * ACPI tables may not be DMA protected by tboot, so use DMAR copy
     * SINIT saved in SinitMleData in TXT heap (which is DMA protected)
     */
    dmar_tbl = tboot_get_dmar_table(dmar_tbl);

    dmar = (struct acpi_table_dmar *)dmar_tbl;
    if (!dmar)
        return -ENODEV;

    if (dmar->width < PAGE_SHIFT - 1) {
        pr_warn("Invalid DMAR haw\n");
        return -EINVAL;
    }

    pr_info("Host address width %d\n", dmar->width + 1);

    entry_header = (struct acpi_dmar_header *)(dmar + 1);
    while (((unsigned long)entry_header) <
            (((unsigned long)dmar) + dmar_tbl->length)) {
        /* Avoid looping forever on bad ACPI tables */
        if (entry_header->length == 0) {
            pr_warn("Invalid 0-length structure\n");
            ret = -EINVAL;
            break;
        }

        dmar_table_print_dmar_entry(entry_header);

        switch (entry_header->type) {
        case ACPI_DMAR_TYPE_HARDWARE_UNIT:
            drhd_count++;
            ret = dmar_parse_one_drhd(entry_header);
            break;
        case ACPI_DMAR_TYPE_RESERVED_MEMORY:
            ret = dmar_parse_one_rmrr(entry_header);
            break;
        case ACPI_DMAR_TYPE_ATSR:
            ret = dmar_parse_one_atsr(entry_header);
            break;
        case ACPI_DMAR_HARDWARE_AFFINITY:
#ifdef CONFIG_ACPI_NUMA
            ret = dmar_parse_one_rhsa(entry_header);
#endif
            break;
        case ACPI_DMAR_TYPE_ANDD:
            ret = dmar_parse_one_andd(entry_header);
            break;
        default:
            pr_warn("Unknown DMAR structure type %d\n",
                entry_header->type);
            ret = 0; /* for forward compatibility */
            break;
        }
        if (ret)
            break;

        entry_header = ((void *)entry_header + entry_header->length);
    }
    if (drhd_count == 0)
        pr_warn(FW_BUG "No DRHD structure found in DMAR table\n");
    return ret;
}

parse_dmar_table函数中重要的工作有以下两点:
(1)检查host机器地址总线宽度:

if (dmar->width < PAGE_SHIFT - 1) {
        pr_warn("Invalid DMAR haw\n");
        return -EINVAL;
}

(2)遍历并解析DMAR table中的每一项,并加到相应的列表中:

while (((unsigned long)entry_header) <
            (((unsigned long)dmar) + dmar_tbl->length)) {
            ......
}

DMA Remapping Hardware单元为例:

static int __init
dmar_parse_one_drhd(struct acpi_dmar_header *header)
{
    struct acpi_dmar_hardware_unit *drhd;
    struct dmar_drhd_unit *dmaru;
    int ret = 0;

    drhd = (struct acpi_dmar_hardware_unit *)header;
    dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
    if (!dmaru)
        return -ENOMEM;

    dmaru->hdr = header;
    dmaru->reg_base_addr = drhd->address;
    dmaru->segment = drhd->segment;
    dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
    dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1),
                          ((void *)drhd) + drhd->header.length,
                          &dmaru->devices_cnt);
    if (dmaru->devices_cnt && dmaru->devices == NULL) {
        kfree(dmaru);
        return -ENOMEM;
    }

    ret = alloc_iommu(dmaru);
    if (ret) {
        dmar_free_dev_scope(&dmaru->devices,
                    &dmaru->devices_cnt);
        kfree(dmaru);
        return ret;
    }
    dmar_register_drhd_unit(dmaru);
    return 0;
}

最后dmar_register_drhd_unit这个函数会把解析好的DMA Remapping Hardware单元加到dmar_drhd_units这个列表中。

Linux kernel 笔记 (6) ——__init和__initdata

__init__initdata定义在include/linux/init.h

/* These macros are used to mark some functions or 
 * initialized data (doesn't apply to uninitialized data)
 * as `initialization' functions. The kernel can take this
 * as hint that the function is used only during the initialization
 * phase and free up used memory resources after
 *
 * Usage:
 * For functions:
 * 
 * You should add __init immediately before the function name, like:
 *
 * static void __init initme(int x, int y)
 * {
 *    extern int z; z = x * y;
 * }
 *
 * If the function has a prototype somewhere, you can also add
 * __init between closing brace of the prototype and semicolon:
 *
 * extern int initialize_foobar_device(int, int, int) __init;
 *
 * For initialized data:
 * You should insert __initdata between the variable name and equal
 * sign followed by value, e.g.:
 *
 * static int init_variable __initdata = 0;
 * static const char linux_logo[] __initconst = { 0x32, 0x36, ... };
 *
 * Don't forget to initialize data not at file scope, i.e. within a function,
 * as gcc otherwise puts the data into the bss section and not into the init
 * section.
 * 
 * Also note, that this data cannot be "const".
 */

/* These are for everybody (although not all archs will actually
   discard it in modules) */
#define __init      __section(.init.text) __cold notrace
#define __initdata  __section(.init.data)
#define __initconst __constsection(.init.rodata)

__init修饰函数时表明函数只在kernel初始化阶段使用(函数放在.init.text区),在初始化完成后,这些函数所占用的内存就可以回收利用。举例如下:

static int __init parse_dmar_table(void)
{
    ....
}

__initdata作用类似于__init,只不过它修饰变量,并且存放在.init.data区。