Linux kernel IOMMU代码分析笔记(7)——intel_enable_irq_remapping(1)

看一下intel_enable_irq_remapping的代码:

/*
 * Enable interrupt remapping on every IOMMU visited by for_each_iommu().
 *
 * The function runs the following phases in order and bails out to the
 * common error path as soon as one of them fails:
 *   1. parse_ioapics_under_ir() must return exactly 1.
 *   2. If x2apic is supported, decide whether to use EIM (eim), honouring
 *      the opt-out reported by dmar_x2apic_optout().
 *   3. For each IOMMU without an existing qi, clear faults and disable
 *      interrupt remapping / queued invalidation.
 *   4. When eim is requested, every IR-capable IOMMU must also report
 *      EIM support in its ecap.
 *   5. Enable queued invalidation on every IOMMU.
 *   6. Call intel_setup_irq_remapping() on every IR-capable IOMMU; at
 *      least one unit must have been set up.
 *
 * Returns IRQ_REMAP_X2APIC_MODE when eim is non-zero, IRQ_REMAP_XAPIC_MODE
 * otherwise; returns -1 on any failure.
 */
static int __init intel_enable_irq_remapping(void)
{
    struct dmar_drhd_unit *drhd;
    struct intel_iommu *iommu;
    bool x2apic_present;
    int setup = 0;  /* becomes 1 once at least one IOMMU has been set up */
    int eim = 0;    /* non-zero: request x2apic (EIM) mode */

    x2apic_present = x2apic_supported();

    /* Any result other than 1 (i.e. 0 or -1) aborts the whole procedure. */
    if (parse_ioapics_under_ir() != 1) {
        printk(KERN_INFO "Not enable interrupt remapping\n");
        goto error;
    }

    if (x2apic_present) {
        pr_info("Queued invalidation will be enabled to support x2apic and Intr-remapping.\n");

        /* eim stays 0 when dmar_x2apic_optout() returns non-zero. */
        eim = !dmar_x2apic_optout();
        if (!eim)
            printk(KERN_WARNING
                "Your BIOS is broken and requested that x2apic be disabled.\n"
                "This will slightly decrease performance.\n"
                "Use 'intremap=no_x2apic_optout' to override BIOS request.\n");
    }

    for_each_iommu(iommu, drhd) {
        /*
         * If the queued invalidation is already initialized,
         * shouldn't disable it.
         */
        if (iommu->qi)
            continue;

        /*
         * Clear previous faults.
         */
        dmar_fault(-1, iommu);

        /*
         * Disable intr remapping and queued invalidation, if already
         * enabled prior to OS handover.
         */
        iommu_disable_irq_remapping(iommu);

        dmar_disable_qi(iommu);
    }

    /*
     * check for the Interrupt-remapping support
     */
    for_each_iommu(iommu, drhd) {
        /* Units without IR support are skipped, not treated as errors. */
        if (!ecap_ir_support(iommu->ecap))
            continue;

        /* EIM was requested, but this IR-capable unit does not offer it. */
        if (eim && !ecap_eim_support(iommu->ecap)) {
            printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, "
                   " ecap %Lx\n", drhd->reg_base_addr, iommu->ecap);
            goto error;
        }
    }

    /*
     * Enable queued invalidation for all the DRHD's.
     */
    for_each_iommu(iommu, drhd) {
        int ret = dmar_enable_qi(iommu);

        /* A non-zero return from dmar_enable_qi() is treated as failure. */
        if (ret) {
            printk(KERN_ERR "DRHD %Lx: failed to enable queued, "
                   " invalidation, ecap %Lx, ret %d\n",
                   drhd->reg_base_addr, iommu->ecap, ret);
            goto error;
        }
    }

    /*
     * Setup Interrupt-remapping for all the DRHD's now.
     */
    for_each_iommu(iommu, drhd) {
        if (!ecap_ir_support(iommu->ecap))
            continue;

        /* A non-zero return from intel_setup_irq_remapping() is a failure. */
        if (intel_setup_irq_remapping(iommu, eim))
            goto error;

        setup = 1;
    }

    /* No IR-capable IOMMU was set up at all: nothing has been enabled. */
    if (!setup)
        goto error;

    irq_remapping_enabled = 1;

    /*
     * VT-d has a different layout for IO-APIC entries when
     * interrupt remapping is enabled. So it needs a special routine
     * to print IO-APIC entries for debugging purposes too.
     */
    x86_io_apic_ops.print_entries = intel_ir_io_apic_print_entries;

    pr_info("Enabled IRQ remapping in %s mode\n", eim ? "x2apic" : "xapic");

    return eim ? IRQ_REMAP_X2APIC_MODE : IRQ_REMAP_XAPIC_MODE;

error:
    /*
     * handle error condition gracefully here!
     */

    /* The warning is emitted only when x2apic support was detected. */
    if (x2apic_present)
        pr_warn("Failed to enable irq remapping.  You are vulnerable to irq-injection attacks.\n");

    return -1;
}

(1)

x2apic_present = x2apic_supported();

查看系统是否支持x2APIC模式。

(2)

if (parse_ioapics_under_ir() != 1) {
    printk(KERN_INFO "Not enable interrupt remapping\n");
    goto error;
}

parse_ioapics_under_ir函数如下:

/*
 * Finds the association between IOAPIC's and its Interrupt-remapping
 * hardware unit.
 *
 * Returns 1 when at least one IOMMU supports interrupt remapping and
 * every IOAPIC maps to an IOMMU, 0 when no IOMMU supports interrupt
 * remapping, and -1 when scope parsing fails or an IOAPIC has no mapping.
 */
static int __init parse_ioapics_under_ir(void)
{
    struct dmar_drhd_unit *drhd;
    struct intel_iommu *iommu;
    int ir_supported = 0;
    int idx;

    for_each_iommu(iommu, drhd) {
        /* Only IR-capable units have their device scopes parsed. */
        if (!ecap_ir_support(iommu->ecap))
            continue;

        if (ir_parse_ioapic_hpet_scope(drhd->hdr, iommu))
            return -1;

        ir_supported = 1;
    }

    if (!ir_supported)
        return 0;

    /* Every IOAPIC known to the system must map to an IOMMU. */
    for (idx = 0; idx < nr_ioapics; idx++) {
        int apic_id = mpc_ioapic_id(idx);

        if (map_ioapic_to_ir(apic_id))
            continue;

        pr_err(FW_BUG "ioapic %d has no mapping iommu, "
               "interrupt remapping will be disabled\n",
               apic_id);
        return -1;
    }

    return 1;
}

ir_parse_ioapic_hpet_scope函数的作用是解析类型为IOAPIC和HPET (High Precision Event Timer)的device scope:

/*
 * Walk the device scope entries that follow a DRHD structure and record
 * every IOAPIC and HPET entry found for the given IOMMU.
 *
 * @header: DMAR header of the DRHD structure; header->length bounds the walk.
 * @iommu:  IOMMU the recorded IOAPIC/HPET entries are associated with.
 *
 * Returns 0 on success, or -1 once ir_ioapic_num has reached MAX_IO_APICS
 * or ir_hpet_num has reached MAX_HPET_TBS when another such entry is seen.
 */
static int ir_parse_ioapic_hpet_scope(struct acpi_dmar_header *header,
                      struct intel_iommu *iommu)
{
    struct acpi_dmar_hardware_unit *drhd;
    struct acpi_dmar_device_scope *scope;
    void *cur, *limit;

    drhd = (struct acpi_dmar_hardware_unit *)header;

    /* Scope entries start right after the fixed DRHD structure. */
    cur = (void *)(drhd + 1);
    limit = ((void *)drhd) + header->length;

    for (; cur < limit; cur += scope->length) {
        scope = cur;

        switch (scope->entry_type) {
        case ACPI_DMAR_SCOPE_TYPE_IOAPIC:
            if (ir_ioapic_num == MAX_IO_APICS) {
                printk(KERN_WARNING "Exceeded Max IO APICS\n");
                return -1;
            }

            printk(KERN_INFO "IOAPIC id %d under DRHD base "
                   " 0x%Lx IOMMU %d\n", scope->enumeration_id,
                   drhd->address, iommu->seq_id);

            ir_parse_one_ioapic_scope(scope, iommu);
            break;

        case ACPI_DMAR_SCOPE_TYPE_HPET:
            if (ir_hpet_num == MAX_HPET_TBS) {
                printk(KERN_WARNING "Exceeded Max HPET blocks\n");
                return -1;
            }

            printk(KERN_INFO "HPET id %d under DRHD base"
                   " 0x%Lx\n", scope->enumeration_id,
                   drhd->address);

            ir_parse_one_hpet_scope(scope, iommu);
            break;

        default:
            /* Other scope types are skipped. */
            break;
        }
    }

    return 0;
}

以ir_parse_one_ioapic_scope为例(ir_parse_one_hpet_scope类似):

/*
 * Record one IOAPIC device scope entry in the next free ir_ioapic[] slot.
 *
 * The PCI path entries stored after the scope header are followed one by
 * one: for every entry except the last, the secondary bus number is read
 * from that device's config space and becomes the bus for the next entry.
 * The last entry supplies the device/function of the IOAPIC itself.
 *
 * @scope: device scope entry describing the IOAPIC.
 * @iommu: IOMMU stored alongside the recorded entry.
 */
static void ir_parse_one_ioapic_scope(struct acpi_dmar_device_scope *scope,
                      struct intel_iommu *iommu)
{
    struct acpi_dmar_pci_path *path =
        (struct acpi_dmar_pci_path *)(scope + 1);
    int path_len = (scope->length - sizeof(struct acpi_dmar_device_scope))
        / sizeof(struct acpi_dmar_pci_path);
    u8 bus = scope->bus;
    int hop;

    /*
     * Access PCI directly due to the PCI
     * subsystem isn't initialized yet.
     */
    for (hop = 1; hop < path_len; hop++, path++)
        bus = read_pci_config_byte(bus, path->device, path->function,
                       PCI_SECONDARY_BUS);

    ir_ioapic[ir_ioapic_num].id    = scope->enumeration_id;
    ir_ioapic[ir_ioapic_num].iommu = iommu;
    ir_ioapic[ir_ioapic_num].devfn = PCI_DEVFN(path->device, path->function);
    ir_ioapic[ir_ioapic_num].bus   = bus;
    ir_ioapic_num++;
}

可以看到,这实际上是沿着path逐级读取PCI桥的secondary bus号,最终得到IOAPIC信息的过程:bus号、devfn、对应的iommu设备单元,等等。

for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) {
    int ioapic_id = mpc_ioapic_id(ioapic_idx);
    if (!map_ioapic_to_ir(ioapic_id)) {
        pr_err(FW_BUG "ioapic %d has no mapping iommu, "
               "interrupt remapping will be disabled\n",
               ioapic_id);
        return -1;
    }
}

这段代码则是检查每个IOAPIC是否都有对应的IOMMU;只要有一个IOAPIC找不到对应的IOMMU,函数就返回-1。

(3)

if (x2apic_present) {
        pr_info("Queued invalidation will be enabled to support x2apic and Intr-remapping.\n");

        eim = !dmar_x2apic_optout();
        if (!eim)
            printk(KERN_WARNING
                "Your BIOS is broken and requested that x2apic be disabled.\n"
                "This will slightly decrease performance.\n"
                "Use 'intremap=no_x2apic_optout' to override BIOS request.\n");
    }  

dmar_x2apic_optout函数实现如下:

/*
 * Report whether the DMAR table carries the x2apic opt-out flag.
 *
 * Returns the DMAR_X2APIC_OPT_OUT bit of the table flags (non-zero when
 * set). Returns 0 when there is no DMAR table or when no_x2apic_optout
 * is set.
 */
static int __init dmar_x2apic_optout(void)
{
    struct acpi_table_dmar *dmar = (struct acpi_table_dmar *)dmar_tbl;

    if (dmar && !no_x2apic_optout)
        return dmar->flags & DMAR_X2APIC_OPT_OUT;

    return 0;
}

这个函数的返回值表示BIOS是否通过DMAR表要求系统不使用x2APIC功能(非0表示要求不使用,0表示可以使用)。如果DMAR表不存在,或者设置了no_x2apic_optout,则总是返回0。

参考资料:
"BIOS Considerations" in *Intel® Virtualization Technology for Directed I/O*

发表评论

邮箱地址不会被公开。 必填项已用*标注

This site uses Akismet to reduce spam. Learn how your comment data is processed.