看一下intel_enable_irq_remapping
的代码:
static int __init intel_enable_irq_remapping(void)
{
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
bool x2apic_present;
int setup = 0;
int eim = 0;
x2apic_present = x2apic_supported();
if (parse_ioapics_under_ir() != 1) {
printk(KERN_INFO "Not enable interrupt remapping\n");
goto error;
}
if (x2apic_present) {
pr_info("Queued invalidation will be enabled to support x2apic and Intr-remapping.\n");
eim = !dmar_x2apic_optout();
if (!eim)
printk(KERN_WARNING
"Your BIOS is broken and requested that x2apic be disabled.\n"
"This will slightly decrease performance.\n"
"Use 'intremap=no_x2apic_optout' to override BIOS request.\n");
}
for_each_iommu(iommu, drhd) {
/*
* If the queued invalidation is already initialized,
* shouldn't disable it.
*/
if (iommu->qi)
continue;
/*
* Clear previous faults.
*/
dmar_fault(-1, iommu);
/*
* Disable intr remapping and queued invalidation, if already
* enabled prior to OS handover.
*/
iommu_disable_irq_remapping(iommu);
dmar_disable_qi(iommu);
}
/*
* check for the Interrupt-remapping support
*/
for_each_iommu(iommu, drhd) {
if (!ecap_ir_support(iommu->ecap))
continue;
if (eim && !ecap_eim_support(iommu->ecap)) {
printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, "
" ecap %Lx\n", drhd->reg_base_addr, iommu->ecap);
goto error;
}
}
/*
* Enable queued invalidation for all the DRHD's.
*/
for_each_iommu(iommu, drhd) {
int ret = dmar_enable_qi(iommu);
if (ret) {
printk(KERN_ERR "DRHD %Lx: failed to enable queued, "
" invalidation, ecap %Lx, ret %d\n",
drhd->reg_base_addr, iommu->ecap, ret);
goto error;
}
}
/*
* Setup Interrupt-remapping for all the DRHD's now.
*/
for_each_iommu(iommu, drhd) {
if (!ecap_ir_support(iommu->ecap))
continue;
if (intel_setup_irq_remapping(iommu, eim))
goto error;
setup = 1;
}
if (!setup)
goto error;
irq_remapping_enabled = 1;
/*
* VT-d has a different layout for IO-APIC entries when
* interrupt remapping is enabled. So it needs a special routine
* to print IO-APIC entries for debugging purposes too.
*/
x86_io_apic_ops.print_entries = intel_ir_io_apic_print_entries;
pr_info("Enabled IRQ remapping in %s mode\n", eim ? "x2apic" : "xapic");
return eim ? IRQ_REMAP_X2APIC_MODE : IRQ_REMAP_XAPIC_MODE;
error:
/*
* handle error condition gracefully here!
*/
if (x2apic_present)
pr_warn("Failed to enable irq remapping. You are vulnerable to irq-injection attacks.\n");
return -1;
}
(1)
x2apic_present = x2apic_supported();
查看系统是否支持x2APIC
模式。
(2)
if (parse_ioapics_under_ir() != 1) {
printk(KERN_INFO "Not enable interrupt remapping\n");
goto error;
}
parse_ioapics_under_ir
函数如下:
/*
* Finds the assocaition between IOAPIC's and its Interrupt-remapping
* hardware unit.
*/
static int __init parse_ioapics_under_ir(void)
{
struct dmar_drhd_unit *drhd;
struct intel_iommu *iommu;
int ir_supported = 0;
int ioapic_idx;
for_each_iommu(iommu, drhd)
if (ecap_ir_support(iommu->ecap)) {
if (ir_parse_ioapic_hpet_scope(drhd->hdr, iommu))
return -1;
ir_supported = 1;
}
if (!ir_supported)
return 0;
for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) {
int ioapic_id = mpc_ioapic_id(ioapic_idx);
if (!map_ioapic_to_ir(ioapic_id)) {
pr_err(FW_BUG "ioapic %d has no mapping iommu, "
"interrupt remapping will be disabled\n",
ioapic_id);
return -1;
}
}
return 1;
}
ir_parse_ioapic_hpet_scope
函数的作用是解析device scope
类型是IOAPIC
和HPET (High Precision Event Timer)
:
static int ir_parse_ioapic_hpet_scope(struct acpi_dmar_header *header,
struct intel_iommu *iommu)
{
struct acpi_dmar_hardware_unit *drhd;
struct acpi_dmar_device_scope *scope;
void *start, *end;
drhd = (struct acpi_dmar_hardware_unit *)header;
start = (void *)(drhd + 1);
end = ((void *)drhd) + header->length;
while (start < end) {
scope = start;
if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
if (ir_ioapic_num == MAX_IO_APICS) {
printk(KERN_WARNING "Exceeded Max IO APICS\n");
return -1;
}
printk(KERN_INFO "IOAPIC id %d under DRHD base "
" 0x%Lx IOMMU %d\n", scope->enumeration_id,
drhd->address, iommu->seq_id);
ir_parse_one_ioapic_scope(scope, iommu);
} else if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_HPET) {
if (ir_hpet_num == MAX_HPET_TBS) {
printk(KERN_WARNING "Exceeded Max HPET blocks\n");
return -1;
}
printk(KERN_INFO "HPET id %d under DRHD base"
" 0x%Lx\n", scope->enumeration_id,
drhd->address);
ir_parse_one_hpet_scope(scope, iommu);
}
start += scope->length;
}
return 0;
}
以ir_parse_one_ioapic_scope
为例(ir_parse_one_hpet_scope
类似):
static void ir_parse_one_ioapic_scope(struct acpi_dmar_device_scope *scope,
struct intel_iommu *iommu)
{
struct acpi_dmar_pci_path *path;
u8 bus;
int count;
bus = scope->bus;
path = (struct acpi_dmar_pci_path *)(scope + 1);
count = (scope->length - sizeof(struct acpi_dmar_device_scope))
/ sizeof(struct acpi_dmar_pci_path);
while (--count > 0) {
/*
* Access PCI directly due to the PCI
* subsystem isn't initialized yet.
*/
bus = read_pci_config_byte(bus, path->device, path->function,
PCI_SECONDARY_BUS);
path++;
}
ir_ioapic[ir_ioapic_num].bus = bus;
ir_ioapic[ir_ioapic_num].devfn = PCI_DEVFN(path->device, path->function);
ir_ioapic[ir_ioapic_num].iommu = iommu;
ir_ioapic[ir_ioapic_num].id = scope->enumeration_id;
ir_ioapic_num++;
}
可以看到,实际上是通过递归访问path
得到IOAPIC
信息的过程:bus
好,对应的iommu
设备单元,等等。
for (ioapic_idx = 0; ioapic_idx < nr_ioapics; ioapic_idx++) {
int ioapic_id = mpc_ioapic_id(ioapic_idx);
if (!map_ioapic_to_ir(ioapic_id)) {
pr_err(FW_BUG "ioapic %d has no mapping iommu, "
"interrupt remapping will be disabled\n",
ioapic_id);
return -1;
}
}
这段代码则是检查IOAPIC
是否都有对应的IOMMU
。
(3)
if (x2apic_present) {
pr_info("Queued invalidation will be enabled to support x2apic and Intr-remapping.\n");
eim = !dmar_x2apic_optout();
if (!eim)
printk(KERN_WARNING
"Your BIOS is broken and requested that x2apic be disabled.\n"
"This will slightly decrease performance.\n"
"Use 'intremap=no_x2apic_optout' to override BIOS request.\n");
}
dmar_x2apic_optout
函数实现如下:
static int __init dmar_x2apic_optout(void)
{
struct acpi_table_dmar *dmar;
dmar = (struct acpi_table_dmar *)dmar_tbl;
if (!dmar || no_x2apic_optout)
return 0;
return dmar->flags & DMAR_X2APIC_OPT_OUT;
}
这个函数的返回值表示系统是否使用X2APIC
功能(1
表示不使用,0
表示使用)。
参考资料:
"BIOS Considerations" in *Intel ® Virtualization Technology for Directed I/Oc