硬件规范
APIC的信息在SDM vol3中有比较详细的介绍
外设的中断过程
linux.git/arch/x86/kernel/apic/io_apic.c
static void __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
{
union entry_union eu;
eu.entry = e;
/*
* reg:是寄存器地址(相对比基址的偏移);0x10,是IO APIC重定向开始的偏移,对于第1个管脚,pin值为0,0x10,0x11恰好是第一个entry;对于第2个管脚,pin值为2,0x12,0x13对应第二个entry。
* value:要写入的内容,eu.w2是entry的低32位;eu.w1是entry的高32位。
*
*/
io_apic_write(apic, 0x11 + 2*pin, eu.w2);
io_apic_write(apic, 0x10 + 2*pin, eu.w1);
}
static inline void io_apic_write(unsigned int apic,
unsigned int reg, unsigned int value)
{
struct io_apic __iomem *io_apic = io_apic_base(apic);
writel(reg, &io_apic->index);
writel(value, &io_apic->data);
}
Timer related:
CCR: Current Count Register
ICR: Initial Count Register
DCR: Divide Configuration Register
Timer: in LVT
LVT (Local Vector Table):
Timer
Local Interrupt
Performance Monitor Counters
Thermal Sensor
Error
IPI:
ICR: Interrupt Command Register
LDR: Logical Destination Register
DFR: Destination Format Register
Interrupt State:
ISR: In-Service Register
IRR: Interrupt Request Register
TMR: Trigger Mode Register
初始化
/**
* setup_local_APIC - setup the local APIC
*
* Used to setup local APIC while initializing BSP or bringing up APs.
* Always called with preemption disabled.
*/
static void setup_local_APIC(void)
{
int cpu = smp_processor_id();
unsigned int value;
if (disable_apic) {
disable_ioapic_support();
return;
}
/*
* If this comes from kexec/kcrash the APIC might be enabled in
* SPIV. Soft disable it before doing further initialization.
*/
value = apic_read(APIC_SPIV);
value &= ~APIC_SPIV_APIC_ENABLED;
apic_write(APIC_SPIV, value);
#ifdef CONFIG_X86_32
/* Pound the ESR really hard over the head with a big hammer - mbligh */
if (lapic_is_integrated() && apic->disable_esr) {
apic_write(APIC_ESR, 0);
apic_write(APIC_ESR, 0);
apic_write(APIC_ESR, 0);
apic_write(APIC_ESR, 0);
}
#endif
/*
* Double-check whether this APIC is really registered.
* This is meaningless in clustered apic mode, so we skip it.
*/
BUG_ON(!apic->apic_id_registered());
/*
* Intel recommends to set DFR, LDR and TPR before enabling
* an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
* document number 292116). So here it goes...
*/
apic->init_apic_ldr();
#ifdef CONFIG_X86_32
if (apic->dest_mode_logical) {
int logical_apicid, ldr_apicid;
/*
* APIC LDR is initialized. If logical_apicid mapping was
* initialized during get_smp_config(), make sure it matches
* the actual value.
*/
logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
if (logical_apicid != BAD_APICID)
WARN_ON(logical_apicid != ldr_apicid);
/* Always use the value from LDR. */
early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid;
}
#endif
/*
* Set Task Priority to 'accept all except vectors 0-31'. An APIC
* vector in the 16-31 range could be delivered if TPR == 0, but we
* would think it's an exception and terrible things will happen. We
* never change this later on.
*/
value = apic_read(APIC_TASKPRI);
value &= ~APIC_TPRI_MASK;
value |= 0x10;
apic_write(APIC_TASKPRI, value);
/* Clear eventually stale ISR/IRR bits */
apic_pending_intr_clear();
/*
* Now that we are all set up, enable the APIC
*/
value = apic_read(APIC_SPIV);
value &= ~APIC_VECTOR_MASK;
/*
* Enable APIC
*/
value |= APIC_SPIV_APIC_ENABLED;
#ifdef CONFIG_X86_32
/*
* Some unknown Intel IO/APIC (or APIC) errata is biting us with
* certain networking cards. If high frequency interrupts are
* happening on a particular IOAPIC pin, plus the IOAPIC routing
* entry is masked/unmasked at a high rate as well then sooner or
* later IOAPIC line gets 'stuck', no more interrupts are received
* from the device. If focus CPU is disabled then the hang goes
* away, oh well :-(
*
* [ This bug can be reproduced easily with a level-triggered
* PCI Ne2000 networking cards and PII/PIII processors, dual
* BX chipset. ]
*/
/*
* Actually disabling the focus CPU check just makes the hang less
* frequent as it makes the interrupt distribution model be more
* like LRU than MRU (the short-term load is more even across CPUs).
*/
/*
* - enable focus processor (bit==0)
* - 64bit mode always use processor focus
* so no need to set it
*/
value &= ~APIC_SPIV_FOCUS_DISABLED;
#endif
/*
* Set spurious IRQ vector
*/
value |= SPURIOUS_APIC_VECTOR;
apic_write(APIC_SPIV, value);
perf_events_lapic_init();
/*
* Set up LVT0, LVT1:
*
* set up through-local-APIC on the boot CPU's LINT0. This is not
* strictly necessary in pure symmetric-IO mode, but sometimes
* we delegate interrupts to the 8259A.
*/
/*
* TODO: set up through-local-APIC from through-I/O-APIC? --macro
*/
value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
if (!cpu && (pic_mode || !value || skip_ioapic_setup)) {
value = APIC_DM_EXTINT;
apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu);
} else {
value = APIC_DM_EXTINT | APIC_LVT_MASKED;
apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", cpu);
}
apic_write(APIC_LVT0, value);
/*
* Only the BSP sees the LINT1 NMI signal by default. This can be
* modified by apic_extnmi= boot option.
*/
if ((!cpu && apic_extnmi != APIC_EXTNMI_NONE) ||
apic_extnmi == APIC_EXTNMI_ALL)
value = APIC_DM_NMI;
else
value = APIC_DM_NMI | APIC_LVT_MASKED;
/* Is 82489DX ? */
if (!lapic_is_integrated())
value |= APIC_LVT_LEVEL_TRIGGER;
apic_write(APIC_LVT1, value);
#ifdef CONFIG_X86_MCE_INTEL
/* Recheck CMCI information after local APIC is up on CPU #0 */
if (!cpu)
cmci_recheck();
#endif
}
static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs, void *arg)
{
struct irq_alloc_info *info = arg;
struct apic_chip_data *apicd;
struct irq_data *irqd;
int i, err, node;
pr_info("DEBUG***** x86_vector_alloc_irqs0: %px\n", x86_vector_alloc_irqs);
if (disable_apic)
return -ENXIO;
/* Currently vector allocator can't guarantee contiguous allocations */
if ((info->flags & X86_IRQ_ALLOC_CONTIGUOUS_VECTORS) && nr_irqs > 1)
return -ENOSYS;
/*
* Catch any attempt to touch the cascade interrupt on a PIC
* equipped system.
*/
if (WARN_ON_ONCE(info->flags & X86_IRQ_ALLOC_LEGACY &&
virq == PIC_CASCADE_IR))
return -EINVAL;
for (i = 0; i < nr_irqs; i++) {
irqd = irq_domain_get_irq_data(domain, virq + i);
BUG_ON(!irqd);
node = irq_data_get_node(irqd);
WARN_ON_ONCE(irqd->chip_data);
apicd = alloc_apic_chip_data(node);
if (!apicd) {
err = -ENOMEM;
goto error;
}
apicd->irq = virq + i;
irqd->chip = &lapic_controller;
irqd->chip_data = apicd;
irqd->hwirq = virq + i;
irqd_set_single_target(irqd);
/*
* Prevent that any of these interrupts is invoked in
* non interrupt context via e.g. generic_handle_irq()
* as that can corrupt the affinity move state.
*/
irqd_set_handle_enforce_irqctx(irqd);
/* Don't invoke affinity setter on deactivated interrupts */
irqd_set_affinity_on_activate(irqd);
/*
* Legacy vectors are already assigned when the IOAPIC
* takes them over. They stay on the same vector. This is
* required for check_timer() to work correctly as it might
* switch back to legacy mode. Only update the hardware
* config.
*/
if (info->flags & X86_IRQ_ALLOC_LEGACY) {
if (!vector_configure_legacy(virq + i, irqd, apicd))
continue;
}
err = assign_irq_vector_policy(irqd, info);
trace_vector_setup(virq + i, false, err);
if (err) {
irqd->chip_data = NULL;
free_apic_chip_data(apicd);
goto error;
}
}
return 0;
error:
x86_vector_free_irqs(domain, virq, i);
return err;
}
static irq_hw_number_t msi_domain_ops_get_hwirq(struct msi_domain_info *info,
msi_alloc_info_t *arg)
{
return arg->hwirq;
}
static int dmar_msi_init(struct irq_domain *domain,
struct msi_domain_info *info, unsigned int virq,
irq_hw_number_t hwirq, msi_alloc_info_t *arg)
{
pr_info("===========================dmar_msi_init===========================\n");
dump_stack();
irq_domain_set_info(domain, virq, arg->devid, info->chip, NULL,
handle_edge_irq, arg->data, "edge");
return 0;
}
static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs, void *arg)
{
struct msi_domain_info *info = domain->host_data;
struct msi_domain_ops *ops = info->ops;
irq_hw_number_t hwirq = ops->get_hwirq(info, arg); //msi_domain_ops_get_hwirq
int i, ret;
if (irq_find_mapping(domain, hwirq) > 0)
return -EEXIST;
if (domain->parent) {
ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
if (ret < 0)
return ret;
}
for (i = 0; i < nr_irqs; i++) {
pr_info("msi_domain_alloc:ops->msi_init=%px\n", ops->msi_init);
ret = ops->msi_init(domain, info, virq + i, hwirq + i, arg);
if (ret < 0) {
if (ops->msi_free) {
for (i--; i > 0; i--)
ops->msi_free(domain, info, virq + i);
}
irq_domain_free_irqs_top(domain, virq, nr_irqs);
return ret;
}
}
return 0;
}
int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain,
unsigned int irq_base,
unsigned int nr_irqs, void *arg)
{
if (!domain->ops->alloc) {
pr_debug("domain->ops->alloc() is NULL\n");
return -ENOSYS;
}
// 这里调用了两个地方 static const struct irq_domain_ops x86_vector_domain_ops 和 static const struct irq_domain_ops msi_domain_ops
//irq_domain_alloc_irqs_hierarchy[domain->ops->alloc] domain == domain->parent
return domain->ops->alloc(domain, irq_base, nr_irqs, arg);
}
/**
* __irq_domain_alloc_irqs - Allocate IRQs from domain
* @domain: domain to allocate from
* @irq_base: allocate specified IRQ number if irq_base >= 0
* @nr_irqs: number of IRQs to allocate
* @node: NUMA node id for memory allocation
* @arg: domain specific argument
* @realloc: IRQ descriptors have already been allocated if true
* @affinity: Optional irq affinity mask for multiqueue devices
*
* Allocate IRQ numbers and initialized all data structures to support
* hierarchy IRQ domains.
* Parameter @realloc is mainly to support legacy IRQs.
* Returns error code or allocated IRQ number
*
* The whole process to setup an IRQ has been split into two steps.
* The first step, __irq_domain_alloc_irqs(), is to allocate IRQ
* descriptor and required hardware resources. The second step,
* irq_domain_activate_irq(), is to program the hardware with preallocated
* resources. In this way, it's easier to rollback when failing to
* allocate resources.
*/
int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
unsigned int nr_irqs, int node, void *arg,
bool realloc, const struct irq_affinity_desc *affinity)
{
int i, ret, virq;
if (domain == NULL) {
domain = irq_default_domain;
if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n"))
return -EINVAL;
}
if (realloc && irq_base >= 0) {
virq = irq_base;
} else {
virq = irq_domain_alloc_descs(irq_base, nr_irqs, 0, node,
affinity);
if (virq < 0) {
pr_debug("cannot allocate IRQ(base %d, count %d)\n",
irq_base, nr_irqs);
return virq;
}
}
if (irq_domain_alloc_irq_data(domain, virq, nr_irqs)) {
pr_debug("cannot allocate memory for IRQ%d\n", virq);
ret = -ENOMEM;
goto out_free_desc;
}
mutex_lock(&irq_domain_mutex);
ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg);
if (ret < 0) {
mutex_unlock(&irq_domain_mutex);
goto out_free_irq_data;
}
for (i = 0; i < nr_irqs; i++) {
ret = irq_domain_trim_hierarchy(virq + i);
if (ret) {
mutex_unlock(&irq_domain_mutex);
goto out_free_irq_data;
}
}
for (i = 0; i < nr_irqs; i++)
irq_domain_insert_irq(virq + i);
mutex_unlock(&irq_domain_mutex);
return virq;
out_free_irq_data:
irq_domain_free_irq_data(virq, nr_irqs);
out_free_desc:
irq_free_descs(virq, nr_irqs);
return ret;
}
static inline int irq_domain_alloc_irqs(struct irq_domain *domain,
unsigned int nr_irqs, int node, void *arg)
{
return __irq_domain_alloc_irqs(domain, -1, nr_irqs, node, arg, false,
NULL);
}
int dmar_alloc_hwirq(int id, int node, void *arg)
{
struct irq_domain *domain = dmar_get_irq_domain();
struct irq_alloc_info info;
if (!domain)
return -1;
init_irq_alloc_info(&info, NULL);
info.type = X86_IRQ_ALLOC_TYPE_DMAR;
info.devid = id;
info.hwirq = id;
info.data = arg;
return irq_domain_alloc_irqs(domain, 1, node, &info);
}
start_kernel()
init_IRQ()
native_init_IRQ()
idt_setup_apic_and_irq_gates()
idt_setup_from_table(idt_table, apic_idts, ARRAY_SIZE(apic_idts), true);
msi
static const struct irq_domain_ops msi_domain_ops = {
.alloc = msi_domain_alloc,
.free = msi_domain_free,
.activate = msi_domain_activate,
.deactivate = msi_domain_deactivate,
};
static const struct irq_domain_ops x86_vector_domain_ops = {
.select = x86_vector_select,
.alloc = x86_vector_alloc_irqs,
.free = x86_vector_free_irqs,
.activate = x86_vector_activate,
.deactivate = x86_vector_deactivate,
#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
.debug_show = x86_vector_debug_show,
#endif
};
static int msi_domain_ops_init(struct irq_domain *domain,
struct msi_domain_info *info,
unsigned int virq, irq_hw_number_t hwirq,
msi_alloc_info_t *arg)
{
irq_domain_set_hwirq_and_chip(domain, virq, hwirq, info->chip,
info->chip_data);
if (info->handler && info->handler_name) {
__irq_set_handler(virq, info->handler, 0, info->handler_name);
if (info->handler_data)
irq_set_handler_data(virq, info->handler_data);
}
return 0;
}
/**
* irq_domain_set_hwirq_and_chip - Set hwirq and irqchip of @virq at @domain
* @domain: Interrupt domain to match
* @virq: IRQ number
* @hwirq: The hwirq number
* @chip: The associated interrupt chip
* @chip_data: The associated chip data
*/
int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq,
irq_hw_number_t hwirq, struct irq_chip *chip,
void *chip_data)
{
struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq);
if (!irq_data)
return -ENOENT;
irq_data->hwirq = hwirq;
irq_data->chip = chip ? chip : &no_irq_chip;
irq_data->chip_data = chip_data;
return 0;
}
EXPORT_SYMBOL_GPL(irq_domain_set_hwirq_and_chip);
ops->msi_init = msi_domain_ops_default.msi_init;
static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs, void *arg)
{
struct msi_domain_info *info = domain->host_data;
struct msi_domain_ops *ops = info->ops;
irq_hw_number_t hwirq = ops->get_hwirq(info, arg);
int i, ret;
if (irq_find_mapping(domain, hwirq) > 0)
return -EEXIST;
if (domain->parent) {
ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
if (ret < 0)
return ret;
}
for (i = 0; i < nr_irqs; i++) {
ret = ops->msi_init(domain, info, virq + i, hwirq + i, arg);
if (ret < 0) {
if (ops->msi_free) {
for (i--; i > 0; i--)
ops->msi_free(domain, info, virq + i);
}
irq_domain_free_irqs_top(domain, virq, nr_irqs);
return ret;
}
}
return 0;
}
int dmar_set_interrupt(struct intel_iommu *iommu)
{
int irq, ret;
/*
* Check if the fault interrupt is already initialized.
*/
if (iommu->irq)
return 0;
irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu);
if (irq > 0) {
iommu->irq = irq;
} else {
pr_err("No free IRQ vectors\n");
return -EINVAL;
}
ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
if (ret)
pr_err("Can't request irq\n");
return ret;
}