Hard interrupts: APIC

liaocj 2024-12-03 09:02:56

Hardware specification

The APIC is documented in detail in the Intel SDM, Volume 3.
Interrupt delivery path of a peripheral device

linux.git/arch/x86/kernel/apic/io_apic.c
static void __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
{
    union entry_union eu;
    eu.entry = e;
    /*
     * reg: register index (offset within the IO APIC register window).
     * The redirection table starts at 0x10, two registers per pin: pin 0
     * uses 0x10 (low dword) and 0x11 (high dword), pin 1 uses 0x12/0x13,
     * and so on.
     * value: eu.w1 is the low 32 bits of the entry, eu.w2 the high 32 bits.
     * The high dword is written first; the low dword, which carries the
     * vector, delivery mode and mask bits, is written last.
     */
    io_apic_write(apic, 0x11 + 2*pin, eu.w2);
    io_apic_write(apic, 0x10 + 2*pin, eu.w1);
}
/*
 * IO APIC registers are accessed indirectly: write the register index
 * into the 'index' window, then read or write through the 'data' window.
 */
static inline void io_apic_write(unsigned int apic, unsigned int reg,
                 unsigned int value)
{
    struct io_apic __iomem *io_apic = io_apic_base(apic);
    writel(reg, &io_apic->index);
    writel(value, &io_apic->data);
}
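
For reference, the read path is symmetric; a minimal sketch paraphrased from the same file (treat the exact bodies as approximate):

static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
{
    struct io_apic __iomem *io_apic = io_apic_base(apic);

    /* Same index/data window as the write path. */
    writel(reg, &io_apic->index);
    return readl(&io_apic->data);
}

static struct IO_APIC_route_entry __ioapic_read_entry(int apic, int pin)
{
    union entry_union eu;

    /* Low dword at 0x10 + 2*pin, high dword at 0x11 + 2*pin. */
    eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
    eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
    return eu.entry;
}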
Timer-related registers:

    CCR: Current Count Register
    ICR: Initial Count Register
    DCR: Divide Configuration Register
    Timer: in LVT
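
A minimal sketch of driving the LAPIC timer through these registers (xAPIC MMIO offsets as given in the SDM; lapic_base and the one-shot policy are assumptions for illustration, not kernel code):

#include <stdint.h>

/* Hypothetical mapping of the xAPIC MMIO page (physical 0xFEE00000). */
static volatile uint32_t *lapic_base;

#define LVT_TIMER  0x320    /* LVT timer entry                    */
#define TIMER_ICR  0x380    /* Initial Count Register             */
#define TIMER_CCR  0x390    /* Current Count Register (read-only) */
#define TIMER_DCR  0x3E0    /* Divide Configuration Register      */

static inline void lapic_write(uint32_t reg, uint32_t val)
{
    lapic_base[reg / 4] = val;
}

static void lapic_timer_oneshot(uint8_t vector, uint32_t count)
{
    lapic_write(TIMER_DCR, 0x3);      /* divide bus clock by 16       */
    lapic_write(LVT_TIMER, vector);   /* one-shot mode, unmasked      */
    lapic_write(TIMER_ICR, count);    /* writing the initial count
                                         starts the countdown; CCR then
                                         counts down toward zero      */
}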

LVT (Local Vector Table):

    Timer
    Local Interrupt
    Performance Monitor Counters
    Thermal Sensor
    Error

IPI:

    ICR: Interrupt Command Register
    LDR: Logical Destination Register
    DFR: Destination Format Register
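
In the same vein, a hedged sketch of sending a fixed IPI in xAPIC mode (lapic_base/lapic_write() as in the timer sketch above; offsets per the SDM):

#define ICR_LOW   0x300     /* Interrupt Command Register, bits 31:0  */
#define ICR_HIGH  0x310     /* Interrupt Command Register, bits 63:32 */

static void send_fixed_ipi(uint8_t dest_apic_id, uint8_t vector)
{
    /* The destination APIC ID lives in ICR bits 63:56. Write the high
     * dword first: the write to the low dword is what fires the IPI. */
    lapic_write(ICR_HIGH, (uint32_t)dest_apic_id << 24);
    lapic_write(ICR_LOW, vector);   /* delivery mode 000 = fixed,
                                       physical destination mode */
}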

Interrupt State:

    ISR: In-Service Register
    IRR: Interrupt Request Register
    TMR: Trigger Mode Register
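
ISR, IRR and TMR are each 256 bits wide, stored as eight 32-bit chunks at 16-byte strides (ISR at 0x100..0x170, TMR at 0x180..0x1F0, IRR at 0x200..0x270). A sketch of finding the highest vector pending in IRR, using the same hypothetical lapic_base as above:

#define IRR_BASE  0x200

/* Returns the highest vector pending in IRR, or -1 if none. */
static int lapic_highest_irr(void)
{
    for (int chunk = 7; chunk >= 0; chunk--) {
        uint32_t bits = lapic_base[(IRR_BASE + chunk * 0x10) / 4];
        if (bits)
            return chunk * 32 + 31 - __builtin_clz(bits);
    }
    return -1;
}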

Initialization

/**
 * setup_local_APIC - setup the local APIC
 *
 * Used to setup local APIC while initializing BSP or bringing up APs.
 * Always called with preemption disabled.
 */
static void setup_local_APIC(void)
{
    int cpu = smp_processor_id();
    unsigned int value;

    if (disable_apic) {
        disable_ioapic_support();
        return;
    }

    /*
     * If this comes from kexec/kcrash the APIC might be enabled in
     * SPIV. Soft disable it before doing further initialization.
     */
    value = apic_read(APIC_SPIV);
    value &= ~APIC_SPIV_APIC_ENABLED;
    apic_write(APIC_SPIV, value);

#ifdef CONFIG_X86_32
    /* Pound the ESR really hard over the head with a big hammer - mbligh */
    if (lapic_is_integrated() && apic->disable_esr) {
        apic_write(APIC_ESR, 0);
        apic_write(APIC_ESR, 0);
        apic_write(APIC_ESR, 0);
        apic_write(APIC_ESR, 0);
    }
#endif
    /*
     * Double-check whether this APIC is really registered.
     * This is meaningless in clustered apic mode, so we skip it.
     */
    BUG_ON(!apic->apic_id_registered());

    /*
     * Intel recommends to set DFR, LDR and TPR before enabling
     * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
     * document number 292116).  So here it goes...
     */
    apic->init_apic_ldr();

#ifdef CONFIG_X86_32
    if (apic->dest_mode_logical) {
        int logical_apicid, ldr_apicid;

        /*
         * APIC LDR is initialized.  If logical_apicid mapping was
         * initialized during get_smp_config(), make sure it matches
         * the actual value.
         */
        logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
        ldr_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
        if (logical_apicid != BAD_APICID)
            WARN_ON(logical_apicid != ldr_apicid);
        /* Always use the value from LDR. */
        early_per_cpu(x86_cpu_to_logical_apicid, cpu) = ldr_apicid;
    }
#endif

    /*
     * Set Task Priority to 'accept all except vectors 0-31'.  An APIC
     * vector in the 16-31 range could be delivered if TPR == 0, but we
     * would think it's an exception and terrible things will happen.  We
     * never change this later on.
     */
    value = apic_read(APIC_TASKPRI);
    value &= ~APIC_TPRI_MASK;
    value |= 0x10;
    apic_write(APIC_TASKPRI, value);

    /* Clear eventually stale ISR/IRR bits */
    apic_pending_intr_clear();

    /*
     * Now that we are all set up, enable the APIC
     */
    value = apic_read(APIC_SPIV);
    value &= ~APIC_VECTOR_MASK;
    /*
     * Enable APIC
     */
    value |= APIC_SPIV_APIC_ENABLED;

#ifdef CONFIG_X86_32
    /*
     * Some unknown Intel IO/APIC (or APIC) errata is biting us with
     * certain networking cards. If high frequency interrupts are
     * happening on a particular IOAPIC pin, plus the IOAPIC routing
     * entry is masked/unmasked at a high rate as well then sooner or
     * later IOAPIC line gets 'stuck', no more interrupts are received
     * from the device. If focus CPU is disabled then the hang goes
     * away, oh well :-(
     *
     * [ This bug can be reproduced easily with a level-triggered
     *   PCI Ne2000 networking cards and PII/PIII processors, dual
     *   BX chipset. ]
     */
    /*
     * Actually disabling the focus CPU check just makes the hang less
     * frequent as it makes the interrupt distribution model be more
     * like LRU than MRU (the short-term load is more even across CPUs).
     */

    /*
     * - enable focus processor (bit==0)
     * - 64bit mode always use processor focus
     *   so no need to set it
     */
    value &= ~APIC_SPIV_FOCUS_DISABLED;
#endif

    /*
     * Set spurious IRQ vector
     */
    value |= SPURIOUS_APIC_VECTOR;
    apic_write(APIC_SPIV, value);

    perf_events_lapic_init();

    /*
     * Set up LVT0, LVT1:
     *
     * set up through-local-APIC on the boot CPU's LINT0. This is not
     * strictly necessary in pure symmetric-IO mode, but sometimes
     * we delegate interrupts to the 8259A.
     */
    /*
     * TODO: set up through-local-APIC from through-I/O-APIC? --macro
     */
    value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
    if (!cpu && (pic_mode || !value || skip_ioapic_setup)) {
        value = APIC_DM_EXTINT;
        apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu);
    } else {
        value = APIC_DM_EXTINT | APIC_LVT_MASKED;
        apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", cpu);
    }
    apic_write(APIC_LVT0, value);

    /*
     * Only the BSP sees the LINT1 NMI signal by default. This can be
     * modified by apic_extnmi= boot option.
     */
    if ((!cpu && apic_extnmi != APIC_EXTNMI_NONE) ||
        apic_extnmi == APIC_EXTNMI_ALL)
        value = APIC_DM_NMI;
    else
        value = APIC_DM_NMI | APIC_LVT_MASKED;

    /* Is 82489DX ? */
    if (!lapic_is_integrated())
        value |= APIC_LVT_LEVEL_TRIGGER;
    apic_write(APIC_LVT1, value);

#ifdef CONFIG_X86_MCE_INTEL
    /* Recheck CMCI information after local APIC is up on CPU #0 */
    if (!cpu)
        cmci_recheck();
#endif
}
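
Roughly where setup_local_APIC() is reached on kernels of this vintage (the call sites move around between versions, so treat this as an approximation):

start_kernel()
    late_time_init()                    // x86_late_time_init()
        x86_init.irqs.intr_mode_init()  // apic_intr_mode_init()
            apic_bsp_setup()
                setup_local_APIC()      // BSP

start_secondary()
    apic_ap_setup()
        setup_local_APIC()              // APs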

static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq,
                 unsigned int nr_irqs, void *arg)
{
    struct irq_alloc_info *info = arg;
    struct apic_chip_data *apicd;
    struct irq_data *irqd;
    int i, err, node;
    pr_info("DEBUG***** x86_vector_alloc_irqs0: %px\n", x86_vector_alloc_irqs);
    if (disable_apic)
        return -ENXIO;

    /* Currently vector allocator can't guarantee contiguous allocations */
    if ((info->flags & X86_IRQ_ALLOC_CONTIGUOUS_VECTORS) && nr_irqs > 1)
        return -ENOSYS;

    /*
     * Catch any attempt to touch the cascade interrupt on a PIC
     * equipped system.
     */
    if (WARN_ON_ONCE(info->flags & X86_IRQ_ALLOC_LEGACY &&
             virq == PIC_CASCADE_IR))
        return -EINVAL;

    for (i = 0; i < nr_irqs; i++) {
        irqd = irq_domain_get_irq_data(domain, virq + i);
        BUG_ON(!irqd);
        node = irq_data_get_node(irqd);
        WARN_ON_ONCE(irqd->chip_data);
        apicd = alloc_apic_chip_data(node);
        if (!apicd) {
            err = -ENOMEM;
            goto error;
        }

        apicd->irq = virq + i;
        irqd->chip = &lapic_controller;
        irqd->chip_data = apicd;
        irqd->hwirq = virq + i;
        irqd_set_single_target(irqd);
        /*
         * Prevent that any of these interrupts is invoked in
         * non interrupt context via e.g. generic_handle_irq()
         * as that can corrupt the affinity move state.
         */
        irqd_set_handle_enforce_irqctx(irqd);

        /* Don't invoke affinity setter on deactivated interrupts */
        irqd_set_affinity_on_activate(irqd);

        /*
         * Legacy vectors are already assigned when the IOAPIC
         * takes them over. They stay on the same vector. This is
         * required for check_timer() to work correctly as it might
         * switch back to legacy mode. Only update the hardware
         * config.
         */
        if (info->flags & X86_IRQ_ALLOC_LEGACY) {
            if (!vector_configure_legacy(virq + i, irqd, apicd))
                continue;
        }

        err = assign_irq_vector_policy(irqd, info);
        trace_vector_setup(virq + i, false, err);
        if (err) {
            irqd->chip_data = NULL;
            free_apic_chip_data(apicd);
            goto error;
        }
    }

    return 0;

error:
    x86_vector_free_irqs(domain, virq, i);
    return err;
}

static irq_hw_number_t msi_domain_ops_get_hwirq(struct msi_domain_info *info,
                        msi_alloc_info_t *arg)
{
    return arg->hwirq;
}

static int dmar_msi_init(struct irq_domain *domain,
             struct msi_domain_info *info, unsigned int virq,
             irq_hw_number_t hwirq, msi_alloc_info_t *arg)
{
    pr_info("===========================dmar_msi_init===========================\n");
    dump_stack();
    irq_domain_set_info(domain, virq, arg->devid, info->chip, NULL,
                handle_edge_irq, arg->data, "edge");

    return 0;
}

static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
                unsigned int nr_irqs, void *arg)
{
    struct msi_domain_info *info = domain->host_data;
    struct msi_domain_ops *ops = info->ops;
    irq_hw_number_t hwirq = ops->get_hwirq(info, arg); /* e.g. msi_domain_ops_get_hwirq() */
    int i, ret;

    if (irq_find_mapping(domain, hwirq) > 0)
        return -EEXIST;

    if (domain->parent) {
        ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
        if (ret < 0)
            return ret;
    }

    for (i = 0; i < nr_irqs; i++) {
        pr_info("msi_domain_alloc:ops->msi_init=%px\n", ops->msi_init);
        ret = ops->msi_init(domain, info, virq + i, hwirq + i, arg);
        if (ret < 0) {
            if (ops->msi_free) {
                for (i--; i > 0; i--)
                    ops->msi_free(domain, info, virq + i);
            }
            irq_domain_free_irqs_top(domain, virq, nr_irqs);
            return ret;
        }
    }

    return 0;
}

int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain,
                    unsigned int irq_base,
                    unsigned int nr_irqs, void *arg)
{
    if (!domain->ops->alloc) {
        pr_debug("domain->ops->alloc() is NULL\n");
        return -ENOSYS;
    }
    /*
     * Two implementations are reached from here: x86_vector_domain_ops.alloc
     * for the root x86 vector domain, and msi_domain_ops.alloc for the MSI
     * domain stacked on top of it (which in turn recurses to the parent via
     * irq_domain_alloc_irqs_parent()).
     */
    return domain->ops->alloc(domain, irq_base, nr_irqs, arg);
}
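
For an MSI allocation the hierarchy is walked as follows (per the functions above; the MSI domain's parent is the x86 vector domain):

__irq_domain_alloc_irqs()
    irq_domain_alloc_irqs_hierarchy()
        msi_domain_alloc()
            irq_domain_alloc_irqs_parent()
                x86_vector_alloc_irqs()
            ops->msi_init()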

/**
 * __irq_domain_alloc_irqs - Allocate IRQs from domain
 * @domain:	domain to allocate from
 * @irq_base:	allocate specified IRQ number if irq_base >= 0
 * @nr_irqs:	number of IRQs to allocate
 * @node:	NUMA node id for memory allocation
 * @arg:	domain specific argument
 * @realloc:	IRQ descriptors have already been allocated if true
 * @affinity:	Optional irq affinity mask for multiqueue devices
 *
 * Allocate IRQ numbers and initialize all data structures to support
 * hierarchy IRQ domains.
 * Parameter @realloc is mainly to support legacy IRQs.
 * Returns error code or allocated IRQ number
 *
 * The whole process to setup an IRQ has been split into two steps.
 * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ
 * descriptor and required hardware resources. The second step,
 * irq_domain_activate_irq(), is to program the hardware with preallocated
 * resources. In this way, it's easier to rollback when failing to
 * allocate resources.
 */
int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
                unsigned int nr_irqs, int node, void *arg,
                bool realloc, const struct irq_affinity_desc *affinity)
{
    int i, ret, virq;

    if (domain == NULL) {
        domain = irq_default_domain;
        if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n"))
            return -EINVAL;
    }

    if (realloc && irq_base >= 0) {
        virq = irq_base;
    } else {
        virq = irq_domain_alloc_descs(irq_base, nr_irqs, 0, node,
                          affinity);
        if (virq < 0) {
            pr_debug("cannot allocate IRQ(base %d, count %d)\n",
                 irq_base, nr_irqs);
            return virq;
        }
    }

    if (irq_domain_alloc_irq_data(domain, virq, nr_irqs)) {
        pr_debug("cannot allocate memory for IRQ%d\n", virq);
        ret = -ENOMEM;
        goto out_free_desc;
    }

    mutex_lock(&irq_domain_mutex);
    ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg);
    if (ret < 0) {
        mutex_unlock(&irq_domain_mutex);
        goto out_free_irq_data;
    }

    for (i = 0; i < nr_irqs; i++) {
        ret = irq_domain_trim_hierarchy(virq + i);
        if (ret) {
            mutex_unlock(&irq_domain_mutex);
            goto out_free_irq_data;
        }
    }

    for (i = 0; i < nr_irqs; i++)
        irq_domain_insert_irq(virq + i);
    mutex_unlock(&irq_domain_mutex);

    return virq;

out_free_irq_data:
    irq_domain_free_irq_data(virq, nr_irqs);
out_free_desc:
    irq_free_descs(virq, nr_irqs);
    return ret;
}

static inline int irq_domain_alloc_irqs(struct irq_domain *domain,
            unsigned int nr_irqs, int node, void *arg)
{
    return __irq_domain_alloc_irqs(domain, -1, nr_irqs, node, arg, false,
                       NULL);
}

int dmar_alloc_hwirq(int id, int node, void *arg)
{
    struct irq_domain *domain = dmar_get_irq_domain();
    struct irq_alloc_info info;

    if (!domain)
        return -1;

    init_irq_alloc_info(&info, NULL);
    info.type = X86_IRQ_ALLOC_TYPE_DMAR;
    info.devid = id;
    info.hwirq = id;
    info.data = arg;

    return irq_domain_alloc_irqs(domain, 1, node, &info);
}
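
dmar_get_irq_domain() is where the DMAR MSI domain is created with the x86 vector domain as its parent; a hedged paraphrase of the relevant part of arch/x86/kernel/apic/msi.c of this era (locking and error handling omitted):

static struct irq_domain *dmar_get_irq_domain(void)
{
    static struct irq_domain *dmar_domain;
    struct fwnode_handle *fn;

    if (dmar_domain)
        return dmar_domain;

    fn = irq_domain_alloc_named_fwnode("DMAR-MSI");
    if (fn)
        /* Parent is x86_vector_domain, so allocations fall through
         * to x86_vector_alloc_irqs() via the hierarchy. */
        dmar_domain = msi_create_irq_domain(fn, &dmar_msi_domain_info,
                                            x86_vector_domain);
    return dmar_domain;
}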

The IDT gates for the APIC and IRQ vectors are installed early in boot:

start_kernel()
    init_IRQ()
        native_init_IRQ()
            idt_setup_apic_and_irq_gates()
                idt_setup_from_table(idt_table, apic_idts, ARRAY_SIZE(apic_idts), true);

MSI

The MSI irq domain is stacked on top of the x86 vector domain; the two irq_domain_ops involved are:

static const struct irq_domain_ops msi_domain_ops = {
    .alloc		= msi_domain_alloc,
    .free		= msi_domain_free,
    .activate	= msi_domain_activate,
    .deactivate	= msi_domain_deactivate,
};

static const struct irq_domain_ops x86_vector_domain_ops = {
    .select		= x86_vector_select,
    .alloc		= x86_vector_alloc_irqs,
    .free		= x86_vector_free_irqs,
    .activate	= x86_vector_activate,
    .deactivate	= x86_vector_deactivate,
#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
    .debug_show	= x86_vector_debug_show,
#endif
};
static int msi_domain_ops_init(struct irq_domain *domain,
                   struct msi_domain_info *info,
                   unsigned int virq, irq_hw_number_t hwirq,
                   msi_alloc_info_t *arg)
{
    irq_domain_set_hwirq_and_chip(domain, virq, hwirq, info->chip,
                      info->chip_data);
    if (info->handler && info->handler_name) {
        __irq_set_handler(virq, info->handler, 0, info->handler_name);
        if (info->handler_data)
            irq_set_handler_data(virq, info->handler_data);
    }
    return 0;
}

/**
 * irq_domain_set_hwirq_and_chip - Set hwirq and irqchip of @virq at @domain
 * @domain:	Interrupt domain to match
 * @virq:	IRQ number
 * @hwirq:	The hwirq number
 * @chip:	The associated interrupt chip
 * @chip_data:	The associated chip data
 */
int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq,
                  irq_hw_number_t hwirq, struct irq_chip *chip,
                  void *chip_data)
{
    struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq);

    if (!irq_data)
        return -ENOENT;

    irq_data->hwirq = hwirq;
    irq_data->chip = chip ? chip : &no_irq_chip;
    irq_data->chip_data = chip_data;

    return 0;
}
EXPORT_SYMBOL_GPL(irq_domain_set_hwirq_and_chip);


When a driver does not supply its own msi_init, msi_domain_update_dom_ops() fills in the default, which is msi_domain_ops_init() above:

ops->msi_init = msi_domain_ops_default.msi_init;

msi_domain_alloc(), listed earlier, then calls this ops->msi_init() once per allocated IRQ.

int dmar_set_interrupt(struct intel_iommu *iommu)
{
    int irq, ret;

    /*
     * Check if the fault interrupt is already initialized.
     */
    if (iommu->irq)
        return 0;

    irq = dmar_alloc_hwirq(iommu->seq_id, iommu->node, iommu);
    if (irq > 0) {
        iommu->irq = irq;
    } else {
        pr_err("No free IRQ vectors\n");
        return -EINVAL;
    }

    ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
    if (ret)
        pr_err("Can't request irq\n");
    return ret;
}
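
Putting the pieces together, the DMAR fault interrupt is wired up as:

dmar_set_interrupt()
    dmar_alloc_hwirq()
        irq_domain_alloc_irqs()
            __irq_domain_alloc_irqs()
                irq_domain_alloc_irqs_hierarchy()
                    msi_domain_alloc()                   // DMAR MSI domain
                        irq_domain_alloc_irqs_parent()   // -> x86_vector_alloc_irqs()
                        ops->msi_init()                  // -> dmar_msi_init()
    request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu)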