0%

riscv imsic 相关

参考文档
Documentation/devicetree/bindings/interrupt-controller/riscv,imsics.yaml

dts 信息

ex:
提供了两个interrupt file M-mode 和 S-mode的.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
imsic_mlevel: interrupt-controller@24000000 {
compatible = "riscv,qemu-imsics", "riscv,imsics";
interrupts-extended = <&cpu1_intc 11>,
<&cpu2_intc 11>,
<&cpu3_intc 11>,
<&cpu4_intc 11>;
reg = <0x28000000 0x4000>;
interrupt-controller;
#interrupt-cells = <0>;
msi-controller;
riscv,num-ids = <127>;
};

imsic_slevel: interrupt-controller@28000000 {
compatible = "riscv,qemu-imsics", "riscv,imsics";
interrupts-extended = <&cpu1_intc 9>,
<&cpu2_intc 9>,
<&cpu3_intc 9>,
<&cpu4_intc 9>;
reg = <0x28000000 0x2000>, /* Group0 IMSICs */
<0x29000000 0x2000>; /* Group1 IMSICs */
interrupt-controller;
#interrupt-cells = <0>;
msi-controller;
riscv,num-ids = <127>;
riscv,group-index-bits = <1>;
riscv,group-index-shift = <24>;
};

下面对上述dts的相关属性做一个大概的介绍

  1. interrupts-extended
    对应cpu1-4, 每个cpu都有这两个interrupt file M-mode和S-mode的
    M的对应中断位为11, 代表 Machine external interrupt
    S的对应中断位为9, 代表 Supervisor external interrupt
  2. riscv,num-ids
    Number of interrupt identities supported by IMSIC interrupt file.
    外部中断数量, 反映到eip和eie中, 最小63 最大 2047
  3. riscv,group-index-bits
    Number of group index bits in the MSI target address. When not specified it is assumed to be 0.
    总的group index bits
  4. riscv,group-index-shift
    The least significant bit position of the group index bits in the MSI target address. When not specified it is assumed to be 24.
  5. reg
    Base address of each IMSIC group.
    关于MSI target address:
    从第24 bit 开始, 上述group-index-bits 表示在Group Index中总共有几个bit 被使用
    riscv,group-index-shift 表示Group Index中当前interrupt file 占位开始的那个bit位, 如其占了两个bit位 26-27, shift表示最开始的bit位 26.
1
2
3
4
5
XLEN-1           >=24                                 12    0
|                  |                                  |     |
-------------------------------------------------------------
|xxxxxx|Group Index|xxxxxxxxxxx|HART Index|Guest Index|  0  |
-------------------------------------------------------------

guest os相关

guest external interrupt 相关的dts信息

  1. riscv,num-guest-ids
    Number of interrupt identities are supported by IMSIC guest interrupt
    file. When not specified it is assumed to be same as specified by the
    riscv,num-ids property.
    guest external 硬件中断号 最小63 最大2047
  2. riscv,hart-index-bits
    Number of HART index bits in the MSI target address. When not
    specified it is estimated based on the interrupts-extended property.
    MSI target address 中 Hart Index 总共使用了几个bit位.
    最小0 最大 15
  3. riscv,guest-index-bits
    Number of guest index bits in the MSI target address. When not
    specified it is assumed to be 0.
    MSI target address 中 Guest Index 总共使用了几个bit位.
    最小0 最大 7

下面开始大概分析 imsic 的代码

代码部分

imsic_init

dts 相关封装的接口

1
2
3
4
5
6
7
8
9
struct imsic_fwnode_ops ops = {
.nr_parent_irq = imsic_dt_nr_parent_irq,
.parent_hartid = imsic_dt_parent_hartid,
.nr_mmio = imsic_dt_nr_mmio,
.mmio_to_resource = imsic_mmio_to_resource,
.mmio_map = imsic_dt_mmio_map,
.read_u32 = imsic_dt_read_u32,
.read_bool = imsic_dt_read_bool,
};
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
-+ imsic_init(struct imsic_fwnode_ops *fwops, struct fwnode_handle *fwnode, void *fwopaque)
\ - check riscv_isa_extension_available(NULL, SxAIA) "RISCV_ISA_EXT_SxAIA -> RISCV_ISA_EXT_SSAIA ?"
| - priv = kzalloc(sizeof(*priv), GFP_KERNEL);
| - global = &priv->global;
| -+ nr_parent_irqs = fwops->nr_parent_irq(fwnode, fwopaque);
\ -+ imsic_dt_nr_parent_irq(fwnode, fwopaque)
\ - of_irq_count(to_of_node(fwnode)); "一般会读到 dts的 interrupts-extended 属性中, 代表cpu的中断域
of_irq_count 只统计当前节点的中断数量, 如前面dts中 M-mode 和 S-mode 两个节点各自引用了 cpu1-4 的中断域, 因此对每个节点来说 nr_parent_irqs 为4"
| - rc = fwops->read_u32(fwnode, fwopaque, "riscv,guest-index-bits", &global->guest_index_bits);
"调用 imsic_dt_read_u32 读 riscv,guest-index-bits 读出 Msi target address 中 Guest Index 总共使用了几个bit位,
结果放入 priv->global->guest_index_bits"
| - rc = fwops->read_u32(fwnode, fwopaque, "riscv,hart-index-bits", &global->hart_index_bits);
"读取 dts riscv,hart-index-bits , 读出 Msi target address 中 Hart Index 总共使用了几个bit位, 结果放入priv->global->hart_index_bits"
| - rc = fwops->read_u32(fwnode, fwopaque, "riscv,group-index-bits", &global->group_index_bits);
| - rc = fwops->read_u32(fwnode, fwopaque, "riscv,group-index-shift", &global->group_index_shift);
| - rc = fwops->read_u32(fwnode, fwopaque, "riscv,num-ids", &global->nr_ids); "多少个hwirq"
| - fwops->read_u32(fwnode, fwopaque, "riscv,num-guest-ids", &global->nr_guest_ids) "多少个guest 可以使用的hwirq"
| -+ fwops->mmio_to_resource(fwnode, fwopaque, 0, &res);
\ -+ imsic_mmio_to_resource(fwnode, fwopaque, 0, &res); "Compute base address"
\ - of_address_to_resource(to_of_node(fwnode), index, res); "在设备树中找到第一个"reg",并将解析到的信息填充在"res"结构体里"
| - global->base_addr = res.start;
| - global->base_addr &= ~(BIT(global->guest_index_bits + global->hart_index_bits + IMSIC_MMIO_PAGE_SHIFT) - 1);
"Msi target address 中 屏蔽低 "guest bit + hart bit +12" 位"
| - global->base_addr &= ~((BIT(global->group_index_bits) - 1) << global->group_index_shift);
"Msi target address 中 屏蔽 其他group 的bit 位, 剩下的才是本group 使用的bit位"
"这个地方结合前面的dts信息, 这个做完之后应该就是0x28000000"
| -+ priv->num_mmios = fwops->nr_mmio(fwnode, fwopaque);
\ -+ imsic_dt_nr_mmio(fwnode, fwopaque) "Find number of MMIO register sets"
\ -+ while (!of_address_to_resource(to_of_node(fwnode), ret, &res)) "依次查剩下的reg , 结果放到 res中"
\ - ret++
| return ret "即dts中 总共几个 reg 信息"
| - priv->mmios = kcalloc(priv->num_mmios, sizeof(*mmio), GFP_KERNEL); "分配mmios 数组"
| -+ for i in priv->num_mmios "i从0 开始"
\ - mmio = &priv->mmios[i];
| - rc = fwops->mmio_to_resource(fwnode, fwopaque, i, &res); "i 从0 开始, 依次取本节点 reg 属性中的第 i 项, 即第 i 个 group 的 IMSIC 地址范围"
| - mmio->pa = res.start;
| - mmio->size = res.end - res.start + 1; "这个是dts中 reg 的第二个字段 决定了size"
| - mmio->va = fwops->mmio_map(fwnode, fwopaque, i); "为pa 建立页表映射, 返回va"
| -+ imsic_ids_init(priv) "为hwirq 初始化 bitmap 位图"
\ - priv->ids_used_bimap = kcalloc(BITS_TO_LONGS(global->nr_ids + 1), sizeof(unsigned long), GFP_KERNEL);
| - priv->ids_target_cpu = kcalloc(global->nr_ids + 1, sizeof(unsigned int), GFP_KERNEL);
| -+ for (i = 0; i <= global->nr_ids; i++)
\ - priv->ids_target_cpu[i] = UINT_MAX; \
"这个ids_target_cpu数组维护了每个hwirq 同 cpuid 的映射关系, 即该hwirq 会发送到哪个hart上, 设置hwirq的亲和性时会用到"
| - bitmap_set(priv->ids_used_bimap, 0, 1); "将 used bitmap 的 bit 0 置1, 即保留 ID 0 (IMSIC 中 interrupt identity 0 无效, 不参与分配)"
| -+ for (i = 0; i < nr_parent_irqs; i++)
\ - rc = fwops->parent_hartid(fwnode, fwopaque, i, &hartid); "找到第 i 个 parent irq 对应的 hartid, 按前面举例的dts, 每个节点一共4个 (cpu1-4)"
| - cpu = riscv_hartid_to_cpuid(hartid); "hartid -> cpuid"
| - reloff = i * BIT(global->guest_index_bits) * IMSIC_MMIO_PAGE_SZ; "相对于 mmio base_addr 的偏移"
"这里将guest_index_bits 计入了, 代表每个hart的mmio 需要包含 guest interrupt file的地址空间"
| - handler = per_cpu_ptr(&imsic_handlers, cpu); "每个cpu 一个 handler 结构"
| - handler->local.msi_pa = mmio->pa + reloff; "msi_pa 加上该hart mmio的 相对于 mmio_base_addr 的偏移量"
| - handler->local.msi_va = mmio->va + reloff; "msi_va 同上"
| - domain = irq_find_matching_fwnode(riscv_get_intc_hwnode(), DOMAIN_BUS_ANY); "cpu-intc"
| - imsic_parent_irq = irq_create_mapping(domain, RV_IRQ_EXT); "IRQ_S_EXT 9 为S-mode 外部中断, 建立hwirq 同 linux irq的映射关系"
| - irq_set_chained_handler(imsic_parent_irq, imsic_handle_irq);
"cpu 来了 9号中断 S external interrupt 后, 由 imsic_handle_irq 处理函数进行处理该中断"
| - imsic_ipi_domain_init(priv); "ipi_domain"
| - imsic_irq_domains_init(priv, fwnode); "/* Initialize IRQ and MSI domains */"
| -+ cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "irqchip/riscv/imsic:starting", imsic_starting_cpu, NULL);
"注册 cpu startup 为 imsic_starting_cpu(), cpu online 时回调 startup "
\ -+ imsic_starting_cpu(cpu)
\ - enable_percpu_irq(imsic_parent_irq, irq_get_trigger_type(imsic_parent_irq));
"软件层 所有cpu 打开 映射了 hwirq为 IRQ_S_EXT的 linux irq 的中断"
| -+ imsic_ipi_enable(priv); ""
\ -+ __imsic_id_enable(priv->ipi_id); "imsic 寄存器打开 ipi 中断"
\ -+ __imsic_eix_update((priv->ipi_id), 1, false, true)
"让iselect 选择对应的 eip/eie 寄存器, 然后选择置1或置0 来开关中断/开关pending"
\ -+ imsic_csr_set(isel, ireg);
\ - csr_write(CSR_ISELECT, isel) "更新 siselect csr"
| - csr_set(CSR_IREG, ireg) "更新 sireg csr"

比较重要的数据:
global->base_addr
handler->local.msi_pa / handler->local.msi_va

imsic_handle_irq

S external interrupt 中断来了之后, 会由 imsic_handle_irq 函数进行处理, 简单看一下外部中断处理的流程

1
2
3
4
5
6
7
8
9
-+ imsic_handle_irq(struct irq_desc *desc)
\ - imsic_handler *handler = this_cpu_ptr(&imsic_handlers); "找到前面为每个hart 创建的handler "
| - irq_chip *chip = irq_desc_get_chip(desc); "这个芯片注册的irq的相关 ops"
| - chained_irq_enter(chip, desc); "在级联的中断处理函数中,调用`chained_irq_enter`进入中断级联处理"
| -+ while ((hwirq = csr_swap(CSR_TOPEI, 0)) "读 stopei csr, 该csr 为 imsic 专属, 其 26:16 (共11 bit) 代表hwirq"
"这个地方为什么用while, 猜测可能一次有多个外部中断, 每次对stopei 清0后, 中断控制器会为其设置次一个优先级的中断标识, stopei csr 是eipx & eiex 联合选择的结果, 从高优先级->低优先级 依次选择供给stopei 相应的中断标识"
\ - hwirq = hwirq >> TOPEI_ID_SHIFT;
| - generic_handle_domain_irq(priv->base_domain, hwirq);
"找到 外设驱动 request_irq 为该hwirq 注册的irq_handler 进行中断处理"

大体上就是这个结构: 处理函数中有一个loop, 需要多次读取并处理stopei, 按从高优先级->低优先级的顺序, 依次处理完本次到来的所有S-mode外部中断.

chip imsic 相关hook

1
2
3
4
5
6
7
8
9
10
11
static struct irq_chip imsic_irq_base_chip = {
.name = "RISC-V IMSIC-BASE",
.irq_mask = imsic_irq_mask,
.irq_unmask = imsic_irq_unmask,
#ifdef CONFIG_SMP
.irq_set_affinity = imsic_irq_set_affinity,
#endif
.irq_compose_msi_msg = imsic_irq_compose_msi_msg,
.flags = IRQCHIP_SKIP_SET_WAKE |
IRQCHIP_MASK_ON_SUSPEND,
};

imsic_irq_set_affinity

在外设驱动注册hwirq 中断时, 会走到 irq_set_affinity, 进而调用chip的 irq_set_affinity 钩子.
对于imsic来说, 调用 imsic_irq_set_affinity 函数设置cpu亲和性, 即该外部中断由哪个cpu进行处理
简单分析下这个函数

1
2
3
4
5
6
7
8
9
10
11
12
-+ imsic_irq_set_affinity(struct irq_data *d, const struct cpumask *mask_val, bool force)
"与cpuid 相关的在 mask_val 参数中"
\ -+ imsic_get_cpu(priv, mask_val, force, &target_cpu);
\ - cpumask_and(&amask, &priv->lmask, mask_val);
| -|+ if force
\ - cpu = cpumask_first(&amask); "直接从 cpu_mask mask_val中选出对应的cpu"
| -|+ if !force
\ - cpu = cpumask_any_and(&amask, cpu_online_mask); "优先从上线的cpu中再结合 cpu_mask 选出对应的cpu"

| - *out_target_cpu = cpu;
| -+ imsic_id_set_target(priv, d->hwirq, target_cpu); "? 并没有硬件寄存器相关的操作"
\ - priv->ids_target_cpu[id] = target_cpu; "? "

说明与hwirq 开启需要设置的csr 操作并未在这个函数中

  1. of_irq_get -> irq_create_of_mapping -> irq_domain_alloc_irqs -> irq_domain_alloc_irqs_hierarchy -> domain->ops->alloc(domain, irq_base, nr_irqs, arg)
  2. request_irq -> request_threaded_irq -> irq_startup -> __irq_startup -> irq_enable -> unmask_irq(desc) -> chip->irq_unmask(&desc->irq_data);

.alloc = imsic_irq_domain_alloc

1
2
3
4
static const struct irq_domain_ops imsic_base_domain_ops = {
.alloc = imsic_irq_domain_alloc,
.free = imsic_irq_domain_free,
};

相关堆栈

1
2
3
4
5
6
#1  0xffffffff8006a134 in irq_domain_alloc_irqs_hierarchy (arg=0xff2000000060b728, nr_irqs=1, irq_base=2, domain=0xff6000007fe0c800) at ../kernel/irq/irqdomain.c:1426
#2 __irq_domain_alloc_irqs (domain=domain@entry=0xff6000007fe0c800, irq_base=irq_base@entry=-1, nr_irqs=nr_irqs@entry=1, node=node@entry=-1, arg=arg@entry=0xff2000000060b728, realloc=realloc@entry=false, affinity=affinity@entry=0x0) at ../kernel/irq/irqdomain.c:1482
#3 0xffffffff8006a5d0 in irq_domain_alloc_irqs (arg=0xff2000000060b728, node=-1, nr_irqs=1, domain=0xff6000007fe0c800) at ../include/linux/irqdomain.h:516
#4 irq_create_fwspec_mapping (fwspec=fwspec@entry=0xff2000000060b728) at ../kernel/irq/irqdomain.c:825
#5 0xffffffff8006a6ee in irq_create_of_mapping (irq_data=irq_data@entry=0xff2000000060b7a8) at ../kernel/irq/irqdomain.c:858
#6 0xffffffff80515156 in of_irq_get (dev=dev@entry=0xff600000bfdf5200, index=index@entry=0) at ../drivers/of/irq.c:444

unmask_irq 相关堆栈

1
2
3
4
5
6
7
8
#0  plic_irq_unmask (d=0xff6000007fefec20) at ../drivers/irqchip/irq-sifive-plic.c:122
#1 0xffffffff80067d16 in unmask_irq (desc=<optimized out>) at ../kernel/irq/chip.c:435
#2 irq_enable (desc=desc@entry=0xff6000007fefec00) at ../kernel/irq/chip.c:342
#3 0xffffffff80067d72 in __irq_startup (desc=desc@entry=0xff6000007fefec00) at ../kernel/irq/chip.c:246
#4 0xffffffff80067e4e in irq_startup (desc=desc@entry=0xff6000007fefec00, resend=resend@entry=true, force=force@entry=false) at ../kernel/irq/chip.c:267
#5 0xffffffff80065a8a in __setup_irq (irq=irq@entry=1, desc=desc@entry=0xff6000007fefec00, new=new@entry=0xff6000007ff24f80) at ../kernel/irq/manage.c:1777
#6 0xffffffff80065d7c in request_threaded_irq (irq=<optimized out>, handler=handler@entry=0xffffffff80386ca6 <vp_interrupt>, thread_fn=thread_fn@entry=0x0, irqflags=irqflags@entry=128, devname=0xff60000080172858 "virtio2", dev_id=dev_id@entry=0xff6000008015b800) at ../kernel/irq/manage.c:2206
#7 0xffffffff80387540 in request_irq (dev=0xff6000008015b800, name=<optimized out>, flags=128, handler=<optimized out>, irq=<optimized out>) at ../include/linux/interrupt.h:168

imsic_irq_domain_alloc

1
2
3
4
5
6
7
8
9
-+ imsic_irq_domain_alloc(struct irq_domain *domain,
unsigned int virq,
unsigned int nr_irqs,
void *args)
\ - imsic_priv *priv = domain->host_data;
| - imsic_get_cpu(priv, &priv->lmask, false, &cpu); "从 priv->lmask 中选出一个在线的 cpu 作为目标 cpu"
| - imsic_cpu_page_phys(cpu, 0, &msi_addr);
| - hwirq = imsic_ids_alloc(priv, priv->global.nr_ids, get_count_order(nr_irqs));
| - imsic_id_set_target(priv, hwirq + i, cpu);

unmask_irq -> imsic_irq_unmask

1
2
3
4
5
6
7
8
9
10
-+ imsic_irq_unmask(struct irq_data *d)
\ -+ imsic_id_enable(irq_data_get_irq_chip_data(d), d->hwirq);
\ - bitmap_set(priv->ids_enabled_bimap, id, 1); "enabled bitmap 位图中 hwirq 的bit置1"
\ -+ __imsic_id_enable(id); "imsic 寄存器打开该 hwirq 对应的中断"
\ -+ __imsic_eix_update(id, 1, false, true)
"让iselect 选择对应的 eip/eie 寄存器, 然后选择置1或置0 来开关中断/开关pending"
\ -+ imsic_csr_set(isel, ireg);
\ - csr_write(CSR_ISELECT, isel) "更新 siselect csr"
| - csr_set(CSR_IREG, ireg) "更新 sireg csr"

最终由unmask_irq -> imsic_irq_unmask 打开中断, 最后设置了该hart的 siselect 和 sireg csr, 将对应的eipx/eiex 置位.

小结与思考

从上面中断注册和中断处理过程中, 可以看到每个 hart 对应的MSI的mmio
对于S-mode来说, 外部中断设置的最终是
__imsic_id_enable(id)
该函数最终设置的是 eipx/eiex, 通过siselect 与 sireg 这两个csr 选择对应的 eipx/eiex, 再设置对应的值, 即可开关对应的pending/中断使能, csr 是hart 专属的, 每一个hart 都有一组csr.

imsic_irq_mask imsic_irq_unmask 最终关联 __imsic_id_enable __imsic_id_disable 这一组函数

irq_mask 中断屏蔽
irq_unmask 中断打开

但这其中并未发现直接的mmio 地址关联
上述中断开关最终都是操作的siselect/sireg 寄存器, 猜测最终都是设置的该hart的 S-mode interrupt file的mmio region.

与guest interrupt file 相关的设置在哪里呢, 怀疑的地方是msi相关的函数 imsic_irq_compose_msi_msg
该提交中看起来并没有涉及到 guest 相关的设置.
除了初始化函数中与guest_index 相关的内容外, 并没有其他的.