Fixed
Created: Oct 16, 2014
Updated: Dec 3, 2018
Resolved Date: Dec 17, 2014
Found In Version: 6.0.0.12
Fix Version: 6.0.0.16
Severity: Standard
Applicable for: Wind River Linux 6
Component/s: Kernel
The kexec'ed crash kernel crashes at boot when CONFIG_INTEL_IOMMU=y. If CONFIG_INTEL_IOMMU is not set, or if the iommu=off boot parameter is specified, the problem disappears.
My customer has this problem on his Intel Gladden (Sandy Bridge) CPU with Cave Creek PCH, and I can reproduce it on my Emerson MATXM-CORE-411.
On the host:
$ /ala-cvl1-lx1/common/lx6/wrlinux-6/wrlinux/configure --enable-board=intel-x86-64 --enable-kernel=standard --enable-rootfs=glibc_core+debug --enable-checkout-all-layers=yes --enable-reconfig --enable-rm-oldimgs=yes --with-template=feature/initramfs,feature/kexec,feature/kdump --with-rcpl-version=0012
$ make
$ make -C build linux-windriver.menuconfig
# Add CONFIG_INTEL_IOMMU=y to the kernel config
$ make -C build linux-windriver.rebuild
$ make
$ cp export/*bzImage* export/*cpio* export/dist/root/
$ make nfs-start TOPTS="-in 1"
On the target (Emerson MATXM-CORE-411 or Intel Gladden (Sandy Bridge) CPU with Cave Creek PCH):
# Add crashkernel=512M to the bootline:
rw clock=pit console=ttyS0,115200 root=/dev/nfs ip=dhcp crashkernel=512M nfsroot=147.11.153.19:/ala-cvl1-lx1/sandbox/nmarguet/lx6/juniper/bala-20141013_prj/export/dist,nfsvers=3,port=3149,mountprog=21211,nfsprog=11211,udp,mountport=3148
# Load the crash kernel
root@localhost:~# kexec -p --command-line="`cat /proc/cmdline | sed -e 's/crashkernel=[^ ]*//'` irqpoll maxcpus=1 nr_cpus=1 noacpi" --initrd=/root/intel-x86-64-glibc-core-standard-dist.cpio.gz /root/intel-x86-64-bzImage-WR6.0.0.12_standard
root@localhost:~# cat /sys/kernel/kexec_crash_loaded
1
root@localhost:~# zcat /proc/config.gz |grep IOMMU
CONFIG_GART_IOMMU=y
# CONFIG_CALGARY_IOMMU is not set
CONFIG_IOMMU_HELPER=y
CONFIG_IOMMU_API=y
CONFIG_IOMMU_SUPPORT=y
# CONFIG_AMD_IOMMU is not set
CONFIG_INTEL_IOMMU=y
CONFIG_INTEL_IOMMU_DEFAULT_ON=y
CONFIG_INTEL_IOMMU_FLOPPY_WA=y
# CONFIG_IOMMU_DEBUG is not set
# CONFIG_IOMMU_STRESS is not set
root@localhost:~# echo 10 > /proc/sys/kernel/panic
root@localhost:~# echo 1 > /proc/sys/kernel/sysrq
root@localhost:~# echo c > /proc/sysrq-trigger
SysRq : Trigger a crash
BUG: unable to handle kernel NULL pointer dereference at (null)
IP: [<ffffffff8143eb06>] sysrq_handle_crash+0x16/0x20
...
RIP [<ffffffff8143eb06>] sysrq_handle_crash+0x16/0x20
RSP <ffff88006fb4fe88>
CR2: 0000000000000000
################################################################
#### Expected reboot happens here! ####
#### But the next crash is not expected ####
################################################################
Initializing cgroup subsys cpuset
Initializing cgroup subsys cpu
Initializing cgroup subsys cpuacct
...
Trying to unpack rootfs image as initramfs...
Freeing initrd memory: 9956k freed
dmar: Host address width 36
dmar: DRHD base: 0x000000fed90000 flags: 0x0
dmar: IOMMU 0: reg_base_addr fed90000 ver 1:0 cap c9008020e30272 ecap 1000
dmar: DRHD base: 0x000000fed91000 flags: 0x0
dmar: IOMMU 1: reg_base_addr fed91000 ver 1:0 cap c0000020230272 ecap 1000
dmar: DRHD base: 0x000000fed93000 flags: 0x1
dmar: IOMMU 2: reg_base_addr fed93000 ver 1:0 cap c9008020630272 ecap 1000
dmar: RMRR base: 0x00000076e70000 end: 0x00000076e84fff
dmar: RMRR base: 0x00000079c00000 end: 0x0000007bffffff
IOMMU crashdump_accepting_active_iommu = true
IOMMU Skip disabling iommu hardware translations
DMAR: No ATSR found
IOMMU 1 0xfed91000: using Register based invalidation
IOMMU 0 0xfed90000: using Register based invalidation
IOMMU 2 0xfed93000: using Register based invalidation
PCI-DMA: Intel(R) Virtualization Technology for Directed I/O
BUG: unable to handle kernel paging request at ffffffffffffffff
IP: [<ffffffff811494ec>] kmem_cache_alloc_trace+0xac/0x250
PGD 36e0d067 PUD 36e0f067 PMD 0
Oops: 0000 [#1] PREEMPT SMP
Modules linked in:
CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.10.38-ltsi-WR6.0.0.12_standard #5
Hardware name: Intel Corporation Calpella platform/MATXM-CORE-411-B, BIOS 4.6.3 07/29/2011
task: ffff880035fd8000 ti: ffff880035fd2000 task.ti: ffff880035fd2000
RIP: 0010:[<ffffffff811494ec>] [<ffffffff811494ec>] kmem_cache_alloc_trace+0xac/0x250
RSP: 0000:ffff880035fd3c50 EFLAGS: 00010286
RAX: 0000000000000000 RBX: ffff880037215a00 RCX: 0000000000123400
RDX: 0000000000123300 RSI: 0000000000000000 RDI: 0000000000000001
RBP: ffff880035fd3c98 R08: 0000000000015a00 R09: 0000000000000000
R10: 00000000000016b7 R11: ffff880034a768b8 R12: ffffffffffffffff
R13: 00000000000080d0 R14: ffff880035c01c00 R15: ffff880035c01c00
FS: 0000000000000000(0000) GS:ffff880037200000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: ffffffffffffffff CR3: 0000000036e0c000 CR4: 00000000000007f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Stack:
ffffffff8175dee2 0000000000123300 ffff880035fd3fd8 0000000000000020
ffff880034bd2800 ffff880034952000 ffff880034952098 ffff880034bd2800
0000000000000000 ffff880035fd3cd8 ffffffff8175dee2 ffff880034bd2800
Call Trace:
[<ffffffff8175dee2>] ? iommu_group_add_device+0x32/0x1c0
[<ffffffff8175dee2>] iommu_group_add_device+0x32/0x1c0
[<ffffffff81761544>] intel_iommu_add_device+0x114/0x1f0
[<ffffffff8175e2e0>] ? bus_set_iommu+0x50/0x50
[<ffffffff8175e30a>] add_iommu_group+0x2a/0x50
[<ffffffff81501643>] bus_for_each_dev+0x63/0xa0
[<ffffffff8175e2d8>] bus_set_iommu+0x48/0x50
[<ffffffff81f4b6bf>] intel_iommu_init+0xd41/0xfde
[<ffffffff81f0d20e>] pci_iommu_init+0x12/0x3c
[<ffffffff81f0d1fc>] ? memblock_find_dma_reserve+0x124/0x124
[<ffffffff810002d2>] do_one_initcall+0x102/0x160
[<ffffffff81f06f71>] kernel_init_freeable+0x15b/0x21e
[<ffffffff81f06818>] ? do_early_param+0x88/0x88
[<ffffffff8196aec8>] ? _raw_spin_unlock_irq+0x18/0x40
[<ffffffff8106cd6e>] ? finish_task_switch+0x4e/0xb0
[<ffffffff819502e0>] ? rest_init+0x90/0x90
[<ffffffff819502ee>] kernel_init+0xe/0x190
[<ffffffff8197271c>] ret_from_fork+0x7c/0xb0
[<ffffffff819502e0>] ? rest_init+0x90/0x90
Code: ab 01 00 00 4c 8b 23 48 8b 43 10 4d 85 e4 0f 84 53 01 00 00 48 85 c0 0f 84 4a 01 00 00 49 63 46 20 48 8d 8a 00 01 00 00 4d 8b 06 <49> 8b 1c 04 4c 89 e0 65 49 0f c7 08 0f 94 c0 84 c0 74 87 49 63
RIP [<ffffffff811494ec>] kmem_cache_alloc_trace+0xac/0x250
RSP <ffff880035fd3c50>
CR2: ffffffffffffffff
---[ end trace de4ef6671540a56f ]---
Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000009