Bug 1077467
| Summary: | kernel BUG at block/blk-core.c:2479! with virtio-scsi driver | ||||||
|---|---|---|---|---|---|---|---|
| Product: | Red Hat Enterprise Linux 7 | Reporter: | Xu Han <xuhan> | ||||
| Component: | qemu-kvm | Assignee: | Paolo Bonzini <pbonzini> | ||||
| Status: | CLOSED DUPLICATE | QA Contact: | Virtualization Bugs <virt-bugs> | ||||
| Severity: | high | Docs Contact: | |||||
| Priority: | high | ||||||
| Version: | 7.0 | CC: | acathrow, areis, chayang, hhuang, juzhang, knoel, kwolf, lagarcia, michen, pbonzini, sluo, stefanha, virt-maint, xfu, xuhan | ||||
| Target Milestone: | rc | Keywords: | TestOnly | ||||
| Target Release: | --- | ||||||
| Hardware: | Unspecified | ||||||
| OS: | Unspecified | ||||||
| Whiteboard: | |||||||
| Fixed In Version: | Doc Type: | Bug Fix | |||||
| Doc Text: | Story Points: | --- | |||||
| Clone Of: | Environment: | ||||||
| Last Closed: | 2014-08-29 12:38:20 UTC | Type: | Bug | ||||
| Regression: | --- | Mount Type: | --- | ||||
| Documentation: | --- | CRM: | |||||
| Verified Versions: | Category: | --- | |||||
| oVirt Team: | --- | RHEL 7.3 requirements from Atomic Host: | |||||
| Cloudforms Team: | --- | Target Upstream Version: | |||||
| Embargoed: | |||||||
| Bug Depends On: | 1098506, 1164749 | ||||||
| Bug Blocks: | 1110703 | ||||||
| Attachments: |
|
||||||
Created attachment 875787 [details]
dmesg log
crash> bt -a
PID: 27071 TASK: ffff880095965780 CPU: 0 COMMAND: "kworker/u12:2"
#0 [ffff88013fc03a68] machine_kexec at ffffffff8103f3c2
#1 [ffff88013fc03ab8] crash_kexec at ffffffff810c75e3
#2 [ffff88013fc03b80] oops_end at ffffffff815cb5a8
#3 [ffff88013fc03ba8] die at ffffffff8101612b
#4 [ffff88013fc03bd8] do_trap at ffffffff815caca0
#5 [ffff88013fc03c28] do_invalid_op at ffffffff81013072
#6 [ffff88013fc03cd0] invalid_op at ffffffff815d43de
[exception RIP: blk_finish_request+240]
RIP: ffffffff8127f840 RSP: ffff88013fc03d88 RFLAGS: 00010087
RAX: 0000000000000246 RBX: ffff880121572a80 RCX: 00000000000011ee
RDX: 0000000011ee11ee RSI: 00000000fffffffb RDI: ffff880121572a80
RBP: ffff88013fc03d98 R8: 0000000000000246 R9: 000000000000007f
R10: ffffffff81dae83c R11: 0000000000000000 R12: 00000000fffffffb
R13: ffff880036898000 R14: 0000000000000246 R15: ffff88005ea21500
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
#7 [ffff88013fc03da0] blk_end_bidi_request at ffffffff8127f892
#8 [ffff88013fc03dd0] blk_end_request_err at ffffffff8127f94d
#9 [ffff88013fc03df0] scsi_io_completion at ffffffff813c94db
#10 [ffff88013fc03e50] scsi_finish_command at ffffffff813beac3
#11 [ffff88013fc03e80] scsi_softirq_done at ffffffff813c914f
#12 [ffff88013fc03eb0] blk_done_softirq at ffffffff81285c90
#13 [ffff88013fc03ee8] __do_softirq at ffffffff81062807
#14 [ffff88013fc03f58] call_softirq at ffffffff815d45dc
#15 [ffff88013fc03f70] do_softirq at ffffffff81014ba5
#16 [ffff88013fc03f88] irq_exit at ffffffff81062ba5
#17 [ffff88013fc03fa0] smp_call_function_single_interrupt at ffffffff81037305
#18 [ffff88013fc03fb0] call_function_single_interrupt at ffffffff815d3d9d
--- <IRQ stack> ---
#19 [ffff880060f03c28] call_function_single_interrupt at ffffffff815d3d9d
[exception RIP: _raw_spin_unlock_irqrestore+27]
RIP: ffffffff815c9ffb RSP: ffff880060f03cd8 RFLAGS: 00000287
RAX: 0000000000000001 RBX: 00000000390968f8 RCX: ffff880136cf7000
RDX: 0000000000000001 RSI: 0000000001ca01c8 RDI: 0000000000000287
RBP: ffff880060f03ce0 R8: 0000000000017360 R9: ffffffffa00634e7
R10: 0000000000000023 R11: 0000000000000000 R12: ffffffffa00634e7
R13: ffffffff811888e5 R14: 0000000000000006 R15: ffff880060f03c60
ORIG_RAX: ffffffffffffff04 CS: 0010 SS: 0018
#20 [ffff880060f03ce8] virtscsi_kick_cmd at ffffffffa004a8fc [virtio_scsi]
#21 [ffff880060f03da0] virtscsi_tmf.constprop.6 at ffffffffa004af12 [virtio_scsi]
#22 [ffff880060f03de0] virtscsi_abort at ffffffffa004b05a [virtio_scsi]
#23 [ffff880060f03e00] scmd_eh_abort_handler at ffffffff813c56b7
#24 [ffff880060f03e28] process_one_work at ffffffff8107860b
#25 [ffff880060f03e70] worker_thread at ffffffff810793bb
#26 [ffff880060f03ed0] kthread at ffffffff8107fc10
#27 [ffff880060f03f50] ret_from_fork at ffffffff815d2bec
PID: 27491 TASK: ffff880137bb41a0 CPU: 1 COMMAND: "netperf"
#0 [ffff88013fc46e78] crash_nmi_callback at ffffffff810361d2
#1 [ffff88013fc46e88] nmi_handle at ffffffff815cb729
#2 [ffff88013fc46ec8] do_nmi at ffffffff815cb830
#3 [ffff88013fc46ef0] end_repeat_nmi at ffffffff815cabb1
[exception RIP: __d_free+48]
RIP: ffffffff811b78c0 RSP: ffff88004e3bbe28 RFLAGS: 00000246
RAX: 0000000000000010 RBX: 0000000000000010 RCX: 0000000000000246
RDX: ffff88004e3bbe28 RSI: 0000000000000018 RDI: 0000000000000001
RBP: ffffffff811b78c0 R8: ffffffff811b78c0 R9: 0000000000000018
R10: ffff88004e3bbe28 R11: 0000000000000246 R12: ffffffffffffffff
R13: ffff88005ba884b8 R14: ffff880131422080 R15: 0000000009a05068
ORIG_RAX: 0000000009a05068 CS: 0010 SS: 0018
--- <NMI exception stack> ---
#4 [ffff88004e3bbe28] __d_free at ffffffff811b78c0
#5 [ffff88004e3bbe40] d_free at ffffffff811b7c95
#6 [ffff88004e3bbe58] dentry_kill at ffffffff811b85b1
#7 [ffff88004e3bbe88] dput at ffffffff811b86bc
#8 [ffff88004e3bbea8] __fput at ffffffff811a3ffb
#9 [ffff88004e3bbef0] ____fput at ffffffff811a421e
#10 [ffff88004e3bbf00] task_work_run at ffffffff8107c847
#11 [ffff88004e3bbf30] do_notify_resume at ffffffff81012a27
#12 [ffff88004e3bbf50] int_signal at ffffffff815d2f52
RIP: 00007f91086c5b70 RSP: 00007fffa119aca8 RFLAGS: 00000246
RAX: 0000000000000000 RBX: 000000000063eca8 RCX: ffffffffffffffff
RDX: 0000000000000001 RSI: 00007fffa119acd0 RDI: 0000000000000004
RBP: 00007fffa119ace0 R8: 0000000000000000 R9: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 000000000063ec78
R13: 00007fffa119b020 R14: 0000000000000000 R15: 0000000000000000
ORIG_RAX: 0000000000000003 CS: 0033 SS: 002b
PID: 0 TASK: ffff880139b70af0 CPU: 2 COMMAND: "swapper/2"
#0 [ffff88013fc86e78] crash_nmi_callback at ffffffff810361d2
#1 [ffff88013fc86e88] nmi_handle at ffffffff815cb729
#2 [ffff88013fc86ec8] do_nmi at ffffffff815cb830
#3 [ffff88013fc86ef0] end_repeat_nmi at ffffffff815cabb1
[exception RIP: native_safe_halt+6]
RIP: ffffffff810440e6 RSP: ffff880139b7bea8 RFLAGS: 00000286
RAX: 0000000000000010 RBX: 0000000000000010 RCX: 0000000000000286
RDX: ffff880139b7bea8 RSI: 0000000000000018 RDI: 0000000000000001
RBP: ffffffff810440e6 R8: ffffffff810440e6 R9: 0000000000000018
R10: ffff880139b7bea8 R11: 0000000000000286 R12: ffffffffffffffff
R13: 0000000000000046 R14: 0000000000000000 R15: 0000000000000000
ORIG_RAX: 0000000000000000 CS: 0010 SS: 0018
--- <NMI exception stack> ---
#4 [ffff880139b7bea8] native_safe_halt at ffffffff810440e6
#5 [ffff880139b7beb0] default_idle at ffffffff8101ae0f
#6 [ffff880139b7bed0] arch_cpu_idle at ffffffff8101b6d6
#7 [ffff880139b7bee0] cpu_startup_entry at ffffffff810ad405
#8 [ffff880139b7bf38] start_secondary at ffffffff815b210c
PID: 0 TASK: ffff880139b715e0 CPU: 3 COMMAND: "swapper/3"
#0 [ffff88013fcc6e78] crash_nmi_callback at ffffffff810361d2
#1 [ffff88013fcc6e88] nmi_handle at ffffffff815cb729
#2 [ffff88013fcc6ec8] do_nmi at ffffffff815cb830
#3 [ffff88013fcc6ef0] end_repeat_nmi at ffffffff815cabb1
[exception RIP: native_safe_halt+6]
RIP: ffffffff810440e6 RSP: ffff880139b7dea8 RFLAGS: 00000286
RAX: 0000000000000010 RBX: 0000000000000010 RCX: 0000000000000286
RDX: ffff880139b7dea8 RSI: 0000000000000018 RDI: 0000000000000001
RBP: ffffffff810440e6 R8: ffffffff810440e6 R9: 0000000000000018
R10: ffff880139b7dea8 R11: 0000000000000286 R12: ffffffffffffffff
R13: 0000000000000046 R14: 0000000000000000 R15: 0000000000000000
ORIG_RAX: 0000000000000000 CS: 0010 SS: 0018
--- <NMI exception stack> ---
#4 [ffff880139b7dea8] native_safe_halt at ffffffff810440e6
#5 [ffff880139b7deb0] default_idle at ffffffff8101ae0f
#6 [ffff880139b7ded0] arch_cpu_idle at ffffffff8101b6d6
#7 [ffff880139b7dee0] cpu_startup_entry at ffffffff810ad405
#8 [ffff880139b7df38] start_secondary at ffffffff815b210c
Unlikely to be a regression. *** Bug 1081430 has been marked as a duplicate of this bug. *** This is a kernel bug, closing as dup. *** This bug has been marked as a duplicate of bug 1001412 *** |
Description of problem: A guest that had been running the iozone benchmark for about a week hit a kernel crash. The backtrace shows that this issue is most likely related to the virtio-scsi driver. Version-Release number of selected component (if applicable): guest: kernel-3.10.0-97.el7.x86_64 qemu-kvm-rhev-1.5.3-49.el7.x86_64 How reproducible: 1/1 Steps to Reproduce: 1. Run iozone endlessly. # while true; do iozone -a; sleep 60; done 2. 3. Actual results: [732802.513165] end_request: I/O error, dev sda, sector 38884872 [732802.514093] ------------[ cut here ]------------ [732802.514093] kernel BUG at block/blk-core.c:2479! [732802.514093] invalid opcode: 0000 [#1] SMP [732802.514093] Modules linked in: nfsv3 rpcsec_gss_krb5 nfsv4 dns_resolver nfs fscache tcp_lp bnep bluetooth rfkill fuse ip6t_rpfilter ip6t_REJECT ipt_REJECT xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw iptable_filter ip_tables sg virtio_balloon virtio_console i2c_piix4 mperf crc32c_intel serio_raw pcspkr microcode nfsd auth_rpcgss nfs_acl lockd sunrpc uinput xfs libcrc32c ata_generic sd_mod crc_t10dif crct10dif_common pata_acpi virtio_net virtio_scsi qxl drm_kms_helper ttm ata_piix drm libata virtio_pci virtio_ring i2c_core [732802.514093] virtio floppy dm_mirror dm_region_hash dm_log dm_mod [732802.514093] CPU: 0 PID: 27071 Comm: kworker/u12:2 Not tainted 3.10.0-97.el7.x86_64 #1 [732802.514093] Hardware name: Red Hat KVM, BIOS Bochs 01/01/2011 [732802.514093] Workqueue: scsi_tmf_2 scmd_eh_abort_handler [732802.514093] task: ffff880095965780 ti: ffff880060f02000 task.ti: ffff880060f02000 [732802.514093] RIP: 0010:[<ffffffff8127f840>] [<ffffffff8127f840>] blk_finish_request+0xf0/0x100 [732802.514093] RSP: 0018:ffff88013fc03d88 EFLAGS: 00010087 
[732802.514093] RAX: 0000000000000246 RBX: ffff880121572a80 RCX: 00000000000011ee [732802.514093] RDX: 0000000011ee11ee RSI: 00000000fffffffb RDI: ffff880121572a80 [732802.514093] RBP: ffff88013fc03d98 R08: 0000000000000246 R09: 000000000000007f [732802.514093] R10: ffffffff81dae83c R11: 0000000000000000 R12: 00000000fffffffb [732802.514093] R13: ffff880036898000 R14: 0000000000000246 R15: ffff88005ea21500 [732802.514093] FS: 0000000000000000(0000) GS:ffff88013fc00000(0000) knlGS:0000000000000000 [732802.514093] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [732802.514093] CR2: 00007ff4604c4840 CR3: 00000000b7c34000 CR4: 00000000000006f0 [732802.514093] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [732802.514093] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [732802.514093] Stack: [732802.514093] ffff880121572a80 00000000fffffffb ffff88013fc03dc8 ffffffff8127f892 [732802.514093] ffff880121572a80 00000000fffffffb 00000000fffffffb ffff880121572a80 [732802.514093] ffff88013fc03de8 ffffffff8127f94d ffff880121572a80 0000000000000000 [732802.514093] Call Trace: [732802.514093] <IRQ> [732802.514093] [732802.514093] [<ffffffff8127f892>] blk_end_bidi_request+0x42/0x60 [732802.514093] [<ffffffff8127f94d>] blk_end_request_err+0x2d/0x50 [732802.514093] [<ffffffff813c94db>] scsi_io_completion+0x28b/0x640 [732802.514093] [<ffffffff813beac3>] scsi_finish_command+0xb3/0x110 [732802.514093] [<ffffffff813c914f>] scsi_softirq_done+0x12f/0x160 [732802.514093] [<ffffffff81285c90>] blk_done_softirq+0x80/0xa0 [732802.514093] [<ffffffff81062807>] __do_softirq+0xf7/0x290 [732802.514093] [<ffffffff815d45dc>] call_softirq+0x1c/0x30 [732802.514093] [<ffffffff81014ba5>] do_softirq+0x55/0x90 [732802.514093] [<ffffffff81062ba5>] irq_exit+0x115/0x120 [732802.514093] [<ffffffff81037305>] smp_call_function_single_interrupt+0x35/0x40 [732802.514093] [<ffffffff815d3d9d>] call_function_single_interrupt+0x6d/0x80 [732802.514093] <EOI> [732802.514093] 
[732802.514093] [<ffffffffa00634e7>] ? virtqueue_add_sgs+0x327/0x5ac [virtio_ring] [732802.514093] [<ffffffff815c9ffb>] ? _raw_spin_unlock_irqrestore+0x1b/0x40 [732802.514093] [<ffffffffa004a8fc>] virtscsi_kick_cmd+0x1dc/0x250 [virtio_scsi] [732802.514093] [<ffffffffa004af12>] virtscsi_tmf.constprop.6+0x52/0xa0 [virtio_scsi] [732802.514093] [<ffffffffa004b05a>] virtscsi_abort+0xfa/0x1b0 [virtio_scsi] [732802.514093] [<ffffffff813c56b7>] scmd_eh_abort_handler+0xc7/0x490 [732802.514093] [<ffffffff8107860b>] process_one_work+0x17b/0x460 [732802.514093] [<ffffffff810793bb>] worker_thread+0x11b/0x400 [732802.514093] [<ffffffff810792a0>] ? rescuer_thread+0x3e0/0x3e0 [732802.514093] [<ffffffff8107fc10>] kthread+0xc0/0xd0 [732802.514093] [<ffffffff8107fb50>] ? kthread_create_on_node+0x110/0x110 [732802.514093] [<ffffffff815d2bec>] ret_from_fork+0x7c/0xb0 [732802.514093] [<ffffffff8107fb50>] ? kthread_create_on_node+0x110/0x110 [732802.514093] Code: 89 de e8 94 12 00 00 e9 4b ff ff ff 83 7b 50 01 0f 85 58 ff ff ff 48 8b 43 38 48 8d b8 98 01 00 00 e8 35 32 ec ff e9 43 ff ff ff <0f> 0b 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 66 66 66 66 90 [732802.514093] RIP [<ffffffff8127f840>] blk_finish_request+0xf0/0x100 [732802.514093] RSP <ffff88013fc03d88> Expected results: Additional info: QEMU cmd line: # cat RHEL-7.0-x86_64-bench.cli /usr/libexec/qemu-kvm \ -M pc-i440fx-rhel7.0.0 \ -cpu Nehalem \ -m 4G \ -smp 4,threads=1,cores=4,sockets=1,maxcpus=6 \ -enable-kvm \ -name hp-z800-03-RHEL-7.0-x86_64-bench \ -uuid 3cc6f8c2-3f32-4607-b08e-4e479e841c2e \ -nodefconfig \ -nodefaults \ -no-shutdown \ -k en-us \ -rtc base=utc,clock=host,driftfix=slew \ -qmp tcp:0:5001,server,nowait \ -boot order=c,menu=on \ -iscsi initiator-name=iqn.1994-05.com.redhat:longevity-test \ -vga qxl \ -global qxl-vga.vram_size=67108864 \ -spice port=6001,password=******* \ -device virtio-scsi-pci,id=scsi0 \ -drive 
file=/nfs/image/RHEL-7.0-x86_64-mysql.qcow2,if=none,id=drive-scsi0-0-0,cache=none,aio=native,rerror=stop,werror=stop \ -device scsi-hd,drive=drive-scsi0-0-0,id=os-disk,bus=scsi0.0,bootindex=1 \ -netdev tap,id=tap0,vhost=on,script=/etc/qemu-ifup,queues=2 \ -device virtio-net-pci,netdev=tap0,mac=54:e1:d2:c3:b4:af,id=net0,vectors=5,mq=on \ -chardev socket,id=charserial0,path=/var/local/qemu/hp-z800-03-RHEL-7.0-x86_64-bench/console,server,nowait \ -device isa-serial,chardev=charserial0,id=serial0 \ -device virtio-serial-pci,id=virtio-serial0,max_ports=16 \ -chardev socket,id=channel0,path=/var/local/qemu/hp-z800-03-RHEL-7.0-x86_64-bench/virtserial,server,nowait \ -device virtserialport,chardev=channel0,name=org.linux-kvm.port.0,bus=virtio-serial0.0,id=port1 \ -chardev socket,id=qemu-ga0,path=/var/local/qemu/hp-z800-03-RHEL-7.0-x86_64-bench/qemu-ga,server,nowait \ -device virtserialport,chardev=qemu-ga0,name=org.qemu.guest_agent.0,bus=virtio-serial0.0,id=port2 \ -device virtio-balloon-pci,id=balloon0 \ 2>&1 |\ awk -v logfile="/var/local/qemu/hp-z800-03-RHEL-7.0-x86_64-bench/session.log" '{ msg="["strftime("%D %T", systime())"] "$0 }{ print msg; print msg > logfile; fflush() }'