Hide Forgot
Description of problem: Boot guest with VFs, then unbind one VF and bind back this VF to host driver. After these operations, host will core dump. Version-Release number of selected component (if applicable): 3.10.0-57.el7.x86_64 qemu-kvm-rhev-1.5.3-20.el7.x86_64 How reproducible: always Steps to Reproduce: 1.Boot guest with VFs. # /usr/libexec/qemu-kvm -S -M pc-i440fx-rhel7.0.0 -cpu SandyBridge -enable-kvm -m 2G -smp 2,sockets=2,cores=1,threads=1 -name juli -uuid 355a2475-4e03-4cdd-bf7b-5d6a59edaa68 -rtc base=localtime,clock=host,driftfix=slew -device virtio-scsi-pci,bus=pci.0,addr=0x5,id=scsi0 -drive file=/home/juli/win2012r2.qcow2_v3,if=none,id=drive-scsi0-0-0,media=disk,cache=none,format=qcow2,werror=stop,rerror=stop,aio=native -device scsi-hd,drive=drive-scsi0-0-0,bus=scsi0.0,scsi-id=0,lun=0,id=juli,bootindex=0 -drive file=/home/juli/ISO/en_windows_server_2012_r2_x64_dvd_2707946.iso,if=none,media=cdrom,format=raw,aio=native,id=drive-ide1-0-0 -device ide-drive,drive=drive-ide1-0-0,id=ide1-0-0,bus=ide.0,unit=0,bootindex=4 -drive file=/home/juli/virtio-win-prewhql-0.1-74.iso,if=none,media=cdrom,format=raw,aio=native,id=drive-ide1-0-1 -device ide-drive,drive=drive-ide1-0-1,id=ide1-0-1,bus=ide.0,unit=1,bootindex=6 -device virtio-balloon-pci,id=ballooning -global PIIX4_PM.disable_s3=0 -global PIIX4_PM.disable_s4=0 -k en-us -boot menu=on,reboot-timeout=-1,strict=on -qmp tcp:0:4477,server,nowait -serial unix:/tmp/ttyS0,server,nowait -vnc :3 -spice port=5939,disable-ticketing -vga qxl -global qxl-vga.revision=3 -monitor stdio -monitor tcp:0:7777,server,nowait -monitor unix:/tmp/monitor1,server,nowait -netdev tap,id=tap1,vhost=on,queues=4,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown,ifname=tap-juli -device virtio-net-pci,netdev=tap1,id=nic1,mq=on,vectors=17,mac=1a:59:0a:4b:aa:94 -device vfio-pci,host=09:06.3,id=hostnet_VF1 -device vfio-pci,host=09:06.4,id=hostnet_VF2 -device vfio-pci,host=09:06.5,id=hostnet_VF3 -device vfio-pci,host=09:06.6,id=hostnet_VF4 -device 
vfio-pci,host=09:06.7,id=hostnet_VF5 -device vfio-pci,host=09:07.1,id=hostnet_VF6 -device vfio-pci,host=09:07.2,id=hostnet_VF7 -device vfio-pci,host=09:07.3,id=hostnet_VF8 -device vfio-pci,host=09:07.4,id=hostnet_VF9 2.unbind one VF from vfio-pci. # echo 0000:09:07.0 > /sys/bus/pci/drivers/vfio-pci/unbind 3.bind back to host driver. echo 0000:09:07.0 > /sys/bus/pci/drivers/be2net/bind Actual results: After step 3, host will core dump. crash> bt PID: 3925 TASK: ffff88011d41b610 CPU: 4 COMMAND: "bash" #0 [ffff880121cf39a8] machine_kexec at ffffffff8103ee12 #1 [ffff880121cf39f8] crash_kexec at ffffffff810c5433 #2 [ffff880121cf3ac0] oops_end at ffffffff815bd7e8 #3 [ffff880121cf3ae8] die at ffffffff81015f6b #4 [ffff880121cf3b18] do_trap at ffffffff815bcee0 #5 [ffff880121cf3b68] do_invalid_op at ffffffff81013092 #6 [ffff880121cf3c10] invalid_op at ffffffff815c661e [exception RIP: vfio_iommu_group_notifier+427] RIP: ffffffffa0b430cb RSP: ffff880121cf3cc0 RFLAGS: 00010282 RAX: 00000000ffffffea RBX: ffff88012070cf00 RCX: ffff880121cf3fd8 RDX: 0000000000000000 RSI: ffff88021cd3e098 RDI: ffff88012070cf30 RBP: ffff880121cf3ce8 R8: ffffffffa066a91c R9: ffff88012070cf00 R10: 0000000000000000 R11: 0000000000000000 R12: ffff88012070cf00 R13: ffff88021cd3e098 R14: 0000000000000004 R15: 0000000000000000 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #7 [ffff880121cf3cf0] notifier_call_chain at ffffffff815c088c #8 [ffff880121cf3d28] __blocking_notifier_call_chain at ffffffff8108498d #9 [ffff880121cf3d68] blocking_notifier_call_chain at ffffffff810849c6 #10 [ffff880121cf3d78] iommu_bus_notifier at ffffffff8147d85c #11 [ffff880121cf3da8] notifier_call_chain at ffffffff815c088c #12 [ffff880121cf3de0] __blocking_notifier_call_chain at ffffffff8108498d #13 [ffff880121cf3e20] blocking_notifier_call_chain at ffffffff810849c6 #14 [ffff880121cf3e30] driver_bound at ffffffff8138e23f #15 [ffff880121cf3e48] driver_probe_device at ffffffff8138e4d8 #16 [ffff880121cf3e70] driver_bind at 
ffffffff8138c9fb #17 [ffff880121cf3ea8] drv_attr_store at ffffffff8138c0f4 #18 [ffff880121cf3eb8] sysfs_write_file at ffffffff8120fa16 #19 [ffff880121cf3f00] vfs_write at ffffffff8119de8d #20 [ffff880121cf3f40] sys_write at ffffffff8119e859 #21 [ffff880121cf3f80] system_call_fastpath at ffffffff815c4ed9 RIP: 00000033f3ae7a00 RSP: 00007fff24f87978 RFLAGS: 00010202 RAX: 0000000000000001 RBX: ffffffff815c4ed9 RCX: 0000000030303030 RDX: 000000000000000d RSI: 00007fa6f9afe000 RDI: 0000000000000001 RBP: 00007fa6f9afe000 R8: 000000000000000a R9: 00007fa6f9af8740 R10: 000000000000000c R11: 0000000000000246 R12: 0000000000000001 R13: 000000000000000d R14: 00000033f3dbb400 R15: 000000000000000d ORIG_RAX: 0000000000000001 CS: 0033 SS: 002b Expected results: Host will work well. Additional info:
If the VF is a member of the same IOMMU group as the devices assigned to the guest then by binding the VF back to the host driver the integrity of the IOMMU group has been compromised and the system is therefore brought down. This is the expected behavior. Please confirm if 0000:09:07.0 belongs to the same IOMMU group as the other VFs (/sys/kernel/iommu_groups).
(In reply to Alex Williamson from comment #2) > If the VF is a member of the same IOMMU group as the devices assigned to the > guest then by binding the VF back to the host driver the integrity of the > IOMMU group has been compromised and the system is therefore brought down. > This is the expected behavior. Please confirm if 0000:09:07.0 belongs to > the same IOMMU group as the other VFs (/sys/kernel/iommu_groups). Yes. 0000:09:07.0 belongs to the same IOMMU group as the other VFs. # ls /sys/kernel/iommu_groups/34/devices/ 0000:09:07.0 0000:09:07.2 0000:09:07.4 0000:09:07.6 0000:09:07.1 0000:09:07.3 0000:09:07.5 0000:09:07.7
This is the expected behavior to preserve the integrity of the system.