Description of problem: Add vtpm devices with OVMF.amdsev.fd causes VM reset, this is an issue for SEV-ES guest, as it doesn't support reboot/reset, so sev-es VM will never boot automatically. But manually select 'continue boot' on system reset screen, sev-es VM can boot. Version-Release number of selected component (if applicable): edk2-ovmf-20230301gitf80f052277c8-5.el9.noarch qemu-kvm-8.0.0-6.el9.x86_64 kernel-5.14.0-331.el9.x86_64 How reproducible: 100% Steps to Reproduce: 1. Boot a sev-es with a vtpm device and /usr/share/edk2/ovmf/OVMF.amdsev.fd # /usr/bin/swtpm socket --ctrl type=unixio,path=/root/avocado/data/avocado-vt/swtpm/avocado-vt-vm1_tpm0_swtpm.sock,mode=0600 --tpmstate dir=/root/avocado/data/avocado-vt/swtpm/avocado-vt-vm1_tpm0_state,mode=0600 --terminate --tpm2 --log file=/root/avocado/job-results/job-2023-06-28T03.49-3724c4a/test-results/1-Host_RHEL.m9.u3.ovmf.qcow2.virtio_scsi.up.virtio_net.Guest.RHEL.9.3.0.x86_64.io-github-autotest-qemu.boot.q35/vtpm_avocado-vt-vm1_tpm0_swtpm.log qemu cmdline: /usr/libexec/qemu-kvm \ -S \ -name 'avocado-vt-vm1' \ -sandbox on \ -blockdev '{"node-name": "file_ovmf_code", "driver": "file", "filename": "/usr/share/edk2/ovmf/OVMF.amdsev.fd", "auto-read-only": true, "discard": "unmap"}' \ -blockdev '{"node-name": "drive_ovmf_code", "driver": "raw", "read-only": true, "file": "file_ovmf_code"}' \ -machine q35,pflash0=drive_ovmf_code,confidential-guest-support=lsec0,memory-backend=mem-machine_mem \ -object sev-guest,id=lsec0,policy=7,cbitpos=51,reduced-phys-bits=1 \ -device '{"id": "pcie-root-port-0", "driver": "pcie-root-port", "multifunction": true, "bus": "pcie.0", "addr": "0x1", "chassis": 1}' \ -device '{"id": "pcie-pci-bridge-0", "driver": "pcie-pci-bridge", "addr": "0x0", "bus": "pcie-root-port-0"}' \ -nodefaults \ -device '{"driver": "VGA", "bus": "pcie.0", "addr": "0x2"}' \ -m 4096 \ -object '{"size": 4294967296, "id": "mem-machine_mem", "qom-type": "memory-backend-ram"}' \ -smp 48,maxcpus=48,cores=24,threads=1,dies=1,sockets=2 \ -cpu 'EPYC-Milan',x2apic=on,tsc-deadline=on,hypervisor=on,tsc-adjust=on,vaes=on,vpclmulqdq=on,spec-ctrl=on,stibp=on,arch-capabilities=on,ssbd=on,cmp-legacy=on,virt-ssbd=on,lbrv=on,tsc-scale=on,vmcb-clean=on,pause-filter=on,pfthreshold=on,v-vmsave-vmload=on,vgif=on,rdctl-no=on,skip-l1dfl-vmentry=on,mds-no=on,pschange-mc-no=on,kvm_pv_unhalt=on \ -chardev socket,server=on,wait=off,id=qmp_id_qmpmonitor1,path=/var/tmp/avocado_as4vdz88/monitor-qmpmonitor1-20230628-030129-xipuDste \ -mon chardev=qmp_id_qmpmonitor1,mode=control \ -chardev socket,server=on,wait=off,id=qmp_id_catch_monitor,path=/var/tmp/avocado_as4vdz88/monitor-catch_monitor-20230628-030129-xipuDste \ -mon chardev=qmp_id_catch_monitor,mode=control \ -device '{"ioport": 1285, "driver": "pvpanic", "id": "idr8y3qB"}' \ -chardev socket,server=on,wait=off,id=chardev_serial0,path=/var/tmp/avocado_as4vdz88/serial-serial0-20230628-030129-xipuDste \ -device '{"id": "serial0", "driver": "isa-serial", "chardev": "chardev_serial0"}' \ -chardev socket,id=seabioslog_id_20230628-030129-xipuDste,path=/var/tmp/avocado_as4vdz88/seabios-20230628-030129-xipuDste,server=on,wait=off \ -device isa-debugcon,chardev=seabioslog_id_20230628-030129-xipuDste,iobase=0x402 \ -device '{"id": "pcie-root-port-1", "port": 1, "driver": "pcie-root-port", "addr": "0x1.0x1", "bus": "pcie.0", "chassis": 2}' \ -device '{"driver": "qemu-xhci", "id": "usb1", "bus": "pcie-root-port-1", "addr": "0x0"}' \ -device '{"driver": "usb-tablet", "id": "usb-tablet1", "bus": "usb1.0", "port": "1"}' \ -device '{"id": "pcie-root-port-2", "port": 2, "driver": "pcie-root-port", "addr": "0x1.0x2", "bus": "pcie.0", "chassis": 3}' \ -device '{"id": "virtio_scsi_pci0", "driver": "virtio-scsi-pci", "bus": "pcie-root-port-2", "addr": "0x0"}' \ -blockdev '{"node-name": "file_image1", "driver": "file", "auto-read-only": true, "discard": "unmap", "aio": "threads", "filename": "/home/kvm_autotest_root/images/rhel930-64-virtio-scsi-ovmf.qcow2", "cache": {"direct": true, "no-flush": false}}' \ -blockdev '{"node-name": "drive_image1", "driver": "qcow2", "read-only": false, "cache": {"direct": true, "no-flush": false}, "file": "file_image1"}' \ -device '{"driver": "scsi-hd", "id": "image1", "drive": "drive_image1", "write-cache": "on"}' \ -device '{"id": "pcie-root-port-3", "port": 3, "driver": "pcie-root-port", "addr": "0x1.0x3", "bus": "pcie.0", "chassis": 4}' \ -device '{"driver": "virtio-net-pci", "mac": "9a:8f:81:2a:7b:a8", "id": "idkuetK7", "netdev": "idHAEXFR", "bus": "pcie-root-port-3", "addr": "0x0"}' \ -netdev tap,id=idHAEXFR,vhost=on \ -chardev socket,id=char_vtpm_avocado-vt-vm1_tpm0,path=/root/avocado/data/avocado-vt/swtpm/avocado-vt-vm1_tpm0_swtpm.sock \ -tpmdev emulator,chardev=char_vtpm_avocado-vt-vm1_tpm0,id=emulator_vtpm_avocado-vt-vm1_tpm0 \ -device '{"id": "tpm-crb_vtpm_avocado-vt-vm1_tpm0", "tpmdev": "emulator_vtpm_avocado-vt-vm1_tpm0", "driver": "tpm-crb"}' \ -monitor stdio \ -vnc :0 \ -rtc base=utc,clock=host,driftfix=slew \ -boot menu=off,order=cdn,once=c,strict=off \ -enable-kvm \ Actual results: System reset automatically Expected results: VM can boot Additional info: Not a regression, test the first OVMF.amdsev.fd edk2 build edk2-ovmf-202202gitb24306f15d-1.el9.noarch also reproduces this issue.
> Actual results: > System reset automatically Yes, sev-es does not support reboot, so the guest is powered off instead. > Expected results: > VM can boot It should boot fine if you start it again with the identical configuration. (Might be needed multiple times, in case fallback.efi restores the boot configuration it will try reboot the guest too).
(In reply to Gerd Hoffmann from comment #1) > > Actual results: > > System reset automatically > > Yes, sev-es does not support reboot, so the guest is powered off instead. > > > Expected results: > > VM can boot > > It should boot fine if you start it again with the identical configuration. > > (Might be needed multiple times, in case fallback.efi restores the boot > configuration it will try reboot the guest too). In this case, repeat booting the guest with same configuration always goes to system reset, I've tried repeating to start the vm more than 10 times after vm automatically quit. This is only the case when tpm + OVMF.amdsev.fd, tpm + OVMF_CODE.fd & nvram VAR file doesn't reproduce this issue.
> > It should boot fine if you start it again with the identical configuration. > > > > (Might be needed multiple times, in case fallback.efi restores the boot > > configuration it will try reboot the guest too). > In this case, repeat booting the guest with same configuration always goes > to system reset, I've tried repeating to start the vm more than 10 times > after vm automatically quit. > This is only the case when tpm + OVMF.amdsev.fd, tpm + OVMF_CODE.fd & nvram > VAR file doesn't reproduce this issue. Can you attach the firmware log to this bug?
Failed case with OVMF.amdsev.fd firmware log: ovmf.log Worked case with OVMF_CODE.fd and OVMF_VARS.log: ovmf_nvram.log
My suspicion is this: 1) The AmdSev firmware build uses a ephemeral/stateless UEFI variable store: there is no separate "OVMF_VARS.fd" file, and the "OVMF.amdsev.fd" (code binary) is added to the VM as read-only. The firmware uses a memory-backed write overlay internally. This means any changes made to it will be lost when Qemu is stopped. 2) AmdSev VMs can not be rebooted. Qemu needs to be stop and restarted to perform a reboot, losing any changes made to the variable store. 3) shim's fallback mechanism checks the variable store when booting. If it is empty it sets the default EFI boot option (to bypass the BOOTX64.efi default). If a TPM is present, the machine is reset after this in order to keep the boot chain clean for measured boot. In case of an ephemeral variable store, this creates an endless reboot loop. To offer a way out the blue box with the message and the option to skip the reboot is presented. See also: - https://github.com/rhboot/shim/issues/418 - https://github.com/rhboot/shim/pull/445
I was able to test this on an AMD SEV-ES capable machine and I can confirm that EFI variables are not preserved across reboots. Inside the SEV-ES guest: # dmesg | grep -i sev [ 0.316049] Memory Encryption Features active: AMD SEV SEV-ES [ 3.842543] kvm_amd: KVM is unsupported when running as an SEV guest # echo "test" > data # NAME=$(uuidgen)-test # echo $NAME e41b4422-7d52-4a9d-b0eb-2f87da469d55-test # efivar -w --name=$NAME -f data # efivar -l e41b4422-7d52-4a9d-b0eb-2f87da469d55-test 8be4df61-93ca-11d2-aa0d-00e098032b8c-BootOrder 8be4df61-93ca-11d2-aa0d-00e098032b8c-Boot0001 8be4df61-93ca-11d2-aa0d-00e098032b8c-Boot0003 8be4df61-93ca-11d2-aa0d-00e098032b8c-ConIn 8be4df61-93ca-11d2-aa0d-00e098032b8c-ErrOut 8be4df61-93ca-11d2-aa0d-00e098032b8c-ConOut 04b37fe8-f6ae-480b-bdd5-37d98c5e89aa-VarErrorFlag 8be4df61-93ca-11d2-aa0d-00e098032b8c-Lang 8be4df61-93ca-11d2-aa0d-00e098032b8c-PlatformLang 8be4df61-93ca-11d2-aa0d-00e098032b8c-Timeout 8be4df61-93ca-11d2-aa0d-00e098032b8c-Boot0000 eb704011-1402-11d3-8e77-00a0c969723b-MTC 605dab50-e046-4300-abb6-3dd810dd8b23-MokListTrustedRT 605dab50-e046-4300-abb6-3dd810dd8b23-SbatLevelRT 605dab50-e046-4300-abb6-3dd810dd8b23-MokListXRT 605dab50-e046-4300-abb6-3dd810dd8b23-MokListRT 8be4df61-93ca-11d2-aa0d-00e098032b8c-BootCurrent 8be4df61-93ca-11d2-aa0d-00e098032b8c-ConInDev 8be4df61-93ca-11d2-aa0d-00e098032b8c-ErrOutDev 8be4df61-93ca-11d2-aa0d-00e098032b8c-ConOutDev 8be4df61-93ca-11d2-aa0d-00e098032b8c-PlatformRecovery0000 8be4df61-93ca-11d2-aa0d-00e098032b8c-PlatformLangCodes 8be4df61-93ca-11d2-aa0d-00e098032b8c-LangCodes 8be4df61-93ca-11d2-aa0d-00e098032b8c-BootOptionSupport 8be4df61-93ca-11d2-aa0d-00e098032b8c-OsIndicationsSupported [root@ibm-p8-kvm-03-guest-02 ~]# efivar -p --name=$NAME GUID: e41b4422-7d52-4a9d-b0eb-2f87da469d55 Name: "test" Attributes: Non-Volatile Boot Service Access Runtime Service Access Value: 00000000 74 65 73 74 0a |test. | # reboot [...] # efivar -l 8be4df61-93ca-11d2-aa0d-00e098032b8c-BootOrder 8be4df61-93ca-11d2-aa0d-00e098032b8c-Boot0001 8be4df61-93ca-11d2-aa0d-00e098032b8c-Boot0003 8be4df61-93ca-11d2-aa0d-00e098032b8c-ConIn 8be4df61-93ca-11d2-aa0d-00e098032b8c-ErrOut 8be4df61-93ca-11d2-aa0d-00e098032b8c-ConOut 04b37fe8-f6ae-480b-bdd5-37d98c5e89aa-VarErrorFlag 8be4df61-93ca-11d2-aa0d-00e098032b8c-Lang 8be4df61-93ca-11d2-aa0d-00e098032b8c-PlatformLang 8be4df61-93ca-11d2-aa0d-00e098032b8c-Timeout 8be4df61-93ca-11d2-aa0d-00e098032b8c-Boot0000 eb704011-1402-11d3-8e77-00a0c969723b-MTC 605dab50-e046-4300-abb6-3dd810dd8b23-MokListTrustedRT 605dab50-e046-4300-abb6-3dd810dd8b23-SbatLevelRT 605dab50-e046-4300-abb6-3dd810dd8b23-MokListXRT 605dab50-e046-4300-abb6-3dd810dd8b23-MokListRT 8be4df61-93ca-11d2-aa0d-00e098032b8c-BootCurrent 8be4df61-93ca-11d2-aa0d-00e098032b8c-ConInDev 8be4df61-93ca-11d2-aa0d-00e098032b8c-ErrOutDev 8be4df61-93ca-11d2-aa0d-00e098032b8c-ConOutDev 8be4df61-93ca-11d2-aa0d-00e098032b8c-PlatformRecovery0000 8be4df61-93ca-11d2-aa0d-00e098032b8c-PlatformLangCodes 8be4df61-93ca-11d2-aa0d-00e098032b8c-LangCodes 8be4df61-93ca-11d2-aa0d-00e098032b8c-BootOptionSupport 8be4df61-93ca-11d2-aa0d-00e098032b8c-OsIndicationsSupported -> variable is gone
(In reply to Oliver Steffen from comment #8) > I was able to test this on an AMD SEV-ES capable machine and I can confirm > that EFI variables are not preserved across reboots. This is true of all of the confidential virt tech in general - TDX, SEV, SEV-ES, SEV-SNP. In all of them, while the EFI code is covered by the confidential VM attestation, the EFI variables cannot be covered as they're inherently variable data and under control of the host OS admin. This forces an initial model where the EFI builds are stateless, such that variable changes are never persisted. It will eventually be possible to address this in the SEV-SNP case. It would require taking advantage of SVSM, to do an early pre-(firmware|OS)-boot attestation to acquire keys to provision and unlock encrypted state for EFI variable storage and vTPM NVRAM storage. In the absence of SVSM pre-(firmware|OS)-boot attestation though, the guest OS image cannot rely on being able to persist EFI vars. IOW, the shim fallback impl is not currently supportable with confidential virt and TPMs, unless a persistence mechanism can be provided. Some options * Modify shim to NOT do a reboot for fallback mode when in a confidential VM, instead just make it boot directly. The TPM measurements are still usable as they would be the same on every boot by virtue of the EFI being stateless. Wouldnt want this change in an SVSM environment though * Instead of a single EFI firmware blob for code have the hypervisor provisioning tool maintain a set of known good firmware for each supported guest OS, with the EFI specific boot entry hardcoded avoiding shim fallback being triggered * Declare vTPM to be an unsupported feature unless EFI variable persistence is available. In the case of SEV/SEV-ES specifically though, I think there's a case to be made that vTPM should be an unsupported configuration. It makes little sense to go to the trouble of using a confidential VM, and then rely on a vTPM impl that's entirely under control of the untrusted host OS admin.
Thanks Oliver and Daniel. From QE view, if this could be addressed with SEV-SNP eventually, I agree to doc a limitation of vtpm reboot with sev-es VM.
zixchen: I am still not sure how to proceed, discussion is ongoing. Quoting an email from Cole <crobinso>: > Behavior was added to shim here: > https://github.com/rhboot/shim/commit/431b8a2e75a71a0b1f47d47d3f045b1e3efbce53 > > commit 431b8a2e75a71a0b1f47d47d3f045b1e3efbce53 > Author: Peter Jones <pjones> > Date: Mon Jul 31 13:10:41 2017 -0400 > > Make fallback aware of tpm measurements, and reboot if tpm is used. > > Since booting the entry with fallback in the stack of things that got > measured will result in all the wrong PCR values, in the cases where TPM > is present and enabled, use ->Reset() instead of loading the Boot#### > variable and executing its target. > > Note this behavior also causes some weirdness with even non-SEV > virt-install installations with TPM attached, the reset is unexpected > and tricks virt-install into thinking the install was completed: > https://bugzilla.redhat.com/show_bug.cgi?id=2133525 > > This later shim commit provides an escape hatch with an NV variable. Any > way to use that for SEV and non SEV cases? > https://github.com/rhboot/shim/commit/a5db51a52e8d4cae938fc807b991383309dffca7 The "escape hatch" is setting the EFI variable "FB_NO_REBOOT". Maybe the OVMF could set this variable in the store on startup?
(In reply to Daniel Berrangé from comment #9) > (In reply to Oliver Steffen from comment #8) > > I was able to test this on an AMD SEV-ES capable machine and I can confirm > > that EFI variables are not preserved across reboots. > > This is true of all of the confidential virt tech in general - TDX, SEV, > SEV-ES, SEV-SNP. > > In all of them, while the EFI code is covered by the confidential VM > attestation, the EFI variables cannot be covered as they're inherently > variable data and under control of the host OS admin. > > This forces an initial model where the EFI builds are stateless, such that > variable changes are never persisted. > > It will eventually be possible to address this in the SEV-SNP case. It would > require taking advantage of SVSM, to do an early pre-(firmware|OS)-boot > attestation to acquire keys to provision and unlock encrypted state for EFI > variable storage and vTPM NVRAM storage. > > In the absence of SVSM pre-(firmware|OS)-boot attestation though, the guest > OS image cannot rely on being able to persist EFI vars. > > IOW, the shim fallback impl is not currently supportable with confidential > virt and TPMs, unless a persistence mechanism can be provided. > > Some options > > * Modify shim to NOT do a reboot for fallback mode when in a confidential > VM, instead just make it boot directly. The TPM measurements are still > usable as they would be the same on every boot by virtue of the EFI being > stateless. Wouldnt want this change in an SVSM environment though Will be good if we can make it work without having to reset/reboot. > * Instead of a single EFI firmware blob for code have the hypervisor > provisioning tool maintain a set of known good firmware for each supported > guest OS, with the EFI specific boot entry hardcoded avoiding shim fallback > being triggered > * Declare vTPM to be an unsupported feature unless EFI variable persistence > is available. > EFI variable persistence is a desired feature but nobody is working on that AFAIK in coconut-SVSM. Also most likely vTPM support will come first without EFI variable persistence and we will like it to work. So it probably will be good to not rely on EFI variable persistence being available and we should be able to work without that. Peter, do you any thoughts on this. We don't have EFI variable persistence available in case of confidential VMs yet and it might not happen for a long time. Is there a way to make it work even without persistence EFI variable support. > > In the case of SEV/SEV-ES specifically though, I think there's a case to be > made that vTPM should be an unsupported configuration. It makes little sense > to go to the trouble of using a confidential VM, and then rely on a vTPM > impl that's entirely under control of the untrusted host OS admin. Fair enough. I am more worried about SEV-SNP + SVSM + vTPM + no-persitent-EFI-variable-store configuration. I suspect that's going to be a supported use case for a while.