Bug 1663051
Summary: | libvirtd encountered sigsegv due to null pointer in virJSONValueObjectHasKey() | |||
---|---|---|---|---|
Product: | Red Hat Enterprise Linux 7 | Reporter: | Gordon Watson <gwatson> | |
Component: | libvirt | Assignee: | Ján Tomko <jtomko> | |
Status: | CLOSED ERRATA | QA Contact: | jiyan <jiyan> | |
Severity: | high | Docs Contact: | ||
Priority: | unspecified | |||
Version: | 7.5 | CC: | chhu, fjin, jdenemar, jsuchane, jtomko, lhuang, xuzhang, yafu, yalzhang | |
Target Milestone: | rc | |||
Target Release: | --- | |||
Hardware: | Unspecified | |||
OS: | Unspecified | |||
Whiteboard: | ||||
Fixed In Version: | libvirt-4.5.0-12.el7 | Doc Type: | If docs needed, set a value | |
Doc Text: | Story Points: | --- | ||
Clone Of: | ||||
: | 1665000 (view as bug list) | Environment: | ||
Last Closed: | 2019-08-06 13:14:35 UTC | Type: | Bug | |
Regression: | --- | Mount Type: | --- | |
Documentation: | --- | CRM: | ||
Verified Versions: | Category: | --- | ||
oVirt Team: | --- | RHEL 7.3 requirements from Atomic Host: | ||
Cloudforms Team: | --- | Target Upstream Version: | ||
Embargoed: | ||||
Bug Depends On: | ||||
Bug Blocks: | 1665000 |
Description
Gordon Watson
2019-01-02 21:53:47 UTC
- core file analysis; - null pointer; (gdb) frame 0 #0 virJSONValueObjectHasKey (object=object@entry=0x0, key=key@entry=0x7ff4e1ae6826 "error") at util/virjson.c:751 751 if (object->type != VIR_JSON_TYPE_OBJECT) (gdb) l 746 virJSONValueObjectHasKey(virJSONValuePtr object, 747 const char *key) 748 { 749 size_t i; 750 751 if (object->type != VIR_JSON_TYPE_OBJECT) 752 return -1; 753 754 for (i = 0; i < object->data.object.npairs; i++) { 755 if (STREQ(object->data.object.pairs[i].key, key)) (gdb) p object $12 = (virJSONValue *) 0x0 - called by qemuAgentCheckError(); (gdb) frame 1 #1 0x00007ff4e19ee19d in qemuAgentCheckError (cmd=cmd@entry=0x7ff4cc06bbe0, reply=0x0) at qemu/qemu_agent.c:1098 1098 if (virJSONValueObjectHasKey(reply, "error")) { (gdb) l 1093 1094 static int 1095 qemuAgentCheckError(virJSONValuePtr cmd, 1096 virJSONValuePtr reply) 1097 { 1098 if (virJSONValueObjectHasKey(reply, "error")) { 1099 virJSONValuePtr error = virJSONValueObjectGet(reply, "error"); 1100 char *cmdstr = virJSONValueToString(cmd, false); 1101 char *replystr = virJSONValueToString(reply, false); - called by qemuAgentGetInterfaces(); (gdb) frame 2 #2 0x00007ff4e19f2040 in qemuAgentGetInterfaces (mon=mon@entry=0x7ff4a8001090, ifaces=ifaces@entry=0x7ff4ee95cad8) at qemu/qemu_agent.c:2070 2070 qemuAgentCheckError(cmd, reply) < 0) { (gdb) l 2065 2066 if (!(cmd = qemuAgentMakeCommand("guest-network-get-interfaces", NULL))) 2067 goto cleanup; 2068 2069 if (qemuAgentCommand(mon, cmd, &reply, false, VIR_DOMAIN_QEMU_AGENT_COMMAND_BLOCK) < 0 || 2070 qemuAgentCheckError(cmd, reply) < 0) { 2071 goto cleanup; 2072 } 2073 2074 if (!(ret_array = virJSONValueObjectGet(reply, "return"))) { - mon->running = false; (gdb) p (* mon)->running $8 = false - mon->await_event; (gdb) p mon->await_event $14 = QEMU_AGENT_EVENT_SHUTDOWN - VM "internet_021"; (gdb) frame 3 #3 0x00007ff4e1aac785 in qemuDomainInterfaceAddresses (dom=0x7ff4cc04a030, ifaces=0x7ff4ee95cad8, source=<optimized out>, flags=<optimized out>) at qemu/qemu_driver.c:20403 20403 ret = qemuAgentGetInterfaces(agent, ifaces); (gdb) p * dom $13 = {object = {u = {dummy_align1 = 7700611123, dummy_align2 = 0x1cafe0033, s = {magic = 3405643827, refs = 1}}, klass = 0x7ff4d8152f60}, conn = 0x7ff4d811ab00, name = 0x7ff4cc06bc00 "internet_021", id = 312, uuid = "\024v\214\266\300\235F\004\256\345\001\003\371L", <incomplete sequence \351>} - In qemuAgentCommand(), mon->running must have been 'true'; qemuAgentCommand(qemuAgentPtr mon, virJSONValuePtr cmd, virJSONValuePtr *reply, bool needReply, int seconds) { int ret = -1; qemuAgentMessage msg; char *cmdstr = NULL; int await_event = mon->await_event; *reply = NULL; if (!mon->running) { virReportError(VIR_ERR_AGENT_UNRESPONSIVE, "%s", _("Guest agent disappeared while executing command")); return -1; } if (qemuAgentGuestSync(mon) < 0) return -1; memset(&msg, 0, sizeof(msg)); if (!(cmdstr = virJSONValueToString(cmd, false))) goto cleanup; if (virAsprintf(&msg.txBuffer, "%s" LINE_ENDING, cmdstr) < 0) goto cleanup; msg.txLength = strlen(msg.txBuffer); VIR_DEBUG("Send command '%s' for write, seconds = %d", cmdstr, seconds); ret = qemuAgentSend(mon, &msg, seconds); VIR_DEBUG("Receive command reply ret=%d rxObject=%p", ret, msg.rxObject); if (ret == 0) { /* If we haven't obtained any reply but we wait for an * event, then don't report this as error */ if (!msg.rxObject) { if (await_event && !needReply) { VIR_DEBUG("Woken up by event %d", await_event); } else { if (mon->running) virReportError(VIR_ERR_INTERNAL_ERROR, "%s", _("Missing monitor reply object")); else virReportError(VIR_ERR_AGENT_UNRESPONSIVE, "%s", _("Guest agent disappeared while executing command")); ret = -1; } } else { *reply = msg.rxObject; ret = qemuAgentCheckError(cmd, *reply); } } - vdsm logs, VM's uuid = '14768cb6-c09d-4604-aee5-0103f94c79e9'; 2018-12-20 04:05:52,898+0900 WARN (qgapoller/56) [virt.periodic.VmDispatcher] could not run <function <lambda> at 0x7f6ba01d5aa0> on ['14768cb6-c09d-4604-aee5-0103f94c79e9', 'bccc97c6-8b6 2018-12-20 04:05:53,680+0900 INFO (jsonrpc/7) [api.host] FINISH getAllVmIoTunePolicies return={'status': {'message': 'Done', 'code': 0}, 'io_tune_policies_dict': {'14768cb6-c09d-4604-aee5 2018-12-20 04:05:54,718+0900 WARN (qgapoller/55) [virt.periodic.VmDispatcher] could not run <function <lambda> at 0x7f6ba01d5c80> on ['14768cb6-c09d-4604-aee5-0103f94c79e9', 'bccc97c6-8b6 2018-12-20 04:05:55,779+0900 WARN (periodic/1251) [virt.periodic.VmDispatcher] could not run <class 'vdsm.virt.periodic.UpdateVolumes'> on ['14768cb6-c09d-4604-aee5-0103f94c79e9', 'bccc97 2018-12-20 04:06:02,812+0900 INFO (vdsm.Scheduler) [Executor] Worker discarded: <Worker name=qgapoller/54 running <Task discardable <NetworkInterfacesCheck vm=14768cb6-c09d-4604-aee5-0103 2018-12-20 04:06:04,717+0900 WARN (qgapoller/52) [virt.periodic.VmDispatcher] could not run <function <lambda> at 0x7f6ba01d5c80> on ['14768cb6-c09d-4604-aee5-0103f94c79e9', 'bccc97c6-8b6 - sigsegv at 04:06:07; $ grep -i sigsegv ../messages Dec 20 04:06:07 host04 abrt-hook-ccpp: Process 7144 (libvirtd) of user 0 killed by SIGSEGV - dumping core - Assessment; I think the guest agent must have been shutdown or terminated during the execution of this guest agent command. I have to admit, I have just realised that I made a mistake in my initial analysis. qemuAgentCheckError() is called from qemuAgentGetInterfaces() in tis case, but it is also called in qemuAgentCommand(). I had convinced myself that it was this latter call that was the failing one, and that had me confused. Then when I started to write this bug update, I saw/remembered that the "failing one" was actually called in qemuAgentGetInterfaces(). This was on a RHV host. The vdsm logs (above) show that the VM in question, 'internet_021', started to get qgapoller warnings at 04:05:52, then the sigsegv happened at 04:06:07. The qgapoller is a mechanism in VDSM that polls/queries the guest agent on a cyclic basis. So, it was having problems accessing the guest agent just prior to the core dump. This occurred on a 7.5 version of libvirt, libvirt-3.9.0-14.el7_5.6. However, I checked the latest d/s version, libvirt-4.5.0-10.el7_6.3, and the code appears to be the same in these areas in both versions. This was possibly fixed by: https://libvirt.org/git/?p=libvirt.git;a=commitdiff;h=9ed175fbc2deecfdaeabca7bc77c7e7ae33a3377 commit 9ed175fbc2deecfdaeabca7bc77c7e7ae33a3377 Author: John Ferlan <jferlan> CommitDate: 2018-09-11 10:32:01 -0400 qemu: Remove duplicated qemuAgentCheckError Commit 5b3492fadb moved qemuAgentCheckError calls into qemuAgentCommand for various reasons; however, subsequent commit 0977b8aa0 adding a new command made call again So let's just remove the duplicitous call from qemuAgentGetInterfaces. Signed-off-by: John Ferlan <jferlan> ACKed-by: Michal Privoznik <mprivozn> git describe: v4.7.0-83-g9ed175fbc2 contains: v4.8.0-rc1~226 I can reproduce this race condition issue by call domifaddr --agent during shutdown guest (like bug 1058149). After some debuging, i think virJSONValueObjectGet(reply, "return") still will crash libvirtd after John's commit 9ed175fb, since virJSONValueObjectGet won't check if @object is null and this will crash libvirtd. And I think maybe the right fix is to set needReply to true during call qemuAgentCommand() in qemuAgentGetInterfaces(), then qemuAgentCommand won't return 0 with a null @reply pointer, and also from the design this qemu agent command will return some guest info, it really "needReply" from qemu agent. Obviously this is just my opinion, corrections are most welcome. Right. That commit merely changed the point where libvirtd will crash. I've sent the patch to libvirt-security, since this might possibly be used maliciously. Here is my reproducing steps: 1. In guest (already install qemu-guest-agent): # cat script.gdb set width 0 set height 0 set verbose off b qmp_guest_network_get_interfaces commands 1 kill end b qmp_guest_shutdown commands 2 return c end c # gdb -batch --command=script.gdb qemu-ga `pidof qemu-ga` 2. On host: # virsh shutdown vm1 --mode agent; virsh domifaddr vm1 --source agent error: Failed to shutdown domain vm1 error: Guest agent is not responding: Guest agent not available for now error: Disconnected from qemu:///system due to end of file error: Failed to query for interfaces addresses error: End of file while reading data: Input/output error Patch sent upstream: https://www.redhat.com/archives/libvir-list/2019-January/msg00241.html Pushed upstream as: commit 7cfd1fbb1332ae5df678b9f41a62156cb2e88c73 Author: Ján Tomko <jtomko> CommitDate: 2019-01-10 12:09:48 +0100 qemu: require reply from guest agent in qemuAgentGetInterfaces git describe: v4.10.0-164-g7cfd1fbb13 Verify this bug with libvirt-4.5.0-18.el7.x86_64: 1. In guest (already install qemu-guest-agent): # cat script.gdb set width 0 set height 0 set verbose off b qmp_guest_network_get_interfaces commands 1 kill end b qmp_guest_shutdown commands 2 return c end c # gdb -batch --command=script.gdb qemu-ga `pidof qemu-ga` 2. On host: # virsh shutdown vm1 --mode agent; virsh domifaddr vm1 --source agent error: Failed to shutdown domain vm1 error: Guest agent is not responding: Guest agent not available for now error: Failed to query for interfaces addresses error: Guest agent is not responding: Guest agent disappeared while executing command Since the problem described in this bug report should be resolved in a recent advisory, it has been closed with a resolution of ERRATA. For information on the advisory, and where to find the updated files, follow the link below. If the solution does not work for you, open a new bug report. https://access.redhat.com/errata/RHSA-2019:2294 |