Bug 956072

Summary: 3.1 - [vdsm] vdsm loses its connection to the libvirt socket in certain cases
Product: Red Hat Enterprise Virtualization Manager Reporter: Simon Grinberg <sgrinber>
Component: vdsm Assignee: Yaniv Bronhaim <ybronhei>
Status: CLOSED ERRATA QA Contact: Elad <ebenahar>
Severity: urgent Docs Contact:
Priority: urgent    
Version: 3.1.3 CC: acathrow, bazulay, danken, dgibson, hateya, iheim, jentrena, lpeer, lyarwood, sgrinber, vvyazmin, ybronhei, ykaul, zdover
Target Milestone: --- Keywords: Reopened, ZStream
Target Release: 3.1.4   
Hardware: x86_64   
OS: Linux   
Whiteboard: infra
Fixed In Version: vdsm-4.10.2-1.12.el6ev Doc Type: Bug Fix
Doc Text:
Previously, when the host was the SPM, killing the VDSM process and then restarting after 120 seconds resulted in a situation in which VDSM was not able to communicate with the terminated PID. This meant that VDSM was losing its connection to the libvirt socket. A patch to VDSM allows VDSM to retain its connection to libvirt sockets even after the termination and restart of the VDSM process.
Story Points: ---
Clone Of: 834041 Environment:
Last Closed: 2013-05-01 18:26:30 UTC Type: Bug
Regression: --- Mount Type: ---
Documentation: --- CRM:
Verified Versions: Category: ---
oVirt Team: Infra RHEL 7.3 requirements from Atomic Host:
Cloudforms Team: --- Target Upstream Version:
Embargoed:
Bug Depends On: 834041    
Bug Blocks:    

Comment 2 Elad 2013-04-28 14:55:38 UTC
Checked on RHEVM - 3.1 - SI28.1

vdsm-4.10.2-1.13.el6ev.x86_64
libvirt-0.10.2-18.el6_4.4.x86_64


vdsm does not lose its connection to the libvirt socket.


[root@tigris01 ~]# kill -STOP `pgrep vdsm` && sleep 120 && kill -CONT `pgrep vdsm`


[root@tigris01 ~]# vdsClient -s 0 getVdsCaps 
        HBAInventory = {'iSCSI': [{'InitiatorName': 'iqn.1994-05.com.redhat:tigris01'}], 'FC': [{'wwpn': '50014380186af904', 'wwnn': '50014380186af905', 'model': 'HPAJ764A - HP 8Gb Dual Channel PCI-e 2.0 FC HBA'}, {'wwpn': '50014380186af906', 'wwnn': '50014380186af907', 'model': 'HPAJ764A - HP 8Gb Dual Channel PCI-e 2.0 FC HBA'}]}                                                                                              
        ISCSIInitiatorName = iqn.1994-05.com.redhat:tigris01                                                                                                                                                         
        bondings = {'bond4': {'addr': '', 'cfg': {}, 'mtu': '1500', 'netmask': '', 'slaves': [], 'hwaddr': '00:00:00:00:00:00'}, 'bond0': {'addr': '', 'cfg': {}, 'mtu': '1500', 'netmask': '', 'slaves': [], 'hwaddr': '00:00:00:00:00:00'}, 'bond1': {'addr': '', 'cfg': {}, 'mtu': '1500', 'netmask': '', 'slaves': [], 'hwaddr': '00:00:00:00:00:00'}, 'bond2': {'addr': '', 'cfg': {}, 'mtu': '1500', 'netmask': '', 'slaves': [], 'hwaddr': '00:00:00:00:00:00'}, 'bond3': {'addr': '', 'cfg': {}, 'mtu': '1500', 'netmask': '', 'slaves': [], 'hwaddr': '00:00:00:00:00:00'}}
        bridges = {'rhevm': {'addr': '10.35.160.123', 'cfg': {'IPV6INIT': 'yes', 'DHCP_HOSTNAME': 'tigris01.scl.lab.tlv.redhat.com', 'HOSTNAME': 'tigris01.scl.lab.tlv.redhat.com', 'MTU': '1500', 'DELAY': '0', 'NM_CONTROLLED': 'no', 'BOOTPROTO': 'dhcp', 'DEVICE': 'rhevm', 'TYPE': 'Bridge', 'ONBOOT': 'yes', 'UUID': '2413ad85-3d6d-4de4-b707-bf8770da8f6b'}, 'mtu': '1500', 'netmask': '255.255.255.0', 'stp': 'off', 'ports': ['eth0']}}
        clusterLevels = ['3.0', '3.1']
        cpuCores = 128
        cpuFlags = fpu,vme,de,pse,tsc,msr,pae,mce,cx8,apic,sep,mtrr,pge,mca,cmov,pat,pse36,clflush,mmx,fxsr,sse,sse2,ht,syscall,nx,mmxext,fxsr_opt,pdpe1gb,rdtscp,lm,constant_tsc,rep_good,nonstop_tsc,extd_apicid,amd_dcm,aperfmperf,pni,pclmulqdq,monitor,ssse3,cx16,sse4_1,sse4_2,popcnt,aes,xsave,avx,lahf_lm,cmp_legacy,svm,extapic,cr8_legacy,abm,sse4a,misalignsse,3dnowprefetch,osvw,ibs,xop,skinit,wdt,lwp,fma4,nodeid_msr,topoext,perfctr_core,cpb,npt,lbrv,svm_lock,nrip_save,tsc_scale,vmcb_clean,flushbyasid,decodeassists,pausefilter,pfthreshold,model_Opteron_G3,model_Opteron_G1,model_Opteron_G4,model_Opteron_G2
        cpuModel = AMD Opteron(TM) Processor 6272
        cpuSockets = 4
        cpuSpeed = 2100.000
        emulatedMachines = ['rhel6.4.0', 'pc', 'rhel6.3.0', 'rhel6.2.0', 'rhel6.1.0', 'rhel6.0.0', 'rhel5.5.0', 'rhel5.4.4', 'rhel5.4.0']
        guestOverhead = 65
        hooks = {}
        kvmEnabled = true
        lastClient = 10.35.161.52
        lastClientIface = rhevm
        management_ip =
        memSize = 129058
        netConfigDirty = False
        networks = {'rhevm': {'iface': 'rhevm', 'addr': '10.35.160.123', 'cfg': {'IPV6INIT': 'yes', 'DHCP_HOSTNAME': 'tigris01.scl.lab.tlv.redhat.com', 'HOSTNAME': 'tigris01.scl.lab.tlv.redhat.com', 'MTU': '1500', 'DELAY': '0', 'NM_CONTROLLED': 'no', 'BOOTPROTO': 'dhcp', 'DEVICE': 'rhevm', 'TYPE': 'Bridge', 'ONBOOT': 'yes', 'UUID': '2413ad85-3d6d-4de4-b707-bf8770da8f6b'}, 'mtu': '1500', 'netmask': '255.255.255.0', 'stp': 'off', 'bridged': True, 'gateway': '10.35.160.254', 'ports': ['eth0']}}
        nics = {'eth3': {'addr': '10.35.163.123', 'cfg': {'DEVICE': 'eth3', 'UUID': 'ed7f03fb-3438-40f1-991e-37481dfccaca', 'IPADDR': '10.35.163.123', 'NM_CONTROLLED': 'yes', 'NETMASK': '255.255.255.0', 'HWADDR': 'E8:39:35:B0:75:D5', 'TYPE': 'Ethernet', 'ONBOOT': 'yes'}, 'mtu': '1500', 'netmask': '255.255.255.0', 'hwaddr': 'e8:39:35:b0:75:d5', 'speed': 1000}, 'eth2': {'addr': '10.35.162.123', 'cfg': {'DEVICE': 'eth2', 'UUID': '7c17da01-1f9e-4f52-89b3-f9a972a4fa28', 'IPADDR': '10.35.162.123', 'NM_CONTROLLED': 'yes', 'NETMASK': '255.255.255.0', 'HWADDR': 'E8:39:35:B0:75:D4', 'TYPE': 'Ethernet', 'ONBOOT': 'yes'}, 'mtu': '1500', 'netmask': '255.255.255.0', 'hwaddr': 'e8:39:35:b0:75:d4', 'speed': 1000}, 'eth1': {'addr': '', 'cfg': {'UUID': 'f195acc4-3a95-43c7-9018-bd5f44c4063a', 'DEVICE': 'eth1', 'NETMASK': '255.255.255.0', 'HWADDR': 'E8:39:35:B0:75:D7', 'TYPE': 'Ethernet', 'ONBOOT': 'no'}, 'mtu': '1500', 'netmask': '', 'hwaddr': 'e8:39:35:b0:75:d7', 'speed': 0}, 'eth0': {'addr': '', 'cfg': {'BRIDGE': 'rhevm', 'DEVICE': 'eth0', 'IPV6INIT': 'yes', 'DHCP_HOSTNAME': 'tigris01.scl.lab.tlv.redhat.com', 'HOSTNAME': 'tigris01.scl.lab.tlv.redhat.com', 'MTU': '1500', 'NM_CONTROLLED': 'no', 'HWADDR': 'E8:39:35:B0:75:D6', 'ONBOOT': 'yes', 'UUID': '2413ad85-3d6d-4de4-b707-bf8770da8f6b'}, 'mtu': '1500', 'netmask': '', 'hwaddr': 'e8:39:35:b0:75:d6', 'speed': 1000}}
        operatingSystem = {'release': '6.4.0.4.el6_4', 'version': '6Server', 'name': 'RHEL'}
        packages2 = {'kernel': {'release': '358.2.1.el6.x86_64', 'buildtime': 1361362657.0, 'version': '2.6.32'}, 'spice-server': {'release': '12.el6', 'buildtime': 1358331735, 'version': '0.12.0'}, 'vdsm': {'release': '1.13.el6ev', 'buildtime': 1366900192, 'version': '4.10.2'}, 'qemu-kvm': {'release': '2.355.el6_4.3', 'buildtime': 1365612567, 'version': '0.12.1.2'}, 'libvirt': {'release': '18.el6_4.4', 'buildtime': 1365809427, 'version': '0.10.2'}, 'qemu-img': {'release': '2.355.el6_4.3', 'buildtime': 1365612567, 'version': '0.12.1.2'}}
        reservedMem = 321
        software_revision = 1.13
        software_version = 4.10
        supportedENGINEs = ['3.0', '3.1']
        supportedProtocols = ['2.2', '2.3']
        supportedRHEVMs = ['3.0']
        uuid = 81E7323E-BB06-E111-A8C2-E83935B391DF
        version_name = Snow Man
        vlans = {}
        vmTypes = ['kvm']

Comment 3 errata-xmlrpc 2013-05-01 18:26:30 UTC
Since the problem described in this bug report should be
resolved in a recent advisory, it has been closed with a
resolution of ERRATA.

For information on the advisory, and where to find the updated
files, follow the link below.

If the solution does not work for you, open a new bug report.

http://rhn.redhat.com/errata/RHBA-2013-0774.html

Comment 4 David Gibson 2013-07-31 23:52:35 UTC
The Knowledgebase link is broken because it should point to solutions instead of articles. Correcting.