Bug 2212968

Summary: pvs segfault in device_ids_check_serial
Product: Red Hat Enterprise Linux 9 Reporter: Corey Marthaler <cmarthal>
Component: lvm2 Assignee: LVM Team <lvm-team>
lvm2 sub component: Devices, Filtering and Stacking QA Contact: cluster-qe <cluster-qe>
Status: POST --- Docs Contact:
Severity: high    
Priority: high CC: agk, heinzm, jbrassow, msnitzer, prajnoha, teigland, zkabelac
Version: 9.3 Keywords: Triaged
Target Milestone: rc   
Target Release: ---   
Hardware: x86_64   
OS: Linux   
Whiteboard:
Fixed In Version: Doc Type: If docs needed, set a value
Doc Text:
Story Points: ---
Clone Of: Environment:
Last Closed: Type: Bug
Regression: --- Mount Type: ---
Documentation: --- CRM:
Verified Versions: Category: ---
oVirt Team: --- RHEL 7.3 requirements from Atomic Host:
Cloudforms Team: --- Target Upstream Version:
Embargoed:

Description Corey Marthaler 2023-06-06 18:20:51 UTC
Description of problem:
[root@node01 coredump]# pvs
  Devices file sys_serial SHAREDDATADISK PVID none last seen on /dev/vdb8 not found.
  Devices file sys_serial SHAREDDATADISK PVID none last seen on /dev/vdb7 not found.
  Devices file sys_serial SHAREDDATADISK PVID none last seen on /dev/vdb6 not found.
  Devices file sys_serial SHAREDDATADISK PVID none last seen on /dev/vdb5 not found.
  Devices file sys_serial SHAREDDATADISK PVID none last seen on /dev/vdb12 not found.
  Devices file sys_serial SHAREDDATADISK PVID none last seen on /dev/vdb11 not found.
  Devices file sys_serial SHAREDDATADISK PVID Ig2eylcGzRbAQFmAClON4HYgL2VYOaT9 last seen on /dev/vdb10 not found.
  Devices file sys_serial SHAREDDATADISK PVID none last seen on /dev/vdb9 not found.
Segmentation fault (core dumped)

# LVM uses devices listed in this file.
# Created by LVM command pvremove pid 61364 at Tue Jun  6 11:06:29 2023
VERSION=1.1.39
IDTYPE=sys_serial IDNAME=SHAREDDATADISK DEVNAME=/dev/vdb8 PVID=. PART=8
IDTYPE=sys_serial IDNAME=SHAREDDATADISK DEVNAME=/dev/vdb7 PVID=. PART=7
IDTYPE=sys_serial IDNAME=SHAREDDATADISK DEVNAME=/dev/vdb6 PVID=. PART=6
IDTYPE=sys_serial IDNAME=SHAREDDATADISK DEVNAME=/dev/vdb5 PVID=. PART=5
IDTYPE=sys_serial IDNAME=SHAREDDATADISK DEVNAME=/dev/vdb12 PVID=. PART=12
IDTYPE=sys_serial IDNAME=SHAREDDATADISK DEVNAME=/dev/vdb11 PVID=. PART=11
IDTYPE=sys_serial IDNAME=SHAREDDATADISK DEVNAME=/dev/vdb10 PVID=Ig2eylcGzRbAQFmAClON4HYgL2VYOaT9 PART=10
IDTYPE=sys_serial IDNAME=SHAREDDATADISK DEVNAME=/dev/vdb9 PVID=. PART=9
IDTYPE=sys_serial IDNAME=SHAREDDATADISK DEVNAME=/dev/vdb3 PVID=XU0vpdveElBThDtMtzaeTWaa5JiDwSPM PART=3
IDTYPE=sys_serial IDNAME=SHAREDDATADISK DEVNAME=/dev/vdb2 PVID=lqXAAyQUKaQoC6VKSougdmCiNxmDmirK PART=2
IDTYPE=sys_serial IDNAME=SHAREDDATADISK DEVNAME=/dev/vdb1 PVID=KFB4RtYwh2Oh0p8QgoeqehqgRl0OuIjf PART=1
IDTYPE=sys_serial IDNAME=SHAREDDATADISK DEVNAME=/dev/vdb4 PVID=. PART=4



Jun  6 12:58:34 fabbione-node01-vm-nic-protected kernel: pvs[2156]: segfault at 0 ip 0000557b371b5f05 sp 00007ffebb8ee240 error 4 in lvm[557b3712f000+192000] likely on CPU 2 (core 0, socket 2)
Jun  6 12:58:34 fabbione-node01-vm-nic-protected kernel: Code: 70 28 ff 70 30 31 c0 e8 d9 53 02 00 48 8b bc 24 90 00 00 00 59 5e 4c 39 ef 0f 84 58 07 00 00 4c 8b 75 10 48 89 f8 4d 8b 46 30 <4d> 8b 18 4d 8b 48 08 0f 1f 40 00 48 8b 58 10 48 8b 93 b0 00 00 00
Jun  6 12:58:34 fabbione-node01-vm-nic-protected systemd[1]: Started Process Core Dump (PID 2164/UID 0).
Jun  6 12:58:34 fabbione-node01-vm-nic-protected systemd-coredump[2165]: Process 2156 (pvs) of user 0 dumped core.#012#012Stack trace of thread 2156:#012#0  0x0000557b371b5f05 device_ids_check_serial (lvm + 0xbdf05)#012#1  0x0000557b37196b64 lvmcache_label_scan (lvm + 0x9eb64)#012#2  0x0000557b371784c1 process_each_pv (lvm + 0x804c1)#012#3  0x0000557b37173385 _do_report (lvm + 0x7b385)#012#4  0x0000557b371738a3 _report.lto_priv.0 (lvm + 0x7b8a3)#012#5  0x0000557b3715608c lvm_run_command (lvm + 0x5e08c)#012#6  0x0000557b37157d11 lvm2_main (lvm + 0x5fd11)#012#7  0x00007ff22783feb0 __libc_start_call_main (libc.so.6 + 0x3feb0)#012#8  0x00007ff22783ff60 __libc_start_main@@GLIBC_2.34 (libc.so.6 + 0x3ff60)#012#9  0x0000557b37131f25 _start (lvm + 0x39f25)#012ELF object binary architecture: AMD x86-64



Core was generated by `pvs'.
Program terminated with signal SIGSEGV, Segmentation fault.
#0  0x0000557b371b5f05 in device_ids_check_serial (cmd=0x557b388e2f20, scan_devs=0x7ffebb8ee440, update_needed=0x0, noupdate=0) at ../lib/device/device_id.c:2600
2600                            if (!memcmp(dul->du->pvid, devl->dev->pvid, ID_LEN)) {
(gdb) bt
#0  0x0000557b371b5f05 in device_ids_check_serial (cmd=0x557b388e2f20, scan_devs=0x7ffebb8ee440, update_needed=0x0, noupdate=0) at ../lib/device/device_id.c:2600
#1  0x0000557b37196b64 in lvmcache_label_scan (cmd=0x557b388e2f20) at ../lib/cache/lvmcache.c:1626
#2  0x0000557b371784c1 in process_each_pv (cmd=0x557b388e2f20, argc=<optimized out>, argv=<optimized out>, only_this_vgname=<optimized out>, all_is_set=<optimized out>, read_flags=<optimized out>, 
    handle=<optimized out>, process_single_pv=<optimized out>) at /usr/src/debug/lvm2-2.03.21-1.el9.x86_64/tools/toollib.c:4650
#3  0x0000557b37173385 in _do_report (cmd=cmd@entry=0x557b388e2f20, handle=handle@entry=0x557b389dd4a0, args=args@entry=0x7ffebb8eeb70, single_args=single_args@entry=0x7ffebb8eebb8)
    at /usr/src/debug/lvm2-2.03.21-1.el9.x86_64/tools/reporter.c:1148
#4  0x0000557b371738a3 in _report (cmd=0x557b388e2f20, argc=0, argv=0x7ffebb8ef250, report_type=<optimized out>) at /usr/src/debug/lvm2-2.03.21-1.el9.x86_64/tools/reporter.c:1399
#5  0x0000557b3715608c in lvm_run_command (cmd=<optimized out>, argc=<optimized out>, argv=<optimized out>) at /usr/src/debug/lvm2-2.03.21-1.el9.x86_64/tools/lvmcmdline.c:3317
#6  0x0000557b37157d11 in lvm2_main (argc=1, argv=0x7ffebb8ef248) at /usr/src/debug/lvm2-2.03.21-1.el9.x86_64/tools/lvmcmdline.c:3847
#7  0x00007ff22783feb0 in __libc_start_call_main (main=main@entry=0x557b37131e50 <main>, argc=argc@entry=1, argv=argv@entry=0x7ffebb8ef248) at ../sysdeps/nptl/libc_start_call_main.h:58
#8  0x00007ff22783ff60 in __libc_start_main_impl (main=0x557b37131e50 <main>, argc=1, argv=0x7ffebb8ef248, init=<optimized out>, fini=<optimized out>, rtld_fini=<optimized out>, stack_end=0x7ffebb8ef238)
    at ../csu/libc-start.c:389
#9  0x0000557b37131f25 in _start ()


Version-Release number of selected component (if applicable):
kernel-5.14.0-284.11.1.el9_2    
lvm2-2.03.21-1.el9    BUILT: Fri Apr 21 08:33:33 EDT 2023
lvm2-libs-2.03.21-1.el9    BUILT: Fri Apr 21 08:33:33 EDT 2023

Comment 2 David Teigland 2023-06-08 16:39:05 UTC
This is the method I'm using to reproduce this bug:

Set up fake sysfs files for the three devices being used, with each device given the same serial number:

# ls -l /dev/sdb
brw-rw----. 1 root disk 8, 16 Jun  8 10:57 /dev/sdb
# ls -l /dev/sdc
brw-rw----. 1 root disk 8, 32 Jun  8 10:57 /dev/sdc
# ls -l /dev/sdg
brw-rw----. 1 root disk 8, 96 Jun  8 10:54 /dev/sdg


# lvmconfig | grep device_id_sysfs_dir
        device_id_sysfs_dir="/test/sys/"

# find /test/
/test/
/test/sys
/test/sys/dev
/test/sys/dev/block
/test/sys/dev/block/8:16
/test/sys/dev/block/8:16/device
/test/sys/dev/block/8:16/device/serial
/test/sys/dev/block/8:32
/test/sys/dev/block/8:32/device
/test/sys/dev/block/8:32/device/serial
/test/sys/dev/block/8:96
/test/sys/dev/block/8:96/device
/test/sys/dev/block/8:96/device/serial

# cat /test/sys/dev/block/*/device/serial
s123
s123
s123

Two of the three devices are PVs, one is not.  Add all three to the devices file (e.g. lvmdevices --adddev), and all should be identified with the same serial number:

# cat /etc/lvm/devices/system.devices 
# LVM uses devices listed in this file.
# Created by LVM command pvs pid 560639 at Thu Jun  8 11:06:22 2023
SYSTEMID=null-04.lab.msp.redhat.com
VERSION=1.1.25
IDTYPE=sys_serial IDNAME=s123 DEVNAME=/dev/sdg PVID=.
IDTYPE=sys_serial IDNAME=s123 DEVNAME=/dev/sdb PVID=a6GZt7fVibApl60R18Hmuyfe8D4M1O2Z
IDTYPE=sys_serial IDNAME=s123 DEVNAME=/dev/sdc PVID=n1mV9NeGOj4gZvhft66kzB33dcGaJCYK

# pvs -o+uuid
  PV         VG Fmt  Attr PSize    PFree    PV UUID                               
  /dev/sdb   bb lvm2 a--  <931.01g <930.75g a6GZt7-fVib-Apl6-0R18-Hmuy-fe8D-4M1O2Z
  /dev/sdc   bb lvm2 a--  <931.01g <930.75g n1mV9N-eGOj-4gZv-hft6-6kzB-33dc-GaJCYK

Edit system.devices and swap the DEVNAME values of the sdb and sdc entries (so the entry that had DEVNAME=/dev/sdb now has /dev/sdc, and vice versa), leaving the PVIDs in place.  LVM uses the DEVNAME field as a hint when matching devices to system.devices entries.  This swap gives LVM a false hint and causes an initial mismatch between devices and system.devices entries.  When that mismatch is noticed and corrected, it triggers the faulty code, which segfaults when looking at sdg.

# cat /etc/lvm/devices/system.devices 
# LVM uses devices listed in this file.
# Created by LVM command pvs pid 560639 at Thu Jun  8 11:06:22 2023
SYSTEMID=null-04.lab.msp.redhat.com
VERSION=1.1.25
IDTYPE=sys_serial IDNAME=s123 DEVNAME=/dev/sdg PVID=.
IDTYPE=sys_serial IDNAME=s123 DEVNAME=/dev/sdc PVID=a6GZt7fVibApl60R18Hmuyfe8D4M1O2Z
IDTYPE=sys_serial IDNAME=s123 DEVNAME=/dev/sdb PVID=n1mV9NeGOj4gZvhft66kzB33dcGaJCYK

This pvs command will segfault without the fix (the output shown below is the result with the fix applied):

# pvs -o+uuid
  PV         VG Fmt  Attr PSize    PFree    PV UUID                               
  /dev/sdb   bb lvm2 a--  <931.01g <930.75g a6GZt7-fVib-Apl6-0R18-Hmuy-fe8D-4M1O2Z
  /dev/sdc   bb lvm2 a--  <931.01g <930.75g n1mV9N-eGOj-4gZv-hft6-6kzB-33dc-GaJCYK