Description of problem: Segfault in clvmd nightly lvm2-2.02.106-0.304 - ef1d910dee1157d157402c28680dfe34e06194af Looks like clvmd has problem parsing metadata: Core was generated by `/usr/sbin/clvmd -T180'. Program terminated with signal 11, Segmentation fault. #0 0x00007fe885b448c7 in _tok_match (str=0x2020202020202020 <Address 0x2020202020202020 out of bounds>, b=0x7fe8868b2e97 "system_id", e=0x7fe8868b2ea0 "") at libdm-config.c:85 85 while (*str && (b != e)) { Missing separate debuginfos, use: debuginfo-install corosynclib-2.3.3-2.el7.x86_64 dlm-lib-4.0.2-3.el7.x86_64 libqb-0.16.0-6.el7.x86_64 libselinux-2.2.2-4.el7.x86_64 libsepol-2.1.9-3.el7.x86_64 pcre-8.32-12.el7.x86_64 xz-libs-5.1.2-8alpha.el7.x86_64 (gdb) bt full #0 0x00007fe885b448c7 in _tok_match (str=0x2020202020202020 <Address 0x2020202020202020 out of bounds>, b=0x7fe8868b2e97 "system_id", e=0x7fe8868b2ea0 "") at libdm-config.c:85 No locals. #1 0x00007fe885b47f51 in _find_config_node (start=0x7fe87c04aa60, path=0x7fe8868b2e97 "system_id") at libdm-config.c:798 e = 0x7fe8868b2ea0 "" cn = 0x7fe8868ae8c6 cn_found = 0x0 #2 0x00007fe885b48dbd in dm_config_get_str (cn=0x7fe87c04aa60, path=0x7fe8868b2e97 "system_id", result=0x7fe8866bc520) at libdm-config.c:1086 n = 0x2000 #3 0x00007fe88682e375 in _read_vg (fid=0x7fe87c052600, cft=0x7fe87c04a6e0, use_cached_pvs=0) at format_text/import_vsn1.c:763 vgn = 0x7fe87c04aa60 cv = 0x7fe885b48a04 <dm_config_find_node+35> str = 0x7fe8868b23c4 "version" vg = 0x7fe87c03bbe0 pv_hash = 0x7fe87c04ec10 lv_hash = 0x7fe87c02fbe0 scan_done_once = 0 #4 0x00007fe88682bb73 in text_vg_import_fd (fid=0x7fe87c052600, file=0x7fe87c0526ba "/etc/lvm/backup/helter_skelter", single_device=0, dev=0x0, offset=0, size=0, offset2=0, size2=0, checksum_fn=0x0, checksum=0, when=0x7fe8866bc660, desc=0x7fe8866bc668) at format_text/import.c:112 vg = 0x0 cft = 0x7fe87c04a6e0 vsn = 0x7fe886af1080 <_text_vsn_list> #5 0x00007fe88682bc5d in text_vg_import_file (fid=0x7fe87c052600, file=0x7fe87c0526ba 
"/etc/lvm/backup/helter_skelter", when=0x7fe8866bc660, desc=0x7fe8866bc668) at format_text/import.c:128 No locals. #6 0x00007fe886826676 in _vg_read_file_name (fid=0x7fe87c052600, vgname=0x7fe87c0467f2 "helter_skelter", read_path=0x7fe87c0526ba "/etc/lvm/backup/helter_skelter") at format_text/format-text.c:864 vg = 0x7fe8866bc6a0 when = 0 desc = 0x0 #7 0x00007fe8868267d2 in _vg_read_file (fid=0x7fe87c052600, vgname=0x7fe87c0467f2 "helter_skelter", mda=0x7fe87c052670, single_device=0) at format_text/format-text.c:891 tc = 0x7fe87c0526a0 #8 0x00007fe88681fa16 in backup_read_vg (cmd=0x7fe87c0008c0, vg_name=0x7fe87c0467f2 "helter_skelter", file=0x7fe8866bc7a0 "/etc/lvm/backup/helter_skelter") at format_text/archiver.c:307 vg = 0x0 tf = 0x7fe87c052600 fic = {type = 8, context = {pv_id = 0x7fe8866bc740 "\240\307k\206\350\177", vg_ref = {vg_name = 0x7fe8866bc740 "\240\307k\206\350\177", vg_id = 0x0}, private = 0x7fe8866bc740}} tc = {path_live = 0x7fe8866bc7a0 "/etc/lvm/backup/helter_skelter", path_edit = 0x0, desc = 0x7fe8868a7f73 "clvmd"} mda = 0x7fe87c052670 #9 0x00007fe8868203d5 in check_current_backup (vg=0x7fe87c0466d0) at format_text/archiver.c:501 path = "/etc/lvm/backup/helter_skelter", '\000' <repeats 858 times>... 
vg_backup = 0x0 old_suppress = 0 #10 0x00007fe8867f722b in lvm_do_backup (vgname=0x7fe888906303 "helter_skelter") at lvm-functions.c:838 vg = 0x7fe87c0466d0 consistent = 1 #11 0x00007fe8867ef0ca in do_command (client=0x0, msg=0x7fe8889062f0, msglen=34, buf=0x7fe8866bdb58, buflen=65516, retlen=0x7fe8866bdb4c) at clvmd-command.c:171 args = 0x7fe888906301 "\200\005helter_skelter" arglen = 16 status = 0 lockname = 0x88906303 <Address 0x88906303 out of bounds> locktype = 0x0 nodeinfo = {sysname = "\377\377\377\377\350\177\000\000p\000\000|\350\177\000\000Q'\224y\027\200\377\377\260\330k\206\350\177\000\000\257\330k\206\350\177", '\000' <repeats 18 times>, "R{\016\001\000\000\000\000\377", nodename = "\377\377\377\000\000\000\000\000\347k\206\350\177\000\000\320\331k\206\350\177\000\000@\307\354\204\350\177\000\000\000\000\000\000\000\000\000\000\307Q\266\204\350\177\000\000\000\347k\206\350\177\000\000 \000\000|\350\177\000\000\000", release = "\001\000\000\000\000\000\000\000\000\005\000\000\000\000t\000\000\000\000\000\000\000\020\333k\206\350\177\000\000(\332k\206\350\177\000\000\031\024\271\204\350\177\000\000\355\377\000\000\000\000\000\000q&\224y\027\200\377\377\000\020", version = "\000\004\000\000\000\000\004\000\000\000\000\000\000\060\004\000\000\350\177\000\000\200\000\000\000\000\000\000\000\220\331k\206\350\177\000\000\020\000\000\000\000\000\000\000\333\000\000\000~\000\000\000\002\000\000\000\000\000\000\000\000\000\000", machine = "\000\000\000\000y\000\000\000|\000\000\000\217\331k\206\350\177\000\000\300D\220\210\350\177\000\000\000\332k\206\027\000\000\000\230E\220\210\350\177\000\000\320\331k\206\350\177\000\000\331\031\262\205\350\177\000\000\006\000\000\000\022", domainname = "\000\000\000\340\207\177\206\350\177\000\000@c\220\210\350\177\000\000 \000\000|\350\177\000\000\355\377\000\000\000\000\000\000\000\000\000\005\000\000\000\000t\000\000\000\000\000\000\000\020\333k\206\350\177\000\000(\332k\206\350\177"} lock_cmd = 0 '\000' lock_flags = 0 
'\000' #12 0x00007fe8867f384a in process_remote_command (msg=0x7fe8889062f0, msglen=34, fd=6, csid=0x7fe888906340 "\002") at clvmd.c:1648 replyargs = 0x7fe87c054110 "H\b" nodename = "UNKNOWN 2\000\255\373\350\177\000\000p$\005|\350\177\000\000p$\005|\350\177\000\000p$\005|\350\177\000\000p$\005|\350\177\000\000q$\005|\350\177\000\000\377\377\377\377\377\377\377\377p$\005|\350\177\000\000\377\377\377\377\377\377\377\377", '\000' <repeats 40 times>, "\320\334k\206\000\000\000\000\354\377\000\000\035\000\000\000\000\000\220\210\350\177", '\000' <repeats 16 times>, "\001\004\000\000\000\000\v\000\000\000\001c\220\210\350\177\000\000\000\000\000\000\000\000\000\000\003c\220\210\350\177\000\000\000\000\000\000\000\000\000\000"... replylen = 0 buflen = 65516 status = 0 #13 0x00007fe8867f492e in process_work_item (cmd=0x7fe888906320) at clvmd.c:2109 No locals. #14 0x00007fe8867f4a78 in lvm_thread_fn (arg=0x7ffff7a3dc40) at clvmd.c:2152 ss = {__val = {2560, 0 <repeats 15 times>}} lvm_params = 0x7ffff7a3dc40 cmd = 0x7fe888906320 #15 0x00007fe8852f4df3 in start_thread (arg=0x7fe8866be700) at pthread_create.c:308 __res = <optimized out> pd = 0x7fe8866be700 now = <optimized out> unwind_buf = {cancel_jmp_buf = {{jmp_buf = {140636664358656, 510565078428650217, 0, 140636664359360, 140636664358656, 0, -521035554717594903, -521029538016163095}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}} not_first_call = <optimized out> pagesize_m1 = <optimized out> sp = <optimized out> freesize = <optimized out> #16 0x00007fe884c093cd in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:113 No locals. 
(gdb) info threads Id Target Id Frame 4 Thread 0x7fe882ca1700 (LWP 4009) 0x00007fe8852fb25d in read () at ../sysdeps/unix/syscall-template.S:81 3 Thread 0x7fe8867cc880 (LWP 4003) 0x00007fe884c00b53 in select () at ../sysdeps/unix/syscall-template.S:81 2 Thread 0x7fe88669d700 (LWP 5460) pthread_cond_wait@@GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185 * 1 Thread 0x7fe8866be700 (LWP 4010) 0x00007fe885b448c7 in _tok_match (str=0x2020202020202020 <Address 0x2020202020202020 out of bounds>, b=0x7fe8868b2e97 "system_id", e=0x7fe8868b2ea0 "") at libdm-config.c:85 (gdb) thread 2 [Switching to thread 2 (Thread 0x7fe88669d700 (LWP 5460))] #0 pthread_cond_wait@@GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185 185 62: movl (%rsp), %edi (gdb) bt full #0 pthread_cond_wait@@GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:185 No locals. #1 0x00007fe8867f3e8d in pre_and_post_thread (arg=0x7fe888906050) at clvmd.c:1810 client = 0x7fe888906050 status = 0 write_status = 4 ss = {__val = {2048, 0 <repeats 15 times>}} pipe_fd = 16 #2 0x00007fe8852f4df3 in start_thread (arg=0x7fe88669d700) at pthread_create.c:308 __res = <optimized out> pd = 0x7fe88669d700 now = <optimized out> unwind_buf = {cancel_jmp_buf = {{jmp_buf = {140636664223488, 510565078428650217, 0, 140636664224192, 140636664223488, 18, -521035572434334999, -521029538016163095}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}} not_first_call = <optimized out> pagesize_m1 = <optimized out> sp = <optimized out> freesize = <optimized out> #3 0x00007fe884c093cd in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:113 No locals (gdb) thread 3 [Switching to thread 3 (Thread 0x7fe8867cc880 (LWP 4003))] #0 0x00007fe884c00b53 in select () at ../sysdeps/unix/syscall-template.S:81 81 T_PSEUDO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS) (gdb) bt full #0 0x00007fe884c00b53 in 
select () at ../sysdeps/unix/syscall-template.S:81 No locals. #1 0x00007fe8867f16a5 in main_loop (local_sock=4, cmd_timeout=60) at clvmd.c:874 select_status = 1 thisfd = 0x0 tv = {tv_sec = 59, tv_usec = 851845} quorate = 1 in = {fds_bits = {49232, 0 <repeats 15 times>}} ss = {__val = {16386, 0 <repeats 15 times>}} #2 0x00007fe8867f0a25 in main (argc=2, argv=0x7ffff7a3de28) at clvmd.c:616 local_sock = 4 newfd = 0x7fe8889080c0 delfd = 0x7ffff7a3dc70 lvm_params = {excl_uuid = 0x7fe888902010} opt = -1 cmd_timeout = 60 start_timeout = 180 cluster_iface = IF_AUTO ss = {__val = {18434, 0 <repeats 15 times>}} debug_opt = DEBUG_OFF debug_arg = DEBUG_OFF clusterwide_opt = 0 old_mask = 22 ret = 1 longopts = {{name = 0x7fe8868a6347 "help", has_arg = 0, flag = 0x0, val = 104}, {name = 0x0, has_arg = 0, flag = 0x0, val = 0}} (gdb) thread 4 [Switching to thread 4 (Thread 0x7fe882ca1700 (LWP 4009))] #0 0x00007fe8852fb25d in read () at ../sysdeps/unix/syscall-template.S:81 81 T_PSEUDO (SYSCALL_SYMBOL, SYSCALL_NAME, SYSCALL_NARGS) (gdb) bt full #0 0x00007fe8852fb25d in read () at ../sysdeps/unix/syscall-template.S:81 No locals. #1 0x00007fe88550a945 in do_dlm_dispatch_v6 () from /lib64/libdlm.so.3 No symbol table info available. #2 0x00007fe88550ad6f in dlm_recv_thread () from /lib64/libdlm.so.3 No symbol table info available. #3 0x00007fe8852f4df3 in start_thread (arg=0x7fe882ca1700) at pthread_create.c:308 __res = <optimized out> pd = 0x7fe882ca1700 now = <optimized out> unwind_buf = {cancel_jmp_buf = {{jmp_buf = {140636603422464, 510565078428650217, 0, 140636603423168, 140636603422464, 0, -521043511681381655, -521029538016163095}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}} not_first_call = <optimized out> pagesize_m1 = <optimized out> sp = <optimized out> freesize = <optimized out> #4 0x00007fe884c093cd in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:113 No locals. 
Something is wrong at Frame 1 Thread 1: (gdb) set $start = *(const struct dm_config_node *)start (gdb) p $start $5 = {key = 0x7fe87c04aa98 "id", parent = 0x7fe87c04aa18, sib = 0x7fe87c04aae8, child = 0x0, v = 0x7fe87c04aaa0, id = 0} (gdb) p ($start.sib) $6 = (struct dm_config_node *) 0x7fe87c04aae8 (gdb) p *($start.sib) $7 = {key = 0x7fe87c04ab20 "seqno", parent = 0x7fe87c04aa18, sib = 0x7fe87c04ab48, child = 0x0, v = 0x7fe87c04ab28, id = 0} (gdb) p *($start.sib->sib) $8 = {key = 0x7fe87c04ab80 "format", parent = 0x7fe87c04aa18, sib = 0x7fe886ae22b0 <_open_devices>, child = 0x0, v = 0x7fe87c04ab88, id = 0} (gdb) p *($start.sib->sib->sib) $9 = {key = 0x7fe87c04b098 "\350\256\004|\350\177", parent = 0x7fe87c04ab58, sib = 0x7fe8868ae8c6, child = 0x7fe8868ae8d0, v = 0x7fe8868ae8d4, id = -2037716778} ^--- corruption starts here (gdb) p *($start.sib->sib->sib->sib) $10 = {key = 0x2020202020202020 <Address 0x2020202020202020 out of bounds>, parent = 0x20002020200020, sib = 0x6574796261784520, child = 0x2000450042452000, v = 0x6574796261746550, id = 1112547328}
Created attachment 871880 [details] lvmdump (cluster VG is not available here - cluster is not running after the node was fenced)
Some logs are incomplete, but after parsing the driver Perl script, this happened after two legs of a three-leg mirror were removed[1] on all three nodes of a cluster, and a GFS2 FS on top of the mirror was written to (dd to a file). On the other two nodes the legs were replaced correctly. clvmd segfaulted on the node on which the write operation was initiated. [1]: echo offline > /sys/block/$DEVICE/device/state
And one more clvmd segfault. Unfortunately this time the logs are even more incomplete. Again 3leg mirror with 2 legs failing. With alloc policy. I do not know at exactly what time this happened :-( Likely the same issue, creating invalid metadata structure, this time failing later on free. 1. I will try persistent journal, /var/log/messages looks like running through random line killing filter (journald?), with reproducer being the outcome. 2. Then I will try sort of "bisecting" that to find offending change. #0 __GI___libc_free (mem=0x31) at malloc.c:2903 ar_ptr = <optimized out> p = <optimized out> hook = 0x0 #1 0x00007f8f20b9fef8 in _free_cached_vgmetadata (vginfo=0x7f8f1800d338) at cache/lvmcache.c:134 No locals. #2 0x00007f8f20ba204f in _free_vginfo (vginfo=0x7f8f1800d338) at cache/lvmcache.c:999 primary_vginfo = 0x7f8f1800e180 vginfo2 = 0x7f8f1800e180 r = 1 #3 0x00007f8f20ba3e02 in _lvmcache_destroy_vgnamelist (vginfo=0x7f8f1800d338) at cache/lvmcache.c:1591 next = 0x0 #4 0x00007f8f1fec1c1e in dm_hash_iter (t=0x7f8f18054860, f=0x7f8f20ba3dde <_lvmcache_destroy_vgnamelist>) at datastruct/hash.c:224 c = 0x7f8f20e822b0 <_open_devices> n = 0x7f8f1800d338 i = 113 #5 0x00007f8f20ba3faa in lvmcache_destroy (cmd=0x7f8f180008c0, retain_orphans=0) at cache/lvmcache.c:1631 n = 0x7f8f20b9fd74 <activation_release+9> #6 0x00007f8f20ba9b26 in refresh_toolcontext (cmd=0x7f8f180008c0) at commands/toolcontext.c:1592 cft_cmdline = 0x7f8f20e83580 <buf.7466> cft_tmp = 0x7f8f20e83b20 <buf.9441> #7 0x00007f8f20b969e6 in do_refresh_cache () at lvm-functions.c:652 No locals. 
#8 0x00007f8f20b96b17 in do_lock_vg (command=1 '\001', lock_flags=4 '\004', resource=0x7f8f2170b303 "P_#global") at lvm-functions.c:688 lock_cmd = 1 vgname = 0x7f8f2170b305 "#global" #9 0x00007f8f20b8eec2 in do_command (client=0x0, msg=0x7f8f2170b2f0, msglen=29, buf=0x7f8f20a5db58, buflen=65516, retlen=0x7f8f20a5db4c) at clvmd-command.c:117 args = 0x7f8f2170b301 "\001\004P_#global" arglen = 11 status = 0 lockname = 0x7f8f2170b303 "P_#global" locktype = 0x0 nodeinfo = {sysname = "\377\377\377\377\000\000\000\000p\000\000\030\217\177\000\000Q'Z\337p\200\377\377\260إ \217\177\000\000\257إ \217\177", '\000' <repeats 18 times>, "R{\016\001\000\000\000\000\377", nodename = "\377\377\377\000\000\000\000\000\347\245 \217\177\000\000\320٥ \217\177\000\000@\307&\037\217\177\000\000\000\000\000\000\000\000\000\000\307Q\360\036\217\177\000\000\000\347\245 \217\177\000\000 \000\000\030\217\177\000\000\000", release = "\001\000\000\000\000\000\000\000\000\f\000\000\000\000s\000\000\000\000\000\000\000\020ۥ \217\177\000\000(ڥ \217\177\000\000\031\024\363\036\217\177\000\000\355\377\000\000\000\000\000\000q&Z\337p\200\377\377\000\020", version = "\000\004\000\000\000\000\004\000\000\000\000\000\000\060\004\000\000\000\000\000\000\200\000\000\000\000\000\000\000\220٥ \217\177\000\000\020\000\000\000\000\000\000\000\333\000\000\000~\000\000\000\002\000\000\000\000\000\000\000\000\000\000", machine = "\000\000\000\000y\000\000\000|\000\000\000\217٥ \217\177\000\000\300\224p!\217\177\000\000\000ڥ \021\000\000\000h\225p!\217\177\000\000\320٥ \217\177\000\000\331\031\354\037\217\177\000\000\006\000\000\000\022", domainname = "\000\000\000\240H\002\030\004\000\000\000@\263p!\217\177\000\000 \000\000\030\217\177\000\000\355\377\000\000\000\000\000\000\000\000\000\f\000\000\000\000s\000\000\000\000\000\000\000\020ۥ \217\177\000\000(ڥ \217\177"} lock_cmd = 1 '\001' lock_flags = 4 '\004' #10 0x00007f8f20b9384a in process_remote_command (msg=0x7f8f2170b2f0, msglen=29, fd=6, 
csid=0x7f8f2170b340 "\001") at clvmd.c:1648 replyargs = 0x7f8f18020d00 "\370\a" nodename = "1\000KNOWN 2\000\255\373\217\177\000\000\340\351\004\030\217\177\000\000\340\351\004\030\217\177\000\000\340\351\004\030\217\177\000\000\340\351\004\030\217\177\000\000\341\351\004\030\217\177\000\000\377\377\377\377\377\377\377\377\340\351\004\030\217\177\000\000\377\377\377\377\377\377\377\377", '\000' <repeats 40 times>, "\320ܥ \000\000\000\000\354\377\000\000\035\000\000\000\000\000p!\217\177", '\000' <repeats 16 times>, "\006\004\000\000\000\000\v\000\000\000A\264p!\217\177\000\000\000\000\000\000\000\000\000\000C\264p!\217\177\000\000\000\306\353\037\217\177\000\000\377\377\377\377"... replylen = 0 buflen = 65516 status = 0 #11 0x00007f8f20b9492e in process_work_item (cmd=0x7f8f2170b320) at clvmd.c:2109 No locals. #12 0x00007f8f20b94a78 in lvm_thread_fn (arg=0x7fffb2d7af00) at clvmd.c:2152 ss = {__val = {2560, 0 <repeats 15 times>}} lvm_params = 0x7fffb2d7af00 cmd = 0x7f8f2170b320 #13 0x00007f8f1f694df3 in start_thread (arg=0x7f8f20a5e700) at pthread_create.c:308 __res = <optimized out> pd = 0x7f8f20a5e700 now = <optimized out> unwind_buf = {cancel_jmp_buf = {{jmp_buf = {140252704794368, -3378772670044370856, 0, 140252704795072, 140252704794368, 0, 3315336948984645720, 3315228746399549528}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}} not_first_call = <optimized out> pagesize_m1 = <optimized out> sp = <optimized out> freesize = <optimized out> #14 0x00007f8f1efa93cd in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:113 No locals.
Bug in recent upstream code: Invalid read of size 1 at 0x4C2C2F2: strlen (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) by 0x54ABD31: dm_pool_strdup (pool.c:51) by 0x167A93: _init_profiles (toolcontext.c:627) by 0x169FC7: create_toolcontext (toolcontext.c:1456) by 0x1416ED: init_lvm (lvmcmdline.c:1403) by 0x141FBA: lvm2_main (lvmcmdline.c:1571) by 0x15A7A4: main (lvm.c:21) Address 0x703f17a is 410 bytes inside a block of size 8,192 alloc'd at 0x4C2945D: malloc (in /usr/lib64/valgrind/vgpreload_memcheck-amd64-linux.so) by 0x54AACB6: dm_malloc_aux (dbg_malloc.c:269) by 0x54ABA9C: _new_chunk (pool-fast.c:286) by 0x54AB1C1: dm_pool_alloc_aligned (pool-fast.c:106) by 0x54AB11B: dm_pool_alloc (pool-fast.c:90) by 0x16BEA4: _cfg_def_get_path (config.c:512) by 0x16D53E: find_config_tree_bool (config.c:969) by 0x166332: _init_logging (toolcontext.c:170) by 0x169E5C: create_toolcontext (toolcontext.c:1439) by 0x1416ED: init_lvm (lvmcmdline.c:1403) by 0x141FBA: lvm2_main (lvmcmdline.c:1571) by 0x15A7A4: main (lvm.c:21)
(In reply to Zdenek Kabelac from comment #5) > Bug in recent upstream code: Should be fixed now with: https://git.fedorahosted.org/cgit/lvm2.git/commit/?id=eeff7729d9a48b07bc650c552ec7a03b064c3bc9
This is a nightly upstream build, the RHEL7 official build did not have this code in yet. Closing...
(In reply to Peter Rajnoha from comment #7) > This is a nightly upstream build, the RHEL7 official build s/RHEL7/rawhide
There is still something wrong with the build from commit bfffccca. #0 0x00007f1ca85aa989 in __GI_raise (sig=sig@entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56 resultvar = 0 pid = 3727 selftid = 3734 #1 0x00007f1ca85ac098 in __GI_abort () at abort.c:90 save_stage = 2 act = {__sigaction_handler = {sa_handler = 0x7f1ca85873fc, sa_sigaction = 0x7f1ca85873fc}, sa_mask = {__val = {5, 139761061663797, 3, 139761089113854, 2, 139761061660168, 1, 139761061668833, 3, 139761089113828, 12, 139761061668837, 2, 139761089114640, 139761089114640, 139761089116400}}, sa_flags = 14, sa_restorer = 0x7f1caa11f410} sigs = {__val = {32, 0 <repeats 15 times>}} #2 0x00007f1ca85eb0e7 in __libc_message (do_abort=do_abort@entry=2, fmt=fmt@entry=0x7f1ca86f3b68 "*** Error in `%s': %s: 0x%s ***\n") at ../sysdeps/unix/sysv/linux/libc_fatal.c:196 ap = {{gp_offset = 40, fp_offset = 48, overflow_arg_area = 0x7f1caa11f900, reg_save_area = 0x7f1caa11f810}} ap_copy = {{gp_offset = 16, fp_offset = 48, overflow_arg_area = 0x7f1caa11f900, reg_save_area = 0x7f1caa11f810}} fd = 2 on_2 = <optimized out> list = <optimized out> nlist = <optimized out> cp = <optimized out> written = <optimized out> #3 0x00007f1ca85f255d in malloc_printerr (ptr=<optimized out>, str=0x7f1ca86f3c20 "double free or corruption (out)", action=3) at malloc.c:4972 buf = "00007f1ca002fdc0" cp = <optimized out> #4 _int_free (av=0x7f1ca892f760 <main_arena>, p=<optimized out>, have_lock=0) at malloc.c:3804 size = <optimized out> fb = <optimized out> nextchunk = <optimized out> nextsize = <optimized out> nextinuse = <optimized out> prevsize = <optimized out> bck = <optimized out> fwd = <optimized out> errstr = 0x7f1ca86f3c20 "double free or corruption (out)" locked = <optimized out> #5 0x00007f1ca95838bb in _free_nodes (t=0x7f1ca004bac0) at datastruct/hash.c:125 c = 0x7f1ca002fdc0 n = 0x7f1ca002fe20 i = 40 #6 0x00007f1ca95838f4 in dm_hash_destroy (t=0x7f1ca004bac0) at datastruct/hash.c:131 No locals. 
#7 0x00007f1caa27e547 in _persistent_destroy (f=0x7f1ca0039180) at filters/filter-persistent.c:319 pf = 0x7f1ca0039750 #8 0x00007f1caa26bb81 in refresh_toolcontext (cmd=0x7f1ca00008c0) at commands/toolcontext.c:1597 cft_cmdline = 0x7f1caa545580 <buf.7466> cft_tmp = 0x7f1caa545b20 <buf.9441> #9 0x00007f1caa2589e6 in do_refresh_cache () at lvm-functions.c:652 No locals. #10 0x00007f1caa258b17 in do_lock_vg (command=6 '\006', lock_flags=4 '\004', resource=0x7f1caa94a393 "P_#global") at lvm-functions.c:688 lock_cmd = 6 vgname = 0x7f1caa94a395 "#global" #11 0x00007f1caa250ec2 in do_command (client=0x7f1caa94a050, msg=0x7f1caa94a380, msglen=29, buf=0x7f1caa11fcd0, buflen=65516, retlen=0x7f1caa11fcc4) at clvmd-command.c:117 args = 0x7f1caa94a391 "\006\004P_#global" arglen = 11 status = 0 lockname = 0x7f1caa94a393 "P_#global" locktype = 0x0 nodeinfo = { sysname = "\377\377\377\377\005\000\000\000p\000\000\240\034\177\000\000\301\004\356U\343\200\377\377@䒨\034\177\000\000\000\000\000\000\000\000\000\000\001\000\000\000\000\000\000\000\060\020#\252\034\177\000\000\000\a\022\252\034\177\000\000P", nodename = "\374\021\252\034\177\000\000\000\000\000\000\000\000\000\000P\374\021\252\034\177\000\000\333\034%\252\034\177\000\000\000\000\000\000\354\377\000\000\240\222\060\252\034\177\000\000\000\a\022\252\034\177\000\000\220\374\021\252\034\177\000\000\000", release = "\000\000\000\000\000\000\300\t\022\252\034\177\000\000\220\374\021\252\034\177\000\000\333\034%\252\034\177\000\000\000\000\000@\000\000\000\000\300\223\060\252\034\177\000\000p\374\021\252\034\177\000\000\260\373\021\252\034\177\000\000\001\020", version = "\000\034\177\000\000\000\004\000\000\000\000\000\000\060\004\000\000\034\177\000\000\000\351\061\377\327#\230J3\000\000\000\000\000\000\000\200UT\252\034\177\000\000\200\243\224\252\034\177\000\000\035\000\000\000\000\000\000\000P\240\224\252", machine = 
"\034\177\000\000\r\000\000\000\000\000\000\000\037\374\021\252\034\177\000\000\333\034%\252\034\177\000\000\030\001\000\000\000\000\000\000\225\225\060\252\034\177\000\000P\374\021\252\034\177\000\000\000\351\061\377\327#\230J\000\000\002\000", domainname = '\000' <repeats 11 times>, "\240TT\252\034\177\000\000 \000\000\240\034\177\000\000\377\377", '\000' <repeats 14 times>, "\300\t\022\252\034\177\000\000\310\037%\252\034\177\000\000\000\000\000\000\000"} lock_cmd = 6 '\006' lock_flags = 4 '\004' #12 0x00007f1caa2560b5 in process_local_command (msg=0x7f1caa94a380, msglen=29, client=0x7f1caa94a050, xid=723) at clvmd.c:1867 replybuf = 0x7f1ca004bc20 "H\b" buflen = 65516 replylen = 0 status = 0 #13 0x00007f1caa2568f2 in process_work_item (cmd=0x7f1caa94a2b0) at clvmd.c:2105 No locals. #14 0x00007f1caa256a78 in lvm_thread_fn (arg=0x7fffa614c9b0) at clvmd.c:2152 ss = {__val = {2560, 0 <repeats 15 times>}} lvm_params = 0x7fffa614c9b0 cmd = 0x7f1caa94a2b0 #15 0x00007f1ca8d56df3 in start_thread (arg=0x7f1caa120700) at pthread_create.c:308 __res = <optimized out> pd = 0x7f1caa120700 now = <optimized out> unwind_buf = {cancel_jmp_buf = {{jmp_buf = {139761089120000, 5025155098685953502, 0, 139761089120704, 139761089120000, 0, -4934331297814587938, -4934328519517710882}, mask_was_saved = 0}}, priv = {pad = {0x0, 0x0, 0x0, 0x0}, data = {prev = 0x0, cleanup = 0x0, canceltype = 0}}} not_first_call = <optimized out> pagesize_m1 = <optimized out> sp = <optimized out> freesize = <optimized out>
Hmm, I'll recheck...
(In reply to Peter Rajnoha from comment #10) > Hmm, I'll recheck... Kabi found a bug in some recent patch he added for clvmd... Reassigning to him.
Update: Released version does not suffer from the segfault.
The original reason for the coredump was a missed dev_close() as a result of commits a36869867275b8ef0f6d324bc5a7f3e2b8e445e9 and 3e5bec37e9cec455f20656858ba510b69010676d. Fixed in upstream commit: https://www.redhat.com/archives/lvm-devel/2014-March/msg00214.html To catch such problems earlier, a few more patches have been committed upstream: https://www.redhat.com/archives/lvm-devel/2014-March/msg00197.html https://www.redhat.com/archives/lvm-devel/2014-March/msg00196.html