Hide Forgot
Created attachment 558547 [details] One of the brick log. Description of problem: Created a striped replicate volume(4 bricks -> 2x2) & ran posix compliance test from one client & untarred openssl from the other. Server crashed 2 bricks with same backtrace. Similar kind of setup was created on Bug 786007, but here write-behind was enabled. Happens regularly. Core was generated by `/usr/local/sbin/glusterfsd -s localhost --volfile-id vol.dagobah.data-export4 -'. Program terminated with signal 11, Segmentation fault. #0 __strlen_sse2 () at ../sysdeps/x86_64/multiarch/../strlen.S:32 32 ../sysdeps/x86_64/multiarch/../strlen.S: No such file or directory. in ../sysdeps/x86_64/multiarch/../strlen.S (gdb) bt #0 __strlen_sse2 () at ../sysdeps/x86_64/multiarch/../strlen.S:32 #1 0x00007fafc489809b in ?? () from /usr/lib/libefence.so.0 #2 0x00007fafc4898431 in EF_Exit () from /usr/lib/libefence.so.0 #3 0x00007fafc4897b53 in memalign () from /usr/lib/libefence.so.0 #4 0x00007fafc4897757 in malloc () from /usr/lib/libefence.so.0 #5 0x00007fafc4aa25b4 in ?? () from /lib64/ld-linux-x86-64.so.2 #6 0x00007fafc4aacb4a in ?? () from /lib64/ld-linux-x86-64.so.2 #7 0x00007fafc4aa8996 in ?? () from /lib64/ld-linux-x86-64.so.2 #8 0x00007fafc4aad37a in ?? () from /lib64/ld-linux-x86-64.so.2 #9 0x00007fafc3d351e0 in do_dlopen (ptr=0x7fafb5132570) at dl-libc.c:86 #10 0x00007fafc4aa8996 in ?? 
() from /lib64/ld-linux-x86-64.so.2 #11 0x00007fafc3d3529a in dlerror_run (args=0x7fafb5132570, operate=0x7fafc3d351a0 <do_dlopen>) at dl-libc.c:47 #12 __GI___libc_dlopen_mode (name=<optimized out>, mode=<optimized out>) at dl-libc.c:160 #13 0x00007fafc3d108a5 in init () at ../sysdeps/x86_64/../ia64/backtrace.c:41 #14 0x00007fafc3fc4713 in pthread_once () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_once.S:104 #15 0x00007fafc3d1099c in __GI___backtrace (array=<optimized out>, size=200) at ../sysdeps/x86_64/../ia64/backtrace.c:85 #16 0x00007fafc4635d26 in gf_print_trace (signum=11) at ../../../libglusterfs/src/common-utils.c:425 #17 <signal handler called> #18 __strlen_sse2 () at ../sysdeps/x86_64/multiarch/../strlen.S:32 #19 0x00007fafc489809b in ?? () from /usr/lib/libefence.so.0 #20 0x00007fafc4898431 in EF_Exit () from /usr/lib/libefence.so.0 #21 0x00007fafc4897a79 in memalign () from /usr/lib/libefence.so.0 #22 0x00007fafc4897757 in malloc () from /usr/lib/libefence.so.0 #23 0x00007fafc4897d34 in calloc () from /usr/lib/libefence.so.0 #24 0x00007fafc4654914 in __gf_calloc (nmemb=1, size=32, type=3) at ../../../libglusterfs/src/mem-pool.c:145 #25 0x00007fafc461a468 in get_new_data () at ../../../libglusterfs/src/dict.c:55 #26 0x00007fafc461c10f in data_from_int32 (value=0) at ../../../libglusterfs/src/dict.c:785 #27 0x00007fafc461e3f6 in dict_set_int32 (this=0x7fafacb60fc0, key=0x7fafbe9903e2 "glusterfs.entrylk-count", val=0) at ../../../libglusterfs/src/dict.c:1737 #28 0x00007fafbe987547 in pl_entrylk_xattr_fill (this=0x7fafbfa92688, inode=0x7fafbd94b014, dict=0x7fafacb60fc0) at ../../../../../xlators/features/locks/src/posix.c:1372 #29 0x00007fafbe9877e4 in pl_lookup_cbk (frame=0x7fafc2b6a654, cookie=0x7fafc2b694dc, this=0x7fafbfa92688, op_ret=0, op_errno=0, inode=0x7fafbd94b014, buf=0x7fafb5133b90, dict=0x7fafacb60fc0, postparent=0x7fafb5133c00) at ../../../../../xlators/features/locks/src/posix.c:1434 #30 0x00007fafbeb998de in posix_acl_lookup_cbk 
(frame=0x7fafc2b694dc, cookie=0x7fafc2b63d84, this=0x7fafbfa5c688, op_ret=0, op_errno=0, inode=0x7fafbd94b014, buf=0x7fafb5133b90, xattr=0x7fafacb60fc0, postparent=0x7fafb5133c00) ---Type <return> to continue, or q <return> to quit--- at ../../../../../xlators/system/posix-acl/src/posix-acl.c:746 #31 0x00007fafbedad1bf in posix_lookup (frame=0x7fafc2b63d84, this=0x7fafbfa50688, loc=0x7fafc19f6668, xattr_req=0x7fafacdb3fc0) at ../../../../../xlators/storage/posix/src/posix.c:162 #32 0x00007fafbeb99d09 in posix_acl_lookup (frame=0x7fafc2b694dc, this=0x7fafbfa5c688, loc=0x7fafc19f6668, xattr=0x7fafacdb3fc0) at ../../../../../xlators/system/posix-acl/src/posix-acl.c:798 #33 0x00007fafbe987e55 in pl_lookup (frame=0x7fafc2b6a654, this=0x7fafbfa92688, loc=0x7fafc19f6668, xattr_req=0x7fafacdb3fc0) at ../../../../../xlators/features/locks/src/posix.c:1486 #34 0x00007fafbe763f01 in iot_lookup_wrapper (frame=0x7fafc2b69384, this=0x7fafbfaac688, loc=0x7fafc19f6668, xattr_req=0x7fafacdb3fc0) at ../../../../../xlators/performance/io-threads/src/io-threads.c:295 #35 0x00007fafc464831a in call_resume_wind (stub=0x7fafc19f6630) at ../../../libglusterfs/src/call-stub.c:2461 #36 0x00007fafc464f419 in call_resume (stub=0x7fafc19f6630) at ../../../libglusterfs/src/call-stub.c:3932 #37 0x00007fafbe7638ef in iot_worker (data=0x7fafbe51fee8) at ../../../../../xlators/performance/io-threads/src/io-threads.c:138 #38 0x00007fafc3fbeefc in start_thread (arg=0x7fafb5134700) at pthread_create.c:304 #39 0x00007fafc3cf989d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:112 #40 0x0000000000000000 in ?? 
() (gdb) f 27 #27 0x00007fafc461e3f6 in dict_set_int32 (this=0x7fafacb60fc0, key=0x7fafbe9903e2 "glusterfs.entrylk-count", val=0) at ../../../libglusterfs/src/dict.c:1737 1737 data = data_from_int32 (val); (gdb) p val $1 = 0 (gdb) p data $2 = (data_t *) 0x0 (gdb) f 25 #25 0x00007fafc461a468 in get_new_data () at ../../../libglusterfs/src/dict.c:55 55 data = (data_t *) GF_CALLOC (1, sizeof (data_t), gf_common_mt_data_t); (gdb) p data $4 = (data_t *) 0x0 I have attached the brick logs.
Rahul, the crash is caused by libefence(3)'s handling of word alignment and memory-overrun detection. To keep libefence from causing spurious segmentation faults, you could try recreating the problem after setting the global environment variable EF_ALIGNMENT to 0. For further details on how to prevent libefence from interfering with tests, see libefence(3).
Yeah, that seems to be the cause of the crash. Please mark it invalid/notabug.