Bug 1217576

Summary: [HC] Gluster volume locks the whole cluster
Product: [Community] GlusterFS
Reporter: Christopher Pereira <kripper>
Component: core
Assignee: bugs <bugs>
Status: CLOSED INSUFFICIENT_DATA
QA Contact:
Severity: urgent
Docs Contact:
Priority: unspecified
Version: mainline
CC: amukherj, amureini, bugs, dfediuck, gluster-bugs, kripper, nsoffer, sabose, sasundar, sbonazzo, ylavi
Target Milestone: ---
Keywords: Triaged
Target Release: ---
Hardware: x86_64
OS: Linux
Whiteboard:
Fixed In Version:
Doc Type: Bug Fix
Doc Text:
Story Points: ---
Clone Of:
Environment:
Last Closed: 2015-05-25 21:36:00 UTC
Type: Bug
Regression: ---
Mount Type: ---
Documentation: ---
CRM:
Verified Versions:
Category: ---
oVirt Team: ---
RHEL 7.3 requirements from Atomic Host:
Cloudforms Team: ---
Target Upstream Version:
Embargoed:
Bug Depends On:    
Bug Blocks: 1175354    

Description Christopher Pereira 2015-04-30 17:21:42 UTC
== Description of problem ==

"stat" operations (mount, unmount, lsof, etc) on a gluster volume locks all 3 hosts from a CentOS 7 cluster.

== Versions ==

# rpm -qa | grep gluster
vdsm-gluster-4.17.0-736.git88660a1.el7.centos.noarch
glusterfs-server-3.8dev-0.30.gitb72bc58.el7.centos.x86_64
glusterfs-geo-replication-3.8dev-0.30.gitb72bc58.el7.centos.x86_64
glusterfs-cli-3.8dev-0.30.gitb72bc58.el7.centos.x86_64
glusterfs-fuse-3.8dev-0.30.gitb72bc58.el7.centos.x86_64
glusterfs-api-3.8dev-0.30.gitb72bc58.el7.centos.x86_64
glusterfs-rdma-3.8dev-0.30.gitb72bc58.el7.centos.x86_64
glusterfs-libs-3.8dev-0.30.gitb72bc58.el7.centos.x86_64
glusterfs-3.8dev-0.30.gitb72bc58.el7.centos.x86_64

== Research ==

1) On H4 (host 4), we strace'd "mount" to reproduce the lock by mounting any Gluster volume of the cluster:

	h4:~# strace mount -t glusterfs localhost:datacenter /mnt/tmp/
	execve("/usr/bin/mount", ["mount", "-t", "glusterfs", "localhost:datacenter", "/mnt/tmp/"], [/* 28 vars */]) = 0
	brk(0)                                  = 0x7fb7e13dd000
	mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fb7dfdb3000
	access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
	open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
	fstat(3, {st_mode=S_IFREG|0644, st_size=35641, ...}) = 0
	mmap(NULL, 35641, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7fb7dfdaa000
	close(3)                                = 0
	open("/lib64/libmount.so.1", O_RDONLY|O_CLOEXEC) = 3
	read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\300\230\0\0\0\0\0\0"..., 832) = 832
	fstat(3, {st_mode=S_IFREG|0755, st_size=260760, ...}) = 0
	mmap(NULL, 2354496, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7fb7df954000
	mprotect(0x7fb7df990000, 2097152, PROT_NONE) = 0
	mmap(0x7fb7dfb90000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x3c000) = 0x7fb7dfb90000
	mmap(0x7fb7dfb92000, 3392, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7fb7dfb92000
	close(3)                                = 0
	open("/lib64/libblkid.so.1", O_RDONLY|O_CLOEXEC) = 3
	read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\320\177\0\0\0\0\0\0"..., 832) = 832
	fstat(3, {st_mode=S_IFREG|0755, st_size=248584, ...}) = 0
	mmap(NULL, 2341704, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7fb7df718000
	mprotect(0x7fb7df750000, 2093056, PROT_NONE) = 0
	mmap(0x7fb7df94f000, 16384, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x37000) = 0x7fb7df94f000
	mmap(0x7fb7df953000, 2888, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7fb7df953000
	close(3)                                = 0
	open("/lib64/libuuid.so.1", O_RDONLY|O_CLOEXEC) = 3
	read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\20\25\0\0\0\0\0\0"..., 832) = 832
	fstat(3, {st_mode=S_IFREG|0755, st_size=20032, ...}) = 0
	mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fb7dfda9000
	mmap(NULL, 2113920, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7fb7df513000
	mprotect(0x7fb7df517000, 2093056, PROT_NONE) = 0
	mmap(0x7fb7df716000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x3000) = 0x7fb7df716000
	close(3)                                = 0
	open("/lib64/libselinux.so.1", O_RDONLY|O_CLOEXEC) = 3
	read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\240d\0\0\0\0\0\0"..., 832) = 832
	fstat(3, {st_mode=S_IFREG|0755, st_size=147120, ...}) = 0
	mmap(NULL, 2246784, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7fb7df2ee000
	mprotect(0x7fb7df30f000, 2097152, PROT_NONE) = 0
	mmap(0x7fb7df50f000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x21000) = 0x7fb7df50f000
	mmap(0x7fb7df511000, 6272, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7fb7df511000
	close(3)                                = 0
	open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
	read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\0\34\2\0\0\0\0\0"..., 832) = 832
	fstat(3, {st_mode=S_IFREG|0755, st_size=2107760, ...}) = 0
	mmap(NULL, 3932736, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7fb7def2d000
	mprotect(0x7fb7df0e3000, 2097152, PROT_NONE) = 0
	mmap(0x7fb7df2e3000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1b6000) = 0x7fb7df2e3000
	mmap(0x7fb7df2e9000, 16960, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7fb7df2e9000
	close(3)                                = 0
	open("/lib64/libpcre.so.1", O_RDONLY|O_CLOEXEC) = 3
	read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\360\25\0\0\0\0\0\0"..., 832) = 832
	fstat(3, {st_mode=S_IFREG|0755, st_size=398272, ...}) = 0
	mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fb7dfda8000
	mmap(NULL, 2490888, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7fb7deccc000
	mprotect(0x7fb7ded2b000, 2097152, PROT_NONE) = 0
	mmap(0x7fb7def2b000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x5f000) = 0x7fb7def2b000
	close(3)                                = 0
	open("/lib64/liblzma.so.5", O_RDONLY|O_CLOEXEC) = 3
	read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0000/\0\0\0\0\0\0"..., 832) = 832
	fstat(3, {st_mode=S_IFREG|0755, st_size=153184, ...}) = 0
	mmap(NULL, 2245240, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7fb7deaa7000
	mprotect(0x7fb7deacb000, 2093056, PROT_NONE) = 0
	mmap(0x7fb7decca000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x23000) = 0x7fb7decca000
	close(3)                                = 0
	open("/lib64/libdl.so.2", O_RDONLY|O_CLOEXEC) = 3
	read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\320\16\0\0\0\0\0\0"..., 832) = 832
	fstat(3, {st_mode=S_IFREG|0755, st_size=19512, ...}) = 0
	mmap(NULL, 2109744, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7fb7de8a3000
	mprotect(0x7fb7de8a6000, 2093056, PROT_NONE) = 0
	mmap(0x7fb7deaa5000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x7fb7deaa5000
	close(3)                                = 0
	open("/lib64/libpthread.so.0", O_RDONLY|O_CLOEXEC) = 3
	read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\240l\0\0\0\0\0\0"..., 832) = 832
	fstat(3, {st_mode=S_IFREG|0755, st_size=141616, ...}) = 0
	mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fb7dfda7000
	mmap(NULL, 2208864, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7fb7de687000
	mprotect(0x7fb7de69d000, 2097152, PROT_NONE) = 0
	mmap(0x7fb7de89d000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x16000) = 0x7fb7de89d000
	mmap(0x7fb7de89f000, 13408, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7fb7de89f000
	close(3)                                = 0
	mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fb7dfda6000
	mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fb7dfda4000
	arch_prctl(ARCH_SET_FS, 0x7fb7dfda4880) = 0
	mprotect(0x7fb7df2e3000, 16384, PROT_READ) = 0
	mprotect(0x7fb7de89d000, 4096, PROT_READ) = 0
	mprotect(0x7fb7deaa5000, 4096, PROT_READ) = 0
	mprotect(0x7fb7decca000, 4096, PROT_READ) = 0
	mprotect(0x7fb7def2b000, 4096, PROT_READ) = 0
	mprotect(0x7fb7df50f000, 4096, PROT_READ) = 0
	mprotect(0x7fb7df716000, 4096, PROT_READ) = 0
	mprotect(0x7fb7df94f000, 12288, PROT_READ) = 0
	mprotect(0x7fb7dfb90000, 4096, PROT_READ) = 0
	mprotect(0x7fb7dffc0000, 4096, PROT_READ) = 0
	mprotect(0x7fb7dfdb4000, 4096, PROT_READ) = 0
	munmap(0x7fb7dfdaa000, 35641)           = 0
	set_tid_address(0x7fb7dfda4b50)         = 25016
	set_robust_list(0x7fb7dfda4b60, 24)     = 0
	rt_sigaction(SIGRTMIN, {0x7fb7de68d780, [], SA_RESTORER|SA_SIGINFO, 0x7fb7de696130}, NULL, 8) = 0
	rt_sigaction(SIGRT_1, {0x7fb7de68d810, [], SA_RESTORER|SA_RESTART|SA_SIGINFO, 0x7fb7de696130}, NULL, 8) = 0
	rt_sigprocmask(SIG_UNBLOCK, [RTMIN RT_1], NULL, 8) = 0
	getrlimit(RLIMIT_STACK, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0
	statfs("/sys/fs/selinux", 0x7fffa8386780) = -1 ENOENT (No such file or directory)
	statfs("/selinux", 0x7fffa8386780)      = -1 ENOENT (No such file or directory)
	brk(0)                                  = 0x7fb7e13dd000
	brk(0x7fb7e13fe000)                     = 0x7fb7e13fe000
	open("/proc/filesystems", O_RDONLY)     = 3
	fstat(3, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
	mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fb7dfdb2000
	read(3, "nodev\tsysfs\nnodev\trootfs\nnodev\tb"..., 1024) = 366
	read(3, "", 1024)                       = 0
	close(3)                                = 0
	munmap(0x7fb7dfdb2000, 4096)            = 0
	open("/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3
	fstat(3, {st_mode=S_IFREG|0644, st_size=106065056, ...}) = 0
	mmap(NULL, 106065056, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7fb7d8160000
	close(3)                                = 0
	getuid()                                = 0
	geteuid()                               = 0
	getuid()                                = 0
	geteuid()                               = 0
	getgid()                                = 0
	getegid()                               = 0
	prctl(PR_GET_DUMPABLE)                  = 1
	lstat("/etc/mtab", {st_mode=S_IFLNK|0777, st_size=17, ...}) = 0
	getuid()                                = 0
	geteuid()                               = 0
	getgid()                                = 0
	getegid()                               = 0
	prctl(PR_GET_DUMPABLE)                  = 1
	stat("/run", {st_mode=S_IFDIR|0755, st_size=960, ...}) = 0
	lstat("/run/mount/utab", {st_mode=S_IFREG|0644, st_size=236, ...}) = 0
	open("/run/mount/utab", O_RDWR|O_CREAT|O_CLOEXEC, 0644) = 3
	close(3)                                = 0
	getcwd("/root", 4095)                   = 6
	readlink("/root/localhost:datacenter", 0x7fffa83844e0, 4096) = -1 ENOENT (No such file or directory)
	readlink("/mnt", 0x7fffa8384440, 4096)  = -1 EINVAL (Invalid argument)
	readlink("/mnt/tmp", 0x7fffa8384440, 4096) = -1 EINVAL (Invalid argument)
	stat("/sbin/mount.glusterfs", {st_mode=S_IFREG|0755, st_size=16949, ...}) = 0
	clone(child_stack=0, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7fb7dfda4b50) = 25017
	wait4(-1,

	**********************************************************************************************
	...here it locks. After killing the offending process on H6 (host 6), the command unlocks...
	**********************************************************************************************

	/sbin/mount.glusterfs: according to mtab, GlusterFS is already mounted on /mnt/tmp
	[{WIFEXITED(s) && WEXITSTATUS(s) == 32}], 0, NULL) = 25017
	--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=25017, si_status=32, si_utime=0, si_stime=0} ---
	close(1)                                = 0
	close(2)                                = 0
	exit_group(32)                          = ?
	+++ exited with 32 +++
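
Note that mount(8) itself only clones and then waits for /sbin/mount.glusterfs (the wait4 above), so the hang is inside that helper or the glusterfs client it spawns. A minimal sketch of how the trace could be extended the next time this happens, reusing the same volume and mount point as above:

	# follow the forked /sbin/mount.glusterfs helper and its children, with timestamps
	strace -f -tt -o /tmp/mount-gluster.trace mount -t glusterfs localhost:datacenter /mnt/tmp/
	# or attach to the stuck helper while a mount is already hung (assumes a single instance)
	strace -f -tt -p $(pgrep -f mount.glusterfs)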

2) During the lock, on H4 we saw this:

	Apr 30 13:09:33 h5.imatronix.com etc-glusterfs-glusterd.vol[13243]: [2015-04-30 16:09:33.828562] C [rpc-clnt-ping.c:161:rpc_clnt_ping_timer_expired] 0-management: server [IP of host H6]:24007 has not responded in the last 30 seconds, disconnecting.
	Apr 30 13:10:13 h5.imatronix.com etc-glusterfs-glusterd.vol[13243]: [2015-04-30 16:10:13.835735] C [rpc-clnt-ping.c:161:rpc_clnt_ping_timer_expired] 0-management: server [IP of host H6]:24007 has not responded in the last 30 seconds, disconnecting.
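
These ping-timer expiries are logged by glusterd on each peer. A quick, hedged way to check whether all three hosts saw the same disconnects (assuming the default log location on CentOS 7):

	# the glusterd log is named after its volfile; adjust the path if logging was relocated
	grep rpc_clnt_ping_timer_expired /var/log/glusterfs/etc-glusterfs-glusterd.vol.log
	# and confirm what glusterd itself thinks about its peers
	gluster peer status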

3) Following the logs, we did a "systemctl stop glusterd" on H6, which unlocked a similar hung process on H5 (gluster was locking all 3 hosts of our testing lab), but it didn't unlock the process on H4.

4) Then we grep'ed for glusterfs processes connected to host H4:

	# ps aux | grep gluster | grep h4
	root       976  4.2  0.1 474492 61928 ?        S<sl 04:44  22:00 /usr/sbin/glusterfs --volfile-server=h4.imatronix.com --volfile-id=export /rhev/data-center/mnt/glusterSD/h4.imatronix.com:export
	root      4285  0.4  0.1 584632 43800 ?        S<sl 02:57   2:59 /usr/sbin/glusterfs --volfile-server=h4.imatronix.com --volfile-id=vdisks /rhev/data-center/mnt/glusterSD/h4.imatronix.com:vdisks
	root      4374  0.6  0.2 1108920 73164 ?       S<sl 02:57   3:56 /usr/sbin/glusterfs --volfile-server=h4.imatronix.com --volfile-id=vdisks-ssd /rhev/data-center/mnt/glusterSD/h4.imatronix.com:vdisks-ssd
	root     11725  0.0  0.1 563200 34444 ?        Ssl  12:28   0:00 /usr/sbin/glusterfs --volfile-server=h4.imatronix.com --volfile-id=vdisks-ssd /mnt/disk1
	root     11733  0.0  0.0 115344   840 pts/1    S    12:28   0:00 /bin/sh /sbin/mount.glusterfs h4.imatronix.com:vdisks-ssd /mnt/disk1 -o rw
	
5) ...and started killing them one by one.

	h6:~# kill 976
	h6:~# kill 4285
	h6:~# kill 4374
	h6:~# kill 11725

6) Voila! This unlocked the whole cluster. We can see a healing process going on, possibly related to the lock (see the syslog messages below).
This is happening frequently, so the next time I will strace the processes before killing them (a sketch of what could be captured follows the log excerpt).

	Message from syslogd@h6 at Apr 30 13:24:51 ...
	 mnt-disk1-gluster-bricks-datacenter[2961]:[2015-04-30 16:24:51.152768] M [posix-helpers.c:1718:posix_health_check_thread_proc] 0-datacenter-posix: health-check failed, going down

	Message from syslogd@h6 at Apr 30 13:24:51 ...
	 mnt-disk1-gluster-bricks-iso[2951]:[2015-04-30 16:24:51.152823] M [posix-helpers.c:1718:posix_health_check_thread_proc] 0-iso-posix: health-check failed, going down

	Message from syslogd@h6 at Apr 30 13:24:51 ...
	 mnt-disk1-gluster-bricks-vdisks-ssd.1[3328]:[2015-04-30 16:24:51.153113] M [posix-helpers.c:1718:posix_health_check_thread_proc] 0-vdisks-ssd-posix: health-check failed, going down

	Message from syslogd@h6 at Apr 30 13:24:51 ...
	 mnt-disk1-gluster-bricks-vdisks-ssd.2[3347]:[2015-04-30 16:24:51.153664] M [posix-helpers.c:1718:posix_health_check_thread_proc] 0-vdisks-ssd-posix: health-check failed, going down

	Message from syslogd@h6 at Apr 30 13:24:51 ...
	 mnt-disk1-gluster-bricks-vdisks[2956]:[2015-04-30 16:24:51.153890] M [posix-helpers.c:1718:posix_health_check_thread_proc] 0-vdisks-posix: health-check failed, going down

	Message from syslogd@h6 at Apr 30 13:24:51 ...
	 mnt-disk1-gluster-bricks-engine[2969]:[2015-04-30 16:24:51.153869] M [posix-helpers.c:1718:posix_health_check_thread_proc] 0-engine-posix: health-check failed, going down

	Message from syslogd@h6 at Apr 30 13:25:21 ...
	 mnt-disk1-gluster-bricks-datacenter[2961]:[2015-04-30 16:25:21.152877] M [posix-helpers.c:1723:posix_health_check_thread_proc] 0-datacenter-posix: still alive! -> SIGTERM

	Message from syslogd@h6 at Apr 30 13:25:21 ...
	 mnt-disk1-gluster-bricks-iso[2951]:[2015-04-30 16:25:21.152877] M [posix-helpers.c:1723:posix_health_check_thread_proc] 0-iso-posix: still alive! -> SIGTERM

	Message from syslogd@h6 at Apr 30 13:25:21 ...
	 mnt-disk1-gluster-bricks-vdisks-ssd.1[3328]:[2015-04-30 16:25:21.153222] M [posix-helpers.c:1723:posix_health_check_thread_proc] 0-vdisks-ssd-posix: still alive! -> SIGTERM

	Message from syslogd@h6 at Apr 30 13:25:21 ...
	 mnt-disk1-gluster-bricks-vdisks-ssd.2[3347]:[2015-04-30 16:25:21.153818] M [posix-helpers.c:1723:posix_health_check_thread_proc] 0-vdisks-ssd-posix: still alive! -> SIGTERM

	Message from syslogd@h6 at Apr 30 13:25:21 ...
	 mnt-disk1-gluster-bricks-vdisks[2956]:[2015-04-30 16:25:21.154137] M [posix-helpers.c:1723:posix_health_check_thread_proc] 0-vdisks-posix: still alive! -> SIGTERM

	Message from syslogd@h6 at Apr 30 13:25:21 ...
	 mnt-disk1-gluster-bricks-engine[2969]:[2015-04-30 16:25:21.156939] M [posix-helpers.c:1723:posix_health_check_thread_proc] 0-engine-posix: still alive! -> SIGTERM
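
Before killing the stuck processes next time, it may be worth capturing their state first. A rough sketch, using the PIDs from step 4 as an example (SIGUSR1 makes a glusterfs/glusterfsd process write a statedump under /var/run/gluster):

	# ask the stuck client for a statedump (process keeps running; dump lands in /var/run/gluster)
	kill -USR1 976
	# user-space stacks of the hung client process
	gdb -p 11725 -batch -ex 'thread apply all bt'
	# kernel-side stack of the hung mount helper (main thread)
	cat /proc/11733/stack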

7) During the lock, the volume status on H6 was reporting everything as online.
	 
	h6:~# gluster vol status
	Status of volume: datacenter
	Gluster process                             TCP Port  RDMA Port  Online  Pid
	------------------------------------------------------------------------------
	Brick h4.imatronix.com:/mnt/disk1/gluster-b
	ricks/datacenter                            49160     0          Y       5147
	Brick h5.imatronix.com:/mnt/disk1/gluster-b
	ricks/datacenter                            49157     0          Y       3000
	Brick h6.imatronix.com:/mnt/disk1/gluster-b
	ricks/datacenter                            49155     0          Y       2961
	NFS Server on localhost (= H6)              2049      0          Y       3370
	NFS Server on [IP of host H4]               2049      0          Y       24866
	NFS Server on h5.imatronix.com              2049      0          Y       3219

	Task Status of Volume datacenter
	------------------------------------------------------------------------------
	There are no active volume tasks

	Status of volume: engine
	Gluster process                             TCP Port  RDMA Port  Online  Pid
	------------------------------------------------------------------------------
	Brick h4.imatronix.com:/mnt/disk1/gluster-b
	ricks/engine                                49152     0          Y       4681
	Brick h5.imatronix.com:/mnt/disk1/gluster-b
	ricks/engine                                49155     0          Y       3014
	Brick h6.imatronix.com:/mnt/disk1/gluster-b
	ricks/engine                                49152     0          Y       2969
	Self-heal Daemon on localhost               N/A       N/A        Y       3376
	Self-heal Daemon on h5.imatronix.com        N/A       N/A        Y       3226
	Self-heal Daemon on [IP of host H4]         N/A       N/A        Y       24877

	Task Status of Volume engine
	------------------------------------------------------------------------------
	There are no active volume tasks

	Status of volume: export
	Gluster process                             TCP Port  RDMA Port  Online  Pid
	------------------------------------------------------------------------------
	Brick h4.imatronix.com:/mnt/disk1/gluster-b
	ricks/export                                49154     0          Y       4656
	NFS Server on localhost                     2049      0          Y       3370
	NFS Server on h5.imatronix.com              2049      0          Y       3219
	NFS Server on [IP of host H4]               2049      0          Y       24866

	Task Status of Volume export
	------------------------------------------------------------------------------
	There are no active volume tasks

	Status of volume: georep-vdisks-ssd
	Gluster process                             TCP Port  RDMA Port  Online  Pid
	------------------------------------------------------------------------------
	Brick h4.imatronix.com:/mnt/disk1/gluster-b
	ricks/georep-vdisks-ssd                     49157     0          Y       4650
	NFS Server on localhost                     2049      0          Y       3370
	NFS Server on h5.imatronix.com              2049      0          Y       3219
	NFS Server on [IP of host H4]               2049      0          Y       24866

	Task Status of Volume georep-vdisks-ssd
	------------------------------------------------------------------------------
	There are no active volume tasks

	Status of volume: iso
	Gluster process                             TCP Port  RDMA Port  Online  Pid
	------------------------------------------------------------------------------
	Brick h4.imatronix.com:/mnt/disk1/gluster-b
	ricks/iso                                   49155     0          Y       4645
	Brick h6.imatronix.com:/mnt/disk1/gluster-b
	ricks/iso                                   49154     0          Y       2951
	NFS Server on localhost                     2049      0          Y       3370
	NFS Server on [IP of host H4]               2049      0          Y       24866
	NFS Server on h5.imatronix.com              2049      0          Y       3219

	Task Status of Volume iso
	------------------------------------------------------------------------------
	There are no active volume tasks

	Status of volume: vdisks
	Gluster process                             TCP Port  RDMA Port  Online  Pid
	------------------------------------------------------------------------------
	Brick h4.imatronix.com:/mnt/disk1/gluster-b
	ricks/vdisks                                49153     0          Y       4664
	Brick h6.imatronix.com:/mnt/disk1/gluster-b
	ricks/vdisks                                49153     0          Y       2956
	Self-heal Daemon on localhost               N/A       N/A        Y       3376
	Self-heal Daemon on [IP of host H4]         N/A       N/A        Y       24877
	Self-heal Daemon on h5.imatronix.com        N/A       N/A        Y       3226

	Task Status of Volume vdisks
	------------------------------------------------------------------------------
	There are no active volume tasks

	Status of volume: vdisks-ssd
	Gluster process                             TCP Port  RDMA Port  Online  Pid
	------------------------------------------------------------------------------
	Brick h5.imatronix.com:/mnt/disk1/gluster-b
	ricks/vdisks-ssd                            49156     0          Y       3005
	Brick h4.imatronix.com:/mnt/disk1/gluster-b
	ricks/vdisks-ssd.1                          49158     0          Y       4675
	Brick h6.imatronix.com:/mnt/disk1/gluster-b
	ricks/vdisks-ssd.1                          49156     0          Y       3328
	Brick h5.imatronix.com:/gluster-bricks/vdis
	ks-ssd.2                                    49154     0          Y       3019
	Brick h4.imatronix.com:/mnt/disk1/gluster-b
	ricks/vdisks-ssd.2                          49159     0          Y       4663
	Brick h6.imatronix.com:/mnt/disk1/gluster-b
	ricks/vdisks-ssd.2                          49157     0          Y       3347
	Self-heal Daemon on localhost               N/A       N/A        Y       3376
	Self-heal Daemon on h5.imatronix.com        N/A       N/A        Y       3226
	Self-heal Daemon on [IP of host H4]         N/A       N/A        Y       24877

	Task Status of Volume vdisks-ssd
	------------------------------------------------------------------------------
	Task                 : Rebalance
	ID                   : 26d87c3b-d959-430c-b973-3fe0066c2c04
	Status               : completed

8) This is the complete list of the other gluster processes on H6 during the lock:

	h6:~# ps aux | grep gluster

	root       976  4.2  0.1 474492 61928 ?        S<sl 04:44  22:00 /usr/sbin/glusterfs --volfile-server=h4.imatronix.com --volfile-id=export /rhev/data-center/mnt/glusterSD/h4.imatronix.com:export
	root      1825  0.0  0.0 545928 30372 ?        Ssl  02:23   0:02 /usr/sbin/glusterfs --volfile-server=h6.imatronix.com --volfile-id=datacenter /mnt/datacenter
	root      2951  0.0  0.1 1824680 42140 ?       Ssl  02:23   0:06 /usr/sbin/glusterfsd -s h6.imatronix.com --volfile-id iso.h6.imatronix.com.mnt-disk1-gluster-bricks-iso -p /var/lib/glusterd/vols/iso/run/h6.imatronix.com-mnt-disk1-gluster-bricks-iso.pid -S /var/run/gluster/145ef4f9686c30fd41fbff7065bc2ab8.socket --brick-name /mnt/disk1/gluster-bricks/iso -l /var/log/glusterfs/bricks/mnt-disk1-gluster-bricks-iso.log --xlator-option *-posix.glusterd-uuid=59674673-93f6-420b-b4a9-750a3f79f104 --brick-port 49154 --xlator-option iso-server.listen-port=49154
	root      2956 80.4  0.1 2100160 51328 ?       Ssl  02:23 530:20 /usr/sbin/glusterfsd -s h6.imatronix.com --volfile-id vdisks.h6.imatronix.com.mnt-disk1-gluste -bricks-vdisks -p /var/lib/glusterd/vols/vdisks/run/h6.imatronix.com-mnt-disk1-gluster-bricks-vdisks.pid -S /var/run/gluster/5ec6f638d4d29f3547aeaf98d9a394a2.socket --brick-name /mnt/disk1/gluster-bricks/vdisks -l /var/log/glusterfs/bricks/mnt-disk1-gluster-bricks-vdisks.log --xlator-option *-posix.glusterd-uuid=59674673-93f6-420b-b4a9-750a3f79f104 --brick-port 49153 --xlator-option vdisks-server.listen-port=49153
	root      2961  0.0  0.1 1777652 42596 ?       Ssl  02:23   0:03 /usr/sbin/glusterfsd -s h6.imatronix.com --volfile-id datacenter.h6.imatronix.com.mnt-disk1-gluster-bricks-datacenter -p /var/lib/glusterd/vols/datacenter/run/h6.imatronix.com-mnt-disk1-gluster-bricks-datacenter.pid -S /var/run/gluster/d8892cb68dcc85ffc86ddb8fe225a825.socket --brick-name /mnt/disk1/gluster-bricks/datacenter -l /var/log/glusterfs/bricks/mnt-disk1-gluster-bricks-datacenter.log --xlator-option *-posix.glusterd-uuid=59674673-93f6-420b-b4a9-750a3f79f104 --brick-port 49155 --xlator-option datacenter-server.listen-port=49155
	root      2969  6.0  0.1 2028456 54556 ?       Ssl  02:23  39:36 /usr/sbin/glusterfsd -s h6.imatronix.com --volfile-id engine.h6.imatronix.com.mnt-disk1-gluste -bricks-engine -p /var/lib/glusterd/vols/engine/run/h6.imatronix.com-mnt-disk1-gluster-bricks-engine.pid -S /var/run/gluster/7e71887fbbad3668e2838dcf2783d1eb.socket --brick-name /mnt/disk1/gluster-bricks/engine -l /var/log/glusterfs/bricks/mnt-disk1-gluster-bricks-engine.log --xlator-option *-posix.glusterd-uuid=59674673-93f6-420b-b4a9-750a3f79f104 --brick-port 49152 --xlator-option engine-server.listen-port=49152
	root      3328 25.2  0.1 1978272 53536 ?       Ssl  02:40 161:58 /usr/sbin/glusterfsd -s h6.imatronix.com --volfile-id vdisks-ssd.h6.imatronix.com.mnt-disk1-gluster-bricks-vdisks-ssd.1 -p /var/lib/glusterd/vols/vdisks-ssd/run/h6.imatronix.com-mnt-disk1-gluster-bricks-vdisks-ssd.1.pid -S /var/run/gluster/f853436ef3eb494cb9978142a7dbf1eb.socket --brick-name /mnt/disk1/gluster-bricks/vdisks-ssd.1 -l /var/log/glusterfs/bricks/mnt-disk1-gluster-bricks-vdisks-ssd.1.log --xlator-option *-posix.glusterd-uuid=59674673-93f6-420b-b4a9-750a3f79f104 --brick-port 49156 --xlator-option vdisks-ssd-server.listen-port=49156
	root      3347 19.7  0.1 1977244 49232 ?       Ssl  02:40 126:44 /usr/sbin/glusterfsd -s h6.imatronix.com --volfile-id vdisks-ssd.h6.imatronix.com.mnt-disk1-gluster-bricks-vdisks-ssd.2 -p /var/lib/glusterd/vols/vdisks-ssd/run/h6.imatronix.com-mnt-disk1-gluster-bricks-vdisks-ssd.2.pid -S /var/run/gluster/9bf99020d072181fe8061eb630683d1e.socket --brick-name /mnt/disk1/gluster-bricks/vdisks-ssd.2 -l /var/log/glusterfs/bricks/mnt-disk1-gluster-bricks-vdisks-ssd.2.log --xlator-option *-posix.glusterd-uuid=59674673-93f6-420b-b4a9-750a3f79f104 --brick-port 49157 --xlator-option vdisks-ssd-server.listen-port=49157
	root      3376  4.2  0.1 927776 53880 ?        Ssl  02:40  27:35 /usr/sbin/glusterfs -s localhost --volfile-id gluster/glustershd -p /var/lib/glusterd/glustershd/run/glustershd.pid -l /var/log/glusterfs/glustershd.log -S /var/run/gluster/6de10676be0e16a7240f034015814d71.socket --xlator-option *replicate*.node-uuid=59674673-93f6-420b-b4a9-750a3f79f104
	root      4285  0.4  0.1 584632 43800 ?        S<sl 02:57   2:59 /usr/sbin/glusterfs --volfile-server=h4.imatronix.com --volfile-id=vdisks /rhev/data-center/mnt/glusterSD/h4.imatronix.com:vdisks
	root      4374  0.6  0.2 1108920 73164 ?       S<sl 02:57   3:56 /usr/sbin/glusterfs --volfile-server=h4.imatronix.com --volfile-id=vdisks-ssd /rhev/data-center/mnt/glusterSD/h4.imatronix.com:vdisks-ssd
	root     11725  0.0  0.1 563200 34444 ?        Ssl  12:28   0:00 /usr/sbin/glusterfs --volfile-server=h4.imatronix.com --volfile-id=vdisks-ssd /mnt/disk1
	root     11733  0.0  0.0 115344   840 pts/1    S    12:28   0:00 /bin/sh /sbin/mount.glusterfs h4.imatronix.com:vdisks-ssd /mnt/disk1 -o rw
	root     12883  0.0  0.3 629404 122276 ?       SNsl 12:43   0:00 /usr/sbin/glusterfs -s localhost --volfile-id gluster/nfs -p /var/lib/glusterd/nfs/run/nfs.pid -l /var/log/glusterfs/nfs.log -S /var/run/gluster/fd5777af917447395d627f4fa8735f40.socket
	root     14088  0.0  0.0 112644   960 pts/1    S+   13:22   0:00 grep --color=auto gluster
	root     28004  0.1  0.1 584928 45864 ?        S<sl 04:34   0:56 /usr/sbin/glusterfs --volfile-server=h4.imatronix.com --volfile-id=engine /rhev/data-center/mnt/glusterSD/h4.imatronix.com:engine

9) How can I debug the problem? I'm not familiar with Gluster internals.
Let me know if I can drop you an E-mail when this happens again so we can debug this issue together.

Best regards,
Christopher

Comment 1 Christopher Pereira 2015-05-04 01:03:32 UTC
Using statedump, we found out that Sanlock was holding leases.
This was a replica-2 volume, which is not supported by Sanlock.
Please mark this bug as "Won't Fix" or as a "Sanlock + Gluster" duplicate.

Just in case you want to support replica-2 in the future: the problem seems to be that Sanlock takes and holds the lease, but then fails itself because the file is locked at the GlusterFS level.
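
For reference, a statedump like the one below can be requested through the Gluster CLI; a rough sketch, assuming the vdisks volume and the default dump directory:

    # ask all brick processes of the volume to dump their state under /var/run/gluster
    gluster volume statedump vdisks
    # then look for the posix-locks sections in the dump files
    grep -A6 'xlator.features.locks' /var/run/gluster/*.dump.*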

A Gluster statedump reveals these locks:

    [xlator.features.locks.vdisks-locks.inode]
    path=/ba7be27f-aee5-4436-ae9a-0764f551f9a7/dom_md/ids
    mandatory=0
    conn.1.id=<Host H5>-3016-2015/05/01-17:54:57:109200-vdisks-client-0-0-0
    conn.1.ref=1

         --- NOTE: conn.1 is Sanlock after a reboot ---
         --- 17:54:45 is UTC-0 = 14:54:41 is UTC-3 = last reboot ---

    conn.1.bound_xl=/mnt/disk1/gluster-bricks/vdisks
    conn.2.id=<Host H6>-3369-2015/04/30-05:40:59:928550-vdisks-client-0-0-0
    conn.2.ref=1
    conn.2.bound_xl=/mnt/disk1/gluster-bricks/vdisks
    conn.3.id=<Host H4>-31780-2015/04/30-05:57:15:152009-vdisks-client-0-0-0
    conn.3.ref=1
    conn.3.bound_xl=/mnt/disk1/gluster-bricks/vdisks
    conn.4.id=<Host H6>-16034-2015/04/30-16:40:26:355759-vdisks-client-0-0-0
    conn.4.ref=1
    conn.4.bound_xl=/mnt/disk1/gluster-bricks/vdisks
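
If a lock like this is confirmed to be stale (its owner is gone), Gluster can also release it explicitly instead of killing processes; a hedged sketch, using the path from the statedump above:

    # clear all inode locks held on the sanlock ids file of the vdisks volume
    gluster volume clear-locks vdisks /ba7be27f-aee5-4436-ae9a-0764f551f9a7/dom_md/ids kind all inode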

Sanlock was failing before and after the reboot (but took the lease).

    [...]
    2015-04-30 15:11:55-0300 15473 [4535]: s732 add_lockspace fail result -5
    2015-04-30 15:12:04-0300 15482 [4535]: s733 lockspace ba7be27f-aee5-4436-ae9a-0764f551f9a7:2:/rhev/data-center/mnt/glusterSD/h4.imatronix.com:vdisks/ba7be27f-aee5-4436-ae9a-0764f551f9a7/dom_md/ids:0
    2015-04-30 15:12:05-0300 15482 [27586]: ba7be27f aio collect 0 0x7f8ac80008c0:0x7f8ac80008d0:0x7f8ac8101000 result -5:0 match res
    2015-04-30 15:12:05-0300 15482 [27586]: read_sectors delta_leader offset 512 rv -5 /rhev/data-center/mnt/glusterSD/h4.imatronix.com:vdisks/ba7be27f-aee5-4436-ae9a-0764f551f9a7/dom_md/ids
    2015-04-30 15:12:05-0300 15483 [4535]: s733 add_lockspace fail result -5

           ---- Here Sanlock Started ----

           NOTE:

           14:54:41 is UTC-3, and it is the same time at which
           the lease was taken in Gluster

           ---

    2015-05-01 14:54:41-0300 1 [637]: sanlock daemon started 3.2.2 host e727194d-c2cb-4785-bd79-24277674bd2c.h5.imatron
    2015-05-01 14:54:58-0300 19 [644]: s1 lockspace 3233144b-7be1-445f-9ea6-6aebbacbb93f:2:/rhev/data-center/mnt/glusterSD/h4.imatronix.com:vdisks-ssd/3233144b-7be1-445f-9ea6-6aebbacbb93f/dom_md/ids:0
    2015-05-01 14:54:58-0300 19 [643]: s2 lockspace ba7be27f-aee5-4436-ae9a-0764f551f9a7:2:/rhev/data-center/mnt/glusterSD/h4.imatronix.com:vdisks/ba7be27f-aee5-4436-ae9a-0764f551f9a7/dom_md/ids:0
    2015-05-01 14:54:58-0300 19 [3300]: ba7be27f aio collect 0 0x7f0f280008c0:0x7f0f280008d0:0x7f0f28001000 result -5:0 match res
    2015-05-01 14:54:58-0300 19 [3300]: read_sectors delta_leader offset 512 rv -5 /rhev/data-center/mnt/glusterSD/h4.imatronix.com:vdisks/ba7be27f-aee5-4436-ae9a-0764f551f9a7/dom_md/ids
    2015-05-01 14:54:59-0300 20 [643]: s2 add_lockspace fail result -5
    2015-05-01 14:55:08-0300 29 [643]: s3 lockspace ba7be27f-aee5-4436-ae9a-0764f551f9a7:2:/rhev/data-center/mnt/glusterSD/h4.imatronix.com:vdisks/ba7be27f-aee5-4436-ae9a-0764f551f9a7/dom_md/ids:0
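
The "result -5" above is EIO coming back from the ids file. When this recurs, the Sanlock side can be inspected directly as well; a sketch, assuming the same lease path:

    # lockspaces and resources currently held by the sanlock daemon
    sanlock client status
    # read the delta-lease records straight from the ids file
    sanlock direct dump /rhev/data-center/mnt/glusterSD/h4.imatronix.com:vdisks/ba7be27f-aee5-4436-ae9a-0764f551f9a7/dom_md/ids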

Anyway, in the brick logs we had already been seeing the following for some days before:

    [2015-04-26 17:20:36.773252] W [fuse-bridge.c:2190:fuse_readv_cbk] 0-glusterfs-fuse: 544: READ => -1 (Input/output error)
    [2015-04-26 17:20:46.740232] W [fuse-bridge.c:1262:fuse_err_cbk] 0-glusterfs-fuse: 570: REMOVEXATTR() /__DIRECT_IO_TEST__ => -1 (No data available)
    [2015-04-26 17:20:46.808687] W [fuse-bridge.c:2190:fuse_readv_cbk] 0-glusterfs-fuse: 592: READ => -1 (Input/output error)
    [2015-04-26 17:20:56.821828] W [fuse-bridge.c:2190:fuse_readv_cbk] 0-glusterfs-fuse: 613: READ => -1 (Input/output error)
    [2015-04-26 17:21:06.834420] W [fuse-bridge.c:2190:fuse_readv_cbk] 0-glusterfs-fuse: 629: READ => -1 (Input/output error)
    [2015-04-26 17:21:16.835813] W [fuse-bridge.c:2190:fuse_readv_cbk] 0-glusterfs-fuse: 645: READ => -1 (Input/output error)
    [2015-04-26 17:21:27.088096] W [fuse-bridge.c:2190:fuse_readv_cbk] 0-glusterfs-fuse: 661: READ => -1 (Input/output error)
    [2015-04-26 17:21:37.071644] W [fuse-bridge.c:2190:fuse_readv_cbk] 0-glusterfs-fuse: 677: READ => -1 (Input/output error)
    [2015-04-26 17:21:47.274038] W [fuse-bridge.c:2190:fuse_readv_cbk] 0-glusterfs-fuse: 693: READ => -1 (Input/output error)
    [2015-04-26 17:21:57.236858] W [fuse-bridge.c:1262:fuse_err_cbk] 0-glusterfs-fuse: 739: REMOVEXATTR() /__DIRECT_IO_TEST__ => -1 (No data available)
    [2015-04-26 17:21:57.306904] W [fuse-bridge.c:2190:fuse_readv_cbk] 0-glusterfs-fuse: 761: READ => -1 (Input/output error)
    [2015-04-26 17:22:07.337893] W [fuse-bridge.c:2190:fuse_readv_cbk] 0-glusterfs-fuse: 790: READ => -1 (Input/output error)
    [2015-04-26 17:22:17.331690] W [fuse-bridge.c:2190:fuse_readv_cbk] 0-glusterfs-fuse: 806: READ => -1 (Input/output error)
    [2015-04-26 17:22:27.343796] W [fuse-bridge.c:2190:fuse_readv_cbk] 0-glusterfs-fuse: 822: READ => -1 (Input/output error)

Comment 2 SATHEESARAN 2015-05-04 05:54:02 UTC
Comment 0 reports the glusterfs version as 3.8dev, so this bug should be in the product "GlusterFS" and not "Red Hat Gluster Storage".

Moving it accordingly

Comment 3 Sahina Bose 2015-05-07 08:59:57 UTC
Did you have the same issue with a replica-3 volume as well?

Comment 4 Christopher Pereira 2015-05-07 09:53:47 UTC
Hi Sahina,

I'm not sure, because it seems like we corrupted the volume at some point and got overwhelmed with many different issues and workarounds, so testing again will be required.

Comment 5 Christopher Pereira 2015-05-25 21:36:00 UTC
I haven't seen this issue again.
I will close this bug and reopen it if necessary.
Thanks.