Description of problem: I had I/O going to a mirror on taft-01 (part of a 4 node cluster taft-01 - taft-04) and taft-01 was the mirror server. I then rebooted taft-04 (which had no I/O going at all). After taft-04 was recovered, the I/O I had going and any more I/O that I attempted, hung. [root@taft-03 sbin]# dd if=/dev/zero of=/dev/mirror_1/mirror_10 count=10 skip=100 10+0 records in 10+0 records out [HANG] [root@taft-02 sbin]# strace dd if=/dev/zero of=/dev/mirror_1/mirror_10 count=10 skip=100 execve("/bin/dd", ["dd", "if=/dev/zero", "of=/dev/mirror_1/mirror_10", "count=10", "skip=100"], [/* 23 vars */]) = 0 uname({sys="Linux", node="taft-02", ...}) = 0 brk(0) = 0x508000 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2a95556000 access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory) open("/etc/ld.so.cache", O_RDONLY) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=110042, ...}) = 0 mmap(NULL, 110042, PROT_READ, MAP_PRIVATE, 3, 0) = 0x2a95557000 close(3) = 0 open("/lib64/tls/libc.so.6", O_RDONLY) = 3 read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0`\305\241"..., 832) = 832 fstat(3, {st_mode=S_IFREG|0755, st_size=1493186, ...}) = 0 mmap(0x3eefa00000, 2310056, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x3eefa00000 mprotect(0x3eefb2b000, 1085352, PROT_NONE) = 0 mmap(0x3eefc2a000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x12a000) = 0x3eefc2a000 mmap(0x3eefc30000, 16296, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x3eefc30000 close(3) = 0 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2a95572000 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2a95573000 mprotect(0x3eefc2a000, 12288, PROT_READ) = 0 arch_prctl(ARCH_SET_FS, 0x2a95572b00) = 0 munmap(0x2a95557000, 110042) = 0 brk(0) = 0x508000 brk(0x529000) = 0x529000 open("/usr/lib/locale/locale-archive", O_RDONLY) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=48510448, ...}) = 0 mmap(NULL, 48510448, PROT_READ, MAP_PRIVATE, 3, 0) = 0x2a95574000 close(3) = 0 close(0) = 0 open("/dev/zero", O_RDONLY) = 0 close(1) = 0 open("/dev/mirror_1/mirror_10", O_WRONLY|O_CREAT|O_TRUNC, 0666) = 1 rt_sigaction(SIGINT, NULL, {SIG_DFL}, 8) = 0 rt_sigaction(SIGINT, {0x401a80, [], SA_RESTORER, 0x3eefa2e380}, NULL, 8) = 0 rt_sigaction(SIGQUIT, NULL, {SIG_DFL}, 8) = 0 rt_sigaction(SIGQUIT, {0x401a80, [], SA_RESTORER, 0x3eefa2e380}, NULL, 8) = 0 rt_sigaction(SIGPIPE, NULL, {SIG_DFL}, 8) = 0 rt_sigaction(SIGPIPE, {0x401a80, [], SA_RESTORER, 0x3eefa2e380}, NULL, 8) = 0 rt_sigaction(SIGUSR1, NULL, {SIG_DFL}, 8) = 0 rt_sigaction(SIGUSR1, {0x401af0, [], SA_RESTORER, 0x3eefa2e380}, NULL, 8) = 0 ioctl(0, MGSL_IOCSTXIDLE or MTIOCGET or SNDCTL_MIDI_MPUCMD, 0x7fbffff760) = -1 ENOTTY (Inappropriate ioctl for device) lseek(0, 51200, SEEK_CUR) = 0 read(0, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 512) = 512 write(1, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 512) = 512 read(0, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 512) = 512 write(1, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 512) = 512 read(0, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 512) = 512 write(1, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 512) = 512 read(0, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 512) = 512 write(1, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 512) = 512 read(0, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 512) = 512 write(1, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 512) = 512 read(0, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 512) = 512 write(1, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 512) = 512 read(0, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 512) = 512 write(1, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 512) = 512 read(0, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 512) = 512 write(1, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 512) = 512 read(0, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 512) = 512 write(1, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 512) = 512 read(0, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 512) = 512 write(1, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 512) = 512 open("/usr/share/locale/locale.alias", O_RDONLY) = 3 fstat(3, {st_mode=S_IFREG|0644, st_size=2528, ...}) = 0 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2a983b8000 read(3, "# Locale name alias data base.\n#"..., 4096) = 2528 read(3, "", 4096) = 0 close(3) = 0 munmap(0x2a983b8000, 4096) = 0 open("/usr/share/locale/en_US.UTF-8/LC_MESSAGES/coreutils.mo", O_RDONLY) = -1 ENOENT (No such file or directory) open("/usr/share/locale/en_US.utf8/LC_MESSAGES/coreutils.mo", O_RDONLY) = -1 ENOENT (No such file or directory) open("/usr/share/locale/en_US/LC_MESSAGES/coreutils.mo", O_RDONLY) = -1 ENOENT (No such file or directory) open("/usr/share/locale/en.UTF-8/LC_MESSAGES/coreutils.mo", O_RDONLY) = -1 ENOENT (No such file or directory) open("/usr/share/locale/en.utf8/LC_MESSAGES/coreutils.mo", O_RDONLY) = -1 ENOENT (No such file or directory) open("/usr/share/locale/en/LC_MESSAGES/coreutils.mo", O_RDONLY) = -1 ENOENT (No such file or directory) write(2, "10+0 records in\n", 1610+0 records in ) = 16 write(2, "10+0 records out\n", 1710+0 records out ) = 17 close(0) = 0 close(1 [HANG] [root@taft-04 sbin]# lvs LV VG Attr LSize Origin Snap% Move Log Copy% mirror_10 mirror_1 mwi-a- 136.23G mirror_10_mlog 100.00 [root@taft-04 sbin]# dmsetup status mirror_1-mirror_10: 0 285696000 mirror 2 253:3 253:4 279001/279000 1 AA 3 clustered_disk 253:2 A mirror_1-mirror_10_mlog: 0 8192 linear VolGroup00-LogVol01: 0 4063232 linear mirror_1-mirror_10_mimage_1: 0 285696000 linear VolGroup00-LogVol00: 0 40960000 linear mirror_1-mirror_10_mimage_0: 0 285696000 linear May 31 04:20:10 taft-01 kernel: device-mapper: I'm the cluster log server for LVM-Pq5y7k May 31 04:20:10 taft-01 kernel: device-mapper: Disk Resume:: May 31 04:20:10 taft-01 kernel: device-mapper: Live nodes :: 1 May 31 04:20:10 taft-01 kernel: device-mapper: In-Use Regions :: 0 May 31 04:20:10 taft-01 kernel: device-mapper: Good IUR's :: 0 May 31 04:20:10 taft-01 kernel: device-mapper: Bad IUR's :: 0 May 31 04:20:10 taft-01 kernel: device-mapper: Sync count :: 0 May 31 04:20:10 taft-01 kernel: device-mapper: Disk Region count :: 184467440737095515 May 31 04:20:10 taft-01 kernel: device-mapper: Region count :: 279000 May 31 04:20:10 taft-01 kernel: device-mapper: NOTE: Mapping has changed. May 31 04:20:10 taft-01 kernel: device-mapper: Marked regions:: May 31 04:20:10 taft-01 kernel: device-mapper: 0 - -1 May 31 04:20:10 taft-01 kernel: device-mapper: Total = -1 May 31 04:20:10 taft-01 kernel: device-mapper: Out-of-sync regions:: May 31 04:20:10 taft-01 kernel: device-mapper: 0 - -1 May 31 04:20:10 taft-01 kernel: device-mapper: Total = -1 May 31 04:20:11 taft-01 lvm[15125]: Monitoring mirror device, mirror_1-mirror_10 for ev May 31 04:20:30 taft-01 qarshd[15127]: Talking to peer 10.15.80.47:49294 May 31 04:20:30 taft-01 qarshd[15127]: Running cmdline: PATH=/usr/kerberos/bin:/usr/loc CMAN: removing node taft-04 from the cluster : Missed too many heartbeats May 31 04:39:24 taft-01 kernel: CMAN: removing node taft-04 from the cluster : Missed too mas May 31 04:39:57 taft-01 fenced[14110]: taft-04 not a cluster member after 30 sec post_fail_dy May 31 04:39:57 taft-01 fenced[14110]: fencing node "taft-04" May 31 04:40:01 taft-01 fenced[14110]: fence "taft-04" success May 31 04:40:06 taft-01 kernel: device-mapper: A node has failed. May 31 04:40:06 taft-01 kernel: device-mapper: Disk Resume:: May 31 04:40:06 taft-01 kernel: device-mapper: Live nodes :: 3 May 31 04:40:06 taft-01 kernel: device-mapper: In-Use Regions :: 2 May 31 04:40:06 taft-01 kernel: device-mapper: Good IUR's :: 2 May 31 04:40:06 taft-01 kernel: device-mapper: Bad IUR's :: 0 May 31 04:40:06 taft-01 kernel: device-mapper: Sync count :: 2 May 31 04:40:06 taft-01 kernel: device-mapper: Disk Region count :: 279000 May 31 04:40:06 taft-01 kernel: device-mapper: Region count :: 279000 May 31 04:40:06 taft-01 kernel: device-mapper: Marked regions:: May 31 04:40:06 taft-01 kernel: device-mapper: 0 - 279000 May 31 04:40:06 taft-01 kernel: device-mapper: Total = 279000 May 31 04:40:06 taft-01 kernel: device-mapper: Out-of-sync regions:: May 31 04:40:06 taft-01 kernel: device-mapper: 1 - 47190 May 31 04:40:06 taft-01 kernel: device-mapper: 47192 - 279000 May 31 04:40:06 taft-01 kernel: device-mapper: Total = 278998 May 31 04:44:00 taft-01 kernel: CMAN: node taft-04 rejoining Version-Release number of selected component (if applicable): [root@taft-04 sbin]# rpm -q cmirror-kernel-smp cmirror-kernel-smp-2.6.9-4.2
Please retest with kernel > 39.EL
Sorry, this should really be "modified" not needinfo
fix verified.
Assuming this VERIFIED fix got released. Closing. Reopen if it's not yet resolved.