Bug 194137
| Summary: | deadlock when attempting to create mulitple mirrors at the same time | ||
|---|---|---|---|
| Product: | [Retired] Red Hat Cluster Suite | Reporter: | Corey Marthaler <cmarthal> |
| Component: | cmirror | Assignee: | Jonathan Earl Brassow <jbrassow> |
| Status: | CLOSED CURRENTRELEASE | QA Contact: | Cluster QE <mspqa-list> |
| Severity: | high | Docs Contact: | |
| Priority: | high | ||
| Version: | 4 | CC: | agk, dwysocha, mbroz |
| Target Milestone: | --- | ||
| Target Release: | --- | ||
| Hardware: | All | ||
| OS: | Linux | ||
| Whiteboard: | |||
| Fixed In Version: | Doc Type: | Bug Fix | |
| Doc Text: | Story Points: | --- | |
| Clone Of: | Environment: | ||
| Last Closed: | 2010-04-27 14:54:19 UTC | Type: | --- |
| Regression: | --- | Mount Type: | --- |
| Documentation: | --- | CRM: | |
| Verified Versions: | Category: | --- | |
| oVirt Team: | --- | RHEL 7.3 requirements from Atomic Host: | |
| Cloudforms Team: | --- | Target Upstream Version: | |
| Embargoed: | |||
| Bug Depends On: | |||
| Bug Blocks: | 180185 | ||
Activation seems to be working... (as long as the two mirror devices mentioned in the logs are the only ones that are attempted to be created). This could be collisions in event registration. Please retest with the latest device=mapper rpms - the ones including dmeventd as a stand-alone binary. I attempted to create a mirror from each of the four nodes in my cluster at the same time and only one of the create commands returned. The other three commands along with any other lvm command I tried (in order to see if the mirrors were actually created and in sync) hung. It is possible though that this deadlock is caused by 195392. After I rebooted the 4 machines, I learned that only two of the mirrors in comment #4 had been created. Reproduced this a couple times this afternoon. Here are the places that these
commands are getting stuck:
lvcreate 1:
[...]
open("/etc/lvm/backup", O_RDONLY) = 9
fsync(9) = 0
close(9) = 0
lseek(5, 4096, SEEK_SET) = 4096
read(5, "\215_l_ LVM2 x[5A%r0N*>\1\0\0\0\0\20\0\0\0\0\0\0"..., 4096) = 4096
lseek(5, 106496, SEEK_SET) = 106496
read(5, "# linear\n\nstripes = [\n\"pv3\", 0\n]"..., 4096) = 4096
lseek(5, 4096, SEEK_SET) = 4096
read(5, "\215_l_ LVM2 x[5A%r0N*>\1\0\0\0\0\20\0\0\0\0\0\0"..., 4096) = 4096
lseek(5, 4096, SEEK_SET) = 4096
write(5, "M\236%Z LVM2 x[5A%r0N*>\1\0\0\0\0\20\0\0\0\0\0\0"..., 4096) = 4096
close(5) = 0
lseek(6, 4096, SEEK_SET) = 4096
read(6, "\215_l_ LVM2 x[5A%r0N*>\1\0\0\0\0\20\0\0\0\0\0\0"..., 512) = 512
lseek(6, 108032, SEEK_SET) = 108032
read(6, "B {\nid = \"06KWBz-rFMw-Up36-z4d0-"..., 512) = 512
lseek(6, 4096, SEEK_SET) = 4096
read(6, "\215_l_ LVM2 x[5A%r0N*>\1\0\0\0\0\20\0\0\0\0\0\0"..., 512) = 512
lseek(6, 4096, SEEK_SET) = 4096
write(6, "M\236%Z LVM2 x[5A%r0N*>\1\0\0\0\0\20\0\0\0\0\0\0"..., 512) = 512
close(6) = 0
lseek(7, 4096, SEEK_SET) = 4096
read(7, "\215_l_ LVM2 x[5A%r0N*>\1\0\0\0\0\20\0\0\0\0\0\0"..., 512) = 512
lseek(7, 108032, SEEK_SET) = 108032
read(7, "B {\nid = \"06KWBz-rFMw-Up36-z4d0-"..., 512) = 512
lseek(7, 4096, SEEK_SET) = 4096
read(7, "\215_l_ LVM2 x[5A%r0N*>\1\0\0\0\0\20\0\0\0\0\0\0"..., 512) = 512
lseek(7, 4096, SEEK_SET) = 4096
write(7, "M\236%Z LVM2 x[5A%r0N*>\1\0\0\0\0\20\0\0\0\0\0\0"..., 512) = 512
close(7) = 0
lseek(8, 4096, SEEK_SET) = 4096
read(8, "\215_l_ LVM2 x[5A%r0N*>\1\0\0\0\0\20\0\0\0\0\0\0"..., 4096) = 4096
lseek(8, 106496, SEEK_SET) = 106496
read(8, "# linear\n\nstripes = [\n\"pv3\", 0\n]"..., 4096) = 4096
lseek(8, 4096, SEEK_SET) = 4096
read(8, "\215_l_ LVM2 x[5A%r0N*>\1\0\0\0\0\20\0\0\0\0\0\0"..., 4096) = 4096
lseek(8, 4096, SEEK_SET) = 4096
write(8, "M\236%Z LVM2 x[5A%r0N*>\1\0\0\0\0\20\0\0\0\0\0\0"..., 4096) = 4096
close(8) = 0
write(3, "2\0Fr\0\0\0\0\0\0\0\0C\0\0\0\0\31\20406KWBzrFMwUp3"..., 85) = 85
read(3, "\1\0\0\0\0\0\0\0\0\0\0\0005\0\0\0\0t", 18) = 18
read(3, "aft-03\0\0\0\0\0\0taft-02\0\0\0\0\0\0taft-04"..., 53) = 53
stat("/dev/B/taft-01", {st_mode=S_IFBLK|0660, st_rdev=makedev(253, 13), ...}) = 0
stat("/dev/B/taft-01", {st_mode=S_IFBLK|0660, st_rdev=makedev(253, 13), ...}) = 0
stat("/dev/B/taft-01", {st_mode=S_IFBLK|0660, st_rdev=makedev(253, 13), ...}) = 0
open("/dev/B/taft-01", O_RDWR|O_DIRECT|O_NOATIME) = 5
fstat(5, {st_mode=S_IFBLK|0660, st_rdev=makedev(253, 13), ...}) = 0
ioctl(5, BLKBSZGET, 0x68e018) = 0
lseek(5, 0, SEEK_SET) = 0
read(5, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 4096) = 4096
lseek(5, 0, SEEK_SET) = 0
write(5, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 4096
lvcreate 2:
[...]
stat("/etc/lvm/lvm.conf", {st_mode=S_IFREG|0644, st_size=10504, ...}) = 0
stat("/usr/lib64/liblvm2clusterlock.so", {st_mode=S_IFREG|0555, st_size=10040,
...}) = 0
futex(0x34b8a020c4, FUTEX_WAKE, 2147483647) = 0
open("/usr/lib64/liblvm2clusterlock.so", O_RDONLY) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\220\f\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0555, st_size=10040, ...}) = 0
mmap(NULL, 1056400, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) =
0x2a983bf000
mprotect(0x2a983c1000, 1048208, PROT_NONE) = 0
mmap(0x2a984c0000, 4096, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1000) = 0x2a984c0000
close(3) = 0
socket(PF_FILE, SOCK_STREAM, 0) = 3
connect(3, {sa_family=AF_FILE, path=@clvmd}, 110) = 0
open("/proc/devices", O_RDONLY) = 4
fstat(4, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) =
0x2a984c1000
read(4, "Character devices:\n 1 mem\n 4 /"..., 1024) = 440
close(4) = 0
munmap(0x2a984c1000, 4096) = 0
open("/proc/misc", O_RDONLY) = 4
fstat(4, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) =
0x2a984c1000
read(4, " 61 dlm_clvmd\n 62 dlm-control\n18"..., 1024) = 81
close(4) = 0
munmap(0x2a984c1000, 4096) = 0
stat("/dev/mapper/control", {st_mode=S_IFCHR|0600, st_rdev=makedev(10, 63),
...}) = 0
open("/dev/mapper/control", O_RDWR) = 4
open("/proc/devices", O_RDONLY) = 5
fstat(5, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) =
0x2a984c1000
read(5, "Character devices:\n 1 mem\n 4 /"..., 1024) = 440
read(5, "", 1024) = 0
close(5) = 0
munmap(0x2a984c1000, 4096) = 0
ioctl(4, DM_VERSION, 0x6b11c0) = 0
ioctl(4, DM_LIST_VERSIONS, 0x6b11c0) = 0
ioctl(4, DM_LIST_VERSIONS, 0x6b11c0) = 0
stat("/proc/lvm/VGs/B", 0x7fbfffb520) = -1 ENOENT (No such file or directory)
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [], 8) = 0
write(3, "3\1\377\277\0\0\0\0\0\0\0\0\6\0\0\0\0\4\4V_B\0\376", 24) = 24
read(3,
lvs:
[...]
fstat(4, {st_mode=S_IFBLK|0660, st_rdev=makedev(8, 37), ...}) = 0
ioctl(4, BLKBSZGET, 0x68cba8) = 0
lseek(4, 4096, SEEK_SET) = 4096
read(4, "#)\363b LVM2 x[5A%r0N*>\1\0\0\0\0\20\0\0\0\0\0\0"..., 4096) = 4096
lseek(4, 36864, SEEK_SET) = 36864
read(4, "ntents = \"Text Format Volume Gro"..., 4096) = 4096
lseek(4, 36864, SEEK_SET) = 36864
read(4, "ntents = \"Text Format Volume Gro"..., 4096) = 4096
stat("/dev/sdc1", {st_mode=S_IFBLK|0660, st_rdev=makedev(8, 33), ...}) = 0
stat("/dev/sdc1", {st_mode=S_IFBLK|0660, st_rdev=makedev(8, 33), ...}) = 0
open("/dev/sdc1", O_RDONLY|O_DIRECT|O_NOATIME) = 5
fstat(5, {st_mode=S_IFBLK|0660, st_rdev=makedev(8, 33), ...}) = 0
ioctl(5, BLKBSZGET, 0x68cf90) = 0
lseek(5, 0, SEEK_SET) = 0
read(5, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 4096) = 4096
lseek(5, 4096, SEEK_SET) = 4096
read(5, "#)\363b LVM2 x[5A%r0N*>\1\0\0\0\0\20\0\0\0\0\0\0"..., 4096) = 4096
lseek(5, 36864, SEEK_SET) = 36864
read(5, "ntents = \"Text Format Volume Gro"..., 4096) = 4096
lseek(5, 36864, SEEK_SET) = 36864
read(5, "ntents = \"Text Format Volume Gro"..., 4096) = 4096
close(5) = 0
stat("/dev/sdc2", {st_mode=S_IFBLK|0660, st_rdev=makedev(8, 34), ...}) = 0
stat("/dev/sdc2", {st_mode=S_IFBLK|0660, st_rdev=makedev(8, 34), ...}) = 0
open("/dev/sdc2", O_RDONLY|O_DIRECT|O_NOATIME) = 5
fstat(5, {st_mode=S_IFBLK|0660, st_rdev=makedev(8, 34), ...}) = 0
ioctl(5, BLKBSZGET, 0x68d598) = 0
lseek(5, 0, SEEK_SET) = 0
read(5, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 2048) = 2048
lseek(5, 4096, SEEK_SET) = 4096
read(5, "#)\363b LVM2 x[5A%r0N*>\1\0\0\0\0\20\0\0\0\0\0\0"..., 512) = 512
lseek(5, 39936, SEEK_SET) = 39936
read(5, "C {\nid = \"0nLVHe-MgDZ-APaF-luuU-"..., 512) = 512
lseek(5, 39936, SEEK_SET) = 39936
read(5, "C {\nid = \"0nLVHe-MgDZ-APaF-luuU-"..., 1024) = 1024
close(5) = 0
stat("/dev/sdc3", {st_mode=S_IFBLK|0660, st_rdev=makedev(8, 35), ...}) = 0
stat("/dev/sdc3", {st_mode=S_IFBLK|0660, st_rdev=makedev(8, 35), ...}) = 0
open("/dev/sdc3", O_RDONLY|O_DIRECT|O_NOATIME) = 5
fstat(5, {st_mode=S_IFBLK|0660, st_rdev=makedev(8, 35), ...}) = 0
ioctl(5, BLKBSZGET, 0x68d9e0) = 0
lseek(5, 0, SEEK_SET) = 0
read(5, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 2048) = 2048
lseek(5, 4096, SEEK_SET) = 4096
read(5, "#)\363b LVM2 x[5A%r0N*>\1\0\0\0\0\20\0\0\0\0\0\0"..., 512) = 512
lseek(5, 39936, SEEK_SET) = 39936
read(5, "C {\nid = \"0nLVHe-MgDZ-APaF-luuU-"..., 512) = 512
lseek(5, 39936, SEEK_SET) = 39936
read(5, "C {\nid = \"0nLVHe-MgDZ-APaF-luuU-"..., 1024) = 1024
close(5) = 0
lseek(4, 0, SEEK_SET) = 0
read(4, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 4096) = 4096
lseek(4, 4096, SEEK_SET) = 4096
read(4, "#)\363b LVM2 x[5A%r0N*>\1\0\0\0\0\20\0\0\0\0\0\0"..., 4096) = 4096
lseek(4, 36864, SEEK_SET) = 36864
read(4, "ntents = \"Text Format Volume Gro"..., 4096) = 4096
lseek(4, 36864, SEEK_SET) = 36864
read(4, "ntents = \"Text Format Volume Gro"..., 4096) = 4096
close(4) = 0
stat("/proc/lvm/VGs/C", 0x7fbfffb510) = -1 ENOENT (No such file or directory)
write(3, "3\0010-\0\0\0\0\0\0\0\0\6\0\0\0\0\6\4V_C\0 ", 24) = 24
read(3, "\1\0S\3721\0\0\0\0\0\0\0\16\0\0\0\0t", 18) = 18
read(3, "aft-04\0\0\0\0\0\0\0\0", 14) = 14
rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0
stat("/proc/lvm/VGs/B", 0x7fbfffb510) = -1 ENOENT (No such file or directory)
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [], 8) = 0
write(3, "3\1i\0\0\0\0\0\0\0\0\0\6\0\0\0\0\1\4V_B\0\0", 24) = 24
read(3,
Are all the mirrors uniquely named? yes the mirrors uniquely named. while true; do
for i in $(seq 2); do
lvcreate -m1 -L 500M -n `hostname -s`$i vg
done
lvremove -ff vg/`hostname -s`1 vg/`hostname -s`2
while [ `lvs vg | wc -l` -ne 0 ]; do
echo "Waiting for others"
sleep 1
done
sleep 10
done
Running the above on three machines at the same time does not cause this to
happen. It is my guess that this has been fixed in response to other bugs.
When fixing 195392, I noticed that a call to dmeventd could hang. I've made the
read associated with that non-blocking. I suspect this is a different symptom
of the same problem.
This appears to be fixed. Assuming this VERIFIED fix got released. Closing. Reopen if it's not yet resolved. |
Description of problem: I attempted to create two mirrors from two different nodes at roughly the same time and that caused clvmd to deadlock. One of the creation attempts: [...] stat("/etc/lvm/lvm.conf", {st_mode=S_IFREG|0644, st_size=10504, ...}) = 0 stat("/usr/lib64/liblvm2clusterlock.so", {st_mode=S_IFREG|0555, st_size=10008, ...}) = 0 futex(0x3f679020c4, FUTEX_WAKE, 2147483647) = 0 open("/usr/lib64/liblvm2clusterlock.so", O_RDONLY) = 3 read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\220\f\0"..., 832) = 832 fstat(3, {st_mode=S_IFREG|0555, st_size=10008, ...}) = 0 mmap(NULL, 1056368, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x2a983c3000 mprotect(0x2a983c5000, 1048176, PROT_NONE) = 0 mmap(0x2a984c4000, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1000) = 0x2a984c4000 close(3) = 0 socket(PF_FILE, SOCK_STREAM, 0) = 3 connect(3, {sa_family=AF_FILE, path=@clvmd}, 110) = 0 open("/proc/devices", O_RDONLY) = 4 fstat(4, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2a984c5000 read(4, "Character devices:\n 1 mem\n 4 /"..., 1024) = 440 close(4) = 0 munmap(0x2a984c5000, 4096) = 0 open("/proc/misc", O_RDONLY) = 4 fstat(4, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2a984c5000 read(4, " 61 dlm_clvmd\n 62 dlm-control\n18"..., 1024) = 81 close(4) = 0 munmap(0x2a984c5000, 4096) = 0 stat("/dev/mapper/control", {st_mode=S_IFCHR|0600, st_rdev=makedev(10, 63), ...}) = 0 open("/dev/mapper/control", O_RDWR) = 4 open("/proc/devices", O_RDONLY) = 5 fstat(5, {st_mode=S_IFREG|0444, st_size=0, ...}) = 0 mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2a984c5000 read(5, "Character devices:\n 1 mem\n 4 /"..., 1024) = 440 read(5, "", 1024) = 0 close(5) = 0 munmap(0x2a984c5000, 4096) = 0 ioctl(4, DM_VERSION, 0x6b11c0) = 0 ioctl(4, DM_LIST_VERSIONS, 0x6b11c0) = 0 ioctl(4, DM_LIST_VERSIONS, 0x6b11c0) = 0 stat("/proc/lvm/VGs/mirror_1", 0x7fbfffb530) = -1 ENOENT (No such file or directory) rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [], 8) = 0 write(3, "3\1\377\277\0\0\0\0\0\0\0\0\r\0\0\0\0\4\4V_mirror_1\0\0"..., 31) = 31 read(3, {HANG] [root@taft-02 ~]# lvs [HANG] One of the mirrors however is reported to be "in sync": Jun 5 11:16:38 taft-01 lvm[5008]: Monitoring mirror device, mirror_1-coreymirror3 for event Jun 5 11:16:38 taft-01 kernel: device-mapper: I'm the cluster mirror log server for LVM-0I9k Jun 5 11:16:38 taft-01 kernel: device-mapper: Disk Resume:: Jun 5 11:16:38 taft-01 kernel: device-mapper: Live nodes :: 4 Jun 5 11:16:38 taft-01 kernel: device-mapper: In-Use Regions :: 0 Jun 5 11:16:38 taft-01 kernel: device-mapper: Good IUR's :: 0 Jun 5 11:16:38 taft-01 kernel: device-mapper: Bad IUR's :: 0 Jun 5 11:16:38 taft-01 kernel: device-mapper: Sync count :: 0 Jun 5 11:16:38 taft-01 kernel: device-mapper: Disk Region count :: 18446744073709551615 Jun 5 11:16:38 taft-01 kernel: device-mapper: Region count :: 1504 Jun 5 11:16:38 taft-01 kernel: device-mapper: NOTE: Mapping has changed. Jun 5 11:16:38 taft-01 kernel: device-mapper: Marked regions:: Jun 5 11:16:38 taft-01 kernel: device-mapper: 0 - 1504 Jun 5 11:16:38 taft-01 kernel: device-mapper: Total = 1504 Jun 5 11:16:38 taft-01 kernel: device-mapper: Out-of-sync regions:: Jun 5 11:16:38 taft-01 kernel: device-mapper: 0 - 1504 Jun 5 11:16:38 taft-01 kernel: device-mapper: Total = 1504 Jun 5 11:17:43 taft-02 lvm[4921]: Monitoring mirror device, mirror_1-coreymirror3 for event Jun 5 11:17:43 taft-02 lvm[4921]: mirror_1-coreymirror is now in-sync Jun 5 11:17:04 taft-03 lvm[4926]: Monitoring mirror device, mirror_1-coreymirror3 for event Jun 5 12:24:48 taft-04 sshd(pam_unix)[4327]: session opened for user root by root(uid=0) Jun 5 12:25:34 taft-04 lvm[3124]: Monitoring mirror device, mirror_1-coreymirror3 for event Jun 5 12:25:48 taft-04 lvm[3124]: mirror_1-coreymirror3 is now in-sync Version-Release number of selected component (if applicable): [root@taft-04 ~]# rpm -qa | grep cmirror cmirror-kernel-smp-2.6.9-5.0 cmirror-kernel-2.6.9-5.0 cmirror-1.0.0-5 cmirror-kernel-largesmp-2.6.9-5.0