glusterd crashed while executing some gluster CLI-related scripts. This is the backtrace. Core was generated by `'. Program terminated with signal 6, Aborted. #0 pthread_cond_wait@@GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:162 162 ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S: No such file or directory. in ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S (gdb) bt #0 pthread_cond_wait@@GLIBC_2.3.2 () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S:162 #1 0x0000000004e8adc5 in syncenv_task (env=0x6b17b20) at ../../../libglusterfs/src/syncop.c:194 #2 0x0000000004e8aefb in syncenv_processor (thdata=0x6b17b20) at ../../../libglusterfs/src/syncop.c:232 #3 0x00000000056f3d8c in start_thread (arg=0x7ca0700) at pthread_create.c:304 #4 0x00000000059f104d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:112 #5 0x0000000000000000 in ?? () (gdb) t 3 [Switching to thread 3 (Thread 2472)]#0 0x000000000593ed05 in raise (sig=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:64 64 ../nptl/sysdeps/unix/sysv/linux/raise.c: No such file or directory. 
in ../nptl/sysdeps/unix/sysv/linux/raise.c (gdb) bt #0 0x000000000593ed05 in raise (sig=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:64 #1 0x0000000005942ab6 in abort () at abort.c:92 #2 0x00000000059377c5 in __assert_fail (assertion=0x7d35393 "dist_count == clusters", file=<value optimized out>, line=2157, function=<value optimized out>) at assert.c:81 #3 0x0000000007d05372 in volume_volgen_graph_build_clusters (graph=0x7feffd150, volinfo=0xd149240) at ../../../../../xlators/mgmt/glusterd/src/glusterd-volgen.c:2157 #4 0x0000000007d0542b in client_graph_builder (graph=0x7feffd150, volinfo=0xd149240, set_dict=0x8bbfe80, param=0x0) at ../../../../../xlators/mgmt/glusterd/src/glusterd-volgen.c:2186 #5 0x0000000007d02e99 in build_graph_generic (graph=0x7feffd150, volinfo=0xd149240, mod_dict=0xd1938d0, param=0x0, builder=0x7d053bc <client_graph_builder>) at ../../../../../xlators/mgmt/glusterd/src/glusterd-volgen.c:1135 #6 0x0000000007d05572 in build_client_graph (graph=0x7feffd150, volinfo=0xd149240, mod_dict=0xd1938d0) at ../../../../../xlators/mgmt/glusterd/src/glusterd-volgen.c:2228 #7 0x0000000007d07203 in generate_single_transport_client_volfile (volinfo=0xd149240, filepath=0x7feffd270 "/etc/glusterd/vols/vol/vol-fuse.vol", dict=0xd1938d0) at ../../../../../xlators/mgmt/glusterd/src/glusterd-volgen.c:2802 #8 0x0000000007d07571 in generate_client_volfiles (volinfo=0xd149240) at ../../../../../xlators/mgmt/glusterd/src/glusterd-volgen.c:2868 #9 0x0000000007d076cd in glusterd_create_volfiles_and_notify_services (volinfo=0xd149240) at ../../../../../xlators/mgmt/glusterd/src/glusterd-volgen.c:2907 #10 0x0000000007d266c6 in glusterd_op_remove_brick (dict=0x8bb6d70, op_errstr=0x7fefff4b8) at ../../../../../xlators/mgmt/glusterd/src/glusterd-brick-ops.c:1106 #11 0x0000000007cddb06 in glusterd_op_commit_perform (op=GD_OP_REMOVE_BRICK, dict=0x8bb6d70, op_errstr=0x7fefff4b8, rsp_dict=0x0) at ../../../../../xlators/mgmt/glusterd/src/glusterd-op-sm.c:2326 #12 
0x0000000007cdc01f in glusterd_op_ac_send_commit_op (event=0xd18b970, ctx=0xd18b490) at ../../../../../xlators/mgmt/glusterd/src/glusterd-op-sm.c:1663 #13 0x0000000007cdfdc9 in glusterd_op_sm () at ../../../../../xlators/mgmt/glusterd/src/glusterd-op-sm.c:3338 #14 0x0000000007d2447e in glusterd_handle_remove_brick (req=0x6b1e04c) at ../../../../../xlators/mgmt/glusterd/src/glusterd-brick-ops.c:434 #15 0x00000000050bc120 in rpcsvc_handle_rpc_call (svc=0x6b1de90, trans=0x8bb2ba0, msg=0x8bb3050) at ../../../../rpc/rpc-lib/src/rpcsvc.c:507 #16 0x00000000050bc49e in rpcsvc_notify (trans=0x8bb2ba0, mydata=0x6b1de90, event=RPC_TRANSPORT_MSG_RECEIVED, data=0x8bb3050) at ../../../../rpc/rpc-lib/src/rpcsvc.c:603 #17 0x00000000050c1e3d in rpc_transport_notify (this=0x8bb2ba0, event=RPC_TRANSPORT_MSG_RECEIVED, data=0x8bb3050) at ../../../../rpc/rpc-lib/src/rpc-transport.c:498 #18 0x0000000007f5a31a in socket_event_poll_in (this=0x8bb2ba0) at ../../../../../rpc/rpc-transport/socket/src/socket.c:1675 #19 0x0000000007f5a88e in socket_event_handler (fd=6, idx=1, data=0x8bb2ba0, poll_in=1, poll_out=0, poll_err=0) at ../../../../../rpc/rpc-transport/socket/src/socket.c:1790 #20 0x0000000004e7a025 in event_dispatch_epoll_handler (event_pool=0x5ca1670, events=0x606c7b0, i=0) at ../../../libglusterfs/src/event.c:794 #21 0x0000000004e7a23f in event_dispatch_epoll (event_pool=0x5ca1670) at ../../../libglusterfs/src/event.c:856 #22 0x0000000004e7a5b1 in event_dispatch (event_pool=0x5ca1670) at ../../../libglusterfs/src/event.c:956 #23 0x0000000000407d0b in main (argc=1, argv=0x7ff000528) at ../../../glusterfsd/src/glusterfsd.c:1592 (gdb) f 3 #3 0x0000000007d05372 in volume_volgen_graph_build_clusters (graph=0x7feffd150, volinfo=0xd149240) at ../../../../../xlators/mgmt/glusterd/src/glusterd-volgen.c:2157 2157 GF_ASSERT (dist_count == clusters); (gdb) p dist_count $1 = 0 (gdb) p clusters $2 = 1 (gdb) l volume_volgen_graph_build_clusters 2091 } 2092 2093 static int 2094 
volume_volgen_graph_build_clusters (volgen_graph_t *graph, 2095 glusterd_volinfo_t *volinfo) 2096 { 2097 char *replicate_args[] = {"cluster/replicate", 2098 "%s-replicate-%d"}; 2099 char *stripe_args[] = {"cluster/stripe", 2100 "%s-stripe-%d"}; (gdb) 2101 int rclusters = 0; 2102 int clusters = 0; 2103 int dist_count = 0; 2104 int ret = -1; 2105 2106 if (volinfo->sub_count > 1) { 2107 switch (volinfo->type) { 2108 case GF_CLUSTER_TYPE_REPLICATE: 2109 clusters = volgen_graph_build_clusters (graph, volinfo, 2110 replicate_args[0], (gdb) 2111 replicate_args[1], 2112 volinfo->brick_count, 2113 volinfo->sub_count); 2114 if (clusters < 0) 2115 goto out; 2116 break; 2117 case GF_CLUSTER_TYPE_STRIPE: 2118 clusters = volgen_graph_build_clusters (graph, volinfo, 2119 stripe_args[0], 2120 stripe_args[1], (gdb) 2121 volinfo->brick_count, 2122 volinfo->sub_count); 2123 if (clusters < 0) 2124 goto out; 2125 break; 2126 case GF_CLUSTER_TYPE_STRIPE_REPLICATE: 2127 /* Replicate after the clients, then stripe */ 2128 if (volinfo->replica_count == 0) 2129 return -1; 2130 clusters = volgen_graph_build_clusters (graph, volinfo, (gdb) 2131 replicate_args[0], 2132 replicate_args[1], 2133 volinfo->brick_count, 2134 volinfo->replica_count); 2135 if (clusters < 0) 2136 goto out; 2137 2138 rclusters = volinfo->brick_count/volinfo->replica_count; 2139 GF_ASSERT (rclusters == clusters); 2140 clusters = volgen_graph_build_clusters (graph, volinfo, (gdb) 2141 stripe_args[0], 2142 stripe_args[1], 2143 rclusters, 2144 volinfo->stripe_count); 2145 if (clusters < 0) 2146 goto out; 2147 break; 2148 default: 2149 gf_log ("", GF_LOG_ERROR, "volume inconsistency: " 2150 "unrecognized clustering type"); (gdb) 2151 goto out; 2152 } 2153 } 2154 2155 if (volinfo->sub_count) { 2156 dist_count = volinfo->brick_count / volinfo->sub_count; 2157 GF_ASSERT (dist_count == clusters); 2158 } else { 2159 dist_count = volinfo->brick_count; 2160 }
Root cause: dist_count = volinfo->brick_count / volinfo->sub_count. This expression is not always equal to the number of distribute clusters in a replicate-type volume. During a remove-brick operation we allow the number of bricks in a replicate cluster to be less than the replica count. For example, take a pure replicate volume with replica count 2 from which one brick has been removed: brick_count = 1 and sub_count = 2, so clusters = 1 (the only replica subvolume), but dist_count = 1 / 2 = 0 (integer division truncates), which is not equal to the number of replica clusters in the volume graph, so GF_ASSERT (dist_count == clusters) fails and glusterd aborts. This crash won't happen on more recent mainline since the assert has been removed (ref: http://review.gluster.com/435).
*** This bug has been marked as a duplicate of bug 3616 ***