Bug 763314 (GLUSTER-1582)

Summary: replace-brick data missing added brick
Product: [Community] GlusterFS Reporter: Lakshmipathi G <lakshmipathi>
Component: glusterd Assignee: Pavan Vilas Sondur <pavan>
Status: CLOSED CURRENTRELEASE QA Contact:
Severity: high Docs Contact:
Priority: low    
Version: 3.1-alpha CC: gluster-bugs, vijay
Target Milestone: ---   
Target Release: ---   
Hardware: All   
OS: Linux   
Whiteboard:
Fixed In Version: Doc Type: Bug Fix
Doc Text:
Story Points: ---
Clone Of: Environment:
Last Closed: Type: ---
Regression: RTP Mount Type: fuse
Documentation: --- CRM:
Verified Versions: Category: ---
oVirt Team: --- RHEL 7.3 requirements from Atomic Host:
Cloudforms Team: ---

Description Lakshmipathi G 2010-09-09 04:24:10 EDT
Replacing brick -
In a volume with a 2-brick DHT setup, trying to replace the 2nd brick.

#gluster volume replace-brick DHT 10.192.134.144:/mnt/A1 10.214.231.112:/mnt/A1 start
replace-brick started successfully

#gluster volume replace-brick DHT 10.192.134.144:/mnt/A1 10.214.231.112:/mnt/A1 commit 
replace-brick commit successful

volume info now shows the new brick too.

#gluster volume info

Volume Name: DHT
Type: Distribute
Status: Started
Number of Bricks: 2
Transport-type: tcp
Bricks:
Brick1: 10.192.141.187:/mnt/A1
Brick2: 10.214.231.112:/mnt/A1


but the new brick doesn't have any files -
10.214.231.112#ls /mnt/A1

#ps ax|grep glus
13436 ?        Ssl    0:38 /old_opt/3.0.4/sbin/glusterfsd -f /root/laks/cfg.vol /opt
30832 ?        Ssl    0:00 glusterd
30856 ?        Ssl    0:00 /usr/local/sbin/glusterfs --xlator-option DHT-server.listen-port=6971 -s localhost --volfile-id DHT.10.214.231.112.mnt-A1 -p /etc/glusterd/vols/DHT/run/10.214.231.112-mnt-A1.pid --brick-name /mnt/A1 --brick-port 6971 -l /etc/glusterd/logs/mnt-A1.log
30868 ?        Ssl    0:00 /usr/local/sbin/glusterfs -f /etc/glusterd/nfs/nfs-server.vol -p /etc/glusterd/nfs/run/nfs.pid
30874 pts/0    S+     0:00 grep glus


Running "ls -l" on the client side shows only files from brick1. Now the newly added brick3 has the same files as brick1.

The content of brick2 does not appear in brick3.
Comment 1 Vijay Bellur 2010-09-09 05:23:38 EDT
PATCH: http://patches.gluster.com/patch/4709 in master (mgmt/glusterd: Do not check for src/dst while issuing replace brick cmds.)
Comment 2 Lakshmipathi G 2010-09-09 06:19:50 EDT
While testing with qa20, the replace-brick command crashed glusterd. Here is the backtrace:
====
(gdb) bt full
#0  0x00002aaaab7b4ec5 in raise () from /lib64/libc.so.6
No symbol table info available.
#1  0x00002aaaab7b6970 in abort () from /lib64/libc.so.6
No symbol table info available.
#2  0x00002aaaab7ae11f in __assert_fail () from /lib64/libc.so.6
No symbol table info available.
#3  0x00002aaaac831a9b in rb_generate_client_volfile (volinfo=0x63a2e8, src_brickinfo=0x63dea8) at glusterd-op-sm.c:1483
	priv = (glusterd_conf_t *) 0x634bf8
	file = (FILE *) 0x643670
	filename = "/etc/glusterd/vols/dht765/rb_client.vol", '\0' <repeats 1057 times>, "\002\000\000\000\000\000\000\000\001\000\000\000\000\000\000\000.\216Ѫ�*\000\000%%%%%%%%\000\000\000\000\000\000\000\000\002\000\000\000\000\000\000\000\220H���\177\000\000-\216Ѫ�*\000\000/\216Ѫ�*\000\000\000\000\000\000\000\000\000\000�\201|��*\000\000\000\000\000\000\000\000\000\000�H���\177\000\000\200C���\177\000\000\027\216Ѫ�*\000\000\000\000\000\000\000\000\000\000�\201|��*\000\000 J���\177\000\000\030\216Ѫ�*\000"...
	ret = -1
	__FUNCTION__ = "rb_generate_client_volfile"
	__PRETTY_FUNCTION__ = "rb_generate_client_volfile"
#4  0x00002aaaac8321b3 in rb_spawn_maintainence_client (volinfo=0x63a2e8, src_brickinfo=0x63dea8) at glusterd-op-sm.c:1668
	ret = -1
	__FUNCTION__ = "rb_spawn_maintainence_client"
#5  0x00002aaaac83247a in rb_do_operation_start (volinfo=0x63a2e8, src_brickinfo=0x63dea8, dst_brickinfo=0x644dd8) at glusterd-op-sm.c:1729
	start_value = '\0' <repeats 8191 times>
	ret = -1
	__FUNCTION__ = "rb_do_operation_start"
#6  0x00002aaaac83628d in glusterd_op_ac_rcvd_commit_op_acc (event=0x6438c8, ctx=0x0) at glusterd-op-sm.c:2966
	volinfo = (glusterd_volinfo_t *) 0x63a2e8
	op = 1
	dict = (dict_t *) 0x638ae8
	src_brick = 0x642fd3 "10.192.134.144:/mnt/T1"
	dst_brick = 0x642faa "10.214.231.112:/mnt/T1"
	volname = 0x642ffa "dht765"
	src_brickinfo = (glusterd_brickinfo_t *) 0x63dea8
	dst_brickinfo = (glusterd_brickinfo_t *) 0x644dd8
	ret = 0
	__PRETTY_FUNCTION__ = "glusterd_op_ac_rcvd_commit_op_acc"
	__FUNCTION__ = "glusterd_op_ac_rcvd_commit_op_acc"
#7  0x00002aaaac837684 in glusterd_op_sm () at glusterd-op-sm.c:3662
	event = (glusterd_op_sm_event_t *) 0x6438c8
Missing separate debuginfos, use: debuginfo-install gcc.x86_64 glibc.x86_64
---Type <return> to continue, or q <return> to quit---
	tmp = (glusterd_op_sm_event_t *) 0x2aaaaca5ca30
	ret = -1
	handler = (glusterd_op_sm_ac_fn) 0x2aaaac835d7e <glusterd_op_ac_rcvd_commit_op_acc>
	state = (glusterd_op_sm_t *) 0x2aaaaca5b5a0
	event_type = GD_OP_EVENT_RCVD_ACC
	__PRETTY_FUNCTION__ = "glusterd_op_sm"
	__FUNCTION__ = "glusterd_op_sm"
#8  0x00002aaaac83e704 in glusterd3_1_commit_op_cbk (req=0x2aaaad2b6038, iov=0x2aaaad2b6078, count=1, myframe=0x2aaaac540820) at glusterd3_1-mops.c:576
	rsp = {uuid = {127 '\177', 166 '�', 193 '�', 89 'Y', 134 '\206', 64 '@', 73 'I', 113 'q', 182 '�', 244 '�', 207 '�', 148 '\224', 97 'a', 188 '�', 72 'H', 
    176 '�'}, op = 11, op_ret = 0, op_errno = 0}
	ret = 0
	op_ret = 0
	event_type = GD_OP_EVENT_RCVD_ACC
	peerinfo = (glusterd_peerinfo_t *) 0x639628
	str = "7fa6c159-8640-4971-b6f4-cf9461bc48b0", '\0' <repeats 13 times>
	__PRETTY_FUNCTION__ = "glusterd3_1_commit_op_cbk"
	__FUNCTION__ = "glusterd3_1_commit_op_cbk"
#9  0x00002aaaaaf39e27 in rpc_clnt_handle_reply (clnt=0x641078, pollin=0x643568) at rpc-clnt.c:734
	conn = (rpc_clnt_connection_t *) 0x6410a8
	saved_frame = (struct saved_frame *) 0x2aaaad4f7038
	ret = 0
	req = (struct rpc_req *) 0x2aaaad2b6038
	xid = 6
	__FUNCTION__ = "rpc_clnt_handle_reply"
#10 0x00002aaaaaf3a147 in rpc_clnt_notify (trans=0x641288, mydata=0x6410a8, event=RPC_TRANSPORT_MSG_RECEIVED, data=0x643568) at rpc-clnt.c:844
	conn = (rpc_clnt_connection_t *) 0x6410a8
	clnt = (struct rpc_clnt *) 0x641078
	ret = -1
	req_info = (rpc_request_info_t *) 0x0
	pollin = (rpc_transport_pollin_t *) 0x643568
	tv = {tv_sec = 0, tv_usec = 0}
#11 0x00002aaaaaf37a35 in rpc_transport_notify (this=0x641288, event=RPC_TRANSPORT_MSG_RECEIVED, data=0x643568) at rpc-transport.c:1124
	ret = -1
#12 0x00002aaaaca64304 in socket_event_poll_in (this=0x641288) at socket.c:1577
	ret = 0
	pollin = (rpc_transport_pollin_t *) 0x643568
#13 0x00002aaaaca64675 in socket_event_handler (fd=13, idx=5, data=0x641288, poll_in=1, poll_out=0, poll_err=0) at socket.c:1691
---Type <return> to continue, or q <return> to quit---
	this = (rpc_transport_t *) 0x641288
	priv = (socket_private_t *) 0x641748
	ret = 0
	__FUNCTION__ = "socket_event_handler"
#14 0x00002aaaaad06f14 in event_dispatch_epoll_handler (event_pool=0x62c348, events=0x635cc8, i=0) at event.c:812
	event_data = (struct event_data *) 0x635ccc
	handler = (event_handler_t) 0x2aaaaca64579 <socket_event_handler>
	data = (void *) 0x641288
	idx = 5
	ret = -1
	__FUNCTION__ = "event_dispatch_epoll_handler"
#15 0x00002aaaaad07103 in event_dispatch_epoll (event_pool=0x62c348) at event.c:876
	events = (struct epoll_event *) 0x635cc8
	size = 1
	i = 0
	ret = 1
	__FUNCTION__ = "event_dispatch_epoll"
#16 0x00002aaaaad0745f in event_dispatch (event_pool=0x62c348) at event.c:984
	ret = -1
	__FUNCTION__ = "event_dispatch"
#17 0x0000000000405c81 in main (argc=1, argv=0x7fffc2bf7488) at glusterfsd.c:1398
	ctx = (glusterfs_ctx_t *) 0x62a010
	ret = 0
===========
Comment 3 Vijay Bellur 2010-09-10 06:14:05 EDT
PATCH: http://patches.gluster.com/patch/4728 in master (mgmt/glusterd: add a dict to glusterd commit response.)