Bug 763471 (GLUSTER-1739)

Summary: 3.1.0qa35: nfs crash on client_finodelk
Product: [Community] GlusterFS Reporter: Lakshmipathi G <lakshmipathi>
Component: nfsAssignee: Shehjar Tikoo <shehjart>
Status: CLOSED CURRENTRELEASE QA Contact:
Severity: medium Docs Contact:
Priority: low    
Version: 3.1-alphaCC: aavati, amarts, gluster-bugs, vijay
Target Milestone: ---   
Target Release: ---   
Hardware: All   
OS: Linux   
Whiteboard:
Fixed In Version: Doc Type: Bug Fix
Doc Text:
Story Points: ---
Clone Of: Environment:
Last Closed: Type: ---
Regression: RTP Mount Type: nfs
Documentation: --- CRM:
Verified Versions: Category: ---
oVirt Team: --- RHEL 7.3 requirements from Atomic Host:

Description Lakshmipathi G 2010-09-29 04:31:55 EDT
created two volumes 
dht vol-
#gluster volume add-brick testqa35 10.214.231.112:/mnt/a5 10.198.110.16:/mnt/a5

afr vol-
#gluster volume create testafrqa35 replica 2 10.192.141.187:/mnt/a6 10.192.134.144:/mnt/a6

and started them -
gluster volume start testafrqa35
gluster volume start testqa35


and mount them under 8 clients like -
client1-afr-pt#
client1-dht-pt#

and then start iozone,fileop,dd 10000 files,untar kernel on each mount-pts on 8 clients.


now add two more bricks to existing volumes-

#gluster volume add-brick testafrqa35 10.214.231.112:/mnt/a6 10.198.110.16:/mnt/a6

#gluster volume add-brick testqa35 10.214.231.112:/mnt/a5 10.198.110.16:/mnt/a5


after sometime nfs-server crashed. 

log file -
===
Given volfile:
+------------------------------------------------------------------------------+
  1: volume testqa35-client-0
  2:     type protocol/client
  3:     option transport-type tcp
  4:     option remote-host 10.192.141.187
  5:     option transport.socket.nodelay on
  6:     option remote-subvolume /mnt/a5
  7: end-volume
  8: 
  9: volume testqa35-client-1
 10:     type protocol/client
 11:     option transport-type tcp
 12:     option remote-host 10.192.134.144
 13:     option transport.socket.nodelay on
 14:     option remote-subvolume /mnt/a5
 15: end-volume
 16: 
 17: volume testqa35-client-2
 18:     type protocol/client
 19:     option transport-type tcp
 20:     option remote-host 10.214.231.112
 21:     option transport.socket.nodelay on
 22:     option remote-subvolume /mnt/a5
 23: end-volume
 24: 
 25: volume testqa35-client-3
 26:     type protocol/client
 27:     option transport-type tcp
 28:     option remote-host 10.198.110.16
 29:     option transport.socket.nodelay on
 30:     option remote-subvolume /mnt/a5
 31: end-volume
 32: 
 33: volume testqa35-dht
 34: type cluster/distribute
 35: #   option lookup-unhashed on
 36: #   option min-free-disk on
 37: #   option unhashed-sticky-bit on
 38:     subvolumes testqa35-client-0 testqa35-client-1 testqa35-client-2 testqa35-client-3 
 39: end-volume
 40: 
 41: volume testqa35-write-behind
 42:     type performance/write-behind
 43: #   option flush-behind on
 44: #   option cache-size on
 45: #   option disable-for-first-nbytes on
 46: #   option enable-O_SYNC on
 47: #   option enable-trickling-writes on
 48:     subvolumes testqa35-dht
 49: end-volume
 50: 
 51: volume testqa35-read-ahead
 52:     type performance/read-ahead
 53: #   option force-atime-update on
 54: #   option page-count on
 55:     subvolumes testqa35-write-behind
 56: end-volume
 57: 
 58: volume testqa35-io-cache
 59:     type performance/io-cache
 60: #   option priority on
 61: #   option cache-timeout on
 62: #   option cache-size on
 63: #   option min-file-size on
 64: #   option max-file-size on
 65:     subvolumes testqa35-read-ahead
 66: end-volume
 67: 
 68: volume testqa35-quick-read
 69:     type performance/quick-read
 70: #   option priority on
 71: #   option cache-timeout on
 72: #   option cache-size on
 73: #   option max-file-size on
 74:     subvolumes testqa35-io-cache
 75: end-volume
 76: 
 77: volume testqa35-stat-prefetch
 78:     type performance/stat-prefetch
 79:     subvolumes testqa35-quick-read
 80: end-volume
 81: 
 82: volume testqa35
 83:     type debug/io-stats
 84:     option dump-fd-stats no
 85:     option latency-measurement no
 86:     subvolumes testqa35-stat-prefetch
 87: end-volume
 88: 
 89: volume testafrqa35-client-0
 90:     type protocol/client
 91:     option transport-type tcp
 92:     option remote-host 10.192.141.187
 93:     option transport.socket.nodelay on
 94:     option remote-subvolume /mnt/a6
 95: end-volume
 96: 
 97: volume testafrqa35-client-1
 98:     type protocol/client
 99:     option transport-type tcp
100:     option remote-host 10.192.134.144
101:     option transport.socket.nodelay on
102:     option remote-subvolume /mnt/a6
103: end-volume
104: 
105: volume testafrqa35-replicate-0
106:     type cluster/replicate
107: #   option read-subvolume on
108: #   option favorite-child on
109: #   option background-self-heal-count on
110: #   option data-self-heal on
111: #   option data-self-heal-algorithm on
112: #   option data-self-heal-window-size on
113: #   option metadata-self-heal on
114: #   option entry-self-heal on
115: #   option data-change-log on
116: #   option metadata-change-log on
117: #   option entry-change-log on
118: #   option strict-readdir on
119:     subvolumes testafrqa35-client-0 testafrqa35-client-1 
120: end-volume
121: 
122: volume testafrqa35-write-behind
123:     type performance/write-behind
124: #   option flush-behind on
125: #   option cache-size on
126: #   option disable-for-first-nbytes on
127: #   option enable-O_SYNC on
128: #   option enable-trickling-writes on
129:     subvolumes testafrqa35-replicate-0
130: end-volume
131: 
132: volume testafrqa35-read-ahead
133:     type performance/read-ahead
134: #   option force-atime-update on
135: #   option page-count on
136:     subvolumes testafrqa35-write-behind
137: end-volume
138: 
139: volume testafrqa35-io-cache
140:     type performance/io-cache
141: #   option priority on
142: #   option cache-timeout on
143: #   option cache-size on
144: #   option min-file-size on
145: #   option max-file-size on
146:     subvolumes testafrqa35-read-ahead
147: end-volume
148: 
149: volume testafrqa35-quick-read
150:     type performance/quick-read
151: #   option priority on
152: #   option cache-timeout on
153: #   option cache-size on
154: #   option max-file-size on
155:     subvolumes testafrqa35-io-cache
156: end-volume
157: 
158: volume testafrqa35-stat-prefetch
159:     type performance/stat-prefetch
160:     subvolumes testafrqa35-quick-read
161: end-volume
162: 
163: volume testafrqa35
164:     type debug/io-stats
165:     option dump-fd-stats no
166:     option latency-measurement no
167:     subvolumes testafrqa35-stat-prefetch
168: end-volume
169: 
170: volume nfs-server
171: type nfs/server
172: option rpc-auth.addr.testqa35.allow *
173: option rpc-auth.addr.testafrqa35.allow *
174: option nfs.dynamic-volumes on
175: option nfs3.testqa35.volume-id f0ed9cce-66a0-4569-8bb3-2c30208d1d0c
176: option nfs3.testafrqa35.volume-id dffcf5a7-f28d-458b-8a99-d6f153a59d9e
177: subvolumes  testqa35 testafrqa35
178: end-volume

+------------------------------------------------------------------------------+
[2010-09-29 03:23:09.650321] E [afr-common.c:2654:afr_notify] testafrqa35-replicate-0: All subvolumes are down. Going offline until atleast one of them comes back up.
[2010-09-29 03:23:10.109278] E [client-handshake.c:730:client_query_portmap_cbk] testqa35-client-3: failed to get the port number for remote subvolume
[2010-09-29 03:23:10.187183] E [client-handshake.c:730:client_query_portmap_cbk] testqa35-client-2: failed to get the port number for remote subvolume
[2010-09-29 03:23:10.480290] E [afr-common.c:2654:afr_notify] testafrqa35-replicate-0: All subvolumes are down. Going offline until atleast one of them comes back up.
[2010-09-29 03:23:13.614015] I [client-handshake.c:660:select_server_supported_programs] testqa35-client-0: Using Program GlusterFS-3.1.0, Num (1298437), Version (310)
[2010-09-29 03:23:13.705071] I [client-handshake.c:496:client_setvolume_cbk] testqa35-client-0: Connected to 10.192.141.187:6971, attached to remote volume '/mnt/a5'.
[2010-09-29 03:23:13.705337] I [client-handshake.c:660:select_server_supported_programs] testafrqa35-client-0: Using Program GlusterFS-3.1.0, Num (1298437), Version (310)
[2010-09-29 03:23:13.705716] I [client-handshake.c:496:client_setvolume_cbk] testafrqa35-client-0: Connected to 10.192.141.187:6973, attached to remote volume '/mnt/a6'.
[2010-09-29 03:23:13.705736] I [afr-common.c:2624:afr_notify] testafrqa35-replicate-0: Subvolume 'testafrqa35-client-0' came back up; going online.
[2010-09-29 03:23:13.706382] I [client-handshake.c:660:select_server_supported_programs] testafrqa35-client-1: Using Program GlusterFS-3.1.0, Num (1298437), Version (310)
[2010-09-29 03:23:13.706450] I [client-handshake.c:660:select_server_supported_programs] testqa35-client-1: Using Program GlusterFS-3.1.0, Num (1298437), Version (310)
[2010-09-29 03:23:13.706593] I [afr-common.c:827:afr_fresh_lookup_cbk] testafrqa35-replicate-0: added root inode
[2010-09-29 03:23:13.707161] I [client-handshake.c:496:client_setvolume_cbk] testqa35-client-1: Connected to 10.192.134.144:6971, attached to remote volume '/mnt/a5'.
[2010-09-29 03:23:13.707243] I [client-handshake.c:496:client_setvolume_cbk] testafrqa35-client-1: Connected to 10.192.134.144:6973, attached to remote volume '/mnt/a6'.
[2010-09-29 03:23:17.641506] I [client-handshake.c:660:select_server_supported_programs] testqa35-client-2: Using Program GlusterFS-3.1.0, Num (1298437), Version (310)
[2010-09-29 03:23:17.642257] I [client-handshake.c:496:client_setvolume_cbk] testqa35-client-2: Connected to 10.214.231.112:6971, attached to remote volume '/mnt/a5'.
[2010-09-29 03:23:17.644131] I [client-handshake.c:660:select_server_supported_programs] testqa35-client-3: Using Program GlusterFS-3.1.0, Num (1298437), Version (310)
[2010-09-29 03:23:17.644709] I [client-handshake.c:496:client_setvolume_cbk] testqa35-client-3: Connected to 10.198.110.16:6971, attached to remote volume '/mnt/a5'.
[2010-09-29 03:23:17.665847] I [nfs.c:315:__nfs_subvolume_start] nfs: All exports up
[2010-09-29 03:23:18.655245] I [afr-common.c:724:afr_lookup_done] testafrqa35-replicate-0: background  data self-heal triggered. path: /iozone.tmp
[2010-09-29 03:23:18.655466] I [afr-common.c:724:afr_lookup_done] testafrqa35-replicate-0: background  data self-heal triggered. path: /iozone.tmp
[2010-09-29 03:23:18.655643] I [afr-common.c:724:afr_lookup_done] testafrqa35-replicate-0: background  data self-heal triggered. path: /iozone.tmp
[2010-09-29 03:23:18.656104] I [afr-common.c:724:afr_lookup_done] testafrqa35-replicate-0: background  data self-heal triggered. path: /iozone.tmp
[2010-09-29 03:23:23.620175] I [afr-common.c:724:afr_lookup_done] testafrqa35-replicate-0: background  data self-heal triggered. path: /iozone.tmp
[2010-09-29 03:23:23.657558] I [afr-self-heal-common.c:1583:afr_self_heal_completion_cbk] testafrqa35-replicate-0: background  data self-heal completed on /iozone.tmp
[2010-09-29 03:23:23.669209] I [afr-self-heal-common.c:1583:afr_self_heal_completion_cbk] testafrqa35-replicate-0: background  data self-heal completed on /iozone.tmp
[2010-09-29 03:23:23.669852] I [afr-self-heal-common.c:1583:afr_self_heal_completion_cbk] testafrqa35-replicate-0: background  data self-heal completed on /iozone.tmp
[2010-09-29 03:23:23.670643] I [afr-common.c:724:afr_lookup_done] testafrqa35-replicate-0: background  data self-heal triggered. path: /io/iozone.tmp
[2010-09-29 03:23:23.670770] I [afr-common.c:724:afr_lookup_done] testafrqa35-replicate-0: background  data self-heal triggered. path: /io/iozone.tmp
[2010-09-29 03:23:23.670849] I [afr-common.c:724:afr_lookup_done] testafrqa35-replicate-0: background  data self-heal triggered. path: /io/iozone.tmp
[2010-09-29 03:23:23.670955] I [afr-common.c:724:afr_lookup_done] testafrqa35-replicate-0: background  data self-heal triggered. path: /io/iozone.tmp
[2010-09-29 03:23:23.671031] I [afr-common.c:724:afr_lookup_done] testafrqa35-replicate-0: background  data self-heal triggered. path: /io/iozone.tmp
[2010-09-29 03:23:23.671452] I [afr-common.c:724:afr_lookup_done] testafrqa35-replicate-0: background  data self-heal triggered. path: /io/iozone.tmp
[2010-09-29 03:23:23.671750] I [afr-self-heal-common.c:1583:afr_self_heal_completion_cbk] testafrqa35-replicate-0: background  data self-heal completed on /iozone.tmp
[2010-09-29 03:23:23.671967] I [afr-common.c:724:afr_lookup_done] testafrqa35-replicate-0: background  data self-heal triggered. path: /io/iozone.tmp
[2010-09-29 03:23:23.744369] I [afr-common.c:724:afr_lookup_done] testafrqa35-replicate-0: background  data self-heal triggered. path: /io/iozone.tmp
[2010-09-29 03:23:23.745415] I [afr-common.c:724:afr_lookup_done] testafrqa35-replicate-0: background  data self-heal triggered. path: /io/iozone.tmp
[2010-09-29 03:23:23.745928] I [afr-common.c:724:afr_lookup_done] testafrqa35-replicate-0: background  data self-heal triggered. path: /io/iozone.tmp
[2010-09-29 03:23:23.797254] I [afr-common.c:724:afr_lookup_done] testafrqa35-replicate-0: background  data self-heal triggered. path: /io/iozone.tmp
[2010-09-29 03:23:23.830423] I [afr-common.c:724:afr_lookup_done] testafrqa35-replicate-0: background  data self-heal triggered. path: /io/iozone.tmp
[2010-09-29 03:23:23.830716] I [afr-common.c:724:afr_lookup_done] testafrqa35-replicate-0: background  data self-heal triggered. path: /io/iozone.tmp
[2010-09-29 03:23:23.830819] I [afr-common.c:724:afr_lookup_done] testafrqa35-replicate-0: background  data self-heal triggered. path: /io/iozone.tmp
[2010-09-29 03:23:23.863843] I [afr-self-heal-common.c:1583:afr_self_heal_completion_cbk] testafrqa35-replicate-0: background  data self-heal completed on /io/iozone.tmp
[2010-09-29 03:23:23.864051] I [afr-self-heal-common.c:1583:afr_self_heal_completion_cbk] testafrqa35-replicate-0: background  data self-heal completed on /io/iozone.tmp
[2010-09-29 03:23:23.864157] I [afr-self-heal-common.c:1583:afr_self_heal_completion_cbk] testafrqa35-replicate-0: background  data self-heal completed on /io/iozone.tmp
[2010-09-29 03:23:23.864314] I [afr-self-heal-common.c:1583:afr_self_heal_completion_cbk] testafrqa35-replicate-0: background  data self-heal completed on /io/iozone.tmp
[2010-09-29 03:23:23.864412] I [afr-self-heal-common.c:1583:afr_self_heal_completion_cbk] testafrqa35-replicate-0: background  data self-heal completed on /io/iozone.tmp
[2010-09-29 03:23:23.864531] I [afr-self-heal-common.c:1583:afr_self_heal_completion_cbk] testafrqa35-replicate-0: background  data self-heal completed on /io/iozone.tmp
[2010-09-29 03:23:23.866329] E [afr-self-heal-algorithm.c:762:sh_diff_checksum_cbk] testafrqa35-replicate-0: checksum on /iozone.tmp failed on subvolume testafrqa35-client-0 (Is a directory)
[2010-09-29 03:23:23.866367] E [afr-self-heal-algorithm.c:762:sh_diff_checksum_cbk] testafrqa35-replicate-0: checksum on /iozone.tmp failed on subvolume testafrqa35-client-0 (Is a directory)
[2010-09-29 03:23:23.866434] E [afr-self-heal-algorithm.c:762:sh_diff_checksum_cbk] testafrqa35-replicate-0: checksum on /iozone.tmp failed on subvolume testafrqa35-client-0 (Is a directory)
[2010-09-29 03:23:23.866466] E [afr-self-heal-algorithm.c:762:sh_diff_checksum_cbk] testafrqa35-replicate-0: checksum on /iozone.tmp failed on subvolume testafrqa35-client-0 (Is a directory)
[2010-09-29 03:23:23.866522] E [afr-self-heal-algorithm.c:762:sh_diff_checksum_cbk] testafrqa35-replicate-0: checksum on /iozone.tmp failed on subvolume testafrqa35-client-0 (Is a directory)
[2010-09-29 03:23:23.866553] E [afr-self-heal-algorithm.c:762:sh_diff_checksum_cbk] testafrqa35-replicate-0: checksum on /iozone.tmp failed on subvolume testafrqa35-client-0 (Is a directory)
[2010-09-29 03:23:23.866608] E [afr-self-heal-algorithm.c:762:sh_diff_checksum_cbk] testafrqa35-replicate-0: checksum on /iozone.tmp failed on subvolume testafrqa35-client-0 (Is a directory)
[2010-09-29 03:23:23.866651] E [afr-self-heal-algorithm.c:762:sh_diff_checksum_cbk] testafrqa35-replicate-0: checksum on /iozone.tmp failed on subvolume testafrqa35-client-0 (Is a directory)
[2010-09-29 03:23:23.866709] E [afr-self-heal-algorithm.c:762:sh_diff_checksum_cbk] testafrqa35-replicate-0: checksum on /iozone.tmp failed on subvolume testafrqa35-client-0 (Is a directory)
[2010-09-29 03:23:23.866740] E [afr-self-heal-algorithm.c:762:sh_diff_checksum_cbk] testafrqa35-replicate-0: checksum on /iozone.tmp failed on subvolume testafrqa35-client-0 (Is a directory)
[2010-09-29 03:23:23.866797] E [afr-self-heal-algorithm.c:762:sh_diff_checksum_cbk] testafrqa35-replicate-0: checksum on /iozone.tmp failed on subvolume testafrqa35-client-0 (Is a directory)
[2010-09-29 03:23:23.866827] E [afr-self-heal-algorithm.c:762:sh_diff_checksum_cbk] testafrqa35-replicate-0: checksum on /iozone.tmp failed on subvolume testafrqa35-client-0 (Is a directory)
[2010-09-29 03:23:23.866892] E [afr-self-heal-algorithm.c:762:sh_diff_checksum_cbk] testafrqa35-replicate-0: checksum on /iozone.tmp failed on subvolume testafrqa35-client-0 (Is a directory)
[2010-09-29 03:23:23.866923] E [afr-self-heal-algorithm.c:762:sh_diff_checksum_cbk] testafrqa35-replicate-0: checksum on /iozone.tmp failed on subvolume testafrqa35-client-0 (Is a directory)
[2010-09-29 03:23:23.867003] E [afr-self-heal-algorithm.c:762:sh_diff_checksum_cbk] testafrqa35-replicate-0: checksum on /iozone.tmp failed on subvolume testafrqa35-client-0 (Is a directory)
[2010-09-29 03:23:23.867034] E [afr-self-heal-algorithm.c:762:sh_diff_checksum_cbk] testafrqa35-replicate-0: checksum on /iozone.tmp failed on subvolume testafrqa35-client-0 (Is a directory)
[2010-09-29 03:23:23.867229] I [afr-self-heal-common.c:1583:afr_self_heal_completion_cbk] testafrqa35-replicate-0: background  data self-heal completed on /io/iozone.tmp
[2010-09-29 03:23:23.867374] I [afr-self-heal-common.c:1583:afr_self_heal_completion_cbk] testafrqa35-replicate-0: background  data self-heal completed on /io/iozone.tmp
[2010-09-29 03:23:23.867501] I [afr-self-heal-common.c:1583:afr_self_heal_completion_cbk] testafrqa35-replicate-0: background  data self-heal completed on /io/iozone.tmp
[2010-09-29 03:23:23.868330] E [afr-self-heal-algorithm.c:956:sh_diff_loop_driver] testafrqa35-replicate-0: diff  data self-heal aborting on /iozone.tmp
[2010-09-29 03:23:23.955304] I [afr-self-heal-common.c:1583:afr_self_heal_completion_cbk] testafrqa35-replicate-0: background  data self-heal completed on /io/iozone.tmp
[2010-09-29 03:23:23.955510] I [afr-self-heal-common.c:1583:afr_self_heal_completion_cbk] testafrqa35-replicate-0: background  data self-heal completed on /io/iozone.tmp
[2010-09-29 03:23:23.955613] I [afr-self-heal-common.c:1583:afr_self_heal_completion_cbk] testafrqa35-replicate-0: background  data self-heal completed on /io/iozone.tmp
[2010-09-29 03:23:23.955896] I [afr-self-heal-common.c:1583:afr_self_heal_completion_cbk] testafrqa35-replicate-0: background  data self-heal completed on /io/iozone.tmp
[2010-09-29 03:23:23.955999] I [afr-self-heal-common.c:1583:afr_self_heal_completion_cbk] testafrqa35-replicate-0: background  data self-heal completed on /io/iozone.tmp
[2010-09-29 03:23:24.28864] I [afr-self-heal-common.c:1583:afr_self_heal_completion_cbk] testafrqa35-replicate-0: background  data self-heal completed on /iozone.tmp
[2010-09-29 03:23:24.363356] I [afr-common.c:724:afr_lookup_done] testafrqa35-replicate-0: background  data self-heal triggered. path: /small2/3588.txt
[2010-09-29 03:23:24.428204] I [afr-self-heal-common.c:1583:afr_self_heal_completion_cbk] testafrqa35-replicate-0: background  data self-heal completed on /small2/3588.txt
[2010-09-29 03:23:24.564428] I [afr-common.c:724:afr_lookup_done] testafrqa35-replicate-0: background  data self-heal triggered. path: /lin1/linux-2.6.35/Documentation/DocBook/v4l/vidioc-enum-dv-presets.xml
[2010-09-29 03:23:24.571156] I [afr-self-heal-common.c:1583:afr_self_heal_completion_cbk] testafrqa35-replicate-0: background  data self-heal completed on /lin1/linux-2.6.35/Documentation/DocBook/v4l/vidioc-enum-dv-presets.xml
[2010-09-29 03:23:44.790436] I [afr-common.c:724:afr_lookup_done] testafrqa35-replicate-0: background  data self-heal triggered. path: /lin1/linux-2.6.35.tar
[2010-09-29 03:23:44.792481] I [afr-self-heal-common.c:1583:afr_self_heal_completion_cbk] testafrqa35-replicate-0: background  data self-heal completed on /lin1/linux-2.6.35.tar
[2010-09-29 03:24:59.111106] I [glusterfsd.c:668:cleanup_and_exit] glusterfsd: shutting down
[2010-09-29 03:24:59.111137] I [glusterfsd.c:684:cleanup_and_exit] : Calling fini for nfs-server
[2010-09-29 03:24:59.111510] I [glusterfsd.c:686:cleanup_and_exit] : Done Calling fini for nfs-server
[2010-09-29 03:24:59.111561] I [glusterfsd.c:684:cleanup_and_exit] : Calling fini for testafrqa35
[2010-09-29 03:24:59.112713] I [io-stats.c:1680:fini] testafrqa35: io-stats translator unloaded
[2010-09-29 03:24:59.112733] I [glusterfsd.c:686:cleanup_and_exit] : Done Calling fini for testafrqa35
[2010-09-29 03:24:59.112749] I [glusterfsd.c:684:cleanup_and_exit] : Calling fini for testafrqa35-stat-prefetch
[2010-09-29 03:24:59.128209] I [glusterfsd.c:686:cleanup_and_exit] : Done Calling fini for testafrqa35-stat-prefetch
[2010-09-29 03:24:59.128280] I [glusterfsd.c:684:cleanup_and_exit] : Calling fini for testafrqa35-quick-read
[2010-09-29 03:24:59.128322] I [glusterfsd.c:686:cleanup_and_exit] : Done Calling fini for testafrqa35-quick-read
[2010-09-29 03:24:59.128338] I [glusterfsd.c:684:cleanup_and_exit] : Calling fini for testafrqa35-io-cache
[2010-09-29 03:24:59.128355] I [glusterfsd.c:686:cleanup_and_exit] : Done Calling fini for testafrqa35-io-cache
[2010-09-29 03:24:59.128370] I [glusterfsd.c:684:cleanup_and_exit] : Calling fini for testafrqa35-read-ahead
[2010-09-29 03:24:59.128385] I [glusterfsd.c:686:cleanup_and_exit] : Done Calling fini for testafrqa35-read-ahead
[2010-09-29 03:24:59.128399] I [glusterfsd.c:684:cleanup_and_exit] : Calling fini for testafrqa35-write-behind
[2010-09-29 03:24:59.128415] I [glusterfsd.c:686:cleanup_and_exit] : Done Calling fini for testafrqa35-write-behind
[2010-09-29 03:24:59.128429] I [glusterfsd.c:684:cleanup_and_exit] : Calling fini for testafrqa35-replicate-0
[2010-09-29 03:24:59.128443] I [glusterfsd.c:686:cleanup_and_exit] : Done Calling fini for testafrqa35-replicate-0
[2010-09-29 03:24:59.128457] I [glusterfsd.c:684:cleanup_and_exit] : Calling fini for testafrqa35-client-1
[2010-09-29 03:24:59.128556] E [rpc-clnt.c:335:saved_frames_unwind] rpc-clnt: forced unwinding frame type(GlusterFS 3.1) op(FINODELK(30)) called at 2010-09-29 03:23:38.924934
[2010-09-29 03:24:59.129257] E [rpc-clnt.c:335:saved_frames_unwind] rpc-clnt: forced unwinding frame type(GlusterFS 3.1) op(FINODELK(30)) called at 2010-09-29 03:23:38.924953
[2010-09-29 03:24:59.129589] E [rpc-clnt.c:335:saved_frames_unwind] rpc-clnt: forced unwinding frame type(GlusterFS 3.1) op(ENTRYLK(31)) called at 2010-09-29 03:24:59.111935
[2010-09-29 03:24:59.129632] E [rpc-clnt.c:335:saved_frames_unwind] rpc-clnt: forced unwinding frame type(GlusterFS 3.1) op(XATTROP(33)) called at 2010-09-29 03:24:59.112280
[2010-09-29 03:24:59.129651] I [glusterfsd.c:686:cleanup_and_exit] : Done Calling fini for testafrqa35-client-1
[2010-09-29 03:24:59.129666] I [glusterfsd.c:684:cleanup_and_exit] : Calling fini for testafrqa35-client-0
[2010-09-29 03:24:59.132205] E [rpc-clnt.c:335:saved_frames_unwind] rpc-clnt: forced unwinding frame type(GlusterFS 3.1) op(FINODELK(30)) called at 2010-09-29 03:24:38.923430
pending frames:

patchset: v3.1.0qa7-429-g6686ddc
signal received: 11
time of crash: 2010-09-29 03:24:59
configuration details:
argp 1
backtrace 1
dlfcn 1
fdatasync 1
libpthread 1
llistxattr 1
setfsid 1
spinlock 1
epoll.h 1
xattr.h 1
st_atim.tv_nsec 1
package-string: glusterfs 3.1.0qa35
/lib64/libc.so.6[0x2aaaab7b5f30]
/usr/local/lib/glusterfs/3.1.0qa35/xlator/protocol/client.so(client_finodelk+0x78)[0x2aaaac8288a3]
/usr/local/lib/glusterfs/3.1.0qa35/xlator/cluster/replicate.so(afr_lock_blocking+0x7c2)[0x2aaaad92deb1]
/usr/local/lib/glusterfs/3.1.0qa35/xlator/cluster/replicate.so[0x2aaaad92d044]
/usr/local/lib/glusterfs/3.1.0qa35/xlator/cluster/replicate.so[0x2aaaad92d0ab]
/usr/local/lib/glusterfs/3.1.0qa35/xlator/protocol/client.so(client3_1_finodelk_cbk+0x1d5)[0x2aaaac830055]
/usr/local/lib/libgfrpc.so.0(saved_frames_unwind+0x281)[0x2aaaaaf393c1]
/usr/local/lib/libgfrpc.so.0(saved_frames_destroy+0x1c)[0x2aaaaaf39452]
/usr/local/lib/libgfrpc.so.0(rpc_clnt_connection_cleanup+0xfc)[0x2aaaaaf3991c]
/usr/local/lib/libgfrpc.so.0(rpc_clnt_destroy+0x2d)[0x2aaaaaf3ba72]
/usr/local/lib/glusterfs/3.1.0qa35/xlator/protocol/client.so(fini+0x51)[0x2aaaac82ac92]
/usr/local/sbin/glusterfs(cleanup_and_exit+0x1d2)[0x404372]
/usr/local/sbin/glusterfs(glusterfs_sigwaiter+0xde)[0x40592c]
/lib64/libpthread.so.0[0x2aaaab570407]
/lib64/libc.so.6(clone+0x6d)[0x2aaaab859b0d]
---------
========
============
(gdb) bt full
#0  0x00002aaaac8288a3 in client_finodelk (frame=0x2aaaac346e90, this=0x63f358, volume=0x640bd8 "testafrqa35-replicate-0", fd=0x2aaab3c2276c, cmd=7, lock=0x409ffc20)
    at client.c:1253
	ret = -1
	conf = (clnt_conf_t *) 0x0
	proc = (rpc_clnt_procedure_t *) 0x0
	args = {loc = 0x0, fd = 0x0, xattr_req = 0x0, linkname = 0x0, iobref = 0x0, vector = 0x0, xattr = 0x0, stbuf = 0x0, dict = 0x0, oldloc = 0x0, newloc = 0x0, 
  name = 0x0, flock = 0x0, volume = 0x0, basename = 0x0, offset = 0, mask = 0, cmd = 0, size = 0, mode = 0, rdev = 0, flags = 0, wbflags = 0, count = 0, 
  datasync = 0, cmd_entrylk = ENTRYLK_LOCK, type = ENTRYLK_RDLCK, optype = GF_XATTROP_ADD_ARRAY, valid = 0, len = 0}
	__FUNCTION__ = "client_finodelk"
#1  0x00002aaaad92deb1 in afr_lock_blocking (frame=0x2aaaac5562e0, this=0x640268, child_index=1) at afr-lk-common.c:1008
	_new = (call_frame_t *) 0x2aaaac346e90
	old_THIS = (xlator_t *) 0x640268
	tmp_cbk = (fop_finodelk_cbk_t) 0x2aaaad92d04b <afr_blocking_inodelk_cbk>
	int_lock = (afr_internal_lock_t *) 0xac10d0
	local = (afr_local_t *) 0xac1018
	priv = (afr_private_t *) 0x64eac8
	fd_ctx = (afr_fd_ctx_t *) 0x2aaab80236c8
	lower = (loc_t *) 0x0
	higher = (loc_t *) 0x0
	lower_name = 0x0
	higher_name = 0x0
	flock = {l_type = 1, l_whence = 16543, l_start = 38912000, l_len = 65536, l_pid = 0}
	ctx = 46912719959752
	ret = 0
	__FUNCTION__ = "afr_lock_blocking"
#2  0x00002aaaad92d044 in afr_lock_cbk (frame=0x2aaaac5562e0, cookie=0x0, this=0x640268, op_ret=-1, op_errno=107) at afr-lk-common.c:757
	int_lock = (afr_internal_lock_t *) 0xac10d0
	local = (afr_local_t *) 0xac1018
	priv = (afr_private_t *) 0x64eac8
	done = 0
	child_index = 0
	__FUNCTION__ = "afr_lock_cbk"
#3  0x00002aaaad92d0ab in afr_blocking_inodelk_cbk (frame=0x2aaaac5562e0, cookie=0x0, this=0x640268, op_ret=-1, op_errno=107) at afr-lk-common.c:771
No locals.
#4  0x00002aaaac830055 in client3_1_finodelk_cbk (req=0x2aaab44768e0, iov=0x409ffe70, count=1, myframe=0x2aaaac34cdf4) at client3_1-fops.c:1067
	fn = (fop_finodelk_cbk_t) 0x2aaaad92d04b <afr_blocking_inodelk_cbk>
---Type <return> to continue, or q <return> to quit---  
	_parent = (call_frame_t *) 0x2aaaac5562e0
	old_THIS = (xlator_t *) 0x63e448
	frame = (call_frame_t *) 0x2aaaac34cdf4
	rsp = {gfs_id = 0, op_ret = -1, op_errno = 107}
	ret = 0
	__FUNCTION__ = "client3_1_finodelk_cbk"
#5  0x00002aaaaaf393c1 in saved_frames_unwind (saved_frames=0x65df88) at rpc-clnt.c:340
	trav = (struct saved_frame *) 0x2aaab46b8cec
	tmp = (struct saved_frame *) 0x2aaab46b85ac
	frame_sent_tm = (struct tm *) 0x2aaaabadace0
	timestr = "2010-09-29 03:24:38.923430", '\0' <repeats 229 times>
	iov = {iov_base = 0x0, iov_len = 0}
	__FUNCTION__ = "saved_frames_unwind"
#6  0x00002aaaaaf39452 in saved_frames_destroy (frames=0x65df88) at rpc-clnt.c:356
No locals.
#7  0x00002aaaaaf3991c in rpc_clnt_connection_cleanup (conn=0x651ff8) at rpc-clnt.c:504
	saved_frames = (struct saved_frames *) 0x65df88
	clnt = (struct rpc_clnt *) 0x651fc8
	__FUNCTION__ = "rpc_clnt_connection_cleanup"
#8  0x00002aaaaaf3ba72 in rpc_clnt_destroy (rpc=0x651fc8) at rpc-clnt.c:1451
No locals.
#9  0x00002aaaac82ac92 in fini (this=0x63e448) at client.c:1896
	conf = (clnt_conf_t *) 0x651ef8
#10 0x0000000000404372 in cleanup_and_exit (signum=15) at glusterfsd.c:685
	ctx = (glusterfs_ctx_t *) 0x62b010
	trav = (xlator_t *) 0x63e448
	__FUNCTION__ = "cleanup_and_exit"
#11 0x000000000040592c in glusterfs_sigwaiter (arg=0x7fff34aff100) at glusterfsd.c:1186
	set = {__val = {46912507406851, 46912496173445, 5, 0, 0, 0, 46912509633128, 1084229968, 0, 46912496197090, 1084229968, 46912507413398, 1084231112, 24, 
    1084228048, 1084229968}}
	ret = 0
	sig = 15
#12 0x00002aaaab570407 in start_thread () from /lib64/libpthread.so.0
No symbol table info available.
#13 0x00002aaaab859b0d in clone () from /lib64/libc.so.6
No symbol table info available.
============
Comment 1 Shehjar Tikoo 2010-10-04 22:49:43 EDT
Is this related to last night's discussion of the rpc layer bug?
Comment 2 Shehjar Tikoo 2010-10-05 04:30:11 EDT
Resolved as part of conf NULL deref checking patches from Amar.