Description of problem: In AFR we added code to fail open() with EIO on files that are in split-brain. In DHT this error triggers the migration check, and the errno becomes ENOENT instead of EIO. https://review.gluster.org/#/c/13075/17/tests/basic/afr/split-brain-open.t@36 is where the failure is seen. The reason for this issue is that dht_inode_missing() also treats EIO as a missing inode. Version-Release number of selected component (if applicable): How reproducible: Steps to Reproduce: 1. 2. 3. Actual results: Expected results: Additional info:
I have doubts about the we_are_not_migrating() macro. Below is the output of a gdb session: dht_open_cbk is entered with op_errno=5 (EIO), and dht_open2 finally unwinds with op_errno=2 (ENOENT). Thread 8 "glusterepoll1" hit Breakpoint 1, dht_open_cbk (frame=0x7f1720001768, cookie=0x7f172400eb30, this=0x7f1724010520, op_ret=-1, op_errno=5, fd=0x0, xdata=0x0) at dht-inode-read.c:34 34 dht_local_t *local = NULL; (gdb) l 29 30 int 31 dht_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, 32 int op_ret, int op_errno, fd_t *fd, dict_t *xdata) 33 { 34 dht_local_t *local = NULL; 35 xlator_t *prev = NULL; 36 int ret = 0; 37 38 local = frame->local; (gdb) 39 prev = cookie; 40 41 local->op_errno = op_errno; 42 if ((op_ret == -1) && !dht_inode_missing(op_errno)) { 43 gf_msg_debug (this->name, op_errno, 44 "subvolume %s returned -1", 45 prev->name); 46 goto out; 47 } 48 (gdb) 49 /* Update ctx if the fd has been opened on the target*/ 50 if (!op_ret && (local->call_cnt == 1)) { 51 dht_fd_ctx_set (this, fd, prev); 52 goto out; 53 } 54 55 if (!op_ret || (local->call_cnt != 1)) 56 goto out; 57 58 /* rebalance would have happened */ (gdb) 59 local->rebalance.target_op_fn = dht_open2; 60 ret = dht_rebalance_complete_check (this, frame); 61 if (!ret) 62 return 0; 63 64 out: 65 DHT_STACK_UNWIND (open, frame, op_ret, op_errno, local->fd, xdata); 66 67 return 0; 68 } (gdb) b dht_rebalance_complete_check Breakpoint 2 at 0x7f172b0e5b02: file dht-helper.c, line 1520. (gdb) c Continuing. Thread 8 "glusterepoll1" hit Breakpoint 2, dht_rebalance_complete_check (this=0x7f1724010520, frame=0x7f1720001768) at dht-helper.c:1520 1520 int ret = -1; (gdb) b dht_migration_complete_check_task Breakpoint 3 at 0x7f172b0e5187: file dht-helper.c, line 1290. (gdb) b dht_migration_complete_check_done Breakpoint 4 at 0x7f172b0e50ef: file dht-helper.c, line 1265. (gdb) c Continuing. 
[Switching to Thread 0x7f172d15c700 (LWP 28975)] Thread 6 "glustersproc1" hit Breakpoint 3, dht_migration_complete_check_task (data=0x7f1720001768) at dht-helper.c:1290 (gdb) b 1498 Breakpoint 5 at 0x7f172b0e5a7a: file dht-helper.c, line 1498. (gdb) c Continuing. Thread 6 "glustersproc1" hit Breakpoint 5, dht_migration_complete_check_task (data=0x7f1720001768) at dht-helper.c:1498 1498 if (open_failed) { (gdb) p open_failed $1 = 1 (gdb) n 1499 ret = -1; (gdb) 1500 goto unlock; (gdb) 1505 UNLOCK(&inode->lock); (gdb) 1508 if (dict) { (gdb) 1512 loc_wipe (&tmp_loc); (gdb) 1514 return ret; (gdb) 1515 } (gdb) synctask_wrap () at syncop.c:376 376 if (task->synccbk) (gdb) 377 task->synccbk (task->ret, task->frame, task->opaque); (gdb) Thread 6 "glustersproc1" hit Breakpoint 4, dht_migration_complete_check_done (op_ret=-1, frame=0x7f1720001768, data=0x7f1720001768) at dht-helper.c:1265 1265 dht_local_t *local = NULL; (gdb) n 1266 xlator_t *subvol = NULL; (gdb) 1268 local = frame->local; (gdb) 1270 if (op_ret != 0) (gdb) 1271 goto out; (gdb) 1281 local->rebalance.target_op_fn (THIS, subvol, frame, op_ret); (gdb) s dht_open2 (this=0x7f1724010520, subvol=0x0, frame=0x7f1720001768, ret=-1) at dht-inode-read.c:73 73 dht_local_t *local = NULL; (gdb) n 74 int op_errno = EINVAL; (gdb) 76 if (!frame || !frame->local) (gdb) 79 local = frame->local; (gdb) 80 op_errno = ENOENT; (gdb) 82 if (we_are_not_migrating (ret)) { (gdb) n 90 if (subvol == NULL) (gdb) 91 goto out; (gdb) 101 DHT_STACK_UNWIND (open, frame, -1, op_errno, NULL, NULL); (gdb) p op_errno $3 = 2
*** This bug has been marked as a duplicate of bug 1578823 ***