From Bugzilla Helper: User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.0.1) Gecko/20020830 Description of problem: There are 3 specific problems in nbd 1. It only supports devices up to 2GB 2. nbdclient is unkillable 3. if nbdclient is killed, it won't exit until the server does, but then the posted signal will kill it before it cleans up the kernel queue, leaving the nbd device unusable until a review Version-Release number of selected component (if applicable): How reproducible: Always Steps to Reproduce: For 1. try to use NBD on a file > 2Gb. It will fail for 2 post a kill signal on a running nbd (nothing will happen). then stop the server. nbdclient will die and the system will be unable to restart it again. Actual Results: listed in reproduction steps Expected Results: should support devices up to 2Tb Should be killable and should clean up nicely after itself. Additional info: This patch fixes all of these problems in nbd. It has already been incorporated into nbd in later kernels. --- nbd.c.PRISTINE Mon Mar 4 13:54:34 2002 +++ nbd.c Thu Mar 7 13:47:54 2002 @@ -101,9 +101,12 @@ oldfs = get_fs(); set_fs(get_ds()); + // Allow interception of SIGKILL only + // Don't allow other signals to interrupt the transmission spin_lock_irqsave(¤t->sigmask_lock, flags); oldset = current->blocked; sigfillset(¤t->blocked); + sigdelsetmask(¤t->blocked, sigmask(SIGKILL)); recalc_sigpending(current); spin_unlock_irqrestore(¤t->sigmask_lock, flags); @@ -126,6 +128,17 @@ else result = sock_recvmsg(sock, &msg, size, 0); + if (signal_pending(current)) { + siginfo_t info; + spin_lock_irqsave(¤t->sigmask_lock, flags); + printk(KERN_WARNING "NBD (pid %d: %s) got signal %d\n", + current->pid, current->comm, + dequeue_signal(¤t->blocked, &info)); + spin_unlock_irqrestore(¤t->sigmask_lock, flags); + result = -EINTR; + break; + } + if (result <= 0) { #ifdef PARANOIA printk(KERN_ERR "NBD: %s - sock=%ld at buf=%ld, size=%d returned %d.\n", @@ -337,8 +350,25 @@ spin_unlock_irq(&io_request_lock); down (&lo->queue_lock); + if (!lo->file) { + up(&lo->queue_lock); + printk(KERN_ERR "nbd: failed between accept and semaphore, file lost\n"); + req->errors++; + nbd_end_request(req); + spin_lock_irq(&io_request_lock); + continue; + } + list_add(&req->queue, &lo->queue_head); nbd_send_req(lo->sock, req); /* Why does this block? */ + if (req->errors) { + printk(KERN_ERR "nbd: nbd_send_req failed\n"); + list_del(&req->queue); + up(&lo->queue_lock); + nbd_end_request(req); + spin_lock_irq(&io_request_lock); + continue; + } up (&lo->queue_lock); spin_lock_irq(&io_request_lock); @@ -388,12 +418,14 @@ printk(KERN_ERR "nbd: Some requests are in progress -> can not turn off.\n"); return -EBUSY; } - up(&lo->queue_lock); file = lo->file; - if (!file) + if (!file) { + up(&lo->queue_lock); return -EINVAL; + } lo->file = NULL; lo->sock = NULL; + up(&lo->queue_lock); fput(file); return 0; case NBD_SET_SOCK: @@ -434,9 +466,30 @@ if (!lo->file) return -EINVAL; nbd_do_it(lo); + /* on return tidy up in case we have a signal */ + printk(KERN_WARNING "nbd: nbd_do_it returned\n"); + /* Forcibly shutdown the socket causing all listeners + * to error + * + * FIXME: This code is duplicated from sys_shutdown, but + * there should be a more generic interface rather than + * calling socket ops directly here */ + lo->sock->ops->shutdown(lo->sock, 2); + down(&lo->queue_lock); + printk(KERN_WARNING "nbd: lock acquired\n"); + nbd_clear_que(lo); + printk(KERN_WARNING "nbd: queue cleared\n"); + file = lo->file; + lo->file = NULL; + lo->sock = NULL; + up(&lo->queue_lock); + if (file) + fput(file); return lo->harderror; case NBD_CLEAR_QUE: + down(&lo->queue_lock); nbd_clear_que(lo); + up(&lo->queue_lock); return 0; #ifdef PARANOIA case NBD_PRINT_DEBUG: @@ -511,7 +564,7 @@ init_MUTEX(&nbd_dev[i].queue_lock); nbd_blksizes[i] = 1024; nbd_blksize_bits[i] = 10; - nbd_bytesizes[i] = 0x7ffffc00; /* 2GB */ + nbd_bytesizes[i] = ((u64)0x7ffffc00) << 10; /* 2TB */ nbd_sizes[i] = nbd_bytesizes[i] >> BLOCK_SIZE_BITS; register_disk(NULL, MKDEV(MAJOR_NR,i), 1, &nbd_fops, nbd_bytesizes[i]>>9);
I cant make this level of change in AS2.1 at this point. Please re-open this bug if this stilo needs attention. Larry Woodman