Bug 74674 - [PATCH] nbd kernel driver needs several signal related and other fixes
Summary: [PATCH] nbd kernel driver needs several signal related and other fixes
Keywords:
Status: CLOSED CANTFIX
Alias: None
Product: Red Hat Enterprise Linux 2.1
Classification: Red Hat
Component: kernel
Version: 2.1
Hardware: All
OS: Linux
medium
high
Target Milestone: ---
Assignee: Larry Woodman
QA Contact: Brian Brock
URL:
Whiteboard:
Depends On:
Blocks:
TreeView+ depends on / blocked
 
Reported: 2002-09-30 14:56 UTC by James Bottomley
Modified: 2007-11-30 22:06 UTC (History)
2 users (show)

Fixed In Version:
Doc Type: Bug Fix
Doc Text:
Clone Of:
Environment:
Last Closed: 2005-09-28 11:34:12 UTC
Target Upstream Version:
Embargoed:


Attachments (Terms of Use)

Description James Bottomley 2002-09-30 14:56:38 UTC
From Bugzilla Helper:
User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.0.1) Gecko/20020830

Description of problem:
There are 3 specific problems in nbd

1. It only supports devices up to 2GB
2. nbdclient is unkillable
3. if nbdclient is killed, it won't exit until the server does, but then the
posted signal will kill it before it cleans up the kernel queue, leaving the nbd
device unusable until a review

Version-Release number of selected component (if applicable):


How reproducible:
Always

Steps to Reproduce:
For 1. try to use NBD on a file > 2Gb.  It will fail

for 2 post a kill signal on a running nbd (nothing will happen).  then stop the
server.  nbdclient will die and the system will be unable to restart it again.

	

Actual Results:  listed in reproduction steps

Expected Results:  should support devices up to 2Tb

Should be killable and should clean up nicely after itself.

Additional info:

This patch fixes all of these problems in nbd.  It has already been incorporated
into nbd in later kernels.

--- nbd.c.PRISTINE	Mon Mar  4 13:54:34 2002
+++ nbd.c	Thu Mar  7 13:47:54 2002
@@ -101,9 +101,12 @@
 	oldfs = get_fs();
 	set_fs(get_ds());
 
+	// Allow interception of SIGKILL only
+	// Don't allow other signals to interrupt the transmission
 	spin_lock_irqsave(&current->sigmask_lock, flags);
 	oldset = current->blocked;
 	sigfillset(&current->blocked);
+	sigdelsetmask(&current->blocked, sigmask(SIGKILL));
 	recalc_sigpending(current);
 	spin_unlock_irqrestore(&current->sigmask_lock, flags);
 
@@ -126,6 +128,17 @@
 		else
 			result = sock_recvmsg(sock, &msg, size, 0);
 
+		if (signal_pending(current)) {
+			siginfo_t info;
+			spin_lock_irqsave(&current->sigmask_lock, flags);
+			printk(KERN_WARNING "NBD (pid %d: %s) got signal %d\n",
+				current->pid, current->comm, 
+				dequeue_signal(&current->blocked, &info));
+			spin_unlock_irqrestore(&current->sigmask_lock, flags);
+			result = -EINTR;
+			break;
+		}
+
 		if (result <= 0) {
 #ifdef PARANOIA
 			printk(KERN_ERR "NBD: %s - sock=%ld at buf=%ld, size=%d returned %d.\n",
@@ -337,8 +350,25 @@
 		spin_unlock_irq(&io_request_lock);
 
 		down (&lo->queue_lock);
+		if (!lo->file) {
+			up(&lo->queue_lock);
+			printk(KERN_ERR "nbd: failed between accept and semaphore, file lost\n");
+			req->errors++;
+			nbd_end_request(req);
+			spin_lock_irq(&io_request_lock);
+			continue;
+		}
+
 		list_add(&req->queue, &lo->queue_head);
 		nbd_send_req(lo->sock, req);	/* Why does this block?         */
+		if (req->errors) {
+			printk(KERN_ERR "nbd: nbd_send_req failed\n");
+			list_del(&req->queue);
+			up(&lo->queue_lock);
+			nbd_end_request(req);
+			spin_lock_irq(&io_request_lock);
+			continue;
+		}
 		up (&lo->queue_lock);
 
 		spin_lock_irq(&io_request_lock);
@@ -388,12 +418,14 @@
 			printk(KERN_ERR "nbd: Some requests are in progress -> can not turn off.\n");
 			return -EBUSY;
 		}
-		up(&lo->queue_lock);
 		file = lo->file;
-		if (!file)
+		if (!file) {
+			up(&lo->queue_lock);
 			return -EINVAL;
+		}
 		lo->file = NULL;
 		lo->sock = NULL;
+		up(&lo->queue_lock);
 		fput(file);
 		return 0;
 	case NBD_SET_SOCK:
@@ -434,9 +466,30 @@
 		if (!lo->file)
 			return -EINVAL;
 		nbd_do_it(lo);
+		/* on return tidy up in case we have a signal */
+		printk(KERN_WARNING "nbd: nbd_do_it returned\n");
+		/* Forcibly shutdown the socket causing all listeners
+		 * to error
+		 *
+		 * FIXME: This code is duplicated from sys_shutdown, but
+		 * there should be a more generic interface rather than
+		 * calling socket ops directly here */
+		lo->sock->ops->shutdown(lo->sock, 2);
+		down(&lo->queue_lock);
+		printk(KERN_WARNING "nbd: lock acquired\n");
+		nbd_clear_que(lo);
+		printk(KERN_WARNING "nbd: queue cleared\n");
+		file = lo->file;
+		lo->file = NULL;
+		lo->sock = NULL;
+		up(&lo->queue_lock);
+		if (file)
+			fput(file);
 		return lo->harderror;
 	case NBD_CLEAR_QUE:
+		down(&lo->queue_lock);
 		nbd_clear_que(lo);
+		up(&lo->queue_lock);
 		return 0;
 #ifdef PARANOIA
 	case NBD_PRINT_DEBUG:
@@ -511,7 +564,7 @@
 		init_MUTEX(&nbd_dev[i].queue_lock);
 		nbd_blksizes[i] = 1024;
 		nbd_blksize_bits[i] = 10;
-		nbd_bytesizes[i] = 0x7ffffc00; /* 2GB */
+		nbd_bytesizes[i] = ((u64)0x7ffffc00) << 10; /* 2TB */
 		nbd_sizes[i] = nbd_bytesizes[i] >> BLOCK_SIZE_BITS;
 		register_disk(NULL, MKDEV(MAJOR_NR,i), 1, &nbd_fops,
 				nbd_bytesizes[i]>>9);

Comment 2 Larry Woodman 2005-09-28 11:34:12 UTC
I cant make this level of change in AS2.1 at this point.  Please re-open this
bug if this stilo needs attention.

Larry Woodman


Note You need to log in before you can comment on or make changes to this bug.