Bug 74674 - [PATCH] nbd kernel driver needs several signal related and other fixes
[PATCH] nbd kernel driver needs several signal related and other fixes
Status: CLOSED CANTFIX
Product: Red Hat Enterprise Linux 2.1
Classification: Red Hat
Component: kernel (Show other bugs)
2.1
All Linux
medium Severity high
: ---
: ---
Assigned To: Larry Woodman
Brian Brock
:
Depends On:
Blocks:
  Show dependency treegraph
 
Reported: 2002-09-30 10:56 EDT by James Bottomley
Modified: 2007-11-30 17:06 EST (History)
2 users (show)

See Also:
Fixed In Version:
Doc Type: Bug Fix
Doc Text:
Story Points: ---
Clone Of:
Environment:
Last Closed: 2005-09-28 07:34:12 EDT
Type: ---
Regression: ---
Mount Type: ---
Documentation: ---
CRM:
Verified Versions:
Category: ---
oVirt Team: ---
RHEL 7.3 requirements from Atomic Host:
Cloudforms Team: ---


Attachments (Terms of Use)

  None (edit)
Description James Bottomley 2002-09-30 10:56:38 EDT
From Bugzilla Helper:
User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.0.1) Gecko/20020830

Description of problem:
There are 3 specific problems in nbd

1. It only supports devices up to 2GB
2. nbdclient is unkillable
3. if nbdclient is killed, it won't exit until the server does, but then the
posted signal will kill it before it cleans up the kernel queue, leaving the nbd
device unusable until a review

Version-Release number of selected component (if applicable):


How reproducible:
Always

Steps to Reproduce:
For 1. try to use NBD on a file > 2Gb.  It will fail

for 2 post a kill signal on a running nbd (nothing will happen).  then stop the
server.  nbdclient will die and the system will be unable to restart it again.

	

Actual Results:  listed in reproduction steps

Expected Results:  should support devices up to 2Tb

Should be killable and should clean up nicely after itself.

Additional info:

This patch fixes all of these problems in nbd.  It has already been incorporated
into nbd in later kernels.

--- nbd.c.PRISTINE	Mon Mar  4 13:54:34 2002
+++ nbd.c	Thu Mar  7 13:47:54 2002
@@ -101,9 +101,12 @@
 	oldfs = get_fs();
 	set_fs(get_ds());
 
+	// Allow interception of SIGKILL only
+	// Don't allow other signals to interrupt the transmission
 	spin_lock_irqsave(&current->sigmask_lock, flags);
 	oldset = current->blocked;
 	sigfillset(&current->blocked);
+	sigdelsetmask(&current->blocked, sigmask(SIGKILL));
 	recalc_sigpending(current);
 	spin_unlock_irqrestore(&current->sigmask_lock, flags);
 
@@ -126,6 +128,17 @@
 		else
 			result = sock_recvmsg(sock, &msg, size, 0);
 
+		if (signal_pending(current)) {
+			siginfo_t info;
+			spin_lock_irqsave(&current->sigmask_lock, flags);
+			printk(KERN_WARNING "NBD (pid %d: %s) got signal %d\n",
+				current->pid, current->comm, 
+				dequeue_signal(&current->blocked, &info));
+			spin_unlock_irqrestore(&current->sigmask_lock, flags);
+			result = -EINTR;
+			break;
+		}
+
 		if (result <= 0) {
 #ifdef PARANOIA
 			printk(KERN_ERR "NBD: %s - sock=%ld at buf=%ld, size=%d returned %d.\n",
@@ -337,8 +350,25 @@
 		spin_unlock_irq(&io_request_lock);
 
 		down (&lo->queue_lock);
+		if (!lo->file) {
+			up(&lo->queue_lock);
+			printk(KERN_ERR "nbd: failed between accept and semaphore, file lost\n");
+			req->errors++;
+			nbd_end_request(req);
+			spin_lock_irq(&io_request_lock);
+			continue;
+		}
+
 		list_add(&req->queue, &lo->queue_head);
 		nbd_send_req(lo->sock, req);	/* Why does this block?         */
+		if (req->errors) {
+			printk(KERN_ERR "nbd: nbd_send_req failed\n");
+			list_del(&req->queue);
+			up(&lo->queue_lock);
+			nbd_end_request(req);
+			spin_lock_irq(&io_request_lock);
+			continue;
+		}
 		up (&lo->queue_lock);
 
 		spin_lock_irq(&io_request_lock);
@@ -388,12 +418,14 @@
 			printk(KERN_ERR "nbd: Some requests are in progress -> can not turn off.\n");
 			return -EBUSY;
 		}
-		up(&lo->queue_lock);
 		file = lo->file;
-		if (!file)
+		if (!file) {
+			up(&lo->queue_lock);
 			return -EINVAL;
+		}
 		lo->file = NULL;
 		lo->sock = NULL;
+		up(&lo->queue_lock);
 		fput(file);
 		return 0;
 	case NBD_SET_SOCK:
@@ -434,9 +466,30 @@
 		if (!lo->file)
 			return -EINVAL;
 		nbd_do_it(lo);
+		/* on return tidy up in case we have a signal */
+		printk(KERN_WARNING "nbd: nbd_do_it returned\n");
+		/* Forcibly shutdown the socket causing all listeners
+		 * to error
+		 *
+		 * FIXME: This code is duplicated from sys_shutdown, but
+		 * there should be a more generic interface rather than
+		 * calling socket ops directly here */
+		lo->sock->ops->shutdown(lo->sock, 2);
+		down(&lo->queue_lock);
+		printk(KERN_WARNING "nbd: lock acquired\n");
+		nbd_clear_que(lo);
+		printk(KERN_WARNING "nbd: queue cleared\n");
+		file = lo->file;
+		lo->file = NULL;
+		lo->sock = NULL;
+		up(&lo->queue_lock);
+		if (file)
+			fput(file);
 		return lo->harderror;
 	case NBD_CLEAR_QUE:
+		down(&lo->queue_lock);
 		nbd_clear_que(lo);
+		up(&lo->queue_lock);
 		return 0;
 #ifdef PARANOIA
 	case NBD_PRINT_DEBUG:
@@ -511,7 +564,7 @@
 		init_MUTEX(&nbd_dev[i].queue_lock);
 		nbd_blksizes[i] = 1024;
 		nbd_blksize_bits[i] = 10;
-		nbd_bytesizes[i] = 0x7ffffc00; /* 2GB */
+		nbd_bytesizes[i] = ((u64)0x7ffffc00) << 10; /* 2TB */
 		nbd_sizes[i] = nbd_bytesizes[i] >> BLOCK_SIZE_BITS;
 		register_disk(NULL, MKDEV(MAJOR_NR,i), 1, &nbd_fops,
 				nbd_bytesizes[i]>>9);
Comment 2 Larry Woodman 2005-09-28 07:34:12 EDT
I cant make this level of change in AS2.1 at this point.  Please re-open this
bug if this stilo needs attention.

Larry Woodman

Note You need to log in before you can comment on or make changes to this bug.