Login
[x]
Log in using an account from:
Fedora Account System
Red Hat Associate
Red Hat Customer
Or log in using a Red Hat Bugzilla account
Forgot Password
Login:
Hide Forgot
Create an Account
Red Hat Bugzilla – Attachment 150774 Details for
Bug 233653
race condition in nbd driver triggers BUG in kunmap and kernel panic
[?]
New
Simple Search
Advanced Search
My Links
Browse
Requests
Reports
Current State
Search
Tabular reports
Graphical reports
Duplicates
Other Reports
User Changes
Plotly Reports
Bug Status
Bug Severity
Non-Defaults
|
Product Dashboard
Help
Page Help!
Bug Writing Guidelines
What's new
Browser Support Policy
5.0.4.rh83 Release notes
FAQ
Guides index
User guide
Web Services
Contact
Legal
This site requires JavaScript to be enabled to function correctly, please enable it.
[patch]
patch #1
nbd-fix-tx-rx-race-condition.patch (text/plain), 9.34 KB, created by
Paul Clements
on 2007-03-23 16:27:00 UTC
(
hide
)
Description:
patch #1
Filename:
MIME Type:
Creator:
Paul Clements
Created:
2007-03-23 16:27:00 UTC
Size:
9.34 KB
patch
obsolete
>The patch titled > > nbd: fix TX/RX race condition > >has been added to the -mm tree. Its filename is > > nbd-fix-tx-rx-race-condition.patch > > >From: Herbert Xu <herbert@gondor.apana.org.au> > >Janos Haar of First NetCenter Bt. reported numerous crashes involving the >NBD driver. With his help, this was tracked down to bogus bio vectors >which in turn was the result of a race condition between the >receive/transmit routines in the NBD driver. > >The bug manifests itself like this: > >CPU0 CPU1 >do_nbd_request > add req to queuelist > nbd_send_request > send req head > for each bio > kmap > send > nbd_read_stat > nbd_find_request > nbd_end_request > kunmap > >When CPU1 finishes nbd_end_request, the request and all its associated >bio's are freed. So when CPU0 calls kunmap whose argument is derived from >the last bio, it may crash. > >Under normal circumstances, the race occurs only on the last bio. However, >if an error is encountered on the remote NBD server (such as an incorrect >magic number in the request), or if there were a bug in the server, it is >possible for the nbd_end_request to occur any time after the request's >addition to the queuelist. > >The following patch fixes this problem by making sure that requests are not >added to the queuelist until after they have been completed transmission. > >In order for the receiving side to be ready for responses involving >requests still being transmitted, the patch introduces the concept of the >active request. > >When a response matches the current active request, its processing is >delayed until after the tranmission has come to a stop. > >This has been tested by Janos and it has been successful in curing this >race condition. 
> >Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> >Cc: <djani22@dynamicweb.hu> >Cc: Paul Clements <Paul.Clements@SteelEye.com> >Signed-off-by: Andrew Morton <akpm@osdl.org> >--- > > drivers/block/nbd.c | 119 ++++++++++++++++++++++++---------------------------- > include/linux/nbd.h | 8 +++ > 2 files changed, 65 insertions(+), 62 deletions(-) > >diff -puN drivers/block/nbd.c~nbd-fix-tx-rx-race-condition drivers/block/nbd.c >--- devel/drivers/block/nbd.c~nbd-fix-tx-rx-race-condition 2005-11-18 19:43:52.000000000 -0800 >+++ devel-akpm/drivers/block/nbd.c 2005-11-18 19:43:52.000000000 -0800 >@@ -54,11 +54,15 @@ > #include <linux/errno.h> > #include <linux/file.h> > #include <linux/ioctl.h> >+#include <linux/compiler.h> >+#include <linux/err.h> >+#include <linux/kernel.h> > #include <net/sock.h> > > #include <linux/devfs_fs_kernel.h> > > #include <asm/uaccess.h> >+#include <asm/system.h> > #include <asm/types.h> > > #include <linux/nbd.h> >@@ -230,14 +234,6 @@ static int nbd_send_req(struct nbd_devic > request.len = htonl(size); > memcpy(request.handle, &req, sizeof(req)); > >- down(&lo->tx_lock); >- >- if (!sock || !lo->sock) { >- printk(KERN_ERR "%s: Attempted send on closed socket\n", >- lo->disk->disk_name); >- goto error_out; >- } >- > dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%luB)\n", > lo->disk->disk_name, req, > nbdcmd_to_ascii(nbd_cmd(req)), >@@ -276,11 +272,9 @@ static int nbd_send_req(struct nbd_devic > } > } > } >- up(&lo->tx_lock); > return 0; > > error_out: >- up(&lo->tx_lock); > return 1; > } > >@@ -289,9 +283,14 @@ static struct request *nbd_find_request( > struct request *req; > struct list_head *tmp; > struct request *xreq; >+ int err; > > memcpy(&xreq, handle, sizeof(xreq)); > >+ err = wait_event_interruptible(lo->active_wq, lo->active_req != xreq); >+ if (unlikely(err)) >+ goto out; >+ > spin_lock(&lo->queue_lock); > list_for_each(tmp, &lo->queue_head) { > req = list_entry(tmp, struct request, queuelist); >@@ -302,7 +301,11 
@@ static struct request *nbd_find_request( > return req; > } > spin_unlock(&lo->queue_lock); >- return NULL; >+ >+ err = -ENOENT; >+ >+out: >+ return ERR_PTR(err); > } > > static inline int sock_recv_bvec(struct socket *sock, struct bio_vec *bvec) >@@ -331,7 +334,11 @@ static struct request *nbd_read_stat(str > goto harderror; > } > req = nbd_find_request(lo, reply.handle); >- if (req == NULL) { >+ if (unlikely(IS_ERR(req))) { >+ result = PTR_ERR(req); >+ if (result != -ENOENT) >+ goto harderror; >+ > printk(KERN_ERR "%s: Unexpected reply (%p)\n", > lo->disk->disk_name, reply.handle); > result = -EBADR; >@@ -395,19 +402,21 @@ static void nbd_clear_que(struct nbd_dev > > BUG_ON(lo->magic != LO_MAGIC); > >- do { >- req = NULL; >- spin_lock(&lo->queue_lock); >- if (!list_empty(&lo->queue_head)) { >- req = list_entry(lo->queue_head.next, struct request, queuelist); >- list_del_init(&req->queuelist); >- } >- spin_unlock(&lo->queue_lock); >- if (req) { >- req->errors++; >- nbd_end_request(req); >- } >- } while (req); >+ /* >+ * lo->sock == NULL guarantees that the queue will not grow beyond the >+ * current active request. 
>+ */ >+ BUG_ON(lo->sock); >+ wait_event(lo->active_wq, !lo->active_req); >+ smp_rmb(); >+ >+ while (!list_empty(&lo->queue_head)) { >+ req = list_entry(lo->queue_head.next, struct request, >+ queuelist); >+ list_del_init(&req->queuelist); >+ req->errors++; >+ nbd_end_request(req); >+ } > } > > /* >@@ -435,11 +444,6 @@ static void do_nbd_request(request_queue > > BUG_ON(lo->magic != LO_MAGIC); > >- if (!lo->file) { >- printk(KERN_ERR "%s: Request when not-ready\n", >- lo->disk->disk_name); >- goto error_out; >- } > nbd_cmd(req) = NBD_CMD_READ; > if (rq_data_dir(req) == WRITE) { > nbd_cmd(req) = NBD_CMD_WRITE; >@@ -453,32 +457,34 @@ static void do_nbd_request(request_queue > req->errors = 0; > spin_unlock_irq(q->queue_lock); > >- spin_lock(&lo->queue_lock); >- >- if (!lo->file) { >- spin_unlock(&lo->queue_lock); >- printk(KERN_ERR "%s: failed between accept and semaphore, file lost\n", >- lo->disk->disk_name); >+ down(&lo->tx_lock); >+ if (unlikely(!lo->sock)) { >+ up(&lo->tx_lock); >+ printk(KERN_ERR "%s: Attempted send on closed socket\n", >+ lo->disk->disk_name); > req->errors++; > nbd_end_request(req); > spin_lock_irq(q->queue_lock); > continue; > } > >- list_add(&req->queuelist, &lo->queue_head); >- spin_unlock(&lo->queue_lock); >+ lo->active_req = req; > > if (nbd_send_req(lo, req) != 0) { > printk(KERN_ERR "%s: Request send failed\n", > lo->disk->disk_name); >- if (nbd_find_request(lo, (char *)&req) != NULL) { >- /* we still own req */ >- req->errors++; >- nbd_end_request(req); >- } else /* we're racing with nbd_clear_que */ >- printk(KERN_DEBUG "nbd: can't find req\n"); >+ req->errors++; >+ nbd_end_request(req); >+ } else { >+ spin_lock(&lo->queue_lock); >+ list_add(&req->queuelist, &lo->queue_head); >+ spin_unlock(&lo->queue_lock); > } > >+ lo->active_req = NULL; >+ up(&lo->tx_lock); >+ wake_up_all(&lo->active_wq); >+ > spin_lock_irq(q->queue_lock); > continue; > >@@ -529,17 +535,10 @@ static int nbd_ioctl(struct inode *inode > down(&lo->tx_lock); > 
lo->sock = NULL; > up(&lo->tx_lock); >- spin_lock(&lo->queue_lock); > file = lo->file; > lo->file = NULL; >- spin_unlock(&lo->queue_lock); > nbd_clear_que(lo); >- spin_lock(&lo->queue_lock); >- if (!list_empty(&lo->queue_head)) { >- printk(KERN_ERR "nbd: disconnect: some requests are in progress -> please try again.\n"); >- error = -EBUSY; >- } >- spin_unlock(&lo->queue_lock); >+ BUG_ON(!list_empty(&lo->queue_head)); > if (file) > fput(file); > return error; >@@ -598,24 +597,19 @@ static int nbd_ioctl(struct inode *inode > lo->sock = NULL; > } > up(&lo->tx_lock); >- spin_lock(&lo->queue_lock); > file = lo->file; > lo->file = NULL; >- spin_unlock(&lo->queue_lock); > nbd_clear_que(lo); > printk(KERN_WARNING "%s: queue cleared\n", lo->disk->disk_name); > if (file) > fput(file); > return lo->harderror; > case NBD_CLEAR_QUE: >- down(&lo->tx_lock); >- if (lo->sock) { >- up(&lo->tx_lock); >- return 0; /* probably should be error, but that would >- * break "nbd-client -d", so just return 0 */ >- } >- up(&lo->tx_lock); >- nbd_clear_que(lo); >+ /* >+ * This is for compatibility only. The queue is always cleared >+ * by NBD_DO_IT or NBD_CLEAR_SOCK. 
>+ */ >+ BUG_ON(!lo->sock && !list_empty(&lo->queue_head)); > return 0; > case NBD_PRINT_DEBUG: > printk(KERN_INFO "%s: next = %p, prev = %p, head = %p\n", >@@ -688,6 +682,7 @@ static int __init nbd_init(void) > spin_lock_init(&nbd_dev[i].queue_lock); > INIT_LIST_HEAD(&nbd_dev[i].queue_head); > init_MUTEX(&nbd_dev[i].tx_lock); >+ init_waitqueue_head(&nbd_dev[i].active_wq); > nbd_dev[i].blksize = 1024; > nbd_dev[i].bytesize = 0x7ffffc00ULL << 10; /* 2TB */ > disk->major = NBD_MAJOR; >diff -puN include/linux/nbd.h~nbd-fix-tx-rx-race-condition include/linux/nbd.h >--- devel/include/linux/nbd.h~nbd-fix-tx-rx-race-condition 2005-11-18 19:43:52.000000000 -0800 >+++ devel-akpm/include/linux/nbd.h 2005-11-18 19:43:52.000000000 -0800 >@@ -37,18 +37,26 @@ enum { > /* userspace doesn't need the nbd_device structure */ > #ifdef __KERNEL__ > >+#include <linux/wait.h> >+ > /* values for flags field */ > #define NBD_READ_ONLY 0x0001 > #define NBD_WRITE_NOCHK 0x0002 > >+struct request; >+ > struct nbd_device { > int flags; > int harderror; /* Code of hard error */ > struct socket * sock; > struct file * file; /* If == NULL, device is not ready, yet */ > int magic; >+ > spinlock_t queue_lock; > struct list_head queue_head;/* Requests are added here... */ >+ struct request *active_req; >+ wait_queue_head_t active_wq; >+ > struct semaphore tx_lock; > struct gendisk *disk; > int blksize; >_ > >Patches currently in -mm which might be from herbert@gondor.apana.org.au are > >linus.patch >nbd-fix-tx-rx-race-condition.patch >git-cryptodev.patch
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 233653
: 150774 |
150775