Bug 600528

Summary: [RHEL6.0 PATCH] dm-replicator: mandatory API change for replicator_resume(), replicator_dev_resume() and reference count fix calling dm_table_get_md()
Product: Red Hat Enterprise Linux 6 Reporter: Tom Coughlan <coughlan>
Component: kernelAssignee: Heinz Mauelshagen <heinzm>
Status: CLOSED DUPLICATE QA Contact: Red Hat Kernel QE team <kernel-qe>
Severity: medium Docs Contact:
Priority: medium    
Version: 6.0CC: lvm-team
Target Milestone: rc   
Target Release: ---   
Hardware: All   
OS: Linux   
Whiteboard:
Fixed In Version: Doc Type: Bug Fix
Doc Text:
Story Points: ---
Clone Of: Environment:
Last Closed: 2010-06-06 01:57:36 UTC Type: ---
Regression: --- Mount Type: ---
Documentation: --- CRM:
Verified Versions: Category: ---
oVirt Team: --- RHEL 7.3 requirements from Atomic Host:
Cloudforms Team: --- Target Upstream Version:
Embargoed:

Description Tom Coughlan 2010-06-04 22:24:39 UTC
This is a mandatory API change to support delaying the resume of the replicator
log device to the resume of the first local (head) device.

That ensures all local (head) devices and their related remote devices can be
constructed *before* the log device starts to try copying any log entries left
over from a previous run, hence making sure that the copies succeed and don't
get dropped because of non-existing devices.

A check is being added to replicator_dtr for any existing devices during
destruction of the log device.

The dm_table_get_md() semantics changed to not take out a reference on
the MD, hence this patch removes the MD reference drop (dm_put() calls)
from dm-replicator avoiding a related OOPS.

Header include adjustments to allow for compile and
dev_io_params structure initialization adjustments get applied.


Signed-off-by: Heinz Mauelshagen <heinzm>
Reviewed-by: Zdenek Kabelac <zkabelac>
Tested-by: Zdenek Kabelac <zkabelac>
--- 
drivers/md/dm-repl-log-ringbuffer.c |   44 +++++++++++++++++++---------------
 drivers/md/dm-repl-slink-blockdev.c |    6 +----
 drivers/md/dm-repl.c                |   37 ++++++++++++++++++++++-------
 3 files changed, 54 insertions(+), 33 deletions(-)

diff --git a/drivers/md/dm-repl-log-ringbuffer.c b/drivers/md/dm-repl-log-ringbuffer.c
index c39eb4a..b60617b 100644
--- a/drivers/md/dm-repl-log-ringbuffer.c
+++ b/drivers/md/dm-repl-log-ringbuffer.c
@@ -30,7 +30,8 @@
 #include <linux/crc32.h>
 #include <linux/dm-io.h>
 #include <linux/kernel.h>
-#include <linux/vmalloc.h>
+#include <linux/module.h>
+#include <linux/slab.h>
 #include <linux/version.h>
 
 static const char version[] = "v0.028";
@@ -1446,9 +1447,11 @@ header_io(int rw, struct header_io_params *hio)
 	void *disk_header = io->alloc_disk(ring);
 	struct dev_io_params dio = {
 		&l->params.dev, hio->sector, io->size,
-		.mem.type = DM_IO_KMEM,
-		.mem.ptr.addr = disk_header,
-		.mem.offset = 0,
+		.mem = {
+			.type = DM_IO_KMEM,
+			.offset = 0,
+			.ptr.addr = disk_header,
+		},
 		.notify = { NULL, NULL}
 	};
 
@@ -2084,19 +2087,21 @@ ringbuffer_write_entry(struct repl_log *l, struct bio *bio)
 	struct dev_io_params dio[] = {
 		{ /* Data IO specs. */
 		  &l->params.dev, header->pos.data, bio->bi_size,
-		  .mem.type = DM_IO_BVEC,
-		  .mem.ptr.bvec = bio_iovec(bio),
-		  .mem.offset = bio_offset(bio),
-		  .notify.fn = data_endio,
-		  .notify.context = entry,
+		  .mem = {
+			.type = DM_IO_BVEC,
+			.offset = bio_offset(bio),
+			.ptr.bvec = bio_iovec(bio),
+		  },
+		  .notify = { data_endio, entry }
 		},
 		{ /* Header IO specs. */
 		  &l->params.dev, header->pos.header, DATA_HEADER_DISK_SIZE,
-		  .mem.type = DM_IO_KMEM,
-		  .mem.ptr.addr = disk_header,
-		  .mem.offset = 0,
-		  .notify.fn = header_endio,
-		  .notify.context = entry,
+		  .mem = {
+			.type = DM_IO_KMEM,
+			.offset = 0,
+			.ptr.addr = disk_header,
+		  },
+		  .notify = { header_endio, entry }
 		},
 	};
 
@@ -2160,11 +2165,12 @@ ringbuffer_read_bio_vec(struct repl_log *l,
 	struct dev_io_params dio = {
 		&l->params.dev,
 		entry->data.header->pos.data + offset, bio->bi_size,
-		.mem.type = DM_IO_BVEC,
-		.mem.ptr.bvec = bio_iovec(bio),
-		.mem.offset = bio_offset(bio),
-		.notify.fn = read_bio_vec_endio,
-		.notify.context = entry,
+		.mem = {
+			.type = DM_IO_BVEC,
+			.offset = bio_offset(bio),
+			.ptr.bvec = bio_iovec(bio),
+		},
+		.notify = { read_bio_vec_endio, entry }
 	};
 
 	DMDEBUG_LIMIT("in  %s %u", __func__, jiffies_to_msecs(jiffies));
diff --git a/drivers/md/dm-repl-slink-blockdev.c b/drivers/md/dm-repl-slink-blockdev.c
index 20e24e4..1fff8e6 100644
--- a/drivers/md/dm-repl-slink-blockdev.c
+++ b/drivers/md/dm-repl-slink-blockdev.c
@@ -31,6 +31,7 @@ static const char version[] = "v0.022";
 #include <linux/dm-kcopyd.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 
 #define	DM_MSG_PREFIX	"dm-repl-slink-blockdev"
 #define	DAEMON		DM_MSG_PREFIX	"d"
@@ -864,8 +865,6 @@ static struct sdev *dev_get_by_bdev(struct slink *sl,
 		struct mapped_device *md = dm_table_get_md(dev->ti->table);
 		struct gendisk *gd = dm_disk(md);
 
-		dm_put(md);
-
 		if (bdev->bd_disk == gd)
 			return dev_get(dev);
 	}
@@ -2382,11 +2381,8 @@ blockdev_dev_number(struct dm_repl_slink *slink, struct block_device *bdev)
 		md = dm_table_get_md(dev->ti->table);
 		if (bdev->bd_disk == dm_disk(md)) {
 			read_unlock(&sl->lock);
-			dm_put(md);
 			return dev->dev.number;
 		}
-
-		dm_put(md);
 	}
 
 	read_unlock(&sl->lock);
diff --git a/drivers/md/dm-repl.c b/drivers/md/dm-repl.c
index 40f2592..27718a5 100644
--- a/drivers/md/dm-repl.c
+++ b/drivers/md/dm-repl.c
@@ -42,6 +42,7 @@ static const char version[] = "v0.028";
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/namei.h>
+#include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/vmalloc.h>
 #include <linux/workqueue.h>
@@ -125,7 +126,9 @@ struct slink_c {
 enum replog_c_flags {
 	REPLOG_C_BLOCKED,
 	REPLOG_C_DEVEL_STATS,
-	REPLOG_C_IO_INFLIGHT
+	REPLOG_C_IO_INFLIGHT,
+	REPLOG_C_RESUME_TWICE,
+	REPLOG_C_DEV_RESUME_TWICE,
 };
 struct replog_c {
 	struct {
@@ -164,6 +167,8 @@ struct replog_c {
 DM_BITOPS(ReplBlocked, replog_c, REPLOG_C_BLOCKED);
 DM_BITOPS(ReplDevelStats, replog_c, REPLOG_C_DEVEL_STATS);
 DM_BITOPS(ReplIoInflight, replog_c, REPLOG_C_IO_INFLIGHT);
+DM_BITOPS(ReplResumeTwice, replog_c, REPLOG_C_RESUME_TWICE);
+DM_BITOPS(ReplDevResumeTwice, replog_c, REPLOG_C_DEV_RESUME_TWICE);
 
 /*
  * Per device replication context kept with any mapped device and
@@ -381,7 +386,6 @@ get_ctrl_dev(struct dm_target *ti)
 
 	dev = bdev->bd_dev;
 	bdput(bdev);
-	dm_put(md);
 	return dev;
 }
 
@@ -679,7 +683,6 @@ dc_set_bdi(struct device_c *dc)
 	/* Set congested function and data. */
 	bdi->congested_fn = repl_congested;
 	bdi->congested_data = dc;
-	dm_put(md);
 }
 
 /* Get device on slink and unlink it from the list of devices. */
@@ -834,7 +837,7 @@ static int
 device_ctr(enum ctr_call_type call_type, struct dm_target *ti,
 	   struct replog_c *replog_c,
 	   const char *replicator_path, unsigned dev_nr,
-	   unsigned argc, char **argv, int *args_used)
+	   unsigned argc, char **argv, unsigned *args_used)
 {
 	int dev_params, dirtylog_params, params, r, slink_nr;
 	struct dm_repl_slink *slink;	/* Site link handle. */
@@ -915,7 +918,7 @@ device_ctr(enum ctr_call_type call_type, struct dm_target *ti,
 	 *
 	 * Dummy start/size sufficient here.
 	 */
-	r = dm_get_device(ti, replicator_path,
+	r = dm_get_device(ti, replicator_path, 
 			  FMODE_WRITE, &dc->replicator_dev);
 	if (unlikely(r < 0)) {
 		ti_or_dmerr(call_type, ti,
@@ -1032,8 +1035,6 @@ _replicator_dev_ctr(enum ctr_call_type call_type, struct dm_target *ti,
 
 	/*
 	 * Get reference on replicator control device.
-	 *
-	 * Dummy start/size sufficient here.
 	 */
 	r = dm_get_device(ti, replicator_path, FMODE_WRITE, &ctrl_dev);
 	if (unlikely(r < 0)) {
@@ -1110,7 +1111,7 @@ err_args:
 	ti_or_dmerr(call_type, ti, "Not enough device arguments");
 	return -EINVAL;
 }
-
+//
 /* Constructor method. */
 static int
 replicator_dev_ctr(struct dm_target *ti, unsigned argc, char **argv)
@@ -1167,6 +1168,7 @@ replicator_dev_map(struct dm_target *ti, struct bio *bio,
 

 /* Replication device suspend/resume helper. */
+static void replicator_resume(struct dm_target *ti);
 enum suspend_resume_type { POSTSUSPEND, RESUME };
 static void
 _replicator_dev_suspend_resume(struct dm_target *ti,
@@ -1204,8 +1206,12 @@ _replicator_dev_suspend_resume(struct dm_target *ti,
 			slinks++;
 	}
 
-	if (type == RESUME && slinks)
+	if (type == RESUME && slinks) {
+		if (!TestSetReplDevResumeTwice(replog_c))
+			replicator_resume(replog_c->ti);
+
 		wake_do_repl(replog_c);
+	}
 }
 
 /* Replication device post suspend method. */
@@ -1379,6 +1385,15 @@ replicator_dtr(struct dm_target *ti)
 	replog = replog_c->replog;
 	_BUG_ON_PTR(replog);
 
+	/* Check if any devices still exist. */
+	if (!list_empty(&replog_c->lists.slink_c) &&
+	    !list_empty(&(list_first_entry(&replog_c->lists.slink_c,
+					   struct slink_c,
+					   lists.slink_c)->lists.dc))) {
+		DMERR("Destruction of replication log with devices rejected.");
+		return;
+	}
+
 	/* Pull out replog_c to process destruction cleanly. */
 	mutex_lock(&replog_c_list_mutex);
 	list_del_init(&replog_c->lists.replog_c);
@@ -1676,6 +1691,10 @@ _replicator_suspend_resume(struct replog_c *replog_c,
 		replog->ops->postsuspend(replog, -1);
 		break;
 	case RESUME:
+		/* Initially avoid resuming and wait for second call. */
+		if (!TestSetReplResumeTwice(replog_c))
+			return;
+
 		replog->ops->resume(replog, -1);
 		ClearReplBlocked(replog_c);
 		wake_do_repl(replog_c);

Comment 1 RHEL Program Management 2010-06-04 22:43:16 UTC
This request was evaluated by Red Hat Product Management for inclusion in a Red
Hat Enterprise Linux major release.  Product Management has requested further
review of this request by Red Hat Engineering, for potential inclusion in a Red
Hat Enterprise Linux Major release.  This request is not yet committed for
inclusion.

Comment 2 Tom Coughlan 2010-06-06 01:57:36 UTC

*** This bug has been marked as a duplicate of bug 594922 ***