Commit a654b9d8 authored by NeilBrown's avatar NeilBrown Committed by Linus Torvalds
Browse files

[PATCH] md: allow md intent bitmap to be stored near the superblock.



This provides an alternative to storing the bitmap in a separate file.  The
bitmap can be stored at a given offset from the superblock.  Obviously the
creator of the array must make sure this doesn't intersect with data....
Placing the bitmap just after the superblock is good for version-0.90 superblocks.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 3d310eb7
Loading
Loading
Loading
Loading
+106 −26
Original line number Diff line number Diff line
@@ -116,7 +116,7 @@ static unsigned char *bitmap_alloc_page(struct bitmap *bitmap)
	if (!page)
		printk("%s: bitmap_alloc_page FAILED\n", bmname(bitmap));
	else
		printk("%s: bitmap_alloc_page: allocated page at %p\n",
		PRINTK("%s: bitmap_alloc_page: allocated page at %p\n",
			bmname(bitmap), page);
	return page;
}
@@ -258,13 +258,61 @@ char *file_path(struct file *file, char *buf, int count)
 * basic page I/O operations
 */

/* IO operations when bitmap is stored near all superblocks */
/*
 * Read one page of the bitmap from a usable member device.
 *
 * @mddev:  array whose member devices carry the bitmap
 * @offset: start of the bitmap relative to each device's superblock, in
 *          512-byte sectors (it is added to a sector number below)
 * @index:  page number within the bitmap; recorded in page->index on success
 *
 * Every device that is marked in_sync and not faulty is tried once, in list
 * order, until a read succeeds.
 *
 * Returns the newly allocated page on success, ERR_PTR(-ENOMEM) if no page
 * could be allocated, or ERR_PTR(-EIO) if no usable device could supply the
 * page.  On the -EIO path the page is freed again, so the caller owns a page
 * only when the return value is not an error pointer.
 */
static struct page *read_sb_page(mddev_t *mddev, long offset, unsigned long index)
{
	mdk_rdev_t *rdev;
	struct list_head *tmp;
	struct page *page = alloc_page(GFP_KERNEL);
	sector_t target;

	if (!page)
		return ERR_PTR(-ENOMEM);

	ITERATE_RDEV(mddev, rdev, tmp) {
		if (!rdev->in_sync || rdev->faulty)
			continue;

		/* NOTE(review): sb_offset looks like it is kept in 1K units,
		 * hence the << 1 to get sectors - confirm against md_k.h.
		 */
		target = (rdev->sb_offset << 1) + offset + index * (PAGE_SIZE/512);

		/* sync_page_io() returns non-zero when the read succeeded */
		if (sync_page_io(rdev->bdev, target, PAGE_SIZE, page, READ)) {
			page->index = index;
			return page;
		}
		/* Read failed: move on to the next device rather than
		 * retrying this one, which could otherwise loop forever.
		 */
	}

	/* No device could supply the data; don't leak the page. */
	__free_page(page);
	return ERR_PTR(-EIO);
}

/*
 * Submit one bitmap page for writing to every usable member device.
 *
 * @mddev:  array whose member devices carry the bitmap
 * @offset: start of the bitmap relative to each device's superblock, in
 *          512-byte sectors
 * @page:   page to write; page->index selects its position in the bitmap
 * @wait:   if non-zero, block until mddev->pending_writes drops to zero
 *
 * Devices that are not in_sync, or that are faulty, are skipped.
 * Always returns 0.
 */
static int write_sb_page(mddev_t *mddev, long offset, struct page *page, int wait)
{
	struct list_head *pos;
	mdk_rdev_t *dev;

	ITERATE_RDEV(mddev, dev, pos) {
		sector_t where;

		if (!dev->in_sync || dev->faulty)
			continue;

		where = (dev->sb_offset<<1) + offset
			+ page->index * (PAGE_SIZE/512);
		md_super_write(mddev, dev, where, PAGE_SIZE, page);
	}

	if (!wait)
		return 0;

	/* presumably md_super_write() only queues the I/O and accounts it in
	 * pending_writes; verify against md.c
	 */
	wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0);
	return 0;
}

/*
 * write out a page
 * write out a page to a file
 */
static int write_page(struct bitmap *bitmap, struct page *page, int wait)
{
	int ret = -ENOMEM;

	if (bitmap->file == NULL)
		return write_sb_page(bitmap->mddev, bitmap->offset, page, wait);

	lock_page(page);

	ret = page->mapping->a_ops->prepare_write(NULL, page, 0, PAGE_SIZE);
@@ -394,7 +442,12 @@ static int bitmap_read_sb(struct bitmap *bitmap)
	int err = -EINVAL;

	/* page 0 is the superblock, read it... */
	if (bitmap->file)
		bitmap->sb_page = read_page(bitmap->file, 0, &bytes_read);
	else {
		bitmap->sb_page = read_sb_page(bitmap->mddev, bitmap->offset, 0);
		bytes_read = PAGE_SIZE;
	}
	if (IS_ERR(bitmap->sb_page)) {
		err = PTR_ERR(bitmap->sb_page);
		bitmap->sb_page = NULL;
@@ -625,6 +678,7 @@ static void bitmap_file_kick(struct bitmap *bitmap)
	bitmap_mask_state(bitmap, BITMAP_STALE, MASK_SET);
	bitmap_update_sb(bitmap);

	if (bitmap->file) {
		path = kmalloc(PAGE_SIZE, GFP_KERNEL);
		if (path)
			ptr = file_path(bitmap->file, path, PAGE_SIZE);
@@ -633,6 +687,7 @@ static void bitmap_file_kick(struct bitmap *bitmap)
		       bmname(bitmap), ptr ? ptr : "");

		kfree(path);
	}

	bitmap_file_put(bitmap);

@@ -676,7 +731,7 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
	void *kaddr;
	unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap);

	if (!bitmap->file || !bitmap->filemap) {
	if (!bitmap->filemap) {
		return;
	}

@@ -715,7 +770,7 @@ int bitmap_unplug(struct bitmap *bitmap)
	 * flushed out to disk */
	for (i = 0; i < bitmap->file_pages; i++) {
		spin_lock_irqsave(&bitmap->lock, flags);
		if (!bitmap->file || !bitmap->filemap) {
		if (!bitmap->filemap) {
			spin_unlock_irqrestore(&bitmap->lock, flags);
			return 0;
		}
@@ -732,11 +787,15 @@ int bitmap_unplug(struct bitmap *bitmap)
				return 1;
	}
	if (wait) { /* if any writes were performed, we need to wait on them */
		if (bitmap->file) {
			spin_lock_irq(&bitmap->write_lock);
			wait_event_lock_irq(bitmap->write_wait,
					    list_empty(&bitmap->complete_pages), bitmap->write_lock,
					    wake_up_process(bitmap->writeback_daemon->tsk));
			spin_unlock_irq(&bitmap->write_lock);
		} else
			wait_event(bitmap->mddev->sb_wait,
				   atomic_read(&bitmap->mddev->pending_writes)==0);
	}
	return 0;
}
@@ -764,7 +823,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync)
	chunks = bitmap->chunks;
	file = bitmap->file;

	BUG_ON(!file);
	BUG_ON(!file && !bitmap->offset);

#if INJECT_FAULTS_3
	outofdate = 1;
@@ -779,7 +838,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync)

	num_pages = (bytes + sizeof(bitmap_super_t) + PAGE_SIZE - 1) / PAGE_SIZE;

	if (i_size_read(file->f_mapping->host) < bytes + sizeof(bitmap_super_t)) {
	if (file && i_size_read(file->f_mapping->host) < bytes + sizeof(bitmap_super_t)) {
		printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n",
			bmname(bitmap),
			(unsigned long) i_size_read(file->f_mapping->host),
@@ -816,14 +875,18 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync)
				 */
				page = bitmap->sb_page;
				offset = sizeof(bitmap_super_t);
			} else {
			} else if (file) {
				page = read_page(file, index, &dummy);
				offset = 0;
			} else {
				page = read_sb_page(bitmap->mddev, bitmap->offset, index);
				offset = 0;
			}
			if (IS_ERR(page)) { /* read error */
				ret = PTR_ERR(page);
				goto out;
			}
				offset = 0;
			}

			oldindex = index;
			oldpage = page;
			kmap(page);
@@ -874,6 +937,19 @@ out:
	return ret;
}

/*
 * Flag every page of the on-disk bitmap as needing to be written.
 *
 * No I/O is issued here: BITMAP_PAGE_NEEDWRITE is merely set in the
 * filemap_attr entry of each page that covers the bitmap superblock and
 * the bits for all of bitmap->chunks.
 */
void bitmap_write_all(struct bitmap *bitmap)
{
	/* one bit per chunk, rounded up to whole bytes, plus the superblock */
	unsigned long total_bytes = (bitmap->chunks + 7) / 8 + sizeof(bitmap_super_t);
	unsigned long page_count = (total_bytes + PAGE_SIZE-1) / PAGE_SIZE;
	unsigned long idx;

	for (idx = page_count; idx > 0; idx--)
		bitmap->filemap_attr[idx - 1] |= BITMAP_PAGE_NEEDWRITE;
}


static void bitmap_count_page(struct bitmap *bitmap, sector_t offset, int inc)
{
@@ -913,7 +989,7 @@ int bitmap_daemon_work(struct bitmap *bitmap)
	for (j = 0; j < bitmap->chunks; j++) {
		bitmap_counter_t *bmc;
		spin_lock_irqsave(&bitmap->lock, flags);
		if (!bitmap->file || !bitmap->filemap) {
		if (!bitmap->filemap) {
			/* error or shutdown */
			spin_unlock_irqrestore(&bitmap->lock, flags);
			break;
@@ -1072,6 +1148,7 @@ static int bitmap_start_daemon(struct bitmap *bitmap, mdk_thread_t **ptr,

	spin_lock_irqsave(&bitmap->lock, flags);
	*ptr = NULL;

	if (!bitmap->file) /* no need for daemon if there's no backing file */
		goto out_unlock;

@@ -1416,9 +1493,11 @@ int bitmap_create(mddev_t *mddev)

	BUG_ON(sizeof(bitmap_super_t) != 256);

	if (!file) /* bitmap disabled, nothing to do */
	if (!file && !mddev->bitmap_offset) /* bitmap disabled, nothing to do */
		return 0;

	BUG_ON(file && mddev->bitmap_offset);

	bitmap = kmalloc(sizeof(*bitmap), GFP_KERNEL);
	if (!bitmap)
		return -ENOMEM;
@@ -1438,7 +1517,8 @@ int bitmap_create(mddev_t *mddev)
		return -ENOMEM;

	bitmap->file = file;
	get_file(file);
	bitmap->offset = mddev->bitmap_offset;
	if (file) get_file(file);
	/* read superblock from bitmap file (this sets bitmap->chunksize) */
	err = bitmap_read_sb(bitmap);
	if (err)
+38 −2
Original line number Diff line number Diff line
@@ -337,7 +337,7 @@ static int bi_complete(struct bio *bio, unsigned int bytes_done, int error)
	return 0;
}

static int sync_page_io(struct block_device *bdev, sector_t sector, int size,
int sync_page_io(struct block_device *bdev, sector_t sector, int size,
		   struct page *page, int rw)
{
	struct bio *bio = bio_alloc(GFP_NOIO, 1);
@@ -609,6 +609,17 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
		memcpy(mddev->uuid+12,&sb->set_uuid3, 4);

		mddev->max_disks = MD_SB_DISKS;

		if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
		    mddev->bitmap_file == NULL) {
			if (mddev->level != 1) {
				/* FIXME use a better test */
				printk(KERN_WARNING "md: bitmaps only support for raid1\n");
				return -EINVAL;
			}
			mddev->bitmap_offset = (MD_SB_BYTES >> 9);
		}

	} else if (mddev->pers == NULL) {
		/* Insist on good event counter while assembling */
		__u64 ev1 = md_event(sb);
@@ -702,6 +713,9 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
	sb->layout = mddev->layout;
	sb->chunk_size = mddev->chunk_size;

	if (mddev->bitmap && mddev->bitmap_file == NULL)
		sb->state |= (1<<MD_SB_BITMAP_PRESENT);

	sb->disks[0].state = (1<<MD_DISK_REMOVED);
	ITERATE_RDEV(mddev,rdev2,tmp) {
		mdp_disk_t *d;
@@ -898,6 +912,15 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
		memcpy(mddev->uuid, sb->set_uuid, 16);

		mddev->max_disks =  (4096-256)/2;

		if ((le32_to_cpu(sb->feature_map) & 1) &&
		    mddev->bitmap_file == NULL ) {
			if (mddev->level != 1) {
				printk(KERN_WARNING "md: bitmaps only supported for raid1\n");
				return -EINVAL;
			}
			mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset);
		}
	} else if (mddev->pers == NULL) {
		/* Insist of good event counter while assembling */
		__u64 ev1 = le64_to_cpu(sb->events);
@@ -960,6 +983,11 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
	else
		sb->resync_offset = cpu_to_le64(0);

	if (mddev->bitmap && mddev->bitmap_file == NULL) {
		sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
		sb->feature_map = cpu_to_le32(1);
	}

	max_dev = 0;
	ITERATE_RDEV(mddev,rdev2,tmp)
		if (rdev2->desc_nr+1 > max_dev)
@@ -2406,7 +2434,8 @@ static int set_bitmap_file(mddev_t *mddev, int fd)
			mdname(mddev));
		fput(mddev->bitmap_file);
		mddev->bitmap_file = NULL;
	}
	} else
		mddev->bitmap_offset = 0; /* file overrides offset */
	return err;
}

@@ -3774,6 +3803,13 @@ void md_check_recovery(mddev_t *mddev)
			set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
			if (!spares)
				set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
			if (spares && mddev->bitmap && ! mddev->bitmap->file) {
				/* We are adding a device or devices to an array
				 * which has the bitmap stored on all devices.
				 * So make sure all bitmap pages get written
				 */
				bitmap_write_all(mddev->bitmap);
			}
			mddev->sync_thread = md_register_thread(md_do_sync,
								mddev,
								"%s_resync");
+2 −0
Original line number Diff line number Diff line
@@ -217,6 +217,7 @@ struct bitmap {
	/* bitmap spinlock */
	spinlock_t lock;

	long offset; /* offset from superblock if file is NULL */
	struct file *file; /* backing disk file */
	struct page *sb_page; /* cached copy of the bitmap file superblock */
	struct page **filemap; /* list of cache pages for the file */
@@ -255,6 +256,7 @@ void bitmap_print_sb(struct bitmap *bitmap);
int bitmap_update_sb(struct bitmap *bitmap);

int  bitmap_setallbits(struct bitmap *bitmap);
void bitmap_write_all(struct bitmap *bitmap);

/* these are exported */
int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors);
+14 −1
Original line number Diff line number Diff line
@@ -60,7 +60,14 @@
 */
#define MD_MAJOR_VERSION                0
#define MD_MINOR_VERSION                90
#define MD_PATCHLEVEL_VERSION           1
/*
 * MD_PATCHLEVEL_VERSION indicates kernel functionality.
 * >=1 means different superblock formats are selectable using SET_ARRAY_INFO
 *     and major_version/minor_version accordingly
 * >=2 means that Internal bitmaps are supported by setting MD_SB_BITMAP_PRESENT
 *     in the super status byte
 */
#define MD_PATCHLEVEL_VERSION           2

extern int register_md_personality (int p_num, mdk_personality_t *p);
extern int unregister_md_personality (int p_num);
@@ -78,6 +85,12 @@ extern void md_unplug_mddev(mddev_t *mddev);

extern void md_print_devices (void);

extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
			   sector_t sector, int size, struct page *page);
extern int sync_page_io(struct block_device *bdev, sector_t sector, int size,
			struct page *page, int rw);


#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }

#endif 
+4 −0
Original line number Diff line number Diff line
@@ -273,6 +273,10 @@ struct mddev_s

	struct bitmap                   *bitmap; /* the bitmap for the device */
	struct file			*bitmap_file; /* the bitmap file */
	long				bitmap_offset; /* offset from superblock of
							* start of bitmap. May be
							* negative, but not '0'
							*/

	struct list_head		all_mddevs;
};
Loading