kernel-ark/drivers/md/raid1.c

/*
 * raid1.c : Multiple Devices driver for Linux
 *
 * Copyright (C) 1999, 2000, 2001 Ingo Molnar, Red Hat
 *
 * Copyright (C) 1996, 1997, 1998 Ingo Molnar, Miguel de Icaza, Gadi Oxman
 *
 * RAID-1 management functions.
 *
 * Better read-balancing code written by Mika Kuoppala <miku@iki.fi>, 2000
 *
 * Fixes to reconstruction by Jakob Østergaard <jakob@ostenfeld.dk>
 * Various fixes by Neil Brown <neilb@cse.unsw.edu.au>
 *
 * Changes by Peter T. Breuer <ptb@it.uc3m.es> 31/1/2003 to support
 * bitmapped intelligence in resync:
 *
 *      - bitmap marked during normal i/o
 *      - bitmap used to skip nondirty blocks during sync
 *
 * Additions to bitmap code, (C) 2003-2004 Paul Clements, SteelEye Technology:
 * - persistent bitmap code
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * You should have received a copy of the GNU General Public License
 * (for example /usr/src/linux/COPYING); if not, write to the Free
 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include "dm-bio-list.h"
#include <linux/raid/raid1.h>
#include <linux/raid/bitmap.h>

#define DEBUG 0
#if DEBUG
#define PRINTK(x...) printk(x)
#else
#define PRINTK(x...)
#endif

/*
 * Number of guaranteed r1bios in case of extreme VM load:
 */
#define	NR_RAID1_BIOS 256

static mdk_personality_t raid1_personality;

static void unplug_slaves(mddev_t *mddev);


/*
 * mempool allocation callback for r1bio descriptors.
 *
 * @data is the struct pool_info of the array.  The object is sized so that
 * its trailing bios[] array holds pi->raid_disks entries and is handed back
 * zero-filled.  If the allocation fails, the member devices are unplugged
 * via unplug_slaves() and NULL is returned.
 */
static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data)
{
	struct pool_info *pi = data;
	int size = offsetof(r1bio_t, bios[pi->raid_disks]);
	r1bio_t *r1_bio = kmalloc(size, gfp_flags);

	if (!r1_bio) {
		unplug_slaves(pi->mddev);
		return NULL;
	}

	/* start from an all-zero r1bio, including every bios[] slot */
	memset(r1_bio, 0, size);
	return r1_bio;
}

/*
 * mempool free callback for r1bio descriptors: releases the memory obtained
 * by r1bio_pool_alloc().  @data (the pool_info) is not used.
 */
static void r1bio_pool_free(void *r1_bio, void *data)
{
	kfree(r1_bio);
}

#define RESYNC_BLOCK_SIZE (64*1024)
//#define RESYNC_BLOCK_SIZE PAGE_SIZE
#define RESYNC_SECTORS (RESYNC_BLOCK_SIZE >> 9)
#define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE)
#define RESYNC_WINDOW (2048*1024)

/*
 * mempool allocation callback for resync buffers.
 *
 * Builds an r1bio that carries one bio per mirror (pi->raid_disks of them)
 * and attaches RESYNC_PAGES freshly allocated data pages to the bio in
 * slot 0.  On any allocation failure everything obtained so far is released
 * and NULL is returned.
 */
static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
{
	struct pool_info *pi = data;
	r1bio_t *r1_bio;
	struct bio *bio;
	struct page *page;
	int i, j;

	r1_bio = r1bio_pool_alloc(gfp_flags, pi);
	if (!r1_bio) {
		unplug_slaves(pi->mddev);
		return NULL;
	}

	/*
	 * One bio per mirror, filled from the highest slot downwards so that
	 * on failure the slots above j are exactly the ones already allocated.
	 */
	j = pi->raid_disks;
	while (j--) {
		bio = bio_alloc(gfp_flags, RESYNC_PAGES);
		if (!bio)
			goto out_free_bio;
		r1_bio->bios[j] = bio;
	}

	/* The data pages hang off the bio in slot 0. */
	bio = r1_bio->bios[0];
	i = 0;
	while (i < RESYNC_PAGES) {
		page = alloc_page(gfp_flags);
		if (unlikely(!page))
			goto out_free_pages;

		bio->bi_io_vec[i].bv_page = page;
		i++;
	}

	r1_bio->master_bio = NULL;

	return r1_bio;

out_free_pages:
	/* i pages were attached; give them back, last one first */
	while (i-- > 0)
		__free_page(bio->bi_io_vec[i].bv_page);
out_free_bio:
	/* drop every bio in the slots above j */
	for (j++; j < pi->raid_disks; j++)
		bio_put(r1_bio->bios[j]);
	r1bio_pool_free(r1_bio, data);
	return NULL;
}

static void r1buf_pool_free(void *__r1_bio, void *data)
{
	struct pool_info *pi = data;
	int i;
	r1bio_t *r1bio = __r1_bio;
	struct bio *bio = r1bio->bios[0];

	for (i = 0; i < RESYNC_PAGES; i++) {
		__free_page(bio->bi_io_vec[i].bv_page);
		bio->bi_io_vec[i].bv_page = NULL;
	}
	for (i=0 ; i < pi->raid_disks; i++)
		bio_put(r1bio->bios[i]);

	r1bio_pool_free(r1bio, data);
}

/*
 * Drop the reference on every bio still recorded in r1_bio->bios[] and
 * clear each of the conf->raid_disks slots.
 */
static void put_all_bios(conf_t *conf, r1bio_t *r1_bio)
{
	int slot = 0;

	while (slot < conf->raid_disks) {
		struct bio *b = r1_bio->bios[slot];

		if (b)
			bio_put(b);
		r1_bio->bios[slot] = NULL;
		slot++;
	}
}

/*
 * Retire an r1bio used for normal I/O: account for the finished request in
 * conf->nr_pending, release any bios it still holds and return it to
 * conf->r1bio_pool.
 */
static inline void free_r1bio(r1bio_t *r1_bio)
{
	conf_t *conf = mddev_to_conf(r1_bio->mddev);
	unsigned long irq_flags;

	spin_lock_irqsave(&conf->resync_lock, irq_flags);
	conf->nr_pending--;
	if (conf->nr_pending == 0) {
		/*
		 * Last pending request is gone: wake anybody (e.g. the
		 * resync thread) waiting for the device to go idle.
		 */
		wake_up(&conf->wait_idle);
		wake_up(&conf->wait_resume);
	}
	spin_unlock_irqrestore(&conf->resync_lock, irq_flags);

	put_all_bios(conf, r1_bio);
	mempool_free(r1_bio, conf->r1bio_pool);
}

/*
 * Return a resync buffer to conf->r1buf_pool, then lower the resync barrier
 * by one and drop the request from conf->nr_pending, waking the waiters on
 * both wait queues.  It is a BUG to call this with no barrier raised.
 */
static inline void put_buf(r1bio_t *r1_bio)
{
	unsigned long irq_flags;
	conf_t *conf = mddev_to_conf(r1_bio->mddev);

	/* conf was fetched above, so r1_bio is not touched after this */
	mempool_free(r1_bio, conf->r1buf_pool);

	spin_lock_irqsave(&conf->resync_lock, irq_flags);
	BUG_ON(!conf->barrier);
	conf->barrier--;
	wake_up(&conf->wait_resume);
	wake_up(&conf->wait_idle);

	conf->nr_pending--;
	if (conf->nr_pending == 0) {
		wake_up(&conf->wait_idle);
		wake_up(&conf->wait_resume);
	}
	spin_unlock_irqrestore(&conf->resync_lock, irq_flags);
}

/*
 * Queue an r1bio on conf->retry_list (under conf->device_lock) and wake the
 * array's thread so that it gets processed there.
 */
static void reschedule_retry(r1bio_t *r1_bio)
{
	/*
	 * Remember mddev now: r1_bio is not dereferenced again once it has
	 * been put on the list.
	 */
	mddev_t *mddev = r1_bio->mddev;
	conf_t *conf = mddev_to_conf(mddev);
	unsigned long irq_flags;

	spin_lock_irqsave(&conf->device_lock, irq_flags);
	list_add(&r1_bio->retry_list, &conf->retry_list);
	spin_unlock_irqrestore(&conf->device_lock, irq_flags);

	md_wakeup_thread(mddev->thread);
}

/*
 * raid_end_bio_io() finishes a mirrored operation: unless somebody already
 * completed the master bio (R1BIO_Returned), it is ended here with 0 when
 * R1BIO_Uptodate is set and -EIO otherwise.  The r1bio is then released.
 */
static void raid_end_bio_io(r1bio_t *r1_bio)
{
	struct bio *bio = r1_bio->master_bio;

	/* only the first caller to set R1BIO_Returned does the endio */
	if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) {
		int err = test_bit(R1BIO_Uptodate, &r1_bio->state) ? 0 : -EIO;

		PRINTK(KERN_DEBUG "raid1: sync end %s on sectors %llu-%llu\n",
			(bio_data_dir(bio) == WRITE) ? "write" : "read",
			(unsigned long long) bio->bi_sector,
			(unsigned long long) bio->bi_sector +
				(bio->bi_size >> 9) - 1);

		bio_endio(bio, bio->bi_size, err);
	}

	free_r1bio(r1_bio);
}

/*
 * Update disk head position estimator based on IRQ completion info.
 *
 * Records, for mirror @disk, the sector just past the end of @r1_bio
 * (r1_bio->sector + r1_bio->sectors) as that mirror's head_position.
 * read_balance() compares against this value when choosing a mirror.
 */
static inline void update_head_pos(int disk, r1bio_t *r1_bio)
{
	conf_t *conf = mddev_to_conf(r1_bio->mddev);

	conf->mirrors[disk].head_position =
		r1_bio->sector + (r1_bio->sectors);
}

/*
 * bi_end_io handler for a read sent to a single mirror.
 *
 * Returns 1 while the bio still has bytes outstanding (bi_size != 0) and 0
 * once the completion has been handled.  A successful read marks the r1bio
 * R1BIO_Uptodate and completes the master bio; a failed read reports the
 * device through md_error() and queues the r1bio for a retry.  The
 * nr_pending reference on the mirror's rdev is dropped in both cases.
 */
static int raid1_end_read_request(struct bio *bio, unsigned int bytes_done, int error)
{
	r1bio_t *r1_bio = (r1bio_t *)(bio->bi_private);
	conf_t *conf = mddev_to_conf(r1_bio->mddev);
	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	int mirror;

	if (bio->bi_size)
		return 1;

	mirror = r1_bio->read_disk;

	/*
	 * 'one mirror IO has finished' event handler.  The r1bio stands for
	 * the composite operation seen by the upper layers, so a good read
	 * marks it uptodate and a bad one is reported against the device.
	 */
	if (uptodate)
		set_bit(R1BIO_Uptodate, &r1_bio->state);
	else
		md_error(r1_bio->mddev, conf->mirrors[mirror].rdev);

	update_head_pos(mirror, r1_bio);

	/* there is only one bio on the read side */
	if (!uptodate) {
		/* read error: hand the request over for another attempt */
		char b[BDEVNAME_SIZE];

		if (printk_ratelimit())
			printk(KERN_ERR "raid1: %s: rescheduling sector %llu\n",
			       bdevname(conf->mirrors[mirror].rdev->bdev,b), (unsigned long long)r1_bio->sector);
		reschedule_retry(r1_bio);
	} else {
		raid_end_bio_io(r1_bio);
	}

	/* r1_bio must not be used past this point; conf and mirror are locals */
	rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
	return 0;
}

static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int error)
{
	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private);
	int mirror, behind = test_bit(R1BIO_BehindIO, &r1_bio->state);
	conf_t *conf = mddev_to_conf(r1_bio->mddev);

	if (bio->bi_size)
		return 1;

	for (mirror = 0; mirror < conf->raid_disks; mirror++)
		if (r1_bio->bios[mirror] == bio)
			break;

	if (error == -ENOTSUPP && test_bit(R1BIO_Barrier, &r1_bio->state)) {
		set_bit(BarriersNotsupp, &conf->mirrors[mirror].rdev->flags);
		set_bit(R1BIO_BarrierRetry, &r1_bio->state);
		r1_bio->mddev->barriers_work = 0;
	} else {
		/*
		 * this branch is our 'one mirror IO has finished' event handler:
		 */
		r1_bio->bios[mirror] = NULL;
		bio_put(bio);
		if (!uptodate) {
			md_error(r1_bio->mddev, conf->mirrors[mirror].rdev);
			/* an I/O failed, we can't clear the bitmap */
			set_bit(R1BIO_Degraded, &r1_bio->state);
		} else
			/*
			 * Set R1BIO_Uptodate in our master bio, so that
			 * we will return a good error code for to the higher
			 * levels even if IO on some other mirrored buffer fails.
			 *
			 * The 'master' represents the composite IO operation to
			 * user-side. So if something waits for IO, then it will
			 * wait for the 'master' bio.
			 */
			set_bit(R1BIO_Uptodate, &r1_bio->state);

		update_head_pos(mirror, r1_bio);

		if (behind) {
			if (test_bit(WriteMostly, &conf->mirrors[mirror].rdev->flags))
				atomic_dec(&r1_bio->behind_remaining);

			/* In behind mode, we ACK the master bio once the I/O has safely
			 * reached all non-writemostly disks. Setting the Returned bit
			 * ensures that this gets done only once -- we don't ever want to
			 * return -EIO here, instead we'll wait */

			if (atomic_read(&r1_bio->behind_remaining) >= (atomic_read(&r1_bio->remaining)-1) &&
			    test_bit(R1BIO_Uptodate, &r1_bio->state)) {
				/* Maybe we can return now */
				if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) {
					struct bio *mbio = r1_bio->master_bio;
					PRINTK(KERN_DEBUG "raid1: behind end write sectors %llu-%llu\n",
					       (unsigned long long) mbio->bi_sector,
					       (unsigned long long) mbio->bi_sector +
					       (mbio->bi_size >> 9) - 1);
					bio_endio(mbio, mbio->bi_size, 0);
				}
			}
		}
	}
	/*
	 *
	 * Let's see if all mirrored write operations have finished
	 * already.
	 */
	if (atomic_dec_and_test(&r1_bio->remaining)) {
		if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) {
			reschedule_retry(r1_bio);
			/* Don't dec_pending yet, we want to hold
			 * the reference over the retry
			 */
			return 0;
		}
		if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
			/* free extra copy of the data pages */
/* FIXME bio has been freed!!! */
			int i = bio->bi_vcnt;
			while (i--)
				__free_page(bio->bi_io_vec[i].bv_page);
		}
		/* clear the bitmap if all writes complete successfully */
		bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
				r1_bio->sectors,
				!test_bit(R1BIO_Degraded, &r1_bio->state),
				behind);
		md_write_end(r1_bio->mddev);
		raid_end_bio_io(r1_bio);
	}

	rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
	return 0;
}


/*
 * This routine returns the disk from which the requested read should
 * be done. There is a per-array 'next expected sequential IO' sector
 * number - if this matches on the next IO then we use the last disk.
 * There is also a per-disk 'last known head position' sector that is
 * maintained from IRQ contexts, both the normal and the resync IO
 * completion handlers update this position correctly. If there is no
 * perfect sequential match then we pick the disk whose head is closest.
 *
 * If there are 2 mirrors in the same 2 devices, performance degrades
 * because position is mirror, not device based.
 *
 * The rdev for the device selected will have nr_pending incremented.
 *
 * Returns the index of the chosen mirror, or a negative value when no
 * In_sync device could be found (new_disk then carries wonly_disk's
 * initial -1).  Devices flagged WriteMostly are only used as a fallback
 * when no other In_sync device is available.  The whole selection runs
 * under rcu_read_lock().
 */
static int read_balance(conf_t *conf, r1bio_t *r1_bio)
{
	/* NOTE(review): held in an unsigned long while the distances below
	 * are sector_t - confirm the two widths match on all configurations.
	 */
	const unsigned long this_sector = r1_bio->sector;
	int new_disk = conf->last_used, disk = new_disk;
	/* last In_sync device rejected only for being WriteMostly, or -1 */
	int wonly_disk = -1;
	const int sectors = r1_bio->sectors;
	sector_t new_distance, current_distance;
	mdk_rdev_t *rdev;

	rcu_read_lock();
	/*
	 * Check if we can balance. We can balance on the whole
	 * device if no resync is going on, or below the resync window.
	 * We take the first readable disk when above the resync window.
	 */
 retry:
	if (conf->mddev->recovery_cp < MaxSector &&
	    (this_sector + sectors >= conf->next_resync)) {
		/* Choose the first operational device, for consistency */
		new_disk = 0;

		/* walk upwards from slot 0 until a usable device is found */
		for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
		     !rdev || !test_bit(In_sync, &rdev->flags)
			     || test_bit(WriteMostly, &rdev->flags);
		     rdev = rcu_dereference(conf->mirrors[++new_disk].rdev)) {

			if (rdev && test_bit(In_sync, &rdev->flags))
				wonly_disk = new_disk;

			/* ran out of slots: fall back to a write-mostly device */
			if (new_disk == conf->raid_disks - 1) {
				new_disk = wonly_disk;
				break;
			}
		}
		goto rb_out;
	}


	/* make sure the disk is operational */
	/* walks downwards (with wrap-around) starting at conf->last_used */
	for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
	     !rdev || !test_bit(In_sync, &rdev->flags) ||
		     test_bit(WriteMostly, &rdev->flags);
	     rdev = rcu_dereference(conf->mirrors[new_disk].rdev)) {

		if (rdev && test_bit(In_sync, &rdev->flags))
			wonly_disk = new_disk;

		if (new_disk <= 0)
			new_disk = conf->raid_disks;
		new_disk--;
		/* back at the starting slot: settle for a write-mostly device */
		if (new_disk == disk) {
			new_disk = wonly_disk;
			break;
		}
	}

	if (new_disk < 0)
		goto rb_out;

	disk = new_disk;
	/* now disk == new_disk == starting point for search */

	/*
	 * Don't change to another disk for sequential reads:
	 */
	if (conf->next_seq_sect == this_sector)
		goto rb_out;
	if (this_sector == conf->mirrors[new_disk].head_position)
		goto rb_out;

	current_distance = abs(this_sector - conf->mirrors[disk].head_position);

	/* Find the disk whose head is closest */

	do {
		if (disk <= 0)
			disk = conf->raid_disks;
		disk--;

		rdev = rcu_dereference(conf->mirrors[disk].rdev);

		if (!rdev ||
		    !test_bit(In_sync, &rdev->flags) ||
		    test_bit(WriteMostly, &rdev->flags))
			continue;

		/* a device with nothing pending is taken immediately */
		if (!atomic_read(&rdev->nr_pending)) {
			new_disk = disk;
			break;
		}
		new_distance = abs(this_sector - conf->mirrors[disk].head_position);
		if (new_distance < current_distance) {
			current_distance = new_distance;
			new_disk = disk;
		}
	} while (disk != conf->last_used);

 rb_out:


	if (new_disk >= 0) {
		/* re-read the slot: it may have been cleared since it was chosen */
		rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
		if (!rdev)
			goto retry;
		atomic_inc(&rdev->nr_pending);
		if (!test_bit(In_sync, &rdev->flags)) {
			/* cannot risk returning a device that failed
			 * before we inc'ed nr_pending
			 */
			atomic_dec(&rdev->nr_pending);
			goto retry;
		}
		/* remember where a sequential follow-up read would start */
		conf->next_seq_sect = this_sector + sectors;
		conf->last_used = new_disk;
	}
	rcu_read_unlock();

	return new_disk;
}

/*
 * Call the queue unplug function of every member device that is present,
 * not Faulty and has I/O pending.  The RCU read lock is dropped around each
 * unplug_fn call; an extra nr_pending reference is held on the rdev for
 * that window.
 */
static void unplug_slaves(mddev_t *mddev)
{
	conf_t *conf = mddev_to_conf(mddev);
	int i;

	rcu_read_lock();
	for (i=0; i<mddev->raid_disks; i++) {
		mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
		request_queue_t *r_queue;

		if (!rdev || test_bit(Faulty, &rdev->flags) ||
		    !atomic_read(&rdev->nr_pending))
			continue;

		r_queue = bdev_get_queue(rdev->bdev);

		/* pin the rdev while we are outside the RCU section */
		atomic_inc(&rdev->nr_pending);
		rcu_read_unlock();

		if (r_queue->unplug_fn)
			r_queue->unplug_fn(r_queue);

		rdev_dec_pending(rdev, mddev);
		rcu_read_lock();
	}
	rcu_read_unlock();
}

/*
 * Unplug handler for the array's own request queue: unplugs the member
 * devices and wakes the array's thread.  The array is found through
 * q->queuedata.
 */
static void raid1_unplug(request_queue_t *q)
{
	mddev_t *mddev = q->queuedata;

	unplug_slaves(mddev);
	md_wakeup_thread(mddev->thread);
}

/*
 * Forward a flush request to each member device that is present and not
 * Faulty, stopping at the first non-zero result.
 *
 * Returns 0 on success, -EOPNOTSUPP if a member's queue has no
 * issue_flush_fn, or whatever a member's issue_flush_fn returned.
 * @error_sector is passed through to the members untouched.
 */
static int raid1_issue_flush(request_queue_t *q, struct gendisk *disk,
			     sector_t *error_sector)
{
	mddev_t *mddev = q->queuedata;
	conf_t *conf = mddev_to_conf(mddev);
	int i, ret = 0;

	rcu_read_lock();
	for (i=0; i<mddev->raid_disks && ret == 0; i++) {
		mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
		struct block_device *bdev;
		request_queue_t *r_queue;

		if (!rdev || test_bit(Faulty, &rdev->flags))
			continue;

		bdev = rdev->bdev;
		r_queue = bdev_get_queue(bdev);

		if (!r_queue->issue_flush_fn) {
			ret = -EOPNOTSUPP;
			continue;
		}

		/* pin the rdev while the flush runs outside the RCU section */
		atomic_inc(&rdev->nr_pending);
		rcu_read_unlock();
		ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
					      error_sector);
		rdev_dec_pending(rdev, mddev);
		rcu_read_lock();
	}
	rcu_read_unlock();
	return ret;
}

/*
 * Throttle resync depth, so that we can both get proper overlapping of
 * requests, but are still able to handle normal requests quickly.
 */
#define RESYNC_DEPTH 32

/*
 * Raise the resync barrier by one and record @sect as conf->next_resync.
 *
 * Waits, under conf->resync_lock, in three steps: until nobody sleeps on
 * wait_resume; if this is the first barrier, until nr_pending has dropped
 * to zero; and finally until fewer than RESYNC_DEPTH barriers are
 * outstanding.  The array queue is unplugged while waiting.
 */
static void device_barrier(conf_t *conf, sector_t sect)
{
	spin_lock_irq(&conf->resync_lock);

	wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume),
			    conf->resync_lock, raid1_unplug(conf->mddev->queue));

	if (conf->barrier++ == 0) {
		/* first barrier going up: let all pending requests drain */
		wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
				    conf->resync_lock, raid1_unplug(conf->mddev->queue));
		BUG_ON(conf->nr_pending);
	}

	/* throttle the number of resync requests in flight */
	wait_event_lock_irq(conf->wait_resume, conf->barrier < RESYNC_DEPTH,
			    conf->resync_lock, raid1_unplug(conf->mddev->queue));

	conf->next_resync = sect;
	spin_unlock_irq(&conf->resync_lock);
}

/*
 * Duplicate the data pages of @bio for behind I/O.
 *
 * Returns a kmalloc'ed array of bio->bi_vcnt page pointers.  The slots
 * visited by bio_for_each_segment() hold a private copy of the matching
 * segment's data; all other slots are NULL.  Returns NULL if any allocation
 * fails, in which case nothing is left allocated and the caller is expected
 * to do the write synchronously.
 */
static struct page **alloc_behind_pages(struct bio *bio)
{
	int i;
	struct bio_vec *bvec;
	struct page **pages = kmalloc(bio->bi_vcnt * sizeof(struct page *),
					GFP_NOIO);
	if (unlikely(!pages))
		goto do_sync_io;

	/* unvisited slots must read as NULL for the cleanup paths */
	memset(pages, 0, bio->bi_vcnt * sizeof(struct page *));

	bio_for_each_segment(bvec, bio, i) {
		pages[i] = alloc_page(GFP_NOIO);
		if (unlikely(!pages[i]))
			goto do_sync_io;
		/* copy the segment to the same offset within the new page */
		memcpy(kmap(pages[i]) + bvec->bv_offset,
			kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len);
		kunmap(pages[i]);
		kunmap(bvec->bv_page);
	}

	return pages;

do_sync_io:
	if (pages) {
		/*
		 * The segment walk above need not start at slot 0, so the
		 * leading slots may be NULL while later ones are populated:
		 * scan the whole array rather than stopping at the first
		 * NULL entry.
		 */
		for (i = 0; i < bio->bi_vcnt; i++)
			if (pages[i])
				__free_page(pages[i]);
	}
	kfree(pages);
	PRINTK("%dB behind alloc failed, doing sync I/O\n", bio->bi_size);
	return NULL;
}

/*
 * make_request() - handle a bio submitted to the RAID1 array.
 *
 * Barrier bios are refused with -EOPNOTSUPP once mddev->barriers_work has
 * been cleared.  Otherwise the request waits for any resync barrier to be
 * lowered, is counted in conf->nr_pending and gets an r1bio to track it.
 *
 * READ:  one mirror is picked by read_balance() and a clone of the bio is
 *        sent to it straight away.  If no mirror is available the master
 *        bio is completed through raid_end_bio_io().
 * WRITE: a clone is prepared for every non-Faulty mirror (optionally on
 *        private "behind" copies of the data pages) and queued on
 *        conf->pending_bio_list; the array queue is plugged and the clones
 *        are submitted later by raid1d().
 *
 * Always returns 0.
 */
static int make_request(request_queue_t *q, struct bio * bio)
{
	mddev_t *mddev = q->queuedata;
	conf_t *conf = mddev_to_conf(mddev);
	mirror_info_t *mirror;
	r1bio_t *r1_bio;
	struct bio *read_bio;
	int i, targets = 0, disks;
	mdk_rdev_t *rdev;
	struct bitmap *bitmap = mddev->bitmap;
	unsigned long flags;
	struct bio_list bl;
	struct page **behind_pages = NULL;
	const int rw = bio_data_dir(bio);
	int do_barriers;

	if (unlikely(!mddev->barriers_work && bio_barrier(bio))) {
		bio_endio(bio, bio->bi_size, -EOPNOTSUPP);
		return 0;
	}

	/*
	 * Register the new request and wait if the reconstruction
	 * thread has put up a bar for new requests.
	 * Continue immediately if no resync is active currently.
	 */
	md_write_start(mddev, bio); /* wait on superblock update early */

	spin_lock_irq(&conf->resync_lock);
	wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, );
	conf->nr_pending++;
	spin_unlock_irq(&conf->resync_lock);

	disk_stat_inc(mddev->gendisk, ios[rw]);
	disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));

	/*
	 * Get the r1bio that tracks this request and describe the
	 * master bio in it.
	 */
	r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);

	r1_bio->master_bio = bio;
	r1_bio->sectors = bio->bi_size >> 9;
	r1_bio->state = 0;
	r1_bio->mddev = mddev;
	r1_bio->sector = bio->bi_sector;

	if (rw == READ) {
		/*
		 * read balancing logic:
		 */
		int rdisk = read_balance(conf, r1_bio);

		if (rdisk < 0) {
			/* couldn't find anywhere to read from */
			raid_end_bio_io(r1_bio);
			return 0;
		}
		mirror = conf->mirrors + rdisk;

		r1_bio->read_disk = rdisk;

		read_bio = bio_clone(bio, GFP_NOIO);

		r1_bio->bios[rdisk] = read_bio;

		read_bio->bi_sector = r1_bio->sector + mirror->rdev->data_offset;
		read_bio->bi_bdev = mirror->rdev->bdev;
		read_bio->bi_end_io = raid1_end_read_request;
		read_bio->bi_rw = READ;
		read_bio->bi_private = r1_bio;

		generic_make_request(read_bio);
		return 0;
	}

	/*
	 * WRITE:
	 */
	/* first select target devices under the RCU read lock and
	 * inc refcount on their rdev.  Record them by setting
	 * bios[x] to bio
	 */
	disks = conf->raid_disks;
	rcu_read_lock();
	for (i = 0;  i < disks; i++) {
		if ((rdev=rcu_dereference(conf->mirrors[i].rdev)) != NULL &&
		    !test_bit(Faulty, &rdev->flags)) {
			atomic_inc(&rdev->nr_pending);
			/* re-check: the device may have failed before the inc */
			if (test_bit(Faulty, &rdev->flags)) {
				atomic_dec(&rdev->nr_pending);
				r1_bio->bios[i] = NULL;
			} else
				r1_bio->bios[i] = bio;
			targets++;
		} else
			r1_bio->bios[i] = NULL;
	}
	rcu_read_unlock();

	BUG_ON(targets == 0); /* we never fail the last device */

	if (targets < conf->raid_disks) {
		/* array is degraded, we will not clear the bitmap
		 * on I/O completion (see raid1_end_write_request) */
		set_bit(R1BIO_Degraded, &r1_bio->state);
	}

	/* do behind I/O ? */
	if (bitmap &&
	    atomic_read(&bitmap->behind_writes) < bitmap->max_write_behind &&
	    (behind_pages = alloc_behind_pages(bio)) != NULL)
		set_bit(R1BIO_BehindIO, &r1_bio->state);

	atomic_set(&r1_bio->remaining, 0);
	atomic_set(&r1_bio->behind_remaining, 0);

	/*
	 * BIO_RW_BARRIER is a bit number, not a mask, so the flag has to be
	 * extracted with bio_barrier() (as done at the top of this function).
	 * The result is the barrier bit in bi_rw position, or 0, and can be
	 * OR-ed straight into the clones' bi_rw below.
	 */
	do_barriers = bio_barrier(bio);
	if (do_barriers)
		set_bit(R1BIO_Barrier, &r1_bio->state);

	bio_list_init(&bl);
	for (i = 0; i < disks; i++) {
		struct bio *mbio;
		if (!r1_bio->bios[i])
			continue;

		mbio = bio_clone(bio, GFP_NOIO);
		r1_bio->bios[i] = mbio;

		mbio->bi_sector	= r1_bio->sector + conf->mirrors[i].rdev->data_offset;
		mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
		mbio->bi_end_io	= raid1_end_write_request;
		mbio->bi_rw = WRITE | do_barriers;
		mbio->bi_private = r1_bio;

		if (behind_pages) {
			struct bio_vec *bvec;
			int j;

			/* Yes, I really want the '__' version so that
			 * we clear any unused pointer in the io_vec, rather
			 * than leave them unchanged.  This is important
			 * because when we come to free the pages, we won't
			 * know the original bi_idx, so we just free
			 * them all
			 */
			__bio_for_each_segment(bvec, mbio, j, 0)
				bvec->bv_page = behind_pages[j];
			if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags))
				atomic_inc(&r1_bio->behind_remaining);
		}

		atomic_inc(&r1_bio->remaining);

		bio_list_add(&bl, mbio);
	}
	kfree(behind_pages); /* the behind pages are attached to the bios now */

	bitmap_startwrite(bitmap, bio->bi_sector, r1_bio->sectors,
				test_bit(R1BIO_BehindIO, &r1_bio->state));

	/* hand the clones over to raid1d, which submits them */
	spin_lock_irqsave(&conf->device_lock, flags);
	bio_list_merge(&conf->pending_bio_list, &bl);
	bio_list_init(&bl);

	blk_plug_device(mddev->queue);
	spin_unlock_irqrestore(&conf->device_lock, flags);

	return 0;
}

/*
 * Emit the array status into @seq: " [raid_disks/working_disks] [" followed
 * by one character per mirror slot - "U" when the slot holds an In_sync
 * device, "_" otherwise - and a closing "]".
 */
static void status(struct seq_file *seq, mddev_t *mddev)
{
	conf_t *conf = mddev_to_conf(mddev);
	int i;

	seq_printf(seq, " [%d/%d] [", conf->raid_disks,
						conf->working_disks);
	for (i = 0; i < conf->raid_disks; i++) {
		mdk_rdev_t *rdev = conf->mirrors[i].rdev;
		int in_sync = rdev && test_bit(In_sync, &rdev->flags);

		seq_printf(seq, "%s", in_sync ? "U" : "_");
	}
	seq_printf(seq, "]");
}


/*
 * Error handler for a member device.
 *
 * An In_sync device that is the only working disk is never failed: the
 * error is ignored here and left to the next level up.  Any other device
 * is marked Faulty (and no longer In_sync); if it was In_sync the array
 * becomes more degraded and a running recovery is flagged to abort.
 */
static void error(mddev_t *mddev, mdk_rdev_t *rdev)
{
	conf_t *conf = mddev_to_conf(mddev);
	char b[BDEVNAME_SIZE];

	if (test_bit(In_sync, &rdev->flags)) {
		/*
		 * Last working disk: don't fail the drive, act as though
		 * we were just a normal single drive.
		 */
		if (conf->working_disks == 1)
			return;

		mddev->degraded++;
		conf->working_disks--;
		/*
		 * if recovery is running, make sure it aborts.
		 */
		set_bit(MD_RECOVERY_ERR, &mddev->recovery);
	}

	clear_bit(In_sync, &rdev->flags);
	set_bit(Faulty, &rdev->flags);
	mddev->sb_dirty = 1;
	printk(KERN_ALERT "raid1: Disk failure on %s, disabling device. \n"
		"	Operation continuing on %d devices\n",
		bdevname(rdev->bdev,b), conf->working_disks);
}

/*
 * Dump the RAID1 configuration with printk: working/raid disk counts and,
 * for every populated mirror slot, whether the device is out of sync
 * ("wo"), whether it is operational ("o") and its name.  A NULL @conf is
 * reported and otherwise ignored.
 */
static void print_conf(conf_t *conf)
{
	int slot;

	printk("RAID1 conf printout:\n");
	if (!conf) {
		printk("(!conf)\n");
		return;
	}
	printk(" --- wd:%d rd:%d\n", conf->working_disks,
		conf->raid_disks);

	for (slot = 0; slot < conf->raid_disks; slot++) {
		char b[BDEVNAME_SIZE];
		mirror_info_t *m = &conf->mirrors[slot];

		if (!m->rdev)
			continue;

		printk(" disk %d, wo:%d, o:%d, dev:%s\n",
			slot, !test_bit(In_sync, &m->rdev->flags), !test_bit(Faulty, &m->rdev->flags),
			bdevname(m->rdev->bdev,b));
	}
}

/*
 * Tear down resync state: wait until every resync barrier has been lowered,
 * sanity-check that nothing is left waiting, then destroy the resync
 * buffer pool.
 */
static void close_sync(conf_t *conf)
{
	spin_lock_irq(&conf->resync_lock);
	wait_event_lock_irq(conf->wait_resume, !conf->barrier,
			    conf->resync_lock, 	raid1_unplug(conf->mddev->queue));
	spin_unlock_irq(&conf->resync_lock);

	BUG_ON(conf->barrier);
	BUG_ON(waitqueue_active(&conf->wait_idle));

	mempool_destroy(conf->r1buf_pool);
	conf->r1buf_pool = NULL;
}

/*
 * Promote every present device that is neither Faulty nor In_sync to
 * In_sync, adjusting working_disks and mddev->degraded accordingly, then
 * print the resulting configuration.  Always returns 0.
 */
static int raid1_spare_active(mddev_t *mddev)
{
	conf_t *conf = mddev->private;
	int slot;

	for (slot = 0; slot < conf->raid_disks; slot++) {
		mdk_rdev_t *rdev = conf->mirrors[slot].rdev;

		if (!rdev)
			continue;
		if (test_bit(Faulty, &rdev->flags))
			continue;
		if (test_bit(In_sync, &rdev->flags))
			continue;

		conf->working_disks++;
		mddev->degraded--;
		set_bit(In_sync, &rdev->flags);
	}

	print_conf(conf);
	return 0;
}


/*
 * Place @rdev into the first empty mirror slot.
 *
 * The array queue's limits are stacked on the new device's queue, and a
 * full sync is requested unless the device recently belonged to the array
 * (rdev->saved_raid_disk >= 0).  Returns 1 if a slot was found, 0 otherwise.
 */
static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
{
	conf_t *conf = mddev->private;
	mirror_info_t *p;
	int mirror;
	int found = 0;

	for (mirror = 0; mirror < mddev->raid_disks; mirror++) {
		p = conf->mirrors + mirror;
		if (p->rdev)
			continue;	/* slot already occupied */

		blk_queue_stack_limits(mddev->queue,
				       rdev->bdev->bd_disk->queue);
		/* as we don't honour merge_bvec_fn, we must never risk
		 * violating it, so limit ->max_sector to one PAGE, as
		 * a one page request is never in violation.
		 */
		if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
		    mddev->queue->max_sectors > (PAGE_SIZE>>9))
			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);

		p->head_position = 0;
		rdev->raid_disk = mirror;
		found = 1;
		/* As all devices are equivalent, we don't need a full recovery
		 * if this was recently any drive of the array
		 */
		if (rdev->saved_raid_disk < 0)
			conf->fullsync = 1;
		/* publish the device only once it is fully set up */
		rcu_assign_pointer(p->rdev, rdev);
		break;
	}

	print_conf(conf);
	return found;
}

/*
 * Detach the device in mirror slot @number.
 *
 * Returns 0 on success (or if the slot was already empty) and -EBUSY if
 * the device is still In_sync or has I/O pending, including I/O that
 * appeared while the slot was being cleared.  The configuration is printed
 * before and after.
 */
static int raid1_remove_disk(mddev_t *mddev, int number)
{
	conf_t *conf = mddev->private;
	mirror_info_t *p = conf->mirrors+ number;
	mdk_rdev_t *rdev;
	int err = 0;

	print_conf(conf);
	rdev = p->rdev;
	if (rdev) {
		if (test_bit(In_sync, &rdev->flags) ||
		    atomic_read(&rdev->nr_pending)) {
			err = -EBUSY;
		} else {
			/* unpublish, then wait for RCU readers to finish */
			p->rdev = NULL;
			synchronize_rcu();
			if (atomic_read(&rdev->nr_pending)) {
				/* lost the race, try later */
				err = -EBUSY;
				p->rdev = rdev;
			}
		}
	}

	print_conf(conf);
	return err;
}


/*
 * bi_end_io handler for the read half of a resync request.
 *
 * Returns 1 while the bio still has bytes outstanding, 0 otherwise.  The
 * outcome is only recorded here (R1BIO_Uptodate on success, md_error() on
 * failure); the follow-up work is left to the array thread by queueing the
 * r1bio for it.
 */
static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
{
	r1bio_t *r1_bio = (r1bio_t *)(bio->bi_private);
	conf_t *conf = mddev_to_conf(r1_bio->mddev);
	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);

	if (bio->bi_size)
		return 1;

	BUG_ON(r1_bio->bios[r1_bio->read_disk] != bio);
	update_head_pos(r1_bio->read_disk, r1_bio);

	/*
	 * The block has been read; it now has to be written to the other
	 * mirrors, or the failure dealt with.  Neither is done here.
	 */
	if (uptodate)
		set_bit(R1BIO_Uptodate, &r1_bio->state);
	else
		md_error(r1_bio->mddev,
			 conf->mirrors[r1_bio->read_disk].rdev);

	rdev_dec_pending(conf->mirrors[r1_bio->read_disk].rdev, conf->mddev);
	reschedule_retry(r1_bio);
	return 0;
}

/*
 * bi_end_io handler for one mirror's write during resync.
 *
 * Returns 1 while the bio still has bytes outstanding, 0 otherwise.  A
 * failed write is reported through md_error().  The completion that brings
 * r1_bio->remaining to zero reports the finished sectors to md and releases
 * the resync buffer.
 */
static int end_sync_write(struct bio *bio, unsigned int bytes_done, int error)
{
	r1bio_t *r1_bio = (r1bio_t *)(bio->bi_private);
	mddev_t *mddev = r1_bio->mddev;
	conf_t *conf = mddev_to_conf(mddev);
	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	int mirror = 0;
	int i;

	if (bio->bi_size)
		return 1;

	/* which mirror does this bio belong to? (slot 0 if not found) */
	for (i = 0; i < conf->raid_disks; i++) {
		if (r1_bio->bios[i] != bio)
			continue;
		mirror = i;
		break;
	}

	if (!uptodate)
		md_error(mddev, conf->mirrors[mirror].rdev);

	update_head_pos(mirror, r1_bio);

	if (atomic_dec_and_test(&r1_bio->remaining)) {
		md_done_sync(mddev, r1_bio->sectors, uptodate);
		put_buf(r1_bio);
	}

	/* only locals are used from here on, not r1_bio */
	rdev_dec_pending(conf->mirrors[mirror].rdev, mddev);
	return 0;
}

/*
 * Second half of a resync request, run once the read has completed.
 *
 * If the read failed, the sectors are reported to md as not synced and the
 * buffer is released.  Otherwise every bio whose completion handler is
 * end_sync_write is submitted; r1_bio->remaining starts at 1 so that the
 * request cannot complete before all writes have been issued.
 */
static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
{
	conf_t *conf = mddev_to_conf(mddev);
	int disks = conf->raid_disks;
	int i;

	if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
		/*
		 * There is no point trying a read-for-reconstruct as
		 * reconstruct is about to be aborted
		 */
		struct bio *rbio = r1_bio->bios[r1_bio->read_disk];
		char b[BDEVNAME_SIZE];

		printk(KERN_ALERT "raid1: %s: unrecoverable I/O read error"
			" for block %llu\n",
			bdevname(rbio->bi_bdev,b),
			(unsigned long long)r1_bio->sector);
		md_done_sync(mddev, r1_bio->sectors, 0);
		put_buf(r1_bio);
		return;
	}

	/*
	 * schedule writes
	 */
	atomic_set(&r1_bio->remaining, 1);
	for (i = 0; i < disks ; i++) {
		struct bio *wbio = r1_bio->bios[i];

		if (wbio->bi_end_io == end_sync_write) {
			mdk_rdev_t *rdev = conf->mirrors[i].rdev;

			atomic_inc(&rdev->nr_pending);
			atomic_inc(&r1_bio->remaining);
			md_sync_acct(rdev->bdev, wbio->bi_size >> 9);

			generic_make_request(wbio);
		}
	}

	/* drop the initial count taken above */
	if (atomic_dec_and_test(&r1_bio->remaining)) {
		/* if we're here, all write(s) have completed, so clean up */
		md_done_sync(mddev, r1_bio->sectors, 1);
		put_buf(r1_bio);
	}
}

/*
 * This is a kernel thread which:
 *
 *	1.	Retries failed read operations on working mirrors.
 *	2.	Updates the raid superblock when problems are encountered.
 *	3.	Performs writes following reads for array synchronising.
 *
 * It also submits the mirror writes that make_request() queued on
 * conf->pending_bio_list, after flushing pending bitmap updates, and
 * resubmits barrier writes that were rejected, this time without the
 * barrier flag.
 */

static void raid1d(mddev_t *mddev)
{
	r1bio_t *r1_bio;
	struct bio *bio;
	unsigned long flags;
	conf_t *conf = mddev_to_conf(mddev);
	struct list_head *head = &conf->retry_list;
	/* set once anything has been submitted, so members get unplugged */
	int unplug=0;
	mdk_rdev_t *rdev;

	md_check_recovery(mddev);
	
	for (;;) {
		char b[BDEVNAME_SIZE];
		/* taken here; released on each path below or after the loop */
		spin_lock_irqsave(&conf->device_lock, flags);

		if (conf->pending_bio_list.head) {
			/* detach the whole list while holding the lock */
			bio = bio_list_get(&conf->pending_bio_list);
			blk_remove_plug(mddev->queue);
			spin_unlock_irqrestore(&conf->device_lock, flags);
			/* flush any pending bitmap writes to disk before proceeding w/ I/O */
			if (bitmap_unplug(mddev->bitmap) != 0)
				printk("%s: bitmap file write failed!\n", mdname(mddev));

			while (bio) { /* submit pending writes */
				struct bio *next = bio->bi_next;
				bio->bi_next = NULL;
				generic_make_request(bio);
				bio = next;
			}
			unplug = 1;

			continue;
		}

		/* leaves the loop with device_lock still held */
		if (list_empty(head))
			break;
		/* take the entry at the tail of the retry list */
		r1_bio = list_entry(head->prev, r1bio_t, retry_list);
		list_del(head->prev);
		spin_unlock_irqrestore(&conf->device_lock, flags);

		mddev = r1_bio->mddev;
		conf = mddev_to_conf(mddev);
		if (test_bit(R1BIO_IsSync, &r1_bio->state)) {
			/* resync request whose read has finished */
			sync_request_write(mddev, r1_bio);
			unplug = 1;
		} else if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) {
			/* some requests in the r1bio were BIO_RW_BARRIER
			 * requests which were rejected as unsupported.
			 * Better resubmit without the barrier.
			 * We know which devices to resubmit for, because
			 * all others have had their bios[] entry cleared.
			 */
			int i;
			clear_bit(R1BIO_BarrierRetry, &r1_bio->state);
			clear_bit(R1BIO_Barrier, &r1_bio->state);
			for (i=0; i < conf->raid_disks; i++)
				if (r1_bio->bios[i]) {
					struct bio_vec *bvec;
					int j;

					bio = bio_clone(r1_bio->master_bio, GFP_NOIO);
					/* copy pages from the failed bio, as
					 * this might be a write-behind device */
					__bio_for_each_segment(bvec, bio, j, 0)
						bvec->bv_page = bio_iovec_idx(r1_bio->bios[i], j)->bv_page;
					/* the rejected bio is replaced by the new clone */
					bio_put(r1_bio->bios[i]);
					bio->bi_sector = r1_bio->sector +
						conf->mirrors[i].rdev->data_offset;
					bio->bi_bdev = conf->mirrors[i].rdev->bdev;
					bio->bi_end_io = raid1_end_write_request;
					bio->bi_rw = WRITE;
					bio->bi_private = r1_bio;
					r1_bio->bios[i] = bio;
					generic_make_request(bio);
				}
		} else {
			/* failed read: try to redirect it to another mirror */
			int disk;
			bio = r1_bio->bios[r1_bio->read_disk];
			if ((disk=read_balance(conf, r1_bio)) == -1) {
				printk(KERN_ALERT "raid1: %s: unrecoverable I/O"
				       " read error for block %llu\n",
				       bdevname(bio->bi_bdev,b),
				       (unsigned long long)r1_bio->sector);
				raid_end_bio_io(r1_bio);
			} else {
				/* swap the failed bio for a fresh clone aimed at 'disk' */
				r1_bio->bios[r1_bio->read_disk] = NULL;
				r1_bio->read_disk = disk;
				bio_put(bio);
				bio = bio_clone(r1_bio->master_bio, GFP_NOIO);
				r1_bio->bios[r1_bio->read_disk] = bio;
				rdev = conf->mirrors[disk].rdev;
				if (printk_ratelimit())
					printk(KERN_ERR "raid1: %s: redirecting sector %llu to"
					       " another mirror\n",
					       bdevname(rdev->bdev,b),
					       (unsigned long long)r1_bio->sector);
				bio->bi_sector = r1_bio->sector + rdev->data_offset;
				bio->bi_bdev = rdev->bdev;
				bio->bi_end_io = raid1_end_read_request;
				bio->bi_rw = READ;
				bio->bi_private = r1_bio;
				unplug = 1;
				generic_make_request(bio);
			}
		}
	}
	/* pairs with the lock taken at the top of the final iteration */
	spin_unlock_irqrestore(&conf->device_lock, flags);
	if (unplug)
		unplug_slaves(mddev);
}


/*
 * Set up the resync buffer pool for this array and rewind the resync
 * position to the start.
 *
 * It is a bug to call this while a resync pool already exists.
 *
 * Returns 0 on success, -ENOMEM if the pool could not be created.
 */
static int init_resync(conf_t *conf)
{
	/* how many RESYNC_BLOCK_SIZE buffers fit in one RESYNC_WINDOW */
	const int nr_bufs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE;
	mempool_t *pool;

	if (conf->r1buf_pool != NULL)
		BUG();

	pool = mempool_create(nr_bufs, r1buf_pool_alloc, r1buf_pool_free,
			      conf->poolinfo);
	conf->r1buf_pool = pool;
	if (pool == NULL)
		return -ENOMEM;

	conf->next_resync = 0;
	return 0;
}

/*
 * perform a "sync" on one "block"
 *
 * We need to make sure that no normal I/O request - particularly write
 * requests - conflicts with active sync requests.
 *
 * This is achieved by tracking pending requests and a 'barrier' concept
 * that can be installed to exclude normal IO requests.
 *
 * Parameters:
 *   mddev     - the array being resynced
 *   sector_nr - first sector this call should deal with
 *   skipped   - set to 1 when the returned range was skipped instead of
 *               being submitted for I/O
 *   go_faster - when zero, the resync sleeps for a while if normal I/O is
 *               waiting on conf->wait_resume
 *
 * Return value (in sectors):
 *   0                       - the resync pool could not be created, or
 *                             sector_nr is at/after the end of the device
 *                             (end-of-sync cleanup was done)
 *   sync_blocks             - the bitmap says this range can be skipped
 *   max_sector - sector_nr  - there is no device to write to, so the rest
 *                             of the device is skipped
 *   nr_sectors              - a read covering that many sectors was
 *                             submitted on the chosen read disk
 */

static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster)
{
	conf_t *conf = mddev_to_conf(mddev);
	mirror_info_t *mirror;
	r1bio_t *r1_bio;
	struct bio *bio;
	sector_t max_sector, nr_sectors;
	int disk;
	int i;
	int wonly;
	int write_targets = 0;
	int sync_blocks;
	int still_degraded = 0;

	/* The resync buffer pool is created lazily on the first call. */
	if (!conf->r1buf_pool)
	{
/*
		printk("sync start - bitmap %p\n", mddev->bitmap);
*/
		if (init_resync(conf))
			return 0;
	}

	/* NOTE(review): presumably mddev->size is in 1K units, making this
	 * the device size in 512-byte sectors - confirm against md_k.h.
	 */
	max_sector = mddev->size << 1;
	if (sector_nr >= max_sector) {
		/* If we aborted, we need to abort the
		 * sync on the 'current' bitmap chunk (there will
		 * only be one in raid1 resync).
		 * We can find the current address in mddev->curr_resync
		 */
		if (mddev->curr_resync < max_sector) /* aborted */
			bitmap_end_sync(mddev->bitmap, mddev->curr_resync,
						&sync_blocks, 1);
		else /* completed sync */
			conf->fullsync = 0;

		bitmap_close_sync(mddev->bitmap);
		close_sync(conf);
		return 0;
	}

	/* before building a request, check if we can skip these blocks..
	 * This call to bitmap_start_sync doesn't actually record anything
	 */
	if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) &&
	    !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
		/* We can skip this block, and probably several more */
		*skipped = 1;
		return sync_blocks;
	}
	/*
	 * If there is non-resync activity waiting for us then
	 * put in a delay to throttle resync.
	 */
	if (!go_faster && waitqueue_active(&conf->wait_resume))
		msleep_interruptible(1000);
	/* NOTE(review): device_barrier() is defined outside this chunk;
	 * presumably it raises conf->barrier (put_buf() BUGs if the barrier
	 * is not held when the buffer is released) - confirm.
	 */
	device_barrier(conf, sector_nr + RESYNC_SECTORS);

	/*
	 * If reconstructing, and >1 working disc,
	 * could dedicate one to rebuild and others to
	 * service read requests ..
	 */
	disk = conf->last_used;
	/* make sure disk is operational */
	/*
	 * Walk backwards (with wrap-around) from last_used looking for a
	 * device that is present, In_sync and not WriteMostly.  'wonly'
	 * remembers the most recent In_sync device that was passed over, and
	 * is used as the read disk if the walk gets back to last_used
	 * without finding a better one.
	 */
	wonly = disk;
	while (conf->mirrors[disk].rdev == NULL ||
	       !test_bit(In_sync, &conf->mirrors[disk].rdev->flags) ||
	       test_bit(WriteMostly, &conf->mirrors[disk].rdev->flags)
		) {
		if (conf->mirrors[disk].rdev  &&
		    test_bit(In_sync, &conf->mirrors[disk].rdev->flags))
			wonly = disk;
		if (disk <= 0)
			disk = conf->raid_disks;
		disk--;
		if (disk == conf->last_used) {
			disk = wonly;
			break;
		}
	}
	conf->last_used = disk;
	/* hold a reference on the read device while the sync read is in flight */
	atomic_inc(&conf->mirrors[disk].rdev->nr_pending);


	mirror = conf->mirrors + disk;

	r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);

	/* count this sync request as pending; put_buf() drops the count */
	spin_lock_irq(&conf->resync_lock);
	conf->nr_pending++;
	spin_unlock_irq(&conf->resync_lock);

	r1_bio->mddev = mddev;
	r1_bio->sector = sector_nr;
	r1_bio->state = 0;
	set_bit(R1BIO_IsSync, &r1_bio->state);
	r1_bio->read_disk = disk;

	/*
	 * Reset every per-device bio taken from the pool and decide its role:
	 * the chosen disk is read, missing/Faulty devices are left idle
	 * (bi_end_io stays NULL), devices that need the data are written, and
	 * devices already known to be in sync for this range are left idle.
	 */
	for (i=0; i < conf->raid_disks; i++) {
		bio = r1_bio->bios[i];

		/* take from bio_init */
		bio->bi_next = NULL;
		bio->bi_flags |= 1 << BIO_UPTODATE;
		bio->bi_rw = 0;
		bio->bi_vcnt = 0;
		bio->bi_idx = 0;
		bio->bi_phys_segments = 0;
		bio->bi_hw_segments = 0;
		bio->bi_size = 0;
		bio->bi_end_io = NULL;
		bio->bi_private = NULL;

		if (i == disk) {
			bio->bi_rw = READ;
			bio->bi_end_io = end_sync_read;
		} else if (conf->mirrors[i].rdev == NULL ||
			   test_bit(Faulty, &conf->mirrors[i].rdev->flags)) {
			/* passed to bitmap_start_sync() below */
			still_degraded = 1;
			continue;
		} else if (!test_bit(In_sync, &conf->mirrors[i].rdev->flags) ||
			   sector_nr + RESYNC_SECTORS > mddev->recovery_cp   ||
			   test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
			bio->bi_rw = WRITE;
			bio->bi_end_io = end_sync_write;
			write_targets ++;
		} else
			/* no need to read or write here */
			continue;
		bio->bi_sector = sector_nr + conf->mirrors[i].rdev->data_offset;
		bio->bi_bdev = conf->mirrors[i].rdev->bdev;
		bio->bi_private = r1_bio;
	}

	if (write_targets == 0) {
		/* There is nowhere to write, so all non-sync
		 * drives must be failed - so we are finished
		 */
		sector_t rv = max_sector - sector_nr;
		*skipped = 1;
		/* give back the buffer and the reference taken on the read disk */
		put_buf(r1_bio);
		rdev_dec_pending(conf->mirrors[disk].rdev, mddev);
		return rv;
	}

	/*
	 * Add pages to every active bio, one page per iteration, until the
	 * read bio holds RESYNC_PAGES pages, the end of the device is
	 * reached, or the bitmap says the following blocks need no sync.
	 * The data pages themselves live in the io_vec of bios[0].
	 */
	nr_sectors = 0;
	sync_blocks = 0;
	do {
		struct page *page;
		int len = PAGE_SIZE;
		/* clip the final chunk to the end of the device */
		if (sector_nr + (len>>9) > max_sector)
			len = (max_sector - sector_nr) << 9;
		if (len == 0)
			break;
		if (sync_blocks == 0) {
			/* bitmap range used up: ask about the next one */
			if (!bitmap_start_sync(mddev->bitmap, sector_nr,
					       &sync_blocks, still_degraded) &&
			    !conf->fullsync &&
			    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
				break;
			if (sync_blocks < (PAGE_SIZE>>9))
				BUG();
			if (len > (sync_blocks<<9))
				len = sync_blocks<<9;
		}

		for (i=0 ; i < conf->raid_disks; i++) {
			bio = r1_bio->bios[i];
			if (bio->bi_end_io) {
				page = r1_bio->bios[0]->bi_io_vec[bio->bi_vcnt].bv_page;
				if (bio_add_page(bio, page, len, 0) == 0) {
					/* stop here */
					r1_bio->bios[0]->bi_io_vec[bio->bi_vcnt].bv_page = page;
					/*
					 * Undo the page already added to the
					 * active bios at lower indices so
					 * that all bios stay the same length.
					 */
					while (i > 0) {
						i--;
						bio = r1_bio->bios[i];
						if (bio->bi_end_io==NULL)
							continue;
						/* remove last page from this bio */
						bio->bi_vcnt--;
						bio->bi_size -= len;
						bio->bi_flags &= ~(1<< BIO_SEG_VALID);
					}
					goto bio_full;
				}
			}
		}
		nr_sectors += len>>9;
		sector_nr += len>>9;
		sync_blocks -= (len>>9);
	} while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES);
 bio_full:
	/*
	 * Only the read bio is submitted here.
	 * NOTE(review): the write bios are presumably issued once the read
	 * completes (end_sync_read path, outside this chunk) - confirm.
	 */
	bio = r1_bio->bios[disk];
	r1_bio->sectors = nr_sectors;

	md_sync_acct(mirror->rdev->bdev, nr_sectors);

	generic_make_request(bio);

	return nr_sectors;
}

/*
 * Bring a RAID1 array up: allocate and fill in the private conf_t, attach
 * every member device to its mirror slot, start the raid1d thread and hook
 * up the request queue callbacks.
 *
 * Anything allocated here is released again in stop().
 *
 * Returns 0 on success.  Every failure (wrong level, allocation failure,
 * no working mirror, thread creation failure) returns -EIO.
 */
static int run(mddev_t *mddev)
{
	conf_t *conf;
	mirror_info_t *mirror;
	mdk_rdev_t *rdev;
	struct list_head *pos;
	int slot, first_sync;

	if (mddev->level != 1) {
		printk("raid1: %s: raid level not set to mirroring (%d)\n",
		       mdname(mddev), mddev->level);
		goto out;
	}

	/*
	 * Build our private bookkeeping.  mddev->private is set before the
	 * NULL check so that it always mirrors 'conf'.
	 */
	conf = kmalloc(sizeof(*conf), GFP_KERNEL);
	mddev->private = conf;
	if (conf == NULL)
		goto out_no_mem;
	memset(conf, 0, sizeof(*conf));

	conf->mirrors = kmalloc(sizeof(struct mirror_info) * mddev->raid_disks,
				GFP_KERNEL);
	if (conf->mirrors == NULL)
		goto out_no_mem;
	memset(conf->mirrors, 0,
	       sizeof(struct mirror_info) * mddev->raid_disks);

	conf->poolinfo = kmalloc(sizeof(*conf->poolinfo), GFP_KERNEL);
	if (conf->poolinfo == NULL)
		goto out_no_mem;
	conf->poolinfo->mddev = mddev;
	conf->poolinfo->raid_disks = mddev->raid_disks;

	conf->r1bio_pool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,
					  r1bio_pool_free, conf->poolinfo);
	if (conf->r1bio_pool == NULL)
		goto out_no_mem;

	/* Attach each member device that has a valid slot number. */
	ITERATE_RDEV(mddev, rdev, pos) {
		slot = rdev->raid_disk;
		if (slot < 0 || slot >= mddev->raid_disks)
			continue;

		mirror = &conf->mirrors[slot];
		mirror->rdev = rdev;

		blk_queue_stack_limits(mddev->queue,
				       rdev->bdev->bd_disk->queue);
		/*
		 * We do not honour merge_bvec_fn, so we must never risk
		 * violating it: restrict requests to a single page, which
		 * can never be in violation.
		 */
		if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
		    mddev->queue->max_sectors > (PAGE_SIZE>>9))
			blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);

		mirror->head_position = 0;
		if (test_bit(In_sync, &rdev->flags) &&
		    !test_bit(Faulty, &rdev->flags))
			conf->working_disks++;
	}

	conf->raid_disks = mddev->raid_disks;
	conf->mddev = mddev;
	spin_lock_init(&conf->device_lock);
	INIT_LIST_HEAD(&conf->retry_list);
	if (conf->working_disks == 1)
		mddev->recovery_cp = MaxSector;

	spin_lock_init(&conf->resync_lock);
	init_waitqueue_head(&conf->wait_idle);
	init_waitqueue_head(&conf->wait_resume);

	bio_list_init(&conf->pending_bio_list);
	bio_list_init(&conf->flushing_bio_list);

	if (conf->working_disks == 0) {
		printk(KERN_ERR "raid1: no operational mirrors for %s\n",
			mdname(mddev));
		goto out_free_conf;
	}

	/* Every slot without a device counts towards 'degraded'. */
	mddev->degraded = 0;
	for (slot = 0; slot < conf->raid_disks; slot++) {
		mirror = &conf->mirrors[slot];
		if (mirror->rdev)
			continue;
		mirror->head_position = 0;
		mddev->degraded++;
	}

	/*
	 * Read balancing starts from the first slot that holds an
	 * in-sync device.
	 */
	first_sync = 0;
	while (first_sync < conf->raid_disks) {
		rdev = conf->mirrors[first_sync].rdev;
		if (rdev && test_bit(In_sync, &rdev->flags))
			break;
		first_sync++;
	}
	conf->last_used = first_sync;

	mddev->thread = md_register_thread(raid1d, mddev, "%s_raid1");
	if (mddev->thread == NULL) {
		printk(KERN_ERR
		       "raid1: couldn't allocate thread for %s\n",
		       mdname(mddev));
		goto out_free_conf;
	}
	if (mddev->bitmap)
		mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ;

	printk(KERN_INFO
		"raid1: raid set %s active with %d out of %d mirrors\n",
		mdname(mddev), mddev->raid_disks - mddev->degraded,
		mddev->raid_disks);

	/* Everything is in place: publish size and queue callbacks. */
	mddev->array_size = mddev->size;

	mddev->queue->unplug_fn = raid1_unplug;
	mddev->queue->issue_flush_fn = raid1_issue_flush;

	return 0;

out_no_mem:
	printk(KERN_ERR "raid1: couldn't allocate memory for %s\n",
	       mdname(mddev));

out_free_conf:
	if (conf) {
		if (conf->r1bio_pool)
			mempool_destroy(conf->r1bio_pool);
		kfree(conf->mirrors);
		kfree(conf->poolinfo);
		kfree(conf);
		mddev->private = NULL;
	}
out:
	return -EIO;
}

/*
 * Tear down a RAID1 array: wait for outstanding write-behind I/O, stop the
 * raid1d thread, and free everything that run() allocated.
 *
 * Always returns 0.
 */
static int stop(mddev_t *mddev)
{
	conf_t *conf = mddev_to_conf(mddev);
	struct bitmap *bitmap = mddev->bitmap;
	int attempt;

	/*
	 * Behind writes are only tracked when there is a bitmap.  Poll once
	 * a second until they have all completed, logging each attempt.
	 */
	if (bitmap != NULL) {
		for (attempt = 1;
		     atomic_read(&bitmap->behind_writes) > 0;
		     attempt++) {
			printk(KERN_INFO "raid1: behind writes in progress on device %s, waiting to stop (%d)\n",
			       mdname(mddev), attempt);
			set_current_state(TASK_UNINTERRUPTIBLE);
			schedule_timeout(HZ); /* wait a second */
			/* need to kick something here to make sure I/O goes? */
		}
	}

	md_unregister_thread(mddev->thread);
	mddev->thread = NULL;

	/* the unplug fn references 'conf', so sync the queue before freeing */
	blk_sync_queue(mddev->queue);

	if (conf->r1bio_pool != NULL)
		mempool_destroy(conf->r1bio_pool);
	kfree(conf->mirrors);
	kfree(conf->poolinfo);
	kfree(conf);
	mddev->private = NULL;

	return 0;
}

/*
 * Change the size of the array to 'sectors' sectors.
 *
 * The caller guarantees that no resync is running and that every device
 * has enough space.  When the array grows and was previously fully in
 * sync, the resync checkpoint is moved back to the old end of the device
 * and a recovery is requested so that the new space gets synced.
 *
 * When shrinking we could wait for I/O in the removed region to finish,
 * but that hardly seems worth it.
 *
 * Always returns 0.
 */
static int raid1_resize(mddev_t *mddev, sector_t sectors)
{
	mddev->array_size = sectors >> 1;
	mddev->resync_max_sectors = sectors;
	mddev->changed = 1;
	set_capacity(mddev->gendisk, mddev->array_size << 1);

	/* mddev->size still holds the old size at this point */
	if (mddev->recovery_cp == MaxSector &&
	    mddev->array_size > mddev->size) {
		mddev->recovery_cp = mddev->size << 1;
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
	}

	mddev->size = mddev->array_size;
	return 0;
}

/*
 * Change the number of mirror slots in the array to 'raid_disks'.
 *
 * Steps:
 *  - refuse (-EBUSY) to shrink below the number of attached devices;
 *  - allocate a new pool_info, r1bio pool and mirrors array up front
 *    (-ENOMEM if any of these fail, with nothing changed);
 *  - raise the barrier and wait until no request is pending;
 *  - move the attached devices into the new mirrors array, packing them
 *    into the lowest slots so the empty slots end up at the top;
 *  - swap in the new structures, drop the barrier and request a recovery.
 *
 * Returns 0 on success.
 */
static int raid1_reshape(mddev_t *mddev, int raid_disks)
{
	conf_t *conf = mddev_to_conf(mddev);
	struct pool_info *newpoolinfo;
	mempool_t *newpool = NULL, *oldpool;
	mirror_info_t *newmirrors;
	int src, dst;

	if (raid_disks < conf->raid_disks) {
		int in_use = 0;

		for (src = 0; src < conf->raid_disks; src++) {
			if (conf->mirrors[src].rdev != NULL)
				in_use++;
		}
		if (in_use > raid_disks)
			return -EBUSY;
	}

	newpoolinfo = kmalloc(sizeof(*newpoolinfo), GFP_KERNEL);
	if (newpoolinfo == NULL)
		goto out_nomem;
	newpoolinfo->mddev = mddev;
	newpoolinfo->raid_disks = raid_disks;

	newpool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,
				 r1bio_pool_free, newpoolinfo);
	if (newpool == NULL)
		goto out_nomem;

	newmirrors = kmalloc(sizeof(struct mirror_info) * raid_disks,
			     GFP_KERNEL);
	if (newmirrors == NULL)
		goto out_nomem;
	memset(newmirrors, 0, sizeof(struct mirror_info) * raid_disks);

	/* Block new I/O and wait for everything in flight to drain. */
	spin_lock_irq(&conf->resync_lock);
	conf->barrier++;
	wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
			    conf->resync_lock, raid1_unplug(mddev->queue));
	spin_unlock_irq(&conf->resync_lock);

	/* ok, everything is stopped */
	oldpool = conf->r1bio_pool;
	conf->r1bio_pool = newpool;

	/* Pack the attached devices into consecutive low slots. */
	for (src = 0, dst = 0; src < conf->raid_disks; src++) {
		mdk_rdev_t *rdev = conf->mirrors[src].rdev;

		if (rdev == NULL)
			continue;
		rdev->raid_disk = dst;
		newmirrors[dst].rdev = rdev;
		dst++;
	}

	kfree(conf->mirrors);
	conf->mirrors = newmirrors;
	kfree(conf->poolinfo);
	conf->poolinfo = newpoolinfo;

	mddev->degraded += (raid_disks - conf->raid_disks);
	mddev->raid_disks = raid_disks;
	conf->raid_disks = raid_disks;

	conf->last_used = 0; /* just make sure it is in-range */

	/* Let normal I/O resume. */
	spin_lock_irq(&conf->resync_lock);
	conf->barrier--;
	spin_unlock_irq(&conf->resync_lock);
	wake_up(&conf->wait_resume);
	wake_up(&conf->wait_idle);

	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
	md_wakeup_thread(mddev->thread);

	mempool_destroy(oldpool);
	return 0;

out_nomem:
	/* nothing has been installed yet: release whatever we allocated */
	kfree(newpoolinfo);
	if (newpool != NULL)
		mempool_destroy(newpool);
	return -ENOMEM;
}

/*
 * Quiesce (state == 1) or resume (state == 0) the array.
 *
 * Quiescing raises the barrier and waits until no request is pending;
 * resuming lowers the barrier and wakes the waiters.  Any other value of
 * 'state' leaves the barrier alone.
 *
 * In every case, if the raid1d thread exists its timeout is refreshed
 * from the bitmap (or set to MAX_SCHEDULE_TIMEOUT when there is no
 * bitmap) and the thread is woken.
 */
static void raid1_quiesce(mddev_t *mddev, int state)
{
	conf_t *conf = mddev_to_conf(mddev);

	if (state == 1) {
		spin_lock_irq(&conf->resync_lock);
		conf->barrier++;
		wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
				    conf->resync_lock, raid1_unplug(mddev->queue));
		spin_unlock_irq(&conf->resync_lock);
	} else if (state == 0) {
		spin_lock_irq(&conf->resync_lock);
		conf->barrier--;
		spin_unlock_irq(&conf->resync_lock);
		wake_up(&conf->wait_resume);
		wake_up(&conf->wait_idle);
	}

	if (!mddev->thread)
		return;

	if (mddev->bitmap)
		mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ;
	else
		mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
	md_wakeup_thread(mddev->thread);
}


/*
 * The RAID1 personality: the table of operations that this file provides
 * for "raid1" arrays.  It is registered in raid_init().
 */
static mdk_personality_t raid1_personality =
{
	.name		= "raid1",
	.owner		= THIS_MODULE,
	.make_request	= make_request,
	.run		= run,
	.stop		= stop,
	.status		= status,
	.error_handler	= error,
	.hot_add_disk	= raid1_add_disk,
	.hot_remove_disk= raid1_remove_disk,
	.spare_active	= raid1_spare_active,
	.sync_request	= sync_request,
	.resize		= raid1_resize,
	.reshape	= raid1_reshape,
	.quiesce	= raid1_quiesce,
};

/*
 * Module init: register raid1_personality under the RAID1 personality
 * number.  Returns whatever register_md_personality() returns.
 */
static int __init raid_init(void)
{
	return register_md_personality(RAID1, &raid1_personality);
}

/*
 * Module exit: unregister the RAID1 personality that raid_init()
 * registered.
 */
static void raid_exit(void)
{
	unregister_md_personality(RAID1);
}

/* Module entry/exit points, licence and alias. */
module_init(raid_init);
module_exit(raid_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS("md-personality-3"); /* RAID1 */
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+								/*
 								 * raid1.c : Multiple Devices driver for Linux
 								 *
 								 * Copyright (C) 1999, 2000, 2001 Ingo Molnar, Red Hat
 								 *
 								 * Copyright (C) 1996, 1997, 1998 Ingo Molnar, Miguel de Icaza, Gadi Oxman
 								 *
 								 * RAID-1 management functions.
 								 *
 								 * Better read-balancing code written by Mika Kuoppala <miku@iki.fi>, 2000
 								 *
 								 * Fixes to reconstruction by Jakob Østergaard <jakob@ostenfeld.dk>
 								 * Various fixes by Neil Brown <neilb@cse.unsw.edu.au>
 								 *
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+								 * Changes by Peter T. Breuer <ptb@it.uc3m.es> 31/1/2003 to support
 								 * bitmapped intelligence in resync:
 								 *
 								 *      - bitmap marked during normal i/o
 								 *      - bitmap used to skip nondirty blocks during sync
 								 *
 								 * Additions to bitmap code, (C) 2003-2004 Paul Clements, SteelEye Technology:
 								 * - persistent bitmap code
 								 *
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+								 * This program is free software; you can redistribute it and/or modify
 								 * it under the terms of the GNU General Public License as published by
 								 * the Free Software Foundation; either version 2, or (at your option)
 								 * any later version.
 								 *
 								 * You should have received a copy of the GNU General Public License
 								 * (for example /usr/src/linux/COPYING); if not, write to the Free
 								 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 								 */
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+								#include "dm-bio-list.h"
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+								#include <linux/raid/raid1.h>
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+								#include <linux/raid/bitmap.h>
 								#define DEBUG 0
 								#if DEBUG
 								#define PRINTK(x...) printk(x)
 								#else
 								#define PRINTK(x...)
 								#endif
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
 								/*
 								 * Number of guaranteed r1bios in case of extreme VM load:
 								 */
 								#define	NR_RAID1_BIOS 256
 								static mdk_personality_t raid1_personality;
 								static void unplug_slaves(mddev_t *mddev);
-												[PATCH] gfp flags annotations - part 1

 - added typedef unsigned int __nocast gfp_t;

 - replaced __nocast uses for gfp flags with gfp_t - it gives exactly
   the same warnings as far as sparse is concerned, doesn't change
   generated code (from gcc point of view we replaced unsigned int with
   typedef) and documents what's going on far better.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-10-07 06:46:04 +00:00
+								static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data)
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+								{
 									struct pool_info *pi = data;
 									r1bio_t *r1_bio;
 									int size = offsetof(r1bio_t, bios[pi->raid_disks]);
 									/* allocate a r1bio with room for raid_disks entries in the bios array */
 									r1_bio = kmalloc(size, gfp_flags);
 									if (r1_bio)
 										memset(r1_bio, 0, size);
 									else
 										unplug_slaves(pi->mddev);
 									return r1_bio;
 								}
 								static void r1bio_pool_free(void *r1_bio, void *data)
 								{
 									kfree(r1_bio);
 								}
 								#define RESYNC_BLOCK_SIZE (64*1024)
 								//#define RESYNC_BLOCK_SIZE PAGE_SIZE
 								#define RESYNC_SECTORS (RESYNC_BLOCK_SIZE >> 9)
 								#define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE)
 								#define RESYNC_WINDOW (2048*1024)
-												[PATCH] gfp flags annotations - part 1

 - added typedef unsigned int __nocast gfp_t;

 - replaced __nocast uses for gfp flags with gfp_t - it gives exactly
   the same warnings as far as sparse is concerned, doesn't change
   generated code (from gcc point of view we replaced unsigned int with
   typedef) and documents what's going on far better.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-10-07 06:46:04 +00:00
+								static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+								{
 									struct pool_info *pi = data;
 									struct page *page;
 									r1bio_t *r1_bio;
 									struct bio *bio;
 									int i, j;
 									r1_bio = r1bio_pool_alloc(gfp_flags, pi);
 									if (!r1_bio) {
 										unplug_slaves(pi->mddev);
 										return NULL;
 									}
 									/*
 									 * Allocate bios : 1 for reading, n-1 for writing
 									 */
 									for (j = pi->raid_disks ; j-- ; ) {
 										bio = bio_alloc(gfp_flags, RESYNC_PAGES);
 										if (!bio)
 											goto out_free_bio;
 										r1_bio->bios[j] = bio;
 									}
 									/*
 									 * Allocate RESYNC_PAGES data pages and attach them to
 									 * the first bio;
 									 */
 									bio = r1_bio->bios[0];
 									for (i = 0; i < RESYNC_PAGES; i++) {
 										page = alloc_page(gfp_flags);
 										if (unlikely(!page))
 											goto out_free_pages;
 										bio->bi_io_vec[i].bv_page = page;
 									}
 									r1_bio->master_bio = NULL;
 									return r1_bio;
 								out_free_pages:
 									for ( ; i > 0 ; i--)
 										__free_page(bio->bi_io_vec[i-1].bv_page);
 								out_free_bio:
 									while ( ++j < pi->raid_disks )
 										bio_put(r1_bio->bios[j]);
 									r1bio_pool_free(r1_bio, data);
 									return NULL;
 								}
 								static void r1buf_pool_free(void *__r1_bio, void *data)
 								{
 									struct pool_info *pi = data;
 									int i;
 									r1bio_t *r1bio = __r1_bio;
 									struct bio *bio = r1bio->bios[0];
 									for (i = 0; i < RESYNC_PAGES; i++) {
 										__free_page(bio->bi_io_vec[i].bv_page);
 										bio->bi_io_vec[i].bv_page = NULL;
 									}
 									for (i=0 ; i < pi->raid_disks; i++)
 										bio_put(r1bio->bios[i]);
 									r1bio_pool_free(r1bio, data);
 								}
 								static void put_all_bios(conf_t *conf, r1bio_t *r1_bio)
 								{
 									int i;
 									for (i = 0; i < conf->raid_disks; i++) {
 										struct bio **bio = r1_bio->bios + i;
 										if (*bio)
 											bio_put(*bio);
 										*bio = NULL;
 									}
 								}
 								static inline void free_r1bio(r1bio_t *r1_bio)
 								{
 									unsigned long flags;
 									conf_t *conf = mddev_to_conf(r1_bio->mddev);
 									/*
 									 * Wake up any possible resync thread that waits for the device
 									 * to go idle.
 									 */
 									spin_lock_irqsave(&conf->resync_lock, flags);
 									if (!--conf->nr_pending) {
 										wake_up(&conf->wait_idle);
 										wake_up(&conf->wait_resume);
 									}
 									spin_unlock_irqrestore(&conf->resync_lock, flags);
 									put_all_bios(conf, r1_bio);
 									mempool_free(r1_bio, conf->r1bio_pool);
 								}
 								static inline void put_buf(r1bio_t *r1_bio)
 								{
 									conf_t *conf = mddev_to_conf(r1_bio->mddev);
 									unsigned long flags;
 									mempool_free(r1_bio, conf->r1buf_pool);
 									spin_lock_irqsave(&conf->resync_lock, flags);
 									if (!conf->barrier)
 										BUG();
 									--conf->barrier;
 									wake_up(&conf->wait_resume);
 									wake_up(&conf->wait_idle);
 									if (!--conf->nr_pending) {
 										wake_up(&conf->wait_idle);
 										wake_up(&conf->wait_resume);
 									}
 									spin_unlock_irqrestore(&conf->resync_lock, flags);
 								}
 								static void reschedule_retry(r1bio_t *r1_bio)
 								{
 									unsigned long flags;
 									mddev_t *mddev = r1_bio->mddev;
 									conf_t *conf = mddev_to_conf(mddev);
 									spin_lock_irqsave(&conf->device_lock, flags);
 									list_add(&r1_bio->retry_list, &conf->retry_list);
 									spin_unlock_irqrestore(&conf->device_lock, flags);
 									md_wakeup_thread(mddev->thread);
 								}
 								/*
 								 * raid_end_bio_io() is called when we have finished servicing a mirrored
 								 * operation and are ready to return a success/failure code to the buffer
 								 * cache layer.
 								 */
 								static void raid_end_bio_io(r1bio_t *r1_bio)
 								{
 									struct bio *bio = r1_bio->master_bio;
-												[PATCH] md: add write-behind support for md/raid1

If a device is flagged 'WriteMostly' and the array has a bitmap, and the
bitmap superblock indicates that write_behind is allowed, then write_behind is
enabled for WriteMostly devices.

Write requests will be acknowledged as complete to the caller (via b_end_io)
when all non-WriteMostly devices have completed the write, but will not be
cleared from the bitmap until all devices complete.

This requires memory allocation to make a local copy of the data being
written.  If there is insufficient memory, then we fall-back on normal write
semantics.

Signed-Off-By: Paul Clements <paul.clements@steeleye.com>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:47 +00:00
+									/* if nobody has done the final endio yet, do it now */
 									if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) {
 										PRINTK(KERN_DEBUG "raid1: sync end %s on sectors %llu-%llu\n",
 											(bio_data_dir(bio) == WRITE) ? "write" : "read",
 											(unsigned long long) bio->bi_sector,
 											(unsigned long long) bio->bi_sector +
 												(bio->bi_size >> 9) - 1);
 										bio_endio(bio, bio->bi_size,
 											test_bit(R1BIO_Uptodate, &r1_bio->state) ? 0 : -EIO);
 									}
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									free_r1bio(r1_bio);
 								}
 								/*
 								 * Update disk head position estimator based on IRQ completion info.
 								 */
 								static inline void update_head_pos(int disk, r1bio_t *r1_bio)
 								{
 									conf_t *conf = mddev_to_conf(r1_bio->mddev);
 									conf->mirrors[disk].head_position =
 										r1_bio->sector + (r1_bio->sectors);
 								}
 								static int raid1_end_read_request(struct bio *bio, unsigned int bytes_done, int error)
 								{
 									int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 									r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private);
 									int mirror;
 									conf_t *conf = mddev_to_conf(r1_bio->mddev);
 									if (bio->bi_size)
 										return 1;
 									mirror = r1_bio->read_disk;
 									/*
 									 * this branch is our 'one mirror IO has finished' event handler:
 									 */
 									if (!uptodate)
 										md_error(r1_bio->mddev, conf->mirrors[mirror].rdev);
 									else
 										/*
 										 * Set R1BIO_Uptodate in our master bio, so that
 										 * we will return a good error code for to the higher
 										 * levels even if IO on some other mirrored buffer fails.
 										 *
 										 * The 'master' represents the composite IO operation to
 										 * user-side. So if something waits for IO, then it will
 										 * wait for the 'master' bio.
 										 */
 										set_bit(R1BIO_Uptodate, &r1_bio->state);
 									update_head_pos(mirror, r1_bio);
 									/*
 									 * we have only one bio on the read side
 									 */
 									if (uptodate)
 										raid_end_bio_io(r1_bio);
 									else {
 										/*
 										 * oops, read error:
 										 */
 										char b[BDEVNAME_SIZE];
 										if (printk_ratelimit())
 											printk(KERN_ERR "raid1: %s: rescheduling sector %llu\n",
 											       bdevname(conf->mirrors[mirror].rdev->bdev,b), (unsigned long long)r1_bio->sector);
 										reschedule_retry(r1_bio);
 									}
 									rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
 									return 0;
 								}
 								static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int error)
 								{
 									int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 									r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private);
-												[PATCH] md: support BIO_RW_BARRIER for md/raid1

We can only accept BARRIER requests if all slaves handle
barriers, and that can, of course, change with time....

So we keep track of whether the whole array seems safe for barriers,
and also whether each individual rdev handles barriers.

We initially assume barriers are OK.

When writing the superblock we try a barrier, and if that fails, we flag
things for no-barriers.  This will usually clear the flags fairly quickly.

If writing the superblock finds that BIO_RW_BARRIER is -ENOTSUPP, we need to
resubmit, so introduce function "md_super_wait" which waits for requests to
finish, and retries ENOTSUPP requests without the barrier flag.

When writing the real raid1, write requests which were BIO_RW_BARRIER but
which aren't supported need to be retried.  So raid1d is enhanced to do this,
and when any bio write completes (i.e.  no retry needed) we remove it from the
r1bio, so that devices needing retry are easy to find.

We should hardly ever get -ENOTSUPP errors when writing data to the raid.
It should only happen if:
  1/ the device used to support BARRIER, but now doesn't.  Few devices
     change like this, though raid1 can!
or
  2/ the array has no persistent superblock, so there was no opportunity to
     pre-test for barriers when writing the superblock.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:34 +00:00
+									int mirror, behind = test_bit(R1BIO_BehindIO, &r1_bio->state);
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									conf_t *conf = mddev_to_conf(r1_bio->mddev);
 									if (bio->bi_size)
 										return 1;
 									for (mirror = 0; mirror < conf->raid_disks; mirror++)
 										if (r1_bio->bios[mirror] == bio)
 											break;
-												[PATCH] md: support BIO_RW_BARRIER for md/raid1

We can only accept BARRIER requests if all slaves handle
barriers, and that can, of course, change with time....

So we keep track of whether the whole array seems safe for barriers,
and also whether each individual rdev handles barriers.

We initially assumes barriers are OK.

When writing the superblock we try a barrier, and if that fails, we flag
things for no-barriers.  This will usually clear the flags fairly quickly.

If writing the superblock finds that BIO_RW_BARRIER is -ENOTSUPP, we need to
resubmit, so introduce function "md_super_wait" which waits for requests to
finish, and retries ENOTSUPP requests without the barrier flag.

When writing the real raid1, write requests which were BIO_RW_BARRIER but
which aresn't supported need to be retried.  So raid1d is enhanced to do this,
and when any bio write completes (i.e.  no retry needed) we remove it from the
r1bio, so that devices needing retry are easy to find.

We should hardly ever get -ENOTSUPP errors when writing data to the raid.
It should only happen if:
  1/ the device used to support BARRIER, but now doesn't.  Few devices
     change like this, though raid1 can!
or
  2/ the array has no persistent superblock, so there was no opportunity to
     pre-test for barriers when writing the superblock.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:34 +00:00
+									if (error == -ENOTSUPP && test_bit(R1BIO_Barrier, &r1_bio->state)) {
 										set_bit(BarriersNotsupp, &conf->mirrors[mirror].rdev->flags);
 										set_bit(R1BIO_BarrierRetry, &r1_bio->state);
 										r1_bio->mddev->barriers_work = 0;
 									} else {
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+										/*
-												[PATCH] md: support BIO_RW_BARRIER for md/raid1

We can only accept BARRIER requests if all slaves handle
barriers, and that can, of course, change with time....

So we keep track of whether the whole array seems safe for barriers,
and also whether each individual rdev handles barriers.

We initially assumes barriers are OK.

When writing the superblock we try a barrier, and if that fails, we flag
things for no-barriers.  This will usually clear the flags fairly quickly.

If writing the superblock finds that BIO_RW_BARRIER is -ENOTSUPP, we need to
resubmit, so introduce function "md_super_wait" which waits for requests to
finish, and retries ENOTSUPP requests without the barrier flag.

When writing the real raid1, write requests which were BIO_RW_BARRIER but
which aresn't supported need to be retried.  So raid1d is enhanced to do this,
and when any bio write completes (i.e.  no retry needed) we remove it from the
r1bio, so that devices needing retry are easy to find.

We should hardly ever get -ENOTSUPP errors when writing data to the raid.
It should only happen if:
  1/ the device used to support BARRIER, but now doesn't.  Few devices
     change like this, though raid1 can!
or
  2/ the array has no persistent superblock, so there was no opportunity to
     pre-test for barriers when writing the superblock.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:34 +00:00
+										 * this branch is our 'one mirror IO has finished' event handler:
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+										 */
-												[PATCH] md: support BIO_RW_BARRIER for md/raid1

We can only accept BARRIER requests if all slaves handle
barriers, and that can, of course, change with time....

So we keep track of whether the whole array seems safe for barriers,
and also whether each individual rdev handles barriers.

We initially assumes barriers are OK.

When writing the superblock we try a barrier, and if that fails, we flag
things for no-barriers.  This will usually clear the flags fairly quickly.

If writing the superblock finds that BIO_RW_BARRIER is -ENOTSUPP, we need to
resubmit, so introduce function "md_super_wait" which waits for requests to
finish, and retries ENOTSUPP requests without the barrier flag.

When writing the real raid1, write requests which were BIO_RW_BARRIER but
which aresn't supported need to be retried.  So raid1d is enhanced to do this,
and when any bio write completes (i.e.  no retry needed) we remove it from the
r1bio, so that devices needing retry are easy to find.

We should hardly ever get -ENOTSUPP errors when writing data to the raid.
It should only happen if:
  1/ the device used to support BARRIER, but now doesn't.  Few devices
     change like this, though raid1 can!
or
  2/ the array has no persistent superblock, so there was no opportunity to
     pre-test for barriers when writing the superblock.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:34 +00:00
+										r1_bio->bios[mirror] = NULL;
 										bio_put(bio);
 										if (!uptodate) {
 											md_error(r1_bio->mddev, conf->mirrors[mirror].rdev);
 											/* an I/O failed, we can't clear the bitmap */
 											set_bit(R1BIO_Degraded, &r1_bio->state);
 										} else
 											/*
 											 * Set R1BIO_Uptodate in our master bio, so that
 											 * we will return a good error code for to the higher
 											 * levels even if IO on some other mirrored buffer fails.
 											 *
 											 * The 'master' represents the composite IO operation to
 											 * user-side. So if something waits for IO, then it will
 											 * wait for the 'master' bio.
 											 */
 											set_bit(R1BIO_Uptodate, &r1_bio->state);
 										update_head_pos(mirror, r1_bio);
 										if (behind) {
 											if (test_bit(WriteMostly, &conf->mirrors[mirror].rdev->flags))
 												atomic_dec(&r1_bio->behind_remaining);
 											/* In behind mode, we ACK the master bio once the I/O has safely
 											 * reached all non-writemostly disks. Setting the Returned bit
 											 * ensures that this gets done only once -- we don't ever want to
 											 * return -EIO here, instead we'll wait */
 											if (atomic_read(&r1_bio->behind_remaining) >= (atomic_read(&r1_bio->remaining)-1) &&
 											    test_bit(R1BIO_Uptodate, &r1_bio->state)) {
 												/* Maybe we can return now */
 												if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) {
 													struct bio *mbio = r1_bio->master_bio;
 													PRINTK(KERN_DEBUG "raid1: behind end write sectors %llu-%llu\n",
 													       (unsigned long long) mbio->bi_sector,
 													       (unsigned long long) mbio->bi_sector +
 													       (mbio->bi_size >> 9) - 1);
 													bio_endio(mbio, mbio->bi_size, 0);
 												}
-												[PATCH] md: add write-behind support for md/raid1

If a device is flagged 'WriteMostly' and the array has a bitmap, and the
bitmap superblock indicates that write_behind is allowed, then write_behind is
enabled for WriteMostly devices.

Write requests will be acknowledges as complete to the caller (via b_end_io)
when all non-WriteMostly devices have completed the write, but will not be
cleared from the bitmap until all devices complete.

This requires memory allocation to make a local copy of the data being
written.  If there is insufficient memory, then we fall-back on normal write
semantics.

Signed-Off-By: Paul Clements <paul.clements@steeleye.com>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:47 +00:00
+											}
 										}
 									}
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									/*
 									 *
 									 * Let's see if all mirrored write operations have finished
 									 * already.
 									 */
 									if (atomic_dec_and_test(&r1_bio->remaining)) {
-												[PATCH] md: support BIO_RW_BARRIER for md/raid1

We can only accept BARRIER requests if all slaves handle
barriers, and that can, of course, change with time....

So we keep track of whether the whole array seems safe for barriers,
and also whether each individual rdev handles barriers.

We initially assumes barriers are OK.

When writing the superblock we try a barrier, and if that fails, we flag
things for no-barriers.  This will usually clear the flags fairly quickly.

If writing the superblock finds that BIO_RW_BARRIER is -ENOTSUPP, we need to
resubmit, so introduce function "md_super_wait" which waits for requests to
finish, and retries ENOTSUPP requests without the barrier flag.

When writing the real raid1, write requests which were BIO_RW_BARRIER but
which aresn't supported need to be retried.  So raid1d is enhanced to do this,
and when any bio write completes (i.e.  no retry needed) we remove it from the
r1bio, so that devices needing retry are easy to find.

We should hardly ever get -ENOTSUPP errors when writing data to the raid.
It should only happen if:
  1/ the device used to support BARRIER, but now doesn't.  Few devices
     change like this, though raid1 can!
or
  2/ the array has no persistent superblock, so there was no opportunity to
     pre-test for barriers when writing the superblock.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:34 +00:00
+										if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) {
 											reschedule_retry(r1_bio);
 											/* Don't dec_pending yet, we want to hold
 											 * the reference over the retry
 											 */
 											return 0;
 										}
-												[PATCH] md: add write-behind support for md/raid1

If a device is flagged 'WriteMostly' and the array has a bitmap, and the
bitmap superblock indicates that write_behind is allowed, then write_behind is
enabled for WriteMostly devices.

Write requests will be acknowledges as complete to the caller (via b_end_io)
when all non-WriteMostly devices have completed the write, but will not be
cleared from the bitmap until all devices complete.

This requires memory allocation to make a local copy of the data being
written.  If there is insufficient memory, then we fall-back on normal write
semantics.

Signed-Off-By: Paul Clements <paul.clements@steeleye.com>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:47 +00:00
+										if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
 											/* free extra copy of the data pages */
-												[PATCH] md: support BIO_RW_BARRIER for md/raid1

We can only accept BARRIER requests if all slaves handle
barriers, and that can, of course, change with time....

So we keep track of whether the whole array seems safe for barriers,
and also whether each individual rdev handles barriers.

We initially assumes barriers are OK.

When writing the superblock we try a barrier, and if that fails, we flag
things for no-barriers.  This will usually clear the flags fairly quickly.

If writing the superblock finds that BIO_RW_BARRIER is -ENOTSUPP, we need to
resubmit, so introduce function "md_super_wait" which waits for requests to
finish, and retries ENOTSUPP requests without the barrier flag.

When writing the real raid1, write requests which were BIO_RW_BARRIER but
which aresn't supported need to be retried.  So raid1d is enhanced to do this,
and when any bio write completes (i.e.  no retry needed) we remove it from the
r1bio, so that devices needing retry are easy to find.

We should hardly ever get -ENOTSUPP errors when writing data to the raid.
It should only happen if:
  1/ the device used to support BARRIER, but now doesn't.  Few devices
     change like this, though raid1 can!
or
  2/ the array has no persistent superblock, so there was no opportunity to
     pre-test for barriers when writing the superblock.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:34 +00:00
+								/* FIXME bio has been freed!!! */
-												[PATCH] md: add write-behind support for md/raid1

If a device is flagged 'WriteMostly' and the array has a bitmap, and the
bitmap superblock indicates that write_behind is allowed, then write_behind is
enabled for WriteMostly devices.

Write requests will be acknowledges as complete to the caller (via b_end_io)
when all non-WriteMostly devices have completed the write, but will not be
cleared from the bitmap until all devices complete.

This requires memory allocation to make a local copy of the data being
written.  If there is insufficient memory, then we fall-back on normal write
semantics.

Signed-Off-By: Paul Clements <paul.clements@steeleye.com>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:47 +00:00
+											int i = bio->bi_vcnt;
 											while (i--)
 												__free_page(bio->bi_io_vec[i].bv_page);
 										}
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+										/* clear the bitmap if all writes complete successfully */
 										bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
 												r1_bio->sectors,
-												[PATCH] md: add write-behind support for md/raid1

If a device is flagged 'WriteMostly' and the array has a bitmap, and the
bitmap superblock indicates that write_behind is allowed, then write_behind is
enabled for WriteMostly devices.

Write requests will be acknowledges as complete to the caller (via b_end_io)
when all non-WriteMostly devices have completed the write, but will not be
cleared from the bitmap until all devices complete.

This requires memory allocation to make a local copy of the data being
written.  If there is insufficient memory, then we fall-back on normal write
semantics.

Signed-Off-By: Paul Clements <paul.clements@steeleye.com>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:47 +00:00
+												!test_bit(R1BIO_Degraded, &r1_bio->state),
 												behind);
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+										md_write_end(r1_bio->mddev);
 										raid_end_bio_io(r1_bio);
 									}
 									rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
 									return 0;
 								}
 								/*
 								 * This routine returns the disk from which the requested read should
 								 * be done. There is a per-array 'next expected sequential IO' sector
 								 * number - if this matches on the next IO then we use the last disk.
 								 * There is also a per-disk 'last known head position' sector that is
 								 * maintained from IRQ contexts, both the normal and the resync IO
 								 * completion handlers update this position correctly. If there is no
 								 * perfect sequential match then we pick the disk whose head is closest.
 								 *
 								 * If there are 2 mirrors in the same 2 devices, performance degrades
 								 * because position is mirror, not device based.
 								 *
 								 * The rdev for the device selected will have nr_pending incremented.
 								 */
 								static int read_balance(conf_t *conf, r1bio_t *r1_bio)
 								{
 									const unsigned long this_sector = r1_bio->sector;
 									int new_disk = conf->last_used, disk = new_disk;
-												[PATCH] md: support write-mostly device in raid1

This allows a device in a raid1 to be marked as "write mostly".  Read requests
will only be sent if there is no other option.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:45 +00:00
+									int wonly_disk = -1;
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									const int sectors = r1_bio->sectors;
 									sector_t new_distance, current_distance;
-												[PATCH] md: support write-mostly device in raid1

This allows a device in a raid1 to be marked as "write mostly".  Read requests
will only be sent if there is no other option.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:45 +00:00
+									mdk_rdev_t *rdev;
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
 									rcu_read_lock();
 									/*
-												[PATCH] md: support write-mostly device in raid1

This allows a device in a raid1 to be marked as "write mostly".  Read requests
will only be sent if there is no other option.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:45 +00:00
+									 * Check if we can balance. We can balance on the whole
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									 * device if no resync is going on, or below the resync window.
 									 * We take the first readable disk when above the resync window.
 									 */
 								 retry:
 									if (conf->mddev->recovery_cp < MaxSector &&
 									    (this_sector + sectors >= conf->next_resync)) {
 										/* Choose the first operation device, for consistancy */
 										new_disk = 0;
-												[PATCH] md: provide proper rcu_dereference / rcu_assign_pointer annotations in md

Acked-by: <paulmck@us.ibm.com>
Signed-off-by: Suzanne Wood <suzannew@cs.pdx.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:27 +00:00
+										for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
-												[PATCH] md: convert 'faulty' and 'in_sync' fields to bits in 'flags' field

This has the advantage of removing the confusion caused by 'rdev_t' and
'mddev_t' both having 'in_sync' fields.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:31 +00:00
+										     !rdev || !test_bit(In_sync, &rdev->flags)
-												[PATCH] md: support write-mostly device in raid1

This allows a device in a raid1 to be marked as "write mostly".  Read requests
will only be sent if there is no other option.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:45 +00:00
+											     || test_bit(WriteMostly, &rdev->flags);
-												[PATCH] md: provide proper rcu_dereference / rcu_assign_pointer annotations in md

Acked-by: <paulmck@us.ibm.com>
Signed-off-by: Suzanne Wood <suzannew@cs.pdx.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:27 +00:00
+										     rdev = rcu_dereference(conf->mirrors[++new_disk].rdev)) {
-												[PATCH] md: support write-mostly device in raid1

This allows a device in a raid1 to be marked as "write mostly".  Read requests
will only be sent if there is no other option.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:45 +00:00
-												[PATCH] md: convert 'faulty' and 'in_sync' fields to bits in 'flags' field

This has the advantage of removing the confusion caused by 'rdev_t' and
'mddev_t' both having 'in_sync' fields.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:31 +00:00
+											if (rdev && test_bit(In_sync, &rdev->flags))
-												[PATCH] md: support write-mostly device in raid1

This allows a device in a raid1 to be marked as "write mostly".  Read requests
will only be sent if there is no other option.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:45 +00:00
+												wonly_disk = new_disk;
 											if (new_disk == conf->raid_disks - 1) {
 												new_disk = wonly_disk;
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+												break;
 											}
 										}
 										goto rb_out;
 									}
 									/* make sure the disk is operational */
-												[PATCH] md: provide proper rcu_dereference / rcu_assign_pointer annotations in md

Acked-by: <paulmck@us.ibm.com>
Signed-off-by: Suzanne Wood <suzannew@cs.pdx.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:27 +00:00
+									for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
-												[PATCH] md: convert 'faulty' and 'in_sync' fields to bits in 'flags' field

This has the advantage of removing the confusion caused by 'rdev_t' and
'mddev_t' both having 'in_sync' fields.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:31 +00:00
+									     !rdev || !test_bit(In_sync, &rdev->flags) ||
-												[PATCH] md: support write-mostly device in raid1

This allows a device in a raid1 to be marked as "write mostly".  Read requests
will only be sent if there is no other option.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:45 +00:00
+										     test_bit(WriteMostly, &rdev->flags);
-												[PATCH] md: provide proper rcu_dereference / rcu_assign_pointer annotations in md

Acked-by: <paulmck@us.ibm.com>
Signed-off-by: Suzanne Wood <suzannew@cs.pdx.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:27 +00:00
+									     rdev = rcu_dereference(conf->mirrors[new_disk].rdev)) {
-												[PATCH] md: support write-mostly device in raid1

This allows a device in a raid1 to be marked as "write mostly".  Read requests
will only be sent if there is no other option.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:45 +00:00
-												[PATCH] md: convert 'faulty' and 'in_sync' fields to bits in 'flags' field

This has the advantage of removing the confusion caused by 'rdev_t' and
'mddev_t' both having 'in_sync' fields.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:31 +00:00
+										if (rdev && test_bit(In_sync, &rdev->flags))
-												[PATCH] md: support write-mostly device in raid1

This allows a device in a raid1 to be marked as "write mostly".  Read requests
will only be sent if there is no other option.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:45 +00:00
+											wonly_disk = new_disk;
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+										if (new_disk <= 0)
 											new_disk = conf->raid_disks;
 										new_disk--;
 										if (new_disk == disk) {
-												[PATCH] md: support write-mostly device in raid1

This allows a device in a raid1 to be marked as "write mostly".  Read requests
will only be sent if there is no other option.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:45 +00:00
+											new_disk = wonly_disk;
 											break;
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+										}
 									}
-												[PATCH] md: support write-mostly device in raid1

This allows a device in a raid1 to be marked as "write mostly".  Read requests
will only be sent if there is no other option.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:45 +00:00
 									if (new_disk < 0)
 										goto rb_out;
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									disk = new_disk;
 									/* now disk == new_disk == starting point for search */
 									/*
 									 * Don't change to another disk for sequential reads:
 									 */
 									if (conf->next_seq_sect == this_sector)
 										goto rb_out;
 									if (this_sector == conf->mirrors[new_disk].head_position)
 										goto rb_out;
 									current_distance = abs(this_sector - conf->mirrors[disk].head_position);
 									/* Find the disk whose head is closest */
 									do {
 										if (disk <= 0)
 											disk = conf->raid_disks;
 										disk--;
-												[PATCH] md: provide proper rcu_dereference / rcu_assign_pointer annotations in md

Acked-by: <paulmck@us.ibm.com>
Signed-off-by: Suzanne Wood <suzannew@cs.pdx.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:27 +00:00
+										rdev = rcu_dereference(conf->mirrors[disk].rdev);
-												[PATCH] md: support write-mostly device in raid1

This allows a device in a raid1 to be marked as "write mostly".  Read requests
will only be sent if there is no other option.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:45 +00:00
 										if (!rdev ||
-												[PATCH] md: convert 'faulty' and 'in_sync' fields to bits in 'flags' field

This has the advantage of removing the confusion caused by 'rdev_t' and
'mddev_t' both having 'in_sync' fields.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:31 +00:00
+										    !test_bit(In_sync, &rdev->flags) ||
-												[PATCH] md: support write-mostly device in raid1

This allows a device in a raid1 to be marked as "write mostly".  Read requests
will only be sent if there is no other option.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:45 +00:00
+										    test_bit(WriteMostly, &rdev->flags))
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+											continue;
 										if (!atomic_read(&rdev->nr_pending)) {
 											new_disk = disk;
 											break;
 										}
 										new_distance = abs(this_sector - conf->mirrors[disk].head_position);
 										if (new_distance < current_distance) {
 											current_distance = new_distance;
 											new_disk = disk;
 										}
 									} while (disk != conf->last_used);
-												[PATCH] md: support write-mostly device in raid1

This allows a device in a raid1 to be marked as "write mostly".  Read requests
will only be sent if there is no other option.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:45 +00:00
+								 rb_out:
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
 									if (new_disk >= 0) {
-												[PATCH] md: provide proper rcu_dereference / rcu_assign_pointer annotations in md

Acked-by: <paulmck@us.ibm.com>
Signed-off-by: Suzanne Wood <suzannew@cs.pdx.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:27 +00:00
+										rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
-												[PATCH] md: support write-mostly device in raid1

This allows a device in a raid1 to be marked as "write mostly".  Read requests
will only be sent if there is no other option.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:45 +00:00
+										if (!rdev)
 											goto retry;
 										atomic_inc(&rdev->nr_pending);
-												[PATCH] md: convert 'faulty' and 'in_sync' fields to bits in 'flags' field

This has the advantage of removing the confusion caused by 'rdev_t' and
'mddev_t' both having 'in_sync' fields.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:31 +00:00
+										if (!test_bit(In_sync, &rdev->flags)) {
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+											/* cannot risk returning a device that failed
 											 * before we inc'ed nr_pending
 											 */
-												[PATCH] md: support write-mostly device in raid1

This allows a device in a raid1 to be marked as "write mostly".  Read requests
will only be sent if there is no other option.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:45 +00:00
+											atomic_dec(&rdev->nr_pending);
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+											goto retry;
 										}
-												[PATCH] md: support write-mostly device in raid1

This allows a device in a raid1 to be marked as "write mostly".  Read requests
will only be sent if there is no other option.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:45 +00:00
+										conf->next_seq_sect = this_sector + sectors;
 										conf->last_used = new_disk;
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									}
 									rcu_read_unlock();
 									return new_disk;
 								}
 								static void unplug_slaves(mddev_t *mddev)
 								{
 									conf_t *conf = mddev_to_conf(mddev);
 									int i;
 									rcu_read_lock();
 									for (i=0; i<mddev->raid_disks; i++) {
-												[PATCH] md: provide proper rcu_dereference / rcu_assign_pointer annotations in md

Acked-by: <paulmck@us.ibm.com>
Signed-off-by: Suzanne Wood <suzannew@cs.pdx.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:27 +00:00
+										mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
-												[PATCH] md: convert 'faulty' and 'in_sync' fields to bits in 'flags' field

This has the advantage of removing the confusion caused by 'rdev_t' and
'mddev_t' both having 'in_sync' fields.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:31 +00:00
+										if (rdev && !test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)) {
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+											request_queue_t *r_queue = bdev_get_queue(rdev->bdev);
 											atomic_inc(&rdev->nr_pending);
 											rcu_read_unlock();
 											if (r_queue->unplug_fn)
 												r_queue->unplug_fn(r_queue);
 											rdev_dec_pending(rdev, mddev);
 											rcu_read_lock();
 										}
 									}
 									rcu_read_unlock();
 								}
 								static void raid1_unplug(request_queue_t *q)
 								{
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+									mddev_t *mddev = q->queuedata;
 									unplug_slaves(mddev);
 									md_wakeup_thread(mddev->thread);
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+								}
 								static int raid1_issue_flush(request_queue_t *q, struct gendisk *disk,
 											     sector_t *error_sector)
 								{
 									mddev_t *mddev = q->queuedata;
 									conf_t *conf = mddev_to_conf(mddev);
 									int i, ret = 0;
 									rcu_read_lock();
 									for (i=0; i<mddev->raid_disks && ret == 0; i++) {
-												[PATCH] md: provide proper rcu_dereference / rcu_assign_pointer annotations in md

Acked-by: <paulmck@us.ibm.com>
Signed-off-by: Suzanne Wood <suzannew@cs.pdx.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:27 +00:00
+										mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
-												[PATCH] md: convert 'faulty' and 'in_sync' fields to bits in 'flags' field

This has the advantage of removing the confusion caused by 'rdev_t' and
'mddev_t' both having 'in_sync' fields.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:31 +00:00
+										if (rdev && !test_bit(Faulty, &rdev->flags)) {
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+											struct block_device *bdev = rdev->bdev;
 											request_queue_t *r_queue = bdev_get_queue(bdev);
 											if (!r_queue->issue_flush_fn)
 												ret = -EOPNOTSUPP;
 											else {
 												atomic_inc(&rdev->nr_pending);
 												rcu_read_unlock();
 												ret = r_queue->issue_flush_fn(r_queue, bdev->bd_disk,
 															      error_sector);
 												rdev_dec_pending(rdev, mddev);
 												rcu_read_lock();
 											}
 										}
 									}
 									rcu_read_unlock();
 									return ret;
 								}
 								/*
 								 * Throttle resync depth, so that we can both get proper overlapping of
 								 * requests, but are still able to handle normal requests quickly.
 								 */
 								#define RESYNC_DEPTH 32
 								static void device_barrier(conf_t *conf, sector_t sect)
 								{
 									spin_lock_irq(&conf->resync_lock);
 									wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume),
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+											    conf->resync_lock, raid1_unplug(conf->mddev->queue));
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
 									if (!conf->barrier++) {
 										wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+												    conf->resync_lock, raid1_unplug(conf->mddev->queue));
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+										if (conf->nr_pending)
 											BUG();
 									}
 									wait_event_lock_irq(conf->wait_resume, conf->barrier < RESYNC_DEPTH,
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+											    conf->resync_lock, raid1_unplug(conf->mddev->queue));
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									conf->next_resync = sect;
 									spin_unlock_irq(&conf->resync_lock);
 								}
-												[PATCH] md: add write-behind support for md/raid1

If a device is flagged 'WriteMostly' and the array has a bitmap, and the
bitmap superblock indicates that write_behind is allowed, then write_behind is
enabled for WriteMostly devices.

Write requests will be acknowledged as complete to the caller (via b_end_io)
when all non-WriteMostly devices have completed the write, but will not be
cleared from the bitmap until all devices complete.

This requires memory allocation to make a local copy of the data being
written.  If there is insufficient memory, then we fall-back on normal write
semantics.

Signed-Off-By: Paul Clements <paul.clements@steeleye.com>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:47 +00:00
+								/* duplicate the data pages for behind I/O */
 								/*
 								 * Make a private copy of the data carried by @bio for behind I/O.
 								 *
 								 * One page is allocated for every segment visited by
 								 * bio_for_each_segment(), and bv_len bytes are copied into it at the
 								 * same bv_offset as in the source page.
 								 *
 								 * Returns an array of bio->bi_vcnt page pointers on success; the array
 								 * and the pages it references are handed to the caller.  Returns NULL
 								 * if any allocation fails, after releasing everything obtained so far
 								 * (the debug message says the request is then done as sync I/O).
 								 */
 								static struct page **alloc_behind_pages(struct bio *bio)
 								{
 									int i;
 									struct bio_vec *bvec;
 									struct page **pages = kmalloc(bio->bi_vcnt * sizeof(struct page *),
 													GFP_NOIO);
 									if (unlikely(!pages))
 										goto do_sync_io;
 									/* start from all-NULL so the error path can tell which slots are filled */
 									memset(pages, 0, bio->bi_vcnt * sizeof(struct page *));
 									bio_for_each_segment(bvec, bio, i) {
 										pages[i] = alloc_page(GFP_NOIO);
 										if (unlikely(!pages[i]))
 											goto do_sync_io;
 										/* both pages are mapped only for the duration of the copy */
 										memcpy(kmap(pages[i]) + bvec->bv_offset,
 											kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len);
 										kunmap(pages[i]);
 										kunmap(bvec->bv_page);
 									}
 									return pages;
 								do_sync_io:
 									if (pages) {
 										/*
 										 * Release every page that was allocated.  Do not stop at the
 										 * first NULL slot: NOTE(review) if bio_for_each_segment()
 										 * starts at a non-zero index (bio->bi_idx - confirm), the
 										 * leading slots are NULL while later ones hold pages, and
 										 * stopping early would leak them.
 										 */
 										for (i = 0; i < bio->bi_vcnt; i++) {
 											if (pages[i])
 												__free_page(pages[i]);
 										}
 									}
 									kfree(pages);
 									PRINTK("%dB behind alloc failed, doing sync I/O\n", bio->bi_size);
 									return NULL;
 								}
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+								static int make_request(request_queue_t *q, struct bio * bio)
 								{
 									mddev_t *mddev = q->queuedata;
 									conf_t *conf = mddev_to_conf(mddev);
 									mirror_info_t *mirror;
 									r1bio_t *r1_bio;
 									struct bio *read_bio;
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+									int i, targets = 0, disks;
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									mdk_rdev_t *rdev;
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+									struct bitmap *bitmap = mddev->bitmap;
 									unsigned long flags;
 									struct bio_list bl;
-												[PATCH] md: add write-behind support for md/raid1

If a device is flagged 'WriteMostly' and the array has a bitmap, and the
bitmap superblock indicates that write_behind is allowed, then write_behind is
enabled for WriteMostly devices.

Write requests will be acknowledged as complete to the caller (via b_end_io)
when all non-WriteMostly devices have completed the write, but will not be
cleared from the bitmap until all devices complete.

This requires memory allocation to make a local copy of the data being
written.  If there is insufficient memory, then we fall-back on normal write
semantics.

Signed-Off-By: Paul Clements <paul.clements@steeleye.com>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:47 +00:00
+									struct page **behind_pages = NULL;
-												[BLOCK] Unify the seperate read/write io stat fields into arrays

Instead of having ->read_sectors and ->write_sectors, combine the two
into ->sectors[2] and similar for the other fields. This saves a branch
several places in the io path, since we don't have to care for what the
actual io direction is. On my x86-64 box, that's 200 bytes less text in
just the core (not counting the various drivers).

Signed-off-by: Jens Axboe <axboe@suse.de>

											
										
										
											2005-11-01 08:26:16 +00:00
+									const int rw = bio_data_dir(bio);
-												[PATCH] md: support BIO_RW_BARRIER for md/raid1

We can only accept BARRIER requests if all slaves handle
barriers, and that can, of course, change with time....

So we keep track of whether the whole array seems safe for barriers,
and also whether each individual rdev handles barriers.

We initially assume barriers are OK.

When writing the superblock we try a barrier, and if that fails, we flag
things for no-barriers.  This will usually clear the flags fairly quickly.

If writing the superblock finds that BIO_RW_BARRIER is -ENOTSUPP, we need to
resubmit, so introduce function "md_super_wait" which waits for requests to
finish, and retries ENOTSUPP requests without the barrier flag.

When writing the real raid1, write requests which were BIO_RW_BARRIER but
which aren't supported need to be retried.  So raid1d is enhanced to do this,
and when any bio write completes (i.e.  no retry needed) we remove it from the
r1bio, so that devices needing retry are easy to find.

We should hardly ever get -ENOTSUPP errors when writing data to the raid.
It should only happen if:
  1/ the device used to support BARRIER, but now doesn't.  Few devices
     change like this, though raid1 can!
or
  2/ the array has no persistent superblock, so there was no opportunity to
     pre-test for barriers when writing the superblock.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:34 +00:00
+									int do_barriers;
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
-												[PATCH] md: support BIO_RW_BARRIER for md/raid1

We can only accept BARRIER requests if all slaves handle
barriers, and that can, of course, change with time....

So we keep track of whether the whole array seems safe for barriers,
and also whether each individual rdev handles barriers.

We initially assume barriers are OK.

When writing the superblock we try a barrier, and if that fails, we flag
things for no-barriers.  This will usually clear the flags fairly quickly.

If writing the superblock finds that BIO_RW_BARRIER is -ENOTSUPP, we need to
resubmit, so introduce function "md_super_wait" which waits for requests to
finish, and retries ENOTSUPP requests without the barrier flag.

When writing the real raid1, write requests which were BIO_RW_BARRIER but
which aren't supported need to be retried.  So raid1d is enhanced to do this,
and when any bio write completes (i.e.  no retry needed) we remove it from the
r1bio, so that devices needing retry are easy to find.

We should hardly ever get -ENOTSUPP errors when writing data to the raid.
It should only happen if:
  1/ the device used to support BARRIER, but now doesn't.  Few devices
     change like this, though raid1 can!
or
  2/ the array has no persistent superblock, so there was no opportunity to
     pre-test for barriers when writing the superblock.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:34 +00:00
+									if (unlikely(!mddev->barriers_work && bio_barrier(bio))) {
-												[PATCH] md: fail IO request to md that require a barrier.

md does not yet support BIO_RW_BARRIER, so be honest about it and fail
(-EOPNOTSUPP) any such requests.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:41 +00:00
+										bio_endio(bio, bio->bi_size, -EOPNOTSUPP);
 										return 0;
 									}
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
 									/*
 									 * Register the new request and wait if the reconstruction
 									 * thread has put up a bar for new requests.
 									 * Continue immediately if no resync is active currently.
 									 */
-												[PATCH] md: fix deadlock due to md thread processing delayed requests.

Before completing a 'write' the md superblock might need to be updated.
This is best done by the md_thread.

The current code schedules this up and queues the write request for later
handling by the md_thread.

However some personalities (Raid5/raid6) will deadlock if the md_thread
tries to submit requests to its own array.

So this patch changes things so the processes submitting the request waits
for the superblock to be written and then submits the request itself.

This fixes a recently-created deadlock in raid5/raid6

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:26 +00:00
+									md_write_start(mddev, bio); /* wait on superblock update early */
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									spin_lock_irq(&conf->resync_lock);
 									wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, );
 									conf->nr_pending++;
 									spin_unlock_irq(&conf->resync_lock);
-												[BLOCK] Unify the seperate read/write io stat fields into arrays

Instead of having ->read_sectors and ->write_sectors, combine the two
into ->sectors[2] and similar for the other fields. This saves a branch
several places in the io path, since we don't have to care for what the
actual io direction is. On my x86-64 box, that's 200 bytes less text in
just the core (not counting the various drivers).

Signed-off-by: Jens Axboe <axboe@suse.de>

											
										
										
											2005-11-01 08:26:16 +00:00
+									disk_stat_inc(mddev->gendisk, ios[rw]);
 									disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
 									/*
 									 * make_request() can abort the operation when READA is being
 									 * used and no empty request is available.
 									 *
 									 */
 									r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO);
 									r1_bio->master_bio = bio;
 									r1_bio->sectors = bio->bi_size >> 9;
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+									r1_bio->state = 0;
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									r1_bio->mddev = mddev;
 									r1_bio->sector = bio->bi_sector;
-												[BLOCK] Unify the seperate read/write io stat fields into arrays

Instead of having ->read_sectors and ->write_sectors, combine the two
into ->sectors[2] and similar for the other fields. This saves a branch
several places in the io path, since we don't have to care for what the
actual io direction is. On my x86-64 box, that's 200 bytes less text in
just the core (not counting the various drivers).

Signed-off-by: Jens Axboe <axboe@suse.de>

											
										
										
											2005-11-01 08:26:16 +00:00
+									if (rw == READ) {
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+										/*
 										 * read balancing logic:
 										 */
 										int rdisk = read_balance(conf, r1_bio);
 										if (rdisk < 0) {
 											/* couldn't find anywhere to read from */
 											raid_end_bio_io(r1_bio);
 											return 0;
 										}
 										mirror = conf->mirrors + rdisk;
 										r1_bio->read_disk = rdisk;
 										read_bio = bio_clone(bio, GFP_NOIO);
 										r1_bio->bios[rdisk] = read_bio;
 										read_bio->bi_sector = r1_bio->sector + mirror->rdev->data_offset;
 										read_bio->bi_bdev = mirror->rdev->bdev;
 										read_bio->bi_end_io = raid1_end_read_request;
 										read_bio->bi_rw = READ;
 										read_bio->bi_private = r1_bio;
 										generic_make_request(read_bio);
 										return 0;
 									}
 									/*
 									 * WRITE:
 									 */
 									/* first select target devices under spinlock and
 									 * inc refcount on their rdev.  Record them by setting
 									 * bios[x] to bio
 									 */
 									disks = conf->raid_disks;
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+								#if 0
 									{ static int first=1;
 									if (first) printk("First Write sector %llu disks %d\n",
 											  (unsigned long long)r1_bio->sector, disks);
 									first = 0;
 									}
 								#endif
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									rcu_read_lock();
 									for (i = 0;  i < disks; i++) {
-												[PATCH] md: provide proper rcu_dereference / rcu_assign_pointer annotations in md

Acked-by: <paulmck@us.ibm.com>
Signed-off-by: Suzanne Wood <suzannew@cs.pdx.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:27 +00:00
+										if ((rdev=rcu_dereference(conf->mirrors[i].rdev)) != NULL &&
-												[PATCH] md: convert 'faulty' and 'in_sync' fields to bits in 'flags' field

This has the advantage of removing the confusion caused by 'rdev_t' and
'mddev_t' both having 'in_sync' fields.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:31 +00:00
+										    !test_bit(Faulty, &rdev->flags)) {
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+											atomic_inc(&rdev->nr_pending);
-												[PATCH] md: convert 'faulty' and 'in_sync' fields to bits in 'flags' field

This has the advantage of removing the confusion caused by 'rdev_t' and
'mddev_t' both having 'in_sync' fields.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:31 +00:00
+											if (test_bit(Faulty, &rdev->flags)) {
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+												atomic_dec(&rdev->nr_pending);
 												r1_bio->bios[i] = NULL;
 											} else
 												r1_bio->bios[i] = bio;
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+											targets++;
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+										} else
 											r1_bio->bios[i] = NULL;
 									}
 									rcu_read_unlock();
-												[PATCH] md: add write-behind support for md/raid1

If a device is flagged 'WriteMostly' and the array has a bitmap, and the
bitmap superblock indicates that write_behind is allowed, then write_behind is
enabled for WriteMostly devices.

Write requests will be acknowledged as complete to the caller (via b_end_io)
when all non-WriteMostly devices have completed the write, but will not be
cleared from the bitmap until all devices complete.

This requires memory allocation to make a local copy of the data being
written.  If there is insufficient memory, then we fall-back on normal write
semantics.

Signed-Off-By: Paul Clements <paul.clements@steeleye.com>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:47 +00:00
+									BUG_ON(targets == 0); /* we never fail the last device */
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+									if (targets < conf->raid_disks) {
 										/* array is degraded, we will not clear the bitmap
 										 * on I/O completion (see raid1_end_write_request) */
 										set_bit(R1BIO_Degraded, &r1_bio->state);
 									}
-												[PATCH] md: add write-behind support for md/raid1

If a device is flagged 'WriteMostly' and the array has a bitmap, and the
bitmap superblock indicates that write_behind is allowed, then write_behind is
enabled for WriteMostly devices.

Write requests will be acknowledged as complete to the caller (via b_end_io)
when all non-WriteMostly devices have completed the write, but will not be
cleared from the bitmap until all devices complete.

This requires memory allocation to make a local copy of the data being
written.  If there is insufficient memory, then we fall-back on normal write
semantics.

Signed-Off-By: Paul Clements <paul.clements@steeleye.com>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:47 +00:00
+									/* do behind I/O ? */
 									if (bitmap &&
 									    atomic_read(&bitmap->behind_writes) < bitmap->max_write_behind &&
 									    (behind_pages = alloc_behind_pages(bio)) != NULL)
 										set_bit(R1BIO_BehindIO, &r1_bio->state);
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+									atomic_set(&r1_bio->remaining, 0);
-												[PATCH] md: add write-behind support for md/raid1

If a device is flagged 'WriteMostly' and the array has a bitmap, and the
bitmap superblock indicates that write_behind is allowed, then write_behind is
enabled for WriteMostly devices.

Write requests will be acknowledged as complete to the caller (via b_end_io)
when all non-WriteMostly devices have completed the write, but will not be
cleared from the bitmap until all devices complete.

This requires memory allocation to make a local copy of the data being
written.  If there is insufficient memory, then we fall-back on normal write
semantics.

Signed-Off-By: Paul Clements <paul.clements@steeleye.com>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:47 +00:00
+									atomic_set(&r1_bio->behind_remaining, 0);
-												[PATCH] md: improve locking on 'safemode' and move superblock writes

When md marks the superblock dirty before a write, it calls
generic_make_request (to write the superblock) from within
generic_make_request (to write the first dirty block), which could cause
problems later.

With this patch, the superblock write is always done by the helper thread, and
write requests are delayed until that write completes.

Also, the locking around marking the array dirty and writing the superblock is
improved to avoid possible races.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:12 +00:00
-												[PATCH] md: support BIO_RW_BARRIER for md/raid1

We can only accept BARRIER requests if all slaves handle
barriers, and that can, of course, change with time....

So we keep track of whether the whole array seems safe for barriers,
and also whether each individual rdev handles barriers.

We initially assume barriers are OK.

When writing the superblock we try a barrier, and if that fails, we flag
things for no-barriers.  This will usually clear the flags fairly quickly.

If writing the superblock finds that BIO_RW_BARRIER is -ENOTSUPP, we need to
resubmit, so introduce function "md_super_wait" which waits for requests to
finish, and retries ENOTSUPP requests without the barrier flag.

When writing the real raid1, write requests which were BIO_RW_BARRIER but
which aren't supported need to be retried.  So raid1d is enhanced to do this,
and when any bio write completes (i.e.  no retry needed) we remove it from the
r1bio, so that devices needing retry are easy to find.

We should hardly ever get -ENOTSUPP errors when writing data to the raid.
It should only happen if:
  1/ the device used to support BARRIER, but now doesn't.  Few devices
     change like this, though raid1 can!
or
  2/ the array has no persistent superblock, so there was no opportunity to
     pre-test for barriers when writing the superblock.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:34 +00:00
+									do_barriers = bio->bi_rw & BIO_RW_BARRIER;
 									if (do_barriers)
 										set_bit(R1BIO_Barrier, &r1_bio->state);
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+									bio_list_init(&bl);
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									for (i = 0; i < disks; i++) {
 										struct bio *mbio;
 										if (!r1_bio->bios[i])
 											continue;
 										mbio = bio_clone(bio, GFP_NOIO);
 										r1_bio->bios[i] = mbio;
 										mbio->bi_sector	= r1_bio->sector + conf->mirrors[i].rdev->data_offset;
 										mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
 										mbio->bi_end_io	= raid1_end_write_request;
-												[PATCH] md: support BIO_RW_BARRIER for md/raid1

We can only accept BARRIER requests if all slaves handle
barriers, and that can, of course, change with time....

So we keep track of whether the whole array seems safe for barriers,
and also whether each individual rdev handles barriers.

We initially assume barriers are OK.

When writing the superblock we try a barrier, and if that fails, we flag
things for no-barriers.  This will usually clear the flags fairly quickly.

If writing the superblock finds that BIO_RW_BARRIER is -ENOTSUPP, we need to
resubmit, so introduce function "md_super_wait" which waits for requests to
finish, and retries ENOTSUPP requests without the barrier flag.

When writing the real raid1, write requests which were BIO_RW_BARRIER but
which aren't supported need to be retried.  So raid1d is enhanced to do this,
and when any bio write completes (i.e.  no retry needed) we remove it from the
r1bio, so that devices needing retry are easy to find.

We should hardly ever get -ENOTSUPP errors when writing data to the raid.
It should only happen if:
  1/ the device used to support BARRIER, but now doesn't.  Few devices
     change like this, though raid1 can!
or
  2/ the array has no persistent superblock, so there was no opportunity to
     pre-test for barriers when writing the superblock.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:34 +00:00
+										mbio->bi_rw = WRITE | do_barriers;
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+										mbio->bi_private = r1_bio;
-												[PATCH] md: add write-behind support for md/raid1

If a device is flagged 'WriteMostly' and the array has a bitmap, and the
bitmap superblock indicates that write_behind is allowed, then write_behind is
enabled for WriteMostly devices.

Write requests will be acknowledged as complete to the caller (via b_end_io)
when all non-WriteMostly devices have completed the write, but will not be
cleared from the bitmap until all devices complete.

This requires memory allocation to make a local copy of the data being
written.  If there is insufficient memory, then we fall-back on normal write
semantics.

Signed-Off-By: Paul Clements <paul.clements@steeleye.com>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:47 +00:00
+										if (behind_pages) {
 											struct bio_vec *bvec;
 											int j;
 											/* Yes, I really want the '__' version so that
 											 * we clear any unused pointer in the io_vec, rather
 											 * than leave them unchanged.  This is important
 											 * because when we come to free the pages, we won't
 											 * know the originial bi_idx, so we just free
 											 * them all
 											 */
 											__bio_for_each_segment(bvec, mbio, j, 0)
 												bvec->bv_page = behind_pages[j];
 											if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags))
 												atomic_inc(&r1_bio->behind_remaining);
 										}
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+										atomic_inc(&r1_bio->remaining);
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+										bio_list_add(&bl, mbio);
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									}
-												[PATCH] md: add write-behind support for md/raid1

If a device is flagged 'WriteMostly' and the array has a bitmap, and the
bitmap superblock indicates that write_behind is allowed, then write_behind is
enabled for WriteMostly devices.

Write requests will be acknowledged as complete to the caller (via b_end_io)
when all non-WriteMostly devices have completed the write, but will not be
cleared from the bitmap until all devices complete.

This requires memory allocation to make a local copy of the data being
written.  If there is insufficient memory, then we fall-back on normal write
semantics.

Signed-Off-By: Paul Clements <paul.clements@steeleye.com>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:47 +00:00
+									kfree(behind_pages); /* the behind pages are attached to the bios now */
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
-												[PATCH] md: add write-behind support for md/raid1

If a device is flagged 'WriteMostly' and the array has a bitmap, and the
bitmap superblock indicates that write_behind is allowed, then write_behind is
enabled for WriteMostly devices.

Write requests will be acknowledged as complete to the caller (via b_end_io)
when all non-WriteMostly devices have completed the write, but will not be
cleared from the bitmap until all devices complete.

This requires memory allocation to make a local copy of the data being
written.  If there is insufficient memory, then we fall-back on normal write
semantics.

Signed-Off-By: Paul Clements <paul.clements@steeleye.com>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:47 +00:00
+									bitmap_startwrite(bitmap, bio->bi_sector, r1_bio->sectors,
 												test_bit(R1BIO_BehindIO, &r1_bio->state));
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+									spin_lock_irqsave(&conf->device_lock, flags);
 									bio_list_merge(&conf->pending_bio_list, &bl);
 									bio_list_init(&bl);
 									blk_plug_device(mddev->queue);
 									spin_unlock_irqrestore(&conf->device_lock, flags);
 								#if 0
 									while ((bio = bio_list_pop(&bl)) != NULL)
 										generic_make_request(bio);
 								#endif
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									return 0;
 								}
 								static void status(struct seq_file *seq, mddev_t *mddev)
 								{
 									conf_t *conf = mddev_to_conf(mddev);
 									int i;
 									seq_printf(seq, " [%d/%d] [", conf->raid_disks,
 														conf->working_disks);
 									for (i = 0; i < conf->raid_disks; i++)
 										seq_printf(seq, "%s",
 											      conf->mirrors[i].rdev &&
-												[PATCH] md: convert 'faulty' and 'in_sync' fields to bits in 'flags' field

This has the advantage of removing the confusion caused by 'rdev_t' and
'mddev_t' both having 'in_sync' fields.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:31 +00:00
+											      test_bit(In_sync, &conf->mirrors[i].rdev->flags) ? "U" : "_");
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									seq_printf(seq, "]");
 								}
 								static void error(mddev_t *mddev, mdk_rdev_t *rdev)
 								{
 									char b[BDEVNAME_SIZE];
 									conf_t *conf = mddev_to_conf(mddev);
 									/*
 									 * If it is not operational, then we have already marked it as dead
 									 * else if it is the last working disks, ignore the error, let the
 									 * next level up know.
 									 * else mark the drive as failed
 									 */
-												[PATCH] md: convert 'faulty' and 'in_sync' fields to bits in 'flags' field

This has the advantage of removing the confusion caused by 'rdev_t' and
'mddev_t' both having 'in_sync' fields.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:31 +00:00
+									if (test_bit(In_sync, &rdev->flags)
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									    && conf->working_disks == 1)
 										/*
 										 * Don't fail the drive, act as though we were just a
 										 * normal single drive
 										 */
 										return;
-												[PATCH] md: convert 'faulty' and 'in_sync' fields to bits in 'flags' field

This has the advantage of removing the confusion caused by 'rdev_t' and
'mddev_t' both having 'in_sync' fields.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:31 +00:00
+									if (test_bit(In_sync, &rdev->flags)) {
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+										mddev->degraded++;
 										conf->working_disks--;
 										/*
 										 * if recovery is running, make sure it aborts.
 										 */
 										set_bit(MD_RECOVERY_ERR, &mddev->recovery);
 									}
-												[PATCH] md: convert 'faulty' and 'in_sync' fields to bits in 'flags' field

This has the advantage of removing the confusion caused by 'rdev_t' and
'mddev_t' both having 'in_sync' fields.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:31 +00:00
+									clear_bit(In_sync, &rdev->flags);
 									set_bit(Faulty, &rdev->flags);
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									mddev->sb_dirty = 1;
 									printk(KERN_ALERT "raid1: Disk failure on %s, disabling device. \n"
 										"	Operation continuing on %d devices\n",
 										bdevname(rdev->bdev,b), conf->working_disks);
 								}
 								static void print_conf(conf_t *conf)
 								{
 									int i;
 									mirror_info_t *tmp;
 									printk("RAID1 conf printout:\n");
 									if (!conf) {
 										printk("(!conf)\n");
 										return;
 									}
 									printk(" --- wd:%d rd:%d\n", conf->working_disks,
 										conf->raid_disks);
 									for (i = 0; i < conf->raid_disks; i++) {
 										char b[BDEVNAME_SIZE];
 										tmp = conf->mirrors + i;
 										if (tmp->rdev)
 											printk(" disk %d, wo:%d, o:%d, dev:%s\n",
-												[PATCH] md: convert 'faulty' and 'in_sync' fields to bits in 'flags' field

This has the advantage of removing the confusion caused by 'rdev_t' and
'mddev_t' both having 'in_sync' fields.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:31 +00:00
+												i, !test_bit(In_sync, &tmp->rdev->flags), !test_bit(Faulty, &tmp->rdev->flags),
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+												bdevname(tmp->rdev->bdev,b));
 									}
 								}
 								static void close_sync(conf_t *conf)
 								{
 									spin_lock_irq(&conf->resync_lock);
 									wait_event_lock_irq(conf->wait_resume, !conf->barrier,
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+											    conf->resync_lock, 	raid1_unplug(conf->mddev->queue));
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									spin_unlock_irq(&conf->resync_lock);
 									if (conf->barrier) BUG();
 									if (waitqueue_active(&conf->wait_idle)) BUG();
 									mempool_destroy(conf->r1buf_pool);
 									conf->r1buf_pool = NULL;
 								}
 								static int raid1_spare_active(mddev_t *mddev)
 								{
 									int i;
 									conf_t *conf = mddev->private;
 									mirror_info_t *tmp;
 									/*
 									 * Find all failed disks within the RAID1 configuration
 									 * and mark them readable
 									 */
 									for (i = 0; i < conf->raid_disks; i++) {
 										tmp = conf->mirrors + i;
 										if (tmp->rdev
-												[PATCH] md: convert 'faulty' and 'in_sync' fields to bits in 'flags' field

This has the advantage of removing the confusion caused by 'rdev_t' and
'mddev_t' both having 'in_sync' fields.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:31 +00:00
+										    && !test_bit(Faulty, &tmp->rdev->flags)
 										    && !test_bit(In_sync, &tmp->rdev->flags)) {
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+											conf->working_disks++;
 											mddev->degraded--;
-												[PATCH] md: convert 'faulty' and 'in_sync' fields to bits in 'flags' field

This has the advantage of removing the confusion caused by 'rdev_t' and
'mddev_t' both having 'in_sync' fields.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:31 +00:00
+											set_bit(In_sync, &tmp->rdev->flags);
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+										}
 									}
 									print_conf(conf);
 									return 0;
 								}
 								static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
 								{
 									conf_t *conf = mddev->private;
 									int found = 0;
-												[PATCH] md: optimise reconstruction when re-adding a recently failed drive.

When an array is degraded, bit in the intent-bitmap are never cleared.  So if
a recently failed drive is re-added, we only need to reconstruct the block
that are still reflected in the bitmap.

This patch adds support for this re-adding.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:25 +00:00
+									int mirror = 0;
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									mirror_info_t *p;
 									for (mirror=0; mirror < mddev->raid_disks; mirror++)
 										if ( !(p=conf->mirrors+mirror)->rdev) {
 											blk_queue_stack_limits(mddev->queue,
 													       rdev->bdev->bd_disk->queue);
 											/* as we don't honour merge_bvec_fn, we must never risk
 											 * violating it, so limit ->max_sector to one PAGE, as
 											 * a one page request is never in violation.
 											 */
 											if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
 											    mddev->queue->max_sectors > (PAGE_SIZE>>9))
 												blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 											p->head_position = 0;
 											rdev->raid_disk = mirror;
 											found = 1;
-												[PATCH] md: fix --re-add for raid1 and raid6

If you have an array with a write-intent-bitmap, and you remove a device, then
re-add it, a full recovery isn't needed.  We detect a re-add by looking at
saved_raid_disk.  For raid1, it doesn't matter which disk it was, only whether
or not it was an active device.  The old code being removed set a value of
'mirror' which was then ignored, so it can go.  The changed code performs the
correct check.

For raid6, if there are two missing devices, make sure we chose the right slot
on --re-add rather than always the first slot.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-28 21:44:13 +00:00
+											/* As all devices are equivalent, we don't need a full recovery
 											 * if this was recently any drive of the array
 											 */
 											if (rdev->saved_raid_disk < 0)
-												[PATCH] md: optimise reconstruction when re-adding a recently failed drive.

When an array is degraded, bit in the intent-bitmap are never cleared.  So if
a recently failed drive is re-added, we only need to reconstruct the block
that are still reflected in the bitmap.

This patch adds support for this re-adding.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:25 +00:00
+												conf->fullsync = 1;
-												[PATCH] md: provide proper rcu_dereference / rcu_assign_pointer annotations in md

Acked-by: <paulmck@us.ibm.com>
Signed-off-by: Suzanne Wood <suzannew@cs.pdx.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:27 +00:00
+											rcu_assign_pointer(p->rdev, rdev);
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+											break;
 										}
 									print_conf(conf);
 									return found;
 								}
 								static int raid1_remove_disk(mddev_t *mddev, int number)
 								{
 									conf_t *conf = mddev->private;
 									int err = 0;
 									mdk_rdev_t *rdev;
 									mirror_info_t *p = conf->mirrors+ number;
 									print_conf(conf);
 									rdev = p->rdev;
 									if (rdev) {
-												[PATCH] md: convert 'faulty' and 'in_sync' fields to bits in 'flags' field

This has the advantage of removing the confusion caused by 'rdev_t' and
'mddev_t' both having 'in_sync' fields.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:31 +00:00
+										if (test_bit(In_sync, &rdev->flags) ||
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+										    atomic_read(&rdev->nr_pending)) {
 											err = -EBUSY;
 											goto abort;
 										}
 										p->rdev = NULL;
-												[PATCH] Change synchronize_kernel to _rcu and _sched

This patch changes calls to synchronize_kernel(), deprecated in the earlier
"Deprecate synchronize_kernel, GPL replacement" patch to instead call the new
synchronize_rcu() and synchronize_sched() APIs.

Signed-off-by: Paul E. McKenney <paulmck@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-05-01 15:59:04 +00:00
+										synchronize_rcu();
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+										if (atomic_read(&rdev->nr_pending)) {
 											/* lost the race, try later */
 											err = -EBUSY;
 											p->rdev = rdev;
 										}
 									}
 								abort:
 									print_conf(conf);
 									return err;
 								}
 								static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
 								{
 									int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 									r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private);
 									conf_t *conf = mddev_to_conf(r1_bio->mddev);
 									if (bio->bi_size)
 										return 1;
 									if (r1_bio->bios[r1_bio->read_disk] != bio)
 										BUG();
 									update_head_pos(r1_bio->read_disk, r1_bio);
 									/*
 									 * we have read a block, now it needs to be re-written,
 									 * or re-read if the read failed.
 									 * We don't do much here, just schedule handling by raid1d
 									 */
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+									if (!uptodate) {
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+										md_error(r1_bio->mddev,
 											 conf->mirrors[r1_bio->read_disk].rdev);
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+									} else
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+										set_bit(R1BIO_Uptodate, &r1_bio->state);
 									rdev_dec_pending(conf->mirrors[r1_bio->read_disk].rdev, conf->mddev);
 									reschedule_retry(r1_bio);
 									return 0;
 								}
 								static int end_sync_write(struct bio *bio, unsigned int bytes_done, int error)
 								{
 									int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 									r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private);
 									mddev_t *mddev = r1_bio->mddev;
 									conf_t *conf = mddev_to_conf(mddev);
 									int i;
 									int mirror=0;
 									if (bio->bi_size)
 										return 1;
 									for (i = 0; i < conf->raid_disks; i++)
 										if (r1_bio->bios[i] == bio) {
 											mirror = i;
 											break;
 										}
-												[PATCH] md: yet another attempt to get bitmap-based resync to do the right thing in all cases...

Firstly, R1BIO_Degraded was being set in a number of places in the resync
code, but is never used there, so get rid of those settings.

Then: When doing a resync, we want to clear the bit in the bitmap iff the
array will be non-degraded when the sync has completed.  However the current
code would clear the bitmap if the array was non-degraded when the resync
*started*, which obviously isn't right (it is for 'resync' but not for
'recovery' - i.e.  rebuilding a failed drive).

This patch calculated 'still_degraded' and uses the to tell bitmap_start_sync
whether this sync should clear the corresponding bit.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-08-04 19:53:34 +00:00
+									if (!uptodate)
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+										md_error(mddev, conf->mirrors[mirror].rdev);
-												[PATCH] md: yet another attempt to get bitmap-based resync to do the right thing in all cases...

Firstly, R1BIO_Degraded was being set in a number of places in the resync
code, but is never used there, so get rid of those settings.

Then: When doing a resync, we want to clear the bit in the bitmap iff the
array will be non-degraded when the sync has completed.  However the current
code would clear the bitmap if the array was non-degraded when the resync
*started*, which obviously isn't right (it is for 'resync' but not for
'recovery' - i.e.  rebuilding a failed drive).

This patch calculated 'still_degraded' and uses the to tell bitmap_start_sync
whether this sync should clear the corresponding bit.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-08-04 19:53:34 +00:00
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									update_head_pos(mirror, r1_bio);
 									if (atomic_dec_and_test(&r1_bio->remaining)) {
 										md_done_sync(mddev, r1_bio->sectors, uptodate);
 										put_buf(r1_bio);
 									}
 									rdev_dec_pending(conf->mirrors[mirror].rdev, mddev);
 									return 0;
 								}
 								static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
 								{
 									conf_t *conf = mddev_to_conf(mddev);
 									int i;
 									int disks = conf->raid_disks;
 									struct bio *bio, *wbio;
 									bio = r1_bio->bios[r1_bio->read_disk];
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+								/*
 									if (r1_bio->sector == 0) printk("First sync write startss\n");
 								*/
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									/*
 									 * schedule writes
 									 */
 									if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
 										/*
 										 * There is no point trying a read-for-reconstruct as
 										 * reconstruct is about to be aborted
 										 */
 										char b[BDEVNAME_SIZE];
 										printk(KERN_ALERT "raid1: %s: unrecoverable I/O read error"
 											" for block %llu\n",
 											bdevname(bio->bi_bdev,b),
 											(unsigned long long)r1_bio->sector);
 										md_done_sync(mddev, r1_bio->sectors, 0);
 										put_buf(r1_bio);
 										return;
 									}
 									atomic_set(&r1_bio->remaining, 1);
 									for (i = 0; i < disks ; i++) {
 										wbio = r1_bio->bios[i];
 										if (wbio->bi_end_io != end_sync_write)
 											continue;
 										atomic_inc(&conf->mirrors[i].rdev->nr_pending);
 										atomic_inc(&r1_bio->remaining);
 										md_sync_acct(conf->mirrors[i].rdev->bdev, wbio->bi_size >> 9);
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+										generic_make_request(wbio);
 									}
 									if (atomic_dec_and_test(&r1_bio->remaining)) {
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+										/* if we're here, all write(s) have completed, so clean up */
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+										md_done_sync(mddev, r1_bio->sectors, 1);
 										put_buf(r1_bio);
 									}
 								}
 								/*
 								 * This is a kernel thread which:
 								 *
 								 *	1.	Retries failed read operations on working mirrors.
 								 *	2.	Updates the raid superblock when problems are encountered.
 								 *	3.	Performs writes following reads for array synchronising.
 								 */
 								static void raid1d(mddev_t *mddev)
 								{
 									r1bio_t *r1_bio;
 									struct bio *bio;
 									unsigned long flags;
 									conf_t *conf = mddev_to_conf(mddev);
 									struct list_head *head = &conf->retry_list;
 									int unplug=0;
 									mdk_rdev_t *rdev;
 									md_check_recovery(mddev);
 									for (;;) {
 										char b[BDEVNAME_SIZE];
 										spin_lock_irqsave(&conf->device_lock, flags);
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
 										if (conf->pending_bio_list.head) {
 											bio = bio_list_get(&conf->pending_bio_list);
 											blk_remove_plug(mddev->queue);
 											spin_unlock_irqrestore(&conf->device_lock, flags);
 											/* flush any pending bitmap writes to disk before proceeding w/ I/O */
 											if (bitmap_unplug(mddev->bitmap) != 0)
 												printk("%s: bitmap file write failed!\n", mdname(mddev));
 											while (bio) { /* submit pending writes */
 												struct bio *next = bio->bi_next;
 												bio->bi_next = NULL;
 												generic_make_request(bio);
 												bio = next;
 											}
 											unplug = 1;
 											continue;
 										}
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+										if (list_empty(head))
 											break;
 										r1_bio = list_entry(head->prev, r1bio_t, retry_list);
 										list_del(head->prev);
 										spin_unlock_irqrestore(&conf->device_lock, flags);
 										mddev = r1_bio->mddev;
 										conf = mddev_to_conf(mddev);
 										if (test_bit(R1BIO_IsSync, &r1_bio->state)) {
 											sync_request_write(mddev, r1_bio);
 											unplug = 1;
-												[PATCH] md: support BIO_RW_BARRIER for md/raid1

We can only accept BARRIER requests if all slaves handle
barriers, and that can, of course, change with time....

So we keep track of whether the whole array seems safe for barriers,
and also whether each individual rdev handles barriers.

We initially assumes barriers are OK.

When writing the superblock we try a barrier, and if that fails, we flag
things for no-barriers.  This will usually clear the flags fairly quickly.

If writing the superblock finds that BIO_RW_BARRIER is -ENOTSUPP, we need to
resubmit, so introduce function "md_super_wait" which waits for requests to
finish, and retries ENOTSUPP requests without the barrier flag.

When writing the real raid1, write requests which were BIO_RW_BARRIER but
which aresn't supported need to be retried.  So raid1d is enhanced to do this,
and when any bio write completes (i.e.  no retry needed) we remove it from the
r1bio, so that devices needing retry are easy to find.

We should hardly ever get -ENOTSUPP errors when writing data to the raid.
It should only happen if:
  1/ the device used to support BARRIER, but now doesn't.  Few devices
     change like this, though raid1 can!
or
  2/ the array has no persistent superblock, so there was no opportunity to
     pre-test for barriers when writing the superblock.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:34 +00:00
+										} else if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) {
 											/* some requests in the r1bio were BIO_RW_BARRIER
 											 * requests which failed with -ENOTSUPP.  Hohumm..
 											 * Better resubmit without the barrier.
 											 * We know which devices to resubmit for, because
 											 * all others have had their bios[] entry cleared.
 											 */
 											int i;
 											clear_bit(R1BIO_BarrierRetry, &r1_bio->state);
 											clear_bit(R1BIO_Barrier, &r1_bio->state);
 											for (i=0; i < conf->raid_disks; i++)
 												if (r1_bio->bios[i]) {
 													struct bio_vec *bvec;
 													int j;
 													bio = bio_clone(r1_bio->master_bio, GFP_NOIO);
 													/* copy pages from the failed bio, as
 													 * this might be a write-behind device */
 													__bio_for_each_segment(bvec, bio, j, 0)
 														bvec->bv_page = bio_iovec_idx(r1_bio->bios[i], j)->bv_page;
 													bio_put(r1_bio->bios[i]);
 													bio->bi_sector = r1_bio->sector +
 														conf->mirrors[i].rdev->data_offset;
 													bio->bi_bdev = conf->mirrors[i].rdev->bdev;
 													bio->bi_end_io = raid1_end_write_request;
 													bio->bi_rw = WRITE;
 													bio->bi_private = r1_bio;
 													r1_bio->bios[i] = bio;
 													generic_make_request(bio);
 												}
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+										} else {
 											int disk;
 											bio = r1_bio->bios[r1_bio->read_disk];
 											if ((disk=read_balance(conf, r1_bio)) == -1) {
 												printk(KERN_ALERT "raid1: %s: unrecoverable I/O"
 												       " read error for block %llu\n",
 												       bdevname(bio->bi_bdev,b),
 												       (unsigned long long)r1_bio->sector);
 												raid_end_bio_io(r1_bio);
 											} else {
 												r1_bio->bios[r1_bio->read_disk] = NULL;
 												r1_bio->read_disk = disk;
 												bio_put(bio);
 												bio = bio_clone(r1_bio->master_bio, GFP_NOIO);
 												r1_bio->bios[r1_bio->read_disk] = bio;
 												rdev = conf->mirrors[disk].rdev;
 												if (printk_ratelimit())
 													printk(KERN_ERR "raid1: %s: redirecting sector %llu to"
 													       " another mirror\n",
 													       bdevname(rdev->bdev,b),
 													       (unsigned long long)r1_bio->sector);
 												bio->bi_sector = r1_bio->sector + rdev->data_offset;
 												bio->bi_bdev = rdev->bdev;
 												bio->bi_end_io = raid1_end_read_request;
 												bio->bi_rw = READ;
 												bio->bi_private = r1_bio;
 												unplug = 1;
 												generic_make_request(bio);
 											}
 										}
 									}
 									spin_unlock_irqrestore(&conf->device_lock, flags);
 									if (unplug)
 										unplug_slaves(mddev);
 								}
 								/*
 								 * Create the buffer pool used while resyncing and reset the
 								 * resync position.
 								 *
 								 * The pool is sized to RESYNC_WINDOW / RESYNC_BLOCK_SIZE entries.
 								 * It must not exist yet: finding conf->r1buf_pool already set
 								 * triggers BUG().
 								 *
 								 * Returns 0 on success, or -ENOMEM if the pool cannot be created.
 								 */
 								static int init_resync(conf_t *conf)
 								{
 									int nr_bufs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE;

 									/* a pool that is already present is a fatal inconsistency */
 									if (conf->r1buf_pool != NULL)
 										BUG();

 									conf->r1buf_pool = mempool_create(nr_bufs,
 													  r1buf_pool_alloc,
 													  r1buf_pool_free,
 													  conf->poolinfo);
 									if (conf->r1buf_pool == NULL)
 										return -ENOMEM;

 									/* start the next resync from the beginning */
 									conf->next_resync = 0;
 									return 0;
 								}
 								/*
 								 * perform a "sync" on one "block"
 								 *
 								 * We need to make sure that no normal I/O request - particularly a write
 								 * request - conflicts with active sync requests.
 								 *
 								 * This is achieved by tracking pending requests and a 'barrier' concept
 								 * that can be installed to exclude normal IO requests.
 								 */
-												[PATCH] md: improve the interface to sync_request

1/ change the return value (which is number-of-sectors synced)
 from 'int' to 'sector_t'.
 The number of sectors is usually easily small enough to fit
 in an int, but if resync needs to abort, it may want to return
 the total number of remaining sectors, which could be large.
 Also errors cannot be returned as negative numbers now, so use
 0 instead
2/ Add a 'skipped' return parameter to allow the array to report
 that it skipped the sectors.  This allows md to take this into account
 in the speed calculations.
 Currently there is no important skipping, but the bitmap-based-resync
 that is coming will use this.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:13 +00:00
+								static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, int go_faster)
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+								{
 									conf_t *conf = mddev_to_conf(mddev);
 									mirror_info_t *mirror;
 									r1bio_t *r1_bio;
 									struct bio *bio;
 									sector_t max_sector, nr_sectors;
 									int disk;
 									int i;
-												[PATCH] md: support write-mostly device in raid1

This allows a device in a raid1 to be marked as "write mostly".  Read requests
will only be sent if there is no other option.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:45 +00:00
+									int wonly;
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									int write_targets = 0;
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+									int sync_blocks;
-												[PATCH] md: yet another attempt to get bitmap-based resync to do the right thing in all cases...

Firstly, R1BIO_Degraded was being set in a number of places in the resync
code, but is never used there, so get rid of those settings.

Then: When doing a resync, we want to clear the bit in the bitmap iff the
array will be non-degraded when the sync has completed.  However the current
code would clear the bitmap if the array was non-degraded when the resync
*started*, which obviously isn't right (it is for 'resync' but not for
'recovery' - i.e.  rebuilding a failed drive).

This patch calculated 'still_degraded' and uses the to tell bitmap_start_sync
whether this sync should clear the corresponding bit.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-08-04 19:53:34 +00:00
+									int still_degraded = 0;
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
 									if (!conf->r1buf_pool)
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+									{
 								/*
 										printk("sync start - bitmap %p\n", mddev->bitmap);
 								*/
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+										if (init_resync(conf))
-												[PATCH] md: improve the interface to sync_request

1/ change the return value (which is number-of-sectors synced)
 from 'int' to 'sector_t'.
 The number of sectors is usually easily small enough to fit
 in an int, but if resync needs to abort, it may want to return
 the total number of remaining sectors, which could be large.
 Also errors cannot be returned as negative numbers now, so use
 0 instead
2/ Add a 'skipped' return parameter to allow the array to report
 that it skipped the sectors.  This allows md to take this into account
 in the speed calculations.
 Currently there is no important skipping, but the bitmap-based-resync
 that is coming will use this.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:13 +00:00
+											return 0;
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+									}
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
 									max_sector = mddev->size << 1;
 									if (sector_nr >= max_sector) {
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+										/* If we aborted, we need to abort the
 										 * sync on the 'current' bitmap chunk (there will
 										 * only be one in raid1 resync).
 										 * We can find the current address in mddev->curr_resync
 										 */
-												[PATCH] md/raid1: clear bitmap when fullsync completes

We need to be careful differentiating between a resync of a complete array,
in which we can clear the bitmap, and a resync of a degraded array, in
which we cannot.

This patch cleans all that up.

Cc: Paul Clements <paul.clements@steeleye.com>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-07-15 10:56:35 +00:00
+										if (mddev->curr_resync < max_sector) /* aborted */
 											bitmap_end_sync(mddev->bitmap, mddev->curr_resync,
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+														&sync_blocks, 1);
-												[PATCH] md/raid1: clear bitmap when fullsync completes

We need to be careful differentiating between a resync of a complete array,
in which we can clear the bitmap, and a resync of a degraded array, in
which we cannot.

This patch cleans all that up.

Cc: Paul Clements <paul.clements@steeleye.com>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-07-15 10:56:35 +00:00
+										else /* completed sync */
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+											conf->fullsync = 0;
-												[PATCH] md/raid1: clear bitmap when fullsync completes

We need to be careful differentiating between a resync of a complete array,
in which we can clear the bitmap, and a resync of a degraded array, in
which we cannot.

This patch cleans all that up.

Cc: Paul Clements <paul.clements@steeleye.com>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-07-15 10:56:35 +00:00
 										bitmap_close_sync(mddev->bitmap);
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+										close_sync(conf);
 										return 0;
 									}
-												[PATCH] md: yet another attempt to get bitmap-based resync to do the right thing in all cases...

Firstly, R1BIO_Degraded was being set in a number of places in the resync
code, but is never used there, so get rid of those settings.

Then: When doing a resync, we want to clear the bit in the bitmap iff the
array will be non-degraded when the sync has completed.  However the current
code would clear the bitmap if the array was non-degraded when the resync
*started*, which obviously isn't right (it is for 'resync' but not for
'recovery' - i.e.  rebuilding a failed drive).

This patch calculates 'still_degraded' and uses it to tell bitmap_start_sync
whether this sync should clear the corresponding bit.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-08-04 19:53:34 +00:00
+									/* before building a request, check if we can skip these blocks..
 									 * This call to bitmap_start_sync doesn't actually record anything
 									 */
 									if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) &&
-												[PATCH] md: make manual repair work for raid1

Raid1 currently optimises resync using the intent bitmap etc.  This
optimisation is not wanted when we explicitly request a repair through sysfs,
so add appropriate checks.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:38 +00:00
+									    !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+										/* We can skip this block, and probably several more */
 										*skipped = 1;
 										return sync_blocks;
 									}
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									/*
 									 * If there is non-resync activity waiting for us then
 									 * put in a delay to throttle resync.
 									 */
 									if (!go_faster && waitqueue_active(&conf->wait_resume))
 										msleep_interruptible(1000);
 									device_barrier(conf, sector_nr + RESYNC_SECTORS);
 									/*
 									 * If reconstructing, and >1 working disc,
 									 * could dedicate one to rebuild and others to
 									 * service read requests ..
 									 */
 									disk = conf->last_used;
 									/* make sure disk is operational */
-												[PATCH] md: support write-mostly device in raid1

This allows a device in a raid1 to be marked as "write mostly".  Read requests
will only be sent if there is no other option.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:45 +00:00
+									wonly = disk;
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									while (conf->mirrors[disk].rdev == NULL ||
-												[PATCH] md: convert 'faulty' and 'in_sync' fields to bits in 'flags' field

This has the advantage of removing the confusion caused by 'rdev_t' and
'mddev_t' both having 'in_sync' fields.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:31 +00:00
+									       !test_bit(In_sync, &conf->mirrors[disk].rdev->flags) ||
-												[PATCH] md: support write-mostly device in raid1

This allows a device in a raid1 to be marked as "write mostly".  Read requests
will only be sent if there is no other option.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:45 +00:00
+									       test_bit(WriteMostly, &conf->mirrors[disk].rdev->flags)
 										) {
 										if (conf->mirrors[disk].rdev  &&
-												[PATCH] md: convert 'faulty' and 'in_sync' fields to bits in 'flags' field

This has the advantage of removing the confusion caused by 'rdev_t' and
'mddev_t' both having 'in_sync' fields.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:31 +00:00
+										    test_bit(In_sync, &conf->mirrors[disk].rdev->flags))
-												[PATCH] md: support write-mostly device in raid1

This allows a device in a raid1 to be marked as "write mostly".  Read requests
will only be sent if there is no other option.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:45 +00:00
+											wonly = disk;
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+										if (disk <= 0)
 											disk = conf->raid_disks;
 										disk--;
-												[PATCH] md: support write-mostly device in raid1

This allows a device in a raid1 to be marked as "write mostly".  Read requests
will only be sent if there is no other option.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:45 +00:00
+										if (disk == conf->last_used) {
 											disk = wonly;
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+											break;
-												[PATCH] md: support write-mostly device in raid1

This allows a device in a raid1 to be marked as "write mostly".  Read requests
will only be sent if there is no other option.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:45 +00:00
+										}
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									}
 									conf->last_used = disk;
 									atomic_inc(&conf->mirrors[disk].rdev->nr_pending);
 									mirror = conf->mirrors + disk;
 									r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);
 									spin_lock_irq(&conf->resync_lock);
 									conf->nr_pending++;
 									spin_unlock_irq(&conf->resync_lock);
 									r1_bio->mddev = mddev;
 									r1_bio->sector = sector_nr;
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+									r1_bio->state = 0;
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									set_bit(R1BIO_IsSync, &r1_bio->state);
 									r1_bio->read_disk = disk;
 									for (i=0; i < conf->raid_disks; i++) {
 										bio = r1_bio->bios[i];
 										/* take from bio_init */
 										bio->bi_next = NULL;
 										bio->bi_flags |= 1 << BIO_UPTODATE;
 										bio->bi_rw = 0;
 										bio->bi_vcnt = 0;
 										bio->bi_idx = 0;
 										bio->bi_phys_segments = 0;
 										bio->bi_hw_segments = 0;
 										bio->bi_size = 0;
 										bio->bi_end_io = NULL;
 										bio->bi_private = NULL;
 										if (i == disk) {
 											bio->bi_rw = READ;
 											bio->bi_end_io = end_sync_read;
-												[PATCH] md: yet another attempt to get bitmap-based resync to do the right thing in all cases...

Firstly, R1BIO_Degraded was being set in a number of places in the resync
code, but is never used there, so get rid of those settings.

Then: When doing a resync, we want to clear the bit in the bitmap iff the
array will be non-degraded when the sync has completed.  However the current
code would clear the bitmap if the array was non-degraded when the resync
*started*, which obviously isn't right (it is for 'resync' but not for
'recovery' - i.e.  rebuilding a failed drive).

This patch calculates 'still_degraded' and uses it to tell bitmap_start_sync
whether this sync should clear the corresponding bit.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-08-04 19:53:34 +00:00
+										} else if (conf->mirrors[i].rdev == NULL ||
-												[PATCH] md: convert 'faulty' and 'in_sync' fields to bits in 'flags' field

This has the advantage of removing the confusion caused by 'rdev_t' and
'mddev_t' both having 'in_sync' fields.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:31 +00:00
+											   test_bit(Faulty, &conf->mirrors[i].rdev->flags)) {
-												[PATCH] md: yet another attempt to get bitmap-based resync to do the right thing in all cases...

Firstly, R1BIO_Degraded was being set in a number of places in the resync
code, but is never used there, so get rid of those settings.

Then: When doing a resync, we want to clear the bit in the bitmap iff the
array will be non-degraded when the sync has completed.  However the current
code would clear the bitmap if the array was non-degraded when the resync
*started*, which obviously isn't right (it is for 'resync' but not for
'recovery' - i.e.  rebuilding a failed drive).

This patch calculates 'still_degraded' and uses it to tell bitmap_start_sync
whether this sync should clear the corresponding bit.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-08-04 19:53:34 +00:00
+											still_degraded = 1;
 											continue;
-												[PATCH] md: convert 'faulty' and 'in_sync' fields to bits in 'flags' field

This has the advantage of removing the confusion caused by 'rdev_t' and
'mddev_t' both having 'in_sync' fields.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:31 +00:00
+										} else if (!test_bit(In_sync, &conf->mirrors[i].rdev->flags) ||
-												[PATCH] md: make manual repair work for raid1

Raid1 currently optimises resync using the intent bitmap etc.  This
optimisation is not wanted when we explicitly request a repair through sysfs,
so add appropriate checks.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:38 +00:00
+											   sector_nr + RESYNC_SECTORS > mddev->recovery_cp   ||
 											   test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+											bio->bi_rw = WRITE;
 											bio->bi_end_io = end_sync_write;
 											write_targets ++;
 										} else
-												[PATCH] md: yet another attempt to get bitmap-based resync to do the right thing in all cases...

Firstly, R1BIO_Degraded was being set in a number of places in the resync
code, but is never used there, so get rid of those settings.

Then: When doing a resync, we want to clear the bit in the bitmap iff the
array will be non-degraded when the sync has completed.  However the current
code would clear the bitmap if the array was non-degraded when the resync
*started*, which obviously isn't right (it is for 'resync' but not for
'recovery' - i.e.  rebuilding a failed drive).

This patch calculates 'still_degraded' and uses it to tell bitmap_start_sync
whether this sync should clear the corresponding bit.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-08-04 19:53:34 +00:00
+											/* no need to read or write here */
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+											continue;
 										bio->bi_sector = sector_nr + conf->mirrors[i].rdev->data_offset;
 										bio->bi_bdev = conf->mirrors[i].rdev->bdev;
 										bio->bi_private = r1_bio;
 									}
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									if (write_targets == 0) {
 										/* There is nowhere to write, so all non-sync
 										 * drives must be failed - so we are finished
 										 */
-												[PATCH] md: improve the interface to sync_request

1/ change the return value (which is number-of-sectors synced)
 from 'int' to 'sector_t'.
 The number of sectors is usually easily small enough to fit
 in an int, but if resync needs to abort, it may want to return
 the total number of remaining sectors, which could be large.
 Also errors cannot be returned as negative numbers now, so use
 0 instead
2/ Add a 'skipped' return parameter to allow the array to report
 that it skipped the sectors.  This allows md to take this into account
 in the speed calculations.
 Currently there is no important skipping, but the bitmap-based-resync
 that is coming will use this.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:13 +00:00
+										sector_t rv = max_sector - sector_nr;
 										*skipped = 1;
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+										put_buf(r1_bio);
 										rdev_dec_pending(conf->mirrors[disk].rdev, mddev);
 										return rv;
 									}
 									nr_sectors = 0;
-												[PATCH] md: initialise sync_blocks in raid1 resync

Otherwise it could have a random value and might BUG.  This fixes a
BUG-during-resync problem in raid1 introduced by the bitmap-based-intent-logging
patches.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:24 +00:00
+									sync_blocks = 0;
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									do {
 										struct page *page;
 										int len = PAGE_SIZE;
 										if (sector_nr + (len>>9) > max_sector)
 											len = (max_sector - sector_nr) << 9;
 										if (len == 0)
 											break;
-												[PATCH] md/raid1: clear bitmap when fullsync completes

We need to be careful differentiating between a resync of a complete array,
in which we can clear the bitmap, and a resync of a degraded array, in
which we cannot.

This patch cleans all that up.

Cc: Paul Clements <paul.clements@steeleye.com>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-07-15 10:56:35 +00:00
+										if (sync_blocks == 0) {
 											if (!bitmap_start_sync(mddev->bitmap, sector_nr,
-												[PATCH] md: make manual repair work for raid1

Raid1 currently optimises resync using the intent bitmap etc.  This
optimisation is not wanted when we explicitly request a repair through sysfs,
so add appropriate checks.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:38 +00:00
+													       &sync_blocks, still_degraded) &&
 											    !conf->fullsync &&
 											    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
-												[PATCH] md/raid1: clear bitmap when fullsync completes

We need to be careful differentiating between a resync of a complete array,
in which we can clear the bitmap, and a resync of a degraded array, in
which we cannot.

This patch cleans all that up.

Cc: Paul Clements <paul.clements@steeleye.com>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-07-15 10:56:35 +00:00
+												break;
 											if (sync_blocks < (PAGE_SIZE>>9))
 												BUG();
 											if (len > (sync_blocks<<9))
 												len = sync_blocks<<9;
-												[PATCH] md: fix bug when raid1 attempts a partial reconstruct.

The logic here is wrong.  If fullsync is 0, it WILL BUG.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+										}
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+										for (i=0 ; i < conf->raid_disks; i++) {
 											bio = r1_bio->bios[i];
 											if (bio->bi_end_io) {
 												page = r1_bio->bios[0]->bi_io_vec[bio->bi_vcnt].bv_page;
 												if (bio_add_page(bio, page, len, 0) == 0) {
 													/* stop here */
 													r1_bio->bios[0]->bi_io_vec[bio->bi_vcnt].bv_page = page;
 													while (i > 0) {
 														i--;
 														bio = r1_bio->bios[i];
-												[PATCH] md/raid1: clear bitmap when fullsync completes

We need to be careful differentiating between a resync of a complete array,
in which we can clear the bitmap, and a resync of a degraded array, in
which we cannot.

This patch cleans all that up.

Cc: Paul Clements <paul.clements@steeleye.com>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-07-15 10:56:35 +00:00
+														if (bio->bi_end_io==NULL)
 															continue;
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+														/* remove last page from this bio */
 														bio->bi_vcnt--;
 														bio->bi_size -= len;
 														bio->bi_flags &= ~(1<< BIO_SEG_VALID);
 													}
 													goto bio_full;
 												}
 											}
 										}
 										nr_sectors += len>>9;
 										sector_nr += len>>9;
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+										sync_blocks -= (len>>9);
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									} while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES);
 								 bio_full:
 									bio = r1_bio->bios[disk];
 									r1_bio->sectors = nr_sectors;
 									md_sync_acct(mirror->rdev->bdev, nr_sectors);
 									generic_make_request(bio);
 									return nr_sectors;
 								}
 								static int run(mddev_t *mddev)
 								{
 									conf_t *conf;
 									int i, j, disk_idx;
 									mirror_info_t *disk;
 									mdk_rdev_t *rdev;
 									struct list_head *tmp;
 									if (mddev->level != 1) {
 										printk("raid1: %s: raid level not set to mirroring (%d)\n",
 										       mdname(mddev), mddev->level);
 										goto out;
 									}
 									/*
 									 * copy the already verified devices into our private RAID1
 									 * bookkeeping area. [whatever we allocate in run(),
 									 * should be freed in stop()]
 									 */
 									conf = kmalloc(sizeof(conf_t), GFP_KERNEL);
 									mddev->private = conf;
 									if (!conf)
 										goto out_no_mem;
 									memset(conf, 0, sizeof(*conf));
 									conf->mirrors = kmalloc(sizeof(struct mirror_info)*mddev->raid_disks,
 												 GFP_KERNEL);
 									if (!conf->mirrors)
 										goto out_no_mem;
 									memset(conf->mirrors, 0, sizeof(struct mirror_info)*mddev->raid_disks);
 									conf->poolinfo = kmalloc(sizeof(*conf->poolinfo), GFP_KERNEL);
 									if (!conf->poolinfo)
 										goto out_no_mem;
 									conf->poolinfo->mddev = mddev;
 									conf->poolinfo->raid_disks = mddev->raid_disks;
 									conf->r1bio_pool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,
 													  r1bio_pool_free,
 													  conf->poolinfo);
 									if (!conf->r1bio_pool)
 										goto out_no_mem;
 									ITERATE_RDEV(mddev, rdev, tmp) {
 										disk_idx = rdev->raid_disk;
 										if (disk_idx >= mddev->raid_disks
 										    || disk_idx < 0)
 											continue;
 										disk = conf->mirrors + disk_idx;
 										disk->rdev = rdev;
 										blk_queue_stack_limits(mddev->queue,
 												       rdev->bdev->bd_disk->queue);
 										/* as we don't honour merge_bvec_fn, we must never risk
 										 * violating it, so limit ->max_sector to one PAGE, as
 										 * a one page request is never in violation.
 										 */
 										if (rdev->bdev->bd_disk->queue->merge_bvec_fn &&
 										    mddev->queue->max_sectors > (PAGE_SIZE>>9))
 											blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
 										disk->head_position = 0;
-												[PATCH] md: convert 'faulty' and 'in_sync' fields to bits in 'flags' field

This has the advantage of removing the confusion caused by 'rdev_t' and
'mddev_t' both having 'in_sync' fields.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:31 +00:00
+										if (!test_bit(Faulty, &rdev->flags) && test_bit(In_sync, &rdev->flags))
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+											conf->working_disks++;
 									}
 									conf->raid_disks = mddev->raid_disks;
 									conf->mddev = mddev;
 									spin_lock_init(&conf->device_lock);
 									INIT_LIST_HEAD(&conf->retry_list);
 									if (conf->working_disks == 1)
 										mddev->recovery_cp = MaxSector;
 									spin_lock_init(&conf->resync_lock);
 									init_waitqueue_head(&conf->wait_idle);
 									init_waitqueue_head(&conf->wait_resume);
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+									bio_list_init(&conf->pending_bio_list);
 									bio_list_init(&conf->flushing_bio_list);
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									if (!conf->working_disks) {
 										printk(KERN_ERR "raid1: no operational mirrors for %s\n",
 											mdname(mddev));
 										goto out_free_conf;
 									}
 									mddev->degraded = 0;
 									for (i = 0; i < conf->raid_disks; i++) {
 										disk = conf->mirrors + i;
 										if (!disk->rdev) {
 											disk->head_position = 0;
 											mddev->degraded++;
 										}
 									}
 									/*
 									 * find the first working one and use it as a starting point
 									 * to read balancing.
 									 */
 									for (j = 0; j < conf->raid_disks &&
 										     (!conf->mirrors[j].rdev ||
-												[PATCH] md: convert 'faulty' and 'in_sync' fields to bits in 'flags' field

This has the advantage of removing the confusion caused by 'rdev_t' and
'mddev_t' both having 'in_sync' fields.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-11-09 05:39:31 +00:00
+										      !test_bit(In_sync, &conf->mirrors[j].rdev->flags)) ; j++)
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+										/* nothing */;
 									conf->last_used = j;
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+									mddev->thread = md_register_thread(raid1d, mddev, "%s_raid1");
 									if (!mddev->thread) {
 										printk(KERN_ERR
 										       "raid1: couldn't allocate thread for %s\n",
 										       mdname(mddev));
 										goto out_free_conf;
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									}
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+									if (mddev->bitmap) mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ;
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									printk(KERN_INFO
 										"raid1: raid set %s active with %d out of %d mirrors\n",
 										mdname(mddev), mddev->raid_disks - mddev->degraded,
 										mddev->raid_disks);
 									/*
 									 * Ok, everything is just fine now
 									 */
 									mddev->array_size = mddev->size;
-												[PATCH] md: set the unplug_fn and issue_flush_fn for md devices *after* committed to creation

If we set them too early, they may still be in place and possibly get called
even though the array didn't get set up properly.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-05-17 04:53:16 +00:00
+									mddev->queue->unplug_fn = raid1_unplug;
 									mddev->queue->issue_flush_fn = raid1_issue_flush;
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									return 0;
 								out_no_mem:
 									printk(KERN_ERR "raid1: couldn't allocate memory for %s\n",
 									       mdname(mddev));
 								out_free_conf:
 									if (conf) {
 										if (conf->r1bio_pool)
 											mempool_destroy(conf->r1bio_pool);
-												[PATCH] md: remove unneeded NULL checks before kfree

This patch removes some unneeded checks of pointers being NULL before
calling kfree() on them.  kfree() handles NULL pointers just fine, checking
first is pointless.

Signed-off-by: Jesper Juhl <juhl-lkml@dif.dk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:30 +00:00
+										kfree(conf->mirrors);
 										kfree(conf->poolinfo);
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+										kfree(conf);
 										mddev->private = NULL;
 									}
 								out:
 									return -EIO;
 								}
 								static int stop(mddev_t *mddev)
 								{
 									conf_t *conf = mddev_to_conf(mddev);
-												[PATCH] md: add write-behind support for md/raid1

If a device is flagged 'WriteMostly' and the array has a bitmap, and the
bitmap superblock indicates that write_behind is allowed, then write_behind is
enabled for WriteMostly devices.

Write requests will be acknowledged as complete to the caller (via b_end_io)
when all non-WriteMostly devices have completed the write, but will not be
cleared from the bitmap until all devices complete.

This requires memory allocation to make a local copy of the data being
written.  If there is insufficient memory, then we fall-back on normal write
semantics.

Signed-Off-By: Paul Clements <paul.clements@steeleye.com>
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:47 +00:00
+									struct bitmap *bitmap = mddev->bitmap;
 									int behind_wait = 0;
 									/* wait for behind writes to complete */
 									while (bitmap && atomic_read(&bitmap->behind_writes) > 0) {
 										behind_wait++;
 										printk(KERN_INFO "raid1: behind writes in progress on device %s, waiting to stop (%d)\n", mdname(mddev), behind_wait);
 										set_current_state(TASK_UNINTERRUPTIBLE);
 										schedule_timeout(HZ); /* wait a second */
 										/* need to kick something here to make sure I/O goes? */
 									}
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
 									md_unregister_thread(mddev->thread);
 									mddev->thread = NULL;
 									blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
 									if (conf->r1bio_pool)
 										mempool_destroy(conf->r1bio_pool);
-												[PATCH] md: remove unneeded NULL checks before kfree

This patch removes some unneeded checks of pointers being NULL before
calling kfree() on them.  kfree() handles NULL pointers just fine, checking
first is pointless.

Signed-off-by: Jesper Juhl <juhl-lkml@dif.dk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:30 +00:00
+									kfree(conf->mirrors);
 									kfree(conf->poolinfo);
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									kfree(conf);
 									mddev->private = NULL;
 									return 0;
 								}
 								static int raid1_resize(mddev_t *mddev, sector_t sectors)
 								{
 									/* no resync is happening, and there is enough space
 									 * on all devices, so we can resize.
 									 * We need to make sure resync covers any new space.
 									 * If the array is shrinking we should possibly wait until
 									 * any io in the removed space completes, but it hardly seems
 									 * worth it.
 									 */
 									mddev->array_size = sectors>>1;
 									set_capacity(mddev->gendisk, mddev->array_size << 1);
 									mddev->changed = 1;
 									if (mddev->array_size > mddev->size && mddev->recovery_cp == MaxSector) {
 										mddev->recovery_cp = mddev->size << 1;
 										set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 									}
 									mddev->size = mddev->array_size;
-												[PATCH] md: when resizing an array, we need to update resync_max_sectors as well as size

Without this, any attempt to 'grow' an array will claim to have synced the
extra part without actually having done anything.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-07-27 18:43:28 +00:00
+									mddev->resync_max_sectors = sectors;
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									return 0;
 								}
 								static int raid1_reshape(mddev_t *mddev, int raid_disks)
 								{
 									/* We need to:
 									 * 1/ resize the r1bio_pool
 									 * 2/ resize conf->mirrors
 									 *
 									 * We allocate a new r1bio_pool if we can.
 									 * Then raise a device barrier and wait until all IO stops.
 									 * Then resize conf->mirrors and swap in the new r1bio pool.
-												[PATCH] md: cause md/raid1 to "repack" working devices when number of drives is changed

i.e.  missing or failed drives are moved to the end of the list.  This means
a 3 drive md array with the first drive missing can be shrunk to a two
drive array.  Currently that isn't possible.

Also, the "last_used" device number might be out-of-range after the number
of devices is reduced, so we set it to 0.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:09 +00:00
+									 *
 									 * At the same time, we "pack" the devices so that all the missing
 									 * devices have the higher raid_disk numbers.
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									 */
 									mempool_t *newpool, *oldpool;
 									struct pool_info *newpoolinfo;
 									mirror_info_t *newmirrors;
 									conf_t *conf = mddev_to_conf(mddev);
-												[PATCH] md: cause md/raid1 to "repack" working devices when number of drives is changed

i.e.  missing or failed drives are moved to the end of the list.  This means
a 3 drive md array with the first drive missing can be shrunk to a two
drive array.  Currently that isn't possible.

Also, the "last_used" device number might be out-of-range after the number
of devices is reduced, so we set it to 0.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:09 +00:00
+									int cnt;
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
-												[PATCH] md: cause md/raid1 to "repack" working devices when number of drives is changed

i.e.  missing or failed drives are moved to the end of the list.  This means
a 3 drive md array with the first drive missing can be shrunk to a two
drive array.  Currently that isn't possible.

Also, the "last_used" device number might be out-of-range after the number
of devices is reduced, so we set it to 0.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:09 +00:00
+									int d, d2;
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
-												[PATCH] md: cause md/raid1 to "repack" working devices when number of drives is changed

i.e.  missing or failed drives are moved to the end of the list.  This means
a 3 drive md array with the first drive missing can be shrunk to a two
drive array.  Currently that isn't possible.

Also, the "last_used" device number might be out-of-range after the number
of devices is reduced, so we set it to 0.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:09 +00:00
+									if (raid_disks < conf->raid_disks) {
 										cnt=0;
 										for (d= 0; d < conf->raid_disks; d++)
 											if (conf->mirrors[d].rdev)
 												cnt++;
 										if (cnt > raid_disks)
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+											return -EBUSY;
-												[PATCH] md: cause md/raid1 to "repack" working devices when number of drives is changed

i.e.  missing or failed drives are moved to the end of the list.  This means
a 3 drive md array with the first drive missing can be shrunk to a two
drive array.  Currently that isn't possible.

Also, the "last_used" device number might be out-of-range after the number
of devices is reduced, so we set it to 0.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:09 +00:00
+									}
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
 									newpoolinfo = kmalloc(sizeof(*newpoolinfo), GFP_KERNEL);
 									if (!newpoolinfo)
 										return -ENOMEM;
 									newpoolinfo->mddev = mddev;
 									newpoolinfo->raid_disks = raid_disks;
 									newpool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,
 												 r1bio_pool_free, newpoolinfo);
 									if (!newpool) {
 										kfree(newpoolinfo);
 										return -ENOMEM;
 									}
 									newmirrors = kmalloc(sizeof(struct mirror_info) * raid_disks, GFP_KERNEL);
 									if (!newmirrors) {
 										kfree(newpoolinfo);
 										mempool_destroy(newpool);
 										return -ENOMEM;
 									}
 									memset(newmirrors, 0, sizeof(struct mirror_info)*raid_disks);
 									spin_lock_irq(&conf->resync_lock);
 									conf->barrier++;
 									wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
-												[PATCH] md: raid1 support for bitmap intent logging

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:23 +00:00
+											    conf->resync_lock, raid1_unplug(mddev->queue));
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									spin_unlock_irq(&conf->resync_lock);
 									/* ok, everything is stopped */
 									oldpool = conf->r1bio_pool;
 									conf->r1bio_pool = newpool;
-												[PATCH] md: cause md/raid1 to "repack" working devices when number of drives is changed

i.e.  missing or failed drives are moved to the end of the list.  This means
a 3 drive md array with the first drive missing can be shrunk to a two
drive array.  Currently that isn't possible.

Also, the "last_used" device number might be out-of-range after the number
of devices is reduced, so we set it to 0.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:09 +00:00
 									for (d=d2=0; d < conf->raid_disks; d++)
 										if (conf->mirrors[d].rdev) {
 											conf->mirrors[d].rdev->raid_disk = d2;
 											newmirrors[d2++].rdev = conf->mirrors[d].rdev;
 										}
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									kfree(conf->mirrors);
 									conf->mirrors = newmirrors;
 									kfree(conf->poolinfo);
 									conf->poolinfo = newpoolinfo;
 									mddev->degraded += (raid_disks - conf->raid_disks);
 									conf->raid_disks = mddev->raid_disks = raid_disks;
-												[PATCH] md: cause md/raid1 to "repack" working devices when number of drives is changed

i.e.  missing or failed drives are moved to the end of the list.  This means
a 3 drive md array with the first drive missing can be shrunk to a two
drive array.  Currently that isn't possible.

Also, the "last_used" device number might be out-of-range after the number
of devices is reduced, so we set it to 0.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-06-22 00:17:09 +00:00
+									conf->last_used = 0; /* just make sure it is in-range */
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+									spin_lock_irq(&conf->resync_lock);
 									conf->barrier--;
 									spin_unlock_irq(&conf->resync_lock);
 									wake_up(&conf->wait_resume);
 									wake_up(&conf->wait_idle);
 									set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 									md_wakeup_thread(mddev->thread);
 									mempool_destroy(oldpool);
 									return 0;
 								}
-												[PATCH] md: tidy up daemon stop/start code in md/bitmap.c

The bitmap code used to have two daemons, so there is some 'common' start/stop
code.  But now there is only one, so the common code is just noise.

This patch tidies this up somewhat.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:58 +00:00
+								static void raid1_quiesce(mddev_t *mddev, int state)
-												[PATCH] md: all hot-add and hot-remove of md intent logging bitmaps

Both file-bitmaps and superblock bitmaps are supported.

If you add a bitmap file on the array device, you lose.

This introduces a 'default_bitmap_offset' field in mddev, as the ioctl used
for adding a superblock bitmap doesn't have room for giving an offset.  Later,
this value will be settable via sysfs.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:45 +00:00
+								{
 									conf_t *conf = mddev_to_conf(mddev);
 									switch(state) {
-												[PATCH] md: raid1_quiesce is back to front, fix it.

A state of 0 means 'not quiesced'
A state of 1 means 'is quiesced'

The original code got this wrong.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:48 +00:00
+									case 1:
-												[PATCH] md: all hot-add and hot-remove of md intent logging bitmaps

Both file-bitmaps and superblock bitmaps are supported.

If you add a bitmap file on the array device, you lose.

This introduces a 'default_bitmap_offset' field in mddev, as the ioctl used
for adding a superblock bitmap doesn't have room for giving an offset.  Later,
this value will be settable via sysfs.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:45 +00:00
+										spin_lock_irq(&conf->resync_lock);
 										conf->barrier++;
 										wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
 												    conf->resync_lock, raid1_unplug(mddev->queue));
 										spin_unlock_irq(&conf->resync_lock);
 										break;
-												[PATCH] md: raid1_quiesce is back to front, fix it.

A state of 0 means 'not quiesced'
A state of 1 means 'is quiesced'

The original code got this wrong.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:48 +00:00
+									case 0:
-												[PATCH] md: all hot-add and hot-remove of md intent logging bitmaps

Both file-bitmaps and superblock bitmaps are supported.

If you add a bitmap file on the array device, you lose.

This introduces a 'default_bitmap_offset' field in mddev, as the ioctl used
for adding a superblock bitmap doesn't have room for giving an offset.  Later,
this value will be settable via sysfs.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:45 +00:00
+										spin_lock_irq(&conf->resync_lock);
 										conf->barrier--;
 										spin_unlock_irq(&conf->resync_lock);
 										wake_up(&conf->wait_resume);
 										wake_up(&conf->wait_idle);
 										break;
 									}
 									if (mddev->thread) {
 										if (mddev->bitmap)
 											mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ;
 										else
 											mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
 										md_wakeup_thread(mddev->thread);
 									}
 								}
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
 								static mdk_personality_t raid1_personality =
 								{
 									.name		= "raid1",
 									.owner		= THIS_MODULE,
 									.make_request	= make_request,
 									.run		= run,
 									.stop		= stop,
 									.status		= status,
 									.error_handler	= error,
 									.hot_add_disk	= raid1_add_disk,
 									.hot_remove_disk= raid1_remove_disk,
 									.spare_active	= raid1_spare_active,
 									.sync_request	= sync_request,
 									.resize		= raid1_resize,
 									.reshape	= raid1_reshape,
-												[PATCH] md: all hot-add and hot-remove of md intent logging bitmaps

Both file-bitmaps and superblock bitmaps are supported.

If you add a bitmap file on the array device, you lose.

This introduces a 'default_bitmap_offset' field in mddev, as the ioctl used
for adding a superblock bitmap doesn't have room for giving an offset.  Later,
this value will be settable via sysfs.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

											
										
										
											2005-09-09 23:23:45 +00:00
+									.quiesce	= raid1_quiesce,
-												Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!

											
										
										
											2005-04-16 22:20:36 +00:00
+								};
 								/*
 								 * Module entry point: register raid1_personality under the RAID1
 								 * personality number and return whatever register_md_personality()
 								 * reports.
 								 */
 								static int __init raid_init(void)
 								{
 									int status = register_md_personality(RAID1, &raid1_personality);
 									return status;
 								}
 								/*
 								 * Module exit hook: unregister the RAID1 personality number that
 								 * raid_init() registered.
 								 */
 								static void raid_exit(void)
 								{
 									unregister_md_personality(RAID1);
 								}
 								/* Wire raid_init/raid_exit in as the module's load and unload handlers. */
 								module_init(raid_init);
 								module_exit(raid_exit);
 								MODULE_LICENSE("GPL");
 								/* Module alias string for this personality (3 is annotated as RAID1). */
 								MODULE_ALIAS("md-personality-3"); /* RAID1 */