kernel/linux-2.6-ext4-quota-metadata-reservation.patch
2010-07-29 17:18:45 -07:00

For bug #608770
Do not charge speculatively allocated metadata against quota; estimating it
is too complex, and we usually get it wrong.
A backport of the following upstream patches, some of which just play a
supporting role.
-Eric
515f41c33a9d44a964264c9511ad2c869af1fac3 ext4: Ensure zeroout blocks have no dirty metadata
d21cd8f163ac44b15c465aab7306db931c606908 ext4: Fix potential quota deadlock
0637c6f4135f592f094207c7c21e7c0fc5557834 ext4: Patch up how we claim metadata blocks for quota purposes
9d0be50230b333005635967f7ecd4897dbfd181b ext4: Calculate metadata requirements more accurately
1db913823c0f8360fccbd24ca67eb073966a5ffd ext4: Handle -EDQUOT error on write
5f634d064c709ea02c3cdaa850a08323a4a4bf28 ext4: Fix quota accounting error with fallocate
56246f9ae4cfa95b460f9dfbcfb1b772d85db046 quota: use flags interface for dquot alloc/free space
0e05842bc117ea70ceb979cca798fd026879951b quota: add the option to not fail with EDQUOT in block
72b8ab9dde211ea518ff27e631b2046ef90c29a2 ext4: don't use quota reservation for speculative metadata
Index: linux-2.6.32.noarch/fs/ext4/extents.c
===================================================================
--- linux-2.6.32.noarch.orig/fs/ext4/extents.c
+++ linux-2.6.32.noarch/fs/ext4/extents.c
@@ -296,29 +296,44 @@ static inline int ext4_ext_space_root_id
* to allocate @blocks
* Worse case is one block per extent
*/
-int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks)
+int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock)
{
- int lcap, icap, rcap, leafs, idxs, num;
- int newextents = blocks;
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ int idxs, num = 0;
- rcap = ext4_ext_space_root_idx(inode, 0);
- lcap = ext4_ext_space_block(inode, 0);
- icap = ext4_ext_space_block_idx(inode, 0);
+ idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
+ / sizeof(struct ext4_extent_idx));
- /* number of new leaf blocks needed */
- num = leafs = (newextents + lcap - 1) / lcap;
+ /*
+ * If the new delayed allocation block is contiguous with the
+ * previous da block, it can share index blocks with the
+ * previous block, so we only need to allocate a new index
+ * block every idxs leaf blocks. At idxs**2 blocks, we need
+ * an additional index block, and at idxs**3 blocks, yet
+ * another index block.
+ */
+ if (ei->i_da_metadata_calc_len &&
+ ei->i_da_metadata_calc_last_lblock+1 == lblock) {
+ if ((ei->i_da_metadata_calc_len % idxs) == 0)
+ num++;
+ if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
+ num++;
+ if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) {
+ num++;
+ ei->i_da_metadata_calc_len = 0;
+ } else
+ ei->i_da_metadata_calc_len++;
+ ei->i_da_metadata_calc_last_lblock++;
+ return num;
+ }
/*
- * Worse case, we need separate index block(s)
- * to link all new leaf blocks
+ * In the worst case we need a new set of index blocks at
+ * every level of the inode's extent tree.
*/
- idxs = (leafs + icap - 1) / icap;
- do {
- num += idxs;
- idxs = (idxs + icap - 1) / icap;
- } while (idxs > rcap);
-
- return num;
+ ei->i_da_metadata_calc_len = 1;
+ ei->i_da_metadata_calc_last_lblock = lblock;
+ return ext_depth(inode) + 1;
}
static int
@@ -3029,6 +3044,14 @@ out:
return err;
}
+static void unmap_underlying_metadata_blocks(struct block_device *bdev,
+ sector_t block, int count)
+{
+ int i;
+ for (i = 0; i < count; i++)
+ unmap_underlying_metadata(bdev, block + i);
+}
+
static int
ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
ext4_lblk_t iblock, unsigned int max_blocks,
@@ -3104,6 +3127,30 @@ out:
} else
allocated = ret;
set_buffer_new(bh_result);
+ /*
+ * If we allocated more blocks than requested, we need
+ * to make sure we unmap the extra blocks allocated.
+ * The blocks that were actually requested will get
+ * unmapped later, when we find the buffer_head marked
+ * new.
+ */
+ if (allocated > max_blocks) {
+ unmap_underlying_metadata_blocks(inode->i_sb->s_bdev,
+ newblock + max_blocks,
+ allocated - max_blocks);
+ allocated = max_blocks;
+ }
+
+ /*
+ * If we have done fallocate at an offset that is already
+ * delayed allocated, the block reservation and the quota
+ * reservation were made in the delayed write path. But
+ * fallocate has already updated the quota and block counts
+ * for this offset, so cancel those reservations.
+ */
+ if (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE)
+ ext4_da_update_reserve_space(inode, allocated, 0);
+
map_out:
set_buffer_mapped(bh_result);
out1:
@@ -3333,9 +3380,18 @@ int ext4_ext_get_blocks(handle_t *handle
/* previous routine could use block we allocated */
newblock = ext_pblock(&newex);
allocated = ext4_ext_get_actual_len(&newex);
+ if (allocated > max_blocks)
+ allocated = max_blocks;
set_buffer_new(bh_result);
/*
+ * Update reserved blocks/metadata blocks after successful
+ * block allocation which had been deferred till now.
+ */
+ if (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE)
+ ext4_da_update_reserve_space(inode, allocated, 1);
+
+ /*
* Cache the extent and update transaction to commit on fdatasync only
* when it is _not_ an uninitialized extent.
*/
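
The per-block estimate made by the new ext4_ext_calc_metadata_amount() is
easiest to see with concrete numbers. Below is a standalone userspace model
of that accounting -- an illustration only, not kernel code; the struct and
helper names are local stand-ins, and it assumes a 4K block size (so idxs =
(4096 - 12) / 12 = 340) and an extent tree of depth 1:

#include <stdio.h>

#define BLOCKSIZE 4096
#define HDR_SIZE  12	/* sizeof(struct ext4_extent_header) */
#define IDX_SIZE  12	/* sizeof(struct ext4_extent_idx) */

struct calc_state {			/* models the two new inode fields */
	unsigned long long last_lblock;	/* i_da_metadata_calc_last_lblock */
	int len;			/* i_da_metadata_calc_len */
};

/* Metadata blocks to reserve for one new delalloc block at @lblock. */
static int calc_metadata(struct calc_state *st, unsigned long long lblock,
			 int depth)
{
	int idxs = (BLOCKSIZE - HDR_SIZE) / IDX_SIZE;	/* 340 */
	int num = 0;

	if (st->len && st->last_lblock + 1 == lblock) {
		/* Contiguous with the previous delalloc block: a new
		 * index block is needed only every idxs leaf blocks,
		 * plus one more at every idxs**2 and idxs**3 boundary. */
		if (st->len % idxs == 0)
			num++;
		if (st->len % (idxs * idxs) == 0)
			num++;
		if (st->len % (idxs * idxs * idxs) == 0) {
			num++;
			st->len = 0;
		} else {
			st->len++;
		}
		st->last_lblock++;
		return num;
	}
	/* Discontiguous: assume the worst case, one new index
	 * block per level of the extent tree. */
	st->len = 1;
	st->last_lblock = lblock;
	return depth + 1;
}

int main(void)
{
	struct calc_state st = { 0, 0 };
	unsigned long long total = 0, lblock;

	for (lblock = 0; lblock < 1024; lblock++)
		total += calc_metadata(&st, lblock, 1);
	printf("reserved for 1024 contiguous blocks: %llu metadata blocks\n",
	       total);
	return 0;
}

For 1024 contiguous blocks this reserves 2 (worst case for the first block)
plus 3 (one index block when len hits 340, 680 and 1020), i.e. 5 metadata
blocks in total, where the old code charged a worst-case estimate up front
for the whole range.
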
Index: linux-2.6.32.noarch/fs/ext4/inode.c
===================================================================
--- linux-2.6.32.noarch.orig/fs/ext4/inode.c
+++ linux-2.6.32.noarch/fs/ext4/inode.c
@@ -1051,81 +1051,105 @@ qsize_t *ext4_get_reserved_space(struct
return &EXT4_I(inode)->i_reserved_quota;
}
#endif
+
/*
* Calculate the number of metadata blocks need to reserve
- * to allocate @blocks for non extent file based file
+ * to allocate a new block at @lblock for a non-extent file
*/
-static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
+static int ext4_indirect_calc_metadata_amount(struct inode *inode,
+ sector_t lblock)
{
- int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
- int ind_blks, dind_blks, tind_blks;
-
- /* number of new indirect blocks needed */
- ind_blks = (blocks + icap - 1) / icap;
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ int dind_mask = EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1;
+ int blk_bits;
- dind_blks = (ind_blks + icap - 1) / icap;
+ if (lblock < EXT4_NDIR_BLOCKS)
+ return 0;
- tind_blks = 1;
+ lblock -= EXT4_NDIR_BLOCKS;
- return ind_blks + dind_blks + tind_blks;
+ if (ei->i_da_metadata_calc_len &&
+ (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) {
+ ei->i_da_metadata_calc_len++;
+ return 0;
+ }
+ ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
+ ei->i_da_metadata_calc_len = 1;
+ blk_bits = roundup_pow_of_two(lblock + 1);
+ return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
}
/*
* Calculate the number of metadata blocks need to reserve
- * to allocate given number of blocks
+ * to allocate a block located at @lblock
*/
-static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
+static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock)
{
- if (!blocks)
- return 0;
-
if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
- return ext4_ext_calc_metadata_amount(inode, blocks);
+ return ext4_ext_calc_metadata_amount(inode, lblock);
- return ext4_indirect_calc_metadata_amount(inode, blocks);
+ return ext4_indirect_calc_metadata_amount(inode, lblock);
}
-static void ext4_da_update_reserve_space(struct inode *inode, int used)
+/*
+ * Called with i_data_sem down, which is important since we can call
+ * ext4_discard_preallocations() from here.
+ */
+void ext4_da_update_reserve_space(struct inode *inode,
+ int used, int quota_claim)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- int total, mdb, mdb_free;
+ struct ext4_inode_info *ei = EXT4_I(inode);
- spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
- /* recalculate the number of metablocks still need to be reserved */
- total = EXT4_I(inode)->i_reserved_data_blocks - used;
- mdb = ext4_calc_metadata_amount(inode, total);
-
- /* figure out how many metablocks to release */
- BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
- mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
-
- if (mdb_free) {
- /* Account for allocated meta_blocks */
- mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
-
- /* update fs dirty blocks counter */
- percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
- EXT4_I(inode)->i_allocated_meta_blocks = 0;
- EXT4_I(inode)->i_reserved_meta_blocks = mdb;
- }
-
- /* update per-inode reservations */
- BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
- EXT4_I(inode)->i_reserved_data_blocks -= used;
+ spin_lock(&ei->i_block_reservation_lock);
+ if (unlikely(used > ei->i_reserved_data_blocks)) {
+ ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
+ "with only %d reserved data blocks\n",
+ __func__, inode->i_ino, used,
+ ei->i_reserved_data_blocks);
+ WARN_ON(1);
+ used = ei->i_reserved_data_blocks;
+ }
+
+ /* Update per-inode reservations */
+ ei->i_reserved_data_blocks -= used;
+ ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
+ percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+ used + ei->i_allocated_meta_blocks);
+ ei->i_allocated_meta_blocks = 0;
+
+ if (ei->i_reserved_data_blocks == 0) {
+ /*
+ * We can release all of the reserved metadata blocks
+ * only when we have written all of the delayed
+ * allocation blocks.
+ */
+ percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+ ei->i_reserved_meta_blocks);
+ ei->i_reserved_meta_blocks = 0;
+ ei->i_da_metadata_calc_len = 0;
+ }
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
- /*
- * free those over-booking quota for metadata blocks
- */
- if (mdb_free)
- vfs_dq_release_reservation_block(inode, mdb_free);
+ /* Update quota subsystem for data blocks */
+ if (quota_claim) {
+ vfs_dq_claim_block(inode, used);
+ } else {
+ /*
+ * We did fallocate at an offset that was already delayed
+ * allocated, so on delayed-allocation writeback we should
+ * not re-claim the quota for the fallocated blocks.
+ */
+ vfs_dq_release_reservation_block(inode, used);
+ }
/*
* If we have done all the pending block allocations and if
* there aren't any writers on the inode, we can discard the
* inode's preallocations.
*/
- if (!total && (atomic_read(&inode->i_writecount) == 0))
+ if ((ei->i_reserved_data_blocks == 0) &&
+ (atomic_read(&inode->i_writecount) == 0))
ext4_discard_preallocations(inode);
}
@@ -1317,18 +1341,20 @@ int ext4_get_blocks(handle_t *handle, st
*/
EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE;
}
- }
+ /*
+ * Update reserved data/metadata blocks after a successful
+ * block allocation, which had been deferred till now. We
+ * don't support fallocate for non-extent files, so we can
+ * update the reserved space here.
+ */
+ if ((retval > 0) &&
+ (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE))
+ ext4_da_update_reserve_space(inode, retval, 1);
+ }
if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
EXT4_I(inode)->i_delalloc_reserved_flag = 0;
- /*
- * Update reserved blocks/metadata blocks after successful
- * block allocation which had been deferred till now.
- */
- if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE))
- ext4_da_update_reserve_space(inode, retval);
-
up_write((&EXT4_I(inode)->i_data_sem));
if (retval > 0 && buffer_mapped(bh)) {
int ret = check_block_validity(inode, "file system "
@@ -1834,11 +1860,15 @@ static int ext4_journalled_write_end(str
return ret ? ret : copied;
}
-static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
+/*
+ * Reserve a single block located at lblock
+ */
+static int ext4_da_reserve_space(struct inode *inode, sector_t lblock)
{
int retries = 0;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- unsigned long md_needed, mdblocks, total = 0;
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ unsigned long md_needed;
/*
* recalculate the amount of metadata blocks to reserve
@@ -1846,35 +1876,34 @@ static int ext4_da_reserve_space(struct
* worse case is one extent per block
*/
repeat:
- spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
- total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks;
- mdblocks = ext4_calc_metadata_amount(inode, total);
- BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks);
-
- md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
- total = md_needed + nrblocks;
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+ spin_lock(&ei->i_block_reservation_lock);
+ md_needed = ext4_calc_metadata_amount(inode, lblock);
+ spin_unlock(&ei->i_block_reservation_lock);
/*
- * Make quota reservation here to prevent quota overflow
- * later. Real quota accounting is done at pages writeout
- * time.
+ * We will charge metadata quota at writeout time; this saves
+ * us from metadata over-estimation, though we may go over by
+ * a small amount in the end. Here we just reserve for data.
*/
- if (vfs_dq_reserve_block(inode, total))
+ if (vfs_dq_reserve_block(inode, 1))
return -EDQUOT;
- if (ext4_claim_free_blocks(sbi, total)) {
- vfs_dq_release_reservation_block(inode, total);
+ /*
+ * We do still charge estimated metadata to the sb though;
+ * we cannot afford to run out of free blocks.
+ */
+ if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
+ vfs_dq_release_reservation_block(inode, 1);
if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
yield();
goto repeat;
}
return -ENOSPC;
}
- spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
- EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
- EXT4_I(inode)->i_reserved_meta_blocks += md_needed;
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+ spin_lock(&ei->i_block_reservation_lock);
+ ei->i_reserved_data_blocks++;
+ ei->i_reserved_meta_blocks += md_needed;
+ spin_unlock(&ei->i_block_reservation_lock);
return 0; /* success */
}
@@ -1882,49 +1911,47 @@ repeat:
static void ext4_da_release_space(struct inode *inode, int to_free)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- int total, mdb, mdb_free, release;
+ struct ext4_inode_info *ei = EXT4_I(inode);
if (!to_free)
return; /* Nothing to release, exit */
spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
- if (!EXT4_I(inode)->i_reserved_data_blocks) {
+ if (unlikely(to_free > ei->i_reserved_data_blocks)) {
/*
- * if there is no reserved blocks, but we try to free some
- * then the counter is messed up somewhere.
- * but since this function is called from invalidate
- * page, it's harmless to return without any action
- */
- printk(KERN_INFO "ext4 delalloc try to release %d reserved "
- "blocks for inode %lu, but there is no reserved "
- "data blocks\n", to_free, inode->i_ino);
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
- return;
+ * if there aren't enough reserved blocks, then the
+ * counter is messed up somewhere. Since this
+ * function is called from invalidatepage, it's
+ * harmless to return without any action.
+ */
+ ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: "
+ "ino %lu, to_free %d with only %d reserved "
+ "data blocks\n", inode->i_ino, to_free,
+ ei->i_reserved_data_blocks);
+ WARN_ON(1);
+ to_free = ei->i_reserved_data_blocks;
}
+ ei->i_reserved_data_blocks -= to_free;
- /* recalculate the number of metablocks still need to be reserved */
- total = EXT4_I(inode)->i_reserved_data_blocks - to_free;
- mdb = ext4_calc_metadata_amount(inode, total);
-
- /* figure out how many metablocks to release */
- BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
- mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
-
- release = to_free + mdb_free;
-
- /* update fs dirty blocks counter for truncate case */
- percpu_counter_sub(&sbi->s_dirtyblocks_counter, release);
+ if (ei->i_reserved_data_blocks == 0) {
+ /*
+ * We can release all of the reserved metadata blocks
+ * only when we have written all of the delayed
+ * allocation blocks.
+ */
+ percpu_counter_sub(&sbi->s_dirtyblocks_counter,
+ ei->i_reserved_meta_blocks);
+ ei->i_reserved_meta_blocks = 0;
+ ei->i_da_metadata_calc_len = 0;
+ }
- /* update per-inode reservations */
- BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks);
- EXT4_I(inode)->i_reserved_data_blocks -= to_free;
+ /* update fs dirty data blocks counter */
+ percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free);
- BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
- EXT4_I(inode)->i_reserved_meta_blocks = mdb;
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
- vfs_dq_release_reservation_block(inode, release);
+ vfs_dq_release_reservation_block(inode, to_free);
}
static void ext4_da_page_release_reservation(struct page *page,
@@ -2530,7 +2557,7 @@ static int ext4_da_get_block_prep(struct
* XXX: __block_prepare_write() unmaps passed block,
* is it OK?
*/
- ret = ext4_da_reserve_space(inode, 1);
+ ret = ext4_da_reserve_space(inode, iblock);
if (ret)
/* not enough space to reserve */
return ret;
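
With those changes the two ledgers are decoupled: quota only ever sees real
data blocks (one reservation per delalloc block, claimed or released at
writeback), while the superblock dirty-block counter still carries the
estimated metadata so the filesystem itself cannot overcommit. A toy model
of that bookkeeping -- the counter names are local stand-ins for the kernel
fields, and the numbers are made up -- just to show the ledgers drain:

#include <assert.h>
#include <stdio.h>

static long quota_reserved, quota_charged;	/* per-dquot state */
static long sb_dirty_blocks;			/* s_dirtyblocks_counter */
static long res_data, res_meta, alloc_meta;	/* per-inode state */

/* ext4_da_reserve_space(): quota reserves one data block only;
 * the sb counter also takes the metadata estimate. */
static void da_reserve(int md_needed)
{
	quota_reserved += 1;
	sb_dirty_blocks += md_needed + 1;
	res_data += 1;
	res_meta += md_needed;
}

/* ext4_new_meta_blocks(): metadata is charged to quota at allocation
 * time via the new _nofail variant and never fails with EDQUOT. */
static void meta_alloc(int n)
{
	alloc_meta += n;
	quota_charged += n;
}

/* ext4_da_update_reserve_space(used, quota_claim = 1) at writeback. */
static void da_update(int used)
{
	res_data -= used;
	res_meta -= alloc_meta;
	sb_dirty_blocks -= used + alloc_meta;
	alloc_meta = 0;
	if (res_data == 0) {			/* all delalloc written: */
		sb_dirty_blocks -= res_meta;	/* drop leftover estimate */
		res_meta = 0;
	}
	quota_reserved -= used;		/* reservation becomes real usage */
	quota_charged += used;
}

int main(void)
{
	int i;

	for (i = 0; i < 4; i++)
		da_reserve(i == 0 ? 2 : 0);	/* estimate: 2 meta blocks */
	meta_alloc(1);				/* 1 index block really used */
	da_update(4);				/* write out all four blocks */

	assert(res_data == 0 && res_meta == 0 && sb_dirty_blocks == 0);
	printf("quota reserved=%ld charged=%ld\n",
	       quota_reserved, quota_charged);
	return 0;
}

The estimate may overshoot (2 reserved, 1 used), but the overshoot only ever
lives in the sb dirty counter, never in quota, so there is nothing to give
back to the quota subsystem when the guess is wrong.
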
Index: linux-2.6.32.noarch/fs/ext4/mballoc.c
===================================================================
--- linux-2.6.32.noarch.orig/fs/ext4/mballoc.c
+++ linux-2.6.32.noarch/fs/ext4/mballoc.c
@@ -2756,12 +2756,6 @@ ext4_mb_mark_diskspace_used(struct ext4_
if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
/* release all the reserved blocks if non delalloc */
percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
- else {
- percpu_counter_sub(&sbi->s_dirtyblocks_counter,
- ac->ac_b_ex.fe_len);
- /* convert reserved quota blocks to real quota blocks */
- vfs_dq_claim_block(ac->ac_inode, ac->ac_b_ex.fe_len);
- }
if (sbi->s_log_groups_per_flex) {
ext4_group_t flex_group = ext4_flex_group(sbi,
Index: linux-2.6.32.noarch/fs/ext4/ext4.h
===================================================================
--- linux-2.6.32.noarch.orig/fs/ext4/ext4.h
+++ linux-2.6.32.noarch/fs/ext4/ext4.h
@@ -693,6 +693,8 @@ struct ext4_inode_info {
unsigned int i_reserved_meta_blocks;
unsigned int i_allocated_meta_blocks;
unsigned short i_delalloc_reserved_flag;
+ sector_t i_da_metadata_calc_last_lblock;
+ int i_da_metadata_calc_len;
/* on-disk additional length */
__u16 i_extra_isize;
@@ -1438,6 +1440,8 @@ extern int ext4_block_truncate_page(hand
extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
extern qsize_t *ext4_get_reserved_space(struct inode *inode);
extern int flush_aio_dio_completed_IO(struct inode *inode);
+extern void ext4_da_update_reserve_space(struct inode *inode,
+ int used, int quota_claim);
/* ioctl.c */
extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
Index: linux-2.6.32.noarch/fs/ext4/ext4_extents.h
===================================================================
--- linux-2.6.32.noarch.orig/fs/ext4/ext4_extents.h
+++ linux-2.6.32.noarch/fs/ext4/ext4_extents.h
@@ -225,7 +225,8 @@ static inline void ext4_ext_mark_initial
ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext));
}
-extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks);
+extern int ext4_ext_calc_metadata_amount(struct inode *inode,
+ sector_t lblocks);
extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
Index: linux-2.6.32.noarch/fs/ext4/super.c
===================================================================
--- linux-2.6.32.noarch.orig/fs/ext4/super.c
+++ linux-2.6.32.noarch/fs/ext4/super.c
@@ -702,6 +702,7 @@ static struct inode *ext4_alloc_inode(st
ei->i_reserved_data_blocks = 0;
ei->i_reserved_meta_blocks = 0;
ei->i_allocated_meta_blocks = 0;
+ ei->i_da_metadata_calc_len = 0;
ei->i_delalloc_reserved_flag = 0;
spin_lock_init(&(ei->i_block_reservation_lock));
#ifdef CONFIG_QUOTA
Index: linux-2.6.32.noarch/fs/quota/dquot.c
===================================================================
--- linux-2.6.32.noarch.orig/fs/quota/dquot.c
+++ linux-2.6.32.noarch/fs/quota/dquot.c
@@ -1492,11 +1492,13 @@ static void inode_decr_space(struct inod
/*
* This operation can block, but only after everything is updated
*/
-int __dquot_alloc_space(struct inode *inode, qsize_t number,
- int warn, int reserve)
+int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
{
int cnt, ret = QUOTA_OK;
char warntype[MAXQUOTAS];
+ int warn = flags & DQUOT_SPACE_WARN;
+ int reserve = flags & DQUOT_SPACE_RESERVE;
+ int nofail = flags & DQUOT_SPACE_NOFAIL;
/*
* First test before acquiring mutex - solves deadlocks when we
@@ -1521,7 +1523,7 @@ int __dquot_alloc_space(struct inode *in
if (!inode->i_dquot[cnt])
continue;
if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt)
- == NO_QUOTA) {
+ == NO_QUOTA && !nofail) {
ret = NO_QUOTA;
spin_unlock(&dq_data_lock);
goto out_flush_warn;
@@ -1552,15 +1554,19 @@ out:
return ret;
}
-int dquot_alloc_space(struct inode *inode, qsize_t number, int warn)
+int dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
{
- return __dquot_alloc_space(inode, number, warn, 0);
+ return __dquot_alloc_space(inode, number, flags);
}
EXPORT_SYMBOL(dquot_alloc_space);
int dquot_reserve_space(struct inode *inode, qsize_t number, int warn)
{
- return __dquot_alloc_space(inode, number, warn, 1);
+ int flags = DQUOT_SPACE_RESERVE;
+
+ if (warn)
+ flags |= DQUOT_SPACE_WARN;
+ return __dquot_alloc_space(inode, number, flags);
}
EXPORT_SYMBOL(dquot_reserve_space);
@@ -1651,10 +1657,11 @@ EXPORT_SYMBOL(dquot_claim_space);
/*
* This operation can block, but only after everything is updated
*/
-int __dquot_free_space(struct inode *inode, qsize_t number, int reserve)
+int __dquot_free_space(struct inode *inode, qsize_t number, int flags)
{
unsigned int cnt;
char warntype[MAXQUOTAS];
+ int reserve = flags & DQUOT_SPACE_RESERVE;
/* First test before acquiring mutex - solves deadlocks when we
* re-enter the quota code and are already holding the mutex */
@@ -1706,7 +1713,7 @@ EXPORT_SYMBOL(dquot_free_space);
*/
void dquot_release_reserved_space(struct inode *inode, qsize_t number)
{
- __dquot_free_space(inode, number, 1);
+ __dquot_free_space(inode, number, DQUOT_SPACE_RESERVE);
}
EXPORT_SYMBOL(dquot_release_reserved_space);
Index: linux-2.6.32.noarch/include/linux/quotaops.h
===================================================================
--- linux-2.6.32.noarch.orig/include/linux/quotaops.h
+++ linux-2.6.32.noarch/include/linux/quotaops.h
@@ -14,6 +14,10 @@ static inline struct quota_info *sb_dqop
return &sb->s_dquot;
}
+#define DQUOT_SPACE_WARN 0x1
+#define DQUOT_SPACE_RESERVE 0x2
+#define DQUOT_SPACE_NOFAIL 0x4
+
#if defined(CONFIG_QUOTA)
/*
@@ -159,7 +163,7 @@ static inline int vfs_dq_prealloc_space_
{
if (sb_any_quota_active(inode->i_sb)) {
/* Used space is updated in alloc_space() */
- if (inode->i_sb->dq_op->alloc_space(inode, nr, 1) == NO_QUOTA)
+ if (inode->i_sb->dq_op->alloc_space(inode, nr, DQUOT_SPACE_WARN) == NO_QUOTA)
return 1;
}
else
@@ -187,6 +191,16 @@ static inline int vfs_dq_alloc_space_nod
return 0;
}
+static inline void vfs_dq_alloc_space_nofail(struct inode *inode, qsize_t nr)
+{
+ if (sb_any_quota_active(inode->i_sb)) {
+ /* Used space is updated in alloc_space() */
+ inode->i_sb->dq_op->alloc_space(inode, nr, DQUOT_SPACE_NOFAIL);
+ } else
+ inode_add_bytes(inode, nr);
+ mark_inode_dirty(inode);
+}
+
static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr)
{
int ret;
@@ -382,6 +396,12 @@ static inline int vfs_dq_alloc_space_nod
return 0;
}
+static inline void vfs_dq_alloc_space_nofail(struct inode *inode, qsize_t nr)
+{
+ inode_add_bytes(inode, nr);
+ mark_inode_dirty(inode);
+}
+
static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr)
{
vfs_dq_alloc_space_nodirty(inode, nr);
@@ -433,6 +453,11 @@ static inline int vfs_dq_alloc_block_nod
return vfs_dq_alloc_space_nodirty(inode, nr << inode->i_blkbits);
}
+static inline void vfs_dq_alloc_block_nofail(struct inode *inode, qsize_t nr)
+{
+ vfs_dq_alloc_space_nofail(inode, nr << inode->i_blkbits);
+}
+
static inline int vfs_dq_alloc_block(struct inode *inode, qsize_t nr)
{
return vfs_dq_alloc_space(inode, nr << inode->i_blkbits);
Index: linux-2.6.32.noarch/fs/ext4/balloc.c
===================================================================
--- linux-2.6.32.noarch.orig/fs/ext4/balloc.c
+++ linux-2.6.32.noarch/fs/ext4/balloc.c
@@ -642,14 +642,15 @@ ext4_fsblk_t ext4_new_meta_blocks(handle
ret = ext4_mb_new_blocks(handle, &ar, errp);
if (count)
*count = ar.len;
-
/*
- * Account for the allocated meta blocks
+ * Account for the allocated meta blocks. We will never
+ * fail with EDQUOT for metadata, but we do account for it.
*/
if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) {
spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+ vfs_dq_alloc_block_nofail(inode, ar.len);
}
return ret;
}
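
The dquot interface change is mechanical but worth seeing in one place: the
old (warn, reserve) int arguments become a single flags word, and the new
DQUOT_SPACE_NOFAIL bit turns a hard-limit failure into an over-limit charge.
A toy userspace model of that decode -- illustration only, with made-up
limits, not the dquot implementation:

#include <stdio.h>
#include <stdbool.h>

#define DQUOT_SPACE_WARN	0x1
#define DQUOT_SPACE_RESERVE	0x2
#define DQUOT_SPACE_NOFAIL	0x4

static long long hardlimit = 100, curspace = 98;	/* pretend quota */

/* Models __dquot_alloc_space(): refuse with EDQUOT on overrun
 * unless DQUOT_SPACE_NOFAIL is set. */
static int alloc_space(long long number, int flags)
{
	bool nofail = flags & DQUOT_SPACE_NOFAIL;

	if (curspace + number > hardlimit && !nofail) {
		if (flags & DQUOT_SPACE_WARN)
			fprintf(stderr, "quota exceeded\n");
		return -1;	/* EDQUOT */
	}
	curspace += number;	/* NOFAIL: charge even past the limit */
	return 0;
}

int main(void)
{
	/* Data blocks: may fail with EDQUOT. */
	printf("data:  %d\n", alloc_space(5, DQUOT_SPACE_WARN));
	/* Metadata blocks, as in ext4_new_meta_blocks() ->
	 * vfs_dq_alloc_block_nofail(): accounted, never refused. */
	printf("meta:  %d\n", alloc_space(5, DQUOT_SPACE_NOFAIL));
	printf("used:  %lld of %lld\n", curspace, hardlimit);
	return 0;
}

This is why ext4 can charge metadata to quota at allocation time instead of
reserving it speculatively: the charge is always accepted, and at worst the
user ends up slightly over quota rather than the accounting going wrong.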