|
For bug #608770
|
||
|
|
||
|
Do not speculatively allocate metadata for quota; it's too complex, and we
|
||
|
usually get it wrong.
|
||
|
|
||
|
Backport of these upstream patches, some of which just play a supporting role.
|
||
|
|
||
|
-Eric
|
||
|
|
||
|
515f41c33a9d44a964264c9511ad2c869af1fac3 ext4: Ensure zeroout blocks have no dirty metadata
|
||
|
d21cd8f163ac44b15c465aab7306db931c606908 ext4: Fix potential quota deadlock
|
||
|
0637c6f4135f592f094207c7c21e7c0fc5557834 ext4: Patch up how we claim metadata blocks for quota purposes
|
||
|
9d0be50230b333005635967f7ecd4897dbfd181b ext4: Calculate metadata requirements more accurately
|
||
|
1db913823c0f8360fccbd24ca67eb073966a5ffd ext4: Handle -EDQUOT error on write
|
||
|
5f634d064c709ea02c3cdaa850a08323a4a4bf28 ext4: Fix quota accounting error with fallocate
|
||
|
56246f9ae4cfa95b460f9dfbcfb1b772d85db046 quota: use flags interface for dquot alloc/free space
|
||
|
0e05842bc117ea70ceb979cca798fd026879951b quota: add the option to not fail with EDQUOT in block
|
||
|
72b8ab9dde211ea518ff27e631b2046ef90c29a2 ext4: don't use quota reservation for speculative metadata
|
||
|
|
||
|
|
||
|
Index: linux-2.6.32.noarch/fs/ext4/extents.c
|
||
|
===================================================================
|
||
|
--- linux-2.6.32.noarch.orig/fs/ext4/extents.c
|
||
|
+++ linux-2.6.32.noarch/fs/ext4/extents.c
|
||
|
@@ -296,29 +296,44 @@ static inline int ext4_ext_space_root_id
|
||
|
* to allocate @blocks
|
||
|
* Worse case is one block per extent
|
||
|
*/
|
||
|
-int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks)
|
||
|
+int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock)
|
||
|
{
|
||
|
- int lcap, icap, rcap, leafs, idxs, num;
|
||
|
- int newextents = blocks;
|
||
|
+ struct ext4_inode_info *ei = EXT4_I(inode);
|
||
|
+ int idxs, num = 0;
|
||
|
|
||
|
- rcap = ext4_ext_space_root_idx(inode, 0);
|
||
|
- lcap = ext4_ext_space_block(inode, 0);
|
||
|
- icap = ext4_ext_space_block_idx(inode, 0);
|
||
|
+ idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
|
||
|
+ / sizeof(struct ext4_extent_idx));
|
||
|
|
||
|
- /* number of new leaf blocks needed */
|
||
|
- num = leafs = (newextents + lcap - 1) / lcap;
|
||
|
+ /*
|
||
|
+ * If the new delayed allocation block is contiguous with the
|
||
|
+ * previous da block, it can share index blocks with the
|
||
|
+ * previous block, so we only need to allocate a new index
|
||
|
+ * block every idxs leaf blocks.  At idxs**2 blocks, we need
|
||
|
+ * an additional index block, and at idxs**3 blocks, yet
|
||
|
+ * another index block.
|
||
|
+ */
|
||
|
+ if (ei->i_da_metadata_calc_len &&
|
||
|
+ ei->i_da_metadata_calc_last_lblock+1 == lblock) {
|
||
|
+ if ((ei->i_da_metadata_calc_len % idxs) == 0)
|
||
|
+ num++;
|
||
|
+ if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
|
||
|
+ num++;
|
||
|
+ if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) {
|
||
|
+ num++;
|
||
|
+ ei->i_da_metadata_calc_len = 0;
|
||
|
+ } else
|
||
|
+ ei->i_da_metadata_calc_len++;
|
||
|
+ ei->i_da_metadata_calc_last_lblock++;
|
||
|
+ return num;
|
||
|
+ }
|
||
|
|
||
|
/*
|
||
|
- * Worse case, we need separate index block(s)
|
||
|
- * to link all new leaf blocks
|
||
|
+ * In the worst case we need a new set of index blocks at
|
||
|
+ * every level of the inode's extent tree.
|
||
|
*/
|
||
|
- idxs = (leafs + icap - 1) / icap;
|
||
|
- do {
|
||
|
- num += idxs;
|
||
|
- idxs = (idxs + icap - 1) / icap;
|
||
|
- } while (idxs > rcap);
|
||
|
-
|
||
|
- return num;
|
||
|
+ ei->i_da_metadata_calc_len = 1;
|
||
|
+ ei->i_da_metadata_calc_last_lblock = lblock;
|
||
|
+ return ext_depth(inode) + 1;
|
||
|
}
|
||
|
|
||
|
static int
|
||
|
@@ -3029,6 +3044,14 @@ out:
|
||
|
return err;
|
||
|
}
|
||
|
|
||
|
+static void unmap_underlying_metadata_blocks(struct block_device *bdev,
|
||
|
+ sector_t block, int count)
|
||
|
+{
|
||
|
+ int i;
|
||
|
+ for (i = 0; i < count; i++)
|
||
|
+ unmap_underlying_metadata(bdev, block + i);
|
||
|
+}
|
||
|
+
|
||
|
static int
|
||
|
ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
|
||
|
ext4_lblk_t iblock, unsigned int max_blocks,
|
||
|
@@ -3104,6 +3127,30 @@ out:
|
||
|
} else
|
||
|
allocated = ret;
|
||
|
set_buffer_new(bh_result);
|
||
|
+ /*
|
||
|
+ * if we allocated more blocks than requested
|
||
|
+ * we need to make sure we unmap the extra block
|
||
|
+ * allocated. The actual needed block will get
|
||
|
+ * unmapped later when we find the buffer_head marked
|
||
|
+ * new.
|
||
|
+ */
|
||
|
+ if (allocated > max_blocks) {
|
||
|
+ unmap_underlying_metadata_blocks(inode->i_sb->s_bdev,
|
||
|
+ newblock + max_blocks,
|
||
|
+ allocated - max_blocks);
|
||
|
+ allocated = max_blocks;
|
||
|
+ }
|
||
|
+
|
||
|
+ /*
|
||
|
+ * If we have done fallocate with the offset that is already
|
||
|
+ * delayed allocated, we would have block reservation
|
||
|
+ * and quota reservation done in the delayed write path.
|
||
|
+ * But fallocate would have already updated quota and block
|
||
|
+ * count for this offset. So cancel these reservation
|
||
|
+ */
|
||
|
+ if (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE)
|
||
|
+ ext4_da_update_reserve_space(inode, allocated, 0);
|
||
|
+
|
||
|
map_out:
|
||
|
set_buffer_mapped(bh_result);
|
||
|
out1:
|
||
|
@@ -3333,9 +3380,18 @@ int ext4_ext_get_blocks(handle_t *handle
|
||
|
/* previous routine could use block we allocated */
|
||
|
newblock = ext_pblock(&newex);
|
||
|
allocated = ext4_ext_get_actual_len(&newex);
|
||
|
+ if (allocated > max_blocks)
|
||
|
+ allocated = max_blocks;
|
||
|
set_buffer_new(bh_result);
|
||
|
|
||
|
/*
|
||
|
+ * Update reserved blocks/metadata blocks after successful
|
||
|
+ * block allocation which had been deferred till now.
|
||
|
+ */
|
||
|
+ if (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE)
|
||
|
+ ext4_da_update_reserve_space(inode, allocated, 1);
|
||
|
+
|
||
|
+ /*
|
||
|
* Cache the extent and update transaction to commit on fdatasync only
|
||
|
* when it is _not_ an uninitialized extent.
|
||
|
*/
|
||
|
Index: linux-2.6.32.noarch/fs/ext4/inode.c
|
||
|
===================================================================
|
||
|
--- linux-2.6.32.noarch.orig/fs/ext4/inode.c
|
||
|
+++ linux-2.6.32.noarch/fs/ext4/inode.c
|
||
|
@@ -1051,81 +1051,105 @@ qsize_t *ext4_get_reserved_space(struct
|
||
|
return &EXT4_I(inode)->i_reserved_quota;
|
||
|
}
|
||
|
#endif
|
||
|
+
|
||
|
/*
|
||
|
* Calculate the number of metadata blocks need to reserve
|
||
|
- * to allocate @blocks for non extent file based file
|
||
|
+ * to allocate a new block at @lblocks for non extent file based file
|
||
|
*/
|
||
|
-static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
|
||
|
+static int ext4_indirect_calc_metadata_amount(struct inode *inode,
|
||
|
+ sector_t lblock)
|
||
|
{
|
||
|
- int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
|
||
|
- int ind_blks, dind_blks, tind_blks;
|
||
|
-
|
||
|
- /* number of new indirect blocks needed */
|
||
|
- ind_blks = (blocks + icap - 1) / icap;
|
||
|
+ struct ext4_inode_info *ei = EXT4_I(inode);
|
||
|
+ int dind_mask = EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1;
|
||
|
+ int blk_bits;
|
||
|
|
||
|
- dind_blks = (ind_blks + icap - 1) / icap;
|
||
|
+ if (lblock < EXT4_NDIR_BLOCKS)
|
||
|
+ return 0;
|
||
|
|
||
|
- tind_blks = 1;
|
||
|
+ lblock -= EXT4_NDIR_BLOCKS;
|
||
|
|
||
|
- return ind_blks + dind_blks + tind_blks;
|
||
|
+ if (ei->i_da_metadata_calc_len &&
|
||
|
+ (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) {
|
||
|
+ ei->i_da_metadata_calc_len++;
|
||
|
+ return 0;
|
||
|
+ }
|
||
|
+ ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
|
||
|
+ ei->i_da_metadata_calc_len = 1;
|
||
|
+ blk_bits = roundup_pow_of_two(lblock + 1);
|
||
|
+ return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Calculate the number of metadata blocks need to reserve
|
||
|
- * to allocate given number of blocks
|
||
|
+ * to allocate a block located at @lblock
|
||
|
*/
|
||
|
-static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
|
||
|
+static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock)
|
||
|
{
|
||
|
- if (!blocks)
|
||
|
- return 0;
|
||
|
-
|
||
|
if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
|
||
|
- return ext4_ext_calc_metadata_amount(inode, blocks);
|
||
|
+ return ext4_ext_calc_metadata_amount(inode, lblock);
|
||
|
|
||
|
- return ext4_indirect_calc_metadata_amount(inode, blocks);
|
||
|
+ return ext4_indirect_calc_metadata_amount(inode, lblock);
|
||
|
}
|
||
|
|
||
|
-static void ext4_da_update_reserve_space(struct inode *inode, int used)
|
||
|
+/*
|
||
|
+ * Called with i_data_sem down, which is important since we can call
|
||
|
+ * ext4_discard_preallocations() from here.
|
||
|
+ */
|
||
|
+void ext4_da_update_reserve_space(struct inode *inode,
|
||
|
+ int used, int quota_claim)
|
||
|
{
|
||
|
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||
|
- int total, mdb, mdb_free;
|
||
|
+ struct ext4_inode_info *ei = EXT4_I(inode);
|
||
|
|
||
|
- spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
|
||
|
- /* recalculate the number of metablocks still need to be reserved */
|
||
|
- total = EXT4_I(inode)->i_reserved_data_blocks - used;
|
||
|
- mdb = ext4_calc_metadata_amount(inode, total);
|
||
|
-
|
||
|
- /* figure out how many metablocks to release */
|
||
|
- BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
|
||
|
- mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
|
||
|
-
|
||
|
- if (mdb_free) {
|
||
|
- /* Account for allocated meta_blocks */
|
||
|
- mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
|
||
|
-
|
||
|
- /* update fs dirty blocks counter */
|
||
|
- percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
|
||
|
- EXT4_I(inode)->i_allocated_meta_blocks = 0;
|
||
|
- EXT4_I(inode)->i_reserved_meta_blocks = mdb;
|
||
|
- }
|
||
|
-
|
||
|
- /* update per-inode reservations */
|
||
|
- BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
|
||
|
- EXT4_I(inode)->i_reserved_data_blocks -= used;
|
||
|
+ spin_lock(&ei->i_block_reservation_lock);
|
||
|
+ if (unlikely(used > ei->i_reserved_data_blocks)) {
|
||
|
+ ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
|
||
|
+ "with only %d reserved data blocks\n",
|
||
|
+ __func__, inode->i_ino, used,
|
||
|
+ ei->i_reserved_data_blocks);
|
||
|
+ WARN_ON(1);
|
||
|
+ used = ei->i_reserved_data_blocks;
|
||
|
+ }
|
||
|
+
|
||
|
+ /* Update per-inode reservations */
|
||
|
+ ei->i_reserved_data_blocks -= used;
|
||
|
+ ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
|
||
|
+ percpu_counter_sub(&sbi->s_dirtyblocks_counter,
|
||
|
+ used + ei->i_allocated_meta_blocks);
|
||
|
+ ei->i_allocated_meta_blocks = 0;
|
||
|
+
|
||
|
+ if (ei->i_reserved_data_blocks == 0) {
|
||
|
+ /*
|
||
|
+ * We can release all of the reserved metadata blocks
|
||
|
+ * only when we have written all of the delayed
|
||
|
+ * allocation blocks.
|
||
|
+ */
|
||
|
+ percpu_counter_sub(&sbi->s_dirtyblocks_counter,
|
||
|
+ ei->i_reserved_meta_blocks);
|
||
|
+ ei->i_reserved_meta_blocks = 0;
|
||
|
+ ei->i_da_metadata_calc_len = 0;
|
||
|
+ }
|
||
|
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
|
||
|
|
||
|
- /*
|
||
|
- * free those over-booking quota for metadata blocks
|
||
|
- */
|
||
|
- if (mdb_free)
|
||
|
- vfs_dq_release_reservation_block(inode, mdb_free);
|
||
|
+ /* Update quota subsystem for data blocks */
|
||
|
+ if (quota_claim) {
|
||
|
+ vfs_dq_claim_block(inode, used);
|
||
|
+ } else {
|
||
|
+ /*
|
||
|
+ * We did fallocate with an offset that is already delayed
|
||
|
+ * allocated. So on delayed allocated writeback we should
|
||
|
+ * not re-claim the quota for fallocated blocks.
|
||
|
+ */
|
||
|
+ vfs_dq_release_reservation_block(inode, used);
|
||
|
+ }
|
||
|
|
||
|
/*
|
||
|
* If we have done all the pending block allocations and if
|
||
|
* there aren't any writers on the inode, we can discard the
|
||
|
* inode's preallocations.
|
||
|
*/
|
||
|
- if (!total && (atomic_read(&inode->i_writecount) == 0))
|
||
|
+ if ((ei->i_reserved_data_blocks == 0) &&
|
||
|
+ (atomic_read(&inode->i_writecount) == 0))
|
||
|
ext4_discard_preallocations(inode);
|
||
|
}
|
||
|
|
||
|
@@ -1317,18 +1341,20 @@ int ext4_get_blocks(handle_t *handle, st
|
||
|
*/
|
||
|
EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE;
|
||
|
}
|
||
|
- }
|
||
|
|
||
|
+ /*
|
||
|
+ * Update reserved blocks/metadata blocks after successful
|
||
|
+ * block allocation which had been deferred till now. We don't
|
||
|
+ * support fallocate for non extent files. So we can update
|
||
|
+ * reserve space here.
|
||
|
+ */
|
||
|
+ if ((retval > 0) &&
|
||
|
+ (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE))
|
||
|
+ ext4_da_update_reserve_space(inode, retval, 1);
|
||
|
+ }
|
||
|
if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
|
||
|
EXT4_I(inode)->i_delalloc_reserved_flag = 0;
|
||
|
|
||
|
- /*
|
||
|
- * Update reserved blocks/metadata blocks after successful
|
||
|
- * block allocation which had been deferred till now.
|
||
|
- */
|
||
|
- if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE))
|
||
|
- ext4_da_update_reserve_space(inode, retval);
|
||
|
-
|
||
|
up_write((&EXT4_I(inode)->i_data_sem));
|
||
|
if (retval > 0 && buffer_mapped(bh)) {
|
||
|
int ret = check_block_validity(inode, "file system "
|
||
|
@@ -1834,11 +1860,15 @@ static int ext4_journalled_write_end(str
|
||
|
return ret ? ret : copied;
|
||
|
}
|
||
|
|
||
|
-static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
|
||
|
+/*
|
||
|
+ * Reserve a single block located at lblock
|
||
|
+ */
|
||
|
+static int ext4_da_reserve_space(struct inode *inode, sector_t lblock)
|
||
|
{
|
||
|
int retries = 0;
|
||
|
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||
|
- unsigned long md_needed, mdblocks, total = 0;
|
||
|
+ struct ext4_inode_info *ei = EXT4_I(inode);
|
||
|
+ unsigned long md_needed;
|
||
|
|
||
|
/*
|
||
|
* recalculate the amount of metadata blocks to reserve
|
||
|
@@ -1846,35 +1876,34 @@ static int ext4_da_reserve_space(struct
|
||
|
* worse case is one extent per block
|
||
|
*/
|
||
|
repeat:
|
||
|
- spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
|
||
|
- total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks;
|
||
|
- mdblocks = ext4_calc_metadata_amount(inode, total);
|
||
|
- BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks);
|
||
|
-
|
||
|
- md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
|
||
|
- total = md_needed + nrblocks;
|
||
|
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
|
||
|
+ spin_lock(&ei->i_block_reservation_lock);
|
||
|
+ md_needed = ext4_calc_metadata_amount(inode, lblock);
|
||
|
+ spin_unlock(&ei->i_block_reservation_lock);
|
||
|
|
||
|
/*
|
||
|
- * Make quota reservation here to prevent quota overflow
|
||
|
- * later. Real quota accounting is done at pages writeout
|
||
|
- * time.
|
||
|
+ * We will charge metadata quota at writeout time; this saves
|
||
|
+ * us from metadata over-estimation, though we may go over by
|
||
|
+ * a small amount in the end. Here we just reserve for data.
|
||
|
*/
|
||
|
- if (vfs_dq_reserve_block(inode, total))
|
||
|
+ if (vfs_dq_reserve_block(inode, 1))
|
||
|
return -EDQUOT;
|
||
|
|
||
|
- if (ext4_claim_free_blocks(sbi, total)) {
|
||
|
- vfs_dq_release_reservation_block(inode, total);
|
||
|
+ /*
|
||
|
+ * We do still charge estimated metadata to the sb though;
|
||
|
+ * we cannot afford to run out of free blocks.
|
||
|
+ */
|
||
|
+ if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
|
||
|
+ vfs_dq_release_reservation_block(inode, 1);
|
||
|
if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
|
||
|
yield();
|
||
|
goto repeat;
|
||
|
}
|
||
|
return -ENOSPC;
|
||
|
}
|
||
|
- spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
|
||
|
- EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
|
||
|
- EXT4_I(inode)->i_reserved_meta_blocks += md_needed;
|
||
|
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
|
||
|
+ spin_lock(&ei->i_block_reservation_lock);
|
||
|
+ ei->i_reserved_data_blocks++;
|
||
|
+ ei->i_reserved_meta_blocks += md_needed;
|
||
|
+ spin_unlock(&ei->i_block_reservation_lock);
|
||
|
|
||
|
return 0; /* success */
|
||
|
}
|
||
|
@@ -1882,49 +1911,47 @@ repeat:
|
||
|
static void ext4_da_release_space(struct inode *inode, int to_free)
|
||
|
{
|
||
|
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
||
|
- int total, mdb, mdb_free, release;
|
||
|
+ struct ext4_inode_info *ei = EXT4_I(inode);
|
||
|
|
||
|
if (!to_free)
|
||
|
return; /* Nothing to release, exit */
|
||
|
|
||
|
spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
|
||
|
|
||
|
- if (!EXT4_I(inode)->i_reserved_data_blocks) {
|
||
|
+ if (unlikely(to_free > ei->i_reserved_data_blocks)) {
|
||
|
/*
|
||
|
- * if there is no reserved blocks, but we try to free some
|
||
|
- * then the counter is messed up somewhere.
|
||
|
- * but since this function is called from invalidate
|
||
|
- * page, it's harmless to return without any action
|
||
|
- */
|
||
|
- printk(KERN_INFO "ext4 delalloc try to release %d reserved "
|
||
|
- "blocks for inode %lu, but there is no reserved "
|
||
|
- "data blocks\n", to_free, inode->i_ino);
|
||
|
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
|
||
|
- return;
|
||
|
+ * if there aren't enough reserved blocks, then the
|
||
|
+ * counter is messed up somewhere. Since this
|
||
|
+ * function is called from invalidate page, it's
|
||
|
+ * harmless to return without any action.
|
||
|
+ */
|
||
|
+ ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: "
|
||
|
+ "ino %lu, to_free %d with only %d reserved "
|
||
|
+ "data blocks\n", inode->i_ino, to_free,
|
||
|
+ ei->i_reserved_data_blocks);
|
||
|
+ WARN_ON(1);
|
||
|
+ to_free = ei->i_reserved_data_blocks;
|
||
|
}
|
||
|
+ ei->i_reserved_data_blocks -= to_free;
|
||
|
|
||
|
- /* recalculate the number of metablocks still need to be reserved */
|
||
|
- total = EXT4_I(inode)->i_reserved_data_blocks - to_free;
|
||
|
- mdb = ext4_calc_metadata_amount(inode, total);
|
||
|
-
|
||
|
- /* figure out how many metablocks to release */
|
||
|
- BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
|
||
|
- mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
|
||
|
-
|
||
|
- release = to_free + mdb_free;
|
||
|
-
|
||
|
- /* update fs dirty blocks counter for truncate case */
|
||
|
- percpu_counter_sub(&sbi->s_dirtyblocks_counter, release);
|
||
|
+ if (ei->i_reserved_data_blocks == 0) {
|
||
|
+ /*
|
||
|
+ * We can release all of the reserved metadata blocks
|
||
|
+ * only when we have written all of the delayed
|
||
|
+ * allocation blocks.
|
||
|
+ */
|
||
|
+ percpu_counter_sub(&sbi->s_dirtyblocks_counter,
|
||
|
+ ei->i_reserved_meta_blocks);
|
||
|
+ ei->i_reserved_meta_blocks = 0;
|
||
|
+ ei->i_da_metadata_calc_len = 0;
|
||
|
+ }
|
||
|
|
||
|
- /* update per-inode reservations */
|
||
|
- BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks);
|
||
|
- EXT4_I(inode)->i_reserved_data_blocks -= to_free;
|
||
|
+ /* update fs dirty data blocks counter */
|
||
|
+ percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free);
|
||
|
|
||
|
- BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
|
||
|
- EXT4_I(inode)->i_reserved_meta_blocks = mdb;
|
||
|
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
|
||
|
|
||
|
- vfs_dq_release_reservation_block(inode, release);
|
||
|
+ vfs_dq_release_reservation_block(inode, to_free);
|
||
|
}
|
||
|
|
||
|
static void ext4_da_page_release_reservation(struct page *page,
|
||
|
@@ -2530,7 +2557,7 @@ static int ext4_da_get_block_prep(struct
|
||
|
* XXX: __block_prepare_write() unmaps passed block,
|
||
|
* is it OK?
|
||
|
*/
|
||
|
- ret = ext4_da_reserve_space(inode, 1);
|
||
|
+ ret = ext4_da_reserve_space(inode, iblock);
|
||
|
if (ret)
|
||
|
/* not enough space to reserve */
|
||
|
return ret;
|
||
|
Index: linux-2.6.32.noarch/fs/ext4/mballoc.c
|
||
|
===================================================================
|
||
|
--- linux-2.6.32.noarch.orig/fs/ext4/mballoc.c
|
||
|
+++ linux-2.6.32.noarch/fs/ext4/mballoc.c
|
||
|
@@ -2756,12 +2756,6 @@ ext4_mb_mark_diskspace_used(struct ext4_
|
||
|
if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
|
||
|
/* release all the reserved blocks if non delalloc */
|
||
|
percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
|
||
|
- else {
|
||
|
- percpu_counter_sub(&sbi->s_dirtyblocks_counter,
|
||
|
- ac->ac_b_ex.fe_len);
|
||
|
- /* convert reserved quota blocks to real quota blocks */
|
||
|
- vfs_dq_claim_block(ac->ac_inode, ac->ac_b_ex.fe_len);
|
||
|
- }
|
||
|
|
||
|
if (sbi->s_log_groups_per_flex) {
|
||
|
ext4_group_t flex_group = ext4_flex_group(sbi,
|
||
|
Index: linux-2.6.32.noarch/fs/ext4/ext4.h
|
||
|
===================================================================
|
||
|
--- linux-2.6.32.noarch.orig/fs/ext4/ext4.h
|
||
|
+++ linux-2.6.32.noarch/fs/ext4/ext4.h
|
||
|
@@ -693,6 +693,8 @@ struct ext4_inode_info {
|
||
|
unsigned int i_reserved_meta_blocks;
|
||
|
unsigned int i_allocated_meta_blocks;
|
||
|
unsigned short i_delalloc_reserved_flag;
|
||
|
+ sector_t i_da_metadata_calc_last_lblock;
|
||
|
+ int i_da_metadata_calc_len;
|
||
|
|
||
|
/* on-disk additional length */
|
||
|
__u16 i_extra_isize;
|
||
|
@@ -1438,6 +1440,8 @@ extern int ext4_block_truncate_page(hand
|
||
|
extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
|
||
|
extern qsize_t *ext4_get_reserved_space(struct inode *inode);
|
||
|
extern int flush_aio_dio_completed_IO(struct inode *inode);
|
||
|
+extern void ext4_da_update_reserve_space(struct inode *inode,
|
||
|
+ int used, int quota_claim);
|
||
|
/* ioctl.c */
|
||
|
extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
|
||
|
extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
|
||
|
Index: linux-2.6.32.noarch/fs/ext4/ext4_extents.h
|
||
|
===================================================================
|
||
|
--- linux-2.6.32.noarch.orig/fs/ext4/ext4_extents.h
|
||
|
+++ linux-2.6.32.noarch/fs/ext4/ext4_extents.h
|
||
|
@@ -225,7 +225,8 @@ static inline void ext4_ext_mark_initial
|
||
|
ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext));
|
||
|
}
|
||
|
|
||
|
-extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks);
|
||
|
+extern int ext4_ext_calc_metadata_amount(struct inode *inode,
|
||
|
+ sector_t lblocks);
|
||
|
extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
|
||
|
extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
|
||
|
extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
|
||
|
Index: linux-2.6.32.noarch/fs/ext4/super.c
|
||
|
===================================================================
|
||
|
--- linux-2.6.32.noarch.orig/fs/ext4/super.c
|
||
|
+++ linux-2.6.32.noarch/fs/ext4/super.c
|
||
|
@@ -702,6 +702,7 @@ static struct inode *ext4_alloc_inode(st
|
||
|
ei->i_reserved_data_blocks = 0;
|
||
|
ei->i_reserved_meta_blocks = 0;
|
||
|
ei->i_allocated_meta_blocks = 0;
|
||
|
+ ei->i_da_metadata_calc_len = 0;
|
||
|
ei->i_delalloc_reserved_flag = 0;
|
||
|
spin_lock_init(&(ei->i_block_reservation_lock));
|
||
|
#ifdef CONFIG_QUOTA
|
||
|
Index: linux-2.6.32.noarch/fs/quota/dquot.c
|
||
|
===================================================================
|
||
|
--- linux-2.6.32.noarch.orig/fs/quota/dquot.c
|
||
|
+++ linux-2.6.32.noarch/fs/quota/dquot.c
|
||
|
@@ -1492,11 +1492,13 @@ static void inode_decr_space(struct inod
|
||
|
/*
|
||
|
* This operation can block, but only after everything is updated
|
||
|
*/
|
||
|
-int __dquot_alloc_space(struct inode *inode, qsize_t number,
|
||
|
- int warn, int reserve)
|
||
|
+int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
|
||
|
{
|
||
|
int cnt, ret = QUOTA_OK;
|
||
|
char warntype[MAXQUOTAS];
|
||
|
+ int warn = flags & DQUOT_SPACE_WARN;
|
||
|
+ int reserve = flags & DQUOT_SPACE_RESERVE;
|
||
|
+ int nofail = flags & DQUOT_SPACE_NOFAIL;
|
||
|
|
||
|
/*
|
||
|
* First test before acquiring mutex - solves deadlocks when we
|
||
|
@@ -1521,7 +1523,7 @@ int __dquot_alloc_space(struct inode *in
|
||
|
if (!inode->i_dquot[cnt])
|
||
|
continue;
|
||
|
if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt)
|
||
|
- == NO_QUOTA) {
|
||
|
+ == NO_QUOTA && !nofail) {
|
||
|
ret = NO_QUOTA;
|
||
|
spin_unlock(&dq_data_lock);
|
||
|
goto out_flush_warn;
|
||
|
@@ -1552,15 +1554,19 @@ out:
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
-int dquot_alloc_space(struct inode *inode, qsize_t number, int warn)
|
||
|
+int dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
|
||
|
{
|
||
|
- return __dquot_alloc_space(inode, number, warn, 0);
|
||
|
+ return __dquot_alloc_space(inode, number, flags);
|
||
|
}
|
||
|
EXPORT_SYMBOL(dquot_alloc_space);
|
||
|
|
||
|
int dquot_reserve_space(struct inode *inode, qsize_t number, int warn)
|
||
|
{
|
||
|
- return __dquot_alloc_space(inode, number, warn, 1);
|
||
|
+ int flags = DQUOT_SPACE_RESERVE;
|
||
|
+
|
||
|
+ if (warn)
|
||
|
+ flags |= DQUOT_SPACE_WARN;
|
||
|
+ return __dquot_alloc_space(inode, number, flags);
|
||
|
}
|
||
|
EXPORT_SYMBOL(dquot_reserve_space);
|
||
|
|
||
|
@@ -1651,10 +1657,11 @@ EXPORT_SYMBOL(dquot_claim_space);
|
||
|
/*
|
||
|
* This operation can block, but only after everything is updated
|
||
|
*/
|
||
|
-int __dquot_free_space(struct inode *inode, qsize_t number, int reserve)
|
||
|
+int __dquot_free_space(struct inode *inode, qsize_t number, int flags)
|
||
|
{
|
||
|
unsigned int cnt;
|
||
|
char warntype[MAXQUOTAS];
|
||
|
+ int reserve = flags & DQUOT_SPACE_RESERVE;
|
||
|
|
||
|
/* First test before acquiring mutex - solves deadlocks when we
|
||
|
* re-enter the quota code and are already holding the mutex */
|
||
|
@@ -1706,7 +1713,7 @@ EXPORT_SYMBOL(dquot_free_space);
|
||
|
*/
|
||
|
void dquot_release_reserved_space(struct inode *inode, qsize_t number)
|
||
|
{
|
||
|
- __dquot_free_space(inode, number, 1);
|
||
|
+ __dquot_free_space(inode, number, DQUOT_SPACE_RESERVE);
|
||
|
|
||
|
}
|
||
|
EXPORT_SYMBOL(dquot_release_reserved_space);
|
||
|
Index: linux-2.6.32.noarch/include/linux/quotaops.h
|
||
|
===================================================================
|
||
|
--- linux-2.6.32.noarch.orig/include/linux/quotaops.h
|
||
|
+++ linux-2.6.32.noarch/include/linux/quotaops.h
|
||
|
@@ -14,6 +14,10 @@ static inline struct quota_info *sb_dqop
|
||
|
return &sb->s_dquot;
|
||
|
}
|
||
|
|
||
|
+#define DQUOT_SPACE_WARN 0x1
|
||
|
+#define DQUOT_SPACE_RESERVE 0x2
|
||
|
+#define DQUOT_SPACE_NOFAIL 0x4
|
||
|
+
|
||
|
#if defined(CONFIG_QUOTA)
|
||
|
|
||
|
/*
|
||
|
@@ -159,7 +163,7 @@ static inline int vfs_dq_prealloc_space_
|
||
|
{
|
||
|
if (sb_any_quota_active(inode->i_sb)) {
|
||
|
/* Used space is updated in alloc_space() */
|
||
|
- if (inode->i_sb->dq_op->alloc_space(inode, nr, 1) == NO_QUOTA)
|
||
|
+ if (inode->i_sb->dq_op->alloc_space(inode, nr, DQUOT_SPACE_WARN) == NO_QUOTA)
|
||
|
return 1;
|
||
|
}
|
||
|
else
|
||
|
@@ -187,6 +191,16 @@ static inline int vfs_dq_alloc_space_nod
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
+static inline void vfs_dq_alloc_space_nofail(struct inode *inode, qsize_t nr)
|
||
|
+{
|
||
|
+ if (sb_any_quota_active(inode->i_sb)) {
|
||
|
+ /* Used space is updated in alloc_space() */
|
||
|
+ inode->i_sb->dq_op->alloc_space(inode, nr, DQUOT_SPACE_NOFAIL);
|
||
|
+ } else
|
||
|
+ inode_add_bytes(inode, nr);
|
||
|
+ mark_inode_dirty(inode);
|
||
|
+}
|
||
|
+
|
||
|
static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr)
|
||
|
{
|
||
|
int ret;
|
||
|
@@ -382,6 +396,12 @@ static inline int vfs_dq_alloc_space_nod
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
+static inline void vfs_dq_alloc_space_nofail(struct inode *inode, qsize_t nr)
|
||
|
+{
|
||
|
+ inode_add_bytes(inode, nr);
|
||
|
+ mark_inode_dirty(inode);
|
||
|
+}
|
||
|
+
|
||
|
static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr)
|
||
|
{
|
||
|
vfs_dq_alloc_space_nodirty(inode, nr);
|
||
|
@@ -433,6 +453,11 @@ static inline int vfs_dq_alloc_block_nod
|
||
|
return vfs_dq_alloc_space_nodirty(inode, nr << inode->i_blkbits);
|
||
|
}
|
||
|
|
||
|
+static inline void vfs_dq_alloc_block_nofail(struct inode *inode, qsize_t nr)
|
||
|
+{
|
||
|
+ vfs_dq_alloc_space_nofail(inode, nr << inode->i_blkbits);
|
||
|
+}
|
||
|
+
|
||
|
static inline int vfs_dq_alloc_block(struct inode *inode, qsize_t nr)
|
||
|
{
|
||
|
return vfs_dq_alloc_space(inode, nr << inode->i_blkbits);
|
||
|
Index: linux-2.6.32.noarch/fs/ext4/balloc.c
|
||
|
===================================================================
|
||
|
--- linux-2.6.32.noarch.orig/fs/ext4/balloc.c
|
||
|
+++ linux-2.6.32.noarch/fs/ext4/balloc.c
|
||
|
@@ -642,14 +642,15 @@ ext4_fsblk_t ext4_new_meta_blocks(handle
|
||
|
ret = ext4_mb_new_blocks(handle, &ar, errp);
|
||
|
if (count)
|
||
|
*count = ar.len;
|
||
|
-
|
||
|
/*
|
||
|
- * Account for the allocated meta blocks
|
||
|
+ * Account for the allocated meta blocks. We will never
|
||
|
+ * fail EDQUOT for metadata, but we do account for it.
|
||
|
*/
|
||
|
if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) {
|
||
|
spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
|
||
|
EXT4_I(inode)->i_allocated_meta_blocks += ar.len;
|
||
|
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
|
||
|
+ vfs_dq_alloc_block_nofail(inode, ar.len);
|
||
|
}
|
||
|
return ret;
|
||
|
}
|
||
|
|