For bug #608770 Do not speculatively allocate metadata for quota; it's too complex, and we usually get it wrong. Backport of these upstream patches, some of which just play a supporting role. -Eric 515f41c33a9d44a964264c9511ad2c869af1fac3 ext4: Ensure zeroout blocks have no dirty metadata d21cd8f163ac44b15c465aab7306db931c606908 ext4: Fix potential quota deadlock 0637c6f4135f592f094207c7c21e7c0fc5557834 ext4: Patch up how we claim metadata blocks for quota purposes 9d0be50230b333005635967f7ecd4897dbfd181b ext4: Calculate metadata requirements more accurately 1db913823c0f8360fccbd24ca67eb073966a5ffd ext4: Handle -EDQUOT error on write 5f634d064c709ea02c3cdaa850a08323a4a4bf28 ext4: Fix quota accounting error with fallocate 56246f9ae4cfa95b460f9dfbcfb1b772d85db046 quota: use flags interface for dquot alloc/free space 0e05842bc117ea70ceb979cca798fd026879951b quota: add the option to not fail with EDQUOT in block 72b8ab9dde211ea518ff27e631b2046ef90c29a2 ext4: don't use quota reservation for speculative metadata Index: linux-2.6.32.noarch/fs/ext4/extents.c =================================================================== --- linux-2.6.32.noarch.orig/fs/ext4/extents.c +++ linux-2.6.32.noarch/fs/ext4/extents.c @@ -296,29 +296,44 @@ static inline int ext4_ext_space_root_id * to allocate @blocks * Worse case is one block per extent */ -int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks) +int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock) { - int lcap, icap, rcap, leafs, idxs, num; - int newextents = blocks; + struct ext4_inode_info *ei = EXT4_I(inode); + int idxs, num = 0; - rcap = ext4_ext_space_root_idx(inode, 0); - lcap = ext4_ext_space_block(inode, 0); - icap = ext4_ext_space_block_idx(inode, 0); + idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) + / sizeof(struct ext4_extent_idx)); - /* number of new leaf blocks needed */ - num = leafs = (newextents + lcap - 1) / lcap; + /* + * If the new delayed allocation block is contiguous with the + * previous da block, it can share index blocks with the + * previous block, so we only need to allocate a new index + * block every idxs leaf blocks. At ldxs**2 blocks, we need + * an additional index block, and at ldxs**3 blocks, yet + * another index blocks. + */ + if (ei->i_da_metadata_calc_len && + ei->i_da_metadata_calc_last_lblock+1 == lblock) { + if ((ei->i_da_metadata_calc_len % idxs) == 0) + num++; + if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0) + num++; + if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) { + num++; + ei->i_da_metadata_calc_len = 0; + } else + ei->i_da_metadata_calc_len++; + ei->i_da_metadata_calc_last_lblock++; + return num; + } /* - * Worse case, we need separate index block(s) - * to link all new leaf blocks + * In the worst case we need a new set of index blocks at + * every level of the inode's extent tree. */ - idxs = (leafs + icap - 1) / icap; - do { - num += idxs; - idxs = (idxs + icap - 1) / icap; - } while (idxs > rcap); - - return num; + ei->i_da_metadata_calc_len = 1; + ei->i_da_metadata_calc_last_lblock = lblock; + return ext_depth(inode) + 1; } static int @@ -3029,6 +3044,14 @@ out: return err; } +static void unmap_underlying_metadata_blocks(struct block_device *bdev, + sector_t block, int count) +{ + int i; + for (i = 0; i < count; i++) + unmap_underlying_metadata(bdev, block + i); +} + static int ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, ext4_lblk_t iblock, unsigned int max_blocks, @@ -3104,6 +3127,30 @@ out: } else allocated = ret; set_buffer_new(bh_result); + /* + * if we allocated more blocks than requested + * we need to make sure we unmap the extra block + * allocated. The actual needed block will get + * unmapped later when we find the buffer_head marked + * new. + */ + if (allocated > max_blocks) { + unmap_underlying_metadata_blocks(inode->i_sb->s_bdev, + newblock + max_blocks, + allocated - max_blocks); + allocated = max_blocks; + } + + /* + * If we have done fallocate with the offset that is already + * delayed allocated, we would have block reservation + * and quota reservation done in the delayed write path. + * But fallocate would have already updated quota and block + * count for this offset. So cancel these reservation + */ + if (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE) + ext4_da_update_reserve_space(inode, allocated, 0); + map_out: set_buffer_mapped(bh_result); out1: @@ -3333,9 +3380,18 @@ int ext4_ext_get_blocks(handle_t *handle /* previous routine could use block we allocated */ newblock = ext_pblock(&newex); allocated = ext4_ext_get_actual_len(&newex); + if (allocated > max_blocks) + allocated = max_blocks; set_buffer_new(bh_result); /* + * Update reserved blocks/metadata blocks after successful + * block allocation which had been deferred till now. + */ + if (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE) + ext4_da_update_reserve_space(inode, allocated, 1); + + /* * Cache the extent and update transaction to commit on fdatasync only * when it is _not_ an uninitialized extent. */ Index: linux-2.6.32.noarch/fs/ext4/inode.c =================================================================== --- linux-2.6.32.noarch.orig/fs/ext4/inode.c +++ linux-2.6.32.noarch/fs/ext4/inode.c @@ -1051,81 +1051,105 @@ qsize_t *ext4_get_reserved_space(struct return &EXT4_I(inode)->i_reserved_quota; } #endif + /* * Calculate the number of metadata blocks need to reserve - * to allocate @blocks for non extent file based file + * to allocate a new block at @lblocks for non extent file based file */ -static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks) +static int ext4_indirect_calc_metadata_amount(struct inode *inode, + sector_t lblock) { - int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb); - int ind_blks, dind_blks, tind_blks; - - /* number of new indirect blocks needed */ - ind_blks = (blocks + icap - 1) / icap; + struct ext4_inode_info *ei = EXT4_I(inode); + int dind_mask = EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1; + int blk_bits; - dind_blks = (ind_blks + icap - 1) / icap; + if (lblock < EXT4_NDIR_BLOCKS) + return 0; - tind_blks = 1; + lblock -= EXT4_NDIR_BLOCKS; - return ind_blks + dind_blks + tind_blks; + if (ei->i_da_metadata_calc_len && + (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) { + ei->i_da_metadata_calc_len++; + return 0; + } + ei->i_da_metadata_calc_last_lblock = lblock & dind_mask; + ei->i_da_metadata_calc_len = 1; + blk_bits = roundup_pow_of_two(lblock + 1); + return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1; } /* * Calculate the number of metadata blocks need to reserve - * to allocate given number of blocks + * to allocate a block located at @lblock */ -static int ext4_calc_metadata_amount(struct inode *inode, int blocks) +static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock) { - if (!blocks) - return 0; - if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) - return ext4_ext_calc_metadata_amount(inode, blocks); + return ext4_ext_calc_metadata_amount(inode, lblock); - return ext4_indirect_calc_metadata_amount(inode, blocks); + return ext4_indirect_calc_metadata_amount(inode, lblock); } -static void ext4_da_update_reserve_space(struct inode *inode, int used) +/* + * Called with i_data_sem down, which is important since we can call + * ext4_discard_preallocations() from here. + */ +void ext4_da_update_reserve_space(struct inode *inode, + int used, int quota_claim) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - int total, mdb, mdb_free; + struct ext4_inode_info *ei = EXT4_I(inode); - spin_lock(&EXT4_I(inode)->i_block_reservation_lock); - /* recalculate the number of metablocks still need to be reserved */ - total = EXT4_I(inode)->i_reserved_data_blocks - used; - mdb = ext4_calc_metadata_amount(inode, total); - - /* figure out how many metablocks to release */ - BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); - mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; - - if (mdb_free) { - /* Account for allocated meta_blocks */ - mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; - - /* update fs dirty blocks counter */ - percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); - EXT4_I(inode)->i_allocated_meta_blocks = 0; - EXT4_I(inode)->i_reserved_meta_blocks = mdb; - } - - /* update per-inode reservations */ - BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks); - EXT4_I(inode)->i_reserved_data_blocks -= used; + spin_lock(&ei->i_block_reservation_lock); + if (unlikely(used > ei->i_reserved_data_blocks)) { + ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d " + "with only %d reserved data blocks\n", + __func__, inode->i_ino, used, + ei->i_reserved_data_blocks); + WARN_ON(1); + used = ei->i_reserved_data_blocks; + } + + /* Update per-inode reservations */ + ei->i_reserved_data_blocks -= used; + ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks; + percpu_counter_sub(&sbi->s_dirtyblocks_counter, + used + ei->i_allocated_meta_blocks); + ei->i_allocated_meta_blocks = 0; + + if (ei->i_reserved_data_blocks == 0) { + /* + * We can release all of the reserved metadata blocks + * only when we have written all of the delayed + * allocation blocks. + */ + percpu_counter_sub(&sbi->s_dirtyblocks_counter, + ei->i_reserved_meta_blocks); + ei->i_reserved_meta_blocks = 0; + ei->i_da_metadata_calc_len = 0; + } spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); - /* - * free those over-booking quota for metadata blocks - */ - if (mdb_free) - vfs_dq_release_reservation_block(inode, mdb_free); + /* Update quota subsystem for data blocks */ + if (quota_claim) { + vfs_dq_claim_block(inode, used); + } else { + /* + * We did fallocate with an offset that is already delayed + * allocated. So on delayed allocated writeback we should + * not re-claim the quota for fallocated blocks. + */ + vfs_dq_release_reservation_block(inode, used); + } /* * If we have done all the pending block allocations and if * there aren't any writers on the inode, we can discard the * inode's preallocations. */ - if (!total && (atomic_read(&inode->i_writecount) == 0)) + if ((ei->i_reserved_data_blocks == 0) && + (atomic_read(&inode->i_writecount) == 0)) ext4_discard_preallocations(inode); } @@ -1317,18 +1341,20 @@ int ext4_get_blocks(handle_t *handle, st */ EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; } - } + /* + * Update reserved blocks/metadata blocks after successful + * block allocation which had been deferred till now. We don't + * support fallocate for non extent files. So we can update + * reserve space here. + */ + if ((retval > 0) && + (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE)) + ext4_da_update_reserve_space(inode, retval, 1); + } if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) EXT4_I(inode)->i_delalloc_reserved_flag = 0; - /* - * Update reserved blocks/metadata blocks after successful - * block allocation which had been deferred till now. - */ - if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE)) - ext4_da_update_reserve_space(inode, retval); - up_write((&EXT4_I(inode)->i_data_sem)); if (retval > 0 && buffer_mapped(bh)) { int ret = check_block_validity(inode, "file system " @@ -1834,11 +1860,15 @@ static int ext4_journalled_write_end(str return ret ? ret : copied; } -static int ext4_da_reserve_space(struct inode *inode, int nrblocks) +/* + * Reserve a single block located at lblock + */ +static int ext4_da_reserve_space(struct inode *inode, sector_t lblock) { int retries = 0; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - unsigned long md_needed, mdblocks, total = 0; + struct ext4_inode_info *ei = EXT4_I(inode); + unsigned long md_needed; /* * recalculate the amount of metadata blocks to reserve @@ -1846,35 +1876,34 @@ static int ext4_da_reserve_space(struct * worse case is one extent per block */ repeat: - spin_lock(&EXT4_I(inode)->i_block_reservation_lock); - total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; - mdblocks = ext4_calc_metadata_amount(inode, total); - BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks); - - md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks; - total = md_needed + nrblocks; - spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); + spin_lock(&ei->i_block_reservation_lock); + md_needed = ext4_calc_metadata_amount(inode, lblock); + spin_unlock(&ei->i_block_reservation_lock); /* - * Make quota reservation here to prevent quota overflow - * later. Real quota accounting is done at pages writeout - * time. + * We will charge metadata quota at writeout time; this saves + * us from metadata over-estimation, though we may go over by + * a small amount in the end. Here we just reserve for data. */ - if (vfs_dq_reserve_block(inode, total)) + if (vfs_dq_reserve_block(inode, 1)) return -EDQUOT; - if (ext4_claim_free_blocks(sbi, total)) { - vfs_dq_release_reservation_block(inode, total); + /* + * We do still charge estimated metadata to the sb though; + * we cannot afford to run out of free blocks. + */ + if (ext4_claim_free_blocks(sbi, md_needed + 1)) { + vfs_dq_release_reservation_block(inode, 1); if (ext4_should_retry_alloc(inode->i_sb, &retries)) { yield(); goto repeat; } return -ENOSPC; } - spin_lock(&EXT4_I(inode)->i_block_reservation_lock); - EXT4_I(inode)->i_reserved_data_blocks += nrblocks; - EXT4_I(inode)->i_reserved_meta_blocks += md_needed; - spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); + spin_lock(&ei->i_block_reservation_lock); + ei->i_reserved_data_blocks++; + ei->i_reserved_meta_blocks += md_needed; + spin_unlock(&ei->i_block_reservation_lock); return 0; /* success */ } @@ -1882,49 +1911,47 @@ repeat: static void ext4_da_release_space(struct inode *inode, int to_free) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - int total, mdb, mdb_free, release; + struct ext4_inode_info *ei = EXT4_I(inode); if (!to_free) return; /* Nothing to release, exit */ spin_lock(&EXT4_I(inode)->i_block_reservation_lock); - if (!EXT4_I(inode)->i_reserved_data_blocks) { + if (unlikely(to_free > ei->i_reserved_data_blocks)) { /* - * if there is no reserved blocks, but we try to free some - * then the counter is messed up somewhere. - * but since this function is called from invalidate - * page, it's harmless to return without any action - */ - printk(KERN_INFO "ext4 delalloc try to release %d reserved " - "blocks for inode %lu, but there is no reserved " - "data blocks\n", to_free, inode->i_ino); - spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); - return; + * if there aren't enough reserved blocks, then the + * counter is messed up somewhere. Since this + * function is called from invalidate page, it's + * harmless to return without any action. + */ + ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: " + "ino %lu, to_free %d with only %d reserved " + "data blocks\n", inode->i_ino, to_free, + ei->i_reserved_data_blocks); + WARN_ON(1); + to_free = ei->i_reserved_data_blocks; } + ei->i_reserved_data_blocks -= to_free; - /* recalculate the number of metablocks still need to be reserved */ - total = EXT4_I(inode)->i_reserved_data_blocks - to_free; - mdb = ext4_calc_metadata_amount(inode, total); - - /* figure out how many metablocks to release */ - BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); - mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; - - release = to_free + mdb_free; - - /* update fs dirty blocks counter for truncate case */ - percpu_counter_sub(&sbi->s_dirtyblocks_counter, release); + if (ei->i_reserved_data_blocks == 0) { + /* + * We can release all of the reserved metadata blocks + * only when we have written all of the delayed + * allocation blocks. + */ + percpu_counter_sub(&sbi->s_dirtyblocks_counter, + ei->i_reserved_meta_blocks); + ei->i_reserved_meta_blocks = 0; + ei->i_da_metadata_calc_len = 0; + } - /* update per-inode reservations */ - BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); - EXT4_I(inode)->i_reserved_data_blocks -= to_free; + /* update fs dirty data blocks counter */ + percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free); - BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); - EXT4_I(inode)->i_reserved_meta_blocks = mdb; spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); - vfs_dq_release_reservation_block(inode, release); + vfs_dq_release_reservation_block(inode, to_free); } static void ext4_da_page_release_reservation(struct page *page, @@ -2530,7 +2557,7 @@ static int ext4_da_get_block_prep(struct * XXX: __block_prepare_write() unmaps passed block, * is it OK? */ - ret = ext4_da_reserve_space(inode, 1); + ret = ext4_da_reserve_space(inode, iblock); if (ret) /* not enough space to reserve */ return ret; Index: linux-2.6.32.noarch/fs/ext4/mballoc.c =================================================================== --- linux-2.6.32.noarch.orig/fs/ext4/mballoc.c +++ linux-2.6.32.noarch/fs/ext4/mballoc.c @@ -2756,12 +2756,6 @@ ext4_mb_mark_diskspace_used(struct ext4_ if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) /* release all the reserved blocks if non delalloc */ percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks); - else { - percpu_counter_sub(&sbi->s_dirtyblocks_counter, - ac->ac_b_ex.fe_len); - /* convert reserved quota blocks to real quota blocks */ - vfs_dq_claim_block(ac->ac_inode, ac->ac_b_ex.fe_len); - } if (sbi->s_log_groups_per_flex) { ext4_group_t flex_group = ext4_flex_group(sbi, Index: linux-2.6.32.noarch/fs/ext4/ext4.h =================================================================== --- linux-2.6.32.noarch.orig/fs/ext4/ext4.h +++ linux-2.6.32.noarch/fs/ext4/ext4.h @@ -693,6 +693,8 @@ struct ext4_inode_info { unsigned int i_reserved_meta_blocks; unsigned int i_allocated_meta_blocks; unsigned short i_delalloc_reserved_flag; + sector_t i_da_metadata_calc_last_lblock; + int i_da_metadata_calc_len; /* on-disk additional length */ __u16 i_extra_isize; @@ -1438,6 +1440,8 @@ extern int ext4_block_truncate_page(hand extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); extern qsize_t *ext4_get_reserved_space(struct inode *inode); extern int flush_aio_dio_completed_IO(struct inode *inode); +extern void ext4_da_update_reserve_space(struct inode *inode, + int used, int quota_claim); /* ioctl.c */ extern long ext4_ioctl(struct file *, unsigned int, unsigned long); extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); Index: linux-2.6.32.noarch/fs/ext4/ext4_extents.h =================================================================== --- linux-2.6.32.noarch.orig/fs/ext4/ext4_extents.h +++ linux-2.6.32.noarch/fs/ext4/ext4_extents.h @@ -225,7 +225,8 @@ static inline void ext4_ext_mark_initial ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext)); } -extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks); +extern int ext4_ext_calc_metadata_amount(struct inode *inode, + sector_t lblocks); extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex); extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); Index: linux-2.6.32.noarch/fs/ext4/super.c =================================================================== --- linux-2.6.32.noarch.orig/fs/ext4/super.c +++ linux-2.6.32.noarch/fs/ext4/super.c @@ -702,6 +702,7 @@ static struct inode *ext4_alloc_inode(st ei->i_reserved_data_blocks = 0; ei->i_reserved_meta_blocks = 0; ei->i_allocated_meta_blocks = 0; + ei->i_da_metadata_calc_len = 0; ei->i_delalloc_reserved_flag = 0; spin_lock_init(&(ei->i_block_reservation_lock)); #ifdef CONFIG_QUOTA Index: linux-2.6.32.noarch/fs/quota/dquot.c =================================================================== --- linux-2.6.32.noarch.orig/fs/quota/dquot.c +++ linux-2.6.32.noarch/fs/quota/dquot.c @@ -1492,11 +1492,13 @@ static void inode_decr_space(struct inod /* * This operation can block, but only after everything is updated */ -int __dquot_alloc_space(struct inode *inode, qsize_t number, - int warn, int reserve) +int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) { int cnt, ret = QUOTA_OK; char warntype[MAXQUOTAS]; + int warn = flags & DQUOT_SPACE_WARN; + int reserve = flags & DQUOT_SPACE_RESERVE; + int nofail = flags & DQUOT_SPACE_NOFAIL; /* * First test before acquiring mutex - solves deadlocks when we @@ -1521,7 +1523,7 @@ int __dquot_alloc_space(struct inode *in if (!inode->i_dquot[cnt]) continue; if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt) - == NO_QUOTA) { + == NO_QUOTA && !nofail) { ret = NO_QUOTA; spin_unlock(&dq_data_lock); goto out_flush_warn; @@ -1552,15 +1554,19 @@ out: return ret; } -int dquot_alloc_space(struct inode *inode, qsize_t number, int warn) +int dquot_alloc_space(struct inode *inode, qsize_t number, int flags) { - return __dquot_alloc_space(inode, number, warn, 0); + return __dquot_alloc_space(inode, number, flags); } EXPORT_SYMBOL(dquot_alloc_space); int dquot_reserve_space(struct inode *inode, qsize_t number, int warn) { - return __dquot_alloc_space(inode, number, warn, 1); + int flags = DQUOT_SPACE_RESERVE; + + if (warn) + flags |= DQUOT_SPACE_WARN; + return __dquot_alloc_space(inode, number, flags); } EXPORT_SYMBOL(dquot_reserve_space); @@ -1651,10 +1657,11 @@ EXPORT_SYMBOL(dquot_claim_space); /* * This operation can block, but only after everything is updated */ -int __dquot_free_space(struct inode *inode, qsize_t number, int reserve) +int __dquot_free_space(struct inode *inode, qsize_t number, int flags) { unsigned int cnt; char warntype[MAXQUOTAS]; + int reserve = flags & DQUOT_SPACE_RESERVE; /* First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ @@ -1706,7 +1713,7 @@ EXPORT_SYMBOL(dquot_free_space); */ void dquot_release_reserved_space(struct inode *inode, qsize_t number) { - __dquot_free_space(inode, number, 1); + __dquot_free_space(inode, number, DQUOT_SPACE_RESERVE); } EXPORT_SYMBOL(dquot_release_reserved_space); Index: linux-2.6.32.noarch/include/linux/quotaops.h =================================================================== --- linux-2.6.32.noarch.orig/include/linux/quotaops.h +++ linux-2.6.32.noarch/include/linux/quotaops.h @@ -14,6 +14,10 @@ static inline struct quota_info *sb_dqop return &sb->s_dquot; } +#define DQUOT_SPACE_WARN 0x1 +#define DQUOT_SPACE_RESERVE 0x2 +#define DQUOT_SPACE_NOFAIL 0x4 + #if defined(CONFIG_QUOTA) /* @@ -159,7 +163,7 @@ static inline int vfs_dq_prealloc_space_ { if (sb_any_quota_active(inode->i_sb)) { /* Used space is updated in alloc_space() */ - if (inode->i_sb->dq_op->alloc_space(inode, nr, 1) == NO_QUOTA) + if (inode->i_sb->dq_op->alloc_space(inode, nr, DQUOT_SPACE_WARN) == NO_QUOTA) return 1; } else @@ -187,6 +191,16 @@ static inline int vfs_dq_alloc_space_nod return 0; } +static inline void vfs_dq_alloc_space_nofail(struct inode *inode, qsize_t nr) +{ + if (sb_any_quota_active(inode->i_sb)) { + /* Used space is updated in alloc_space() */ + inode->i_sb->dq_op->alloc_space(inode, nr, DQUOT_SPACE_NOFAIL); + } else + inode_add_bytes(inode, nr); + mark_inode_dirty(inode); +} + static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr) { int ret; @@ -382,6 +396,12 @@ static inline int vfs_dq_alloc_space_nod return 0; } +static inline void vfs_dq_alloc_space_nofail(struct inode *inode, qsize_t nr) +{ + inode_add_bytes(inode, nr); + mark_inode_dirty(inode); +} + static inline int vfs_dq_alloc_space(struct inode *inode, qsize_t nr) { vfs_dq_alloc_space_nodirty(inode, nr); @@ -433,6 +453,11 @@ static inline int vfs_dq_alloc_block_nod return vfs_dq_alloc_space_nodirty(inode, nr << inode->i_blkbits); } +static inline void vfs_dq_alloc_block_nofail(struct inode *inode, qsize_t nr) +{ + vfs_dq_alloc_space_nofail(inode, nr << inode->i_blkbits); +} + static inline int vfs_dq_alloc_block(struct inode *inode, qsize_t nr) { return vfs_dq_alloc_space(inode, nr << inode->i_blkbits); Index: linux-2.6.32.noarch/fs/ext4/balloc.c =================================================================== --- linux-2.6.32.noarch.orig/fs/ext4/balloc.c +++ linux-2.6.32.noarch/fs/ext4/balloc.c @@ -642,14 +642,15 @@ ext4_fsblk_t ext4_new_meta_blocks(handle ret = ext4_mb_new_blocks(handle, &ar, errp); if (count) *count = ar.len; - /* - * Account for the allocated meta blocks + * Account for the allocated meta blocks. We will never + * fail EDQUOT for metdata, but we do account for it. */ if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) { spin_lock(&EXT4_I(inode)->i_block_reservation_lock); EXT4_I(inode)->i_allocated_meta_blocks += ar.len; spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); + vfs_dq_alloc_block_nofail(inode, ar.len); } return ret; }