259 lines
9.2 KiB
Diff
259 lines
9.2 KiB
Diff
From: Lukas Czerner <lczerner@redhat.com>
|
|
Date: Mon, 6 Jun 2011 04:05:17 +0000 (-0400)
|
|
Subject: ext4: Fix max file size and logical block counting of extent format file
|
|
X-Git-Tag: v3.0-rc5~53^2~4
|
|
X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Ftorvalds%2Flinux-2.6.git;a=commitdiff_plain;h=f17722f917b2f21497deb6edc62fb1683daa08e6
|
|
|
|
ext4: Fix max file size and logical block counting of extent format file
|
|
|
|
[ backport for 2.6.35 ]
|
|
|
|
Kazuya Mio reported that he was able to hit BUG_ON(next == lblock)
|
|
in ext4_ext_put_gap_in_cache() while creating a sparse file in extent
|
|
format and filling the tail of the file up to its end. We will hit the BUG_ON
|
|
when we write the last block (2^32-1) into the sparse file.
|
|
|
|
The root cause of the problem lies in the fact that we specifically set
|
|
s_maxbytes so that the block at s_maxbytes fits into the on-disk extent format,
|
|
which is 32 bit long. However, we are not storing start and end block
|
|
number, but rather start block number and length in blocks. It means
|
|
that in order to cover extent from 0 to EXT_MAX_BLOCK we need
|
|
EXT_MAX_BLOCK+1 to fit into len (because we are counting block 0 as well) -
|
|
and it does not.
|
|
|
|
The only way to fix it without changing the meaning of the struct
|
|
ext4_extent members is, as Kazuya Mio suggested, to lower s_maxbytes
|
|
by one fs block so we can cover the whole extent we can get by the
|
|
on-disk extent format.
|
|
|
|
Also in many places EXT_MAX_BLOCK is used as length instead of maximum
|
|
logical block number as the name suggests, it is all a bit messy. So
|
|
this commit renames it to EXT_MAX_BLOCKS and changes its usage in some
|
|
places to actually be maximum number of blocks in the extent.
|
|
|
|
The bug which this commit fixes can be reproduced as follows:
|
|
|
|
dd if=/dev/zero of=/mnt/mp1/file bs=<blocksize> count=1 seek=$((2**32-2))
|
|
sync
|
|
dd if=/dev/zero of=/mnt/mp1/file bs=<blocksize> count=1 seek=$((2**32-1))
|
|
|
|
Reported-by: Kazuya Mio <k-mio@sx.jp.nec.com>
|
|
Signed-off-by: Lukas Czerner <lczerner@redhat.com>
|
|
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
|
|
---
|
|
|
|
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
|
|
index 2e29abb..4764146 100644
|
|
--- a/fs/ext4/ext4_extents.h
|
|
+++ b/fs/ext4/ext4_extents.h
|
|
@@ -133,8 +133,11 @@ typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *,
|
|
#define EXT_BREAK 1
|
|
#define EXT_REPEAT 2
|
|
|
|
-/* Maximum logical block in a file; ext4_extent's ee_block is __le32 */
|
|
-#define EXT_MAX_BLOCK 0xffffffff
|
|
+/*
|
|
+ * Maximum number of logical blocks in a file; ext4_extent's ee_block is
|
|
+ * __le32.
|
|
+ */
|
|
+#define EXT_MAX_BLOCKS 0xffffffff
|
|
|
|
/*
|
|
* EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an
|
|
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
|
|
index 5199bac..4157570 100644
|
|
--- a/fs/ext4/extents.c
|
|
+++ b/fs/ext4/extents.c
|
|
@@ -1408,7 +1408,7 @@ got_index:
|
|
|
|
/*
|
|
* ext4_ext_next_allocated_block:
|
|
- * returns allocated block in subsequent extent or EXT_MAX_BLOCK.
|
|
+ * returns allocated block in subsequent extent or EXT_MAX_BLOCKS.
|
|
* NOTE: it considers block number from index entry as
|
|
* allocated block. Thus, index entries have to be consistent
|
|
* with leaves.
|
|
@@ -1422,7 +1422,7 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path)
|
|
depth = path->p_depth;
|
|
|
|
if (depth == 0 && path->p_ext == NULL)
|
|
- return EXT_MAX_BLOCK;
|
|
+ return EXT_MAX_BLOCKS;
|
|
|
|
while (depth >= 0) {
|
|
if (depth == path->p_depth) {
|
|
@@ -1439,12 +1439,12 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path)
|
|
depth--;
|
|
}
|
|
|
|
- return EXT_MAX_BLOCK;
|
|
+ return EXT_MAX_BLOCKS;
|
|
}
|
|
|
|
/*
|
|
* ext4_ext_next_leaf_block:
|
|
- * returns first allocated block from next leaf or EXT_MAX_BLOCK
|
|
+ * returns first allocated block from next leaf or EXT_MAX_BLOCKS
|
|
*/
|
|
static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode,
|
|
struct ext4_ext_path *path)
|
|
@@ -1456,7 +1456,7 @@ static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode,
|
|
|
|
/* zero-tree has no leaf blocks at all */
|
|
if (depth == 0)
|
|
- return EXT_MAX_BLOCK;
|
|
+ return EXT_MAX_BLOCKS;
|
|
|
|
/* go to index block */
|
|
depth--;
|
|
@@ -1469,7 +1469,7 @@ static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode,
|
|
depth--;
|
|
}
|
|
|
|
- return EXT_MAX_BLOCK;
|
|
+ return EXT_MAX_BLOCKS;
|
|
}
|
|
|
|
/*
|
|
@@ -1677,13 +1677,13 @@ static unsigned int ext4_ext_check_overlap(struct inode *inode,
|
|
*/
|
|
if (b2 < b1) {
|
|
b2 = ext4_ext_next_allocated_block(path);
|
|
- if (b2 == EXT_MAX_BLOCK)
|
|
+ if (b2 == EXT_MAX_BLOCKS)
|
|
goto out;
|
|
}
|
|
|
|
/* check for wrap through zero on extent logical start block*/
|
|
if (b1 + len1 < b1) {
|
|
- len1 = EXT_MAX_BLOCK - b1;
|
|
+ len1 = EXT_MAX_BLOCKS - b1;
|
|
newext->ee_len = cpu_to_le16(len1);
|
|
ret = 1;
|
|
}
|
|
@@ -1767,7 +1767,7 @@ repeat:
|
|
fex = EXT_LAST_EXTENT(eh);
|
|
next = ext4_ext_next_leaf_block(inode, path);
|
|
if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)
|
|
- && next != EXT_MAX_BLOCK) {
|
|
+ && next != EXT_MAX_BLOCKS) {
|
|
ext_debug("next leaf block - %d\n", next);
|
|
BUG_ON(npath != NULL);
|
|
npath = ext4_ext_find_extent(inode, next, NULL);
|
|
@@ -1887,7 +1887,7 @@ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
|
|
BUG_ON(func == NULL);
|
|
BUG_ON(inode == NULL);
|
|
|
|
- while (block < last && block != EXT_MAX_BLOCK) {
|
|
+ while (block < last && block != EXT_MAX_BLOCKS) {
|
|
num = last - block;
|
|
/* find extent for this block */
|
|
down_read(&EXT4_I(inode)->i_data_sem);
|
|
@@ -2020,7 +2020,7 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
|
|
if (ex == NULL) {
|
|
/* there is no extent yet, so gap is [0;-] */
|
|
lblock = 0;
|
|
- len = EXT_MAX_BLOCK;
|
|
+ len = EXT_MAX_BLOCKS;
|
|
ext_debug("cache gap(whole file):");
|
|
} else if (block < le32_to_cpu(ex->ee_block)) {
|
|
lblock = block;
|
|
@@ -2239,8 +2239,8 @@ ext4_ext_rm_leaf(handle_t *handle, struc
|
|
path[depth].p_ext = ex;
|
|
|
|
a = ex_ee_block > start ? ex_ee_block : start;
|
|
- b = ex_ee_block + ex_ee_len - 1 < EXT_MAX_BLOCK ?
|
|
- ex_ee_block + ex_ee_len - 1 : EXT_MAX_BLOCK;
|
|
+ b = ex_ee_block + ex_ee_len - 1 < EXT_MAX_BLOCKS ?
|
|
+ ex_ee_block + ex_ee_len - 1 : EXT_MAX_BLOCKS;
|
|
|
|
ext_debug(" border %u:%u\n", a, b);
|
|
|
|
@@ -3869,15 +3869,15 @@ static int ext4_ext_fiemap_cb(struct ino
|
|
flags |= FIEMAP_EXTENT_UNWRITTEN;
|
|
|
|
/*
|
|
- * If this extent reaches EXT_MAX_BLOCK, it must be last.
|
|
+ * If this extent reaches EXT_MAX_BLOCKS, it must be last.
|
|
*
|
|
- * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCK,
|
|
+ * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCKS,
|
|
* this also indicates no more allocated blocks.
|
|
*
|
|
* XXX this might miss a single-block extent at EXT_MAX_BLOCK
|
|
*/
|
|
- if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK ||
|
|
- newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) {
|
|
+ if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCKS ||
|
|
+ newex->ec_block + newex->ec_len == EXT_MAX_BLOCKS) {
|
|
loff_t size = i_size_read(inode);
|
|
loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb);
|
|
|
|
@@ -4347,8 +4347,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
|
|
|
start_blk = start >> inode->i_sb->s_blocksize_bits;
|
|
last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits;
|
|
- if (last_blk >= EXT_MAX_BLOCK)
|
|
- last_blk = EXT_MAX_BLOCK-1;
|
|
+ if (last_blk >= EXT_MAX_BLOCKS)
|
|
+ last_blk = EXT_MAX_BLOCKS-1;
|
|
len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;
|
|
|
|
/*
|
|
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
|
|
index 2b8304b..f57455a 100644
|
|
--- a/fs/ext4/move_extent.c
|
|
+++ b/fs/ext4/move_extent.c
|
|
@@ -1002,12 +1002,12 @@ mext_check_arguments(struct inode *orig_inode,
|
|
return -EINVAL;
|
|
}
|
|
|
|
- if ((orig_start > EXT_MAX_BLOCK) ||
|
|
- (donor_start > EXT_MAX_BLOCK) ||
|
|
- (*len > EXT_MAX_BLOCK) ||
|
|
- (orig_start + *len > EXT_MAX_BLOCK)) {
|
|
+ if ((orig_start >= EXT_MAX_BLOCKS) ||
|
|
+ (donor_start >= EXT_MAX_BLOCKS) ||
|
|
+ (*len > EXT_MAX_BLOCKS) ||
|
|
+ (orig_start + *len >= EXT_MAX_BLOCKS)) {
|
|
ext4_debug("ext4 move extent: Can't handle over [%u] blocks "
|
|
- "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCK,
|
|
+ "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS,
|
|
orig_inode->i_ino, donor_inode->i_ino);
|
|
return -EINVAL;
|
|
}
|
|
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
|
|
index cc5c157..9ea71aa 100644
|
|
--- a/fs/ext4/super.c
|
|
+++ b/fs/ext4/super.c
|
|
@@ -2243,6 +2243,12 @@ static void ext4_orphan_cleanup(struct super_block *sb,
|
|
* in the vfs. ext4 inode has 48 bits of i_block in fsblock units,
|
|
* so that won't be a limiting factor.
|
|
*
|
|
+ * However there is other limiting factor. We do store extents in the form
|
|
+ * of starting block and length, hence the resulting length of the extent
|
|
+ * covering maximum file size must fit into on-disk format containers as
|
|
+ * well. Given that length is always by 1 unit bigger than max unit (because
|
|
+ * we count 0 as well) we have to lower the s_maxbytes by one fs block.
|
|
+ *
|
|
* Note, this does *not* consider any metadata overhead for vfs i_blocks.
|
|
*/
|
|
static loff_t ext4_max_size(int blkbits, int has_huge_files)
|
|
@@ -2264,10 +2270,13 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files)
|
|
upper_limit <<= blkbits;
|
|
}
|
|
|
|
- /* 32-bit extent-start container, ee_block */
|
|
- res = 1LL << 32;
|
|
+ /*
|
|
+ * 32-bit extent-start container, ee_block. We lower the maxbytes
|
|
+ * by one fs block, so ee_len can cover the extent of maximum file
|
|
+ * size
|
|
+ */
|
|
+ res = (1LL << 32) - 1;
|
|
res <<= blkbits;
|
|
- res -= 1;
|
|
|
|
/* Sanity check against vm- & vfs- imposed limits */
|
|
if (res > upper_limit)
|