309a85b686
ocfs2_block_group_alloc_discontig() disables chain relink by setting ac->ac_allow_chain_relink = 0 because it grabs clusters from multiple cluster groups. It doesn't keep the credits for every chain relink, but ocfs2_claim_suballoc_bits() overrides this in the following call trace: ocfs2_block_group_claim_bits()->ocfs2_claim_clusters()->__ocfs2_claim_clusters()->ocfs2_claim_suballoc_bits(). ocfs2_claim_suballoc_bits() sets ac->ac_allow_chain_relink = 1, calls ocfs2_search_chain() once and then disables it again, and then we run out of credits. The fix is to allow relink by default and disable it in ocfs2_block_group_alloc_discontig(). Without this patch, end users will run into a crash caused by running out of credits, with a backtrace like this: RIP: 0010:[<ffffffffa0808b14>] [<ffffffffa0808b14>] jbd2_journal_dirty_metadata+0x164/0x170 [jbd2] RSP: 0018:ffff8801b919b5b8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff88022139ddc0 RCX: ffff880159f652d0 RDX: ffff880178aa3000 RSI: ffff880159f652d0 RDI: ffff880087f09bf8 RBP: ffff8801b919b5e8 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000001e00 R11: 00000000000150b0 R12: ffff880159f652d0 R13: ffff8801a0cae908 R14: ffff880087f09bf8 R15: ffff88018d177800 FS: 00007fc9b0b6b6e0(0000) GS:ffff88022fd40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 000000000040819c CR3: 0000000184017000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process dd (pid: 9945, threadinfo ffff8801b919a000, task ffff880149a264c0) Call Trace: ocfs2_journal_dirty+0x2f/0x70 [ocfs2] ocfs2_relink_block_group+0x111/0x480 [ocfs2] ocfs2_search_chain+0x455/0x9a0 [ocfs2] ... 
Signed-off-by: Xiaowei.Hu <xiaowei.hu@oracle.com> Reviewed-by: Srinivas Eeda <srinivas.eeda@oracle.com> Cc: Mark Fasheh <mfasheh@suse.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
222 lines
7.1 KiB
C
222 lines
7.1 KiB
C
/* -*- mode: c; c-basic-offset: 8; -*-
|
|
* vim: noexpandtab sw=8 ts=8 sts=0:
|
|
*
|
|
* suballoc.h
|
|
*
|
|
* Defines sub allocator api
|
|
*
|
|
* Copyright (C) 2003, 2004 Oracle. All rights reserved.
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2 of the License, or (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public
|
|
* License along with this program; if not, write to the
|
|
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
* Boston, MA 02111-1307, USA.
|
|
*/
|
|
|
|
#ifndef _CHAINALLOC_H_
|
|
#define _CHAINALLOC_H_
|
|
|
|
struct ocfs2_suballoc_result;
|
|
typedef int (group_search_t)(struct inode *,
|
|
struct buffer_head *,
|
|
u32, /* bits_wanted */
|
|
u32, /* min_bits */
|
|
u64, /* max_block */
|
|
struct ocfs2_suballoc_result *);
|
|
/* found bits */
|
|
|
|
struct ocfs2_alloc_context {
|
|
struct inode *ac_inode; /* which bitmap are we allocating from? */
|
|
struct buffer_head *ac_bh; /* file entry bh */
|
|
u32 ac_alloc_slot; /* which slot are we allocating from? */
|
|
u32 ac_bits_wanted;
|
|
u32 ac_bits_given;
|
|
#define OCFS2_AC_USE_LOCAL 1
|
|
#define OCFS2_AC_USE_MAIN 2
|
|
#define OCFS2_AC_USE_INODE 3
|
|
#define OCFS2_AC_USE_META 4
|
|
u32 ac_which;
|
|
|
|
/* these are used by the chain search */
|
|
u16 ac_chain;
|
|
int ac_disable_chain_relink;
|
|
group_search_t *ac_group_search;
|
|
|
|
u64 ac_last_group;
|
|
u64 ac_max_block; /* Highest block number to allocate. 0 is
|
|
is the same as ~0 - unlimited */
|
|
|
|
int ac_find_loc_only; /* hack for reflink operation ordering */
|
|
struct ocfs2_suballoc_result *ac_find_loc_priv; /* */
|
|
|
|
struct ocfs2_alloc_reservation *ac_resv;
|
|
};
|
|
|
|
/*
 * NOTE(review): initialises per-superblock slot-stealing state, judging
 * by the name only - confirm against the definition.
 */
void ocfs2_init_steal_slots(struct ocfs2_super *osb);

/*
 * Release an allocation context.
 * NOTE(review): whether @ac may be NULL is not visible from this header.
 */
void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac);
|
|
static inline int ocfs2_alloc_context_bits_left(struct ocfs2_alloc_context *ac)
|
|
{
|
|
return ac->ac_bits_wanted - ac->ac_bits_given;
|
|
}
|
|
|
|
/*
|
|
* Please note that the caller must make sure that root_el is the root
|
|
* of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise
|
|
* the result may be wrong.
|
|
*/
|
|
int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
|
|
struct ocfs2_extent_list *root_el,
|
|
struct ocfs2_alloc_context **ac);
|
|
int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb,
|
|
int blocks,
|
|
struct ocfs2_alloc_context **ac);
|
|
int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
|
|
struct ocfs2_alloc_context **ac);
|
|
int ocfs2_reserve_clusters(struct ocfs2_super *osb,
|
|
u32 bits_wanted,
|
|
struct ocfs2_alloc_context **ac);
|
|
|
|
int ocfs2_claim_metadata(handle_t *handle,
|
|
struct ocfs2_alloc_context *ac,
|
|
u32 bits_wanted,
|
|
u64 *suballoc_loc,
|
|
u16 *suballoc_bit_start,
|
|
u32 *num_bits,
|
|
u64 *blkno_start);
|
|
int ocfs2_claim_new_inode(handle_t *handle,
|
|
struct inode *dir,
|
|
struct buffer_head *parent_fe_bh,
|
|
struct ocfs2_alloc_context *ac,
|
|
u64 *suballoc_loc,
|
|
u16 *suballoc_bit,
|
|
u64 *fe_blkno);
|
|
int ocfs2_claim_clusters(handle_t *handle,
|
|
struct ocfs2_alloc_context *ac,
|
|
u32 min_clusters,
|
|
u32 *cluster_start,
|
|
u32 *num_clusters);
|
|
/*
|
|
* Use this variant of ocfs2_claim_clusters to specify a maxiumum
|
|
* number of clusters smaller than the allocation reserved.
|
|
*/
|
|
int __ocfs2_claim_clusters(handle_t *handle,
|
|
struct ocfs2_alloc_context *ac,
|
|
u32 min_clusters,
|
|
u32 max_clusters,
|
|
u32 *cluster_start,
|
|
u32 *num_clusters);
|
|
|
|
int ocfs2_free_suballoc_bits(handle_t *handle,
|
|
struct inode *alloc_inode,
|
|
struct buffer_head *alloc_bh,
|
|
unsigned int start_bit,
|
|
u64 bg_blkno,
|
|
unsigned int count);
|
|
int ocfs2_free_dinode(handle_t *handle,
|
|
struct inode *inode_alloc_inode,
|
|
struct buffer_head *inode_alloc_bh,
|
|
struct ocfs2_dinode *di);
|
|
int ocfs2_free_clusters(handle_t *handle,
|
|
struct inode *bitmap_inode,
|
|
struct buffer_head *bitmap_bh,
|
|
u64 start_blk,
|
|
unsigned int num_clusters);
|
|
int ocfs2_release_clusters(handle_t *handle,
|
|
struct inode *bitmap_inode,
|
|
struct buffer_head *bitmap_bh,
|
|
u64 start_blk,
|
|
unsigned int num_clusters);
|
|
|
|
/*
 * Return @block minus @bit, with @bit widened to u64 first.
 * NOTE(review): by its name the result is the block number of the
 * suballocator group containing @block - confirm with callers.
 */
static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit)
{
	return block - (u64)bit;
}
|
|
|
|
static inline u32 ocfs2_cluster_from_desc(struct ocfs2_super *osb,
|
|
u64 bg_blkno)
|
|
{
|
|
/* This should work for all block group descriptors as only
|
|
* the 1st group descriptor of the cluster bitmap is
|
|
* different. */
|
|
|
|
if (bg_blkno == osb->first_cluster_group_blkno)
|
|
return 0;
|
|
|
|
/* the rest of the block groups are located at the beginning
|
|
* of their 1st cluster, so a direct translation just
|
|
* works. */
|
|
return ocfs2_blocks_to_clusters(osb->sb, bg_blkno);
|
|
}
|
|
|
|
static inline int ocfs2_is_cluster_bitmap(struct inode *inode)
|
|
{
|
|
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
|
|
return osb->bitmap_blkno == OCFS2_I(inode)->ip_blkno;
|
|
}
|
|
|
|
/* This is for local alloc ONLY. Others should use the task-specific
 * apis above. */
int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb,
				      struct ocfs2_alloc_context *ac);

/*
 * NOTE(review): by its name this releases what @ac holds without freeing
 * @ac itself - confirm against the definition.
 */
void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac);
|
|
|
|
/* given a cluster offset, calculate which block group it belongs to
|
|
* and return that block offset. */
|
|
u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster);
|
|
|
|
/*
 * By default, ocfs2_read_group_descriptor() calls ocfs2_error() when it
 * finds a problem.  A caller that wants to check a group descriptor
 * without going readonly should read the block with ocfs2_read_block[s]()
 * and then check it with this function.  This is only resize, really.
 * Everyone else should be using ocfs2_read_group_descriptor().
 */
int ocfs2_check_group_descriptor(struct super_block *sb,
				 struct ocfs2_dinode *di,
				 struct buffer_head *bh);
|
|
/*
|
|
* Read a group descriptor block into *bh. If *bh is NULL, a bh will be
|
|
* allocated. This is a cached read. The descriptor will be validated with
|
|
* ocfs2_validate_group_descriptor().
|
|
*/
|
|
int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di,
|
|
u64 gd_blkno, struct buffer_head **bh);
|
|
|
|
int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et,
|
|
u32 clusters_to_add, u32 extents_to_split,
|
|
struct ocfs2_alloc_context **data_ac,
|
|
struct ocfs2_alloc_context **meta_ac);
|
|
|
|
int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res);
|
|
|
|
|
|
|
|
/*
|
|
* The following two interfaces are for ocfs2_create_inode_in_orphan().
|
|
*/
|
|
int ocfs2_find_new_inode_loc(struct inode *dir,
|
|
struct buffer_head *parent_fe_bh,
|
|
struct ocfs2_alloc_context *ac,
|
|
u64 *fe_blkno);
|
|
|
|
int ocfs2_claim_new_inode_at_loc(handle_t *handle,
|
|
struct inode *dir,
|
|
struct ocfs2_alloc_context *ac,
|
|
u64 *suballoc_loc,
|
|
u16 *suballoc_bit,
|
|
u64 di_blkno);
|
|
|
|
#endif /* _CHAINALLOC_H_ */
|