kernel-ark/fs/xfs/xfs_dir2_node.c
David Chinner da353b0d64 [XFS] Radix tree based inode caching
One of the perpetual scaling problems XFS has is indexing it's incore
inodes. We currently uses hashes and the default hash sizes chosen can
only ever be a tradeoff between memory consumption and the maximum
realistic size of the cache.

As a result, anyone who has millions of inodes cached on a filesystem
needs to tunes the size of the cache via the ihashsize mount option to
allow decent scalability with inode cache operations.

A further problem is the separate inode cluster hash, whose size is based
on the ihashsize but is smaller, and so under certain conditions (sparse
cluster cache population) this can become a limitation long before the
inode hash is causing issues.

The following patchset removes the inode hash and cluster hash and
replaces them with radix trees to avoid the scalability limitations of the
hashes. It also reduces the size of the inodes by 3 pointers....

SGI-PV: 969561
SGI-Modid: xfs-linux-melb:xfs-kern:29481a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Tim Shimmin <tes@sgi.com>
2007-10-15 16:50:50 +10:00

2001 lines
57 KiB
C

/*
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it would be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_types.h"
#include "xfs_log.h"
#include "xfs_inum.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir2.h"
#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_dir2_sf.h"
#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
#include "xfs_dir2_data.h"
#include "xfs_dir2_leaf.h"
#include "xfs_dir2_block.h"
#include "xfs_dir2_node.h"
#include "xfs_dir2_trace.h"
#include "xfs_error.h"
/*
* Function declarations.
*/
static void xfs_dir2_free_log_header(xfs_trans_t *tp, xfs_dabuf_t *bp);
static int xfs_dir2_leafn_add(xfs_dabuf_t *bp, xfs_da_args_t *args, int index);
#ifdef DEBUG
static void xfs_dir2_leafn_check(xfs_inode_t *dp, xfs_dabuf_t *bp);
#else
#define xfs_dir2_leafn_check(dp, bp)
#endif
static void xfs_dir2_leafn_moveents(xfs_da_args_t *args, xfs_dabuf_t *bp_s,
int start_s, xfs_dabuf_t *bp_d, int start_d,
int count);
static void xfs_dir2_leafn_rebalance(xfs_da_state_t *state,
xfs_da_state_blk_t *blk1,
xfs_da_state_blk_t *blk2);
static int xfs_dir2_leafn_remove(xfs_da_args_t *args, xfs_dabuf_t *bp,
int index, xfs_da_state_blk_t *dblk,
int *rval);
static int xfs_dir2_node_addname_int(xfs_da_args_t *args,
xfs_da_state_blk_t *fblk);
/*
* Log entries from a freespace block.
*/
void
xfs_dir2_free_log_bests(
xfs_trans_t *tp, /* transaction pointer */
xfs_dabuf_t *bp, /* freespace buffer */
int first, /* first entry to log */
int last) /* last entry to log */
{
xfs_dir2_free_t *free; /* freespace structure */
free = bp->data;
ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
xfs_da_log_buf(tp, bp,
(uint)((char *)&free->bests[first] - (char *)free),
(uint)((char *)&free->bests[last] - (char *)free +
sizeof(free->bests[0]) - 1));
}
/*
* Log header from a freespace block.
*/
static void
xfs_dir2_free_log_header(
xfs_trans_t *tp, /* transaction pointer */
xfs_dabuf_t *bp) /* freespace buffer */
{
xfs_dir2_free_t *free; /* freespace structure */
free = bp->data;
ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
xfs_da_log_buf(tp, bp, (uint)((char *)&free->hdr - (char *)free),
(uint)(sizeof(xfs_dir2_free_hdr_t) - 1));
}
/*
* Convert a leaf-format directory to a node-format directory.
* We need to change the magic number of the leaf block, and copy
* the freespace table out of the leaf block into its own block.
*/
int /* error */
xfs_dir2_leaf_to_node(
xfs_da_args_t *args, /* operation arguments */
xfs_dabuf_t *lbp) /* leaf buffer */
{
xfs_inode_t *dp; /* incore directory inode */
int error; /* error return value */
xfs_dabuf_t *fbp; /* freespace buffer */
xfs_dir2_db_t fdb; /* freespace block number */
xfs_dir2_free_t *free; /* freespace structure */
__be16 *from; /* pointer to freespace entry */
int i; /* leaf freespace index */
xfs_dir2_leaf_t *leaf; /* leaf structure */
xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */
xfs_mount_t *mp; /* filesystem mount point */
int n; /* count of live freespc ents */
xfs_dir2_data_off_t off; /* freespace entry value */
__be16 *to; /* pointer to freespace entry */
xfs_trans_t *tp; /* transaction pointer */
xfs_dir2_trace_args_b("leaf_to_node", args, lbp);
dp = args->dp;
mp = dp->i_mount;
tp = args->trans;
/*
* Add a freespace block to the directory.
*/
if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE, &fdb))) {
return error;
}
ASSERT(fdb == XFS_DIR2_FREE_FIRSTDB(mp));
/*
* Get the buffer for the new freespace block.
*/
if ((error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, fdb), -1, &fbp,
XFS_DATA_FORK))) {
return error;
}
ASSERT(fbp != NULL);
free = fbp->data;
leaf = lbp->data;
ltp = xfs_dir2_leaf_tail_p(mp, leaf);
/*
* Initialize the freespace block header.
*/
free->hdr.magic = cpu_to_be32(XFS_DIR2_FREE_MAGIC);
free->hdr.firstdb = 0;
ASSERT(be32_to_cpu(ltp->bestcount) <= (uint)dp->i_d.di_size / mp->m_dirblksize);
free->hdr.nvalid = ltp->bestcount;
/*
* Copy freespace entries from the leaf block to the new block.
* Count active entries.
*/
for (i = n = 0, from = xfs_dir2_leaf_bests_p(ltp), to = free->bests;
i < be32_to_cpu(ltp->bestcount); i++, from++, to++) {
if ((off = be16_to_cpu(*from)) != NULLDATAOFF)
n++;
*to = cpu_to_be16(off);
}
free->hdr.nused = cpu_to_be32(n);
leaf->hdr.info.magic = cpu_to_be16(XFS_DIR2_LEAFN_MAGIC);
/*
* Log everything.
*/
xfs_dir2_leaf_log_header(tp, lbp);
xfs_dir2_free_log_header(tp, fbp);
xfs_dir2_free_log_bests(tp, fbp, 0, be32_to_cpu(free->hdr.nvalid) - 1);
xfs_da_buf_done(fbp);
xfs_dir2_leafn_check(dp, lbp);
return 0;
}
/*
* Add a leaf entry to a leaf block in a node-form directory.
* The other work necessary is done from the caller.
*/
static int /* error */
xfs_dir2_leafn_add(
xfs_dabuf_t *bp, /* leaf buffer */
xfs_da_args_t *args, /* operation arguments */
int index) /* insertion pt for new entry */
{
int compact; /* compacting stale leaves */
xfs_inode_t *dp; /* incore directory inode */
int highstale; /* next stale entry */
xfs_dir2_leaf_t *leaf; /* leaf structure */
xfs_dir2_leaf_entry_t *lep; /* leaf entry */
int lfloghigh; /* high leaf entry logging */
int lfloglow; /* low leaf entry logging */
int lowstale; /* previous stale entry */
xfs_mount_t *mp; /* filesystem mount point */
xfs_trans_t *tp; /* transaction pointer */
xfs_dir2_trace_args_sb("leafn_add", args, index, bp);
dp = args->dp;
mp = dp->i_mount;
tp = args->trans;
leaf = bp->data;
/*
* Quick check just to make sure we are not going to index
* into other peoples memory
*/
if (index < 0)
return XFS_ERROR(EFSCORRUPTED);
/*
* If there are already the maximum number of leaf entries in
* the block, if there are no stale entries it won't fit.
* Caller will do a split. If there are stale entries we'll do
* a compact.
*/
if (be16_to_cpu(leaf->hdr.count) == xfs_dir2_max_leaf_ents(mp)) {
if (!leaf->hdr.stale)
return XFS_ERROR(ENOSPC);
compact = be16_to_cpu(leaf->hdr.stale) > 1;
} else
compact = 0;
ASSERT(index == 0 || be32_to_cpu(leaf->ents[index - 1].hashval) <= args->hashval);
ASSERT(index == be16_to_cpu(leaf->hdr.count) ||
be32_to_cpu(leaf->ents[index].hashval) >= args->hashval);
if (args->justcheck)
return 0;
/*
* Compact out all but one stale leaf entry. Leaves behind
* the entry closest to index.
*/
if (compact) {
xfs_dir2_leaf_compact_x1(bp, &index, &lowstale, &highstale,
&lfloglow, &lfloghigh);
}
/*
* Set impossible logging indices for this case.
*/
else if (leaf->hdr.stale) {
lfloglow = be16_to_cpu(leaf->hdr.count);
lfloghigh = -1;
}
/*
* No stale entries, just insert a space for the new entry.
*/
if (!leaf->hdr.stale) {
lep = &leaf->ents[index];
if (index < be16_to_cpu(leaf->hdr.count))
memmove(lep + 1, lep,
(be16_to_cpu(leaf->hdr.count) - index) * sizeof(*lep));
lfloglow = index;
lfloghigh = be16_to_cpu(leaf->hdr.count);
be16_add(&leaf->hdr.count, 1);
}
/*
* There are stale entries. We'll use one for the new entry.
*/
else {
/*
* If we didn't do a compact then we need to figure out
* which stale entry will be used.
*/
if (compact == 0) {
/*
* Find first stale entry before our insertion point.
*/
for (lowstale = index - 1;
lowstale >= 0 &&
be32_to_cpu(leaf->ents[lowstale].address) !=
XFS_DIR2_NULL_DATAPTR;
lowstale--)
continue;
/*
* Find next stale entry after insertion point.
* Stop looking if the answer would be worse than
* lowstale already found.
*/
for (highstale = index;
highstale < be16_to_cpu(leaf->hdr.count) &&
be32_to_cpu(leaf->ents[highstale].address) !=
XFS_DIR2_NULL_DATAPTR &&
(lowstale < 0 ||
index - lowstale - 1 >= highstale - index);
highstale++)
continue;
}
/*
* Using the low stale entry.
* Shift entries up toward the stale slot.
*/
if (lowstale >= 0 &&
(highstale == be16_to_cpu(leaf->hdr.count) ||
index - lowstale - 1 < highstale - index)) {
ASSERT(be32_to_cpu(leaf->ents[lowstale].address) ==
XFS_DIR2_NULL_DATAPTR);
ASSERT(index - lowstale - 1 >= 0);
if (index - lowstale - 1 > 0)
memmove(&leaf->ents[lowstale],
&leaf->ents[lowstale + 1],
(index - lowstale - 1) * sizeof(*lep));
lep = &leaf->ents[index - 1];
lfloglow = MIN(lowstale, lfloglow);
lfloghigh = MAX(index - 1, lfloghigh);
}
/*
* Using the high stale entry.
* Shift entries down toward the stale slot.
*/
else {
ASSERT(be32_to_cpu(leaf->ents[highstale].address) ==
XFS_DIR2_NULL_DATAPTR);
ASSERT(highstale - index >= 0);
if (highstale - index > 0)
memmove(&leaf->ents[index + 1],
&leaf->ents[index],
(highstale - index) * sizeof(*lep));
lep = &leaf->ents[index];
lfloglow = MIN(index, lfloglow);
lfloghigh = MAX(highstale, lfloghigh);
}
be16_add(&leaf->hdr.stale, -1);
}
/*
* Insert the new entry, log everything.
*/
lep->hashval = cpu_to_be32(args->hashval);
lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp,
args->blkno, args->index));
xfs_dir2_leaf_log_header(tp, bp);
xfs_dir2_leaf_log_ents(tp, bp, lfloglow, lfloghigh);
xfs_dir2_leafn_check(dp, bp);
return 0;
}
#ifdef DEBUG
/*
* Check internal consistency of a leafn block.
*/
void
xfs_dir2_leafn_check(
xfs_inode_t *dp, /* incore directory inode */
xfs_dabuf_t *bp) /* leaf buffer */
{
int i; /* leaf index */
xfs_dir2_leaf_t *leaf; /* leaf structure */
xfs_mount_t *mp; /* filesystem mount point */
int stale; /* count of stale leaves */
leaf = bp->data;
mp = dp->i_mount;
ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp));
for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) {
if (i + 1 < be16_to_cpu(leaf->hdr.count)) {
ASSERT(be32_to_cpu(leaf->ents[i].hashval) <=
be32_to_cpu(leaf->ents[i + 1].hashval));
}
if (be32_to_cpu(leaf->ents[i].address) == XFS_DIR2_NULL_DATAPTR)
stale++;
}
ASSERT(be16_to_cpu(leaf->hdr.stale) == stale);
}
#endif /* DEBUG */
/*
* Return the last hash value in the leaf.
* Stale entries are ok.
*/
xfs_dahash_t /* hash value */
xfs_dir2_leafn_lasthash(
xfs_dabuf_t *bp, /* leaf buffer */
int *count) /* count of entries in leaf */
{
xfs_dir2_leaf_t *leaf; /* leaf structure */
leaf = bp->data;
ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
if (count)
*count = be16_to_cpu(leaf->hdr.count);
if (!leaf->hdr.count)
return 0;
return be32_to_cpu(leaf->ents[be16_to_cpu(leaf->hdr.count) - 1].hashval);
}
/*
* Look up a leaf entry in a node-format leaf block.
* If this is an addname then the extrablk in state is a freespace block,
* otherwise it's a data block.
*/
int
xfs_dir2_leafn_lookup_int(
xfs_dabuf_t *bp, /* leaf buffer */
xfs_da_args_t *args, /* operation arguments */
int *indexp, /* out: leaf entry index */
xfs_da_state_t *state) /* state to fill in */
{
xfs_dabuf_t *curbp; /* current data/free buffer */
xfs_dir2_db_t curdb; /* current data block number */
xfs_dir2_db_t curfdb; /* current free block number */
xfs_dir2_data_entry_t *dep; /* data block entry */
xfs_inode_t *dp; /* incore directory inode */
int error; /* error return value */
int fi; /* free entry index */
xfs_dir2_free_t *free=NULL; /* free block structure */
int index; /* leaf entry index */
xfs_dir2_leaf_t *leaf; /* leaf structure */
int length=0; /* length of new data entry */
xfs_dir2_leaf_entry_t *lep; /* leaf entry */
xfs_mount_t *mp; /* filesystem mount point */
xfs_dir2_db_t newdb; /* new data block number */
xfs_dir2_db_t newfdb; /* new free block number */
xfs_trans_t *tp; /* transaction pointer */
dp = args->dp;
tp = args->trans;
mp = dp->i_mount;
leaf = bp->data;
ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
#ifdef __KERNEL__
ASSERT(be16_to_cpu(leaf->hdr.count) > 0);
#endif
xfs_dir2_leafn_check(dp, bp);
/*
* Look up the hash value in the leaf entries.
*/
index = xfs_dir2_leaf_search_hash(args, bp);
/*
* Do we have a buffer coming in?
*/
if (state->extravalid)
curbp = state->extrablk.bp;
else
curbp = NULL;
/*
* For addname, it's a free block buffer, get the block number.
*/
if (args->addname) {
curfdb = curbp ? state->extrablk.blkno : -1;
curdb = -1;
length = xfs_dir2_data_entsize(args->namelen);
if ((free = (curbp ? curbp->data : NULL)))
ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
}
/*
* For others, it's a data block buffer, get the block number.
*/
else {
curfdb = -1;
curdb = curbp ? state->extrablk.blkno : -1;
}
/*
* Loop over leaf entries with the right hash value.
*/
for (lep = &leaf->ents[index];
index < be16_to_cpu(leaf->hdr.count) && be32_to_cpu(lep->hashval) == args->hashval;
lep++, index++) {
/*
* Skip stale leaf entries.
*/
if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)
continue;
/*
* Pull the data block number from the entry.
*/
newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
/*
* For addname, we're looking for a place to put the new entry.
* We want to use a data block with an entry of equal
* hash value to ours if there is one with room.
*/
if (args->addname) {
/*
* If this block isn't the data block we already have
* in hand, take a look at it.
*/
if (newdb != curdb) {
curdb = newdb;
/*
* Convert the data block to the free block
* holding its freespace information.
*/
newfdb = xfs_dir2_db_to_fdb(mp, newdb);
/*
* If it's not the one we have in hand,
* read it in.
*/
if (newfdb != curfdb) {
/*
* If we had one before, drop it.
*/
if (curbp)
xfs_da_brelse(tp, curbp);
/*
* Read the free block.
*/
if ((error = xfs_da_read_buf(tp, dp,
xfs_dir2_db_to_da(mp,
newfdb),
-1, &curbp,
XFS_DATA_FORK))) {
return error;
}
free = curbp->data;
ASSERT(be32_to_cpu(free->hdr.magic) ==
XFS_DIR2_FREE_MAGIC);
ASSERT((be32_to_cpu(free->hdr.firstdb) %
XFS_DIR2_MAX_FREE_BESTS(mp)) ==
0);
ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb);
ASSERT(curdb <
be32_to_cpu(free->hdr.firstdb) +
be32_to_cpu(free->hdr.nvalid));
}
/*
* Get the index for our entry.
*/
fi = xfs_dir2_db_to_fdindex(mp, curdb);
/*
* If it has room, return it.
*/
if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) {
XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",
XFS_ERRLEVEL_LOW, mp);
if (curfdb != newfdb)
xfs_da_brelse(tp, curbp);
return XFS_ERROR(EFSCORRUPTED);
}
curfdb = newfdb;
if (be16_to_cpu(free->bests[fi]) >= length) {
*indexp = index;
state->extravalid = 1;
state->extrablk.bp = curbp;
state->extrablk.blkno = curfdb;
state->extrablk.index = fi;
state->extrablk.magic =
XFS_DIR2_FREE_MAGIC;
ASSERT(args->oknoent);
return XFS_ERROR(ENOENT);
}
}
}
/*
* Not adding a new entry, so we really want to find
* the name given to us.
*/
else {
/*
* If it's a different data block, go get it.
*/
if (newdb != curdb) {
/*
* If we had a block before, drop it.
*/
if (curbp)
xfs_da_brelse(tp, curbp);
/*
* Read the data block.
*/
if ((error =
xfs_da_read_buf(tp, dp,
xfs_dir2_db_to_da(mp, newdb), -1,
&curbp, XFS_DATA_FORK))) {
return error;
}
xfs_dir2_data_check(dp, curbp);
curdb = newdb;
}
/*
* Point to the data entry.
*/
dep = (xfs_dir2_data_entry_t *)
((char *)curbp->data +
xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
/*
* Compare the entry, return it if it matches.
*/
if (dep->namelen == args->namelen &&
dep->name[0] == args->name[0] &&
memcmp(dep->name, args->name, args->namelen) == 0) {
args->inumber = be64_to_cpu(dep->inumber);
*indexp = index;
state->extravalid = 1;
state->extrablk.bp = curbp;
state->extrablk.blkno = curdb;
state->extrablk.index =
(int)((char *)dep -
(char *)curbp->data);
state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
return XFS_ERROR(EEXIST);
}
}
}
/*
* Didn't find a match.
* If we are holding a buffer, give it back in case our caller
* finds it useful.
*/
if ((state->extravalid = (curbp != NULL))) {
state->extrablk.bp = curbp;
state->extrablk.index = -1;
/*
* For addname, giving back a free block.
*/
if (args->addname) {
state->extrablk.blkno = curfdb;
state->extrablk.magic = XFS_DIR2_FREE_MAGIC;
}
/*
* For other callers, giving back a data block.
*/
else {
state->extrablk.blkno = curdb;
state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
}
}
/*
* Return the final index, that will be the insertion point.
*/
*indexp = index;
ASSERT(index == be16_to_cpu(leaf->hdr.count) || args->oknoent);
return XFS_ERROR(ENOENT);
}
/*
* Move count leaf entries from source to destination leaf.
* Log entries and headers. Stale entries are preserved.
*/
static void
xfs_dir2_leafn_moveents(
xfs_da_args_t *args, /* operation arguments */
xfs_dabuf_t *bp_s, /* source leaf buffer */
int start_s, /* source leaf index */
xfs_dabuf_t *bp_d, /* destination leaf buffer */
int start_d, /* destination leaf index */
int count) /* count of leaves to copy */
{
xfs_dir2_leaf_t *leaf_d; /* destination leaf structure */
xfs_dir2_leaf_t *leaf_s; /* source leaf structure */
int stale; /* count stale leaves copied */
xfs_trans_t *tp; /* transaction pointer */
xfs_dir2_trace_args_bibii("leafn_moveents", args, bp_s, start_s, bp_d,
start_d, count);
/*
* Silently return if nothing to do.
*/
if (count == 0) {
return;
}
tp = args->trans;
leaf_s = bp_s->data;
leaf_d = bp_d->data;
/*
* If the destination index is not the end of the current
* destination leaf entries, open up a hole in the destination
* to hold the new entries.
*/
if (start_d < be16_to_cpu(leaf_d->hdr.count)) {
memmove(&leaf_d->ents[start_d + count], &leaf_d->ents[start_d],
(be16_to_cpu(leaf_d->hdr.count) - start_d) *
sizeof(xfs_dir2_leaf_entry_t));
xfs_dir2_leaf_log_ents(tp, bp_d, start_d + count,
count + be16_to_cpu(leaf_d->hdr.count) - 1);
}
/*
* If the source has stale leaves, count the ones in the copy range
* so we can update the header correctly.
*/
if (leaf_s->hdr.stale) {
int i; /* temp leaf index */
for (i = start_s, stale = 0; i < start_s + count; i++) {
if (be32_to_cpu(leaf_s->ents[i].address) == XFS_DIR2_NULL_DATAPTR)
stale++;
}
} else
stale = 0;
/*
* Copy the leaf entries from source to destination.
*/
memcpy(&leaf_d->ents[start_d], &leaf_s->ents[start_s],
count * sizeof(xfs_dir2_leaf_entry_t));
xfs_dir2_leaf_log_ents(tp, bp_d, start_d, start_d + count - 1);
/*
* If there are source entries after the ones we copied,
* delete the ones we copied by sliding the next ones down.
*/
if (start_s + count < be16_to_cpu(leaf_s->hdr.count)) {
memmove(&leaf_s->ents[start_s], &leaf_s->ents[start_s + count],
count * sizeof(xfs_dir2_leaf_entry_t));
xfs_dir2_leaf_log_ents(tp, bp_s, start_s, start_s + count - 1);
}
/*
* Update the headers and log them.
*/
be16_add(&leaf_s->hdr.count, -(count));
be16_add(&leaf_s->hdr.stale, -(stale));
be16_add(&leaf_d->hdr.count, count);
be16_add(&leaf_d->hdr.stale, stale);
xfs_dir2_leaf_log_header(tp, bp_s);
xfs_dir2_leaf_log_header(tp, bp_d);
xfs_dir2_leafn_check(args->dp, bp_s);
xfs_dir2_leafn_check(args->dp, bp_d);
}
/*
* Determine the sort order of two leaf blocks.
* Returns 1 if both are valid and leaf2 should be before leaf1, else 0.
*/
int /* sort order */
xfs_dir2_leafn_order(
xfs_dabuf_t *leaf1_bp, /* leaf1 buffer */
xfs_dabuf_t *leaf2_bp) /* leaf2 buffer */
{
xfs_dir2_leaf_t *leaf1; /* leaf1 structure */
xfs_dir2_leaf_t *leaf2; /* leaf2 structure */
leaf1 = leaf1_bp->data;
leaf2 = leaf2_bp->data;
ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
if (be16_to_cpu(leaf1->hdr.count) > 0 &&
be16_to_cpu(leaf2->hdr.count) > 0 &&
(be32_to_cpu(leaf2->ents[0].hashval) < be32_to_cpu(leaf1->ents[0].hashval) ||
be32_to_cpu(leaf2->ents[be16_to_cpu(leaf2->hdr.count) - 1].hashval) <
be32_to_cpu(leaf1->ents[be16_to_cpu(leaf1->hdr.count) - 1].hashval)))
return 1;
return 0;
}
/*
* Rebalance leaf entries between two leaf blocks.
* This is actually only called when the second block is new,
* though the code deals with the general case.
* A new entry will be inserted in one of the blocks, and that
* entry is taken into account when balancing.
*/
static void
xfs_dir2_leafn_rebalance(
xfs_da_state_t *state, /* btree cursor */
xfs_da_state_blk_t *blk1, /* first btree block */
xfs_da_state_blk_t *blk2) /* second btree block */
{
xfs_da_args_t *args; /* operation arguments */
int count; /* count (& direction) leaves */
int isleft; /* new goes in left leaf */
xfs_dir2_leaf_t *leaf1; /* first leaf structure */
xfs_dir2_leaf_t *leaf2; /* second leaf structure */
int mid; /* midpoint leaf index */
#ifdef DEBUG
int oldstale; /* old count of stale leaves */
#endif
int oldsum; /* old total leaf count */
int swap; /* swapped leaf blocks */
args = state->args;
/*
* If the block order is wrong, swap the arguments.
*/
if ((swap = xfs_dir2_leafn_order(blk1->bp, blk2->bp))) {
xfs_da_state_blk_t *tmp; /* temp for block swap */
tmp = blk1;
blk1 = blk2;
blk2 = tmp;
}
leaf1 = blk1->bp->data;
leaf2 = blk2->bp->data;
oldsum = be16_to_cpu(leaf1->hdr.count) + be16_to_cpu(leaf2->hdr.count);
#ifdef DEBUG
oldstale = be16_to_cpu(leaf1->hdr.stale) + be16_to_cpu(leaf2->hdr.stale);
#endif
mid = oldsum >> 1;
/*
* If the old leaf count was odd then the new one will be even,
* so we need to divide the new count evenly.
*/
if (oldsum & 1) {
xfs_dahash_t midhash; /* middle entry hash value */
if (mid >= be16_to_cpu(leaf1->hdr.count))
midhash = be32_to_cpu(leaf2->ents[mid - be16_to_cpu(leaf1->hdr.count)].hashval);
else
midhash = be32_to_cpu(leaf1->ents[mid].hashval);
isleft = args->hashval <= midhash;
}
/*
* If the old count is even then the new count is odd, so there's
* no preferred side for the new entry.
* Pick the left one.
*/
else
isleft = 1;
/*
* Calculate moved entry count. Positive means left-to-right,
* negative means right-to-left. Then move the entries.
*/
count = be16_to_cpu(leaf1->hdr.count) - mid + (isleft == 0);
if (count > 0)
xfs_dir2_leafn_moveents(args, blk1->bp,
be16_to_cpu(leaf1->hdr.count) - count, blk2->bp, 0, count);
else if (count < 0)
xfs_dir2_leafn_moveents(args, blk2->bp, 0, blk1->bp,
be16_to_cpu(leaf1->hdr.count), count);
ASSERT(be16_to_cpu(leaf1->hdr.count) + be16_to_cpu(leaf2->hdr.count) == oldsum);
ASSERT(be16_to_cpu(leaf1->hdr.stale) + be16_to_cpu(leaf2->hdr.stale) == oldstale);
/*
* Mark whether we're inserting into the old or new leaf.
*/
if (be16_to_cpu(leaf1->hdr.count) < be16_to_cpu(leaf2->hdr.count))
state->inleaf = swap;
else if (be16_to_cpu(leaf1->hdr.count) > be16_to_cpu(leaf2->hdr.count))
state->inleaf = !swap;
else
state->inleaf =
swap ^ (blk1->index <= be16_to_cpu(leaf1->hdr.count));
/*
* Adjust the expected index for insertion.
*/
if (!state->inleaf)
blk2->index = blk1->index - be16_to_cpu(leaf1->hdr.count);
/*
* Finally sanity check just to make sure we are not returning a negative index
*/
if(blk2->index < 0) {
state->inleaf = 1;
blk2->index = 0;
cmn_err(CE_ALERT,
"xfs_dir2_leafn_rebalance: picked the wrong leaf? reverting original leaf: "
"blk1->index %d\n",
blk1->index);
}
}
/*
* Remove an entry from a node directory.
* This removes the leaf entry and the data entry,
* and updates the free block if necessary.
*/
static int /* error */
xfs_dir2_leafn_remove(
xfs_da_args_t *args, /* operation arguments */
xfs_dabuf_t *bp, /* leaf buffer */
int index, /* leaf entry index */
xfs_da_state_blk_t *dblk, /* data block */
int *rval) /* resulting block needs join */
{
xfs_dir2_data_t *data; /* data block structure */
xfs_dir2_db_t db; /* data block number */
xfs_dabuf_t *dbp; /* data block buffer */
xfs_dir2_data_entry_t *dep; /* data block entry */
xfs_inode_t *dp; /* incore directory inode */
xfs_dir2_leaf_t *leaf; /* leaf structure */
xfs_dir2_leaf_entry_t *lep; /* leaf entry */
int longest; /* longest data free entry */
int off; /* data block entry offset */
xfs_mount_t *mp; /* filesystem mount point */
int needlog; /* need to log data header */
int needscan; /* need to rescan data frees */
xfs_trans_t *tp; /* transaction pointer */
xfs_dir2_trace_args_sb("leafn_remove", args, index, bp);
dp = args->dp;
tp = args->trans;
mp = dp->i_mount;
leaf = bp->data;
ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
/*
* Point to the entry we're removing.
*/
lep = &leaf->ents[index];
/*
* Extract the data block and offset from the entry.
*/
db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
ASSERT(dblk->blkno == db);
off = xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address));
ASSERT(dblk->index == off);
/*
* Kill the leaf entry by marking it stale.
* Log the leaf block changes.
*/
be16_add(&leaf->hdr.stale, 1);
xfs_dir2_leaf_log_header(tp, bp);
lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
xfs_dir2_leaf_log_ents(tp, bp, index, index);
/*
* Make the data entry free. Keep track of the longest freespace
* in the data block in case it changes.
*/
dbp = dblk->bp;
data = dbp->data;
dep = (xfs_dir2_data_entry_t *)((char *)data + off);
longest = be16_to_cpu(data->hdr.bestfree[0].length);
needlog = needscan = 0;
xfs_dir2_data_make_free(tp, dbp, off,
xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
/*
* Rescan the data block freespaces for bestfree.
* Log the data block header if needed.
*/
if (needscan)
xfs_dir2_data_freescan(mp, data, &needlog);
if (needlog)
xfs_dir2_data_log_header(tp, dbp);
xfs_dir2_data_check(dp, dbp);
/*
* If the longest data block freespace changes, need to update
* the corresponding freeblock entry.
*/
if (longest < be16_to_cpu(data->hdr.bestfree[0].length)) {
int error; /* error return value */
xfs_dabuf_t *fbp; /* freeblock buffer */
xfs_dir2_db_t fdb; /* freeblock block number */
int findex; /* index in freeblock entries */
xfs_dir2_free_t *free; /* freeblock structure */
int logfree; /* need to log free entry */
/*
* Convert the data block number to a free block,
* read in the free block.
*/
fdb = xfs_dir2_db_to_fdb(mp, db);
if ((error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, fdb),
-1, &fbp, XFS_DATA_FORK))) {
return error;
}
free = fbp->data;
ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
ASSERT(be32_to_cpu(free->hdr.firstdb) ==
XFS_DIR2_MAX_FREE_BESTS(mp) *
(fdb - XFS_DIR2_FREE_FIRSTDB(mp)));
/*
* Calculate which entry we need to fix.
*/
findex = xfs_dir2_db_to_fdindex(mp, db);
longest = be16_to_cpu(data->hdr.bestfree[0].length);
/*
* If the data block is now empty we can get rid of it
* (usually).
*/
if (longest == mp->m_dirblksize - (uint)sizeof(data->hdr)) {
/*
* Try to punch out the data block.
*/
error = xfs_dir2_shrink_inode(args, db, dbp);
if (error == 0) {
dblk->bp = NULL;
data = NULL;
}
/*
* We can get ENOSPC if there's no space reservation.
* In this case just drop the buffer and some one else
* will eventually get rid of the empty block.
*/
else if (error == ENOSPC && args->total == 0)
xfs_da_buf_done(dbp);
else
return error;
}
/*
* If we got rid of the data block, we can eliminate that entry
* in the free block.
*/
if (data == NULL) {
/*
* One less used entry in the free table.
*/
be32_add(&free->hdr.nused, -1);
xfs_dir2_free_log_header(tp, fbp);
/*
* If this was the last entry in the table, we can
* trim the table size back. There might be other
* entries at the end referring to non-existent
* data blocks, get those too.
*/
if (findex == be32_to_cpu(free->hdr.nvalid) - 1) {
int i; /* free entry index */
for (i = findex - 1;
i >= 0 && be16_to_cpu(free->bests[i]) == NULLDATAOFF;
i--)
continue;
free->hdr.nvalid = cpu_to_be32(i + 1);
logfree = 0;
}
/*
* Not the last entry, just punch it out.
*/
else {
free->bests[findex] = cpu_to_be16(NULLDATAOFF);
logfree = 1;
}
/*
* If there are no useful entries left in the block,
* get rid of the block if we can.
*/
if (!free->hdr.nused) {
error = xfs_dir2_shrink_inode(args, fdb, fbp);
if (error == 0) {
fbp = NULL;
logfree = 0;
} else if (error != ENOSPC || args->total != 0)
return error;
/*
* It's possible to get ENOSPC if there is no
* space reservation. In this case some one
* else will eventually get rid of this block.
*/
}
}
/*
* Data block is not empty, just set the free entry to
* the new value.
*/
else {
free->bests[findex] = cpu_to_be16(longest);
logfree = 1;
}
/*
* Log the free entry that changed, unless we got rid of it.
*/
if (logfree)
xfs_dir2_free_log_bests(tp, fbp, findex, findex);
/*
* Drop the buffer if we still have it.
*/
if (fbp)
xfs_da_buf_done(fbp);
}
xfs_dir2_leafn_check(dp, bp);
/*
* Return indication of whether this leaf block is emtpy enough
* to justify trying to join it with a neighbor.
*/
*rval =
((uint)sizeof(leaf->hdr) +
(uint)sizeof(leaf->ents[0]) *
(be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale))) <
mp->m_dir_magicpct;
return 0;
}
/*
* Split the leaf entries in the old block into old and new blocks.
*/
int /* error */
xfs_dir2_leafn_split(
xfs_da_state_t *state, /* btree cursor */
xfs_da_state_blk_t *oldblk, /* original block */
xfs_da_state_blk_t *newblk) /* newly created block */
{
xfs_da_args_t *args; /* operation arguments */
xfs_dablk_t blkno; /* new leaf block number */
int error; /* error return value */
xfs_mount_t *mp; /* filesystem mount point */
/*
* Allocate space for a new leaf node.
*/
args = state->args;
mp = args->dp->i_mount;
ASSERT(args != NULL);
ASSERT(oldblk->magic == XFS_DIR2_LEAFN_MAGIC);
error = xfs_da_grow_inode(args, &blkno);
if (error) {
return error;
}
/*
* Initialize the new leaf block.
*/
error = xfs_dir2_leaf_init(args, xfs_dir2_da_to_db(mp, blkno),
&newblk->bp, XFS_DIR2_LEAFN_MAGIC);
if (error) {
return error;
}
newblk->blkno = blkno;
newblk->magic = XFS_DIR2_LEAFN_MAGIC;
/*
* Rebalance the entries across the two leaves, link the new
* block into the leaves.
*/
xfs_dir2_leafn_rebalance(state, oldblk, newblk);
error = xfs_da_blk_link(state, oldblk, newblk);
if (error) {
return error;
}
/*
* Insert the new entry in the correct block.
*/
if (state->inleaf)
error = xfs_dir2_leafn_add(oldblk->bp, args, oldblk->index);
else
error = xfs_dir2_leafn_add(newblk->bp, args, newblk->index);
/*
* Update last hashval in each block since we added the name.
*/
oldblk->hashval = xfs_dir2_leafn_lasthash(oldblk->bp, NULL);
newblk->hashval = xfs_dir2_leafn_lasthash(newblk->bp, NULL);
xfs_dir2_leafn_check(args->dp, oldblk->bp);
xfs_dir2_leafn_check(args->dp, newblk->bp);
return error;
}
/*
* Check a leaf block and its neighbors to see if the block should be
* collapsed into one or the other neighbor. Always keep the block
* with the smaller block number.
* If the current block is over 50% full, don't try to join it, return 0.
* If the block is empty, fill in the state structure and return 2.
* If it can be collapsed, fill in the state structure and return 1.
* If nothing can be done, return 0.
*/
int /* error */
xfs_dir2_leafn_toosmall(
xfs_da_state_t *state, /* btree cursor */
int *action) /* resulting action to take */
{
xfs_da_state_blk_t *blk; /* leaf block */
xfs_dablk_t blkno; /* leaf block number */
xfs_dabuf_t *bp; /* leaf buffer */
int bytes; /* bytes in use */
int count; /* leaf live entry count */
int error; /* error return value */
int forward; /* sibling block direction */
int i; /* sibling counter */
xfs_da_blkinfo_t *info; /* leaf block header */
xfs_dir2_leaf_t *leaf; /* leaf structure */
int rval; /* result from path_shift */
/*
* Check for the degenerate case of the block being over 50% full.
* If so, it's not worth even looking to see if we might be able
* to coalesce with a sibling.
*/
blk = &state->path.blk[state->path.active - 1];
info = blk->bp->data;
ASSERT(be16_to_cpu(info->magic) == XFS_DIR2_LEAFN_MAGIC);
leaf = (xfs_dir2_leaf_t *)info;
count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
bytes = (uint)sizeof(leaf->hdr) + count * (uint)sizeof(leaf->ents[0]);
if (bytes > (state->blocksize >> 1)) {
/*
* Blk over 50%, don't try to join.
*/
*action = 0;
return 0;
}
/*
* Check for the degenerate case of the block being empty.
* If the block is empty, we'll simply delete it, no need to
* coalesce it with a sibling block. We choose (arbitrarily)
* to merge with the forward block unless it is NULL.
*/
if (count == 0) {
/*
* Make altpath point to the block we want to keep and
* path point to the block we want to drop (this one).
*/
forward = (info->forw != 0);
memcpy(&state->altpath, &state->path, sizeof(state->path));
error = xfs_da_path_shift(state, &state->altpath, forward, 0,
&rval);
if (error)
return error;
*action = rval ? 2 : 0;
return 0;
}
/*
* Examine each sibling block to see if we can coalesce with
* at least 25% free space to spare. We need to figure out
* whether to merge with the forward or the backward block.
* We prefer coalescing with the lower numbered sibling so as
* to shrink a directory over time.
*/
forward = be32_to_cpu(info->forw) < be32_to_cpu(info->back);
for (i = 0, bp = NULL; i < 2; forward = !forward, i++) {
blkno = forward ? be32_to_cpu(info->forw) : be32_to_cpu(info->back);
if (blkno == 0)
continue;
/*
* Read the sibling leaf block.
*/
if ((error =
xfs_da_read_buf(state->args->trans, state->args->dp, blkno,
-1, &bp, XFS_DATA_FORK))) {
return error;
}
ASSERT(bp != NULL);
/*
* Count bytes in the two blocks combined.
*/
leaf = (xfs_dir2_leaf_t *)info;
count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
bytes = state->blocksize - (state->blocksize >> 2);
leaf = bp->data;
ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
count += be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
bytes -= count * (uint)sizeof(leaf->ents[0]);
/*
* Fits with at least 25% to spare.
*/
if (bytes >= 0)
break;
xfs_da_brelse(state->args->trans, bp);
}
/*
* Didn't like either block, give up.
*/
if (i >= 2) {
*action = 0;
return 0;
}
/*
* Done with the sibling leaf block here, drop the dabuf
* so path_shift can get it.
*/
xfs_da_buf_done(bp);
/*
* Make altpath point to the block we want to keep (the lower
* numbered block) and path point to the block we want to drop.
*/
memcpy(&state->altpath, &state->path, sizeof(state->path));
if (blkno < blk->blkno)
error = xfs_da_path_shift(state, &state->altpath, forward, 0,
&rval);
else
error = xfs_da_path_shift(state, &state->path, forward, 0,
&rval);
if (error) {
return error;
}
*action = rval ? 0 : 1;
return 0;
}
/*
* Move all the leaf entries from drop_blk to save_blk.
* This is done as part of a join operation.
*/
void
xfs_dir2_leafn_unbalance(
xfs_da_state_t *state, /* cursor */
xfs_da_state_blk_t *drop_blk, /* dead block */
xfs_da_state_blk_t *save_blk) /* surviving block */
{
xfs_da_args_t *args; /* operation arguments */
xfs_dir2_leaf_t *drop_leaf; /* dead leaf structure */
xfs_dir2_leaf_t *save_leaf; /* surviving leaf structure */
args = state->args;
ASSERT(drop_blk->magic == XFS_DIR2_LEAFN_MAGIC);
ASSERT(save_blk->magic == XFS_DIR2_LEAFN_MAGIC);
drop_leaf = drop_blk->bp->data;
save_leaf = save_blk->bp->data;
ASSERT(be16_to_cpu(drop_leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
ASSERT(be16_to_cpu(save_leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
/*
* If there are any stale leaf entries, take this opportunity
* to purge them.
*/
if (drop_leaf->hdr.stale)
xfs_dir2_leaf_compact(args, drop_blk->bp);
if (save_leaf->hdr.stale)
xfs_dir2_leaf_compact(args, save_blk->bp);
/*
* Move the entries from drop to the appropriate end of save.
*/
drop_blk->hashval = be32_to_cpu(drop_leaf->ents[be16_to_cpu(drop_leaf->hdr.count) - 1].hashval);
if (xfs_dir2_leafn_order(save_blk->bp, drop_blk->bp))
xfs_dir2_leafn_moveents(args, drop_blk->bp, 0, save_blk->bp, 0,
be16_to_cpu(drop_leaf->hdr.count));
else
xfs_dir2_leafn_moveents(args, drop_blk->bp, 0, save_blk->bp,
be16_to_cpu(save_leaf->hdr.count), be16_to_cpu(drop_leaf->hdr.count));
save_blk->hashval = be32_to_cpu(save_leaf->ents[be16_to_cpu(save_leaf->hdr.count) - 1].hashval);
xfs_dir2_leafn_check(args->dp, save_blk->bp);
}
/*
* Top-level node form directory addname routine.
*/
int /* error */
xfs_dir2_node_addname(
xfs_da_args_t *args) /* operation arguments */
{
xfs_da_state_blk_t *blk; /* leaf block for insert */
int error; /* error return value */
int rval; /* sub-return value */
xfs_da_state_t *state; /* btree cursor */
xfs_dir2_trace_args("node_addname", args);
/*
* Allocate and initialize the state (btree cursor).
*/
state = xfs_da_state_alloc();
state->args = args;
state->mp = args->dp->i_mount;
state->blocksize = state->mp->m_dirblksize;
state->node_ents = state->mp->m_dir_node_ents;
/*
* Look up the name. We're not supposed to find it, but
* this gives us the insertion point.
*/
error = xfs_da_node_lookup_int(state, &rval);
if (error)
rval = error;
if (rval != ENOENT) {
goto done;
}
/*
* Add the data entry to a data block.
* Extravalid is set to a freeblock found by lookup.
*/
rval = xfs_dir2_node_addname_int(args,
state->extravalid ? &state->extrablk : NULL);
if (rval) {
goto done;
}
blk = &state->path.blk[state->path.active - 1];
ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);
/*
* Add the new leaf entry.
*/
rval = xfs_dir2_leafn_add(blk->bp, args, blk->index);
if (rval == 0) {
/*
* It worked, fix the hash values up the btree.
*/
if (!args->justcheck)
xfs_da_fixhashpath(state, &state->path);
} else {
/*
* It didn't work, we need to split the leaf block.
*/
if (args->total == 0) {
ASSERT(rval == ENOSPC);
goto done;
}
/*
* Split the leaf block and insert the new entry.
*/
rval = xfs_da_split(state);
}
done:
xfs_da_state_free(state);
return rval;
}
/*
* Add the data entry for a node-format directory name addition.
* The leaf entry is added in xfs_dir2_leafn_add.
* We may enter with a freespace block that the lookup found.
*/
static int /* error */
xfs_dir2_node_addname_int(
xfs_da_args_t *args, /* operation arguments */
xfs_da_state_blk_t *fblk) /* optional freespace block */
{
xfs_dir2_data_t *data; /* data block structure */
xfs_dir2_db_t dbno; /* data block number */
xfs_dabuf_t *dbp; /* data block buffer */
xfs_dir2_data_entry_t *dep; /* data entry pointer */
xfs_inode_t *dp; /* incore directory inode */
xfs_dir2_data_unused_t *dup; /* data unused entry pointer */
int error; /* error return value */
xfs_dir2_db_t fbno; /* freespace block number */
xfs_dabuf_t *fbp; /* freespace buffer */
int findex; /* freespace entry index */
xfs_dir2_free_t *free=NULL; /* freespace block structure */
xfs_dir2_db_t ifbno; /* initial freespace block no */
xfs_dir2_db_t lastfbno=0; /* highest freespace block no */
int length; /* length of the new entry */
int logfree; /* need to log free entry */
xfs_mount_t *mp; /* filesystem mount point */
int needlog; /* need to log data header */
int needscan; /* need to rescan data frees */
__be16 *tagp; /* data entry tag pointer */
xfs_trans_t *tp; /* transaction pointer */
dp = args->dp;
mp = dp->i_mount;
tp = args->trans;
length = xfs_dir2_data_entsize(args->namelen);
/*
* If we came in with a freespace block that means that lookup
* found an entry with our hash value. This is the freespace
* block for that data entry.
*/
if (fblk) {
fbp = fblk->bp;
/*
* Remember initial freespace block number.
*/
ifbno = fblk->blkno;
free = fbp->data;
ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
findex = fblk->index;
/*
* This means the free entry showed that the data block had
* space for our entry, so we remembered it.
* Use that data block.
*/
if (findex >= 0) {
ASSERT(findex < be32_to_cpu(free->hdr.nvalid));
ASSERT(be16_to_cpu(free->bests[findex]) != NULLDATAOFF);
ASSERT(be16_to_cpu(free->bests[findex]) >= length);
dbno = be32_to_cpu(free->hdr.firstdb) + findex;
}
/*
* The data block looked at didn't have enough room.
* We'll start at the beginning of the freespace entries.
*/
else {
dbno = -1;
findex = 0;
}
}
/*
* Didn't come in with a freespace block, so don't have a data block.
*/
else {
ifbno = dbno = -1;
fbp = NULL;
findex = 0;
}
/*
* If we don't have a data block yet, we're going to scan the
* freespace blocks looking for one. Figure out what the
* highest freespace block number is.
*/
if (dbno == -1) {
xfs_fileoff_t fo; /* freespace block number */
if ((error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK)))
return error;
lastfbno = xfs_dir2_da_to_db(mp, (xfs_dablk_t)fo);
fbno = ifbno;
}
/*
* While we haven't identified a data block, search the freeblock
* data for a good data block. If we find a null freeblock entry,
* indicating a hole in the data blocks, remember that.
*/
while (dbno == -1) {
/*
* If we don't have a freeblock in hand, get the next one.
*/
if (fbp == NULL) {
/*
* Happens the first time through unless lookup gave
* us a freespace block to start with.
*/
if (++fbno == 0)
fbno = XFS_DIR2_FREE_FIRSTDB(mp);
/*
* If it's ifbno we already looked at it.
*/
if (fbno == ifbno)
fbno++;
/*
* If it's off the end we're done.
*/
if (fbno >= lastfbno)
break;
/*
* Read the block. There can be holes in the
* freespace blocks, so this might not succeed.
* This should be really rare, so there's no reason
* to avoid it.
*/
if ((error = xfs_da_read_buf(tp, dp,
xfs_dir2_db_to_da(mp, fbno), -2, &fbp,
XFS_DATA_FORK))) {
return error;
}
if (unlikely(fbp == NULL)) {
continue;
}
free = fbp->data;
ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
findex = 0;
}
/*
* Look at the current free entry. Is it good enough?
*/
if (be16_to_cpu(free->bests[findex]) != NULLDATAOFF &&
be16_to_cpu(free->bests[findex]) >= length)
dbno = be32_to_cpu(free->hdr.firstdb) + findex;
else {
/*
* Are we done with the freeblock?
*/
if (++findex == be32_to_cpu(free->hdr.nvalid)) {
/*
* Drop the block.
*/
xfs_da_brelse(tp, fbp);
fbp = NULL;
if (fblk && fblk->bp)
fblk->bp = NULL;
}
}
}
/*
* If we don't have a data block, we need to allocate one and make
* the freespace entries refer to it.
*/
if (unlikely(dbno == -1)) {
/*
* Not allowed to allocate, return failure.
*/
if (args->justcheck || args->total == 0) {
/*
* Drop the freespace buffer unless it came from our
* caller.
*/
if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL)
xfs_da_buf_done(fbp);
return XFS_ERROR(ENOSPC);
}
/*
* Allocate and initialize the new data block.
*/
if (unlikely((error = xfs_dir2_grow_inode(args,
XFS_DIR2_DATA_SPACE,
&dbno)) ||
(error = xfs_dir2_data_init(args, dbno, &dbp)))) {
/*
* Drop the freespace buffer unless it came from our
* caller.
*/
if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL)
xfs_da_buf_done(fbp);
return error;
}
/*
* If (somehow) we have a freespace block, get rid of it.
*/
if (fbp)
xfs_da_brelse(tp, fbp);
if (fblk && fblk->bp)
fblk->bp = NULL;
/*
* Get the freespace block corresponding to the data block
* that was just allocated.
*/
fbno = xfs_dir2_db_to_fdb(mp, dbno);
if (unlikely(error = xfs_da_read_buf(tp, dp,
xfs_dir2_db_to_da(mp, fbno), -2, &fbp,
XFS_DATA_FORK))) {
xfs_da_buf_done(dbp);
return error;
}
/*
* If there wasn't a freespace block, the read will
* return a NULL fbp. Allocate and initialize a new one.
*/
if( fbp == NULL ) {
if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE,
&fbno))) {
return error;
}
if (unlikely(xfs_dir2_db_to_fdb(mp, dbno) != fbno)) {
cmn_err(CE_ALERT,
"xfs_dir2_node_addname_int: dir ino "
"%llu needed freesp block %lld for\n"
" data block %lld, got %lld\n"
" ifbno %llu lastfbno %d\n",
(unsigned long long)dp->i_ino,
(long long)xfs_dir2_db_to_fdb(mp, dbno),
(long long)dbno, (long long)fbno,
(unsigned long long)ifbno, lastfbno);
if (fblk) {
cmn_err(CE_ALERT,
" fblk 0x%p blkno %llu "
"index %d magic 0x%x\n",
fblk,
(unsigned long long)fblk->blkno,
fblk->index,
fblk->magic);
} else {
cmn_err(CE_ALERT,
" ... fblk is NULL\n");
}
XFS_ERROR_REPORT("xfs_dir2_node_addname_int",
XFS_ERRLEVEL_LOW, mp);
return XFS_ERROR(EFSCORRUPTED);
}
/*
* Get a buffer for the new block.
*/
if ((error = xfs_da_get_buf(tp, dp,
xfs_dir2_db_to_da(mp, fbno),
-1, &fbp, XFS_DATA_FORK))) {
return error;
}
ASSERT(fbp != NULL);
/*
* Initialize the new block to be empty, and remember
* its first slot as our empty slot.
*/
free = fbp->data;
free->hdr.magic = cpu_to_be32(XFS_DIR2_FREE_MAGIC);
free->hdr.firstdb = cpu_to_be32(
(fbno - XFS_DIR2_FREE_FIRSTDB(mp)) *
XFS_DIR2_MAX_FREE_BESTS(mp));
free->hdr.nvalid = 0;
free->hdr.nused = 0;
} else {
free = fbp->data;
ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
}
/*
* Set the freespace block index from the data block number.
*/
findex = xfs_dir2_db_to_fdindex(mp, dbno);
/*
* If it's after the end of the current entries in the
* freespace block, extend that table.
*/
if (findex >= be32_to_cpu(free->hdr.nvalid)) {
ASSERT(findex < XFS_DIR2_MAX_FREE_BESTS(mp));
free->hdr.nvalid = cpu_to_be32(findex + 1);
/*
* Tag new entry so nused will go up.
*/
free->bests[findex] = cpu_to_be16(NULLDATAOFF);
}
/*
* If this entry was for an empty data block
* (this should always be true) then update the header.
*/
if (be16_to_cpu(free->bests[findex]) == NULLDATAOFF) {
be32_add(&free->hdr.nused, 1);
xfs_dir2_free_log_header(tp, fbp);
}
/*
* Update the real value in the table.
* We haven't allocated the data entry yet so this will
* change again.
*/
data = dbp->data;
free->bests[findex] = data->hdr.bestfree[0].length;
logfree = 1;
}
/*
* We had a data block so we don't have to make a new one.
*/
else {
/*
* If just checking, we succeeded.
*/
if (args->justcheck) {
if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL)
xfs_da_buf_done(fbp);
return 0;
}
/*
* Read the data block in.
*/
if (unlikely(
error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, dbno),
-1, &dbp, XFS_DATA_FORK))) {
if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL)
xfs_da_buf_done(fbp);
return error;
}
data = dbp->data;
logfree = 0;
}
ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) >= length);
/*
* Point to the existing unused space.
*/
dup = (xfs_dir2_data_unused_t *)
((char *)data + be16_to_cpu(data->hdr.bestfree[0].offset));
needscan = needlog = 0;
/*
* Mark the first part of the unused space, inuse for us.
*/
xfs_dir2_data_use_free(tp, dbp, dup,
(xfs_dir2_data_aoff_t)((char *)dup - (char *)data), length,
&needlog, &needscan);
/*
* Fill in the new entry and log it.
*/
dep = (xfs_dir2_data_entry_t *)dup;
dep->inumber = cpu_to_be64(args->inumber);
dep->namelen = args->namelen;
memcpy(dep->name, args->name, dep->namelen);
tagp = xfs_dir2_data_entry_tag_p(dep);
*tagp = cpu_to_be16((char *)dep - (char *)data);
xfs_dir2_data_log_entry(tp, dbp, dep);
/*
* Rescan the block for bestfree if needed.
*/
if (needscan)
xfs_dir2_data_freescan(mp, data, &needlog);
/*
* Log the data block header if needed.
*/
if (needlog)
xfs_dir2_data_log_header(tp, dbp);
/*
* If the freespace entry is now wrong, update it.
*/
if (be16_to_cpu(free->bests[findex]) != be16_to_cpu(data->hdr.bestfree[0].length)) {
free->bests[findex] = data->hdr.bestfree[0].length;
logfree = 1;
}
/*
* Log the freespace entry if needed.
*/
if (logfree)
xfs_dir2_free_log_bests(tp, fbp, findex, findex);
/*
* If the caller didn't hand us the freespace block, drop it.
*/
if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL)
xfs_da_buf_done(fbp);
/*
* Return the data block and offset in args, then drop the data block.
*/
args->blkno = (xfs_dablk_t)dbno;
args->index = be16_to_cpu(*tagp);
xfs_da_buf_done(dbp);
return 0;
}
/*
* Lookup an entry in a node-format directory.
* All the real work happens in xfs_da_node_lookup_int.
* The only real output is the inode number of the entry.
*/
int /* error */
xfs_dir2_node_lookup(
xfs_da_args_t *args) /* operation arguments */
{
int error; /* error return value */
int i; /* btree level */
int rval; /* operation return value */
xfs_da_state_t *state; /* btree cursor */
xfs_dir2_trace_args("node_lookup", args);
/*
* Allocate and initialize the btree cursor.
*/
state = xfs_da_state_alloc();
state->args = args;
state->mp = args->dp->i_mount;
state->blocksize = state->mp->m_dirblksize;
state->node_ents = state->mp->m_dir_node_ents;
/*
* Fill in the path to the entry in the cursor.
*/
error = xfs_da_node_lookup_int(state, &rval);
if (error)
rval = error;
/*
* Release the btree blocks and leaf block.
*/
for (i = 0; i < state->path.active; i++) {
xfs_da_brelse(args->trans, state->path.blk[i].bp);
state->path.blk[i].bp = NULL;
}
/*
* Release the data block if we have it.
*/
if (state->extravalid && state->extrablk.bp) {
xfs_da_brelse(args->trans, state->extrablk.bp);
state->extrablk.bp = NULL;
}
xfs_da_state_free(state);
return rval;
}
/*
* Remove an entry from a node-format directory.
*/
int /* error */
xfs_dir2_node_removename(
xfs_da_args_t *args) /* operation arguments */
{
xfs_da_state_blk_t *blk; /* leaf block */
int error; /* error return value */
int rval; /* operation return value */
xfs_da_state_t *state; /* btree cursor */
xfs_dir2_trace_args("node_removename", args);
/*
* Allocate and initialize the btree cursor.
*/
state = xfs_da_state_alloc();
state->args = args;
state->mp = args->dp->i_mount;
state->blocksize = state->mp->m_dirblksize;
state->node_ents = state->mp->m_dir_node_ents;
/*
* Look up the entry we're deleting, set up the cursor.
*/
error = xfs_da_node_lookup_int(state, &rval);
if (error) {
rval = error;
}
/*
* Didn't find it, upper layer screwed up.
*/
if (rval != EEXIST) {
xfs_da_state_free(state);
return rval;
}
blk = &state->path.blk[state->path.active - 1];
ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);
ASSERT(state->extravalid);
/*
* Remove the leaf and data entries.
* Extrablk refers to the data block.
*/
error = xfs_dir2_leafn_remove(args, blk->bp, blk->index,
&state->extrablk, &rval);
if (error) {
return error;
}
/*
* Fix the hash values up the btree.
*/
xfs_da_fixhashpath(state, &state->path);
/*
* If we need to join leaf blocks, do it.
*/
if (rval && state->path.active > 1)
error = xfs_da_join(state);
/*
* If no errors so far, try conversion to leaf format.
*/
if (!error)
error = xfs_dir2_node_to_leaf(state);
xfs_da_state_free(state);
return error;
}
/*
* Replace an entry's inode number in a node-format directory.
*/
int /* error */
xfs_dir2_node_replace(
xfs_da_args_t *args) /* operation arguments */
{
xfs_da_state_blk_t *blk; /* leaf block */
xfs_dir2_data_t *data; /* data block structure */
xfs_dir2_data_entry_t *dep; /* data entry changed */
int error; /* error return value */
int i; /* btree level */
xfs_ino_t inum; /* new inode number */
xfs_dir2_leaf_t *leaf; /* leaf structure */
xfs_dir2_leaf_entry_t *lep; /* leaf entry being changed */
int rval; /* internal return value */
xfs_da_state_t *state; /* btree cursor */
xfs_dir2_trace_args("node_replace", args);
/*
* Allocate and initialize the btree cursor.
*/
state = xfs_da_state_alloc();
state->args = args;
state->mp = args->dp->i_mount;
state->blocksize = state->mp->m_dirblksize;
state->node_ents = state->mp->m_dir_node_ents;
inum = args->inumber;
/*
* Lookup the entry to change in the btree.
*/
error = xfs_da_node_lookup_int(state, &rval);
if (error) {
rval = error;
}
/*
* It should be found, since the vnodeops layer has looked it up
* and locked it. But paranoia is good.
*/
if (rval == EEXIST) {
/*
* Find the leaf entry.
*/
blk = &state->path.blk[state->path.active - 1];
ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);
leaf = blk->bp->data;
lep = &leaf->ents[blk->index];
ASSERT(state->extravalid);
/*
* Point to the data entry.
*/
data = state->extrablk.bp->data;
ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC);
dep = (xfs_dir2_data_entry_t *)
((char *)data +
xfs_dir2_dataptr_to_off(state->mp, be32_to_cpu(lep->address)));
ASSERT(inum != be64_to_cpu(dep->inumber));
/*
* Fill in the new inode number and log the entry.
*/
dep->inumber = cpu_to_be64(inum);
xfs_dir2_data_log_entry(args->trans, state->extrablk.bp, dep);
rval = 0;
}
/*
* Didn't find it, and we're holding a data block. Drop it.
*/
else if (state->extravalid) {
xfs_da_brelse(args->trans, state->extrablk.bp);
state->extrablk.bp = NULL;
}
/*
* Release all the buffers in the cursor.
*/
for (i = 0; i < state->path.active; i++) {
xfs_da_brelse(args->trans, state->path.blk[i].bp);
state->path.blk[i].bp = NULL;
}
xfs_da_state_free(state);
return rval;
}
/*
* Trim off a trailing empty freespace block.
* Return (in rvalp) 1 if we did it, 0 if not.
*/
int /* error */
xfs_dir2_node_trim_free(
xfs_da_args_t *args, /* operation arguments */
xfs_fileoff_t fo, /* free block number */
int *rvalp) /* out: did something */
{
xfs_dabuf_t *bp; /* freespace buffer */
xfs_inode_t *dp; /* incore directory inode */
int error; /* error return code */
xfs_dir2_free_t *free; /* freespace structure */
xfs_mount_t *mp; /* filesystem mount point */
xfs_trans_t *tp; /* transaction pointer */
dp = args->dp;
mp = dp->i_mount;
tp = args->trans;
/*
* Read the freespace block.
*/
if (unlikely(error = xfs_da_read_buf(tp, dp, (xfs_dablk_t)fo, -2, &bp,
XFS_DATA_FORK))) {
return error;
}
/*
* There can be holes in freespace. If fo is a hole, there's
* nothing to do.
*/
if (bp == NULL) {
return 0;
}
free = bp->data;
ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
/*
* If there are used entries, there's nothing to do.
*/
if (be32_to_cpu(free->hdr.nused) > 0) {
xfs_da_brelse(tp, bp);
*rvalp = 0;
return 0;
}
/*
* Blow the block away.
*/
if ((error =
xfs_dir2_shrink_inode(args, xfs_dir2_da_to_db(mp, (xfs_dablk_t)fo),
bp))) {
/*
* Can't fail with ENOSPC since that only happens with no
* space reservation, when breaking up an extent into two
* pieces. This is the last block of an extent.
*/
ASSERT(error != ENOSPC);
xfs_da_brelse(tp, bp);
return error;
}
/*
* Return that we succeeded.
*/
*rvalp = 1;
return 0;
}