3cb29d1117
The limit equals 32 and is imposed by the number of entries in the
fs_poolid_map and shared_fs_poolid_map. Nowadays it is insufficient,
because with containers on board a Linux host can have hundreds of
active fs mounts.
These maps were introduced by commit 49a9ab815a ("mm: cleancache: lazy
initialization to allow tmem backends to build/run as modules") in order
to allow compiling cleancache drivers as modules. Real pool ids
are stored in these maps while super_block->cleancache_poolid points to
an entry in the map, so that on cleancache registration we can walk over
all (if there are <= 32 of them, of course) cleancache-enabled super
blocks and assign real pool ids.
Actually, there is absolutely no need for these maps, because we can
iterate over all super blocks immediately using iterate_supers. This is
not racy, because cleancache_init_ops is called from mount_fs with
super_block->s_umount held for writing, while iterate_supers takes this
semaphore for reading, so if we call iterate_supers after setting
cleancache_ops, all super blocks that had been created before
cleancache_register_ops was called will be assigned pool ids by the
action function of iterate_supers while all newer super blocks will
receive it in cleancache_init_fs.
This patch therefore removes the maps and hence the artificial limit on
the number of cleancache enabled filesystems.
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Stefan Hengelein <ilendir@googlemail.com>
Cc: Florian Schmaus <fschmaus@gmail.com>
Cc: Andor Daam <andor.daam@googlemail.com>
Cc: Dan Magenheimer <dan.magenheimer@oracle.com>
Cc: Bob Liu <lliubbo@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
320 lines
9.9 KiB
C
320 lines
9.9 KiB
C
/*
 * Cleancache frontend
 *
 * This code provides the generic "frontend" layer to call a matching
 * "backend" driver implementation of cleancache.  See
 * Documentation/vm/cleancache.txt for more information.
 *
 * Copyright (C) 2009-2010 Oracle Corp.  All rights reserved.
 * Author: Dan Magenheimer
 *
 * This work is licensed under the terms of the GNU GPL, version 2.
 */

#include <linux/module.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/exportfs.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/debugfs.h>
|
|
#include <linux/cleancache.h>
|
|
|
|
/*
|
|
* cleancache_ops is set by cleancache_register_ops to contain the pointers
|
|
* to the cleancache "backend" implementation functions.
|
|
*/
|
|
static struct cleancache_ops *cleancache_ops __read_mostly;
|
|
|
|
/*
|
|
* Counters available via /sys/kernel/debug/cleancache (if debugfs is
|
|
* properly configured. These are for information only so are not protected
|
|
* against increment races.
|
|
*/
|
|
static u64 cleancache_succ_gets;
|
|
static u64 cleancache_failed_gets;
|
|
static u64 cleancache_puts;
|
|
static u64 cleancache_invalidates;
|
|
|
|
static void cleancache_register_ops_sb(struct super_block *sb, void *unused)
|
|
{
|
|
switch (sb->cleancache_poolid) {
|
|
case CLEANCACHE_NO_BACKEND:
|
|
__cleancache_init_fs(sb);
|
|
break;
|
|
case CLEANCACHE_NO_BACKEND_SHARED:
|
|
__cleancache_init_shared_fs(sb);
|
|
break;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Register operations for cleancache. Returns 0 on success.
|
|
*/
|
|
int cleancache_register_ops(struct cleancache_ops *ops)
|
|
{
|
|
if (cmpxchg(&cleancache_ops, NULL, ops))
|
|
return -EBUSY;
|
|
|
|
/*
|
|
* A cleancache backend can be built as a module and hence loaded after
|
|
* a cleancache enabled filesystem has called cleancache_init_fs. To
|
|
* handle such a scenario, here we call ->init_fs or ->init_shared_fs
|
|
* for each active super block. To differentiate between local and
|
|
* shared filesystems, we temporarily initialize sb->cleancache_poolid
|
|
* to CLEANCACHE_NO_BACKEND or CLEANCACHE_NO_BACKEND_SHARED
|
|
* respectively in case there is no backend registered at the time
|
|
* cleancache_init_fs or cleancache_init_shared_fs is called.
|
|
*
|
|
* Since filesystems can be mounted concurrently with cleancache
|
|
* backend registration, we have to be careful to guarantee that all
|
|
* cleancache enabled filesystems that has been mounted by the time
|
|
* cleancache_register_ops is called has got and all mounted later will
|
|
* get cleancache_poolid. This is assured by the following statements
|
|
* tied together:
|
|
*
|
|
* a) iterate_supers skips only those super blocks that has started
|
|
* ->kill_sb
|
|
*
|
|
* b) if iterate_supers encounters a super block that has not finished
|
|
* ->mount yet, it waits until it is finished
|
|
*
|
|
* c) cleancache_init_fs is called from ->mount and
|
|
* cleancache_invalidate_fs is called from ->kill_sb
|
|
*
|
|
* d) we call iterate_supers after cleancache_ops has been set
|
|
*
|
|
* From a) it follows that if iterate_supers skips a super block, then
|
|
* either the super block is already dead, in which case we do not need
|
|
* to bother initializing cleancache for it, or it was mounted after we
|
|
* initiated iterate_supers. In the latter case, it must have seen
|
|
* cleancache_ops set according to d) and initialized cleancache from
|
|
* ->mount by itself according to c). This proves that we call
|
|
* ->init_fs at least once for each active super block.
|
|
*
|
|
* From b) and c) it follows that if iterate_supers encounters a super
|
|
* block that has already started ->init_fs, it will wait until ->mount
|
|
* and hence ->init_fs has finished, then check cleancache_poolid, see
|
|
* that it has already been set and therefore do nothing. This proves
|
|
* that we call ->init_fs no more than once for each super block.
|
|
*
|
|
* Combined together, the last two paragraphs prove the function
|
|
* correctness.
|
|
*
|
|
* Note that various cleancache callbacks may proceed before this
|
|
* function is called or even concurrently with it, but since
|
|
* CLEANCACHE_NO_BACKEND is negative, they will all result in a noop
|
|
* until the corresponding ->init_fs has been actually called and
|
|
* cleancache_ops has been set.
|
|
*/
|
|
iterate_supers(cleancache_register_ops_sb, NULL);
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL(cleancache_register_ops);
|
|
|
|
/* Called by a cleancache-enabled filesystem at time of mount */
|
|
void __cleancache_init_fs(struct super_block *sb)
|
|
{
|
|
int pool_id = CLEANCACHE_NO_BACKEND;
|
|
|
|
if (cleancache_ops) {
|
|
pool_id = cleancache_ops->init_fs(PAGE_SIZE);
|
|
if (pool_id < 0)
|
|
pool_id = CLEANCACHE_NO_POOL;
|
|
}
|
|
sb->cleancache_poolid = pool_id;
|
|
}
|
|
EXPORT_SYMBOL(__cleancache_init_fs);
|
|
|
|
/* Called by a cleancache-enabled clustered filesystem at time of mount */
|
|
void __cleancache_init_shared_fs(struct super_block *sb)
|
|
{
|
|
int pool_id = CLEANCACHE_NO_BACKEND_SHARED;
|
|
|
|
if (cleancache_ops) {
|
|
pool_id = cleancache_ops->init_shared_fs(sb->s_uuid, PAGE_SIZE);
|
|
if (pool_id < 0)
|
|
pool_id = CLEANCACHE_NO_POOL;
|
|
}
|
|
sb->cleancache_poolid = pool_id;
|
|
}
|
|
EXPORT_SYMBOL(__cleancache_init_shared_fs);
|
|
|
|
/*
|
|
* If the filesystem uses exportable filehandles, use the filehandle as
|
|
* the key, else use the inode number.
|
|
*/
|
|
static int cleancache_get_key(struct inode *inode,
|
|
struct cleancache_filekey *key)
|
|
{
|
|
int (*fhfn)(struct inode *, __u32 *fh, int *, struct inode *);
|
|
int len = 0, maxlen = CLEANCACHE_KEY_MAX;
|
|
struct super_block *sb = inode->i_sb;
|
|
|
|
key->u.ino = inode->i_ino;
|
|
if (sb->s_export_op != NULL) {
|
|
fhfn = sb->s_export_op->encode_fh;
|
|
if (fhfn) {
|
|
len = (*fhfn)(inode, &key->u.fh[0], &maxlen, NULL);
|
|
if (len <= FILEID_ROOT || len == FILEID_INVALID)
|
|
return -1;
|
|
if (maxlen > CLEANCACHE_KEY_MAX)
|
|
return -1;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* "Get" data from cleancache associated with the poolid/inode/index
|
|
* that were specified when the data was put to cleanache and, if
|
|
* successful, use it to fill the specified page with data and return 0.
|
|
* The pageframe is unchanged and returns -1 if the get fails.
|
|
* Page must be locked by caller.
|
|
*
|
|
* The function has two checks before any action is taken - whether
|
|
* a backend is registered and whether the sb->cleancache_poolid
|
|
* is correct.
|
|
*/
|
|
int __cleancache_get_page(struct page *page)
|
|
{
|
|
int ret = -1;
|
|
int pool_id;
|
|
struct cleancache_filekey key = { .u.key = { 0 } };
|
|
|
|
if (!cleancache_ops) {
|
|
cleancache_failed_gets++;
|
|
goto out;
|
|
}
|
|
|
|
VM_BUG_ON_PAGE(!PageLocked(page), page);
|
|
pool_id = page->mapping->host->i_sb->cleancache_poolid;
|
|
if (pool_id < 0)
|
|
goto out;
|
|
|
|
if (cleancache_get_key(page->mapping->host, &key) < 0)
|
|
goto out;
|
|
|
|
ret = cleancache_ops->get_page(pool_id, key, page->index, page);
|
|
if (ret == 0)
|
|
cleancache_succ_gets++;
|
|
else
|
|
cleancache_failed_gets++;
|
|
out:
|
|
return ret;
|
|
}
|
|
EXPORT_SYMBOL(__cleancache_get_page);
|
|
|
|
/*
|
|
* "Put" data from a page to cleancache and associate it with the
|
|
* (previously-obtained per-filesystem) poolid and the page's,
|
|
* inode and page index. Page must be locked. Note that a put_page
|
|
* always "succeeds", though a subsequent get_page may succeed or fail.
|
|
*
|
|
* The function has two checks before any action is taken - whether
|
|
* a backend is registered and whether the sb->cleancache_poolid
|
|
* is correct.
|
|
*/
|
|
void __cleancache_put_page(struct page *page)
|
|
{
|
|
int pool_id;
|
|
struct cleancache_filekey key = { .u.key = { 0 } };
|
|
|
|
if (!cleancache_ops) {
|
|
cleancache_puts++;
|
|
return;
|
|
}
|
|
|
|
VM_BUG_ON_PAGE(!PageLocked(page), page);
|
|
pool_id = page->mapping->host->i_sb->cleancache_poolid;
|
|
if (pool_id >= 0 &&
|
|
cleancache_get_key(page->mapping->host, &key) >= 0) {
|
|
cleancache_ops->put_page(pool_id, key, page->index, page);
|
|
cleancache_puts++;
|
|
}
|
|
}
|
|
EXPORT_SYMBOL(__cleancache_put_page);
|
|
|
|
/*
|
|
* Invalidate any data from cleancache associated with the poolid and the
|
|
* page's inode and page index so that a subsequent "get" will fail.
|
|
*
|
|
* The function has two checks before any action is taken - whether
|
|
* a backend is registered and whether the sb->cleancache_poolid
|
|
* is correct.
|
|
*/
|
|
void __cleancache_invalidate_page(struct address_space *mapping,
|
|
struct page *page)
|
|
{
|
|
/* careful... page->mapping is NULL sometimes when this is called */
|
|
int pool_id = mapping->host->i_sb->cleancache_poolid;
|
|
struct cleancache_filekey key = { .u.key = { 0 } };
|
|
|
|
if (!cleancache_ops)
|
|
return;
|
|
|
|
if (pool_id >= 0) {
|
|
VM_BUG_ON_PAGE(!PageLocked(page), page);
|
|
if (cleancache_get_key(mapping->host, &key) >= 0) {
|
|
cleancache_ops->invalidate_page(pool_id,
|
|
key, page->index);
|
|
cleancache_invalidates++;
|
|
}
|
|
}
|
|
}
|
|
EXPORT_SYMBOL(__cleancache_invalidate_page);
|
|
|
|
/*
|
|
* Invalidate all data from cleancache associated with the poolid and the
|
|
* mappings's inode so that all subsequent gets to this poolid/inode
|
|
* will fail.
|
|
*
|
|
* The function has two checks before any action is taken - whether
|
|
* a backend is registered and whether the sb->cleancache_poolid
|
|
* is correct.
|
|
*/
|
|
void __cleancache_invalidate_inode(struct address_space *mapping)
|
|
{
|
|
int pool_id = mapping->host->i_sb->cleancache_poolid;
|
|
struct cleancache_filekey key = { .u.key = { 0 } };
|
|
|
|
if (!cleancache_ops)
|
|
return;
|
|
|
|
if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0)
|
|
cleancache_ops->invalidate_inode(pool_id, key);
|
|
}
|
|
EXPORT_SYMBOL(__cleancache_invalidate_inode);
|
|
|
|
/*
|
|
* Called by any cleancache-enabled filesystem at time of unmount;
|
|
* note that pool_id is surrendered and may be returned by a subsequent
|
|
* cleancache_init_fs or cleancache_init_shared_fs.
|
|
*/
|
|
void __cleancache_invalidate_fs(struct super_block *sb)
|
|
{
|
|
int pool_id;
|
|
|
|
pool_id = sb->cleancache_poolid;
|
|
sb->cleancache_poolid = CLEANCACHE_NO_POOL;
|
|
|
|
if (cleancache_ops && pool_id >= 0)
|
|
cleancache_ops->invalidate_fs(pool_id);
|
|
}
|
|
EXPORT_SYMBOL(__cleancache_invalidate_fs);
|
|
|
|
static int __init init_cleancache(void)
|
|
{
|
|
#ifdef CONFIG_DEBUG_FS
|
|
struct dentry *root = debugfs_create_dir("cleancache", NULL);
|
|
if (root == NULL)
|
|
return -ENXIO;
|
|
debugfs_create_u64("succ_gets", S_IRUGO, root, &cleancache_succ_gets);
|
|
debugfs_create_u64("failed_gets", S_IRUGO,
|
|
root, &cleancache_failed_gets);
|
|
debugfs_create_u64("puts", S_IRUGO, root, &cleancache_puts);
|
|
debugfs_create_u64("invalidates", S_IRUGO,
|
|
root, &cleancache_invalidates);
|
|
#endif
|
|
return 0;
|
|
}
|
|
module_init(init_cleancache)
|