kernel-ark/fs/fuse/control.c
Eric W. Biederman 7f78e03513 fs: Limit sys_mount to only request filesystem modules.
Modify the request_module to prefix the file system type with "fs-"
and add aliases to all of the filesystems that can be built as modules
to match.

A common practice is to build all of the kernel code and leave code
that is not commonly needed as modules, with the result that many
users are exposed to any bug anywhere in the kernel.

Looking for filesystems with a fs- prefix limits the pool of possible
modules that can be loaded by mount to just filesystems trivially
making things safer with no real cost.

Using aliases means user space can control the policy of which
filesystem modules are auto-loaded by editing /etc/modprobe.d/*.conf
with blacklist and alias directives, allowing simple, safe,
well-understood work-arounds to known problematic software.

This also addresses a rare but unfortunate problem where the filesystem
name is not the same as its module name and module auto-loading
would not work.  While writing this patch I saw a handful of such
cases.  The most significant being autofs that lives in the module
autofs4.

This is relevant to user namespaces because we can reach the request
module in get_fs_type() without having any special permissions, and
people get uncomfortable when a user specified string (in this case
the filesystem type) goes all of the way to request_module.

After having looked at this issue I don't think there is any
particular reason to perform any filtering or permission checks beyond
making it clear in the module request that we want a filesystem
module.  The common pattern in the kernel is to call request_module()
without regard to the user's permissions.  In general all a filesystem
module does once loaded is call register_filesystem() and go to sleep.
Which means there is not much attack surface exposed by loading a
filesystem module unless the filesystem is mounted.  In a user
namespace filesystems are not mounted unless .fs_flags = FS_USERNS_MOUNT,
which most filesystems do not set today.

Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Acked-by: Kees Cook <keescook@chromium.org>
Reported-by: Kees Cook <keescook@google.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
2013-03-03 19:36:31 -08:00

355 lines
7.9 KiB
C

/*
FUSE: Filesystem in Userspace
Copyright (C) 2001-2008 Miklos Szeredi <miklos@szeredi.hu>
This program can be distributed under the terms of the GNU GPL.
See the file COPYING.
*/
#include "fuse_i.h"
#include <linux/init.h>
#include <linux/module.h>
/* Magic number handed to simple_fill_super() for the control filesystem. */
#define FUSE_CTL_SUPER_MAGIC 0x65735543
/*
 * Superblock of the control filesystem.
 *
 * This is non-NULL when the single instance of the control filesystem
 * exists; it is set in fuse_ctl_fill_super() and cleared again in
 * fuse_ctl_kill_sb().  Protected by fuse_mutex.
 */
static struct super_block *fuse_control_sb;
static struct fuse_conn *fuse_ctl_file_conn_get(struct file *file)
{
struct fuse_conn *fc;
mutex_lock(&fuse_mutex);
fc = file_inode(file)->i_private;
if (fc)
fc = fuse_conn_get(fc);
mutex_unlock(&fuse_mutex);
return fc;
}
/*
 * write() handler of the "abort" control file.
 *
 * Any write aborts the connection, if it still exists.  The written
 * data is ignored and the whole write is always reported as consumed.
 */
static ssize_t fuse_conn_abort_write(struct file *file, const char __user *buf,
				     size_t count, loff_t *ppos)
{
	struct fuse_conn *conn = fuse_ctl_file_conn_get(file);

	if (conn == NULL)
		return count;

	fuse_abort_conn(conn);
	fuse_conn_put(conn);

	return count;
}
/*
 * read() handler of the "waiting" control file.
 *
 * A read at offset 0 samples fc->num_waiting and stashes the value in
 * file->private_data, so that follow-up reads at a non-zero offset
 * format the same number.  Returns 0 (end of file) when the connection
 * is gone at sampling time, otherwise whatever
 * simple_read_from_buffer() returns for the "<value>\n" text.
 */
static ssize_t fuse_conn_waiting_read(struct file *file, char __user *buf,
				      size_t len, loff_t *ppos)
{
	char text[32];
	size_t text_len;

	if (*ppos == 0) {
		struct fuse_conn *conn = fuse_ctl_file_conn_get(file);
		long snapshot;

		if (conn == NULL)
			return 0;

		snapshot = atomic_read(&conn->num_waiting);
		fuse_conn_put(conn);
		file->private_data = (void *)snapshot;
	}

	text_len = sprintf(text, "%ld\n", (long)file->private_data);

	return simple_read_from_buffer(buf, len, ppos, text, text_len);
}
/*
 * Shared read helper for the limit files: formats @val as "<val>\n"
 * and copies the requested window of that text to userspace via
 * simple_read_from_buffer().
 */
static ssize_t fuse_conn_limit_read(struct file *file, char __user *buf,
				    size_t len, loff_t *ppos, unsigned val)
{
	char text[32];
	size_t text_len;

	text_len = sprintf(text, "%u\n", val);

	return simple_read_from_buffer(buf, len, ppos, text, text_len);
}
/*
 * Shared write helper for the limit files.
 *
 * Parses an unsigned number from the user buffer and stores it in
 * @val.  The accepted maximum is 65535; callers without CAP_SYS_ADMIN
 * are additionally capped at @global_limit.
 *
 * Returns @count on success, -EINVAL for a write at a non-zero offset
 * or a value above the limit, or the error from kstrtoul_from_user().
 * @val is left untouched on failure.
 */
static ssize_t fuse_conn_limit_write(struct file *file, const char __user *buf,
				     size_t count, loff_t *ppos, unsigned *val,
				     unsigned global_limit)
{
	unsigned long parsed;
	unsigned ceiling = (1 << 16) - 1;
	int rc;

	if (*ppos != 0)
		return -EINVAL;

	rc = kstrtoul_from_user(buf, count, 0, &parsed);
	if (rc != 0)
		return rc;

	if (!capable(CAP_SYS_ADMIN))
		ceiling = min(ceiling, global_limit);

	if (parsed > ceiling)
		return -EINVAL;

	*val = parsed;

	return count;
}
/*
 * read() handler of the "max_background" control file: reports the
 * connection's max_background value, or end of file (0) when the
 * connection no longer exists.
 */
static ssize_t fuse_conn_max_background_read(struct file *file,
					     char __user *buf, size_t len,
					     loff_t *ppos)
{
	struct fuse_conn *conn = fuse_ctl_file_conn_get(file);
	unsigned current_limit;

	if (conn == NULL)
		return 0;

	/* Copy the value out so the reference can be dropped before I/O. */
	current_limit = conn->max_background;
	fuse_conn_put(conn);

	return fuse_conn_limit_read(file, buf, len, ppos, current_limit);
}
/*
 * write() handler of the "max_background" control file.
 *
 * The value is validated by fuse_conn_limit_write() against
 * max_user_bgreq and, on success, stored in the connection if it still
 * exists.  The result of fuse_conn_limit_write() is returned either
 * way.
 */
static ssize_t fuse_conn_max_background_write(struct file *file,
					      const char __user *buf,
					      size_t count, loff_t *ppos)
{
	unsigned uninitialized_var(val);
	struct fuse_conn *conn;
	ssize_t ret;

	ret = fuse_conn_limit_write(file, buf, count, ppos, &val,
				    max_user_bgreq);
	if (ret <= 0)
		return ret;

	conn = fuse_ctl_file_conn_get(file);
	if (conn != NULL) {
		conn->max_background = val;
		fuse_conn_put(conn);
	}

	return ret;
}
/*
 * read() handler of the "congestion_threshold" control file: reports
 * the connection's congestion_threshold value, or end of file (0) when
 * the connection no longer exists.
 */
static ssize_t fuse_conn_congestion_threshold_read(struct file *file,
						   char __user *buf, size_t len,
						   loff_t *ppos)
{
	struct fuse_conn *conn = fuse_ctl_file_conn_get(file);
	unsigned current_threshold;

	if (conn == NULL)
		return 0;

	/* Copy the value out so the reference can be dropped before I/O. */
	current_threshold = conn->congestion_threshold;
	fuse_conn_put(conn);

	return fuse_conn_limit_read(file, buf, len, ppos, current_threshold);
}
/*
 * write() handler of the "congestion_threshold" control file.
 *
 * The value is validated by fuse_conn_limit_write() against
 * max_user_congthresh and, on success, stored in the connection if it
 * still exists.  The result of fuse_conn_limit_write() is returned
 * either way.
 */
static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
						    const char __user *buf,
						    size_t count, loff_t *ppos)
{
	unsigned uninitialized_var(val);
	struct fuse_conn *conn;
	ssize_t ret;

	ret = fuse_conn_limit_write(file, buf, count, ppos, &val,
				    max_user_congthresh);
	if (ret <= 0)
		return ret;

	conn = fuse_ctl_file_conn_get(file);
	if (conn != NULL) {
		conn->congestion_threshold = val;
		fuse_conn_put(conn);
	}

	return ret;
}
/* File operations of the write-only "abort" control file. */
static const struct file_operations fuse_ctl_abort_ops = {
	.llseek	= no_llseek,
	.open	= nonseekable_open,
	.write	= fuse_conn_abort_write,
};
/* File operations of the read-only "waiting" control file. */
static const struct file_operations fuse_ctl_waiting_ops = {
	.llseek	= no_llseek,
	.open	= nonseekable_open,
	.read	= fuse_conn_waiting_read,
};
/* File operations of the read/write "max_background" control file. */
static const struct file_operations fuse_conn_max_background_ops = {
	.llseek	= no_llseek,
	.open	= nonseekable_open,
	.read	= fuse_conn_max_background_read,
	.write	= fuse_conn_max_background_write,
};
/* File operations of the read/write "congestion_threshold" control file. */
static const struct file_operations fuse_conn_congestion_threshold_ops = {
	.llseek	= no_llseek,
	.open	= nonseekable_open,
	.read	= fuse_conn_congestion_threshold_read,
	.write	= fuse_conn_congestion_threshold_write,
};
/*
 * Create one entry of the control filesystem for connection @fc.
 *
 * @parent: directory dentry the new entry is created under
 * @fc:     connection the entry belongs to; stored in the inode's
 *          i_private and used as the source of the owner uid/gid
 * @name:   name of the new entry
 * @mode:   inode mode (file type and permission bits)
 * @nlink:  initial link count of the inode
 * @iop:    inode operations, or NULL to keep the defaults
 * @fop:    file operations
 *
 * On success the dentry is recorded in fc->ctl_dentry[] so that
 * fuse_ctl_remove_conn() can tear it down, and the dentry is returned.
 * Returns NULL if the dentry or the inode could not be allocated; in
 * that case nothing is recorded in @fc.
 */
static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
					  struct fuse_conn *fc,
					  const char *name,
					  int mode, int nlink,
					  const struct inode_operations *iop,
					  const struct file_operations *fop)
{
	struct dentry *dentry;
	struct inode *inode;

	BUG_ON(fc->ctl_ndents >= FUSE_CTL_NUM_DENTRIES);
	dentry = d_alloc_name(parent, name);
	if (!dentry)
		return NULL;

	inode = new_inode(fuse_control_sb);
	if (!inode) {
		/*
		 * Release the half-built dentry instead of leaving it
		 * registered: fuse_ctl_remove_conn() dereferences
		 * dentry->d_inode for every recorded entry.
		 */
		dput(dentry);
		return NULL;
	}

	inode->i_ino = get_next_ino();
	inode->i_mode = mode;
	inode->i_uid = fc->user_id;
	inode->i_gid = fc->group_id;
	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
	/* setting ->i_op to NULL is not allowed */
	if (iop)
		inode->i_op = iop;
	inode->i_fop = fop;
	set_nlink(inode, nlink);
	inode->i_private = fc;
	d_add(dentry, inode);

	/* Record the entry only once it is fully set up. */
	fc->ctl_dentry[fc->ctl_ndents++] = dentry;

	return dentry;
}
/*
 * Add a connection to the control filesystem (if it exists).  Caller
 * must hold fuse_mutex.
 *
 * Creates a directory named after fc->dev below the root, populated
 * with the "waiting", "abort", "max_background" and
 * "congestion_threshold" files.  Returns 0 on success or when the
 * control filesystem does not exist; on any allocation failure the
 * connection is removed again and -ENOMEM is returned.
 */
int fuse_ctl_add_conn(struct fuse_conn *fc)
{
	struct dentry *root;
	struct dentry *conn_dir;
	char dirname[32];

	if (!fuse_control_sb)
		return 0;

	root = fuse_control_sb->s_root;
	/* The new subdirectory adds a link to the root directory. */
	inc_nlink(root->d_inode);

	sprintf(dirname, "%u", fc->dev);
	conn_dir = fuse_ctl_add_dentry(root, fc, dirname, S_IFDIR | 0500, 2,
				       &simple_dir_inode_operations,
				       &simple_dir_operations);
	if (!conn_dir)
		goto err;

	if (!fuse_ctl_add_dentry(conn_dir, fc, "waiting", S_IFREG | 0400, 1,
				 NULL, &fuse_ctl_waiting_ops))
		goto err;

	if (!fuse_ctl_add_dentry(conn_dir, fc, "abort", S_IFREG | 0200, 1,
				 NULL, &fuse_ctl_abort_ops))
		goto err;

	if (!fuse_ctl_add_dentry(conn_dir, fc, "max_background",
				 S_IFREG | 0600, 1, NULL,
				 &fuse_conn_max_background_ops))
		goto err;

	if (!fuse_ctl_add_dentry(conn_dir, fc, "congestion_threshold",
				 S_IFREG | 0600, 1, NULL,
				 &fuse_conn_congestion_threshold_ops))
		goto err;

	return 0;

err:
	fuse_ctl_remove_conn(fc);
	return -ENOMEM;
}
/*
 * Remove a connection from the control filesystem (if it exists).
 * Caller must hold fuse_mutex.
 *
 * Walks the dentries recorded in fc->ctl_dentry[] in reverse order,
 * detaches the connection from each inode (so open control files no
 * longer find it), unhashes the dentry and drops the recorded
 * reference.  Finally drops the link the connection directory held on
 * the root directory.
 */
void fuse_ctl_remove_conn(struct fuse_conn *fc)
{
	int i;

	if (!fuse_control_sb)
		return;

	for (i = fc->ctl_ndents - 1; i >= 0; i--) {
		struct dentry *dentry = fc->ctl_dentry[i];

		/*
		 * Tolerate an entry that never got an inode (e.g. inode
		 * allocation failed during setup) instead of
		 * dereferencing a NULL d_inode.
		 */
		if (dentry->d_inode)
			dentry->d_inode->i_private = NULL;
		d_drop(dentry);
		dput(dentry);
	}
	drop_nlink(fuse_control_sb->s_root->d_inode);
}
static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent)
{
struct tree_descr empty_descr = {""};
struct fuse_conn *fc;
int err;
err = simple_fill_super(sb, FUSE_CTL_SUPER_MAGIC, &empty_descr);
if (err)
return err;
mutex_lock(&fuse_mutex);
BUG_ON(fuse_control_sb);
fuse_control_sb = sb;
list_for_each_entry(fc, &fuse_conn_list, entry) {
err = fuse_ctl_add_conn(fc);
if (err) {
fuse_control_sb = NULL;
mutex_unlock(&fuse_mutex);
return err;
}
}
mutex_unlock(&fuse_mutex);
return 0;
}
/*
 * ->mount() of the control filesystem: delegates to mount_single()
 * with fuse_ctl_fill_super() as the fill callback.  @dev_name is not
 * used.
 */
static struct dentry *fuse_ctl_mount(struct file_system_type *fs_type,
				     int flags, const char *dev_name,
				     void *raw_data)
{
	struct dentry *root;

	root = mount_single(fs_type, flags, raw_data, fuse_ctl_fill_super);

	return root;
}
static void fuse_ctl_kill_sb(struct super_block *sb)
{
struct fuse_conn *fc;
mutex_lock(&fuse_mutex);
fuse_control_sb = NULL;
list_for_each_entry(fc, &fuse_conn_list, entry)
fc->ctl_ndents = 0;
mutex_unlock(&fuse_mutex);
kill_litter_super(sb);
}
/* Filesystem type of the control filesystem, registered as "fusectl". */
static struct file_system_type fuse_ctl_fs_type = {
	.name		= "fusectl",
	.owner		= THIS_MODULE,
	.kill_sb	= fuse_ctl_kill_sb,
	.mount		= fuse_ctl_mount,
};
MODULE_ALIAS_FS("fusectl");
/*
 * Register the control filesystem type.  Returns the result of
 * register_filesystem().
 */
int __init fuse_ctl_init(void)
{
	int err = register_filesystem(&fuse_ctl_fs_type);

	return err;
}
/*
 * Unregister the control filesystem type; the return value of
 * unregister_filesystem() is ignored.
 */
void fuse_ctl_cleanup(void)
{
	(void)unregister_filesystem(&fuse_ctl_fs_type);
}