0ab66088c8
sysfs: implement sysfs_dirent active reference and immediate disconnect Opening a sysfs node references its associated kobject, so userland can arbitrarily prolong lifetime of a kobject which complicates lifetime rules in drivers. This patch implements active reference and makes the association between kobject and sysfs immediately breakable. Now each sysfs_dirent has two reference counts - s_count and s_active. s_count is a regular reference count which guarantees that the containing sysfs_dirent is accessible. As long as s_count reference is held, all sysfs internal fields in sysfs_dirent are accessible including s_parent and s_name. The newly added s_active is active reference count. This is acquired by invoking sysfs_get_active() and it's the caller's responsibility to ensure sysfs_dirent itself is accessible (should be holding s_count one way or the other). Dereferencing sysfs_dirent to access objects out of sysfs proper requires active reference. This includes access to the associated kobjects, attributes and ops. The active references can be drained and denied by calling sysfs_deactivate(). All active sysfs_dirents must be deactivated after deletion but before the default reference is dropped. This enables immediate disconnect of sysfs nodes. Once a sysfs_dirent is deleted, it won't access any entity external to sysfs proper. Because attr/bin_attr ops access both the node itself and its parent for kobject, they need to hold active references to both. sysfs_get/put_active_two() helpers are provided to help grabbing both references. Parent's is acquired first and released last. Unlike other operations, mmapped area lingers on after mmap() is finished and the module implement implementing it and kobj need to stay referenced till all the mapped pages are gone. This is accomplished by holding one set of active references to the bin_attr and its parent if there have been any mmap during lifetime of an openfile. The references are dropped when the openfile is released. This change makes sysfs lifetime rules independent from both kobject's and module's. It not only fixes several race conditions caused by sysfs not holding onto the proper module when referencing kobject, but also helps fixing and simplifying lifetime management in driver model and drivers by taking sysfs out of the equation. Please read the following message for more info. http://article.gmane.org/gmane.linux.kernel/510293 Signed-off-by: Tejun Heo <htejun@gmail.com> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
293 lines
7.1 KiB
C
293 lines
7.1 KiB
C
/*
|
|
* inode.c - basic inode and dentry operations.
|
|
*
|
|
* sysfs is Copyright (c) 2001-3 Patrick Mochel
|
|
*
|
|
* Please see Documentation/filesystems/sysfs.txt for more information.
|
|
*/
|
|
|
|
#undef DEBUG
|
|
|
|
#include <linux/pagemap.h>
|
|
#include <linux/namei.h>
|
|
#include <linux/backing-dev.h>
|
|
#include <linux/capability.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/sched.h>
|
|
#include <asm/semaphore.h>
|
|
#include "sysfs.h"
|
|
|
|
extern struct super_block * sysfs_sb;
|
|
|
|
static const struct address_space_operations sysfs_aops = {
|
|
.readpage = simple_readpage,
|
|
.prepare_write = simple_prepare_write,
|
|
.commit_write = simple_commit_write
|
|
};
|
|
|
|
static struct backing_dev_info sysfs_backing_dev_info = {
|
|
.ra_pages = 0, /* No readahead */
|
|
.capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
|
|
};
|
|
|
|
static const struct inode_operations sysfs_inode_operations ={
|
|
.setattr = sysfs_setattr,
|
|
};
|
|
|
|
void sysfs_delete_inode(struct inode *inode)
|
|
{
|
|
/* Free the shadowed directory inode operations */
|
|
if (sysfs_is_shadowed_inode(inode)) {
|
|
kfree(inode->i_op);
|
|
inode->i_op = NULL;
|
|
}
|
|
return generic_delete_inode(inode);
|
|
}
|
|
|
|
int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
|
|
{
|
|
struct inode * inode = dentry->d_inode;
|
|
struct sysfs_dirent * sd = dentry->d_fsdata;
|
|
struct iattr * sd_iattr;
|
|
unsigned int ia_valid = iattr->ia_valid;
|
|
int error;
|
|
|
|
if (!sd)
|
|
return -EINVAL;
|
|
|
|
sd_iattr = sd->s_iattr;
|
|
|
|
error = inode_change_ok(inode, iattr);
|
|
if (error)
|
|
return error;
|
|
|
|
error = inode_setattr(inode, iattr);
|
|
if (error)
|
|
return error;
|
|
|
|
if (!sd_iattr) {
|
|
/* setting attributes for the first time, allocate now */
|
|
sd_iattr = kzalloc(sizeof(struct iattr), GFP_KERNEL);
|
|
if (!sd_iattr)
|
|
return -ENOMEM;
|
|
/* assign default attributes */
|
|
sd_iattr->ia_mode = sd->s_mode;
|
|
sd_iattr->ia_uid = 0;
|
|
sd_iattr->ia_gid = 0;
|
|
sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME;
|
|
sd->s_iattr = sd_iattr;
|
|
}
|
|
|
|
/* attributes were changed atleast once in past */
|
|
|
|
if (ia_valid & ATTR_UID)
|
|
sd_iattr->ia_uid = iattr->ia_uid;
|
|
if (ia_valid & ATTR_GID)
|
|
sd_iattr->ia_gid = iattr->ia_gid;
|
|
if (ia_valid & ATTR_ATIME)
|
|
sd_iattr->ia_atime = timespec_trunc(iattr->ia_atime,
|
|
inode->i_sb->s_time_gran);
|
|
if (ia_valid & ATTR_MTIME)
|
|
sd_iattr->ia_mtime = timespec_trunc(iattr->ia_mtime,
|
|
inode->i_sb->s_time_gran);
|
|
if (ia_valid & ATTR_CTIME)
|
|
sd_iattr->ia_ctime = timespec_trunc(iattr->ia_ctime,
|
|
inode->i_sb->s_time_gran);
|
|
if (ia_valid & ATTR_MODE) {
|
|
umode_t mode = iattr->ia_mode;
|
|
|
|
if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
|
|
mode &= ~S_ISGID;
|
|
sd_iattr->ia_mode = sd->s_mode = mode;
|
|
}
|
|
|
|
return error;
|
|
}
|
|
|
|
static inline void set_default_inode_attr(struct inode * inode, mode_t mode)
|
|
{
|
|
inode->i_mode = mode;
|
|
inode->i_uid = 0;
|
|
inode->i_gid = 0;
|
|
inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
|
|
}
|
|
|
|
static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
|
|
{
|
|
inode->i_mode = iattr->ia_mode;
|
|
inode->i_uid = iattr->ia_uid;
|
|
inode->i_gid = iattr->ia_gid;
|
|
inode->i_atime = iattr->ia_atime;
|
|
inode->i_mtime = iattr->ia_mtime;
|
|
inode->i_ctime = iattr->ia_ctime;
|
|
}
|
|
|
|
|
|
/*
|
|
* sysfs has a different i_mutex lock order behavior for i_mutex than other
|
|
* filesystems; sysfs i_mutex is called in many places with subsystem locks
|
|
* held. At the same time, many of the VFS locking rules do not apply to
|
|
* sysfs at all (cross directory rename for example). To untangle this mess
|
|
* (which gives false positives in lockdep), we're giving sysfs inodes their
|
|
* own class for i_mutex.
|
|
*/
|
|
static struct lock_class_key sysfs_inode_imutex_key;
|
|
|
|
struct inode * sysfs_new_inode(mode_t mode, struct sysfs_dirent * sd)
|
|
{
|
|
struct inode * inode = new_inode(sysfs_sb);
|
|
if (inode) {
|
|
inode->i_blocks = 0;
|
|
inode->i_mapping->a_ops = &sysfs_aops;
|
|
inode->i_mapping->backing_dev_info = &sysfs_backing_dev_info;
|
|
inode->i_op = &sysfs_inode_operations;
|
|
inode->i_ino = sd->s_ino;
|
|
lockdep_set_class(&inode->i_mutex, &sysfs_inode_imutex_key);
|
|
|
|
if (sd->s_iattr) {
|
|
/* sysfs_dirent has non-default attributes
|
|
* get them for the new inode from persistent copy
|
|
* in sysfs_dirent
|
|
*/
|
|
set_inode_attr(inode, sd->s_iattr);
|
|
} else
|
|
set_default_inode_attr(inode, mode);
|
|
}
|
|
return inode;
|
|
}
|
|
|
|
int sysfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *))
|
|
{
|
|
int error = 0;
|
|
struct inode * inode = NULL;
|
|
if (dentry) {
|
|
if (!dentry->d_inode) {
|
|
struct sysfs_dirent * sd = dentry->d_fsdata;
|
|
if ((inode = sysfs_new_inode(mode, sd))) {
|
|
if (dentry->d_parent && dentry->d_parent->d_inode) {
|
|
struct inode *p_inode = dentry->d_parent->d_inode;
|
|
p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME;
|
|
}
|
|
goto Proceed;
|
|
}
|
|
else
|
|
error = -ENOMEM;
|
|
} else
|
|
error = -EEXIST;
|
|
} else
|
|
error = -ENOENT;
|
|
goto Done;
|
|
|
|
Proceed:
|
|
if (init)
|
|
error = init(inode);
|
|
if (!error) {
|
|
d_instantiate(dentry, inode);
|
|
if (S_ISDIR(mode))
|
|
dget(dentry); /* pin only directory dentry in core */
|
|
} else
|
|
iput(inode);
|
|
Done:
|
|
return error;
|
|
}
|
|
|
|
static inline void orphan_all_buffers(struct inode *node)
|
|
{
|
|
struct sysfs_buffer_collection *set;
|
|
struct sysfs_buffer *buf;
|
|
|
|
mutex_lock_nested(&node->i_mutex, I_MUTEX_CHILD);
|
|
set = node->i_private;
|
|
if (set) {
|
|
list_for_each_entry(buf, &set->associates, associates) {
|
|
down(&buf->sem);
|
|
buf->orphaned = 1;
|
|
up(&buf->sem);
|
|
}
|
|
}
|
|
mutex_unlock(&node->i_mutex);
|
|
}
|
|
|
|
|
|
/*
|
|
* Unhashes the dentry corresponding to given sysfs_dirent
|
|
* Called with parent inode's i_mutex held.
|
|
*/
|
|
void sysfs_drop_dentry(struct sysfs_dirent * sd, struct dentry * parent)
|
|
{
|
|
struct dentry *dentry = NULL;
|
|
struct inode *inode;
|
|
|
|
/* We're not holding a reference to ->s_dentry dentry but the
|
|
* field will stay valid as long as sysfs_lock is held.
|
|
*/
|
|
spin_lock(&sysfs_lock);
|
|
spin_lock(&dcache_lock);
|
|
|
|
/* dget dentry if it's still alive */
|
|
if (sd->s_dentry && sd->s_dentry->d_inode)
|
|
dentry = dget_locked(sd->s_dentry);
|
|
|
|
spin_unlock(&dcache_lock);
|
|
spin_unlock(&sysfs_lock);
|
|
|
|
/* drop dentry */
|
|
if (dentry) {
|
|
spin_lock(&dcache_lock);
|
|
spin_lock(&dentry->d_lock);
|
|
if (!d_unhashed(dentry) && dentry->d_inode) {
|
|
inode = dentry->d_inode;
|
|
spin_lock(&inode->i_lock);
|
|
__iget(inode);
|
|
spin_unlock(&inode->i_lock);
|
|
dget_locked(dentry);
|
|
__d_drop(dentry);
|
|
spin_unlock(&dentry->d_lock);
|
|
spin_unlock(&dcache_lock);
|
|
simple_unlink(parent->d_inode, dentry);
|
|
orphan_all_buffers(inode);
|
|
iput(inode);
|
|
} else {
|
|
spin_unlock(&dentry->d_lock);
|
|
spin_unlock(&dcache_lock);
|
|
}
|
|
|
|
dput(dentry);
|
|
}
|
|
}
|
|
|
|
int sysfs_hash_and_remove(struct dentry * dir, const char * name)
|
|
{
|
|
struct sysfs_dirent * sd;
|
|
struct sysfs_dirent * parent_sd;
|
|
int found = 0;
|
|
|
|
if (!dir)
|
|
return -ENOENT;
|
|
|
|
if (dir->d_inode == NULL)
|
|
/* no inode means this hasn't been made visible yet */
|
|
return -ENOENT;
|
|
|
|
parent_sd = dir->d_fsdata;
|
|
mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
|
|
list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
|
|
if (!sd->s_type)
|
|
continue;
|
|
if (!strcmp(sd->s_name, name)) {
|
|
list_del_init(&sd->s_sibling);
|
|
sysfs_drop_dentry(sd, dir);
|
|
found = 1;
|
|
break;
|
|
}
|
|
}
|
|
mutex_unlock(&dir->d_inode->i_mutex);
|
|
|
|
if (!found)
|
|
return -ENOENT;
|
|
|
|
sysfs_deactivate(sd);
|
|
sysfs_put(sd);
|
|
return 0;
|
|
}
|