From 9cb73b95df5118a987a376f187693b0c62f3d69d Mon Sep 17 00:00:00 2001 From: Josh Boyer Date: Tue, 3 Sep 2013 14:17:59 -0400 Subject: [PATCH] Add keyring patches to support krb5 (rhbz 1003043) --- config-generic | 2 + kernel.spec | 11 + keys-expand-keyring.patch | 6834 +++++++++++++++++++++++++++++++++++++ keys-krb-support.patch | 747 ++++ 4 files changed, 7594 insertions(+) create mode 100644 keys-expand-keyring.patch create mode 100644 keys-krb-support.patch diff --git a/config-generic b/config-generic index 57d7d9af3..0acca3193 100644 --- a/config-generic +++ b/config-generic @@ -4203,6 +4203,8 @@ CONFIG_ZLIB_DEFLATE=m CONFIG_INITRAMFS_SOURCE="" CONFIG_KEYS=y +CONFIG_PERSISTENT_KEYRINGS=y +CONFIG_BIG_KEYS=m CONFIG_TRUSTED_KEYS=m CONFIG_ENCRYPTED_KEYS=m CONFIG_KEYS_DEBUG_PROC_KEYS=y diff --git a/kernel.spec b/kernel.spec index 9beaa3f3c..564c6e574 100644 --- a/kernel.spec +++ b/kernel.spec @@ -646,6 +646,10 @@ Patch800: crash-driver.patch # crypto/ +# keys +Patch900: keys-expand-keyring.patch +Patch901: keys-krb-support.patch + # secure boot Patch1000: secure-modules.patch Patch1001: modsign-uefi.patch @@ -1379,6 +1383,10 @@ ApplyPatch crash-driver.patch # crypto/ +# keys +ApplyPatch keys-expand-keyring.patch +ApplyPatch keys-krb-support.patch + # secure boot ApplyPatch secure-modules.patch ApplyPatch modsign-uefi.patch @@ -2258,6 +2266,9 @@ fi # ||----w | # || || %changelog +* Tue Sep 03 2013 Josh Boyer +- Add keyring patches to support krb5 (rhbz 1003043) + * Tue Sep 03 2013 Kyle McMartin - [arm64] disable VGA_CONSOLE and PARPORT_PC - [arm64] install dtb as on %{arm} diff --git a/keys-expand-keyring.patch b/keys-expand-keyring.patch new file mode 100644 index 000000000..75618243b --- /dev/null +++ b/keys-expand-keyring.patch @@ -0,0 +1,6834 @@ +From 96dcf8e91389e509021448ffd798cc68471fcf0f Mon Sep 17 00:00:00 2001 +From: David Howells +Date: Fri, 30 Aug 2013 15:37:50 +0100 +Subject: [PATCH 01/10] KEYS: Skip key state checks when checking for + possession + 
+Skip key state checks (invalidation, revocation and expiration) when checking +for possession. Without this, keys that have been marked invalid, revoked +keys and expired keys are not given a possession attribute - which means the +possessor is not granted any possession permits and cannot do anything with +them unless they also have a user, group or other permit. + +This causes failures in the keyutils test suite's revocation and expiration +tests now that commit 96b5c8fea6c0861621051290d705ec2e971963f1 reduced the +initial permissions granted to a key. + +The failures are due to accesses to revoked and expired keys being given +EACCES instead of EKEYREVOKED or EKEYEXPIRED. + +Signed-off-by: David Howells +--- + security/keys/internal.h | 1 + + security/keys/process_keys.c | 8 +++++--- + security/keys/request_key.c | 6 ++++-- + security/keys/request_key_auth.c | 2 +- + 4 files changed, 11 insertions(+), 6 deletions(-) + +diff --git a/security/keys/internal.h b/security/keys/internal.h +index d4f1468..df971fe 100644 +--- a/security/keys/internal.h ++++ b/security/keys/internal.h +@@ -124,6 +124,7 @@ extern key_ref_t search_my_process_keyrings(struct key_type *type, + extern key_ref_t search_process_keyrings(struct key_type *type, + const void *description, + key_match_func_t match, ++ bool no_state_check, + const struct cred *cred); + + extern struct key *find_keyring_by_name(const char *name, bool skip_perm_check); +diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c +index 42defae..a3410d6 100644 +--- a/security/keys/process_keys.c ++++ b/security/keys/process_keys.c +@@ -440,6 +440,7 @@ found: + key_ref_t search_process_keyrings(struct key_type *type, + const void *description, + key_match_func_t match, ++ bool no_state_check, + const struct cred *cred) + { + struct request_key_auth *rka; +@@ -448,7 +449,7 @@ key_ref_t search_process_keyrings(struct key_type *type, + might_sleep(); + + key_ref = search_my_process_keyrings(type, 
description, match, +- false, cred); ++ no_state_check, cred); + if (!IS_ERR(key_ref)) + goto found; + err = key_ref; +@@ -468,7 +469,8 @@ key_ref_t search_process_keyrings(struct key_type *type, + rka = cred->request_key_auth->payload.data; + + key_ref = search_process_keyrings(type, description, +- match, rka->cred); ++ match, no_state_check, ++ rka->cred); + + up_read(&cred->request_key_auth->sem); + +@@ -675,7 +677,7 @@ try_again: + /* check to see if we possess the key */ + skey_ref = search_process_keyrings(key->type, key, + lookup_user_key_possessed, +- cred); ++ true, cred); + + if (!IS_ERR(skey_ref)) { + key_put(key); +diff --git a/security/keys/request_key.c b/security/keys/request_key.c +index c411f9b..172115b 100644 +--- a/security/keys/request_key.c ++++ b/security/keys/request_key.c +@@ -390,7 +390,8 @@ static int construct_alloc_key(struct key_type *type, + * waited for locks */ + mutex_lock(&key_construction_mutex); + +- key_ref = search_process_keyrings(type, description, type->match, cred); ++ key_ref = search_process_keyrings(type, description, type->match, ++ false, cred); + if (!IS_ERR(key_ref)) + goto key_already_present; + +@@ -539,7 +540,8 @@ struct key *request_key_and_link(struct key_type *type, + dest_keyring, flags); + + /* search all the process keyrings for a key */ +- key_ref = search_process_keyrings(type, description, type->match, cred); ++ key_ref = search_process_keyrings(type, description, type->match, ++ false, cred); + + if (!IS_ERR(key_ref)) { + key = key_ref_to_ptr(key_ref); +diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c +index 85730d5..92077de 100644 +--- a/security/keys/request_key_auth.c ++++ b/security/keys/request_key_auth.c +@@ -247,7 +247,7 @@ struct key *key_get_instantiation_authkey(key_serial_t target_id) + &key_type_request_key_auth, + (void *) (unsigned long) target_id, + key_get_instantiation_authkey_match, +- cred); ++ false, cred); + + if (IS_ERR(authkey_ref)) { + authkey = 
ERR_CAST(authkey_ref); +-- +1.8.3.1 + + +From 9b1294158dd1fbca78541b5d55c057e46b1a9ca2 Mon Sep 17 00:00:00 2001 +From: David Howells +Date: Fri, 30 Aug 2013 15:37:51 +0100 +Subject: [PATCH 02/10] KEYS: Use bool in make_key_ref() and is_key_possessed() + +Make make_key_ref() take a bool possession parameter and make +is_key_possessed() return a bool. + +Signed-off-by: David Howells +--- + Documentation/security/keys.txt | 7 +++---- + include/linux/key.h | 4 ++-- + security/keys/keyring.c | 5 +++-- + 3 files changed, 8 insertions(+), 8 deletions(-) + +diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt +index 7b4145d..9ede670 100644 +--- a/Documentation/security/keys.txt ++++ b/Documentation/security/keys.txt +@@ -865,15 +865,14 @@ encountered: + calling processes has a searchable link to the key from one of its + keyrings. There are three functions for dealing with these: + +- key_ref_t make_key_ref(const struct key *key, +- unsigned long possession); ++ key_ref_t make_key_ref(const struct key *key, bool possession); + + struct key *key_ref_to_ptr(const key_ref_t key_ref); + +- unsigned long is_key_possessed(const key_ref_t key_ref); ++ bool is_key_possessed(const key_ref_t key_ref); + + The first function constructs a key reference from a key pointer and +- possession information (which must be 0 or 1 and not any other value). ++ possession information (which must be true or false). + + The second function retrieves the key pointer from a reference and the + third retrieves the possession flag. 
+diff --git a/include/linux/key.h b/include/linux/key.h +index 4dfde11..51bce29 100644 +--- a/include/linux/key.h ++++ b/include/linux/key.h +@@ -99,7 +99,7 @@ struct keyring_name; + typedef struct __key_reference_with_attributes *key_ref_t; + + static inline key_ref_t make_key_ref(const struct key *key, +- unsigned long possession) ++ bool possession) + { + return (key_ref_t) ((unsigned long) key | possession); + } +@@ -109,7 +109,7 @@ static inline struct key *key_ref_to_ptr(const key_ref_t key_ref) + return (struct key *) ((unsigned long) key_ref & ~1UL); + } + +-static inline unsigned long is_key_possessed(const key_ref_t key_ref) ++static inline bool is_key_possessed(const key_ref_t key_ref) + { + return (unsigned long) key_ref & 1UL; + } +diff --git a/security/keys/keyring.c b/security/keys/keyring.c +index 6ece7f2..f784063 100644 +--- a/security/keys/keyring.c ++++ b/security/keys/keyring.c +@@ -329,9 +329,10 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref, + + struct keyring_list *keylist; + struct timespec now; +- unsigned long possessed, kflags; ++ unsigned long kflags; + struct key *keyring, *key; + key_ref_t key_ref; ++ bool possessed; + long err; + int sp, nkeys, kix; + +@@ -542,8 +543,8 @@ key_ref_t __keyring_search_one(key_ref_t keyring_ref, + key_perm_t perm) + { + struct keyring_list *klist; +- unsigned long possessed; + struct key *keyring, *key; ++ bool possessed; + int nkeys, loop; + + keyring = key_ref_to_ptr(keyring_ref); +-- +1.8.3.1 + + +From 4a7e7536b9b728f1d912d0e4c047c885c95e13a1 Mon Sep 17 00:00:00 2001 +From: David Howells +Date: Fri, 30 Aug 2013 15:37:51 +0100 +Subject: [PATCH 03/10] KEYS: key_is_dead() should take a const key pointer + argument + +key_is_dead() should take a const key pointer argument as it doesn't modify +what it points to. 
+ +Signed-off-by: David Howells +--- + security/keys/internal.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/security/keys/internal.h b/security/keys/internal.h +index df971fe..490aef5 100644 +--- a/security/keys/internal.h ++++ b/security/keys/internal.h +@@ -203,7 +203,7 @@ extern struct key *key_get_instantiation_authkey(key_serial_t target_id); + /* + * Determine whether a key is dead. + */ +-static inline bool key_is_dead(struct key *key, time_t limit) ++static inline bool key_is_dead(const struct key *key, time_t limit) + { + return + key->flags & ((1 << KEY_FLAG_DEAD) | +-- +1.8.3.1 + + +From 9007a0a7f8c135f0085e46db277de0cf7b944403 Mon Sep 17 00:00:00 2001 +From: David Howells +Date: Fri, 30 Aug 2013 15:37:52 +0100 +Subject: [PATCH 04/10] KEYS: Consolidate the concept of an 'index key' for key + access + +Consolidate the concept of an 'index key' for accessing keys. The index key +is the search term needed to find a key directly - basically the key type and +the key description. We can add to that the description length. + +This will be useful when turning a keyring into an associative array rather +than just a pointer block. 
+ +Signed-off-by: David Howells +--- + include/linux/key.h | 21 +++++++++---- + security/keys/internal.h | 8 ++--- + security/keys/key.c | 72 +++++++++++++++++++++++---------------------- + security/keys/keyring.c | 37 +++++++++++------------ + security/keys/request_key.c | 12 +++++--- + 5 files changed, 83 insertions(+), 67 deletions(-) + +diff --git a/include/linux/key.h b/include/linux/key.h +index 51bce29..d573e82 100644 +--- a/include/linux/key.h ++++ b/include/linux/key.h +@@ -82,6 +82,12 @@ struct key_owner; + struct keyring_list; + struct keyring_name; + ++struct keyring_index_key { ++ struct key_type *type; ++ const char *description; ++ size_t desc_len; ++}; ++ + /*****************************************************************************/ + /* + * key reference with possession attribute handling +@@ -129,7 +135,6 @@ struct key { + struct list_head graveyard_link; + struct rb_node serial_node; + }; +- struct key_type *type; /* type of key */ + struct rw_semaphore sem; /* change vs change sem */ + struct key_user *user; /* owner of this key */ + void *security; /* security data for this key */ +@@ -163,12 +168,18 @@ struct key { + #define KEY_FLAG_ROOT_CAN_CLEAR 6 /* set if key can be cleared by root without permission */ + #define KEY_FLAG_INVALIDATED 7 /* set if key has been invalidated */ + +- /* the description string +- * - this is used to match a key against search criteria +- * - this should be a printable string ++ /* the key type and key description string ++ * - the desc is used to match a key against search criteria ++ * - it should be a printable string + * - eg: for krb5 AFS, this might be "afs@REDHAT.COM" + */ +- char *description; ++ union { ++ struct keyring_index_key index_key; ++ struct { ++ struct key_type *type; /* type of key */ ++ char *description; ++ }; ++ }; + + /* type specific data + * - this is used by the keyring type to index the name +diff --git a/security/keys/internal.h b/security/keys/internal.h +index 490aef5..77441dd 
100644 +--- a/security/keys/internal.h ++++ b/security/keys/internal.h +@@ -89,19 +89,17 @@ extern struct key_type *key_type_lookup(const char *type); + extern void key_type_put(struct key_type *ktype); + + extern int __key_link_begin(struct key *keyring, +- const struct key_type *type, +- const char *description, ++ const struct keyring_index_key *index_key, + unsigned long *_prealloc); + extern int __key_link_check_live_key(struct key *keyring, struct key *key); + extern void __key_link(struct key *keyring, struct key *key, + unsigned long *_prealloc); + extern void __key_link_end(struct key *keyring, +- struct key_type *type, ++ const struct keyring_index_key *index_key, + unsigned long prealloc); + + extern key_ref_t __keyring_search_one(key_ref_t keyring_ref, +- const struct key_type *type, +- const char *description, ++ const struct keyring_index_key *index_key, + key_perm_t perm); + + extern struct key *keyring_search_instkey(struct key *keyring, +diff --git a/security/keys/key.c b/security/keys/key.c +index 8fb7c7b..7e6bc39 100644 +--- a/security/keys/key.c ++++ b/security/keys/key.c +@@ -242,8 +242,8 @@ struct key *key_alloc(struct key_type *type, const char *desc, + } + } + +- desclen = strlen(desc) + 1; +- quotalen = desclen + type->def_datalen; ++ desclen = strlen(desc); ++ quotalen = desclen + 1 + type->def_datalen; + + /* get hold of the key tracking for this user */ + user = key_user_lookup(uid); +@@ -277,7 +277,8 @@ struct key *key_alloc(struct key_type *type, const char *desc, + goto no_memory_2; + + if (desc) { +- key->description = kmemdup(desc, desclen, GFP_KERNEL); ++ key->index_key.desc_len = desclen; ++ key->index_key.description = kmemdup(desc, desclen + 1, GFP_KERNEL); + if (!key->description) + goto no_memory_3; + } +@@ -285,7 +286,7 @@ struct key *key_alloc(struct key_type *type, const char *desc, + atomic_set(&key->usage, 1); + init_rwsem(&key->sem); + lockdep_set_class(&key->sem, &type->lock_class); +- key->type = type; ++ 
key->index_key.type = type; + key->user = user; + key->quotalen = quotalen; + key->datalen = type->def_datalen; +@@ -489,8 +490,7 @@ int key_instantiate_and_link(struct key *key, + } + + if (keyring) { +- ret = __key_link_begin(keyring, key->type, key->description, +- &prealloc); ++ ret = __key_link_begin(keyring, &key->index_key, &prealloc); + if (ret < 0) + goto error_free_preparse; + } +@@ -499,7 +499,7 @@ int key_instantiate_and_link(struct key *key, + &prealloc); + + if (keyring) +- __key_link_end(keyring, key->type, prealloc); ++ __key_link_end(keyring, &key->index_key, prealloc); + + error_free_preparse: + if (key->type->preparse) +@@ -548,8 +548,7 @@ int key_reject_and_link(struct key *key, + ret = -EBUSY; + + if (keyring) +- link_ret = __key_link_begin(keyring, key->type, +- key->description, &prealloc); ++ link_ret = __key_link_begin(keyring, &key->index_key, &prealloc); + + mutex_lock(&key_construction_mutex); + +@@ -581,7 +580,7 @@ int key_reject_and_link(struct key *key, + mutex_unlock(&key_construction_mutex); + + if (keyring) +- __key_link_end(keyring, key->type, prealloc); ++ __key_link_end(keyring, &key->index_key, prealloc); + + /* wake up anyone waiting for a key to be constructed */ + if (awaken) +@@ -780,25 +779,27 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, + key_perm_t perm, + unsigned long flags) + { +- unsigned long prealloc; ++ struct keyring_index_key index_key = { ++ .description = description, ++ }; + struct key_preparsed_payload prep; + const struct cred *cred = current_cred(); +- struct key_type *ktype; ++ unsigned long prealloc; + struct key *keyring, *key = NULL; + key_ref_t key_ref; + int ret; + + /* look up the key type to see if it's one of the registered kernel + * types */ +- ktype = key_type_lookup(type); +- if (IS_ERR(ktype)) { ++ index_key.type = key_type_lookup(type); ++ if (IS_ERR(index_key.type)) { + key_ref = ERR_PTR(-ENODEV); + goto error; + } + + key_ref = ERR_PTR(-EINVAL); +- if (!ktype->match || 
!ktype->instantiate || +- (!description && !ktype->preparse)) ++ if (!index_key.type->match || !index_key.type->instantiate || ++ (!index_key.description && !index_key.type->preparse)) + goto error_put_type; + + keyring = key_ref_to_ptr(keyring_ref); +@@ -812,21 +813,22 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, + memset(&prep, 0, sizeof(prep)); + prep.data = payload; + prep.datalen = plen; +- prep.quotalen = ktype->def_datalen; +- if (ktype->preparse) { +- ret = ktype->preparse(&prep); ++ prep.quotalen = index_key.type->def_datalen; ++ if (index_key.type->preparse) { ++ ret = index_key.type->preparse(&prep); + if (ret < 0) { + key_ref = ERR_PTR(ret); + goto error_put_type; + } +- if (!description) +- description = prep.description; ++ if (!index_key.description) ++ index_key.description = prep.description; + key_ref = ERR_PTR(-EINVAL); +- if (!description) ++ if (!index_key.description) + goto error_free_prep; + } ++ index_key.desc_len = strlen(index_key.description); + +- ret = __key_link_begin(keyring, ktype, description, &prealloc); ++ ret = __key_link_begin(keyring, &index_key, &prealloc); + if (ret < 0) { + key_ref = ERR_PTR(ret); + goto error_free_prep; +@@ -844,9 +846,8 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, + * key of the same type and description in the destination keyring and + * update that instead if possible + */ +- if (ktype->update) { +- key_ref = __keyring_search_one(keyring_ref, ktype, description, +- 0); ++ if (index_key.type->update) { ++ key_ref = __keyring_search_one(keyring_ref, &index_key, 0); + if (!IS_ERR(key_ref)) + goto found_matching_key; + } +@@ -856,16 +857,17 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, + perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR; + perm |= KEY_USR_VIEW; + +- if (ktype->read) ++ if (index_key.type->read) + perm |= KEY_POS_READ; + +- if (ktype == &key_type_keyring || ktype->update) ++ if (index_key.type == &key_type_keyring || ++ 
index_key.type->update) + perm |= KEY_POS_WRITE; + } + + /* allocate a new key */ +- key = key_alloc(ktype, description, cred->fsuid, cred->fsgid, cred, +- perm, flags); ++ key = key_alloc(index_key.type, index_key.description, ++ cred->fsuid, cred->fsgid, cred, perm, flags); + if (IS_ERR(key)) { + key_ref = ERR_CAST(key); + goto error_link_end; +@@ -882,12 +884,12 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, + key_ref = make_key_ref(key, is_key_possessed(keyring_ref)); + + error_link_end: +- __key_link_end(keyring, ktype, prealloc); ++ __key_link_end(keyring, &index_key, prealloc); + error_free_prep: +- if (ktype->preparse) +- ktype->free_preparse(&prep); ++ if (index_key.type->preparse) ++ index_key.type->free_preparse(&prep); + error_put_type: +- key_type_put(ktype); ++ key_type_put(index_key.type); + error: + return key_ref; + +@@ -895,7 +897,7 @@ error: + /* we found a matching key, so we're going to try to update it + * - we can drop the locks first as we have the key pinned + */ +- __key_link_end(keyring, ktype, prealloc); ++ __key_link_end(keyring, &index_key, prealloc); + + key_ref = __key_update(key_ref, &prep); + goto error_free_prep; +diff --git a/security/keys/keyring.c b/security/keys/keyring.c +index f784063..c7f59f9 100644 +--- a/security/keys/keyring.c ++++ b/security/keys/keyring.c +@@ -538,8 +538,7 @@ EXPORT_SYMBOL(keyring_search); + * to the returned key reference. 
+ */ + key_ref_t __keyring_search_one(key_ref_t keyring_ref, +- const struct key_type *ktype, +- const char *description, ++ const struct keyring_index_key *index_key, + key_perm_t perm) + { + struct keyring_list *klist; +@@ -558,9 +557,9 @@ key_ref_t __keyring_search_one(key_ref_t keyring_ref, + smp_rmb(); + for (loop = 0; loop < nkeys ; loop++) { + key = rcu_dereference(klist->keys[loop]); +- if (key->type == ktype && ++ if (key->type == index_key->type && + (!key->type->match || +- key->type->match(key, description)) && ++ key->type->match(key, index_key->description)) && + key_permission(make_key_ref(key, possessed), + perm) == 0 && + !(key->flags & ((1 << KEY_FLAG_INVALIDATED) | +@@ -747,8 +746,8 @@ static void keyring_unlink_rcu_disposal(struct rcu_head *rcu) + /* + * Preallocate memory so that a key can be linked into to a keyring. + */ +-int __key_link_begin(struct key *keyring, const struct key_type *type, +- const char *description, unsigned long *_prealloc) ++int __key_link_begin(struct key *keyring, const struct keyring_index_key *index_key, ++ unsigned long *_prealloc) + __acquires(&keyring->sem) + __acquires(&keyring_serialise_link_sem) + { +@@ -759,7 +758,8 @@ int __key_link_begin(struct key *keyring, const struct key_type *type, + size_t size; + int loop, lru, ret; + +- kenter("%d,%s,%s,", key_serial(keyring), type->name, description); ++ kenter("%d,%s,%s,", ++ key_serial(keyring), index_key->type->name, index_key->description); + + if (keyring->type != &key_type_keyring) + return -ENOTDIR; +@@ -772,7 +772,7 @@ int __key_link_begin(struct key *keyring, const struct key_type *type, + + /* serialise link/link calls to prevent parallel calls causing a cycle + * when linking two keyring in opposite orders */ +- if (type == &key_type_keyring) ++ if (index_key->type == &key_type_keyring) + down_write(&keyring_serialise_link_sem); + + klist = rcu_dereference_locked_keyring(keyring); +@@ -784,8 +784,8 @@ int __key_link_begin(struct key *keyring, const 
struct key_type *type, + for (loop = klist->nkeys - 1; loop >= 0; loop--) { + struct key *key = rcu_deref_link_locked(klist, loop, + keyring); +- if (key->type == type && +- strcmp(key->description, description) == 0) { ++ if (key->type == index_key->type && ++ strcmp(key->description, index_key->description) == 0) { + /* Found a match - we'll replace the link with + * one to the new key. We record the slot + * position. +@@ -865,7 +865,7 @@ error_quota: + key_payload_reserve(keyring, + keyring->datalen - KEYQUOTA_LINK_BYTES); + error_sem: +- if (type == &key_type_keyring) ++ if (index_key->type == &key_type_keyring) + up_write(&keyring_serialise_link_sem); + error_krsem: + up_write(&keyring->sem); +@@ -957,16 +957,17 @@ void __key_link(struct key *keyring, struct key *key, + * + * Must be called with __key_link_begin() having being called. + */ +-void __key_link_end(struct key *keyring, struct key_type *type, ++void __key_link_end(struct key *keyring, ++ const struct keyring_index_key *index_key, + unsigned long prealloc) + __releases(&keyring->sem) + __releases(&keyring_serialise_link_sem) + { +- BUG_ON(type == NULL); +- BUG_ON(type->name == NULL); +- kenter("%d,%s,%lx", keyring->serial, type->name, prealloc); ++ BUG_ON(index_key->type == NULL); ++ BUG_ON(index_key->type->name == NULL); ++ kenter("%d,%s,%lx", keyring->serial, index_key->type->name, prealloc); + +- if (type == &key_type_keyring) ++ if (index_key->type == &key_type_keyring) + up_write(&keyring_serialise_link_sem); + + if (prealloc) { +@@ -1007,12 +1008,12 @@ int key_link(struct key *keyring, struct key *key) + key_check(keyring); + key_check(key); + +- ret = __key_link_begin(keyring, key->type, key->description, &prealloc); ++ ret = __key_link_begin(keyring, &key->index_key, &prealloc); + if (ret == 0) { + ret = __key_link_check_live_key(keyring, key); + if (ret == 0) + __key_link(keyring, key, &prealloc); +- __key_link_end(keyring, key->type, prealloc); ++ __key_link_end(keyring, &key->index_key, 
prealloc); + } + + return ret; +diff --git a/security/keys/request_key.c b/security/keys/request_key.c +index 172115b..586cb79 100644 +--- a/security/keys/request_key.c ++++ b/security/keys/request_key.c +@@ -352,6 +352,11 @@ static int construct_alloc_key(struct key_type *type, + struct key_user *user, + struct key **_key) + { ++ const struct keyring_index_key index_key = { ++ .type = type, ++ .description = description, ++ .desc_len = strlen(description), ++ }; + const struct cred *cred = current_cred(); + unsigned long prealloc; + struct key *key; +@@ -379,8 +384,7 @@ static int construct_alloc_key(struct key_type *type, + set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags); + + if (dest_keyring) { +- ret = __key_link_begin(dest_keyring, type, description, +- &prealloc); ++ ret = __key_link_begin(dest_keyring, &index_key, &prealloc); + if (ret < 0) + goto link_prealloc_failed; + } +@@ -400,7 +404,7 @@ static int construct_alloc_key(struct key_type *type, + + mutex_unlock(&key_construction_mutex); + if (dest_keyring) +- __key_link_end(dest_keyring, type, prealloc); ++ __key_link_end(dest_keyring, &index_key, prealloc); + mutex_unlock(&user->cons_lock); + *_key = key; + kleave(" = 0 [%d]", key_serial(key)); +@@ -416,7 +420,7 @@ key_already_present: + ret = __key_link_check_live_key(dest_keyring, key); + if (ret == 0) + __key_link(dest_keyring, key, &prealloc); +- __key_link_end(dest_keyring, type, prealloc); ++ __key_link_end(dest_keyring, &index_key, prealloc); + if (ret < 0) + goto link_check_failed; + } +-- +1.8.3.1 + + +From eca8dad5cd291d2baf2d20372fcb0af9e75e25ea Mon Sep 17 00:00:00 2001 +From: David Howells +Date: Fri, 30 Aug 2013 15:37:52 +0100 +Subject: [PATCH 05/10] KEYS: Introduce a search context structure + +Search functions pass around a bunch of arguments, each of which gets copied +with each call. Introduce a search context structure to hold these. 
+ +Whilst we're at it, create a search flag that indicates whether the search +should go directly to the description or whether it should iterate through all +keys looking for a non-description match. + +This will be useful when keyrings use a generic data struct with generic +routines to manage their content as the search terms can just be passed +through to the iterator callback function. + +Also, for future use, the data to be supplied to the match function is +separated from the description pointer in the search context. This makes it +clear which is being supplied. + +Signed-off-by: David Howells +--- + include/linux/key-type.h | 5 ++ + security/keys/internal.h | 40 +++++++------ + security/keys/keyring.c | 70 +++++++++++------------ + security/keys/proc.c | 17 ++++-- + security/keys/process_keys.c | 117 +++++++++++++++++++-------------------- + security/keys/request_key.c | 56 +++++++++---------- + security/keys/request_key_auth.c | 14 +++-- + security/keys/user_defined.c | 18 +++--- + 8 files changed, 179 insertions(+), 158 deletions(-) + +diff --git a/include/linux/key-type.h b/include/linux/key-type.h +index 518a53a..f58737b 100644 +--- a/include/linux/key-type.h ++++ b/include/linux/key-type.h +@@ -63,6 +63,11 @@ struct key_type { + */ + size_t def_datalen; + ++ /* Default key search algorithm. */ ++ unsigned def_lookup_type; ++#define KEYRING_SEARCH_LOOKUP_DIRECT 0x0000 /* Direct lookup by description. */ ++#define KEYRING_SEARCH_LOOKUP_ITERATE 0x0001 /* Iterative search. 
*/ ++ + /* vet a description */ + int (*vet_description)(const char *description); + +diff --git a/security/keys/internal.h b/security/keys/internal.h +index 77441dd..f4bf938 100644 +--- a/security/keys/internal.h ++++ b/security/keys/internal.h +@@ -107,23 +107,31 @@ extern struct key *keyring_search_instkey(struct key *keyring, + + typedef int (*key_match_func_t)(const struct key *, const void *); + ++struct keyring_search_context { ++ struct keyring_index_key index_key; ++ const struct cred *cred; ++ key_match_func_t match; ++ const void *match_data; ++ unsigned flags; ++#define KEYRING_SEARCH_LOOKUP_TYPE 0x0001 /* [as type->def_lookup_type] */ ++#define KEYRING_SEARCH_NO_STATE_CHECK 0x0002 /* Skip state checks */ ++#define KEYRING_SEARCH_DO_STATE_CHECK 0x0004 /* Override NO_STATE_CHECK */ ++#define KEYRING_SEARCH_NO_UPDATE_TIME 0x0008 /* Don't update times */ ++#define KEYRING_SEARCH_NO_CHECK_PERM 0x0010 /* Don't check permissions */ ++#define KEYRING_SEARCH_DETECT_TOO_DEEP 0x0020 /* Give an error on excessive depth */ ++ ++ /* Internal stuff */ ++ int skipped_ret; ++ bool possessed; ++ key_ref_t result; ++ struct timespec now; ++}; ++ + extern key_ref_t keyring_search_aux(key_ref_t keyring_ref, +- const struct cred *cred, +- struct key_type *type, +- const void *description, +- key_match_func_t match, +- bool no_state_check); +- +-extern key_ref_t search_my_process_keyrings(struct key_type *type, +- const void *description, +- key_match_func_t match, +- bool no_state_check, +- const struct cred *cred); +-extern key_ref_t search_process_keyrings(struct key_type *type, +- const void *description, +- key_match_func_t match, +- bool no_state_check, +- const struct cred *cred); ++ struct keyring_search_context *ctx); ++ ++extern key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx); ++extern key_ref_t search_process_keyrings(struct keyring_search_context *ctx); + + extern struct key *find_keyring_by_name(const char *name, bool skip_perm_check); + 
+diff --git a/security/keys/keyring.c b/security/keys/keyring.c +index c7f59f9..b42f2d4 100644 +--- a/security/keys/keyring.c ++++ b/security/keys/keyring.c +@@ -280,11 +280,7 @@ EXPORT_SYMBOL(keyring_alloc); + /** + * keyring_search_aux - Search a keyring tree for a key matching some criteria + * @keyring_ref: A pointer to the keyring with possession indicator. +- * @cred: The credentials to use for permissions checks. +- * @type: The type of key to search for. +- * @description: Parameter for @match. +- * @match: Function to rule on whether or not a key is the one required. +- * @no_state_check: Don't check if a matching key is bad ++ * @ctx: The keyring search context. + * + * Search the supplied keyring tree for a key that matches the criteria given. + * The root keyring and any linked keyrings must grant Search permission to the +@@ -314,11 +310,7 @@ EXPORT_SYMBOL(keyring_alloc); + * @keyring_ref is propagated to the returned key reference. + */ + key_ref_t keyring_search_aux(key_ref_t keyring_ref, +- const struct cred *cred, +- struct key_type *type, +- const void *description, +- key_match_func_t match, +- bool no_state_check) ++ struct keyring_search_context *ctx) + { + struct { + /* Need a separate keylist pointer for RCU purposes */ +@@ -328,20 +320,18 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref, + } stack[KEYRING_SEARCH_MAX_DEPTH]; + + struct keyring_list *keylist; +- struct timespec now; + unsigned long kflags; + struct key *keyring, *key; + key_ref_t key_ref; +- bool possessed; + long err; + int sp, nkeys, kix; + + keyring = key_ref_to_ptr(keyring_ref); +- possessed = is_key_possessed(keyring_ref); ++ ctx->possessed = is_key_possessed(keyring_ref); + key_check(keyring); + + /* top keyring must have search permission to begin the search */ +- err = key_task_permission(keyring_ref, cred, KEY_SEARCH); ++ err = key_task_permission(keyring_ref, ctx->cred, KEY_SEARCH); + if (err < 0) { + key_ref = ERR_PTR(err); + goto error; +@@ -353,7 +343,7 @@ 
key_ref_t keyring_search_aux(key_ref_t keyring_ref, + + rcu_read_lock(); + +- now = current_kernel_time(); ++ ctx->now = current_kernel_time(); + err = -EAGAIN; + sp = 0; + +@@ -361,16 +351,17 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref, + * are looking for */ + key_ref = ERR_PTR(-EAGAIN); + kflags = keyring->flags; +- if (keyring->type == type && match(keyring, description)) { ++ if (keyring->type == ctx->index_key.type && ++ ctx->match(keyring, ctx->match_data)) { + key = keyring; +- if (no_state_check) ++ if (ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK) + goto found; + + /* check it isn't negative and hasn't expired or been + * revoked */ + if (kflags & (1 << KEY_FLAG_REVOKED)) + goto error_2; +- if (key->expiry && now.tv_sec >= key->expiry) ++ if (key->expiry && ctx->now.tv_sec >= key->expiry) + goto error_2; + key_ref = ERR_PTR(key->type_data.reject_error); + if (kflags & (1 << KEY_FLAG_NEGATIVE)) +@@ -384,7 +375,7 @@ key_ref_t keyring_search_aux(key_ref_t keyring_ref, + if (kflags & ((1 << KEY_FLAG_INVALIDATED) | + (1 << KEY_FLAG_REVOKED) | + (1 << KEY_FLAG_NEGATIVE)) || +- (keyring->expiry && now.tv_sec >= keyring->expiry)) ++ (keyring->expiry && ctx->now.tv_sec >= keyring->expiry)) + goto error_2; + + /* start processing a new keyring */ +@@ -406,29 +397,29 @@ descend: + kflags = key->flags; + + /* ignore keys not of this type */ +- if (key->type != type) ++ if (key->type != ctx->index_key.type) + continue; + + /* skip invalidated, revoked and expired keys */ +- if (!no_state_check) { ++ if (!(ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK)) { + if (kflags & ((1 << KEY_FLAG_INVALIDATED) | + (1 << KEY_FLAG_REVOKED))) + continue; + +- if (key->expiry && now.tv_sec >= key->expiry) ++ if (key->expiry && ctx->now.tv_sec >= key->expiry) + continue; + } + + /* keys that don't match */ +- if (!match(key, description)) ++ if (!ctx->match(key, ctx->match_data)) + continue; + + /* key must have search permissions */ +- if (key_task_permission(make_key_ref(key, 
possessed), +- cred, KEY_SEARCH) < 0) ++ if (key_task_permission(make_key_ref(key, ctx->possessed), ++ ctx->cred, KEY_SEARCH) < 0) + continue; + +- if (no_state_check) ++ if (ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK) + goto found; + + /* we set a different error code if we pass a negative key */ +@@ -456,8 +447,8 @@ ascend: + if (sp >= KEYRING_SEARCH_MAX_DEPTH) + continue; + +- if (key_task_permission(make_key_ref(key, possessed), +- cred, KEY_SEARCH) < 0) ++ if (key_task_permission(make_key_ref(key, ctx->possessed), ++ ctx->cred, KEY_SEARCH) < 0) + continue; + + /* stack the current position */ +@@ -489,12 +480,12 @@ not_this_keyring: + /* we found a viable match */ + found: + atomic_inc(&key->usage); +- key->last_used_at = now.tv_sec; +- keyring->last_used_at = now.tv_sec; ++ key->last_used_at = ctx->now.tv_sec; ++ keyring->last_used_at = ctx->now.tv_sec; + while (sp > 0) +- stack[--sp].keyring->last_used_at = now.tv_sec; ++ stack[--sp].keyring->last_used_at = ctx->now.tv_sec; + key_check(key); +- key_ref = make_key_ref(key, possessed); ++ key_ref = make_key_ref(key, ctx->possessed); + error_2: + rcu_read_unlock(); + error: +@@ -514,11 +505,20 @@ key_ref_t keyring_search(key_ref_t keyring, + struct key_type *type, + const char *description) + { +- if (!type->match) ++ struct keyring_search_context ctx = { ++ .index_key.type = type, ++ .index_key.description = description, ++ .cred = current_cred(), ++ .match = type->match, ++ .match_data = description, ++ .flags = (type->def_lookup_type | ++ KEYRING_SEARCH_DO_STATE_CHECK), ++ }; ++ ++ if (!ctx.match) + return ERR_PTR(-ENOKEY); + +- return keyring_search_aux(keyring, current->cred, +- type, description, type->match, false); ++ return keyring_search_aux(keyring, &ctx); + } + EXPORT_SYMBOL(keyring_search); + +diff --git a/security/keys/proc.c b/security/keys/proc.c +index 217b685..88e9a46 100644 +--- a/security/keys/proc.c ++++ b/security/keys/proc.c +@@ -182,7 +182,6 @@ static void proc_keys_stop(struct seq_file 
*p, void *v) + + static int proc_keys_show(struct seq_file *m, void *v) + { +- const struct cred *cred = current_cred(); + struct rb_node *_p = v; + struct key *key = rb_entry(_p, struct key, serial_node); + struct timespec now; +@@ -191,15 +190,23 @@ static int proc_keys_show(struct seq_file *m, void *v) + char xbuf[12]; + int rc; + ++ struct keyring_search_context ctx = { ++ .index_key.type = key->type, ++ .index_key.description = key->description, ++ .cred = current_cred(), ++ .match = lookup_user_key_possessed, ++ .match_data = key, ++ .flags = (KEYRING_SEARCH_NO_STATE_CHECK | ++ KEYRING_SEARCH_LOOKUP_DIRECT), ++ }; ++ + key_ref = make_key_ref(key, 0); + + /* determine if the key is possessed by this process (a test we can + * skip if the key does not indicate the possessor can view it + */ + if (key->perm & KEY_POS_VIEW) { +- skey_ref = search_my_process_keyrings(key->type, key, +- lookup_user_key_possessed, +- true, cred); ++ skey_ref = search_my_process_keyrings(&ctx); + if (!IS_ERR(skey_ref)) { + key_ref_put(skey_ref); + key_ref = make_key_ref(key, 1); +@@ -211,7 +218,7 @@ static int proc_keys_show(struct seq_file *m, void *v) + * - the caller holds a spinlock, and thus the RCU read lock, making our + * access to __current_cred() safe + */ +- rc = key_task_permission(key_ref, cred, KEY_VIEW); ++ rc = key_task_permission(key_ref, ctx.cred, KEY_VIEW); + if (rc < 0) + return 0; + +diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c +index a3410d6..e68a3e0 100644 +--- a/security/keys/process_keys.c ++++ b/security/keys/process_keys.c +@@ -319,11 +319,7 @@ void key_fsgid_changed(struct task_struct *tsk) + * In the case of a successful return, the possession attribute is set on the + * returned key reference. 
+ */ +-key_ref_t search_my_process_keyrings(struct key_type *type, +- const void *description, +- key_match_func_t match, +- bool no_state_check, +- const struct cred *cred) ++key_ref_t search_my_process_keyrings(struct keyring_search_context *ctx) + { + key_ref_t key_ref, ret, err; + +@@ -339,10 +335,9 @@ key_ref_t search_my_process_keyrings(struct key_type *type, + err = ERR_PTR(-EAGAIN); + + /* search the thread keyring first */ +- if (cred->thread_keyring) { ++ if (ctx->cred->thread_keyring) { + key_ref = keyring_search_aux( +- make_key_ref(cred->thread_keyring, 1), +- cred, type, description, match, no_state_check); ++ make_key_ref(ctx->cred->thread_keyring, 1), ctx); + if (!IS_ERR(key_ref)) + goto found; + +@@ -358,10 +353,9 @@ key_ref_t search_my_process_keyrings(struct key_type *type, + } + + /* search the process keyring second */ +- if (cred->process_keyring) { ++ if (ctx->cred->process_keyring) { + key_ref = keyring_search_aux( +- make_key_ref(cred->process_keyring, 1), +- cred, type, description, match, no_state_check); ++ make_key_ref(ctx->cred->process_keyring, 1), ctx); + if (!IS_ERR(key_ref)) + goto found; + +@@ -379,11 +373,11 @@ key_ref_t search_my_process_keyrings(struct key_type *type, + } + + /* search the session keyring */ +- if (cred->session_keyring) { ++ if (ctx->cred->session_keyring) { + rcu_read_lock(); + key_ref = keyring_search_aux( +- make_key_ref(rcu_dereference(cred->session_keyring), 1), +- cred, type, description, match, no_state_check); ++ make_key_ref(rcu_dereference(ctx->cred->session_keyring), 1), ++ ctx); + rcu_read_unlock(); + + if (!IS_ERR(key_ref)) +@@ -402,10 +396,10 @@ key_ref_t search_my_process_keyrings(struct key_type *type, + } + } + /* or search the user-session keyring */ +- else if (cred->user->session_keyring) { ++ else if (ctx->cred->user->session_keyring) { + key_ref = keyring_search_aux( +- make_key_ref(cred->user->session_keyring, 1), +- cred, type, description, match, no_state_check); ++ 
make_key_ref(ctx->cred->user->session_keyring, 1), ++ ctx); + if (!IS_ERR(key_ref)) + goto found; + +@@ -437,19 +431,14 @@ found: + * + * Return same as search_my_process_keyrings(). + */ +-key_ref_t search_process_keyrings(struct key_type *type, +- const void *description, +- key_match_func_t match, +- bool no_state_check, +- const struct cred *cred) ++key_ref_t search_process_keyrings(struct keyring_search_context *ctx) + { + struct request_key_auth *rka; + key_ref_t key_ref, ret = ERR_PTR(-EACCES), err; + + might_sleep(); + +- key_ref = search_my_process_keyrings(type, description, match, +- no_state_check, cred); ++ key_ref = search_my_process_keyrings(ctx); + if (!IS_ERR(key_ref)) + goto found; + err = key_ref; +@@ -458,19 +447,21 @@ key_ref_t search_process_keyrings(struct key_type *type, + * search the keyrings of the process mentioned there + * - we don't permit access to request_key auth keys via this method + */ +- if (cred->request_key_auth && +- cred == current_cred() && +- type != &key_type_request_key_auth ++ if (ctx->cred->request_key_auth && ++ ctx->cred == current_cred() && ++ ctx->index_key.type != &key_type_request_key_auth + ) { ++ const struct cred *cred = ctx->cred; ++ + /* defend against the auth key being revoked */ + down_read(&cred->request_key_auth->sem); + +- if (key_validate(cred->request_key_auth) == 0) { +- rka = cred->request_key_auth->payload.data; ++ if (key_validate(ctx->cred->request_key_auth) == 0) { ++ rka = ctx->cred->request_key_auth->payload.data; + +- key_ref = search_process_keyrings(type, description, +- match, no_state_check, +- rka->cred); ++ ctx->cred = rka->cred; ++ key_ref = search_process_keyrings(ctx); ++ ctx->cred = cred; + + up_read(&cred->request_key_auth->sem); + +@@ -524,19 +515,23 @@ int lookup_user_key_possessed(const struct key *key, const void *target) + key_ref_t lookup_user_key(key_serial_t id, unsigned long lflags, + key_perm_t perm) + { ++ struct keyring_search_context ctx = { ++ .match = 
lookup_user_key_possessed, ++ .flags = (KEYRING_SEARCH_NO_STATE_CHECK | ++ KEYRING_SEARCH_LOOKUP_DIRECT), ++ }; + struct request_key_auth *rka; +- const struct cred *cred; + struct key *key; + key_ref_t key_ref, skey_ref; + int ret; + + try_again: +- cred = get_current_cred(); ++ ctx.cred = get_current_cred(); + key_ref = ERR_PTR(-ENOKEY); + + switch (id) { + case KEY_SPEC_THREAD_KEYRING: +- if (!cred->thread_keyring) { ++ if (!ctx.cred->thread_keyring) { + if (!(lflags & KEY_LOOKUP_CREATE)) + goto error; + +@@ -548,13 +543,13 @@ try_again: + goto reget_creds; + } + +- key = cred->thread_keyring; ++ key = ctx.cred->thread_keyring; + atomic_inc(&key->usage); + key_ref = make_key_ref(key, 1); + break; + + case KEY_SPEC_PROCESS_KEYRING: +- if (!cred->process_keyring) { ++ if (!ctx.cred->process_keyring) { + if (!(lflags & KEY_LOOKUP_CREATE)) + goto error; + +@@ -566,13 +561,13 @@ try_again: + goto reget_creds; + } + +- key = cred->process_keyring; ++ key = ctx.cred->process_keyring; + atomic_inc(&key->usage); + key_ref = make_key_ref(key, 1); + break; + + case KEY_SPEC_SESSION_KEYRING: +- if (!cred->session_keyring) { ++ if (!ctx.cred->session_keyring) { + /* always install a session keyring upon access if one + * doesn't exist yet */ + ret = install_user_keyrings(); +@@ -582,13 +577,13 @@ try_again: + ret = join_session_keyring(NULL); + else + ret = install_session_keyring( +- cred->user->session_keyring); ++ ctx.cred->user->session_keyring); + + if (ret < 0) + goto error; + goto reget_creds; +- } else if (cred->session_keyring == +- cred->user->session_keyring && ++ } else if (ctx.cred->session_keyring == ++ ctx.cred->user->session_keyring && + lflags & KEY_LOOKUP_CREATE) { + ret = join_session_keyring(NULL); + if (ret < 0) +@@ -597,32 +592,32 @@ try_again: + } + + rcu_read_lock(); +- key = rcu_dereference(cred->session_keyring); ++ key = rcu_dereference(ctx.cred->session_keyring); + atomic_inc(&key->usage); + rcu_read_unlock(); + key_ref = make_key_ref(key, 1); + 
break; + + case KEY_SPEC_USER_KEYRING: +- if (!cred->user->uid_keyring) { ++ if (!ctx.cred->user->uid_keyring) { + ret = install_user_keyrings(); + if (ret < 0) + goto error; + } + +- key = cred->user->uid_keyring; ++ key = ctx.cred->user->uid_keyring; + atomic_inc(&key->usage); + key_ref = make_key_ref(key, 1); + break; + + case KEY_SPEC_USER_SESSION_KEYRING: +- if (!cred->user->session_keyring) { ++ if (!ctx.cred->user->session_keyring) { + ret = install_user_keyrings(); + if (ret < 0) + goto error; + } + +- key = cred->user->session_keyring; ++ key = ctx.cred->user->session_keyring; + atomic_inc(&key->usage); + key_ref = make_key_ref(key, 1); + break; +@@ -633,7 +628,7 @@ try_again: + goto error; + + case KEY_SPEC_REQKEY_AUTH_KEY: +- key = cred->request_key_auth; ++ key = ctx.cred->request_key_auth; + if (!key) + goto error; + +@@ -642,20 +637,20 @@ try_again: + break; + + case KEY_SPEC_REQUESTOR_KEYRING: +- if (!cred->request_key_auth) ++ if (!ctx.cred->request_key_auth) + goto error; + +- down_read(&cred->request_key_auth->sem); ++ down_read(&ctx.cred->request_key_auth->sem); + if (test_bit(KEY_FLAG_REVOKED, +- &cred->request_key_auth->flags)) { ++ &ctx.cred->request_key_auth->flags)) { + key_ref = ERR_PTR(-EKEYREVOKED); + key = NULL; + } else { +- rka = cred->request_key_auth->payload.data; ++ rka = ctx.cred->request_key_auth->payload.data; + key = rka->dest_keyring; + atomic_inc(&key->usage); + } +- up_read(&cred->request_key_auth->sem); ++ up_read(&ctx.cred->request_key_auth->sem); + if (!key) + goto error; + key_ref = make_key_ref(key, 1); +@@ -675,9 +670,13 @@ try_again: + key_ref = make_key_ref(key, 0); + + /* check to see if we possess the key */ +- skey_ref = search_process_keyrings(key->type, key, +- lookup_user_key_possessed, +- true, cred); ++ ctx.index_key.type = key->type; ++ ctx.index_key.description = key->description; ++ ctx.index_key.desc_len = strlen(key->description); ++ ctx.match_data = key; ++ kdebug("check possessed"); ++ skey_ref = 
search_process_keyrings(&ctx); ++ kdebug("possessed=%p", skey_ref); + + if (!IS_ERR(skey_ref)) { + key_put(key); +@@ -717,14 +716,14 @@ try_again: + goto invalid_key; + + /* check the permissions */ +- ret = key_task_permission(key_ref, cred, perm); ++ ret = key_task_permission(key_ref, ctx.cred, perm); + if (ret < 0) + goto invalid_key; + + key->last_used_at = current_kernel_time().tv_sec; + + error: +- put_cred(cred); ++ put_cred(ctx.cred); + return key_ref; + + invalid_key: +@@ -735,7 +734,7 @@ invalid_key: + /* if we attempted to install a keyring, then it may have caused new + * creds to be installed */ + reget_creds: +- put_cred(cred); ++ put_cred(ctx.cred); + goto try_again; + } + +diff --git a/security/keys/request_key.c b/security/keys/request_key.c +index 586cb79..ab75df4 100644 +--- a/security/keys/request_key.c ++++ b/security/keys/request_key.c +@@ -345,38 +345,34 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) + * May return a key that's already under construction instead if there was a + * race between two thread calling request_key(). 
+ */ +-static int construct_alloc_key(struct key_type *type, +- const char *description, ++static int construct_alloc_key(struct keyring_search_context *ctx, + struct key *dest_keyring, + unsigned long flags, + struct key_user *user, + struct key **_key) + { +- const struct keyring_index_key index_key = { +- .type = type, +- .description = description, +- .desc_len = strlen(description), +- }; +- const struct cred *cred = current_cred(); + unsigned long prealloc; + struct key *key; + key_perm_t perm; + key_ref_t key_ref; + int ret; + +- kenter("%s,%s,,,", type->name, description); ++ kenter("%s,%s,,,", ++ ctx->index_key.type->name, ctx->index_key.description); + + *_key = NULL; + mutex_lock(&user->cons_lock); + + perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR; + perm |= KEY_USR_VIEW; +- if (type->read) ++ if (ctx->index_key.type->read) + perm |= KEY_POS_READ; +- if (type == &key_type_keyring || type->update) ++ if (ctx->index_key.type == &key_type_keyring || ++ ctx->index_key.type->update) + perm |= KEY_POS_WRITE; + +- key = key_alloc(type, description, cred->fsuid, cred->fsgid, cred, ++ key = key_alloc(ctx->index_key.type, ctx->index_key.description, ++ ctx->cred->fsuid, ctx->cred->fsgid, ctx->cred, + perm, flags); + if (IS_ERR(key)) + goto alloc_failed; +@@ -384,7 +380,7 @@ static int construct_alloc_key(struct key_type *type, + set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags); + + if (dest_keyring) { +- ret = __key_link_begin(dest_keyring, &index_key, &prealloc); ++ ret = __key_link_begin(dest_keyring, &ctx->index_key, &prealloc); + if (ret < 0) + goto link_prealloc_failed; + } +@@ -394,8 +390,7 @@ static int construct_alloc_key(struct key_type *type, + * waited for locks */ + mutex_lock(&key_construction_mutex); + +- key_ref = search_process_keyrings(type, description, type->match, +- false, cred); ++ key_ref = search_process_keyrings(ctx); + if (!IS_ERR(key_ref)) + goto key_already_present; + +@@ -404,7 +399,7 @@ static int 
construct_alloc_key(struct key_type *type, + + mutex_unlock(&key_construction_mutex); + if (dest_keyring) +- __key_link_end(dest_keyring, &index_key, prealloc); ++ __key_link_end(dest_keyring, &ctx->index_key, prealloc); + mutex_unlock(&user->cons_lock); + *_key = key; + kleave(" = 0 [%d]", key_serial(key)); +@@ -420,7 +415,7 @@ key_already_present: + ret = __key_link_check_live_key(dest_keyring, key); + if (ret == 0) + __key_link(dest_keyring, key, &prealloc); +- __key_link_end(dest_keyring, &index_key, prealloc); ++ __key_link_end(dest_keyring, &ctx->index_key, prealloc); + if (ret < 0) + goto link_check_failed; + } +@@ -449,8 +444,7 @@ alloc_failed: + /* + * Commence key construction. + */ +-static struct key *construct_key_and_link(struct key_type *type, +- const char *description, ++static struct key *construct_key_and_link(struct keyring_search_context *ctx, + const char *callout_info, + size_t callout_len, + void *aux, +@@ -469,8 +463,7 @@ static struct key *construct_key_and_link(struct key_type *type, + + construct_get_dest_keyring(&dest_keyring); + +- ret = construct_alloc_key(type, description, dest_keyring, flags, user, +- &key); ++ ret = construct_alloc_key(ctx, dest_keyring, flags, user, &key); + key_user_put(user); + + if (ret == 0) { +@@ -534,18 +527,24 @@ struct key *request_key_and_link(struct key_type *type, + struct key *dest_keyring, + unsigned long flags) + { +- const struct cred *cred = current_cred(); ++ struct keyring_search_context ctx = { ++ .index_key.type = type, ++ .index_key.description = description, ++ .cred = current_cred(), ++ .match = type->match, ++ .match_data = description, ++ .flags = KEYRING_SEARCH_LOOKUP_DIRECT, ++ }; + struct key *key; + key_ref_t key_ref; + int ret; + + kenter("%s,%s,%p,%zu,%p,%p,%lx", +- type->name, description, callout_info, callout_len, aux, +- dest_keyring, flags); ++ ctx.index_key.type->name, ctx.index_key.description, ++ callout_info, callout_len, aux, dest_keyring, flags); + + /* search all the 
process keyrings for a key */ +- key_ref = search_process_keyrings(type, description, type->match, +- false, cred); ++ key_ref = search_process_keyrings(&ctx); + + if (!IS_ERR(key_ref)) { + key = key_ref_to_ptr(key_ref); +@@ -568,9 +567,8 @@ struct key *request_key_and_link(struct key_type *type, + if (!callout_info) + goto error; + +- key = construct_key_and_link(type, description, callout_info, +- callout_len, aux, dest_keyring, +- flags); ++ key = construct_key_and_link(&ctx, callout_info, callout_len, ++ aux, dest_keyring, flags); + } + + error: +diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c +index 92077de..8d09852 100644 +--- a/security/keys/request_key_auth.c ++++ b/security/keys/request_key_auth.c +@@ -239,15 +239,17 @@ static int key_get_instantiation_authkey_match(const struct key *key, + */ + struct key *key_get_instantiation_authkey(key_serial_t target_id) + { +- const struct cred *cred = current_cred(); ++ struct keyring_search_context ctx = { ++ .index_key.type = &key_type_request_key_auth, ++ .cred = current_cred(), ++ .match = key_get_instantiation_authkey_match, ++ .match_data = (void *)(unsigned long)target_id, ++ .flags = KEYRING_SEARCH_LOOKUP_DIRECT, ++ }; + struct key *authkey; + key_ref_t authkey_ref; + +- authkey_ref = search_process_keyrings( +- &key_type_request_key_auth, +- (void *) (unsigned long) target_id, +- key_get_instantiation_authkey_match, +- false, cred); ++ authkey_ref = search_process_keyrings(&ctx); + + if (IS_ERR(authkey_ref)) { + authkey = ERR_CAST(authkey_ref); +diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c +index 55dc889..faa2cae 100644 +--- a/security/keys/user_defined.c ++++ b/security/keys/user_defined.c +@@ -25,14 +25,15 @@ static int logon_vet_description(const char *desc); + * arbitrary blob of data as the payload + */ + struct key_type key_type_user = { +- .name = "user", +- .instantiate = user_instantiate, +- .update = user_update, +- .match = user_match, 
+- .revoke = user_revoke, +- .destroy = user_destroy, +- .describe = user_describe, +- .read = user_read, ++ .name = "user", ++ .def_lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT, ++ .instantiate = user_instantiate, ++ .update = user_update, ++ .match = user_match, ++ .revoke = user_revoke, ++ .destroy = user_destroy, ++ .describe = user_describe, ++ .read = user_read, + }; + + EXPORT_SYMBOL_GPL(key_type_user); +@@ -45,6 +46,7 @@ EXPORT_SYMBOL_GPL(key_type_user); + */ + struct key_type key_type_logon = { + .name = "logon", ++ .def_lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT, + .instantiate = user_instantiate, + .update = user_update, + .match = user_match, +-- +1.8.3.1 + + +From 4dffed72b92a305bcdbb73b719570d8f4ec53f46 Mon Sep 17 00:00:00 2001 +From: David Howells +Date: Fri, 30 Aug 2013 15:37:52 +0100 +Subject: [PATCH 06/10] KEYS: Search for auth-key by name rather than target + key ID + +Search for auth-key by name rather than by target key ID as, in a future +patch, we'll be searching directly by index key in preference to iteration +over all keys. + +Signed-off-by: David Howells +--- + security/keys/request_key_auth.c | 21 +++++++-------------- + 1 file changed, 7 insertions(+), 14 deletions(-) + +diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c +index 8d09852..7495a93 100644 +--- a/security/keys/request_key_auth.c ++++ b/security/keys/request_key_auth.c +@@ -18,6 +18,7 @@ + #include + #include + #include "internal.h" ++#include + + static int request_key_auth_instantiate(struct key *, + struct key_preparsed_payload *); +@@ -222,33 +223,25 @@ error_alloc: + } + + /* +- * See if an authorisation key is associated with a particular key. 
+- */ +-static int key_get_instantiation_authkey_match(const struct key *key, +- const void *_id) +-{ +- struct request_key_auth *rka = key->payload.data; +- key_serial_t id = (key_serial_t)(unsigned long) _id; +- +- return rka->target_key->serial == id; +-} +- +-/* + * Search the current process's keyrings for the authorisation key for + * instantiation of a key. + */ + struct key *key_get_instantiation_authkey(key_serial_t target_id) + { ++ char description[16]; + struct keyring_search_context ctx = { + .index_key.type = &key_type_request_key_auth, ++ .index_key.description = description, + .cred = current_cred(), +- .match = key_get_instantiation_authkey_match, +- .match_data = (void *)(unsigned long)target_id, ++ .match = user_match, ++ .match_data = description, + .flags = KEYRING_SEARCH_LOOKUP_DIRECT, + }; + struct key *authkey; + key_ref_t authkey_ref; + ++ sprintf(description, "%x", target_id); ++ + authkey_ref = search_process_keyrings(&ctx); + + if (IS_ERR(authkey_ref)) { +-- +1.8.3.1 + + +From 5f3c76b0923620ddd5294270ac478819f06f21d1 Mon Sep 17 00:00:00 2001 +From: David Howells +Date: Fri, 30 Aug 2013 15:37:53 +0100 +Subject: [PATCH 07/10] KEYS: Define a __key_get() wrapper to use rather than + atomic_inc() + +Define a __key_get() wrapper to use rather than atomic_inc() on the key usage +count as this makes it easier to hook in refcount error debugging. + +Signed-off-by: David Howells +--- + Documentation/security/keys.txt | 13 ++++++++----- + include/linux/key.h | 10 +++++++--- + security/keys/key.c | 2 +- + security/keys/keyring.c | 6 +++--- + security/keys/process_keys.c | 16 ++++++++-------- + 5 files changed, 27 insertions(+), 20 deletions(-) + +diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt +index 9ede670..a4c33f1 100644 +--- a/Documentation/security/keys.txt ++++ b/Documentation/security/keys.txt +@@ -960,14 +960,17 @@ payload contents" for more information. + the argument will not be parsed. 
+ + +-(*) Extra references can be made to a key by calling the following function: ++(*) Extra references can be made to a key by calling one of the following ++ functions: + ++ struct key *__key_get(struct key *key); + struct key *key_get(struct key *key); + +- These need to be disposed of by calling key_put() when they've been +- finished with. The key pointer passed in will be returned. If the pointer +- is NULL or CONFIG_KEYS is not set then the key will not be dereferenced and +- no increment will take place. ++ Keys so referenced will need to be disposed of by calling key_put() when ++ they've been finished with. The key pointer passed in will be returned. ++ ++ In the case of key_get(), if the pointer is NULL or CONFIG_KEYS is not set ++ then the key will not be dereferenced and no increment will take place. + + + (*) A key's serial number can be obtained by calling: +diff --git a/include/linux/key.h b/include/linux/key.h +index d573e82..ef596c7 100644 +--- a/include/linux/key.h ++++ b/include/linux/key.h +@@ -219,13 +219,17 @@ extern void key_revoke(struct key *key); + extern void key_invalidate(struct key *key); + extern void key_put(struct key *key); + +-static inline struct key *key_get(struct key *key) ++static inline struct key *__key_get(struct key *key) + { +- if (key) +- atomic_inc(&key->usage); ++ atomic_inc(&key->usage); + return key; + } + ++static inline struct key *key_get(struct key *key) ++{ ++ return key ? 
__key_get(key) : key; ++} ++ + static inline void key_ref_put(key_ref_t key_ref) + { + key_put(key_ref_to_ptr(key_ref)); +diff --git a/security/keys/key.c b/security/keys/key.c +index 7e6bc39..1e23cc2 100644 +--- a/security/keys/key.c ++++ b/security/keys/key.c +@@ -644,7 +644,7 @@ found: + /* this races with key_put(), but that doesn't matter since key_put() + * doesn't actually change the key + */ +- atomic_inc(&key->usage); ++ __key_get(key); + + error: + spin_unlock(&key_serial_lock); +diff --git a/security/keys/keyring.c b/security/keys/keyring.c +index b42f2d4..87eff32 100644 +--- a/security/keys/keyring.c ++++ b/security/keys/keyring.c +@@ -479,7 +479,7 @@ not_this_keyring: + + /* we found a viable match */ + found: +- atomic_inc(&key->usage); ++ __key_get(key); + key->last_used_at = ctx->now.tv_sec; + keyring->last_used_at = ctx->now.tv_sec; + while (sp > 0) +@@ -573,7 +573,7 @@ key_ref_t __keyring_search_one(key_ref_t keyring_ref, + return ERR_PTR(-ENOKEY); + + found: +- atomic_inc(&key->usage); ++ __key_get(key); + keyring->last_used_at = key->last_used_at = + current_kernel_time().tv_sec; + rcu_read_unlock(); +@@ -909,7 +909,7 @@ void __key_link(struct key *keyring, struct key *key, + + klist = rcu_dereference_locked_keyring(keyring); + +- atomic_inc(&key->usage); ++ __key_get(key); + keyring->last_used_at = key->last_used_at = + current_kernel_time().tv_sec; + +diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c +index e68a3e0..68548ea 100644 +--- a/security/keys/process_keys.c ++++ b/security/keys/process_keys.c +@@ -235,7 +235,7 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring) + if (IS_ERR(keyring)) + return PTR_ERR(keyring); + } else { +- atomic_inc(&keyring->usage); ++ __key_get(keyring); + } + + /* install the keyring */ +@@ -544,7 +544,7 @@ try_again: + } + + key = ctx.cred->thread_keyring; +- atomic_inc(&key->usage); ++ __key_get(key); + key_ref = make_key_ref(key, 1); + break; + +@@ -562,7 
+562,7 @@ try_again: + } + + key = ctx.cred->process_keyring; +- atomic_inc(&key->usage); ++ __key_get(key); + key_ref = make_key_ref(key, 1); + break; + +@@ -593,7 +593,7 @@ try_again: + + rcu_read_lock(); + key = rcu_dereference(ctx.cred->session_keyring); +- atomic_inc(&key->usage); ++ __key_get(key); + rcu_read_unlock(); + key_ref = make_key_ref(key, 1); + break; +@@ -606,7 +606,7 @@ try_again: + } + + key = ctx.cred->user->uid_keyring; +- atomic_inc(&key->usage); ++ __key_get(key); + key_ref = make_key_ref(key, 1); + break; + +@@ -618,7 +618,7 @@ try_again: + } + + key = ctx.cred->user->session_keyring; +- atomic_inc(&key->usage); ++ __key_get(key); + key_ref = make_key_ref(key, 1); + break; + +@@ -632,7 +632,7 @@ try_again: + if (!key) + goto error; + +- atomic_inc(&key->usage); ++ __key_get(key); + key_ref = make_key_ref(key, 1); + break; + +@@ -648,7 +648,7 @@ try_again: + } else { + rka = ctx.cred->request_key_auth->payload.data; + key = rka->dest_keyring; +- atomic_inc(&key->usage); ++ __key_get(key); + } + up_read(&ctx.cred->request_key_auth->sem); + if (!key) +-- +1.8.3.1 + + +From 99b0f3185570bb92a61952673b9933d9c1999508 Mon Sep 17 00:00:00 2001 +From: David Howells +Date: Fri, 30 Aug 2013 15:37:53 +0100 +Subject: [PATCH 08/10] KEYS: Drop the permissions argument from + __keyring_search_one() + +Drop the permissions argument from __keyring_search_one() as the only caller +passes 0 here - which causes all checks to be skipped. 
+ +Signed-off-by: David Howells +--- + security/keys/internal.h | 3 +-- + security/keys/key.c | 2 +- + security/keys/keyring.c | 9 +++------ + 3 files changed, 5 insertions(+), 9 deletions(-) + +diff --git a/security/keys/internal.h b/security/keys/internal.h +index f4bf938..73950bf 100644 +--- a/security/keys/internal.h ++++ b/security/keys/internal.h +@@ -99,8 +99,7 @@ extern void __key_link_end(struct key *keyring, + unsigned long prealloc); + + extern key_ref_t __keyring_search_one(key_ref_t keyring_ref, +- const struct keyring_index_key *index_key, +- key_perm_t perm); ++ const struct keyring_index_key *index_key); + + extern struct key *keyring_search_instkey(struct key *keyring, + key_serial_t target_id); +diff --git a/security/keys/key.c b/security/keys/key.c +index 1e23cc2..7d716b8 100644 +--- a/security/keys/key.c ++++ b/security/keys/key.c +@@ -847,7 +847,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, + * update that instead if possible + */ + if (index_key.type->update) { +- key_ref = __keyring_search_one(keyring_ref, &index_key, 0); ++ key_ref = __keyring_search_one(keyring_ref, &index_key); + if (!IS_ERR(key_ref)) + goto found_matching_key; + } +diff --git a/security/keys/keyring.c b/security/keys/keyring.c +index 87eff32..eeef1a0 100644 +--- a/security/keys/keyring.c ++++ b/security/keys/keyring.c +@@ -531,15 +531,14 @@ EXPORT_SYMBOL(keyring_search); + * RCU is used to make it unnecessary to lock the keyring key list here. + * + * Returns a pointer to the found key with usage count incremented if +- * successful and returns -ENOKEY if not found. Revoked keys and keys not +- * providing the requested permission are skipped over. ++ * successful and returns -ENOKEY if not found. Revoked and invalidated keys ++ * are skipped over. + * + * If successful, the possession indicator is propagated from the keyring ref + * to the returned key reference. 
+ */ + key_ref_t __keyring_search_one(key_ref_t keyring_ref, +- const struct keyring_index_key *index_key, +- key_perm_t perm) ++ const struct keyring_index_key *index_key) + { + struct keyring_list *klist; + struct key *keyring, *key; +@@ -560,8 +559,6 @@ key_ref_t __keyring_search_one(key_ref_t keyring_ref, + if (key->type == index_key->type && + (!key->type->match || + key->type->match(key, index_key->description)) && +- key_permission(make_key_ref(key, possessed), +- perm) == 0 && + !(key->flags & ((1 << KEY_FLAG_INVALIDATED) | + (1 << KEY_FLAG_REVOKED))) + ) +-- +1.8.3.1 + + +From cb720b39e41e62d55bf1e5f8243d78643d31154d Mon Sep 17 00:00:00 2001 +From: David Howells +Date: Fri, 30 Aug 2013 15:37:53 +0100 +Subject: [PATCH 09/10] Add a generic associative array implementation. + +Add a generic associative array implementation that can be used as the +container for keyrings, thereby massively increasing the capacity available +whilst also speeding up searching in keyrings that contain a lot of keys. + +This may also be useful in FS-Cache for tracking cookies. + +Documentation is added into Documentation/assoc_array.txt + +Some of the properties of the implementation are: + + (1) Objects are opaque pointers. The implementation does not care where they + point (if anywhere) or what they point to (if anything). + + [!] NOTE: Pointers to objects _must_ be zero in the two least significant + bits. + + (2) Objects do not need to contain linkage blocks for use by the array. This + permits an object to be located in multiple arrays simultaneously. + Rather, the array is made up of metadata blocks that point to objects. + + (3) Objects are labelled as being one of two types (the type is a bool value). + This information is stored in the array, but has no consequence to the + array itself or its algorithms. + + (4) Objects require index keys to locate them within the array. + + (5) Index keys must be unique. 
Inserting an object with the same key as one + already in the array will replace the old object. + + (6) Index keys can be of any length and can be of different lengths. + + (7) Index keys should encode the length early on, before any variation due to + length is seen. + + (8) Index keys can include a hash to scatter objects throughout the array. + + (9) The array can be iterated over. The objects will not necessarily come out in + key order. + +(10) The array can be iterated whilst it is being modified, provided the RCU + readlock is being held by the iterator. Note, however, under these + circumstances, some objects may be seen more than once. If this is a + problem, the iterator should lock against modification. Objects will not + be missed, however, unless deleted. + +(11) Objects in the array can be looked up by means of their index key. + +(12) Objects can be looked up whilst the array is being modified, provided the + RCU readlock is being held by the thread doing the look up. + +The implementation uses a tree of 16-pointer nodes internally that are indexed +on each level by nibbles from the index key. To improve memory efficiency, +shortcuts can be emplaced to skip over what would otherwise be a series of +single-occupancy nodes. Further, nodes pack leaf object pointers into spare +space in the node rather than making an extra branch until such time as an +object needs to be added to a full node. 
+ +Signed-off-by: David Howells +--- + Documentation/assoc_array.txt | 574 +++++++++++++ + include/linux/assoc_array.h | 92 ++ + include/linux/assoc_array_priv.h | 182 ++++ + lib/Kconfig | 14 + + lib/Makefile | 1 + + lib/assoc_array.c | 1745 ++++++++++++++++++++++++++++++++++++++ + 6 files changed, 2608 insertions(+) + create mode 100644 Documentation/assoc_array.txt + create mode 100644 include/linux/assoc_array.h + create mode 100644 include/linux/assoc_array_priv.h + create mode 100644 lib/assoc_array.c + +diff --git a/Documentation/assoc_array.txt b/Documentation/assoc_array.txt +new file mode 100644 +index 0000000..f4faec0 +--- /dev/null ++++ b/Documentation/assoc_array.txt +@@ -0,0 +1,574 @@ ++ ======================================== ++ GENERIC ASSOCIATIVE ARRAY IMPLEMENTATION ++ ======================================== ++ ++Contents: ++ ++ - Overview. ++ ++ - The public API. ++ - Edit script. ++ - Operations table. ++ - Manipulation functions. ++ - Access functions. ++ - Index key form. ++ ++ - Internal workings. ++ - Basic internal tree layout. ++ - Shortcuts. ++ - Splitting and collapsing nodes. ++ - Non-recursive iteration. ++ - Simultaneous alteration and iteration. ++ ++ ++======== ++OVERVIEW ++======== ++ ++This associative array implementation is an object container with the following ++properties: ++ ++ (1) Objects are opaque pointers. The implementation does not care where they ++ point (if anywhere) or what they point to (if anything). ++ ++ [!] NOTE: Pointers to objects _must_ be zero in the least significant bit. ++ ++ (2) Objects do not need to contain linkage blocks for use by the array. This ++ permits an object to be located in multiple arrays simultaneously. ++ Rather, the array is made up of metadata blocks that point to objects. ++ ++ (3) Objects require index keys to locate them within the array. ++ ++ (4) Index keys must be unique. Inserting an object with the same key as one ++ already in the array will replace the old object. 
++ ++ (5) Index keys can be of any length and can be of different lengths. ++ ++ (6) Index keys should encode the length early on, before any variation due to ++ length is seen. ++ ++ (7) Index keys can include a hash to scatter objects throughout the array. ++ ++ (8) The array can be iterated over. The objects will not necessarily come out in ++ key order. ++ ++ (9) The array can be iterated over whilst it is being modified, provided the ++ RCU readlock is being held by the iterator. Note, however, under these ++ circumstances, some objects may be seen more than once. If this is a ++ problem, the iterator should lock against modification. Objects will not ++ be missed, however, unless deleted. ++ ++(10) Objects in the array can be looked up by means of their index key. ++ ++(11) Objects can be looked up whilst the array is being modified, provided the ++ RCU readlock is being held by the thread doing the look up. ++ ++The implementation uses a tree of 16-pointer nodes internally that are indexed ++on each level by nibbles from the index key in the same manner as in a radix ++tree. To improve memory efficiency, shortcuts can be emplaced to skip over ++what would otherwise be a series of single-occupancy nodes. Further, nodes ++pack leaf object pointers into spare space in the node rather than making an ++extra branch until such time as an object needs to be added to a full node. ++ ++ ++============== ++THE PUBLIC API ++============== ++ ++The public API can be found in <linux/assoc_array.h>. The associative array is ++rooted on the following structure: ++ ++ struct assoc_array { ++ ... ++ }; ++ ++The code is selected by enabling CONFIG_ASSOCIATIVE_ARRAY. ++ ++ ++EDIT SCRIPT ++----------- ++ ++The insertion and deletion functions produce an 'edit script' that can later be ++applied to effect the changes without risking ENOMEM.
This retains the ++preallocated metadata blocks that will be installed in the internal tree and ++keeps track of the metadata blocks that will be removed from the tree when the ++script is applied. ++ ++This is also used to keep track of dead blocks and dead objects after the ++script has been applied so that they can be freed later. The freeing is done ++after an RCU grace period has passed - thus allowing access functions to ++proceed under the RCU read lock. ++ ++Outside of the API, the script appears as a pointer of the type: ++ ++ struct assoc_array_edit; ++ ++There are two functions for dealing with the script: ++ ++ (1) Apply an edit script. ++ ++ void assoc_array_apply_edit(struct assoc_array_edit *edit); ++ ++ This will perform the edit functions, interpolating various write barriers ++ to permit accesses under the RCU read lock to continue. The edit script ++ will then be passed to call_rcu() to free it and any dead stuff it points ++ to. ++ ++ (2) Cancel an edit script. ++ ++ void assoc_array_cancel_edit(struct assoc_array_edit *edit); ++ ++ This frees the edit script and all preallocated memory immediately. If ++ this was for insertion, the new object is _not_ released by this function, ++ but must rather be released by the caller. ++ ++These functions are guaranteed not to fail. ++ ++ ++OPERATIONS TABLE ++---------------- ++ ++Various functions take a table of operations: ++ ++ struct assoc_array_ops { ++ ... ++ }; ++ ++This points to a number of methods, all of which need to be provided: ++ ++ (1) Get a chunk of index key from caller data: ++ ++ unsigned long (*get_key_chunk)(const void *index_key, int level); ++ ++ This should return a chunk of caller-supplied index key starting at the ++ *bit* position given by the level argument. The level argument will be a ++ multiple of ASSOC_ARRAY_KEY_CHUNK_SIZE and the function should return ++ ASSOC_ARRAY_KEY_CHUNK_SIZE bits. No error is possible. ++ ++ ++ (2) Get a chunk of an object's index key.
++ ++ unsigned long (*get_object_key_chunk)(const void *object, int level); ++ ++ As the previous function, but gets its data from an object in the array ++ rather than from a caller-supplied index key. ++ ++ ++ (3) See if this is the object we're looking for. ++ ++ bool (*compare_object)(const void *object, const void *index_key); ++ ++ Compare the object against an index key and return true if it matches and ++ false if it doesn't. ++ ++ ++ (4) Diff the index keys of two objects. ++ ++ int (*diff_objects)(const void *a, const void *b); ++ ++ Return the bit position at which the index keys of two objects differ or ++ -1 if they are the same. ++ ++ ++ (5) Free an object. ++ ++ void (*free_object)(void *object); ++ ++ Free the specified object. Note that this may be called an RCU grace ++ period after assoc_array_apply_edit() was called, so synchronize_rcu() may ++ be necessary on module unloading. ++ ++ ++MANIPULATION FUNCTIONS ++---------------------- ++ ++There are a number of functions for manipulating an associative array: ++ ++ (1) Initialise an associative array. ++ ++ void assoc_array_init(struct assoc_array *array); ++ ++ This initialises the base structure for an associative array. It can't ++ fail. ++ ++ ++ (2) Insert/replace an object in an associative array. ++ ++ struct assoc_array_edit * ++ assoc_array_insert(struct assoc_array *array, ++ const struct assoc_array_ops *ops, ++ const void *index_key, ++ void *object); ++ ++ This inserts the given object into the array. Note that the least ++ significant bit of the pointer must be zero as it's used to type-mark ++ pointers internally. ++ ++ If an object already exists for that key then it will be replaced with the ++ new object and the old one will be freed automatically. ++ ++ The index_key argument should hold index key information and is ++ passed to the methods in the ops table when they are called. 
++ ++ This function makes no alteration to the array itself, but rather returns ++ an edit script that must be applied. -ENOMEM is returned in the case of ++ an out-of-memory error. ++ ++ The caller should lock exclusively against other modifiers of the array. ++ ++ ++ (3) Delete an object from an associative array. ++ ++ struct assoc_array_edit * ++ assoc_array_delete(struct assoc_array *array, ++ const struct assoc_array_ops *ops, ++ const void *index_key); ++ ++ This deletes an object that matches the specified data from the array. ++ ++ The index_key argument should hold index key information and is ++ passed to the methods in the ops table when they are called. ++ ++ This function makes no alteration to the array itself, but rather returns ++ an edit script that must be applied. -ENOMEM is returned in the case of ++ an out-of-memory error. NULL will be returned if the specified object is ++ not found within the array. ++ ++ The caller should lock exclusively against other modifiers of the array. ++ ++ ++ (4) Delete all objects from an associative array. ++ ++ struct assoc_array_edit * ++ assoc_array_clear(struct assoc_array *array, ++ const struct assoc_array_ops *ops); ++ ++ This deletes all the objects from an associative array and leaves it ++ completely empty. ++ ++ This function makes no alteration to the array itself, but rather returns ++ an edit script that must be applied. -ENOMEM is returned in the case of ++ an out-of-memory error. ++ ++ The caller should lock exclusively against other modifiers of the array. ++ ++ ++ (5) Destroy an associative array, deleting all objects. ++ ++ void assoc_array_destroy(struct assoc_array *array, ++ const struct assoc_array_ops *ops); ++ ++ This destroys the contents of the associative array and leaves it ++ completely empty. 
It is not permitted for another thread to be traversing ++ the array under the RCU read lock at the same time as this function is ++ destroying it as no RCU deferral is performed on memory release - ++ something that would require memory to be allocated. ++ ++ The caller should lock exclusively against other modifiers and accessors ++ of the array. ++ ++ ++ (6) Garbage collect an associative array. ++ ++ int assoc_array_gc(struct assoc_array *array, ++ const struct assoc_array_ops *ops, ++ bool (*iterator)(void *object, void *iterator_data), ++ void *iterator_data); ++ ++ This iterates over the objects in an associative array and passes each one ++ to iterator(). If iterator() returns true, the object is kept. If it ++ returns false, the object will be freed. If the iterator() function ++ returns true, it must perform any appropriate refcount incrementing on the ++ object before returning. ++ ++ The internal tree will be packed down if possible as part of the iteration ++ to reduce the number of nodes in it. ++ ++ The iterator_data is passed directly to iterator() and is otherwise ++ ignored by the function. ++ ++ The function will return 0 if successful and -ENOMEM if there wasn't ++ enough memory. ++ ++ It is possible for other threads to iterate over or search the array under ++ the RCU read lock whilst this function is in progress. The caller should ++ lock exclusively against other modifiers of the array. ++ ++ ++ACCESS FUNCTIONS ++---------------- ++ ++There are two functions for accessing an associative array: ++ ++ (1) Iterate over all the objects in an associative array. ++ ++ int assoc_array_iterate(const struct assoc_array *array, ++ int (*iterator)(const void *object, ++ void *iterator_data), ++ void *iterator_data); ++ ++ This passes each object in the array to the iterator callback function. ++ iterator_data is private data for that function. 
++ ++ This may be used on an array at the same time as the array is being ++ modified, provided the RCU read lock is held. Under such circumstances, ++ it is possible for the iteration function to see some objects twice. If ++ this is a problem, then modification should be locked against. The ++ iteration algorithm should not, however, miss any objects. ++ ++ The function will return 0 if no objects were in the array or else it will ++ return the result of the last iterator function called. Iteration stops ++ immediately if any call to the iteration function results in a non-zero ++ return. ++ ++ ++ (2) Find an object in an associative array. ++ ++ void *assoc_array_find(const struct assoc_array *array, ++ const struct assoc_array_ops *ops, ++ const void *index_key); ++ ++ This walks through the array's internal tree directly to the object ++ specified by the index key. ++ ++ This may be used on an array at the same time as the array is being ++ modified, provided the RCU read lock is held. ++ ++ The function will return the object if found or will return NULL if the ++ object was not found. ++ ++ ++INDEX KEY FORM ++-------------- ++ ++The index key can be of any form, but since the algorithms aren't told how long ++the key is, it is strongly recommended that the index key includes its length ++very early on before any variation due to the length would have an effect on ++comparisons. ++ ++This will cause leaves with different length keys to scatter away from each ++other - and those with the same length keys to cluster together. ++ ++It is also recommended that the index key begin with a hash of the rest of the ++key to maximise scattering throughout keyspace. ++ ++The better the scattering, the wider and lower the internal tree will be. ++ ++Poor scattering isn't too much of a problem as there are shortcuts and nodes ++can contain mixtures of leaves and metadata pointers. ++ ++The index key is read in chunks of machine word.
Each chunk is subdivided into ++one nibble (4 bits) per level, so on a 32-bit CPU this is good for 8 levels and ++on a 64-bit CPU, 16 levels. Unless the scattering is really poor, it is ++unlikely that more than one word of any particular index key will have to be ++used. ++ ++ ++================= ++INTERNAL WORKINGS ++================= ++ ++The associative array data structure has an internal tree. This tree is ++constructed of two types of metadata blocks: nodes and shortcuts. ++ ++A node is an array of slots. Each slot can contain one of four things: ++ ++ (*) A NULL pointer, indicating that the slot is empty. ++ ++ (*) A pointer to an object (a leaf). ++ ++ (*) A pointer to a node at the next level. ++ ++ (*) A pointer to a shortcut. ++ ++ ++BASIC INTERNAL TREE LAYOUT ++-------------------------- ++ ++Ignoring shortcuts for the moment, the nodes form a multilevel tree. The index ++key space is strictly subdivided by the nodes in the tree and nodes occur on ++fixed levels. For example: ++ ++ Level: 0 1 2 3 ++ =============== =============== =============== =============== ++ NODE D ++ NODE B NODE C +------>+---+ ++ +------>+---+ +------>+---+ | | 0 | ++ NODE A | | 0 | | | 0 | | +---+ ++ +---+ | +---+ | +---+ | : : ++ | 0 | | : : | : : | +---+ ++ +---+ | +---+ | +---+ | | f | ++ | 1 |---+ | 3 |---+ | 7 |---+ +---+ ++ +---+ +---+ +---+ ++ : : : : | 8 |---+ ++ +---+ +---+ +---+ | NODE E ++ | e |---+ | f | : : +------>+---+ ++ +---+ | +---+ +---+ | 0 | ++ | f | | | f | +---+ ++ +---+ | +---+ : : ++ | NODE F +---+ ++ +------>+---+ | f | ++ | 0 | NODE G +---+ ++ +---+ +------>+---+ ++ : : | | 0 | ++ +---+ | +---+ ++ | 6 |---+ : : ++ +---+ +---+ ++ : : | f | ++ +---+ +---+ ++ | f | ++ +---+ ++ ++In the above example, there are 7 nodes (A-G), each with 16 slots (0-f). 
++Assuming no other meta data nodes in the tree, the key space is divided thusly: ++ ++ KEY PREFIX NODE ++ ========== ==== ++ 137* D ++ 138* E ++ 13[0-69-f]* C ++ 1[0-24-f]* B ++ e6* G ++ e[0-57-f]* F ++ [02-df]* A ++ ++So, for instance, objects with the following example index keys will be found in ++the appropriate nodes: ++ ++ INDEX KEY PREFIX NODE ++ =============== ======= ==== ++ 13694892892489 13 C ++ 13795289025897 137 D ++ 13889dde88793 138 E ++ 138bbb89003093 138 E ++ 1394879524789 13 C ++ 1458952489 1 B ++ 9431809de993ba - A ++ b4542910809cd - A ++ e5284310def98 e F ++ e68428974237 e6 G ++ e7fffcbd443 e F ++ f3842239082 - A ++ ++To save memory, if a node can hold all the leaves in its portion of keyspace, ++then the node will have all those leaves in it and will not have any metadata ++pointers - even if some of those leaves would like to be in the same slot. ++ ++A node can contain a heterogeneous mix of leaves and metadata pointers. ++Metadata pointers must be in the slots that match their subdivisions of key ++space. The leaves can be in any slot not occupied by a metadata pointer. It ++is guaranteed that none of the leaves in a node will match a slot occupied by a ++metadata pointer. If the metadata pointer is there, any leaf whose key matches ++the metadata key prefix must be in the subtree that the metadata pointer points ++to. ++ ++In the above example list of index keys, node A will contain: ++ ++ SLOT CONTENT INDEX KEY (PREFIX) ++ ==== =============== ================== ++ 1 PTR TO NODE B 1* ++ any LEAF 9431809de993ba ++ any LEAF b4542910809cd ++ e PTR TO NODE F e* ++ any LEAF f3842239082 ++ ++and node B: ++ ++ 3 PTR TO NODE C 13* ++ any LEAF 1458952489 ++ ++ ++SHORTCUTS ++--------- ++ ++Shortcuts are metadata records that jump over a piece of keyspace. A shortcut ++is a replacement for a series of single-occupancy nodes ascending through the ++levels. Shortcuts exist to save memory and to speed up traversal.
++ ++It is possible for the root of the tree to be a shortcut - say, for example, ++the tree contains at least 17 leaves all with key prefix '1111'. The insertion ++algorithm will insert a shortcut to skip over the '1111' keyspace in a single ++bound and get to the fourth level where these actually become different. ++ ++ ++SPLITTING AND COLLAPSING NODES ++------------------------------ ++ ++Each node has a maximum capacity of 16 leaves and metadata pointers. If the ++insertion algorithm finds that it is trying to insert a 17th object into a ++node, that node will be split such that at least two leaves that have a common ++key segment at that level end up in a separate node rooted on that slot for ++that common key segment. ++ ++If the leaves in a full node and the leaf that is being inserted are ++sufficiently similar, then a shortcut will be inserted into the tree. ++ ++When the number of objects in the subtree rooted at a node falls to 16 or ++fewer, then the subtree will be collapsed down to a single node - and this will ++ripple towards the root if possible. ++ ++ ++NON-RECURSIVE ITERATION ++----------------------- ++ ++Each node and shortcut contains a back pointer to its parent and the number of ++the slot in that parent that points to it. Non-recursive iteration uses these to ++proceed rootwards through the tree, going to the parent node, slot N + 1 to ++make sure progress is made without the need for a stack. ++ ++The backpointers, however, make simultaneous alteration and iteration tricky. ++ ++ ++SIMULTANEOUS ALTERATION AND ITERATION ++------------------------------------- ++ ++There are a number of cases to consider: ++ ++ (1) Simple insert/replace. This involves simply replacing a NULL or old ++ matching leaf pointer with the pointer to the new leaf after a barrier. ++ The metadata blocks don't change otherwise. An old leaf won't be freed ++ until after the RCU grace period. ++ ++ (2) Simple delete. This involves just clearing an old matching leaf.
The ++ metadata blocks don't change otherwise. The old leaf won't be freed until ++ after the RCU grace period. ++ ++ (3) Insertion replacing part of a subtree that we haven't yet entered. This ++ may involve replacement of part of that subtree - but that won't affect ++ the iteration as we won't have reached the pointer to it yet and the ++ ancestry blocks are not replaced (the layout of those does not change). ++ ++ (4) Insertion replacing nodes that we're actively processing. This isn't a ++ problem as we've passed the anchoring pointer and won't switch onto the ++ new layout until we follow the back pointers - at which point we've ++ already examined the leaves in the replaced node (we iterate over all the ++ leaves in a node before following any of its metadata pointers). ++ ++ We might, however, re-see some leaves that have been split out into a new ++ branch that's in a slot further along than we were at. ++ ++ (5) Insertion replacing nodes that we're processing a dependent branch of. ++ This won't affect us until we follow the back pointers. Similar to (4). ++ ++ (6) Deletion collapsing a branch under us. This doesn't affect us because the ++ back pointers will get us back to the parent of the new node before we ++ could see the new node. The entire collapsed subtree is thrown away ++ unchanged - and will still be rooted on the same slot, so we shouldn't ++ process it a second time as we'll go back to slot + 1. ++ ++Note: ++ ++ (*) Under some circumstances, we need to simultaneously change the parent ++ pointer and the parent slot pointer on a node (say, for example, we ++ inserted another node before it and moved it up a level). We cannot do ++ this without locking against a read - so we have to replace that node too. ++ ++ However, when we're changing a shortcut into a node this isn't a problem ++ as shortcuts only have one slot and so the parent slot number isn't used ++ when traversing backwards over one. 
This means that it's okay to change ++ the slot number first - provided suitable barriers are used to make sure ++ the parent slot number is read after the back pointer. ++ ++Obsolete blocks and leaves are freed up after an RCU grace period has passed, ++so as long as anyone doing walking or iteration holds the RCU read lock, the ++old superstructure should not go away on them. +diff --git a/include/linux/assoc_array.h b/include/linux/assoc_array.h +new file mode 100644 +index 0000000..9a193b8 +--- /dev/null ++++ b/include/linux/assoc_array.h +@@ -0,0 +1,92 @@ ++/* Generic associative array implementation. ++ * ++ * See Documentation/assoc_array.txt for information. ++ * ++ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. ++ * Written by David Howells (dhowells@redhat.com) ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public Licence ++ * as published by the Free Software Foundation; either version ++ * 2 of the Licence, or (at your option) any later version. ++ */ ++ ++#ifndef _LINUX_ASSOC_ARRAY_H ++#define _LINUX_ASSOC_ARRAY_H ++ ++#ifdef CONFIG_ASSOCIATIVE_ARRAY ++ ++#include ++ ++#define ASSOC_ARRAY_KEY_CHUNK_SIZE BITS_PER_LONG /* Key data retrieved in chunks of this size */ ++ ++/* ++ * Generic associative array. ++ */ ++struct assoc_array { ++ struct assoc_array_ptr *root; /* The node at the root of the tree */ ++ unsigned long nr_leaves_on_tree; ++}; ++ ++/* ++ * Operations on objects and index keys for use by array manipulation routines. ++ */ ++struct assoc_array_ops { ++ /* Method to get a chunk of an index key from caller-supplied data */ ++ unsigned long (*get_key_chunk)(const void *index_key, int level); ++ ++ /* Method to get a piece of an object's index key */ ++ unsigned long (*get_object_key_chunk)(const void *object, int level); ++ ++ /* Is this the object we're looking for? 
*/ ++ bool (*compare_object)(const void *object, const void *index_key); ++ ++ /* How different are two objects, to a bit position in their keys? (or ++ * -1 if they're the same) ++ */ ++ int (*diff_objects)(const void *a, const void *b); ++ ++ /* Method to free an object. */ ++ void (*free_object)(void *object); ++}; ++ ++/* ++ * Access and manipulation functions. ++ */ ++struct assoc_array_edit; ++ ++static inline void assoc_array_init(struct assoc_array *array) ++{ ++ array->root = NULL; ++ array->nr_leaves_on_tree = 0; ++} ++ ++extern int assoc_array_iterate(const struct assoc_array *array, ++ int (*iterator)(const void *object, ++ void *iterator_data), ++ void *iterator_data); ++extern void *assoc_array_find(const struct assoc_array *array, ++ const struct assoc_array_ops *ops, ++ const void *index_key); ++extern void assoc_array_destroy(struct assoc_array *array, ++ const struct assoc_array_ops *ops); ++extern struct assoc_array_edit *assoc_array_insert(struct assoc_array *array, ++ const struct assoc_array_ops *ops, ++ const void *index_key, ++ void *object); ++extern void assoc_array_insert_set_object(struct assoc_array_edit *edit, ++ void *object); ++extern struct assoc_array_edit *assoc_array_delete(struct assoc_array *array, ++ const struct assoc_array_ops *ops, ++ const void *index_key); ++extern struct assoc_array_edit *assoc_array_clear(struct assoc_array *array, ++ const struct assoc_array_ops *ops); ++extern void assoc_array_apply_edit(struct assoc_array_edit *edit); ++extern void assoc_array_cancel_edit(struct assoc_array_edit *edit); ++extern int assoc_array_gc(struct assoc_array *array, ++ const struct assoc_array_ops *ops, ++ bool (*iterator)(void *object, void *iterator_data), ++ void *iterator_data); ++ ++#endif /* CONFIG_ASSOCIATIVE_ARRAY */ ++#endif /* _LINUX_ASSOC_ARRAY_H */ +diff --git a/include/linux/assoc_array_priv.h b/include/linux/assoc_array_priv.h +new file mode 100644 +index 0000000..711275e +--- /dev/null ++++ 
b/include/linux/assoc_array_priv.h +@@ -0,0 +1,182 @@ ++/* Private definitions for the generic associative array implementation. ++ * ++ * See Documentation/assoc_array.txt for information. ++ * ++ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. ++ * Written by David Howells (dhowells@redhat.com) ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public Licence ++ * as published by the Free Software Foundation; either version ++ * 2 of the Licence, or (at your option) any later version. ++ */ ++ ++#ifndef _LINUX_ASSOC_ARRAY_PRIV_H ++#define _LINUX_ASSOC_ARRAY_PRIV_H ++ ++#ifdef CONFIG_ASSOCIATIVE_ARRAY ++ ++#include ++ ++#define ASSOC_ARRAY_FAN_OUT 16 /* Number of slots per node */ ++#define ASSOC_ARRAY_FAN_MASK (ASSOC_ARRAY_FAN_OUT - 1) ++#define ASSOC_ARRAY_LEVEL_STEP (ilog2(ASSOC_ARRAY_FAN_OUT)) ++#define ASSOC_ARRAY_LEVEL_STEP_MASK (ASSOC_ARRAY_LEVEL_STEP - 1) ++#define ASSOC_ARRAY_KEY_CHUNK_MASK (ASSOC_ARRAY_KEY_CHUNK_SIZE - 1) ++#define ASSOC_ARRAY_KEY_CHUNK_SHIFT (ilog2(BITS_PER_LONG)) ++ ++/* ++ * Undefined type representing a pointer with type information in the bottom ++ * two bits. ++ */ ++struct assoc_array_ptr; ++ ++/* ++ * An N-way node in the tree. ++ * ++ * Each slot contains one of four things: ++ * ++ * (1) Nothing (NULL). ++ * ++ * (2) A leaf object (pointer types 0). ++ * ++ * (3) A next-level node (pointer type 1, subtype 0). ++ * ++ * (4) A shortcut (pointer type 1, subtype 1). ++ * ++ * The tree is optimised for search-by-ID, but permits reasonable iteration ++ * also. ++ * ++ * The tree is navigated by constructing an index key consisting of an array of ++ * segments, where each segment is ilog2(ASSOC_ARRAY_FAN_OUT) bits in size. ++ * ++ * The segments correspond to levels of the tree (the first segment is used at ++ * level 0, the second at level 1, etc.). 
++ */ ++struct assoc_array_node { ++ struct assoc_array_ptr *back_pointer; ++ u8 parent_slot; ++ struct assoc_array_ptr *slots[ASSOC_ARRAY_FAN_OUT]; ++ unsigned long nr_leaves_on_branch; ++}; ++ ++/* ++ * A shortcut through the index space out to where a collection of nodes/leaves ++ * with the same IDs live. ++ */ ++struct assoc_array_shortcut { ++ struct assoc_array_ptr *back_pointer; ++ int parent_slot; ++ int skip_to_level; ++ struct assoc_array_ptr *next_node; ++ unsigned long index_key[]; ++}; ++ ++/* ++ * Preallocation cache. ++ */ ++struct assoc_array_edit { ++ struct rcu_head rcu; ++ struct assoc_array *array; ++ const struct assoc_array_ops *ops; ++ const struct assoc_array_ops *ops_for_excised_subtree; ++ struct assoc_array_ptr *leaf; ++ struct assoc_array_ptr **leaf_p; ++ struct assoc_array_ptr *dead_leaf; ++ struct assoc_array_ptr *new_meta[3]; ++ struct assoc_array_ptr *excised_meta[1]; ++ struct assoc_array_ptr *excised_subtree; ++ struct assoc_array_ptr **set_backpointers[ASSOC_ARRAY_FAN_OUT]; ++ struct assoc_array_ptr *set_backpointers_to; ++ struct assoc_array_node *adjust_count_on; ++ long adjust_count_by; ++ struct { ++ struct assoc_array_ptr **ptr; ++ struct assoc_array_ptr *to; ++ } set[2]; ++ struct { ++ u8 *p; ++ u8 to; ++ } set_parent_slot[1]; ++ u8 segment_cache[ASSOC_ARRAY_FAN_OUT + 1]; ++}; ++ ++/* ++ * Internal tree member pointers are marked in the bottom one or two bits to ++ * indicate what type they are so that we don't have to look behind every ++ * pointer to see what it points to. ++ * ++ * We provide functions to test type annotations and to create and translate ++ * the annotated pointers. 
++ */ ++#define ASSOC_ARRAY_PTR_TYPE_MASK 0x1UL ++#define ASSOC_ARRAY_PTR_LEAF_TYPE 0x0UL /* Points to leaf (or nowhere) */ ++#define ASSOC_ARRAY_PTR_META_TYPE 0x1UL /* Points to node or shortcut */ ++#define ASSOC_ARRAY_PTR_SUBTYPE_MASK 0x2UL ++#define ASSOC_ARRAY_PTR_NODE_SUBTYPE 0x0UL ++#define ASSOC_ARRAY_PTR_SHORTCUT_SUBTYPE 0x2UL ++ ++static inline bool assoc_array_ptr_is_meta(const struct assoc_array_ptr *x) ++{ ++ return (unsigned long)x & ASSOC_ARRAY_PTR_TYPE_MASK; ++} ++static inline bool assoc_array_ptr_is_leaf(const struct assoc_array_ptr *x) ++{ ++ return !assoc_array_ptr_is_meta(x); ++} ++static inline bool assoc_array_ptr_is_shortcut(const struct assoc_array_ptr *x) ++{ ++ return (unsigned long)x & ASSOC_ARRAY_PTR_SUBTYPE_MASK; ++} ++static inline bool assoc_array_ptr_is_node(const struct assoc_array_ptr *x) ++{ ++ return !assoc_array_ptr_is_shortcut(x); ++} ++ ++static inline void *assoc_array_ptr_to_leaf(const struct assoc_array_ptr *x) ++{ ++ return (void *)((unsigned long)x & ~ASSOC_ARRAY_PTR_TYPE_MASK); ++} ++ ++static inline ++unsigned long __assoc_array_ptr_to_meta(const struct assoc_array_ptr *x) ++{ ++ return (unsigned long)x & ++ ~(ASSOC_ARRAY_PTR_SUBTYPE_MASK | ASSOC_ARRAY_PTR_TYPE_MASK); ++} ++static inline ++struct assoc_array_node *assoc_array_ptr_to_node(const struct assoc_array_ptr *x) ++{ ++ return (struct assoc_array_node *)__assoc_array_ptr_to_meta(x); ++} ++static inline ++struct assoc_array_shortcut *assoc_array_ptr_to_shortcut(const struct assoc_array_ptr *x) ++{ ++ return (struct assoc_array_shortcut *)__assoc_array_ptr_to_meta(x); ++} ++ ++static inline ++struct assoc_array_ptr *__assoc_array_x_to_ptr(const void *p, unsigned long t) ++{ ++ return (struct assoc_array_ptr *)((unsigned long)p | t); ++} ++static inline ++struct assoc_array_ptr *assoc_array_leaf_to_ptr(const void *p) ++{ ++ return __assoc_array_x_to_ptr(p, ASSOC_ARRAY_PTR_LEAF_TYPE); ++} ++static inline ++struct assoc_array_ptr *assoc_array_node_to_ptr(const struct 
assoc_array_node *p) ++{ ++ return __assoc_array_x_to_ptr( ++ p, ASSOC_ARRAY_PTR_META_TYPE | ASSOC_ARRAY_PTR_NODE_SUBTYPE); ++} ++static inline ++struct assoc_array_ptr *assoc_array_shortcut_to_ptr(const struct assoc_array_shortcut *p) ++{ ++ return __assoc_array_x_to_ptr( ++ p, ASSOC_ARRAY_PTR_META_TYPE | ASSOC_ARRAY_PTR_SHORTCUT_SUBTYPE); ++} ++ ++#endif /* CONFIG_ASSOCIATIVE_ARRAY */ ++#endif /* _LINUX_ASSOC_ARRAY_PRIV_H */ +diff --git a/lib/Kconfig b/lib/Kconfig +index 35da513..b7d3234 100644 +--- a/lib/Kconfig ++++ b/lib/Kconfig +@@ -312,6 +312,20 @@ config TEXTSEARCH_FSM + config BTREE + boolean + ++config ASSOCIATIVE_ARRAY ++ bool ++ help ++ Generic associative array. Can be searched and iterated over whilst ++ it is being modified. It is also reasonably quick to search and ++ modify. The algorithms are non-recursive, and the trees are highly ++ capacious. ++ ++ See: ++ ++ Documentation/assoc_array.txt ++ ++ for more information. ++ + config HAS_IOMEM + boolean + depends on !NO_IOMEM +diff --git a/lib/Makefile b/lib/Makefile +index 7baccfd..2c59891 100644 +--- a/lib/Makefile ++++ b/lib/Makefile +@@ -49,6 +49,7 @@ CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) + obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o + + obj-$(CONFIG_BTREE) += btree.o ++obj-$(CONFIG_ASSOCIATIVE_ARRAY) += assoc_array.o + obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o + obj-$(CONFIG_DEBUG_LIST) += list_debug.o + obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o +diff --git a/lib/assoc_array.c b/lib/assoc_array.c +new file mode 100644 +index 0000000..a095281 +--- /dev/null ++++ b/lib/assoc_array.c +@@ -0,0 +1,1745 @@ ++/* Generic associative array implementation. ++ * ++ * See Documentation/assoc_array.txt for information. ++ * ++ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. 
++ * Written by David Howells (dhowells@redhat.com) ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public Licence ++ * as published by the Free Software Foundation; either version ++ * 2 of the Licence, or (at your option) any later version. ++ */ ++//#define DEBUG ++#include ++#include ++ ++/* ++ * Iterate over an associative array. The caller must hold the RCU read lock ++ * or better. ++ */ ++static int assoc_array_subtree_iterate(const struct assoc_array_ptr *root, ++ const struct assoc_array_ptr *stop, ++ int (*iterator)(const void *leaf, ++ void *iterator_data), ++ void *iterator_data) ++{ ++ const struct assoc_array_shortcut *shortcut; ++ const struct assoc_array_node *node; ++ const struct assoc_array_ptr *cursor, *ptr, *parent; ++ unsigned long has_meta; ++ int slot, ret; ++ ++ cursor = root; ++ ++begin_node: ++ if (assoc_array_ptr_is_shortcut(cursor)) { ++ /* Descend through a shortcut */ ++ shortcut = assoc_array_ptr_to_shortcut(cursor); ++ smp_read_barrier_depends(); ++ cursor = ACCESS_ONCE(shortcut->next_node); ++ } ++ ++ node = assoc_array_ptr_to_node(cursor); ++ smp_read_barrier_depends(); ++ slot = 0; ++ ++ /* We perform two passes of each node. ++ * ++ * The first pass does all the leaves in this node. This means we ++ * don't miss any leaves if the node is split up by insertion whilst ++ * we're iterating over the branches rooted here (we may, however, see ++ * some leaves twice). ++ */ ++ has_meta = 0; ++ for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { ++ ptr = ACCESS_ONCE(node->slots[slot]); ++ has_meta |= (unsigned long)ptr; ++ if (ptr && assoc_array_ptr_is_leaf(ptr)) { ++ /* We need a barrier between the read of the pointer ++ * and dereferencing the pointer - but only if we are ++ * actually going to dereference it. 
++ */ ++ smp_read_barrier_depends(); ++ ++ /* Invoke the callback */ ++ ret = iterator(assoc_array_ptr_to_leaf(ptr), ++ iterator_data); ++ if (ret) ++ return ret; ++ } ++ } ++ ++ /* The second pass attends to all the metadata pointers. If we follow ++ * one of these we may find that we don't come back here, but rather go ++ * back to a replacement node with the leaves in a different layout. ++ * ++ * We are guaranteed to make progress, however, as the slot number for ++ * a particular portion of the key space cannot change - and we ++ * continue at the back pointer + 1. ++ */ ++ if (!(has_meta & ASSOC_ARRAY_PTR_META_TYPE)) ++ goto finished_node; ++ slot = 0; ++ ++continue_node: ++ node = assoc_array_ptr_to_node(cursor); ++ smp_read_barrier_depends(); ++ ++ for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { ++ ptr = ACCESS_ONCE(node->slots[slot]); ++ if (assoc_array_ptr_is_meta(ptr)) { ++ cursor = ptr; ++ goto begin_node; ++ } ++ } ++ ++finished_node: ++ /* Move up to the parent (may need to skip back over a shortcut) */ ++ parent = ACCESS_ONCE(node->back_pointer); ++ slot = node->parent_slot; ++ if (parent == stop) ++ return 0; ++ ++ if (assoc_array_ptr_is_shortcut(parent)) { ++ shortcut = assoc_array_ptr_to_shortcut(parent); ++ smp_read_barrier_depends(); ++ cursor = parent; ++ parent = ACCESS_ONCE(shortcut->back_pointer); ++ slot = shortcut->parent_slot; ++ if (parent == stop) ++ return 0; ++ } ++ ++ /* Ascend to next slot in parent node */ ++ cursor = parent; ++ slot++; ++ goto continue_node; ++} ++ ++/** ++ * assoc_array_iterate - Pass all objects in the array to a callback ++ * @array: The array to iterate over. ++ * @iterator: The callback function. ++ * @iterator_data: Private data for the callback function. ++ * ++ * Iterate over all the objects in an associative array. Each one will be ++ * presented to the iterator function. 
++ *
++ * If the array is being modified concurrently with the iteration then it is
++ * possible that some objects in the array will be passed to the iterator
++ * callback more than once - though every object should be passed at least
++ * once. If this is undesirable then the caller must lock against modification
++ * for the duration of this function.
++ *
++ * The function will return 0 if no objects were in the array or else it will
++ * return the result of the last iterator function called. Iteration stops
++ * immediately if any call to the iteration function results in a non-zero
++ * return.
++ *
++ * The caller should hold the RCU read lock or better if concurrent
++ * modification is possible.
++ */
++int assoc_array_iterate(const struct assoc_array *array,
++			int (*iterator)(const void *object,
++					void *iterator_data),
++			void *iterator_data)
++{
++	struct assoc_array_ptr *root = ACCESS_ONCE(array->root);
++
++	if (!root)
++		return 0;
++	return assoc_array_subtree_iterate(root, NULL, iterator, iterator_data);
++}
++
++enum assoc_array_walk_status {
++	assoc_array_walk_tree_empty,		/* The array has no root */
++	assoc_array_walk_found_terminal_node,	/* Slot for key holds no metadata */
++	assoc_array_walk_found_wrong_shortcut,	/* Shortcut's key differs from ours */
++};
++
++struct assoc_array_walk_result {
++	struct {
++		struct assoc_array_node	*node;	/* Node in which leaf might be found */
++		int		level;
++		int		slot;
++	} terminal_node;
++	struct {
++		struct assoc_array_shortcut *shortcut;
++		int		level;
++		int		sc_level;
++		unsigned long	sc_segments;
++		unsigned long	dissimilarity;
++	} wrong_shortcut;
++};
++
++/*
++ * Navigate through the internal tree looking for the closest node to the key.
++ */ ++static enum assoc_array_walk_status ++assoc_array_walk(const struct assoc_array *array, ++ const struct assoc_array_ops *ops, ++ const void *index_key, ++ struct assoc_array_walk_result *result) ++{ ++ struct assoc_array_shortcut *shortcut; ++ struct assoc_array_node *node; ++ struct assoc_array_ptr *cursor, *ptr; ++ unsigned long sc_segments, dissimilarity; ++ unsigned long segments; ++ int level, sc_level, next_sc_level; ++ int slot; ++ ++ pr_devel("-->%s()\n", __func__); ++ ++ cursor = ACCESS_ONCE(array->root); ++ if (!cursor) ++ return assoc_array_walk_tree_empty; ++ ++ level = 0; ++ ++ /* Use segments from the key for the new leaf to navigate through the ++ * internal tree, skipping through nodes and shortcuts that are on ++ * route to the destination. Eventually we'll come to a slot that is ++ * either empty or contains a leaf at which point we've found a node in ++ * which the leaf we're looking for might be found or into which it ++ * should be inserted. ++ */ ++jumped: ++ segments = ops->get_key_chunk(index_key, level); ++ pr_devel("segments[%d]: %lx\n", level, segments); ++ ++ if (assoc_array_ptr_is_shortcut(cursor)) ++ goto follow_shortcut; ++ ++consider_node: ++ node = assoc_array_ptr_to_node(cursor); ++ smp_read_barrier_depends(); ++ ++ slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK); ++ slot &= ASSOC_ARRAY_FAN_MASK; ++ ptr = ACCESS_ONCE(node->slots[slot]); ++ ++ pr_devel("consider slot %x [ix=%d type=%lu]\n", ++ slot, level, (unsigned long)ptr & 3); ++ ++ if (!assoc_array_ptr_is_meta(ptr)) { ++ /* The node doesn't have a node/shortcut pointer in the slot ++ * corresponding to the index key that we have to follow. 
++ */ ++ result->terminal_node.node = node; ++ result->terminal_node.level = level; ++ result->terminal_node.slot = slot; ++ pr_devel("<--%s() = terminal_node\n", __func__); ++ return assoc_array_walk_found_terminal_node; ++ } ++ ++ if (assoc_array_ptr_is_node(ptr)) { ++ /* There is a pointer to a node in the slot corresponding to ++ * this index key segment, so we need to follow it. ++ */ ++ cursor = ptr; ++ level += ASSOC_ARRAY_LEVEL_STEP; ++ if ((level & ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) ++ goto consider_node; ++ goto jumped; ++ } ++ ++ /* There is a shortcut in the slot corresponding to the index key ++ * segment. We follow the shortcut if its partial index key matches ++ * this leaf's. Otherwise we need to split the shortcut. ++ */ ++ cursor = ptr; ++follow_shortcut: ++ shortcut = assoc_array_ptr_to_shortcut(cursor); ++ smp_read_barrier_depends(); ++ pr_devel("shortcut to %d\n", shortcut->skip_to_level); ++ sc_level = level + ASSOC_ARRAY_LEVEL_STEP; ++ BUG_ON(sc_level > shortcut->skip_to_level); ++ ++ do { ++ /* Check the leaf against the shortcut's index key a word at a ++ * time, trimming the final word (the shortcut stores the index ++ * key completely from the root to the shortcut's target). 
++ */ ++ if ((sc_level & ASSOC_ARRAY_KEY_CHUNK_MASK) == 0) ++ segments = ops->get_key_chunk(index_key, sc_level); ++ ++ sc_segments = shortcut->index_key[sc_level >> ASSOC_ARRAY_KEY_CHUNK_SHIFT]; ++ dissimilarity = segments ^ sc_segments; ++ ++ if (round_up(sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE) > shortcut->skip_to_level) { ++ /* Trim segments that are beyond the shortcut */ ++ int shift = shortcut->skip_to_level & ASSOC_ARRAY_KEY_CHUNK_MASK; ++ dissimilarity &= ~(ULONG_MAX << shift); ++ next_sc_level = shortcut->skip_to_level; ++ } else { ++ next_sc_level = sc_level + ASSOC_ARRAY_KEY_CHUNK_SIZE; ++ next_sc_level = round_down(next_sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); ++ } ++ ++ if (dissimilarity != 0) { ++ /* This shortcut points elsewhere */ ++ result->wrong_shortcut.shortcut = shortcut; ++ result->wrong_shortcut.level = level; ++ result->wrong_shortcut.sc_level = sc_level; ++ result->wrong_shortcut.sc_segments = sc_segments; ++ result->wrong_shortcut.dissimilarity = dissimilarity; ++ return assoc_array_walk_found_wrong_shortcut; ++ } ++ ++ sc_level = next_sc_level; ++ } while (sc_level < shortcut->skip_to_level); ++ ++ /* The shortcut matches the leaf's index to this point. */ ++ cursor = ACCESS_ONCE(shortcut->next_node); ++ if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) { ++ level = sc_level; ++ goto jumped; ++ } else { ++ level = sc_level; ++ goto consider_node; ++ } ++} ++ ++/** ++ * assoc_array_find - Find an object by index key ++ * @array: The associative array to search. ++ * @ops: The operations to use. ++ * @index_key: The key to the object. ++ * ++ * Find an object in an associative array by walking through the internal tree ++ * to the node that should contain the object and then searching the leaves ++ * there. NULL is returned if the requested object was not found in the array. ++ * ++ * The caller must hold the RCU read lock or better. 
++ */
++void *assoc_array_find(const struct assoc_array *array,
++		       const struct assoc_array_ops *ops,
++		       const void *index_key)
++{
++	struct assoc_array_walk_result result;
++	const struct assoc_array_node *node;
++	const struct assoc_array_ptr *ptr;
++	const void *leaf;
++	int slot;
++
++	if (assoc_array_walk(array, ops, index_key, &result) !=
++	    assoc_array_walk_found_terminal_node)
++		return NULL;
++
++	node = result.terminal_node.node;
++	smp_read_barrier_depends();
++
++	/* If the target key is available to us, it has to be pointed to by
++	 * the terminal node, though it may sit in any of that node's slots.
++	 */
++	for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
++		ptr = ACCESS_ONCE(node->slots[slot]);
++		if (ptr && assoc_array_ptr_is_leaf(ptr)) {
++			/* We need a barrier between the read of the pointer
++			 * and dereferencing the pointer - but only if we are
++			 * actually going to dereference it.
++			 */
++			leaf = assoc_array_ptr_to_leaf(ptr);
++			smp_read_barrier_depends();
++			if (ops->compare_object(leaf, index_key))
++				return (void *)leaf;
++		}
++	}
++
++	return NULL;
++}
++
++/*
++ * Destructively iterate over an associative array. The caller must prevent
++ * other simultaneous accesses.
++ */ ++static void assoc_array_destroy_subtree(struct assoc_array_ptr *root, ++ const struct assoc_array_ops *ops) ++{ ++ struct assoc_array_shortcut *shortcut; ++ struct assoc_array_node *node; ++ struct assoc_array_ptr *cursor, *parent = NULL; ++ int slot = -1; ++ ++ pr_devel("-->%s()\n", __func__); ++ ++ cursor = root; ++ if (!cursor) { ++ pr_devel("empty\n"); ++ return; ++ } ++ ++move_to_meta: ++ if (assoc_array_ptr_is_shortcut(cursor)) { ++ /* Descend through a shortcut */ ++ pr_devel("[%d] shortcut\n", slot); ++ BUG_ON(!assoc_array_ptr_is_shortcut(cursor)); ++ shortcut = assoc_array_ptr_to_shortcut(cursor); ++ BUG_ON(shortcut->back_pointer != parent); ++ BUG_ON(slot != -1 && shortcut->parent_slot != slot); ++ parent = cursor; ++ cursor = shortcut->next_node; ++ slot = -1; ++ BUG_ON(!assoc_array_ptr_is_node(cursor)); ++ } ++ ++ pr_devel("[%d] node\n", slot); ++ node = assoc_array_ptr_to_node(cursor); ++ BUG_ON(node->back_pointer != parent); ++ BUG_ON(slot != -1 && node->parent_slot != slot); ++ slot = 0; ++ ++continue_node: ++ pr_devel("Node %p [back=%p]\n", node, node->back_pointer); ++ for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { ++ struct assoc_array_ptr *ptr = node->slots[slot]; ++ if (!ptr) ++ continue; ++ if (assoc_array_ptr_is_meta(ptr)) { ++ parent = cursor; ++ cursor = ptr; ++ goto move_to_meta; ++ } ++ ++ if (ops) { ++ pr_devel("[%d] free leaf\n", slot); ++ ops->free_object(assoc_array_ptr_to_leaf(ptr)); ++ } ++ } ++ ++ parent = node->back_pointer; ++ slot = node->parent_slot; ++ pr_devel("free node\n"); ++ kfree(node); ++ if (!parent) ++ return; /* Done */ ++ ++ /* Move back up to the parent (may need to free a shortcut on ++ * the way up) */ ++ if (assoc_array_ptr_is_shortcut(parent)) { ++ shortcut = assoc_array_ptr_to_shortcut(parent); ++ BUG_ON(shortcut->next_node != cursor); ++ cursor = parent; ++ parent = shortcut->back_pointer; ++ slot = shortcut->parent_slot; ++ pr_devel("free shortcut\n"); ++ kfree(shortcut); ++ if (!parent) ++ return; ++ 
++ BUG_ON(!assoc_array_ptr_is_node(parent)); ++ } ++ ++ /* Ascend to next slot in parent node */ ++ pr_devel("ascend to %p[%d]\n", parent, slot); ++ cursor = parent; ++ node = assoc_array_ptr_to_node(cursor); ++ slot++; ++ goto continue_node; ++} ++ ++/** ++ * assoc_array_destroy - Destroy an associative array ++ * @array: The array to destroy. ++ * @ops: The operations to use. ++ * ++ * Discard all metadata and free all objects in an associative array. The ++ * array will be empty and ready to use again upon completion. This function ++ * cannot fail. ++ * ++ * The caller must prevent all other accesses whilst this takes place as no ++ * attempt is made to adjust pointers gracefully to permit RCU readlock-holding ++ * accesses to continue. On the other hand, no memory allocation is required. ++ */ ++void assoc_array_destroy(struct assoc_array *array, ++ const struct assoc_array_ops *ops) ++{ ++ assoc_array_destroy_subtree(array->root, ops); ++ array->root = NULL; ++} ++ ++/* ++ * Handle insertion into an empty tree. ++ */ ++static bool assoc_array_insert_in_empty_tree(struct assoc_array_edit *edit) ++{ ++ struct assoc_array_node *new_n0; ++ ++ pr_devel("-->%s()\n", __func__); ++ ++ new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); ++ if (!new_n0) ++ return false; ++ ++ edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); ++ edit->leaf_p = &new_n0->slots[0]; ++ edit->adjust_count_on = new_n0; ++ edit->set[0].ptr = &edit->array->root; ++ edit->set[0].to = assoc_array_node_to_ptr(new_n0); ++ ++ pr_devel("<--%s() = ok [no root]\n", __func__); ++ return true; ++} ++ ++/* ++ * Handle insertion into a terminal node. 
++ */
++static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit,
++						  const struct assoc_array_ops *ops,
++						  const void *index_key,
++						  struct assoc_array_walk_result *result)
++{
++	struct assoc_array_shortcut *shortcut, *new_s0;
++	struct assoc_array_node *node, *new_n0, *new_n1, *side;
++	struct assoc_array_ptr *ptr;
++	unsigned long dissimilarity, base_seg, blank;
++	size_t keylen;
++	bool have_meta;
++	int level, diff;
++	int slot, next_slot, free_slot, i, j;
++
++	node	= result->terminal_node.node;
++	level	= result->terminal_node.level;
++	edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = result->terminal_node.slot;
++
++	pr_devel("-->%s()\n", __func__);
++
++	/* We arrived at a node which doesn't have an onward node or shortcut
++	 * pointer that we have to follow. This means that (a) the leaf we
++	 * want must go here (either by insertion or replacement) or (b) we
++	 * need to split this node and insert in one of the fragments.
++	 */
++	free_slot = -1;
++
++	/* Firstly, look for a matching leaf to replace in place. */
++	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
++		ptr = node->slots[i];
++		if (!ptr) {
++			free_slot = i;
++			continue;
++		}
++		/* Other slots may hold node/shortcut pointers - not objects */
++		if (assoc_array_ptr_is_leaf(ptr) &&
++		    ops->compare_object(assoc_array_ptr_to_leaf(ptr), index_key)) {
++			pr_devel("replace in slot %d\n", i);
++			edit->leaf_p = &node->slots[i];
++			edit->dead_leaf = node->slots[i];
++			pr_devel("<--%s() = ok [replace]\n", __func__);
++			return true;
++		}
++	}
++
++	/* If there is a free slot in this node then we can just insert the
++	 * leaf here.
++	 */
++	if (free_slot >= 0) {
++		pr_devel("insert in free slot %d\n", free_slot);
++		edit->leaf_p = &node->slots[free_slot];
++		edit->adjust_count_on = node;
++		pr_devel("<--%s() = ok [insert]\n", __func__);
++		return true;
++	}
++
++	/* The node has no spare slots - so we're either going to have to split
++	 * it or insert another node before it.
++ * ++ * Whatever, we're going to need at least two new nodes - so allocate ++ * those now. We may also need a new shortcut, but we deal with that ++ * when we need it. ++ */ ++ new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); ++ if (!new_n0) ++ return false; ++ edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); ++ new_n1 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); ++ if (!new_n1) ++ return false; ++ edit->new_meta[1] = assoc_array_node_to_ptr(new_n1); ++ ++ /* We need to find out how similar the leaves are. */ ++ pr_devel("no spare slots\n"); ++ have_meta = false; ++ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { ++ ptr = node->slots[i]; ++ if (assoc_array_ptr_is_meta(ptr)) { ++ edit->segment_cache[i] = 0xff; ++ have_meta = true; ++ continue; ++ } ++ base_seg = ops->get_object_key_chunk( ++ assoc_array_ptr_to_leaf(ptr), level); ++ base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; ++ edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK; ++ } ++ ++ if (have_meta) { ++ pr_devel("have meta\n"); ++ goto split_node; ++ } ++ ++ /* The node contains only leaves */ ++ dissimilarity = 0; ++ base_seg = edit->segment_cache[0]; ++ for (i = 1; i < ASSOC_ARRAY_FAN_OUT; i++) ++ dissimilarity |= edit->segment_cache[i] ^ base_seg; ++ ++ pr_devel("only leaves; dissimilarity=%lx\n", dissimilarity); ++ ++ if ((dissimilarity & ASSOC_ARRAY_FAN_MASK) == 0) { ++ /* The old leaves all cluster in the same slot. We will need ++ * to insert a shortcut if the new node wants to cluster with them. ++ */ ++ if ((edit->segment_cache[ASSOC_ARRAY_FAN_OUT] ^ base_seg) == 0) ++ goto all_leaves_cluster_together; ++ ++ /* Otherwise we can just insert a new node ahead of the old ++ * one. 
++ */ ++ goto present_leaves_cluster_but_not_new_leaf; ++ } ++ ++split_node: ++ pr_devel("split node\n"); ++ ++ /* We need to split the current node; we know that the node doesn't ++ * simply contain a full set of leaves that cluster together (it ++ * contains meta pointers and/or non-clustering leaves). ++ * ++ * We need to expel at least two leaves out of a set consisting of the ++ * leaves in the node and the new leaf. ++ * ++ * We need a new node (n0) to replace the current one and a new node to ++ * take the expelled nodes (n1). ++ */ ++ edit->set[0].to = assoc_array_node_to_ptr(new_n0); ++ new_n0->back_pointer = node->back_pointer; ++ new_n0->parent_slot = node->parent_slot; ++ new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); ++ new_n1->parent_slot = -1; /* Need to calculate this */ ++ ++do_split_node: ++ pr_devel("do_split_node\n"); ++ ++ new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; ++ new_n1->nr_leaves_on_branch = 0; ++ ++ /* Begin by finding two matching leaves. There have to be at least two ++ * that match - even if there are meta pointers - because any leaf that ++ * would match a slot with a meta pointer in it must be somewhere ++ * behind that meta pointer and cannot be here. Further, given N ++ * remaining leaf slots, we now have N+1 leaves to go in them. 
++ */ ++ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { ++ slot = edit->segment_cache[i]; ++ if (slot != 0xff) ++ for (j = i + 1; j < ASSOC_ARRAY_FAN_OUT + 1; j++) ++ if (edit->segment_cache[j] == slot) ++ goto found_slot_for_multiple_occupancy; ++ } ++found_slot_for_multiple_occupancy: ++ pr_devel("same slot: %x %x [%02x]\n", i, j, slot); ++ BUG_ON(i >= ASSOC_ARRAY_FAN_OUT); ++ BUG_ON(j >= ASSOC_ARRAY_FAN_OUT + 1); ++ BUG_ON(slot >= ASSOC_ARRAY_FAN_OUT); ++ ++ new_n1->parent_slot = slot; ++ ++ /* Metadata pointers cannot change slot */ ++ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) ++ if (assoc_array_ptr_is_meta(node->slots[i])) ++ new_n0->slots[i] = node->slots[i]; ++ else ++ new_n0->slots[i] = NULL; ++ BUG_ON(new_n0->slots[slot] != NULL); ++ new_n0->slots[slot] = assoc_array_node_to_ptr(new_n1); ++ ++ /* Filter the leaf pointers between the new nodes */ ++ free_slot = -1; ++ next_slot = 0; ++ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { ++ if (assoc_array_ptr_is_meta(node->slots[i])) ++ continue; ++ if (edit->segment_cache[i] == slot) { ++ new_n1->slots[next_slot++] = node->slots[i]; ++ new_n1->nr_leaves_on_branch++; ++ } else { ++ do { ++ free_slot++; ++ } while (new_n0->slots[free_slot] != NULL); ++ new_n0->slots[free_slot] = node->slots[i]; ++ } ++ } ++ ++ pr_devel("filtered: f=%x n=%x\n", free_slot, next_slot); ++ ++ if (edit->segment_cache[ASSOC_ARRAY_FAN_OUT] != slot) { ++ do { ++ free_slot++; ++ } while (new_n0->slots[free_slot] != NULL); ++ edit->leaf_p = &new_n0->slots[free_slot]; ++ edit->adjust_count_on = new_n0; ++ } else { ++ edit->leaf_p = &new_n1->slots[next_slot++]; ++ edit->adjust_count_on = new_n1; ++ } ++ ++ BUG_ON(next_slot <= 1); ++ ++ edit->set_backpointers_to = assoc_array_node_to_ptr(new_n0); ++ for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { ++ if (edit->segment_cache[i] == 0xff) { ++ ptr = node->slots[i]; ++ BUG_ON(assoc_array_ptr_is_leaf(ptr)); ++ if (assoc_array_ptr_is_node(ptr)) { ++ side = assoc_array_ptr_to_node(ptr); ++ 
edit->set_backpointers[i] = &side->back_pointer;
++			} else {
++				shortcut = assoc_array_ptr_to_shortcut(ptr);
++				edit->set_backpointers[i] = &shortcut->back_pointer;
++			}
++		}
++	}
++
++	pr_devel("<--%s() = ok [split node]\n", __func__);
++set_parent_pointer:
++	/* The pointer to replace is the root, a parent slot or a shortcut's */
++	ptr = node->back_pointer;
++	if (!ptr)
++		edit->set[0].ptr = &edit->array->root;
++	else if (assoc_array_ptr_is_node(ptr))
++		edit->set[0].ptr = &assoc_array_ptr_to_node(ptr)->slots[node->parent_slot];
++	else
++		edit->set[0].ptr = &assoc_array_ptr_to_shortcut(ptr)->next_node;
++	edit->excised_meta[0] = assoc_array_node_to_ptr(node);
++	return true;
++
++present_leaves_cluster_but_not_new_leaf:
++	/* All the old leaves cluster in the same slot, but the new leaf wants
++	 * to go into a different slot, so we create a new node (n0) to hold the
++	 * new leaf and a pointer to a new node (n1) holding all the old leaves.
++	 */
++	pr_devel("present leaves cluster but not new leaf\n");
++
++	new_n0->back_pointer = node->back_pointer;
++	new_n0->parent_slot = node->parent_slot;
++	new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
++	new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
++	new_n1->parent_slot = edit->segment_cache[0];
++	new_n1->nr_leaves_on_branch = node->nr_leaves_on_branch;
++	edit->adjust_count_on = new_n0;
++
++	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++)
++		new_n1->slots[i] = node->slots[i];
++
++	new_n0->slots[edit->segment_cache[0]] = assoc_array_node_to_ptr(new_n1);
++	edit->leaf_p = &new_n0->slots[edit->segment_cache[ASSOC_ARRAY_FAN_OUT]];
++
++	edit->set[0].to = assoc_array_node_to_ptr(new_n0);
++	pr_devel("<--%s() = ok [insert node before]\n", __func__);
++	goto set_parent_pointer;
++
++all_leaves_cluster_together:
++	/* All the leaves, new and old, want to cluster together in this node
++	 * in the same slot, so we have to replace this node with a shortcut to
++	 * skip over the 
identical parts of the key and then place a pair of
++	 * nodes, one inside the other, at the end of the shortcut and
++	 * distribute the keys between them.
++	 *
++	 * Firstly we need to work out where the leaves start diverging as a
++	 * bit position into their keys so that we know how big the shortcut
++	 * needs to be.
++	 *
++	 * We only need to make a single pass of N of the N+1 leaves because if
++	 * any keys differ between themselves at bit X then at least one of
++	 * them must also differ with the base key at bit X or before.
++	 */
++	pr_devel("all leaves cluster together\n");
++	diff = INT_MAX;
++	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
++		int x = ops->diff_objects(assoc_array_ptr_to_leaf(edit->leaf),
++					  assoc_array_ptr_to_leaf(node->slots[i]));
++		if (x < diff) {
++			BUG_ON(x < 0);
++			diff = x;
++		}
++	}
++	BUG_ON(diff == INT_MAX);
++	BUG_ON(diff < level + ASSOC_ARRAY_LEVEL_STEP);
++
++	keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE);
++	keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
++
++	new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) +
++			 keylen * sizeof(unsigned long), GFP_KERNEL);
++	if (!new_s0)
++		return false;
++	edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s0);
++
++	edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0);
++	new_s0->back_pointer = node->back_pointer;
++	new_s0->parent_slot = node->parent_slot;
++	new_s0->next_node = assoc_array_node_to_ptr(new_n0);
++	new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0);
++	new_n0->parent_slot = 0;
++	new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
++	new_n1->parent_slot = -1; /* Need to calculate this */
++
++	new_s0->skip_to_level = level = diff & ~ASSOC_ARRAY_LEVEL_STEP_MASK;
++	pr_devel("skip_to_level = %d [diff %d]\n", level, diff);
++	BUG_ON(level <= 0);
++
++	for (i = 0; i < keylen; i++)
++		new_s0->index_key[i] =
++			ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE);
++
++	blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK);
++	pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank);
++	if (level & ASSOC_ARRAY_KEY_CHUNK_MASK) /* zero shift would wipe the word */
++		new_s0->index_key[keylen - 1] &= ~blank;
++
++	/* This now reduces to a node splitting exercise for which we'll need
++	 * to regenerate the disparity table.
++	 */
++	for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
++		ptr = node->slots[i];
++		base_seg = ops->get_object_key_chunk(assoc_array_ptr_to_leaf(ptr), level);
++		base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
++		edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK;
++	}
++
++	base_seg = ops->get_key_chunk(index_key, level);
++	base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
++	edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = base_seg & ASSOC_ARRAY_FAN_MASK;
++	goto do_split_node;
++}
++
++/*
++ * Handle insertion into the middle of a shortcut.
++ */
++static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit,
++					    const struct assoc_array_ops *ops,
++					    struct assoc_array_walk_result *result)
++{
++	struct assoc_array_shortcut *shortcut, *new_s0, *new_s1;
++	struct assoc_array_node *node, *new_n0, *side;
++	unsigned long sc_segments, dissimilarity, blank;
++	size_t keylen;
++	int level, sc_level, diff;
++	int sc_slot;
++
++	shortcut	= result->wrong_shortcut.shortcut;
++	level		= result->wrong_shortcut.level;
++	sc_level	= result->wrong_shortcut.sc_level;
++	sc_segments	= result->wrong_shortcut.sc_segments;
++	dissimilarity	= result->wrong_shortcut.dissimilarity;
++
++	pr_devel("-->%s(ix=%d dis=%lx scix=%d)\n",
++		 __func__, level, dissimilarity, sc_level);
++
++	/* We need to split a shortcut and insert a node between the two
++	 * pieces. Zero-length pieces will be dispensed with entirely.
++	 *
++	 * First of all, we need to find out in which level the first
++	 * difference was.
++ */ ++ diff = __ffs(dissimilarity); ++ diff &= ~ASSOC_ARRAY_LEVEL_STEP_MASK; ++ diff += sc_level & ~ASSOC_ARRAY_KEY_CHUNK_MASK; ++ pr_devel("diff=%d\n", diff); ++ ++ if (!shortcut->back_pointer) { ++ edit->set[0].ptr = &edit->array->root; ++ } else if (assoc_array_ptr_is_node(shortcut->back_pointer)) { ++ node = assoc_array_ptr_to_node(shortcut->back_pointer); ++ edit->set[0].ptr = &node->slots[shortcut->parent_slot]; ++ } else { ++ BUG(); ++ } ++ ++ edit->excised_meta[0] = assoc_array_shortcut_to_ptr(shortcut); ++ ++ /* Create a new node now since we're going to need it anyway */ ++ new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); ++ if (!new_n0) ++ return false; ++ edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); ++ edit->adjust_count_on = new_n0; ++ ++ /* Insert a new shortcut before the new node if this segment isn't of ++ * zero length - otherwise we just connect the new node directly to the ++ * parent. ++ */ ++ level += ASSOC_ARRAY_LEVEL_STEP; ++ if (diff > level) { ++ pr_devel("pre-shortcut %d...%d\n", level, diff); ++ keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE); ++ keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; ++ ++ new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) + ++ keylen * sizeof(unsigned long), GFP_KERNEL); ++ if (!new_s0) ++ return false; ++ edit->new_meta[1] = assoc_array_shortcut_to_ptr(new_s0); ++ edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0); ++ new_s0->back_pointer = shortcut->back_pointer; ++ new_s0->parent_slot = shortcut->parent_slot; ++ new_s0->next_node = assoc_array_node_to_ptr(new_n0); ++ new_s0->skip_to_level = diff; ++ ++ new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0); ++ new_n0->parent_slot = 0; ++ ++ memcpy(new_s0->index_key, shortcut->index_key, ++ keylen * sizeof(unsigned long)); ++ ++ blank = ULONG_MAX << (diff & ASSOC_ARRAY_KEY_CHUNK_MASK); ++ pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, diff, blank); ++ new_s0->index_key[keylen - 1] &= ~blank; ++ } else { ++ pr_devel("no 
pre-shortcut\n"); ++ edit->set[0].to = assoc_array_node_to_ptr(new_n0); ++ new_n0->back_pointer = shortcut->back_pointer; ++ new_n0->parent_slot = shortcut->parent_slot; ++ } ++ ++ side = assoc_array_ptr_to_node(shortcut->next_node); ++ new_n0->nr_leaves_on_branch = side->nr_leaves_on_branch; ++ ++ /* We need to know which slot in the new node is going to take a ++ * metadata pointer. ++ */ ++ sc_slot = sc_segments >> (diff & ASSOC_ARRAY_KEY_CHUNK_MASK); ++ sc_slot &= ASSOC_ARRAY_FAN_MASK; ++ ++ pr_devel("new slot %lx >> %d -> %d\n", ++ sc_segments, diff & ASSOC_ARRAY_KEY_CHUNK_MASK, sc_slot); ++ ++ /* Determine whether we need to follow the new node with a replacement ++ * for the current shortcut. We could in theory reuse the current ++ * shortcut if its parent slot number doesn't change - but that's a ++ * 1-in-16 chance so not worth expending the code upon. ++ */ ++ level = diff + ASSOC_ARRAY_LEVEL_STEP; ++ if (level < shortcut->skip_to_level) { ++ pr_devel("post-shortcut %d...%d\n", level, shortcut->skip_to_level); ++ keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); ++ keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; ++ ++ new_s1 = kzalloc(sizeof(struct assoc_array_shortcut) + ++ keylen * sizeof(unsigned long), GFP_KERNEL); ++ if (!new_s1) ++ return false; ++ edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s1); ++ ++ new_s1->back_pointer = assoc_array_node_to_ptr(new_n0); ++ new_s1->parent_slot = sc_slot; ++ new_s1->next_node = shortcut->next_node; ++ new_s1->skip_to_level = shortcut->skip_to_level; ++ ++ new_n0->slots[sc_slot] = assoc_array_shortcut_to_ptr(new_s1); ++ ++ memcpy(new_s1->index_key, shortcut->index_key, ++ keylen * sizeof(unsigned long)); ++ ++ edit->set[1].ptr = &side->back_pointer; ++ edit->set[1].to = assoc_array_shortcut_to_ptr(new_s1); ++ } else { ++ pr_devel("no post-shortcut\n"); ++ ++ /* We don't have to replace the pointed-to node as long as we ++ * use memory barriers to make sure the parent slot number is ++ * 
changed before the back pointer (the parent slot number is
++		 * irrelevant to the old parent shortcut).
++		 */
++		new_n0->slots[sc_slot] = shortcut->next_node;
++		edit->set_parent_slot[0].p = &side->parent_slot;
++		edit->set_parent_slot[0].to = sc_slot;
++		edit->set[1].ptr = &side->back_pointer;
++		edit->set[1].to = assoc_array_node_to_ptr(new_n0);
++	}
++
++	/* Install the new leaf in a spare slot in the new node. */
++	if (sc_slot == 0)
++		edit->leaf_p = &new_n0->slots[1];
++	else
++		edit->leaf_p = &new_n0->slots[0];
++
++	pr_devel("<--%s() = ok [split shortcut]\n", __func__);
++	return true;
++}
++
++/**
++ * assoc_array_insert - Script insertion of an object into an associative array
++ * @array: The array to insert into.
++ * @ops: The operations to use.
++ * @index_key: The key to insert at.
++ * @object: The object to insert.
++ *
++ * Precalculate and preallocate a script for the insertion or replacement of an
++ * object in an associative array. This results in an edit script that can
++ * either be applied or cancelled.
++ *
++ * The function returns a pointer to an edit script or ERR_PTR(-ENOMEM).
++ *
++ * The caller should lock against other modifications and must continue to hold
++ * the lock until assoc_array_apply_edit() has been called.
++ *
++ * Accesses to the tree may take place concurrently with this function,
++ * provided they hold the RCU read lock.
++ */
++struct assoc_array_edit *assoc_array_insert(struct assoc_array *array,
++					    const struct assoc_array_ops *ops,
++					    const void *index_key,
++					    void *object)
++{
++	struct assoc_array_walk_result result;
++	struct assoc_array_edit *edit;
++
++	pr_devel("-->%s()\n", __func__);
++
++	/* The leaf pointer we're given must not have the bottom bit set as we
++	 * use those for type-marking the pointer. NULL pointers are also not
++	 * allowed as they indicate an empty slot but we have to allow them
++	 * here as they can be updated later.
++ */ ++ BUG_ON(assoc_array_ptr_is_meta(object)); ++ ++ edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); ++ if (!edit) ++ return ERR_PTR(-ENOMEM); ++ edit->array = array; ++ edit->ops = ops; ++ edit->leaf = assoc_array_leaf_to_ptr(object); ++ edit->adjust_count_by = 1; ++ ++ switch (assoc_array_walk(array, ops, index_key, &result)) { ++ case assoc_array_walk_tree_empty: ++ /* Allocate a root node if there isn't one yet */ ++ if (!assoc_array_insert_in_empty_tree(edit)) ++ goto enomem; ++ return edit; ++ ++ case assoc_array_walk_found_terminal_node: ++ /* We found a node that doesn't have a node/shortcut pointer in ++ * the slot corresponding to the index key that we have to ++ * follow. ++ */ ++ if (!assoc_array_insert_into_terminal_node(edit, ops, index_key, ++ &result)) ++ goto enomem; ++ return edit; ++ ++ case assoc_array_walk_found_wrong_shortcut: ++ /* We found a shortcut that didn't match our key in a slot we ++ * needed to follow. ++ */ ++ if (!assoc_array_insert_mid_shortcut(edit, ops, &result)) ++ goto enomem; ++ return edit; ++ } ++ ++enomem: ++ /* Clean up after an out of memory error */ ++ pr_devel("enomem\n"); ++ assoc_array_cancel_edit(edit); ++ return ERR_PTR(-ENOMEM); ++} ++ ++/** ++ * assoc_array_insert_set_object - Set the new object pointer in an edit script ++ * @edit: The edit script to modify. ++ * @object: The object pointer to set. ++ * ++ * Change the object to be inserted in an edit script. The object pointed to ++ * by the old object is not freed. This must be done prior to applying the ++ * script. ++ */ ++void assoc_array_insert_set_object(struct assoc_array_edit *edit, void *object) ++{ ++ BUG_ON(!object); ++ edit->leaf = assoc_array_leaf_to_ptr(object); ++} ++ ++struct assoc_array_delete_collapse_context { ++ struct assoc_array_node *node; ++ const void *skip_leaf; ++ int slot; ++}; ++ ++/* ++ * Subtree collapse to node iterator. 
++ */
++/* Called once per leaf of the subtree being collapsed: copies the leaf into
++ * the next free slot of the replacement node unless it is the leaf being
++ * deleted.  Always returns 0.
++ */
++static int assoc_array_delete_collapse_iterator(const void *leaf,
++						void *iterator_data)
++{
++	struct assoc_array_delete_collapse_context *collapse = iterator_data;
++
++	if (leaf == collapse->skip_leaf)
++		return 0;
++
++	/* The replacement node must have room for every surviving leaf */
++	BUG_ON(collapse->slot >= ASSOC_ARRAY_FAN_OUT);
++
++	collapse->node->slots[collapse->slot++] = assoc_array_leaf_to_ptr(leaf);
++	return 0;
++}
++
++/**
++ * assoc_array_delete - Script deletion of an object from an associative array
++ * @array: The array to search.
++ * @ops: The operations to use.
++ * @index_key: The key to the object.
++ *
++ * Precalculate and preallocate a script for the deletion of an object from an
++ * associative array. This results in an edit script that can either be
++ * applied or cancelled.
++ *
++ * The function returns a pointer to an edit script if the object was found,
++ * NULL if the object was not found or -ENOMEM.  The error is encoded with
++ * ERR_PTR(), so callers have to distinguish three outcomes: a valid script,
++ * NULL and an error pointer.
++ *
++ * The caller should lock against other modifications and must continue to hold
++ * the lock until assoc_array_apply_edit() has been called.
++ *
++ * Accesses to the tree may take place concurrently with this function,
++ * provided they hold the RCU read lock.
++ */
++struct assoc_array_edit *assoc_array_delete(struct assoc_array *array,
++					    const struct assoc_array_ops *ops,
++					    const void *index_key)
++{
++	struct assoc_array_delete_collapse_context collapse;
++	struct assoc_array_walk_result result;
++	struct assoc_array_node *node, *new_n0;
++	struct assoc_array_edit *edit;
++	struct assoc_array_ptr *ptr;
++	bool has_meta;
++	int slot, i;
++
++	pr_devel("-->%s()\n", __func__);
++
++	edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
++	if (!edit)
++		return ERR_PTR(-ENOMEM);
++	edit->array = array;
++	edit->ops = ops;
++	/* Applying the script adds this amount to the leaf counts */
++	edit->adjust_count_by = -1;
++
++	switch (assoc_array_walk(array, ops, index_key, &result)) {
++	case assoc_array_walk_found_terminal_node:
++		/* We found a node that should contain the leaf we've been
++		 * asked to remove - *if* it's in the tree.
++		 */
++		pr_devel("terminal_node\n");
++		node = result.terminal_node.node;
++
++		for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
++			ptr = node->slots[slot];
++			if (ptr &&
++			    assoc_array_ptr_is_leaf(ptr) &&
++			    ops->compare_object(assoc_array_ptr_to_leaf(ptr),
++						index_key))
++				goto found_leaf;
++		}
++		/* No slot matched: fall through to the "not found" exit */
++	case assoc_array_walk_tree_empty:
++	case assoc_array_walk_found_wrong_shortcut:
++	default:
++		assoc_array_cancel_edit(edit);
++		pr_devel("not found\n");
++		return NULL;
++	}
++
++found_leaf:
++	/* Here node/slot identify the leaf to be removed */
++	BUG_ON(array->nr_leaves_on_tree <= 0);
++
++	/* In the simplest form of deletion we just clear the slot and release
++	 * the leaf after a suitable interval.
++	 */
++	edit->dead_leaf = node->slots[slot];
++	edit->set[0].ptr = &node->slots[slot];
++	edit->set[0].to = NULL;
++	edit->adjust_count_on = node;
++
++	/* If that concludes erasure of the last leaf, then delete the entire
++	 * internal array.
++	 */
++	if (array->nr_leaves_on_tree == 1) {
++		edit->set[1].ptr = &array->root;
++		edit->set[1].to = NULL;
++		edit->adjust_count_on = NULL;
++		edit->excised_subtree = array->root;
++		pr_devel("all gone\n");
++		return edit;
++	}
++
++	/* However, we'd also like to clear up some metadata blocks if we
++	 * possibly can.
++	 *
++	 * We go for a simple algorithm of: if this node has FAN_OUT or fewer
++	 * leaves in it, then attempt to collapse it - and attempt to
++	 * recursively collapse up the tree.
++	 *
++	 * We could also try and collapse in partially filled subtrees to take
++	 * up space in this node.
++	 *
++	 * The counts tested below still include the leaf being deleted (the
++	 * decrement only happens when the script is applied), hence the "+ 1".
++	 */
++	if (node->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) {
++		struct assoc_array_node *parent, *grandparent;
++		struct assoc_array_ptr *ptr;
++
++		/* First of all, we need to know if this node has metadata so
++		 * that we don't try collapsing if all the leaves are already
++		 * here.
++		 */
++		has_meta = false;
++		for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
++			ptr = node->slots[i];
++			if (assoc_array_ptr_is_meta(ptr)) {
++				has_meta = true;
++				break;
++			}
++		}
++
++		pr_devel("leaves: %ld [m=%d]\n",
++			 node->nr_leaves_on_branch - 1, has_meta);
++
++		/* Look further up the tree to see if we can collapse this node
++		 * into a more proximal node too.
++		 */
++		parent = node;
++	collapse_up:
++		pr_devel("collapse subtree: %ld\n", parent->nr_leaves_on_branch);
++
++		/* Step to the next node up, looking through a shortcut if one
++		 * sits in between; a NULL back pointer means the top has been
++		 * reached.
++		 */
++		ptr = parent->back_pointer;
++		if (!ptr)
++			goto do_collapse;
++		if (assoc_array_ptr_is_shortcut(ptr)) {
++			struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(ptr);
++			ptr = s->back_pointer;
++			if (!ptr)
++				goto do_collapse;
++		}
++
++		grandparent = assoc_array_ptr_to_node(ptr);
++		if (grandparent->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) {
++			parent = grandparent;
++			goto collapse_up;
++		}
++
++	do_collapse:
++		/* There's no point collapsing if the original node has no meta
++		 * pointers to discard and if we didn't merge into one of that
++		 * node's ancestry.
++		 */
++		if (has_meta || parent != node) {
++			node = parent;
++
++			/* Create a new node to collapse into */
++			new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
++			if (!new_n0)
++				goto enomem;
++			/* Recorded so that cancelling the script frees it */
++			edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
++
++			new_n0->back_pointer = node->back_pointer;
++			new_n0->parent_slot = node->parent_slot;
++			new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
++			edit->adjust_count_on = new_n0;
++
++			/* Copy every leaf under the old node, bar the dead
++			 * one, into consecutive slots of the new node.
++			 */
++			collapse.node = new_n0;
++			collapse.skip_leaf = assoc_array_ptr_to_leaf(edit->dead_leaf);
++			collapse.slot = 0;
++			assoc_array_subtree_iterate(assoc_array_node_to_ptr(node),
++						    node->back_pointer,
++						    assoc_array_delete_collapse_iterator,
++						    &collapse);
++			pr_devel("collapsed %d,%lu\n", collapse.slot, new_n0->nr_leaves_on_branch);
++			/* The copied count excludes the dead leaf, which the
++			 * branch count still includes at this point.
++			 */
++			BUG_ON(collapse.slot != new_n0->nr_leaves_on_branch - 1);
++
++			/* Work out which pointer currently leads to the old
++			 * node so that applying the script can redirect it.
++			 */
++			if (!node->back_pointer) {
++				edit->set[1].ptr = &array->root;
++			} else if (assoc_array_ptr_is_leaf(node->back_pointer)) {
++				BUG();
++			} else if (assoc_array_ptr_is_node(node->back_pointer)) {
++				struct assoc_array_node *p =
++					assoc_array_ptr_to_node(node->back_pointer);
++				edit->set[1].ptr = &p->slots[node->parent_slot];
++			} else if (assoc_array_ptr_is_shortcut(node->back_pointer)) {
++				struct assoc_array_shortcut *s =
++					assoc_array_ptr_to_shortcut(node->back_pointer);
++				edit->set[1].ptr = &s->next_node;
++			}
++			edit->set[1].to = assoc_array_node_to_ptr(new_n0);
++			edit->excised_subtree = assoc_array_node_to_ptr(node);
++		}
++	}
++
++	return edit;
++
++enomem:
++	/* Clean up after an out of memory error */
++	pr_devel("enomem\n");
++	assoc_array_cancel_edit(edit);
++	return ERR_PTR(-ENOMEM);
++}
++
++/**
++ * assoc_array_clear - Script deletion of all objects from an associative array
++ * @array: The array to clear.
++ * @ops: The operations to use.
++ *
++ * Precalculate and preallocate a script for the deletion of all the objects
++ * from an associative array.
This results in an edit script that can either
++ * be applied or cancelled.
++ *
++ * Returns the edit script when the array holds something to delete, NULL when
++ * the array has no root (nothing to do), or ERR_PTR(-ENOMEM) if the script
++ * could not be allocated.
++ *
++ * The caller should lock against other modifications and must continue to hold
++ * the lock until assoc_array_apply_edit() has been called.
++ *
++ * Accesses to the tree may take place concurrently with this function,
++ * provided they hold the RCU read lock.
++ */
++struct assoc_array_edit *assoc_array_clear(struct assoc_array *array,
++					   const struct assoc_array_ops *ops)
++{
++	struct assoc_array_edit *script;
++
++	pr_devel("-->%s()\n", __func__);
++
++	/* Without a root there is nothing to script */
++	if (array->root == NULL)
++		return NULL;
++
++	script = kzalloc(sizeof(*script), GFP_KERNEL);
++	if (script == NULL)
++		return ERR_PTR(-ENOMEM);
++
++	script->ops = ops;
++	script->array = array;
++
++	/* The whole current tree is handed over for destruction, using the
++	 * same ops, once the script has been applied.
++	 */
++	script->ops_for_excised_subtree = ops;
++	script->excised_subtree = array->root;
++
++	/* Applying the script stores NULL in the root pointer */
++	script->set[1].to = NULL;
++	script->set[1].ptr = &array->root;
++
++	pr_devel("all gone\n");
++	return script;
++}
++
++/*
++ * Handle the deferred destruction after an applied edit.
++ */
++static void assoc_array_rcu_cleanup(struct rcu_head *head)
++{
++	struct assoc_array_edit *script;
++	struct assoc_array_ptr *subtree;
++	int n;
++
++	script = container_of(head, struct assoc_array_edit, rcu);
++
++	pr_devel("-->%s()\n", __func__);
++
++	/* Release the leaf that the edit removed, if there was one */
++	if (script->dead_leaf)
++		script->ops->free_object(assoc_array_ptr_to_leaf(script->dead_leaf));
++
++	/* Free the individual metadata blocks that the edit replaced */
++	for (n = 0; n < ARRAY_SIZE(script->excised_meta); n++) {
++		if (!script->excised_meta[n])
++			continue;
++		kfree(assoc_array_ptr_to_node(script->excised_meta[n]));
++	}
++
++	/* Tear down any subtree that was cut out, first clearing the back
++	 * pointer of its topmost block.
++	 */
++	subtree = script->excised_subtree;
++	if (subtree) {
++		BUG_ON(assoc_array_ptr_is_leaf(subtree));
++		if (assoc_array_ptr_is_node(subtree))
++			assoc_array_ptr_to_node(subtree)->back_pointer = NULL;
++		else
++			assoc_array_ptr_to_shortcut(subtree)->back_pointer = NULL;
++		assoc_array_destroy_subtree(subtree,
++					    script->ops_for_excised_subtree);
++	}
++
++	kfree(script);
++}
++
++/**
++ * assoc_array_apply_edit - Apply an edit script to an associative array
++ * @edit: The script to apply.
++ *
++ * Apply an edit script to an associative array to effect an insertion,
++ * deletion or clearance. As the edit script includes preallocated memory,
++ * this is guaranteed not to fail.
++ *
++ * The edit script, dead objects and dead metadata will be scheduled for
++ * destruction after an RCU grace period to permit those doing read-only
++ * accesses on the array to continue to do so under the RCU read lock whilst
++ * the edit is taking place.
++ */
++void assoc_array_apply_edit(struct assoc_array_edit *edit)
++{
++	struct assoc_array_shortcut *shortcut;
++	struct assoc_array_node *node;
++	struct assoc_array_ptr *ptr;
++	int i;
++
++	pr_devel("-->%s()\n", __func__);
++
++	/* The stores below are issued in four groups, each preceded by a
++	 * write barrier: the new leaf, then parent slot numbers, then back
++	 * pointers, and finally the pointers that splice new blocks into the
++	 * tree.  Do not reorder them.
++	 */
++	smp_wmb();
++	if (edit->leaf_p)
++		*edit->leaf_p = edit->leaf;
++
++	smp_wmb();
++	for (i = 0; i < ARRAY_SIZE(edit->set_parent_slot); i++)
++		if (edit->set_parent_slot[i].p)
++			*edit->set_parent_slot[i].p = edit->set_parent_slot[i].to;
++
++	/* Every recorded back pointer is set to the same target */
++	smp_wmb();
++	for (i = 0; i < ARRAY_SIZE(edit->set_backpointers); i++)
++		if (edit->set_backpointers[i])
++			*edit->set_backpointers[i] = edit->set_backpointers_to;
++
++	smp_wmb();
++	for (i = 0; i < ARRAY_SIZE(edit->set); i++)
++		if (edit->set[i].ptr)
++			*edit->set[i].ptr = edit->set[i].to;
++
++	/* Bring the leaf counts up to date: an array left without a root
++	 * holds nothing; otherwise apply the adjustment to the nominated node
++	 * and to every node above it, stepping over shortcuts, and then to
++	 * the array total.
++	 */
++	if (edit->array->root == NULL) {
++		edit->array->nr_leaves_on_tree = 0;
++	} else if (edit->adjust_count_on) {
++		node = edit->adjust_count_on;
++		for (;;) {
++			node->nr_leaves_on_branch += edit->adjust_count_by;
++
++			ptr = node->back_pointer;
++			if (!ptr)
++				break;
++			if (assoc_array_ptr_is_shortcut(ptr)) {
++				shortcut = assoc_array_ptr_to_shortcut(ptr);
++				ptr = shortcut->back_pointer;
++				if (!ptr)
++					break;
++			}
++			BUG_ON(!assoc_array_ptr_is_node(ptr));
++			node = assoc_array_ptr_to_node(ptr);
++		}
++
++		edit->array->nr_leaves_on_tree += edit->adjust_count_by;
++	}
++
++	/* Ownership of the script passes to the RCU callback, which frees it;
++	 * the caller must not touch @edit after this returns.
++	 */
++	call_rcu(&edit->rcu, assoc_array_rcu_cleanup);
++}
++
++/**
++ * assoc_array_cancel_edit - Discard an edit script.
++ * @edit: The script to discard.
++ *
++ * Free an edit script and all the preallocated data it holds without making
++ * any changes to the associative array it was intended for.
++ *
++ * NOTE! In the case of an insertion script, this does _not_ release the leaf
++ * that was to be inserted. That is left to the caller.
++ */
++void assoc_array_cancel_edit(struct assoc_array_edit *edit)
++{
++	struct assoc_array_ptr *meta;
++	int n;
++
++	pr_devel("-->%s()\n", __func__);
++
++	/* Release every metadata block that was preallocated for the script */
++	for (n = 0; n < ARRAY_SIZE(edit->new_meta); n++) {
++		meta = edit->new_meta[n];
++		if (!meta)
++			continue;
++
++		if (!assoc_array_ptr_is_node(meta))
++			kfree(assoc_array_ptr_to_shortcut(meta));
++		else
++			kfree(assoc_array_ptr_to_node(meta));
++	}
++
++	kfree(edit);
++}
++
++/**
++ * assoc_array_gc - Garbage collect an associative array.
++ * @array: The array to clean.
++ * @ops: The operations to use.
++ * @iterator: A callback function to pass judgement on each object.
++ * @iterator_data: Private data for the callback function.
++ *
++ * Collect garbage from an associative array and pack down the internal tree to
++ * save memory.
++ *
++ * The iterator function is asked to pass judgement upon each object in the
++ * array. If it returns false, the object is discarded and if it returns true,
++ * the object is kept. If it returns true, it must increment the object's
++ * usage count (or whatever it needs to do to retain it) before returning.
++ *
++ * This function returns 0 if successful or -ENOMEM if out of memory. In the
++ * latter case, the array is not changed.
++ *
++ * The caller should lock against other modifications and must continue to hold
++ * the lock until assoc_array_apply_edit() has been called.
++ *
++ * Accesses to the tree may take place concurrently with this function,
++ * provided they hold the RCU read lock.
++ */
++int assoc_array_gc(struct assoc_array *array,
++		   const struct assoc_array_ops *ops,
++		   bool (*iterator)(void *object, void *iterator_data),
++		   void *iterator_data)
++{
++	struct assoc_array_shortcut *shortcut, *new_s;
++	struct assoc_array_node *node, *new_n;
++	struct assoc_array_edit *edit;
++	struct assoc_array_ptr *cursor, *ptr;
++	struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp;
++	unsigned long nr_leaves_on_tree;
++	int keylen, slot, nr_free, next_slot, i;
++
++	pr_devel("-->%s()\n", __func__);
++
++	if (!array->root)
++		return 0;
++
++	/* The collection builds a complete replacement tree and swaps it in
++	 * through an edit script; the old tree becomes the excised subtree.
++	 */
++	edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
++	if (!edit)
++		return -ENOMEM;
++	edit->array = array;
++	edit->ops = ops;
++	edit->ops_for_excised_subtree = ops;
++	edit->set[0].ptr = &array->root;
++	edit->excised_subtree = array->root;
++
++	/* cursor/node walk the old tree; new_parent/new_n track the matching
++	 * position in the copy and new_ptr_pp is where the next duplicated
++	 * block gets attached.
++	 */
++	new_root = new_parent = NULL;
++	new_ptr_pp = &new_root;
++	cursor = array->root;
++
++descend:
++	/* If this point is a shortcut, then we need to duplicate it and
++	 * advance the target cursor.
++	 */
++	if (assoc_array_ptr_is_shortcut(cursor)) {
++		shortcut = assoc_array_ptr_to_shortcut(cursor);
++		keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
++		keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
++		new_s = kmalloc(sizeof(struct assoc_array_shortcut) +
++				keylen * sizeof(unsigned long), GFP_KERNEL);
++		if (!new_s)
++			goto enomem;
++		pr_devel("dup shortcut %p -> %p\n", shortcut, new_s);
++		memcpy(new_s, shortcut, (sizeof(struct assoc_array_shortcut) +
++					 keylen * sizeof(unsigned long)));
++		new_s->back_pointer = new_parent;
++		new_s->parent_slot = shortcut->parent_slot;
++		*new_ptr_pp = new_parent = assoc_array_shortcut_to_ptr(new_s);
++		new_ptr_pp = &new_s->next_node;
++		cursor = shortcut->next_node;
++	}
++
++	/* Duplicate the node at this position */
++	node = assoc_array_ptr_to_node(cursor);
++	new_n = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
++	if (!new_n)
++		goto enomem;
++	pr_devel("dup node %p -> %p\n", node, new_n);
++	new_n->back_pointer = new_parent;
++	new_n->parent_slot = node->parent_slot;
++	*new_ptr_pp = new_parent = assoc_array_node_to_ptr(new_n);
++	new_ptr_pp = NULL;
++	slot = 0;
++
++continue_node:
++	/* Filter across any leaves and gc any subtrees */
++	for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
++		ptr = node->slots[slot];
++		if (!ptr)
++			continue;
++
++		if (assoc_array_ptr_is_leaf(ptr)) {
++			if (iterator(assoc_array_ptr_to_leaf(ptr),
++				     iterator_data))
++				/* The iterator will have done any reference
++				 * counting on the object for us.
++				 */
++				new_n->slots[slot] = ptr;
++			continue;
++		}
++
++		new_ptr_pp = &new_n->slots[slot];
++		cursor = ptr;
++		goto descend;
++	}
++
++	pr_devel("-- compress node %p --\n", new_n);
++
++	/* Count up the number of empty slots in this node and work out the
++	 * subtree leaf count.
++	 */
++	new_n->nr_leaves_on_branch = 0;
++	nr_free = 0;
++	for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
++		ptr = new_n->slots[slot];
++		if (!ptr)
++			nr_free++;
++		else if (assoc_array_ptr_is_leaf(ptr))
++			new_n->nr_leaves_on_branch++;
++	}
++	pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch);
++
++	/* See what we can fold in */
++	next_slot = 0;
++	for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
++		struct assoc_array_shortcut *s;
++		struct assoc_array_node *child;
++
++		ptr = new_n->slots[slot];
++		if (!ptr || assoc_array_ptr_is_leaf(ptr))
++			continue;
++
++		s = NULL;
++		if (assoc_array_ptr_is_shortcut(ptr)) {
++			s = assoc_array_ptr_to_shortcut(ptr);
++			ptr = s->next_node;
++		}
++
++		child = assoc_array_ptr_to_node(ptr);
++		new_n->nr_leaves_on_branch += child->nr_leaves_on_branch;
++
++		/* Folding frees the slot the child occupied, hence the "+ 1" */
++		if (child->nr_leaves_on_branch <= nr_free + 1) {
++			/* Fold the child node into this one */
++			pr_devel("[%d] fold node %lu/%d [nx %d]\n",
++				 slot, child->nr_leaves_on_branch, nr_free + 1,
++				 next_slot);
++
++			/* We would already have reaped an intervening shortcut
++			 * on the way back up the tree.
++			 */
++			BUG_ON(s);
++
++			new_n->slots[slot] = NULL;
++			nr_free++;
++			if (slot < next_slot)
++				next_slot = slot;
++			for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
++				struct assoc_array_ptr *p = child->slots[i];
++				if (!p)
++					continue;
++				BUG_ON(assoc_array_ptr_is_meta(p));
++				while (new_n->slots[next_slot])
++					next_slot++;
++				BUG_ON(next_slot >= ASSOC_ARRAY_FAN_OUT);
++				new_n->slots[next_slot++] = p;
++				nr_free--;
++			}
++			kfree(child);
++		} else {
++			pr_devel("[%d] retain node %lu/%d [nx %d]\n",
++				 slot, child->nr_leaves_on_branch, nr_free + 1,
++				 next_slot);
++		}
++	}
++
++	pr_devel("after: %lu\n", new_n->nr_leaves_on_branch);
++
++	/* Whenever the walk finishes, the node just compressed is the topmost
++	 * one, so this holds the total for the whole new tree at that point.
++	 */
++	nr_leaves_on_tree = new_n->nr_leaves_on_branch;
++
++	/* Excise this node if it is singly occupied by a shortcut */
++	if (nr_free == ASSOC_ARRAY_FAN_OUT - 1) {
++		for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++)
++			if ((ptr = new_n->slots[slot]))
++				break;
++
++		if (assoc_array_ptr_is_meta(ptr) &&
++		    assoc_array_ptr_is_shortcut(ptr)) {
++			pr_devel("excise node %p with 1 shortcut\n", new_n);
++			new_s = assoc_array_ptr_to_shortcut(ptr);
++			new_parent = new_n->back_pointer;
++			slot = new_n->parent_slot;
++			kfree(new_n);
++			if (!new_parent) {
++				new_s->back_pointer = NULL;
++				new_s->parent_slot = 0;
++				new_root = ptr;
++				goto gc_complete;
++			}
++
++			if (assoc_array_ptr_is_shortcut(new_parent)) {
++				/* We can discard any preceding shortcut also */
++				struct assoc_array_shortcut *s =
++					assoc_array_ptr_to_shortcut(new_parent);
++
++				pr_devel("excise preceding shortcut\n");
++
++				new_parent = new_s->back_pointer = s->back_pointer;
++				slot = new_s->parent_slot = s->parent_slot;
++				kfree(s);
++				if (!new_parent) {
++					new_s->back_pointer = NULL;
++					new_s->parent_slot = 0;
++					new_root = ptr;
++					goto gc_complete;
++				}
++			}
++
++			new_s->back_pointer = new_parent;
++			new_s->parent_slot = slot;
++			new_n = assoc_array_ptr_to_node(new_parent);
++			new_n->slots[slot] = ptr;
++			goto ascend_old_tree;
++		}
++	}
++
++	/* Excise any shortcuts we might encounter that point to nodes that
++	 * only contain leaves.
++	 */
++	ptr = new_n->back_pointer;
++	if (!ptr)
++		goto gc_complete;
++
++	if (assoc_array_ptr_is_shortcut(ptr)) {
++		new_s = assoc_array_ptr_to_shortcut(ptr);
++		new_parent = new_s->back_pointer;
++		slot = new_s->parent_slot;
++
++		if (new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) {
++			struct assoc_array_node *n;
++
++			pr_devel("excise shortcut\n");
++			new_n->back_pointer = new_parent;
++			new_n->parent_slot = slot;
++			kfree(new_s);
++			if (!new_parent) {
++				new_root = assoc_array_node_to_ptr(new_n);
++				goto gc_complete;
++			}
++
++			n = assoc_array_ptr_to_node(new_parent);
++			n->slots[slot] = assoc_array_node_to_ptr(new_n);
++		}
++	} else {
++		new_parent = ptr;
++	}
++	new_n = assoc_array_ptr_to_node(new_parent);
++
++ascend_old_tree:
++	ptr = node->back_pointer;
++	if (assoc_array_ptr_is_shortcut(ptr)) {
++		shortcut = assoc_array_ptr_to_shortcut(ptr);
++		slot = shortcut->parent_slot;
++		cursor = shortcut->back_pointer;
++		/* A shortcut may sit at the root of the tree, in which case
++		 * there is no node above it to return to and the walk is done.
++		 */
++		if (!cursor)
++			goto gc_complete;
++	} else {
++		slot = node->parent_slot;
++		cursor = ptr;
++	}
++	/* cursor, not ptr, is what gets dereferenced as the next node */
++	BUG_ON(!cursor);
++	node = assoc_array_ptr_to_node(cursor);
++	slot++;
++	goto continue_node;
++
++gc_complete:
++	edit->set[0].to = new_root;
++	assoc_array_apply_edit(edit);
++	/* The edit script is handed to RCU for freeing by the call above and
++	 * must not be touched again, so go through @array directly.
++	 */
++	array->nr_leaves_on_tree = nr_leaves_on_tree;
++	return 0;
++
++enomem:
++	pr_devel("enomem\n");
++	assoc_array_destroy_subtree(new_root, edit->ops);
++	kfree(edit);
++	return -ENOMEM;
++}
+-- 
+1.8.3.1
+
+
+From 03ac60b84587fa8e57e7ec5cd3d59b7fa8d97c79 Mon Sep 17 00:00:00 2001
+From: David Howells
+Date: Fri, 30 Aug 2013 15:37:54 +0100
+Subject: [PATCH 10/10] KEYS: Expand the capacity of a keyring
+
+Expand the capacity of a keyring to be able to hold a lot more keys by using
+the previously added associative array implementation. Currently the maximum
+capacity is:
+
+	(PAGE_SIZE - sizeof(header)) / sizeof(struct key *)
+
+which, on a 64-bit system, is a little more than 500.
However, since this is being +used for the NFS uid mapper, we need more than that. The new implementation +gives us effectively unlimited capacity. + +With some alterations, the keyutils testsuite runs successfully to completion +after this patch is applied. The alterations are because (a) keyrings that +are simply added to no longer appear ordered and (b) some of the errors have +changed a bit. + +Signed-off-by: David Howells +--- + include/keys/keyring-type.h | 17 +- + include/linux/key.h | 13 +- + lib/assoc_array.c | 1 + + security/keys/Kconfig | 1 + + security/keys/gc.c | 33 +- + security/keys/internal.h | 17 +- + security/keys/key.c | 35 +- + security/keys/keyring.c | 1436 ++++++++++++++++++++++--------------------- + security/keys/request_key.c | 12 +- + 9 files changed, 803 insertions(+), 762 deletions(-) + +diff --git a/include/keys/keyring-type.h b/include/keys/keyring-type.h +index cf49159..fca5c62 100644 +--- a/include/keys/keyring-type.h ++++ b/include/keys/keyring-type.h +@@ -1,6 +1,6 @@ + /* Keyring key type + * +- * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. ++ * Copyright (C) 2008, 2013 Red Hat, Inc. All Rights Reserved. 
+ * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or +@@ -13,19 +13,6 @@ + #define _KEYS_KEYRING_TYPE_H + + #include +-#include +- +-/* +- * the keyring payload contains a list of the keys to which the keyring is +- * subscribed +- */ +-struct keyring_list { +- struct rcu_head rcu; /* RCU deletion hook */ +- unsigned short maxkeys; /* max keys this list can hold */ +- unsigned short nkeys; /* number of keys currently held */ +- unsigned short delkey; /* key to be unlinked by RCU */ +- struct key __rcu *keys[0]; +-}; +- ++#include + + #endif /* _KEYS_KEYRING_TYPE_H */ +diff --git a/include/linux/key.h b/include/linux/key.h +index ef596c7..2417f78 100644 +--- a/include/linux/key.h ++++ b/include/linux/key.h +@@ -22,6 +22,7 @@ + #include + #include + #include ++#include + + #ifdef __KERNEL__ + #include +@@ -196,11 +197,13 @@ struct key { + * whatever + */ + union { +- unsigned long value; +- void __rcu *rcudata; +- void *data; +- struct keyring_list __rcu *subscriptions; +- } payload; ++ union { ++ unsigned long value; ++ void __rcu *rcudata; ++ void *data; ++ } payload; ++ struct assoc_array keys; ++ }; + }; + + extern struct key *key_alloc(struct key_type *type, +diff --git a/lib/assoc_array.c b/lib/assoc_array.c +index a095281..17edeaf 100644 +--- a/lib/assoc_array.c ++++ b/lib/assoc_array.c +@@ -12,6 +12,7 @@ + */ + //#define DEBUG + #include ++#include + #include + + /* +diff --git a/security/keys/Kconfig b/security/keys/Kconfig +index a90d6d3..15e0dfe 100644 +--- a/security/keys/Kconfig ++++ b/security/keys/Kconfig +@@ -4,6 +4,7 @@ + + config KEYS + bool "Enable access key retention support" ++ select ASSOCIATIVE_ARRAY + help + This option provides support for retaining authentication tokens and + access keys in the kernel. 
+diff --git a/security/keys/gc.c b/security/keys/gc.c +index d67c97b..cce621c 100644 +--- a/security/keys/gc.c ++++ b/security/keys/gc.c +@@ -130,6 +130,13 @@ void key_gc_keytype(struct key_type *ktype) + kleave(""); + } + ++static int key_gc_keyring_func(const void *object, void *iterator_data) ++{ ++ const struct key *key = object; ++ time_t *limit = iterator_data; ++ return key_is_dead(key, *limit); ++} ++ + /* + * Garbage collect pointers from a keyring. + * +@@ -138,10 +145,9 @@ void key_gc_keytype(struct key_type *ktype) + */ + static void key_gc_keyring(struct key *keyring, time_t limit) + { +- struct keyring_list *klist; +- int loop; ++ int result; + +- kenter("%x", key_serial(keyring)); ++ kenter("%x{%s}", keyring->serial, keyring->description ?: ""); + + if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) | + (1 << KEY_FLAG_REVOKED))) +@@ -149,27 +155,17 @@ static void key_gc_keyring(struct key *keyring, time_t limit) + + /* scan the keyring looking for dead keys */ + rcu_read_lock(); +- klist = rcu_dereference(keyring->payload.subscriptions); +- if (!klist) +- goto unlock_dont_gc; +- +- loop = klist->nkeys; +- smp_rmb(); +- for (loop--; loop >= 0; loop--) { +- struct key *key = rcu_dereference(klist->keys[loop]); +- if (key_is_dead(key, limit)) +- goto do_gc; +- } +- +-unlock_dont_gc: ++ result = assoc_array_iterate(&keyring->keys, ++ key_gc_keyring_func, &limit); + rcu_read_unlock(); ++ if (result == true) ++ goto do_gc; ++ + dont_gc: + kleave(" [no gc]"); + return; + + do_gc: +- rcu_read_unlock(); +- + keyring_gc(keyring, limit); + kleave(" [gc]"); + } +@@ -392,7 +388,6 @@ found_unreferenced_key: + */ + found_keyring: + spin_unlock(&key_serial_lock); +- kdebug("scan keyring %d", key->serial); + key_gc_keyring(key, limit); + goto maybe_resched; + +diff --git a/security/keys/internal.h b/security/keys/internal.h +index 73950bf..581c6f6 100644 +--- a/security/keys/internal.h ++++ b/security/keys/internal.h +@@ -90,20 +90,23 @@ extern void 
key_type_put(struct key_type *ktype); + + extern int __key_link_begin(struct key *keyring, + const struct keyring_index_key *index_key, +- unsigned long *_prealloc); ++ struct assoc_array_edit **_edit); + extern int __key_link_check_live_key(struct key *keyring, struct key *key); +-extern void __key_link(struct key *keyring, struct key *key, +- unsigned long *_prealloc); ++extern void __key_link(struct key *key, struct assoc_array_edit **_edit); + extern void __key_link_end(struct key *keyring, + const struct keyring_index_key *index_key, +- unsigned long prealloc); ++ struct assoc_array_edit *edit); + +-extern key_ref_t __keyring_search_one(key_ref_t keyring_ref, +- const struct keyring_index_key *index_key); ++extern key_ref_t find_key_to_update(key_ref_t keyring_ref, ++ const struct keyring_index_key *index_key); + + extern struct key *keyring_search_instkey(struct key *keyring, + key_serial_t target_id); + ++extern int iterate_over_keyring(const struct key *keyring, ++ int (*func)(const struct key *key, void *data), ++ void *data); ++ + typedef int (*key_match_func_t)(const struct key *, const void *); + + struct keyring_search_context { +@@ -119,6 +122,8 @@ struct keyring_search_context { + #define KEYRING_SEARCH_NO_CHECK_PERM 0x0010 /* Don't check permissions */ + #define KEYRING_SEARCH_DETECT_TOO_DEEP 0x0020 /* Give an error on excessive depth */ + ++ int (*iterator)(const void *object, void *iterator_data); ++ + /* Internal stuff */ + int skipped_ret; + bool possessed; +diff --git a/security/keys/key.c b/security/keys/key.c +index 7d716b8..a819b5c 100644 +--- a/security/keys/key.c ++++ b/security/keys/key.c +@@ -409,7 +409,7 @@ static int __key_instantiate_and_link(struct key *key, + struct key_preparsed_payload *prep, + struct key *keyring, + struct key *authkey, +- unsigned long *_prealloc) ++ struct assoc_array_edit **_edit) + { + int ret, awaken; + +@@ -436,7 +436,7 @@ static int __key_instantiate_and_link(struct key *key, + + /* and link it into the 
destination keyring */ + if (keyring) +- __key_link(keyring, key, _prealloc); ++ __key_link(key, _edit); + + /* disable the authorisation key */ + if (authkey) +@@ -476,7 +476,7 @@ int key_instantiate_and_link(struct key *key, + struct key *authkey) + { + struct key_preparsed_payload prep; +- unsigned long prealloc; ++ struct assoc_array_edit *edit; + int ret; + + memset(&prep, 0, sizeof(prep)); +@@ -490,16 +490,15 @@ int key_instantiate_and_link(struct key *key, + } + + if (keyring) { +- ret = __key_link_begin(keyring, &key->index_key, &prealloc); ++ ret = __key_link_begin(keyring, &key->index_key, &edit); + if (ret < 0) + goto error_free_preparse; + } + +- ret = __key_instantiate_and_link(key, &prep, keyring, authkey, +- &prealloc); ++ ret = __key_instantiate_and_link(key, &prep, keyring, authkey, &edit); + + if (keyring) +- __key_link_end(keyring, &key->index_key, prealloc); ++ __key_link_end(keyring, &key->index_key, edit); + + error_free_preparse: + if (key->type->preparse) +@@ -537,7 +536,7 @@ int key_reject_and_link(struct key *key, + struct key *keyring, + struct key *authkey) + { +- unsigned long prealloc; ++ struct assoc_array_edit *edit; + struct timespec now; + int ret, awaken, link_ret = 0; + +@@ -548,7 +547,7 @@ int key_reject_and_link(struct key *key, + ret = -EBUSY; + + if (keyring) +- link_ret = __key_link_begin(keyring, &key->index_key, &prealloc); ++ link_ret = __key_link_begin(keyring, &key->index_key, &edit); + + mutex_lock(&key_construction_mutex); + +@@ -570,7 +569,7 @@ int key_reject_and_link(struct key *key, + + /* and link it into the destination keyring */ + if (keyring && link_ret == 0) +- __key_link(keyring, key, &prealloc); ++ __key_link(key, &edit); + + /* disable the authorisation key */ + if (authkey) +@@ -580,7 +579,7 @@ int key_reject_and_link(struct key *key, + mutex_unlock(&key_construction_mutex); + + if (keyring) +- __key_link_end(keyring, &key->index_key, prealloc); ++ __key_link_end(keyring, &key->index_key, edit); + + /* 
wake up anyone waiting for a key to be constructed */ + if (awaken) +@@ -783,8 +782,8 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, + .description = description, + }; + struct key_preparsed_payload prep; ++ struct assoc_array_edit *edit; + const struct cred *cred = current_cred(); +- unsigned long prealloc; + struct key *keyring, *key = NULL; + key_ref_t key_ref; + int ret; +@@ -828,7 +827,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, + } + index_key.desc_len = strlen(index_key.description); + +- ret = __key_link_begin(keyring, &index_key, &prealloc); ++ ret = __key_link_begin(keyring, &index_key, &edit); + if (ret < 0) { + key_ref = ERR_PTR(ret); + goto error_free_prep; +@@ -847,8 +846,8 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, + * update that instead if possible + */ + if (index_key.type->update) { +- key_ref = __keyring_search_one(keyring_ref, &index_key); +- if (!IS_ERR(key_ref)) ++ key_ref = find_key_to_update(keyring_ref, &index_key); ++ if (key_ref) + goto found_matching_key; + } + +@@ -874,7 +873,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, + } + + /* instantiate it and link it into the target keyring */ +- ret = __key_instantiate_and_link(key, &prep, keyring, NULL, &prealloc); ++ ret = __key_instantiate_and_link(key, &prep, keyring, NULL, &edit); + if (ret < 0) { + key_put(key); + key_ref = ERR_PTR(ret); +@@ -884,7 +883,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, + key_ref = make_key_ref(key, is_key_possessed(keyring_ref)); + + error_link_end: +- __key_link_end(keyring, &index_key, prealloc); ++ __key_link_end(keyring, &index_key, edit); + error_free_prep: + if (index_key.type->preparse) + index_key.type->free_preparse(&prep); +@@ -897,7 +896,7 @@ error: + /* we found a matching key, so we're going to try to update it + * - we can drop the locks first as we have the key pinned + */ +- __key_link_end(keyring, &index_key, prealloc); ++ __key_link_end(keyring, &index_key, edit); + + key_ref 
= __key_update(key_ref, &prep); + goto error_free_prep; +diff --git a/security/keys/keyring.c b/security/keys/keyring.c +index eeef1a0..f7cdea2 100644 +--- a/security/keys/keyring.c ++++ b/security/keys/keyring.c +@@ -1,6 +1,6 @@ + /* Keyring handling + * +- * Copyright (C) 2004-2005, 2008 Red Hat, Inc. All Rights Reserved. ++ * Copyright (C) 2004-2005, 2008, 2013 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or +@@ -17,25 +17,11 @@ + #include + #include + #include ++#include ++#include + #include + #include "internal.h" + +-#define rcu_dereference_locked_keyring(keyring) \ +- (rcu_dereference_protected( \ +- (keyring)->payload.subscriptions, \ +- rwsem_is_locked((struct rw_semaphore *)&(keyring)->sem))) +- +-#define rcu_deref_link_locked(klist, index, keyring) \ +- (rcu_dereference_protected( \ +- (klist)->keys[index], \ +- rwsem_is_locked((struct rw_semaphore *)&(keyring)->sem))) +- +-#define MAX_KEYRING_LINKS \ +- min_t(size_t, USHRT_MAX - 1, \ +- ((PAGE_SIZE - sizeof(struct keyring_list)) / sizeof(struct key *))) +- +-#define KEY_LINK_FIXQUOTA 1UL +- + /* + * When plumbing the depths of the key tree, this sets a hard limit + * set on how deep we're willing to go. +@@ -47,6 +33,28 @@ + */ + #define KEYRING_NAME_HASH_SIZE (1 << 5) + ++/* ++ * We mark pointers we pass to the associative array with bit 1 set if ++ * they're keyrings and clear otherwise. 
++ */ ++#define KEYRING_PTR_SUBTYPE 0x2UL ++ ++static inline bool keyring_ptr_is_keyring(const struct assoc_array_ptr *x) ++{ ++ return (unsigned long)x & KEYRING_PTR_SUBTYPE; ++} ++static inline struct key *keyring_ptr_to_key(const struct assoc_array_ptr *x) ++{ ++ void *object = assoc_array_ptr_to_leaf(x); ++ return (struct key *)((unsigned long)object & ~KEYRING_PTR_SUBTYPE); ++} ++static inline void *keyring_key_to_ptr(struct key *key) ++{ ++ if (key->type == &key_type_keyring) ++ return (void *)((unsigned long)key | KEYRING_PTR_SUBTYPE); ++ return key; ++} ++ + static struct list_head keyring_name_hash[KEYRING_NAME_HASH_SIZE]; + static DEFINE_RWLOCK(keyring_name_lock); + +@@ -67,7 +75,6 @@ static inline unsigned keyring_hash(const char *desc) + */ + static int keyring_instantiate(struct key *keyring, + struct key_preparsed_payload *prep); +-static int keyring_match(const struct key *keyring, const void *criterion); + static void keyring_revoke(struct key *keyring); + static void keyring_destroy(struct key *keyring); + static void keyring_describe(const struct key *keyring, struct seq_file *m); +@@ -76,9 +83,9 @@ static long keyring_read(const struct key *keyring, + + struct key_type key_type_keyring = { + .name = "keyring", +- .def_datalen = sizeof(struct keyring_list), ++ .def_datalen = 0, + .instantiate = keyring_instantiate, +- .match = keyring_match, ++ .match = user_match, + .revoke = keyring_revoke, + .destroy = keyring_destroy, + .describe = keyring_describe, +@@ -127,6 +134,7 @@ static int keyring_instantiate(struct key *keyring, + + ret = -EINVAL; + if (prep->datalen == 0) { ++ assoc_array_init(&keyring->keys); + /* make the keyring available by name if it has one */ + keyring_publish_name(keyring); + ret = 0; +@@ -136,15 +144,226 @@ static int keyring_instantiate(struct key *keyring, + } + + /* +- * Match keyrings on their name ++ * Multiply 64-bits by 32-bits to 96-bits and fold back to 64-bit. 
Ideally we'd ++ * fold the carry back too, but that requires inline asm. ++ */ ++static u64 mult_64x32_and_fold(u64 x, u32 y) ++{ ++ u64 hi = (u64)(u32)(x >> 32) * y; ++ u64 lo = (u64)(u32)(x) * y; ++ return lo + ((u64)(u32)hi << 32) + (u32)(hi >> 32); ++} ++ ++/* ++ * Hash a key type and description. ++ */ ++static unsigned long hash_key_type_and_desc(const struct keyring_index_key *index_key) ++{ ++ const unsigned level_shift = ASSOC_ARRAY_LEVEL_STEP; ++ const unsigned long level_mask = ASSOC_ARRAY_LEVEL_STEP_MASK; ++ const char *description = index_key->description; ++ unsigned long hash, type; ++ u32 piece; ++ u64 acc; ++ int n, desc_len = index_key->desc_len; ++ ++ type = (unsigned long)index_key->type; ++ ++ acc = mult_64x32_and_fold(type, desc_len + 13); ++ acc = mult_64x32_and_fold(acc, 9207); ++ for (;;) { ++ n = desc_len; ++ if (n <= 0) ++ break; ++ if (n > 4) ++ n = 4; ++ piece = 0; ++ memcpy(&piece, description, n); ++ description += n; ++ desc_len -= n; ++ acc = mult_64x32_and_fold(acc, piece); ++ acc = mult_64x32_and_fold(acc, 9207); ++ } ++ ++ /* Fold the hash down to 32 bits if need be. */ ++ hash = acc; ++ if (ASSOC_ARRAY_KEY_CHUNK_SIZE == 32) ++ hash ^= acc >> 32; ++ ++ /* Squidge all the keyrings into a separate part of the tree to ++ * ordinary keys by making sure the lowest level segment in the hash is ++ * zero for keyrings and non-zero otherwise. ++ */ ++ if (index_key->type != &key_type_keyring && (hash & level_mask) == 0) ++ return hash | (hash >> (ASSOC_ARRAY_KEY_CHUNK_SIZE - level_shift)) | 1; ++ if (index_key->type == &key_type_keyring && (hash & level_mask) != 0) ++ return (hash + (hash << level_shift)) & ~level_mask; ++ return hash; ++} ++ ++/* ++ * Build the next index key chunk. ++ * ++ * On 32-bit systems the index key is laid out as: ++ * ++ * 0 4 5 9... ++ * hash desclen typeptr desc[] ++ * ++ * On 64-bit systems: ++ * ++ * 0 8 9 17... ++ * hash desclen typeptr desc[] ++ * ++ * We return it one word-sized chunk at a time. 
+ */ +-static int keyring_match(const struct key *keyring, const void *description) ++static unsigned long keyring_get_key_chunk(const void *data, int level) ++{ ++ const struct keyring_index_key *index_key = data; ++ unsigned long chunk = 0; ++ long offset = 0; ++ int desc_len = index_key->desc_len, n = sizeof(chunk); ++ ++ level /= ASSOC_ARRAY_KEY_CHUNK_SIZE; ++ switch (level) { ++ case 0: ++ return hash_key_type_and_desc(index_key); ++ case 1: ++ return ((unsigned long)index_key->type << 8) | desc_len; ++ case 2: ++ if (desc_len == 0) ++ return (u8)((unsigned long)index_key->type >> ++ (ASSOC_ARRAY_KEY_CHUNK_SIZE - 8)); ++ n--; ++ offset = 1; ++ default: ++ offset += sizeof(chunk) - 1; ++ offset += (level - 3) * sizeof(chunk); ++ if (offset >= desc_len) ++ return 0; ++ desc_len -= offset; ++ if (desc_len > n) ++ desc_len = n; ++ offset += desc_len; ++ do { ++ chunk <<= 8; ++ chunk |= ((u8*)index_key->description)[--offset]; ++ } while (--desc_len > 0); ++ ++ if (level == 2) { ++ chunk <<= 8; ++ chunk |= (u8)((unsigned long)index_key->type >> ++ (ASSOC_ARRAY_KEY_CHUNK_SIZE - 8)); ++ } ++ return chunk; ++ } ++} ++ ++static unsigned long keyring_get_object_key_chunk(const void *object, int level) ++{ ++ const struct key *key = keyring_ptr_to_key(object); ++ return keyring_get_key_chunk(&key->index_key, level); ++} ++ ++static bool keyring_compare_object(const void *object, const void *data) + { +- return keyring->description && +- strcmp(keyring->description, description) == 0; ++ const struct keyring_index_key *index_key = data; ++ const struct key *key = keyring_ptr_to_key(object); ++ ++ return key->index_key.type == index_key->type && ++ key->index_key.desc_len == index_key->desc_len && ++ memcmp(key->index_key.description, index_key->description, ++ index_key->desc_len) == 0; + } + + /* ++ * Compare the index keys of a pair of objects and determine the bit position ++ * at which they differ - if they differ. 
++ */ ++static int keyring_diff_objects(const void *_a, const void *_b) ++{ ++ const struct key *key_a = keyring_ptr_to_key(_a); ++ const struct key *key_b = keyring_ptr_to_key(_b); ++ const struct keyring_index_key *a = &key_a->index_key; ++ const struct keyring_index_key *b = &key_b->index_key; ++ unsigned long seg_a, seg_b; ++ int level, i; ++ ++ level = 0; ++ seg_a = hash_key_type_and_desc(a); ++ seg_b = hash_key_type_and_desc(b); ++ if ((seg_a ^ seg_b) != 0) ++ goto differ; ++ ++ /* The number of bits contributed by the hash is controlled by a ++ * constant in the assoc_array headers. Everything else thereafter we ++ * can deal with as being machine word-size dependent. ++ */ ++ level += ASSOC_ARRAY_KEY_CHUNK_SIZE / 8; ++ seg_a = a->desc_len; ++ seg_b = b->desc_len; ++ if ((seg_a ^ seg_b) != 0) ++ goto differ; ++ ++ /* The next bit may not work on big endian */ ++ level++; ++ seg_a = (unsigned long)a->type; ++ seg_b = (unsigned long)b->type; ++ if ((seg_a ^ seg_b) != 0) ++ goto differ; ++ ++ level += sizeof(unsigned long); ++ if (a->desc_len == 0) ++ goto same; ++ ++ i = 0; ++ if (((unsigned long)a->description | (unsigned long)b->description) & ++ (sizeof(unsigned long) - 1)) { ++ do { ++ seg_a = *(unsigned long *)(a->description + i); ++ seg_b = *(unsigned long *)(b->description + i); ++ if ((seg_a ^ seg_b) != 0) ++ goto differ_plus_i; ++ i += sizeof(unsigned long); ++ } while (i < (a->desc_len & (sizeof(unsigned long) - 1))); ++ } ++ ++ for (; i < a->desc_len; i++) { ++ seg_a = *(unsigned char *)(a->description + i); ++ seg_b = *(unsigned char *)(b->description + i); ++ if ((seg_a ^ seg_b) != 0) ++ goto differ_plus_i; ++ } ++ ++same: ++ return -1; ++ ++differ_plus_i: ++ level += i; ++differ: ++ i = level * 8 + __ffs(seg_a ^ seg_b); ++ return i; ++} ++ ++/* ++ * Free an object after stripping the keyring flag off of the pointer. 
++ */ ++static void keyring_free_object(void *object) ++{ ++ key_put(keyring_ptr_to_key(object)); ++} ++ ++/* ++ * Operations for keyring management by the index-tree routines. ++ */ ++static const struct assoc_array_ops keyring_assoc_array_ops = { ++ .get_key_chunk = keyring_get_key_chunk, ++ .get_object_key_chunk = keyring_get_object_key_chunk, ++ .compare_object = keyring_compare_object, ++ .diff_objects = keyring_diff_objects, ++ .free_object = keyring_free_object, ++}; ++ ++/* + * Clean up a keyring when it is destroyed. Unpublish its name if it had one + * and dispose of its data. + * +@@ -155,9 +374,6 @@ static int keyring_match(const struct key *keyring, const void *description) + */ + static void keyring_destroy(struct key *keyring) + { +- struct keyring_list *klist; +- int loop; +- + if (keyring->description) { + write_lock(&keyring_name_lock); + +@@ -168,12 +384,7 @@ static void keyring_destroy(struct key *keyring) + write_unlock(&keyring_name_lock); + } + +- klist = rcu_access_pointer(keyring->payload.subscriptions); +- if (klist) { +- for (loop = klist->nkeys - 1; loop >= 0; loop--) +- key_put(rcu_access_pointer(klist->keys[loop])); +- kfree(klist); +- } ++ assoc_array_destroy(&keyring->keys, &keyring_assoc_array_ops); + } + + /* +@@ -181,76 +392,88 @@ static void keyring_destroy(struct key *keyring) + */ + static void keyring_describe(const struct key *keyring, struct seq_file *m) + { +- struct keyring_list *klist; +- + if (keyring->description) + seq_puts(m, keyring->description); + else + seq_puts(m, "[anon]"); + + if (key_is_instantiated(keyring)) { +- rcu_read_lock(); +- klist = rcu_dereference(keyring->payload.subscriptions); +- if (klist) +- seq_printf(m, ": %u/%u", klist->nkeys, klist->maxkeys); ++ if (keyring->keys.nr_leaves_on_tree != 0) ++ seq_printf(m, ": %lu", keyring->keys.nr_leaves_on_tree); + else + seq_puts(m, ": empty"); +- rcu_read_unlock(); + } + } + ++struct keyring_read_iterator_context { ++ size_t qty; ++ size_t count; ++ 
key_serial_t __user *buffer; ++}; ++ ++static int keyring_read_iterator(const void *object, void *data) ++{ ++ struct keyring_read_iterator_context *ctx = data; ++ const struct key *key = keyring_ptr_to_key(object); ++ int ret; ++ ++ kenter("{%s,%d},,{%zu/%zu}", ++ key->type->name, key->serial, ctx->count, ctx->qty); ++ ++ if (ctx->count >= ctx->qty) ++ return 1; ++ ++ ret = put_user(key->serial, ctx->buffer); ++ if (ret < 0) ++ return ret; ++ ctx->buffer++; ++ ctx->count += sizeof(key->serial); ++ return 0; ++} ++ + /* + * Read a list of key IDs from the keyring's contents in binary form + * +- * The keyring's semaphore is read-locked by the caller. ++ * The keyring's semaphore is read-locked by the caller. This prevents someone ++ * from modifying it under us - which could cause us to read key IDs multiple ++ * times. + */ + static long keyring_read(const struct key *keyring, + char __user *buffer, size_t buflen) + { +- struct keyring_list *klist; +- struct key *key; +- size_t qty, tmp; +- int loop, ret; ++ struct keyring_read_iterator_context ctx; ++ unsigned long nr_keys; ++ int ret; + +- ret = 0; +- klist = rcu_dereference_locked_keyring(keyring); +- if (klist) { +- /* calculate how much data we could return */ +- qty = klist->nkeys * sizeof(key_serial_t); +- +- if (buffer && buflen > 0) { +- if (buflen > qty) +- buflen = qty; +- +- /* copy the IDs of the subscribed keys into the +- * buffer */ +- ret = -EFAULT; +- +- for (loop = 0; loop < klist->nkeys; loop++) { +- key = rcu_deref_link_locked(klist, loop, +- keyring); +- +- tmp = sizeof(key_serial_t); +- if (tmp > buflen) +- tmp = buflen; +- +- if (copy_to_user(buffer, +- &key->serial, +- tmp) != 0) +- goto error; +- +- buflen -= tmp; +- if (buflen == 0) +- break; +- buffer += tmp; +- } +- } ++ kenter("{%d},,%zu", key_serial(keyring), buflen); ++ ++ if (buflen & (sizeof(key_serial_t) - 1)) ++ return -EINVAL; ++ ++ nr_keys = keyring->keys.nr_leaves_on_tree; ++ if (nr_keys == 0) ++ return 0; + +- ret = qty; ++ 
/* Calculate how much data we could return */ ++ ctx.qty = nr_keys * sizeof(key_serial_t); ++ ++ if (!buffer || !buflen) ++ return ctx.qty; ++ ++ if (buflen > ctx.qty) ++ ctx.qty = buflen; ++ ++ /* Copy the IDs of the subscribed keys into the buffer */ ++ ctx.buffer = (key_serial_t __user *)buffer; ++ ctx.count = 0; ++ ret = assoc_array_iterate(&keyring->keys, keyring_read_iterator, &ctx); ++ if (ret < 0) { ++ kleave(" = %d [iterate]", ret); ++ return ret; + } + +-error: +- return ret; ++ kleave(" = %zu [ok]", ctx.count); ++ return ctx.count; + } + + /* +@@ -277,219 +500,360 @@ struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid, + } + EXPORT_SYMBOL(keyring_alloc); + +-/** +- * keyring_search_aux - Search a keyring tree for a key matching some criteria +- * @keyring_ref: A pointer to the keyring with possession indicator. +- * @ctx: The keyring search context. +- * +- * Search the supplied keyring tree for a key that matches the criteria given. +- * The root keyring and any linked keyrings must grant Search permission to the +- * caller to be searchable and keys can only be found if they too grant Search +- * to the caller. The possession flag on the root keyring pointer controls use +- * of the possessor bits in permissions checking of the entire tree. In +- * addition, the LSM gets to forbid keyring searches and key matches. +- * +- * The search is performed as a breadth-then-depth search up to the prescribed +- * limit (KEYRING_SEARCH_MAX_DEPTH). +- * +- * Keys are matched to the type provided and are then filtered by the match +- * function, which is given the description to use in any way it sees fit. The +- * match function may use any attributes of a key that it wishes to to +- * determine the match. Normally the match function from the key type would be +- * used. +- * +- * RCU is used to prevent the keyring key lists from disappearing without the +- * need to take lots of locks. 
+- * +- * Returns a pointer to the found key and increments the key usage count if +- * successful; -EAGAIN if no matching keys were found, or if expired or revoked +- * keys were found; -ENOKEY if only negative keys were found; -ENOTDIR if the +- * specified keyring wasn't a keyring. +- * +- * In the case of a successful return, the possession attribute from +- * @keyring_ref is propagated to the returned key reference. ++/* ++ * Iteration function to consider each key found. + */ +-key_ref_t keyring_search_aux(key_ref_t keyring_ref, +- struct keyring_search_context *ctx) ++static int keyring_search_iterator(const void *object, void *iterator_data) + { +- struct { +- /* Need a separate keylist pointer for RCU purposes */ +- struct key *keyring; +- struct keyring_list *keylist; +- int kix; +- } stack[KEYRING_SEARCH_MAX_DEPTH]; +- +- struct keyring_list *keylist; +- unsigned long kflags; +- struct key *keyring, *key; +- key_ref_t key_ref; +- long err; +- int sp, nkeys, kix; ++ struct keyring_search_context *ctx = iterator_data; ++ const struct key *key = keyring_ptr_to_key(object); ++ unsigned long kflags = key->flags; + +- keyring = key_ref_to_ptr(keyring_ref); +- ctx->possessed = is_key_possessed(keyring_ref); +- key_check(keyring); ++ kenter("{%d}", key->serial); + +- /* top keyring must have search permission to begin the search */ +- err = key_task_permission(keyring_ref, ctx->cred, KEY_SEARCH); +- if (err < 0) { +- key_ref = ERR_PTR(err); +- goto error; ++ /* ignore keys not of this type */ ++ if (key->type != ctx->index_key.type) { ++ kleave(" = 0 [!type]"); ++ return 0; + } + +- key_ref = ERR_PTR(-ENOTDIR); +- if (keyring->type != &key_type_keyring) +- goto error; ++ /* skip invalidated, revoked and expired keys */ ++ if (ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK) { ++ if (kflags & ((1 << KEY_FLAG_INVALIDATED) | ++ (1 << KEY_FLAG_REVOKED))) { ++ ctx->result = ERR_PTR(-EKEYREVOKED); ++ kleave(" = %d [invrev]", ctx->skipped_ret); ++ goto skipped; ++ } + +- 
rcu_read_lock(); ++ if (key->expiry && ctx->now.tv_sec >= key->expiry) { ++ ctx->result = ERR_PTR(-EKEYEXPIRED); ++ kleave(" = %d [expire]", ctx->skipped_ret); ++ goto skipped; ++ } ++ } + +- ctx->now = current_kernel_time(); +- err = -EAGAIN; +- sp = 0; +- +- /* firstly we should check to see if this top-level keyring is what we +- * are looking for */ +- key_ref = ERR_PTR(-EAGAIN); +- kflags = keyring->flags; +- if (keyring->type == ctx->index_key.type && +- ctx->match(keyring, ctx->match_data)) { +- key = keyring; +- if (ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK) +- goto found; ++ /* keys that don't match */ ++ if (!ctx->match(key, ctx->match_data)) { ++ kleave(" = 0 [!match]"); ++ return 0; ++ } + +- /* check it isn't negative and hasn't expired or been +- * revoked */ +- if (kflags & (1 << KEY_FLAG_REVOKED)) +- goto error_2; +- if (key->expiry && ctx->now.tv_sec >= key->expiry) +- goto error_2; +- key_ref = ERR_PTR(key->type_data.reject_error); +- if (kflags & (1 << KEY_FLAG_NEGATIVE)) +- goto error_2; +- goto found; ++ /* key must have search permissions */ ++ if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM) && ++ key_task_permission(make_key_ref(key, ctx->possessed), ++ ctx->cred, KEY_SEARCH) < 0) { ++ ctx->result = ERR_PTR(-EACCES); ++ kleave(" = %d [!perm]", ctx->skipped_ret); ++ goto skipped; + } + +- /* otherwise, the top keyring must not be revoked, expired, or +- * negatively instantiated if we are to search it */ +- key_ref = ERR_PTR(-EAGAIN); +- if (kflags & ((1 << KEY_FLAG_INVALIDATED) | +- (1 << KEY_FLAG_REVOKED) | +- (1 << KEY_FLAG_NEGATIVE)) || +- (keyring->expiry && ctx->now.tv_sec >= keyring->expiry)) +- goto error_2; +- +- /* start processing a new keyring */ +-descend: +- kflags = keyring->flags; +- if (kflags & ((1 << KEY_FLAG_INVALIDATED) | +- (1 << KEY_FLAG_REVOKED))) +- goto not_this_keyring; ++ if (ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK) { ++ /* we set a different error code if we pass a negative key */ ++ if (kflags & (1 << 
KEY_FLAG_NEGATIVE)) { ++ ctx->result = ERR_PTR(key->type_data.reject_error); ++ kleave(" = %d [neg]", ctx->skipped_ret); ++ goto skipped; ++ } ++ } + +- keylist = rcu_dereference(keyring->payload.subscriptions); +- if (!keylist) +- goto not_this_keyring; ++ /* Found */ ++ ctx->result = make_key_ref(key, ctx->possessed); ++ kleave(" = 1 [found]"); ++ return 1; + +- /* iterate through the keys in this keyring first */ +- nkeys = keylist->nkeys; +- smp_rmb(); +- for (kix = 0; kix < nkeys; kix++) { +- key = rcu_dereference(keylist->keys[kix]); +- kflags = key->flags; ++skipped: ++ return ctx->skipped_ret; ++} + +- /* ignore keys not of this type */ +- if (key->type != ctx->index_key.type) +- continue; ++/* ++ * Search inside a keyring for a key. We can search by walking to it ++ * directly based on its index-key or we can iterate over the entire ++ * tree looking for it, based on the match function. ++ */ ++static int search_keyring(struct key *keyring, struct keyring_search_context *ctx) ++{ ++ if ((ctx->flags & KEYRING_SEARCH_LOOKUP_TYPE) == ++ KEYRING_SEARCH_LOOKUP_DIRECT) { ++ const void *object; ++ ++ object = assoc_array_find(&keyring->keys, ++ &keyring_assoc_array_ops, ++ &ctx->index_key); ++ return object ? ctx->iterator(object, ctx) : 0; ++ } ++ return assoc_array_iterate(&keyring->keys, ctx->iterator, ctx); ++} + +- /* skip invalidated, revoked and expired keys */ +- if (!(ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK)) { +- if (kflags & ((1 << KEY_FLAG_INVALIDATED) | +- (1 << KEY_FLAG_REVOKED))) +- continue; ++/* ++ * Search a tree of keyrings that point to other keyrings up to the maximum ++ * depth. 
++ */ ++static bool search_nested_keyrings(struct key *keyring, ++ struct keyring_search_context *ctx) ++{ ++ struct { ++ struct key *keyring; ++ struct assoc_array_node *node; ++ int slot; ++ } stack[KEYRING_SEARCH_MAX_DEPTH]; + +- if (key->expiry && ctx->now.tv_sec >= key->expiry) +- continue; +- } ++ struct assoc_array_shortcut *shortcut; ++ struct assoc_array_node *node; ++ struct assoc_array_ptr *ptr; ++ struct key *key; ++ int sp = 0, slot; + +- /* keys that don't match */ +- if (!ctx->match(key, ctx->match_data)) +- continue; ++ kenter("{%d},{%s,%s}", ++ keyring->serial, ++ ctx->index_key.type->name, ++ ctx->index_key.description); + +- /* key must have search permissions */ +- if (key_task_permission(make_key_ref(key, ctx->possessed), +- ctx->cred, KEY_SEARCH) < 0) +- continue; ++ if (ctx->index_key.description) ++ ctx->index_key.desc_len = strlen(ctx->index_key.description); + +- if (ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK) ++ /* Check to see if this top-level keyring is what we are looking for ++ * and whether it is valid or not. ++ */ ++ if (ctx->flags & KEYRING_SEARCH_LOOKUP_ITERATE || ++ keyring_compare_object(keyring, &ctx->index_key)) { ++ ctx->skipped_ret = 2; ++ ctx->flags |= KEYRING_SEARCH_DO_STATE_CHECK; ++ switch (ctx->iterator(keyring_key_to_ptr(keyring), ctx)) { ++ case 1: + goto found; +- +- /* we set a different error code if we pass a negative key */ +- if (kflags & (1 << KEY_FLAG_NEGATIVE)) { +- err = key->type_data.reject_error; +- continue; ++ case 2: ++ return false; ++ default: ++ break; + } ++ } + ++ ctx->skipped_ret = 0; ++ if (ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK) ++ ctx->flags &= ~KEYRING_SEARCH_DO_STATE_CHECK; ++ ++ /* Start processing a new keyring */ ++descend_to_keyring: ++ kdebug("descend to %d", keyring->serial); ++ if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) | ++ (1 << KEY_FLAG_REVOKED))) ++ goto not_this_keyring; ++ ++ /* Search through the keys in this keyring before its searching its ++ * subtrees. 
++ */ ++ if (search_keyring(keyring, ctx)) + goto found; +- } + +- /* search through the keyrings nested in this one */ +- kix = 0; +-ascend: +- nkeys = keylist->nkeys; +- smp_rmb(); +- for (; kix < nkeys; kix++) { +- key = rcu_dereference(keylist->keys[kix]); +- if (key->type != &key_type_keyring) +- continue; ++ /* Then manually iterate through the keyrings nested in this one. ++ * ++ * Start from the root node of the index tree. Because of the way the ++ * hash function has been set up, keyrings cluster on the leftmost ++ * branch of the root node (root slot 0) or in the root node itself. ++ * Non-keyrings avoid the leftmost branch of the root entirely (root ++ * slots 1-15). ++ */ ++ ptr = ACCESS_ONCE(keyring->keys.root); ++ if (!ptr) ++ goto not_this_keyring; + +- /* recursively search nested keyrings +- * - only search keyrings for which we have search permission ++ if (assoc_array_ptr_is_shortcut(ptr)) { ++ /* If the root is a shortcut, either the keyring only contains ++ * keyring pointers (everything clusters behind root slot 0) or ++ * doesn't contain any keyring pointers. + */ +- if (sp >= KEYRING_SEARCH_MAX_DEPTH) ++ shortcut = assoc_array_ptr_to_shortcut(ptr); ++ smp_read_barrier_depends(); ++ if ((shortcut->index_key[0] & ASSOC_ARRAY_FAN_MASK) != 0) ++ goto not_this_keyring; ++ ++ ptr = ACCESS_ONCE(shortcut->next_node); ++ node = assoc_array_ptr_to_node(ptr); ++ goto begin_node; ++ } ++ ++ node = assoc_array_ptr_to_node(ptr); ++ smp_read_barrier_depends(); ++ ++ ptr = node->slots[0]; ++ if (!assoc_array_ptr_is_meta(ptr)) ++ goto begin_node; ++ ++descend_to_node: ++ /* Descend to a more distal node in this keyring's content tree and go ++ * through that. 
++ */ ++ kdebug("descend"); ++ if (assoc_array_ptr_is_shortcut(ptr)) { ++ shortcut = assoc_array_ptr_to_shortcut(ptr); ++ smp_read_barrier_depends(); ++ ptr = ACCESS_ONCE(shortcut->next_node); ++ BUG_ON(!assoc_array_ptr_is_node(ptr)); ++ node = assoc_array_ptr_to_node(ptr); ++ } ++ ++begin_node: ++ kdebug("begin_node"); ++ smp_read_barrier_depends(); ++ slot = 0; ++ascend_to_node: ++ /* Go through the slots in a node */ ++ for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { ++ ptr = ACCESS_ONCE(node->slots[slot]); ++ ++ if (assoc_array_ptr_is_meta(ptr) && node->back_pointer) ++ goto descend_to_node; ++ ++ if (!keyring_ptr_is_keyring(ptr)) + continue; + +- if (key_task_permission(make_key_ref(key, ctx->possessed), ++ key = keyring_ptr_to_key(ptr); ++ ++ if (sp >= KEYRING_SEARCH_MAX_DEPTH) { ++ if (ctx->flags & KEYRING_SEARCH_DETECT_TOO_DEEP) { ++ ctx->result = ERR_PTR(-ELOOP); ++ return false; ++ } ++ goto not_this_keyring; ++ } ++ ++ /* Search a nested keyring */ ++ if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM) && ++ key_task_permission(make_key_ref(key, ctx->possessed), + ctx->cred, KEY_SEARCH) < 0) + continue; + + /* stack the current position */ + stack[sp].keyring = keyring; +- stack[sp].keylist = keylist; +- stack[sp].kix = kix; ++ stack[sp].node = node; ++ stack[sp].slot = slot; + sp++; + + /* begin again with the new keyring */ + keyring = key; +- goto descend; ++ goto descend_to_keyring; ++ } ++ ++ /* We've dealt with all the slots in the current node, so now we need ++ * to ascend to the parent and continue processing there. 
++ */ ++ ptr = ACCESS_ONCE(node->back_pointer); ++ slot = node->parent_slot; ++ ++ if (ptr && assoc_array_ptr_is_shortcut(ptr)) { ++ shortcut = assoc_array_ptr_to_shortcut(ptr); ++ smp_read_barrier_depends(); ++ ptr = ACCESS_ONCE(shortcut->back_pointer); ++ slot = shortcut->parent_slot; ++ } ++ if (!ptr) ++ goto not_this_keyring; ++ node = assoc_array_ptr_to_node(ptr); ++ smp_read_barrier_depends(); ++ slot++; ++ ++ /* If we've ascended to the root (zero backpointer), we must have just ++ * finished processing the leftmost branch rather than the root slots - ++ * so there can't be any more keyrings for us to find. ++ */ ++ if (node->back_pointer) { ++ kdebug("ascend %d", slot); ++ goto ascend_to_node; + } + +- /* the keyring we're looking at was disqualified or didn't contain a +- * matching key */ ++ /* The keyring we're looking at was disqualified or didn't contain a ++ * matching key. ++ */ + not_this_keyring: +- if (sp > 0) { +- /* resume the processing of a keyring higher up in the tree */ +- sp--; +- keyring = stack[sp].keyring; +- keylist = stack[sp].keylist; +- kix = stack[sp].kix + 1; +- goto ascend; ++ kdebug("not_this_keyring %d", sp); ++ if (sp <= 0) { ++ kleave(" = false"); ++ return false; + } + +- key_ref = ERR_PTR(err); +- goto error_2; ++ /* Resume the processing of a keyring higher up in the tree */ ++ sp--; ++ keyring = stack[sp].keyring; ++ node = stack[sp].node; ++ slot = stack[sp].slot + 1; ++ kdebug("ascend to %d [%d]", keyring->serial, slot); ++ goto ascend_to_node; + +- /* we found a viable match */ ++ /* We found a viable match */ + found: +- __key_get(key); +- key->last_used_at = ctx->now.tv_sec; +- keyring->last_used_at = ctx->now.tv_sec; +- while (sp > 0) +- stack[--sp].keyring->last_used_at = ctx->now.tv_sec; ++ key = key_ref_to_ptr(ctx->result); + key_check(key); +- key_ref = make_key_ref(key, ctx->possessed); +-error_2: ++ if (!(ctx->flags & KEYRING_SEARCH_NO_UPDATE_TIME)) { ++ key->last_used_at = ctx->now.tv_sec; ++ 
keyring->last_used_at = ctx->now.tv_sec; ++ while (sp > 0) ++ stack[--sp].keyring->last_used_at = ctx->now.tv_sec; ++ } ++ kleave(" = true"); ++ return true; ++} ++ ++/** ++ * keyring_search_aux - Search a keyring tree for a key matching some criteria ++ * @keyring_ref: A pointer to the keyring with possession indicator. ++ * @ctx: The keyring search context. ++ * ++ * Search the supplied keyring tree for a key that matches the criteria given. ++ * The root keyring and any linked keyrings must grant Search permission to the ++ * caller to be searchable and keys can only be found if they too grant Search ++ * to the caller. The possession flag on the root keyring pointer controls use ++ * of the possessor bits in permissions checking of the entire tree. In ++ * addition, the LSM gets to forbid keyring searches and key matches. ++ * ++ * The search is performed as a breadth-then-depth search up to the prescribed ++ * limit (KEYRING_SEARCH_MAX_DEPTH). ++ * ++ * Keys are matched to the type provided and are then filtered by the match ++ * function, which is given the description to use in any way it sees fit. The ++ * match function may use any attributes of a key that it wishes to to ++ * determine the match. Normally the match function from the key type would be ++ * used. ++ * ++ * RCU can be used to prevent the keyring key lists from disappearing without ++ * the need to take lots of locks. ++ * ++ * Returns a pointer to the found key and increments the key usage count if ++ * successful; -EAGAIN if no matching keys were found, or if expired or revoked ++ * keys were found; -ENOKEY if only negative keys were found; -ENOTDIR if the ++ * specified keyring wasn't a keyring. ++ * ++ * In the case of a successful return, the possession attribute from ++ * @keyring_ref is propagated to the returned key reference. 
++ */ ++key_ref_t keyring_search_aux(key_ref_t keyring_ref, ++ struct keyring_search_context *ctx) ++{ ++ struct key *keyring; ++ long err; ++ ++ ctx->iterator = keyring_search_iterator; ++ ctx->possessed = is_key_possessed(keyring_ref); ++ ctx->result = ERR_PTR(-EAGAIN); ++ ++ keyring = key_ref_to_ptr(keyring_ref); ++ key_check(keyring); ++ ++ if (keyring->type != &key_type_keyring) ++ return ERR_PTR(-ENOTDIR); ++ ++ if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM)) { ++ err = key_task_permission(keyring_ref, ctx->cred, KEY_SEARCH); ++ if (err < 0) ++ return ERR_PTR(err); ++ } ++ ++ rcu_read_lock(); ++ ctx->now = current_kernel_time(); ++ if (search_nested_keyrings(keyring, ctx)) ++ __key_get(key_ref_to_ptr(ctx->result)); + rcu_read_unlock(); +-error: +- return key_ref; ++ return ctx->result; + } + + /** +@@ -499,7 +863,7 @@ error: + * @description: The name of the keyring we want to find. + * + * As keyring_search_aux() above, but using the current task's credentials and +- * type's default matching function. ++ * type's default matching function and preferred search method. + */ + key_ref_t keyring_search(key_ref_t keyring, + struct key_type *type, +@@ -523,58 +887,49 @@ key_ref_t keyring_search(key_ref_t keyring, + EXPORT_SYMBOL(keyring_search); + + /* +- * Search the given keyring only (no recursion). ++ * Search the given keyring for a key that might be updated. + * + * The caller must guarantee that the keyring is a keyring and that the +- * permission is granted to search the keyring as no check is made here. +- * +- * RCU is used to make it unnecessary to lock the keyring key list here. ++ * permission is granted to modify the keyring as no check is made here. The ++ * caller must also hold a lock on the keyring semaphore. + * + * Returns a pointer to the found key with usage count incremented if +- * successful and returns -ENOKEY if not found. Revoked and invalidated keys +- * are skipped over. ++ * successful and returns NULL if not found. 
Revoked and invalidated keys are ++ * skipped over. + * + * If successful, the possession indicator is propagated from the keyring ref + * to the returned key reference. + */ +-key_ref_t __keyring_search_one(key_ref_t keyring_ref, +- const struct keyring_index_key *index_key) ++key_ref_t find_key_to_update(key_ref_t keyring_ref, ++ const struct keyring_index_key *index_key) + { +- struct keyring_list *klist; + struct key *keyring, *key; +- bool possessed; +- int nkeys, loop; ++ const void *object; + + keyring = key_ref_to_ptr(keyring_ref); +- possessed = is_key_possessed(keyring_ref); + +- rcu_read_lock(); ++ kenter("{%d},{%s,%s}", ++ keyring->serial, index_key->type->name, index_key->description); + +- klist = rcu_dereference(keyring->payload.subscriptions); +- if (klist) { +- nkeys = klist->nkeys; +- smp_rmb(); +- for (loop = 0; loop < nkeys ; loop++) { +- key = rcu_dereference(klist->keys[loop]); +- if (key->type == index_key->type && +- (!key->type->match || +- key->type->match(key, index_key->description)) && +- !(key->flags & ((1 << KEY_FLAG_INVALIDATED) | +- (1 << KEY_FLAG_REVOKED))) +- ) +- goto found; +- } +- } ++ object = assoc_array_find(&keyring->keys, &keyring_assoc_array_ops, ++ index_key); + +- rcu_read_unlock(); +- return ERR_PTR(-ENOKEY); ++ if (object) ++ goto found; ++ ++ kleave(" = NULL"); ++ return NULL; + + found: ++ key = keyring_ptr_to_key(object); ++ if (key->flags & ((1 << KEY_FLAG_INVALIDATED) | ++ (1 << KEY_FLAG_REVOKED))) { ++ kleave(" = NULL [x]"); ++ return NULL; ++ } + __key_get(key); +- keyring->last_used_at = key->last_used_at = +- current_kernel_time().tv_sec; +- rcu_read_unlock(); +- return make_key_ref(key, possessed); ++ kleave(" = {%d}", key->serial); ++ return make_key_ref(key, is_key_possessed(keyring_ref)); + } + + /* +@@ -637,6 +992,19 @@ out: + return keyring; + } + ++static int keyring_detect_cycle_iterator(const void *object, ++ void *iterator_data) ++{ ++ struct keyring_search_context *ctx = iterator_data; ++ const 
struct key *key = keyring_ptr_to_key(object); ++ ++ kenter("{%d}", key->serial); ++ ++ BUG_ON(key != ctx->match_data); ++ ctx->result = ERR_PTR(-EDEADLK); ++ return 1; ++} ++ + /* + * See if a cycle will will be created by inserting acyclic tree B in acyclic + * tree A at the topmost level (ie: as a direct child of A). +@@ -646,117 +1014,39 @@ out: + */ + static int keyring_detect_cycle(struct key *A, struct key *B) + { +- struct { +- struct keyring_list *keylist; +- int kix; +- } stack[KEYRING_SEARCH_MAX_DEPTH]; +- +- struct keyring_list *keylist; +- struct key *subtree, *key; +- int sp, nkeys, kix, ret; ++ struct keyring_search_context ctx = { ++ .index_key = A->index_key, ++ .match_data = A, ++ .iterator = keyring_detect_cycle_iterator, ++ .flags = (KEYRING_SEARCH_LOOKUP_DIRECT | ++ KEYRING_SEARCH_NO_STATE_CHECK | ++ KEYRING_SEARCH_NO_UPDATE_TIME | ++ KEYRING_SEARCH_NO_CHECK_PERM | ++ KEYRING_SEARCH_DETECT_TOO_DEEP), ++ }; + + rcu_read_lock(); +- +- ret = -EDEADLK; +- if (A == B) +- goto cycle_detected; +- +- subtree = B; +- sp = 0; +- +- /* start processing a new keyring */ +-descend: +- if (test_bit(KEY_FLAG_REVOKED, &subtree->flags)) +- goto not_this_keyring; +- +- keylist = rcu_dereference(subtree->payload.subscriptions); +- if (!keylist) +- goto not_this_keyring; +- kix = 0; +- +-ascend: +- /* iterate through the remaining keys in this keyring */ +- nkeys = keylist->nkeys; +- smp_rmb(); +- for (; kix < nkeys; kix++) { +- key = rcu_dereference(keylist->keys[kix]); +- +- if (key == A) +- goto cycle_detected; +- +- /* recursively check nested keyrings */ +- if (key->type == &key_type_keyring) { +- if (sp >= KEYRING_SEARCH_MAX_DEPTH) +- goto too_deep; +- +- /* stack the current position */ +- stack[sp].keylist = keylist; +- stack[sp].kix = kix; +- sp++; +- +- /* begin again with the new keyring */ +- subtree = key; +- goto descend; +- } +- } +- +- /* the keyring we're looking at was disqualified or didn't contain a +- * matching key */ +-not_this_keyring: +- if 
(sp > 0) { +- /* resume the checking of a keyring higher up in the tree */ +- sp--; +- keylist = stack[sp].keylist; +- kix = stack[sp].kix + 1; +- goto ascend; +- } +- +- ret = 0; /* no cycles detected */ +- +-error: ++ search_nested_keyrings(B, &ctx); + rcu_read_unlock(); +- return ret; +- +-too_deep: +- ret = -ELOOP; +- goto error; +- +-cycle_detected: +- ret = -EDEADLK; +- goto error; +-} +- +-/* +- * Dispose of a keyring list after the RCU grace period, freeing the unlinked +- * key +- */ +-static void keyring_unlink_rcu_disposal(struct rcu_head *rcu) +-{ +- struct keyring_list *klist = +- container_of(rcu, struct keyring_list, rcu); +- +- if (klist->delkey != USHRT_MAX) +- key_put(rcu_access_pointer(klist->keys[klist->delkey])); +- kfree(klist); ++ return PTR_ERR(ctx.result) == -EAGAIN ? 0 : PTR_ERR(ctx.result); + } + + /* + * Preallocate memory so that a key can be linked into to a keyring. + */ +-int __key_link_begin(struct key *keyring, const struct keyring_index_key *index_key, +- unsigned long *_prealloc) ++int __key_link_begin(struct key *keyring, ++ const struct keyring_index_key *index_key, ++ struct assoc_array_edit **_edit) + __acquires(&keyring->sem) + __acquires(&keyring_serialise_link_sem) + { +- struct keyring_list *klist, *nklist; +- unsigned long prealloc; +- unsigned max; +- time_t lowest_lru; +- size_t size; +- int loop, lru, ret; ++ struct assoc_array_edit *edit; ++ int ret; + + kenter("%d,%s,%s,", +- key_serial(keyring), index_key->type->name, index_key->description); ++ keyring->serial, index_key->type->name, index_key->description); ++ ++ BUG_ON(index_key->desc_len == 0); + + if (keyring->type != &key_type_keyring) + return -ENOTDIR; +@@ -772,88 +1062,25 @@ int __key_link_begin(struct key *keyring, const struct keyring_index_key *index_ + if (index_key->type == &key_type_keyring) + down_write(&keyring_serialise_link_sem); + +- klist = rcu_dereference_locked_keyring(keyring); +- +- /* see if there's a matching key we can displace */ +- lru 
= -1; +- if (klist && klist->nkeys > 0) { +- lowest_lru = TIME_T_MAX; +- for (loop = klist->nkeys - 1; loop >= 0; loop--) { +- struct key *key = rcu_deref_link_locked(klist, loop, +- keyring); +- if (key->type == index_key->type && +- strcmp(key->description, index_key->description) == 0) { +- /* Found a match - we'll replace the link with +- * one to the new key. We record the slot +- * position. +- */ +- klist->delkey = loop; +- prealloc = 0; +- goto done; +- } +- if (key->last_used_at < lowest_lru) { +- lowest_lru = key->last_used_at; +- lru = loop; +- } +- } +- } +- +- /* If the keyring is full then do an LRU discard */ +- if (klist && +- klist->nkeys == klist->maxkeys && +- klist->maxkeys >= MAX_KEYRING_LINKS) { +- kdebug("LRU discard %d\n", lru); +- klist->delkey = lru; +- prealloc = 0; +- goto done; +- } +- + /* check that we aren't going to overrun the user's quota */ + ret = key_payload_reserve(keyring, + keyring->datalen + KEYQUOTA_LINK_BYTES); + if (ret < 0) + goto error_sem; + +- if (klist && klist->nkeys < klist->maxkeys) { +- /* there's sufficient slack space to append directly */ +- klist->delkey = klist->nkeys; +- prealloc = KEY_LINK_FIXQUOTA; +- } else { +- /* grow the key list */ +- max = 4; +- if (klist) { +- max += klist->maxkeys; +- if (max > MAX_KEYRING_LINKS) +- max = MAX_KEYRING_LINKS; +- BUG_ON(max <= klist->maxkeys); +- } +- +- size = sizeof(*klist) + sizeof(struct key *) * max; +- +- ret = -ENOMEM; +- nklist = kmalloc(size, GFP_KERNEL); +- if (!nklist) +- goto error_quota; +- +- nklist->maxkeys = max; +- if (klist) { +- memcpy(nklist->keys, klist->keys, +- sizeof(struct key *) * klist->nkeys); +- nklist->delkey = klist->nkeys; +- nklist->nkeys = klist->nkeys + 1; +- klist->delkey = USHRT_MAX; +- } else { +- nklist->nkeys = 1; +- nklist->delkey = 0; +- } +- +- /* add the key into the new space */ +- RCU_INIT_POINTER(nklist->keys[nklist->delkey], NULL); +- prealloc = (unsigned long)nklist | KEY_LINK_FIXQUOTA; ++ /* Create an edit script 
that will insert/replace the key in the ++ * keyring tree. ++ */ ++ edit = assoc_array_insert(&keyring->keys, ++ &keyring_assoc_array_ops, ++ index_key, ++ NULL); ++ if (IS_ERR(edit)) { ++ ret = PTR_ERR(edit); ++ goto error_quota; + } + +-done: +- *_prealloc = prealloc; ++ *_edit = edit; + kleave(" = 0"); + return 0; + +@@ -893,60 +1120,12 @@ int __key_link_check_live_key(struct key *keyring, struct key *key) + * holds at most one link to any given key of a particular type+description + * combination. + */ +-void __key_link(struct key *keyring, struct key *key, +- unsigned long *_prealloc) ++void __key_link(struct key *key, struct assoc_array_edit **_edit) + { +- struct keyring_list *klist, *nklist; +- struct key *discard; +- +- nklist = (struct keyring_list *)(*_prealloc & ~KEY_LINK_FIXQUOTA); +- *_prealloc = 0; +- +- kenter("%d,%d,%p", keyring->serial, key->serial, nklist); +- +- klist = rcu_dereference_locked_keyring(keyring); +- + __key_get(key); +- keyring->last_used_at = key->last_used_at = +- current_kernel_time().tv_sec; +- +- /* there's a matching key we can displace or an empty slot in a newly +- * allocated list we can fill */ +- if (nklist) { +- kdebug("reissue %hu/%hu/%hu", +- nklist->delkey, nklist->nkeys, nklist->maxkeys); +- +- RCU_INIT_POINTER(nklist->keys[nklist->delkey], key); +- +- rcu_assign_pointer(keyring->payload.subscriptions, nklist); +- +- /* dispose of the old keyring list and, if there was one, the +- * displaced key */ +- if (klist) { +- kdebug("dispose %hu/%hu/%hu", +- klist->delkey, klist->nkeys, klist->maxkeys); +- call_rcu(&klist->rcu, keyring_unlink_rcu_disposal); +- } +- } else if (klist->delkey < klist->nkeys) { +- kdebug("replace %hu/%hu/%hu", +- klist->delkey, klist->nkeys, klist->maxkeys); +- +- discard = rcu_dereference_protected( +- klist->keys[klist->delkey], +- rwsem_is_locked(&keyring->sem)); +- rcu_assign_pointer(klist->keys[klist->delkey], key); +- /* The garbage collector will take care of RCU +- * synchronisation */ 
+- key_put(discard); +- } else { +- /* there's sufficient slack space to append directly */ +- kdebug("append %hu/%hu/%hu", +- klist->delkey, klist->nkeys, klist->maxkeys); +- +- RCU_INIT_POINTER(klist->keys[klist->delkey], key); +- smp_wmb(); +- klist->nkeys++; +- } ++ assoc_array_insert_set_object(*_edit, keyring_key_to_ptr(key)); ++ assoc_array_apply_edit(*_edit); ++ *_edit = NULL; + } + + /* +@@ -956,23 +1135,20 @@ void __key_link(struct key *keyring, struct key *key, + */ + void __key_link_end(struct key *keyring, + const struct keyring_index_key *index_key, +- unsigned long prealloc) ++ struct assoc_array_edit *edit) + __releases(&keyring->sem) + __releases(&keyring_serialise_link_sem) + { + BUG_ON(index_key->type == NULL); +- BUG_ON(index_key->type->name == NULL); +- kenter("%d,%s,%lx", keyring->serial, index_key->type->name, prealloc); ++ kenter("%d,%s,", keyring->serial, index_key->type->name); + + if (index_key->type == &key_type_keyring) + up_write(&keyring_serialise_link_sem); + +- if (prealloc) { +- if (prealloc & KEY_LINK_FIXQUOTA) +- key_payload_reserve(keyring, +- keyring->datalen - +- KEYQUOTA_LINK_BYTES); +- kfree((struct keyring_list *)(prealloc & ~KEY_LINK_FIXQUOTA)); ++ if (edit) { ++ key_payload_reserve(keyring, ++ keyring->datalen - KEYQUOTA_LINK_BYTES); ++ assoc_array_cancel_edit(edit); + } + up_write(&keyring->sem); + } +@@ -999,20 +1175,24 @@ void __key_link_end(struct key *keyring, + */ + int key_link(struct key *keyring, struct key *key) + { +- unsigned long prealloc; ++ struct assoc_array_edit *edit; + int ret; + ++ kenter("{%d,%d}", keyring->serial, atomic_read(&keyring->usage)); ++ + key_check(keyring); + key_check(key); + +- ret = __key_link_begin(keyring, &key->index_key, &prealloc); ++ ret = __key_link_begin(keyring, &key->index_key, &edit); + if (ret == 0) { ++ kdebug("begun {%d,%d}", keyring->serial, atomic_read(&keyring->usage)); + ret = __key_link_check_live_key(keyring, key); + if (ret == 0) +- __key_link(keyring, key, 
&prealloc); +- __key_link_end(keyring, &key->index_key, prealloc); ++ __key_link(key, &edit); ++ __key_link_end(keyring, &key->index_key, edit); + } + ++ kleave(" = %d {%d,%d}", ret, keyring->serial, atomic_read(&keyring->usage)); + return ret; + } + EXPORT_SYMBOL(key_link); +@@ -1036,90 +1216,36 @@ EXPORT_SYMBOL(key_link); + */ + int key_unlink(struct key *keyring, struct key *key) + { +- struct keyring_list *klist, *nklist; +- int loop, ret; ++ struct assoc_array_edit *edit; ++ int ret; + + key_check(keyring); + key_check(key); + +- ret = -ENOTDIR; + if (keyring->type != &key_type_keyring) +- goto error; ++ return -ENOTDIR; + + down_write(&keyring->sem); + +- klist = rcu_dereference_locked_keyring(keyring); +- if (klist) { +- /* search the keyring for the key */ +- for (loop = 0; loop < klist->nkeys; loop++) +- if (rcu_access_pointer(klist->keys[loop]) == key) +- goto key_is_present; ++ edit = assoc_array_delete(&keyring->keys, &keyring_assoc_array_ops, ++ &key->index_key); ++ if (IS_ERR(edit)) { ++ ret = PTR_ERR(edit); ++ goto error; + } +- +- up_write(&keyring->sem); + ret = -ENOENT; +- goto error; +- +-key_is_present: +- /* we need to copy the key list for RCU purposes */ +- nklist = kmalloc(sizeof(*klist) + +- sizeof(struct key *) * klist->maxkeys, +- GFP_KERNEL); +- if (!nklist) +- goto nomem; +- nklist->maxkeys = klist->maxkeys; +- nklist->nkeys = klist->nkeys - 1; +- +- if (loop > 0) +- memcpy(&nklist->keys[0], +- &klist->keys[0], +- loop * sizeof(struct key *)); +- +- if (loop < nklist->nkeys) +- memcpy(&nklist->keys[loop], +- &klist->keys[loop + 1], +- (nklist->nkeys - loop) * sizeof(struct key *)); +- +- /* adjust the user's quota */ +- key_payload_reserve(keyring, +- keyring->datalen - KEYQUOTA_LINK_BYTES); +- +- rcu_assign_pointer(keyring->payload.subscriptions, nklist); +- +- up_write(&keyring->sem); +- +- /* schedule for later cleanup */ +- klist->delkey = loop; +- call_rcu(&klist->rcu, keyring_unlink_rcu_disposal); ++ if (edit == NULL) ++ goto 
error; + ++ assoc_array_apply_edit(edit); + ret = 0; + + error: +- return ret; +-nomem: +- ret = -ENOMEM; + up_write(&keyring->sem); +- goto error; ++ return ret; + } + EXPORT_SYMBOL(key_unlink); + +-/* +- * Dispose of a keyring list after the RCU grace period, releasing the keys it +- * links to. +- */ +-static void keyring_clear_rcu_disposal(struct rcu_head *rcu) +-{ +- struct keyring_list *klist; +- int loop; +- +- klist = container_of(rcu, struct keyring_list, rcu); +- +- for (loop = klist->nkeys - 1; loop >= 0; loop--) +- key_put(rcu_access_pointer(klist->keys[loop])); +- +- kfree(klist); +-} +- + /** + * keyring_clear - Clear a keyring + * @keyring: The keyring to clear. +@@ -1130,33 +1256,25 @@ static void keyring_clear_rcu_disposal(struct rcu_head *rcu) + */ + int keyring_clear(struct key *keyring) + { +- struct keyring_list *klist; ++ struct assoc_array_edit *edit; + int ret; + +- ret = -ENOTDIR; +- if (keyring->type == &key_type_keyring) { +- /* detach the pointer block with the locks held */ +- down_write(&keyring->sem); +- +- klist = rcu_dereference_locked_keyring(keyring); +- if (klist) { +- /* adjust the quota */ +- key_payload_reserve(keyring, +- sizeof(struct keyring_list)); +- +- rcu_assign_pointer(keyring->payload.subscriptions, +- NULL); +- } +- +- up_write(&keyring->sem); ++ if (keyring->type != &key_type_keyring) ++ return -ENOTDIR; + +- /* free the keys after the locks have been dropped */ +- if (klist) +- call_rcu(&klist->rcu, keyring_clear_rcu_disposal); ++ down_write(&keyring->sem); + ++ edit = assoc_array_clear(&keyring->keys, &keyring_assoc_array_ops); ++ if (IS_ERR(edit)) { ++ ret = PTR_ERR(edit); ++ } else { ++ if (edit) ++ assoc_array_apply_edit(edit); ++ key_payload_reserve(keyring, 0); + ret = 0; + } + ++ up_write(&keyring->sem); + return ret; + } + EXPORT_SYMBOL(keyring_clear); +@@ -1168,17 +1286,25 @@ EXPORT_SYMBOL(keyring_clear); + */ + static void keyring_revoke(struct key *keyring) + { +- struct keyring_list *klist; ++ struct 
assoc_array_edit *edit; + +- klist = rcu_dereference_locked_keyring(keyring); ++ edit = assoc_array_clear(&keyring->keys, &keyring_assoc_array_ops); ++ if (!IS_ERR(edit)) { ++ if (edit) ++ assoc_array_apply_edit(edit); ++ key_payload_reserve(keyring, 0); ++ } ++} + +- /* adjust the quota */ +- key_payload_reserve(keyring, 0); ++static bool gc_iterator(void *object, void *iterator_data) ++{ ++ struct key *key = keyring_ptr_to_key(object); ++ time_t *limit = iterator_data; + +- if (klist) { +- rcu_assign_pointer(keyring->payload.subscriptions, NULL); +- call_rcu(&klist->rcu, keyring_clear_rcu_disposal); +- } ++ if (key_is_dead(key, *limit)) ++ return false; ++ key_get(key); ++ return true; + } + + /* +@@ -1191,88 +1317,12 @@ static void keyring_revoke(struct key *keyring) + */ + void keyring_gc(struct key *keyring, time_t limit) + { +- struct keyring_list *klist, *new; +- struct key *key; +- int loop, keep, max; +- + kenter("{%x,%s}", key_serial(keyring), keyring->description); + + down_write(&keyring->sem); +- +- klist = rcu_dereference_locked_keyring(keyring); +- if (!klist) +- goto no_klist; +- +- /* work out how many subscriptions we're keeping */ +- keep = 0; +- for (loop = klist->nkeys - 1; loop >= 0; loop--) +- if (!key_is_dead(rcu_deref_link_locked(klist, loop, keyring), +- limit)) +- keep++; +- +- if (keep == klist->nkeys) +- goto just_return; +- +- /* allocate a new keyring payload */ +- max = roundup(keep, 4); +- new = kmalloc(sizeof(struct keyring_list) + max * sizeof(struct key *), +- GFP_KERNEL); +- if (!new) +- goto nomem; +- new->maxkeys = max; +- new->nkeys = 0; +- new->delkey = 0; +- +- /* install the live keys +- * - must take care as expired keys may be updated back to life +- */ +- keep = 0; +- for (loop = klist->nkeys - 1; loop >= 0; loop--) { +- key = rcu_deref_link_locked(klist, loop, keyring); +- if (!key_is_dead(key, limit)) { +- if (keep >= max) +- goto discard_new; +- RCU_INIT_POINTER(new->keys[keep++], key_get(key)); +- } +- } +- 
new->nkeys = keep; +- +- /* adjust the quota */ +- key_payload_reserve(keyring, +- sizeof(struct keyring_list) + +- KEYQUOTA_LINK_BYTES * keep); +- +- if (keep == 0) { +- rcu_assign_pointer(keyring->payload.subscriptions, NULL); +- kfree(new); +- } else { +- rcu_assign_pointer(keyring->payload.subscriptions, new); +- } +- +- up_write(&keyring->sem); +- +- call_rcu(&klist->rcu, keyring_clear_rcu_disposal); +- kleave(" [yes]"); +- return; +- +-discard_new: +- new->nkeys = keep; +- keyring_clear_rcu_disposal(&new->rcu); ++ assoc_array_gc(&keyring->keys, &keyring_assoc_array_ops, ++ gc_iterator, &limit); + up_write(&keyring->sem); +- kleave(" [discard]"); +- return; + +-just_return: +- up_write(&keyring->sem); +- kleave(" [no dead]"); +- return; +- +-no_klist: +- up_write(&keyring->sem); +- kleave(" [no_klist]"); +- return; +- +-nomem: +- up_write(&keyring->sem); +- kleave(" [oom]"); ++ kleave(""); + } +diff --git a/security/keys/request_key.c b/security/keys/request_key.c +index ab75df4..df94827 100644 +--- a/security/keys/request_key.c ++++ b/security/keys/request_key.c +@@ -351,7 +351,7 @@ static int construct_alloc_key(struct keyring_search_context *ctx, + struct key_user *user, + struct key **_key) + { +- unsigned long prealloc; ++ struct assoc_array_edit *edit; + struct key *key; + key_perm_t perm; + key_ref_t key_ref; +@@ -380,7 +380,7 @@ static int construct_alloc_key(struct keyring_search_context *ctx, + set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags); + + if (dest_keyring) { +- ret = __key_link_begin(dest_keyring, &ctx->index_key, &prealloc); ++ ret = __key_link_begin(dest_keyring, &ctx->index_key, &edit); + if (ret < 0) + goto link_prealloc_failed; + } +@@ -395,11 +395,11 @@ static int construct_alloc_key(struct keyring_search_context *ctx, + goto key_already_present; + + if (dest_keyring) +- __key_link(dest_keyring, key, &prealloc); ++ __key_link(key, &edit); + + mutex_unlock(&key_construction_mutex); + if (dest_keyring) +- __key_link_end(dest_keyring, 
&ctx->index_key, prealloc); ++ __key_link_end(dest_keyring, &ctx->index_key, edit); + mutex_unlock(&user->cons_lock); + *_key = key; + kleave(" = 0 [%d]", key_serial(key)); +@@ -414,8 +414,8 @@ key_already_present: + if (dest_keyring) { + ret = __key_link_check_live_key(dest_keyring, key); + if (ret == 0) +- __key_link(dest_keyring, key, &prealloc); +- __key_link_end(dest_keyring, &ctx->index_key, prealloc); ++ __key_link(key, &edit); ++ __key_link_end(dest_keyring, &ctx->index_key, edit); + if (ret < 0) + goto link_check_failed; + } +-- +1.8.3.1 + diff --git a/keys-krb-support.patch b/keys-krb-support.patch new file mode 100644 index 000000000..07a909daa --- /dev/null +++ b/keys-krb-support.patch @@ -0,0 +1,747 @@ +From 64160c504842a359801cff17464931fa028ff164 Mon Sep 17 00:00:00 2001 +From: David Howells +Date: Fri, 30 Aug 2013 15:37:54 +0100 +Subject: [PATCH 1/2] KEYS: Implement a big key type that can save to tmpfs + +Implement a big key type that can save its contents to tmpfs and thus +swapspace when memory is tight. This is useful for Kerberos ticket caches. + +Signed-off-by: David Howells +Tested-by: Simo Sorce +--- + include/keys/big_key-type.h | 25 ++++++ + include/linux/key.h | 1 + + security/keys/Kconfig | 11 +++ + security/keys/Makefile | 1 + + security/keys/big_key.c | 204 ++++++++++++++++++++++++++++++++++++++++++++ + 5 files changed, 242 insertions(+) + create mode 100644 include/keys/big_key-type.h + create mode 100644 security/keys/big_key.c + +diff --git a/include/keys/big_key-type.h b/include/keys/big_key-type.h +new file mode 100644 +index 0000000..d69bc8a +--- /dev/null ++++ b/include/keys/big_key-type.h +@@ -0,0 +1,25 @@ ++/* Big capacity key type. ++ * ++ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. 
++ * Written by David Howells (dhowells@redhat.com) ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ */ ++ ++#ifndef _KEYS_BIG_KEY_TYPE_H ++#define _KEYS_BIG_KEY_TYPE_H ++ ++#include <linux/key-type.h> ++ ++extern struct key_type key_type_big_key; ++ ++extern int big_key_instantiate(struct key *key, struct key_preparsed_payload *prep); ++extern void big_key_revoke(struct key *key); ++extern void big_key_destroy(struct key *key); ++extern void big_key_describe(const struct key *big_key, struct seq_file *m); ++extern long big_key_read(const struct key *key, char __user *buffer, size_t buflen); ++ ++#endif /* _KEYS_BIG_KEY_TYPE_H */ +diff --git a/include/linux/key.h b/include/linux/key.h +index 2417f78..010dbb6 100644 +--- a/include/linux/key.h ++++ b/include/linux/key.h +@@ -201,6 +201,7 @@ struct key { + unsigned long value; + void __rcu *rcudata; + void *data; ++ void *data2[2]; + } payload; + struct assoc_array keys; + }; +diff --git a/security/keys/Kconfig b/security/keys/Kconfig +index 15e0dfe..b563622 100644 +--- a/security/keys/Kconfig ++++ b/security/keys/Kconfig +@@ -20,6 +20,17 @@ config KEYS + + If you are unsure as to whether this is required, answer N. + ++config BIG_KEYS ++ tristate "Large payload keys" ++ depends on KEYS ++ depends on TMPFS ++ help ++ This option provides support for holding large keys within the kernel ++ (for example Kerberos ticket caches). The data may be stored out to ++ swapspace by tmpfs. ++ ++ If you are unsure as to whether this is required, answer N.
++ + config TRUSTED_KEYS + tristate "TRUSTED KEYS" + depends on KEYS && TCG_TPM +diff --git a/security/keys/Makefile b/security/keys/Makefile +index 504aaa0..c487c77 100644 +--- a/security/keys/Makefile ++++ b/security/keys/Makefile +@@ -22,5 +22,6 @@ obj-$(CONFIG_SYSCTL) += sysctl.o + # + # Key types + # ++obj-$(CONFIG_BIG_KEYS) += big_key.o + obj-$(CONFIG_TRUSTED_KEYS) += trusted.o + obj-$(CONFIG_ENCRYPTED_KEYS) += encrypted-keys/ +diff --git a/security/keys/big_key.c b/security/keys/big_key.c +new file mode 100644 +index 0000000..5f9defc +--- /dev/null ++++ b/security/keys/big_key.c +@@ -0,0 +1,204 @@ ++/* Large capacity key type ++ * ++ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. ++ * Written by David Howells (dhowells@redhat.com) ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public Licence ++ * as published by the Free Software Foundation; either version ++ * 2 of the Licence, or (at your option) any later version. ++ */ ++ ++#include <linux/module.h> ++#include <linux/init.h> ++#include <linux/seq_file.h> ++#include <linux/file.h> ++#include <linux/shmem_fs.h> ++#include <linux/err.h> ++#include <keys/user-type.h> ++#include <keys/big_key-type.h> ++ ++MODULE_LICENSE("GPL"); ++ ++/* ++ * If the data is under this limit, there's no point creating a shm file to ++ * hold it as the permanently resident metadata for the shmem fs will be at ++ * least as large as the data.
++ */ ++#define BIG_KEY_FILE_THRESHOLD (sizeof(struct inode) + sizeof(struct dentry)) ++ ++/* ++ * big_key defined keys take an arbitrary string as the description and an ++ * arbitrary blob of data as the payload ++ */ ++struct key_type key_type_big_key = { ++ .name = "big_key", ++ .def_lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT, ++ .instantiate = big_key_instantiate, ++ .match = user_match, ++ .revoke = big_key_revoke, ++ .destroy = big_key_destroy, ++ .describe = big_key_describe, ++ .read = big_key_read, ++}; ++ ++/* ++ * Instantiate a big key ++ */ ++int big_key_instantiate(struct key *key, struct key_preparsed_payload *prep) ++{ ++ struct path *path = (struct path *)&key->payload.data2; ++ struct file *file; ++ ssize_t written; ++ size_t datalen = prep->datalen; ++ int ret; ++ ++ ret = -EINVAL; ++ if (datalen <= 0 || datalen > 1024 * 1024 || !prep->data) ++ goto error; ++ ++ /* Set an arbitrary quota */ ++ ret = key_payload_reserve(key, 16); ++ if (ret < 0) ++ goto error; ++ ++ key->type_data.x[1] = datalen; ++ ++ if (datalen > BIG_KEY_FILE_THRESHOLD) { ++ /* Create a shmem file to store the data in. This will permit the data ++ * to be swapped out if needed. ++ * ++ * TODO: Encrypt the stored data with a temporary key. 
++ */ ++ file = shmem_file_setup("", datalen, 0); ++ if (IS_ERR(file)) { ++ ret = PTR_ERR(file); /* don't report success without a payload */ ++ goto err_quota; ++ } ++ written = kernel_write(file, prep->data, prep->datalen, 0); ++ if (written != datalen) { ++ ret = written < 0 ? written : -ENOMEM; /* short write => -ENOMEM */ ++ goto err_fput; ++ } ++ ++ /* Pin the mount and dentry to the key so that we can open it again ++ * later ++ */ ++ *path = file->f_path; ++ path_get(path); ++ fput(file); ++ } else { ++ /* Just store the data in a buffer */ ++ void *data = kmalloc(datalen, GFP_KERNEL); ++ if (!data) { ++ ret = -ENOMEM; ++ goto err_quota; ++ } ++ ++ key->payload.data = memcpy(data, prep->data, prep->datalen); ++ } ++ return 0; ++ ++err_fput: ++ fput(file); ++err_quota: ++ key_payload_reserve(key, 0); ++error: ++ return ret; ++} ++ ++/* ++ * discard the stored data of a revoked big_key ++ * - called with the key sem write-locked ++ */ ++void big_key_revoke(struct key *key) ++{ ++ struct path *path = (struct path *)&key->payload.data2; ++ ++ /* clear the quota */ ++ key_payload_reserve(key, 0); ++ if (key_is_instantiated(key) && key->type_data.x[1] > BIG_KEY_FILE_THRESHOLD) ++ vfs_truncate(path, 0); ++} ++ ++/* ++ * dispose of the data dangling from the corpse of a big_key key ++ */ ++void big_key_destroy(struct key *key) ++{ ++ if (key->type_data.x[1] > BIG_KEY_FILE_THRESHOLD) { ++ struct path *path = (struct path *)&key->payload.data2; ++ path_put(path); ++ path->mnt = NULL; ++ path->dentry = NULL; ++ } else { ++ kfree(key->payload.data); ++ key->payload.data = NULL; ++ } ++} ++ ++/* ++ * describe the big_key key ++ */ ++void big_key_describe(const struct key *key, struct seq_file *m) ++{ ++ unsigned long datalen = key->type_data.x[1]; ++ ++ seq_puts(m, key->description); ++ ++ if (key_is_instantiated(key)) ++ seq_printf(m, ": %lu [%s]", ++ datalen, ++ datalen > BIG_KEY_FILE_THRESHOLD ?
"file" : "buff"); ++} ++ ++/* ++ * read the key data ++ * - the key's semaphore is read-locked ++ */ ++long big_key_read(const struct key *key, char __user *buffer, size_t buflen) ++{ ++ unsigned long datalen = key->type_data.x[1]; ++ long ret; ++ ++ if (!buffer || buflen < datalen) ++ return datalen; ++ ++ if (datalen > BIG_KEY_FILE_THRESHOLD) { ++ struct path *path = (struct path *)&key->payload.data2; ++ struct file *file; ++ loff_t pos; ++ ++ file = dentry_open(path, O_RDONLY, current_cred()); ++ if (IS_ERR(file)) ++ return PTR_ERR(file); ++ ++ pos = 0; ++ ret = vfs_read(file, buffer, datalen, &pos); ++ fput(file); ++ if (ret >= 0 && ret != datalen) ++ ret = -EIO; ++ } else { ++ ret = datalen; ++ if (copy_to_user(buffer, key->payload.data, datalen) != 0) ++ ret = -EFAULT; ++ } ++ ++ return ret; ++} ++ ++/* ++ * Module stuff ++ */ ++static int __init big_key_init(void) ++{ ++ return register_key_type(&key_type_big_key); ++} ++ ++static void __exit big_key_cleanup(void) ++{ ++ unregister_key_type(&key_type_big_key); ++} ++ ++module_init(big_key_init); ++module_exit(big_key_cleanup); +-- +1.8.3.1 + + +From b1e5b74e060add16de8d6005802644fa1700167f Mon Sep 17 00:00:00 2001 +From: David Howells +Date: Fri, 30 Aug 2013 15:37:54 +0100 +Subject: [PATCH 2/2] KEYS: Add per-user_namespace registers for persistent + per-UID kerberos caches + +Add support for per-user_namespace registers of persistent per-UID kerberos +caches held within the kernel. + +This allows the kerberos cache to be retained beyond the life of all a user's +processes so that the user's cron jobs can work. 
+ +The kerberos cache is envisioned as a keyring/key tree looking something like: + + struct user_namespace + \___ .krb_cache keyring - The register + \___ _krb.0 keyring - Root's Kerberos cache + \___ _krb.5000 keyring - User 5000's Kerberos cache + \___ _krb.5001 keyring - User 5001's Kerberos cache + \___ tkt785 big_key - A ccache blob + \___ tkt12345 big_key - Another ccache blob + +Or possibly: + + struct user_namespace + \___ .krb_cache keyring - The register + \___ _krb.0 keyring - Root's Kerberos cache + \___ _krb.5000 keyring - User 5000's Kerberos cache + \___ _krb.5001 keyring - User 5001's Kerberos cache + \___ tkt785 keyring - A ccache + \___ krbtgt/REDHAT.COM@REDHAT.COM big_key + \___ http/REDHAT.COM@REDHAT.COM user + \___ afs/REDHAT.COM@REDHAT.COM user + \___ nfs/REDHAT.COM@REDHAT.COM user + \___ krbtgt/KERNEL.ORG@KERNEL.ORG big_key + \___ http/KERNEL.ORG@KERNEL.ORG big_key + +What goes into a particular Kerberos cache is entirely up to userspace. Kernel +support is limited to giving you the Kerberos cache keyring that you want. + +The user asks for their Kerberos cache by: + + krb_cache = keyctl_get_krbcache(uid, dest_keyring); + +The uid is -1 or the user's own UID for the user's own cache or the uid of some +other user's cache (requires CAP_SETUID). This permits rpc.gssd or whatever to +mess with the cache. + +The cache returned is a keyring named "_krb.<uid>" that the possessor can read, +search, clear, invalidate, unlink from and add links to. Active LSMs get a +chance to rule on whether the caller is permitted to make a link. + +Each uid's cache keyring is created when it is first accessed and is given a +timeout that is extended each time this function is called so that the keyring +goes away after a while. The timeout is configurable by sysctl but defaults to +three days. + +Each user_namespace struct gets a lazily-created keyring that serves as the +register. The cache keyrings are added to it.
This means that standard key +search and garbage collection facilities are available. + +The user_namespace struct's register goes away when it does and anything left +in it is then automatically gc'd. + +Signed-off-by: David Howells +Tested-by: Simo Sorce +cc: Serge E. Hallyn +cc: Eric W. Biederman +--- + include/linux/user_namespace.h | 6 ++ + include/uapi/linux/keyctl.h | 1 + + kernel/user.c | 4 + + kernel/user_namespace.c | 6 ++ + security/keys/Kconfig | 17 +++++ + security/keys/Makefile | 1 + + security/keys/compat.c | 3 + + security/keys/internal.h | 9 +++ + security/keys/keyctl.c | 3 + + security/keys/persistent.c | 169 +++++++++++++++++++++++++++++++++++++++++ + security/keys/sysctl.c | 11 +++ + 11 files changed, 230 insertions(+) + create mode 100644 security/keys/persistent.c + +diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h +index b6b215f..cf21958 100644 +--- a/include/linux/user_namespace.h ++++ b/include/linux/user_namespace.h +@@ -28,6 +28,12 @@ struct user_namespace { + unsigned int proc_inum; + bool may_mount_sysfs; + bool may_mount_proc; ++ ++ /* Register of per-UID persistent keyrings for this namespace */ ++#ifdef CONFIG_PERSISTENT_KEYRINGS ++ struct key *persistent_keyring_register; ++ struct rw_semaphore persistent_keyring_register_sem; ++#endif + }; + + extern struct user_namespace init_user_ns; +diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h +index c9b7f4fa..840cb99 100644 +--- a/include/uapi/linux/keyctl.h ++++ b/include/uapi/linux/keyctl.h +@@ -56,5 +56,6 @@ + #define KEYCTL_REJECT 19 /* reject a partially constructed key */ + #define KEYCTL_INSTANTIATE_IOV 20 /* instantiate a partially constructed key */ + #define KEYCTL_INVALIDATE 21 /* invalidate a key */ ++#define KEYCTL_GET_PERSISTENT 22 /* get a user's persistent keyring */ + + #endif /* _LINUX_KEYCTL_H */ +diff --git a/kernel/user.c b/kernel/user.c +index 69b4c3d..6c9e1b9 100644 +--- a/kernel/user.c ++++ b/kernel/user.c +@@ -53,6 
+53,10 @@ struct user_namespace init_user_ns = { + .proc_inum = PROC_USER_INIT_INO, + .may_mount_sysfs = true, + .may_mount_proc = true, ++#ifdef CONFIG_PERSISTENT_KEYRINGS ++ .persistent_keyring_register_sem = ++ __RWSEM_INITIALIZER(init_user_ns.persistent_keyring_register_sem), ++#endif + }; + EXPORT_SYMBOL_GPL(init_user_ns); + +diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c +index d8c30db..ef7985e 100644 +--- a/kernel/user_namespace.c ++++ b/kernel/user_namespace.c +@@ -99,6 +99,9 @@ int create_user_ns(struct cred *new) + + update_mnt_policy(ns); + ++#ifdef CONFIG_PERSISTENT_KEYRINGS ++ init_rwsem(&ns->persistent_keyring_register_sem); ++#endif + return 0; + } + +@@ -123,6 +126,9 @@ void free_user_ns(struct user_namespace *ns) + + do { + parent = ns->parent; ++#ifdef CONFIG_PERSISTENT_KEYRINGS ++ key_put(ns->persistent_keyring_register); ++#endif + proc_free_inum(ns->proc_inum); + kmem_cache_free(user_ns_cachep, ns); + ns = parent; +diff --git a/security/keys/Kconfig b/security/keys/Kconfig +index b563622..53d8748 100644 +--- a/security/keys/Kconfig ++++ b/security/keys/Kconfig +@@ -20,6 +20,23 @@ config KEYS + + If you are unsure as to whether this is required, answer N. + ++config PERSISTENT_KEYRINGS ++ bool "Enable register of persistent per-UID keyrings" ++ depends on KEYS ++ help ++ This option provides a register of persistent per-UID keyrings, ++ primarily aimed at Kerberos key storage. The keyrings are persistent ++ in the sense that they stay around after all processes of that UID ++ have exited, not that they survive the machine being rebooted. ++ ++ A particular keyring may be accessed by either the user whose keyring ++ it is or by a process with administrative privileges. The active ++ LSMs get to rule on which admin-level processes get to access the ++ cache. ++ ++ Keyrings are created and added into the register upon demand and get ++ removed if they expire (a default timeout is set upon creation).
++ + config BIG_KEYS + tristate "Large payload keys" + depends on KEYS +diff --git a/security/keys/Makefile b/security/keys/Makefile +index c487c77..dfb3a7b 100644 +--- a/security/keys/Makefile ++++ b/security/keys/Makefile +@@ -18,6 +18,7 @@ obj-y := \ + obj-$(CONFIG_KEYS_COMPAT) += compat.o + obj-$(CONFIG_PROC_FS) += proc.o + obj-$(CONFIG_SYSCTL) += sysctl.o ++obj-$(CONFIG_PERSISTENT_KEYRINGS) += persistent.o + + # + # Key types +diff --git a/security/keys/compat.c b/security/keys/compat.c +index d65fa7f..bbd32c7 100644 +--- a/security/keys/compat.c ++++ b/security/keys/compat.c +@@ -138,6 +138,9 @@ asmlinkage long compat_sys_keyctl(u32 option, + case KEYCTL_INVALIDATE: + return keyctl_invalidate_key(arg2); + ++ case KEYCTL_GET_PERSISTENT: ++ return keyctl_get_persistent(arg2, arg3); ++ + default: + return -EOPNOTSUPP; + } +diff --git a/security/keys/internal.h b/security/keys/internal.h +index 581c6f6..80b2aac 100644 +--- a/security/keys/internal.h ++++ b/security/keys/internal.h +@@ -255,6 +255,15 @@ extern long keyctl_invalidate_key(key_serial_t); + extern long keyctl_instantiate_key_common(key_serial_t, + const struct iovec *, + unsigned, size_t, key_serial_t); ++#ifdef CONFIG_PERSISTENT_KEYRINGS ++extern long keyctl_get_persistent(uid_t, key_serial_t); ++extern unsigned persistent_keyring_expiry; ++#else ++static inline long keyctl_get_persistent(uid_t uid, key_serial_t destring) ++{ ++ return -EOPNOTSUPP; ++} ++#endif + + /* + * Debugging key validation +diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c +index 33cfd27..cee72ce 100644 +--- a/security/keys/keyctl.c ++++ b/security/keys/keyctl.c +@@ -1667,6 +1667,9 @@ SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3, + case KEYCTL_INVALIDATE: + return keyctl_invalidate_key((key_serial_t) arg2); + ++ case KEYCTL_GET_PERSISTENT: ++ return keyctl_get_persistent((uid_t)arg2, (key_serial_t)arg3); ++ + default: + return -EOPNOTSUPP; + } +diff --git a/security/keys/persistent.c 
b/security/keys/persistent.c +new file mode 100644 +index 0000000..631a022 +--- /dev/null ++++ b/security/keys/persistent.c +@@ -0,0 +1,169 @@ ++/* General persistent per-UID keyrings register ++ * ++ * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. ++ * Written by David Howells (dhowells@redhat.com) ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public Licence ++ * as published by the Free Software Foundation; either version ++ * 2 of the Licence, or (at your option) any later version. ++ */ ++ ++#include <linux/user_namespace.h> ++#include "internal.h" ++ ++unsigned persistent_keyring_expiry = 3 * 24 * 3600; /* Expire after 3 days of non-use */ ++ ++/* ++ * Create the persistent keyring register for the current user namespace. ++ * ++ * Called with the namespace's sem locked for writing. ++ */ ++static int key_create_persistent_register(struct user_namespace *ns) ++{ ++ struct key *reg = keyring_alloc(".persistent_register", ++ KUIDT_INIT(0), KGIDT_INIT(0), ++ current_cred(), ++ ((KEY_POS_ALL & ~KEY_POS_SETATTR) | ++ KEY_USR_VIEW | KEY_USR_READ), ++ KEY_ALLOC_NOT_IN_QUOTA, NULL); ++ if (IS_ERR(reg)) ++ return PTR_ERR(reg); ++ ++ ns->persistent_keyring_register = reg; ++ return 0; ++} ++ ++/* ++ * Create the persistent keyring for the specified user. ++ * ++ * Called with the namespace's sem locked for writing.
++ */ ++static key_ref_t key_create_persistent(struct user_namespace *ns, kuid_t uid, ++ struct keyring_index_key *index_key) ++{ ++ struct key *persistent; ++ key_ref_t reg_ref, persistent_ref; ++ ++ if (!ns->persistent_keyring_register) { ++ long err = key_create_persistent_register(ns); ++ if (err < 0) ++ return ERR_PTR(err); ++ } else { ++ reg_ref = make_key_ref(ns->persistent_keyring_register, true); ++ persistent_ref = find_key_to_update(reg_ref, index_key); ++ if (persistent_ref) ++ return persistent_ref; ++ } ++ ++ persistent = keyring_alloc(index_key->description, ++ uid, INVALID_GID, current_cred(), ++ ((KEY_POS_ALL & ~KEY_POS_SETATTR) | ++ KEY_USR_VIEW | KEY_USR_READ), ++ KEY_ALLOC_NOT_IN_QUOTA, ++ ns->persistent_keyring_register); ++ if (IS_ERR(persistent)) ++ return ERR_CAST(persistent); ++ ++ return make_key_ref(persistent, true); ++} ++ ++/* ++ * Get the persistent keyring for a specific UID and link it to the nominated ++ * keyring. ++ */ ++static long key_get_persistent(struct user_namespace *ns, kuid_t uid, ++ key_ref_t dest_ref) ++{ ++ struct keyring_index_key index_key; ++ struct key *persistent; ++ key_ref_t reg_ref, persistent_ref; ++ char buf[32]; ++ long ret; ++ ++ /* Look in the register if it exists */ ++ index_key.type = &key_type_keyring; ++ index_key.description = buf; ++ index_key.desc_len = sprintf(buf, "_persistent.%u", from_kuid(ns, uid)); ++ ++ if (ns->persistent_keyring_register) { ++ reg_ref = make_key_ref(ns->persistent_keyring_register, true); ++ down_read(&ns->persistent_keyring_register_sem); ++ persistent_ref = find_key_to_update(reg_ref, &index_key); ++ up_read(&ns->persistent_keyring_register_sem); ++ ++ if (persistent_ref) ++ goto found; ++ } ++ ++ /* It wasn't in the register, so we'll need to create it. We might ++ * also need to create the register. 
++ */ ++ down_write(&ns->persistent_keyring_register_sem); ++ persistent_ref = key_create_persistent(ns, uid, &index_key); ++ up_write(&ns->persistent_keyring_register_sem); ++ if (!IS_ERR(persistent_ref)) ++ goto found; ++ ++ return PTR_ERR(persistent_ref); ++ ++found: ++ ret = key_task_permission(persistent_ref, current_cred(), KEY_LINK); ++ if (ret == 0) { ++ persistent = key_ref_to_ptr(persistent_ref); ++ ret = key_link(key_ref_to_ptr(dest_ref), persistent); ++ if (ret == 0) { ++ key_set_timeout(persistent, persistent_keyring_expiry); ++ ret = persistent->serial; ++ } ++ } ++ ++ key_ref_put(persistent_ref); ++ return ret; ++} ++ ++/* ++ * Get the persistent keyring for a specific UID and link it to the nominated ++ * keyring. ++ */ ++long keyctl_get_persistent(uid_t _uid, key_serial_t destid) ++{ ++ struct user_namespace *ns = current_user_ns(); ++ key_ref_t dest_ref; ++ kuid_t uid; ++ long ret; ++ ++ /* -1 indicates the current user */ ++ if (_uid == (uid_t)-1) { ++ uid = current_uid(); ++ } else { ++ uid = make_kuid(ns, _uid); ++ if (!uid_valid(uid)) ++ return -EINVAL; ++ ++ /* You can only see your own persistent cache if you're not ++ * sufficiently privileged. 
++ */ ++ if (!uid_eq(uid, current_uid()) && ++ !uid_eq(uid, current_suid()) && ++ !uid_eq(uid, current_euid()) && ++ !uid_eq(uid, current_fsuid()) && ++ !ns_capable(ns, CAP_SETUID)) ++ return -EPERM; ++ } ++ ++ /* There must be a destination keyring */ ++ dest_ref = lookup_user_key(destid, KEY_LOOKUP_CREATE, KEY_WRITE); ++ if (IS_ERR(dest_ref)) ++ return PTR_ERR(dest_ref); ++ if (key_ref_to_ptr(dest_ref)->type != &key_type_keyring) { ++ ret = -ENOTDIR; ++ goto out_put_dest; ++ } ++ ++ ret = key_get_persistent(ns, uid, dest_ref); ++ ++out_put_dest: ++ key_ref_put(dest_ref); ++ return ret; ++} +diff --git a/security/keys/sysctl.c b/security/keys/sysctl.c +index ee32d18..8c0af08 100644 +--- a/security/keys/sysctl.c ++++ b/security/keys/sysctl.c +@@ -61,5 +61,16 @@ ctl_table key_sysctls[] = { + .extra1 = (void *) &zero, + .extra2 = (void *) &max, + }, ++#ifdef CONFIG_PERSISTENT_KEYRINGS ++ { ++ .procname = "persistent_keyring_expiry", ++ .data = &persistent_keyring_expiry, ++ .maxlen = sizeof(unsigned), ++ .mode = 0644, ++ .proc_handler = proc_dointvec_minmax, ++ .extra1 = (void *) &zero, ++ .extra2 = (void *) &max, ++ }, ++#endif + { } + }; +-- +1.8.3.1 +