/* * Implementation of the kernel access vector cache (AVC). * * Authors: Stephen Smalley, <sds@epoch.ncsc.mil> * James Morris <jmorris@redhat.com> * * Update: KaiGai, Kohei <kaigai@ak.jp.nec.com> * Replaced the avc_lock spinlock by RCU. * * Copyright (C) 2003 Red Hat, Inc., James Morris <jmorris@redhat.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2, * as published by the Free Software Foundation. */ #include <linux/types.h> #include <linux/stddef.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/fs.h> #include <linux/dcache.h> #include <linux/init.h> #include <linux/skbuff.h> #include <linux/percpu.h> #include <net/sock.h> #include <linux/un.h> #include <net/af_unix.h> #include <linux/ip.h> #include <linux/audit.h> #include <linux/ipv6.h> #include <net/ipv6.h> #include "avc.h" #include "avc_ss.h" #include "classmap.h" #define AVC_CACHE_SLOTS 512 #define AVC_DEF_CACHE_THRESHOLD 512 #define AVC_CACHE_RECLAIM 16 #ifdef CONFIG_SECURITY_SELINUX_AVC_STATS #define avc_cache_stats_incr(field) \ do { \ per_cpu(avc_cache_stats, get_cpu()).field++; \ put_cpu(); \ } while (0) #else #define avc_cache_stats_incr(field) do {} while (0) #endif struct avc_entry { u32 ssid; u32 tsid; u16 tclass; struct av_decision avd; }; struct avc_node { struct avc_entry ae; struct hlist_node list; /* anchored in avc_cache->slots[i] */ struct rcu_head rhead; }; struct avc_cache { struct hlist_head slots[AVC_CACHE_SLOTS]; /* head for avc_node->list */ spinlock_t slots_lock[AVC_CACHE_SLOTS]; /* lock for writes */ atomic_t lru_hint; /* LRU hint for reclaim scan */ atomic_t active_nodes; u32 latest_notif; /* latest revocation notification */ }; struct avc_callback_node { int (*callback) (u32 event, u32 ssid, u32 tsid, u16 tclass, u32 perms, u32 *out_retained); u32 events; u32 ssid; u32 tsid; u16 tclass; u32 perms; struct avc_callback_node *next; }; /* Exported via selinufs */ unsigned int avc_cache_threshold = AVC_DEF_CACHE_THRESHOLD; #ifdef CONFIG_SECURITY_SELINUX_AVC_STATS DEFINE_PER_CPU(struct avc_cache_stats, avc_cache_stats) = { 0 }; #endif static struct avc_cache avc_cache; static struct avc_callback_node *avc_callbacks; static struct kmem_cache *avc_node_cachep; static inline int avc_hash(u32 ssid, u32 tsid, u16 tclass) { return (ssid ^ (tsid<<2) ^ (tclass<<4)) & (AVC_CACHE_SLOTS - 1); } /** * avc_dump_av - Display an access vector in human-readable form. * @tclass: target security class * @av: access vector */ static void avc_dump_av(struct audit_buffer *ab, u16 tclass, u32 av) { const char **perms; int i, perm; if (av == 0) { audit_log_format(ab, " null"); return; } perms = secclass_map[tclass-1].perms; audit_log_format(ab, " {"); i = 0; perm = 1; while (i < (sizeof(av) * 8)) { if ((perm & av) && perms[i]) { audit_log_format(ab, " %s", perms[i]); av &= ~perm; } i++; perm <<= 1; } if (av) audit_log_format(ab, " 0x%x", av); audit_log_format(ab, " }"); } /** * avc_dump_query - Display a SID pair and a class in human-readable form. * @ssid: source security identifier * @tsid: target security identifier * @tclass: target security class */ static void avc_dump_query(struct audit_buffer *ab, u32 ssid, u32 tsid, u16 tclass) { int rc; char *scontext; u32 scontext_len; rc = security_sid_to_context(ssid, &scontext, &scontext_len); if (rc) audit_log_format(ab, "ssid=%d", ssid); else { audit_log_format(ab, "scontext=%s", scontext); kfree(scontext); } rc = security_sid_to_context(tsid, &scontext, &scontext_len); if (rc) audit_log_format(ab, " tsid=%d", tsid); else { audit_log_format(ab, " tcontext=%s", scontext); kfree(scontext); } BUG_ON(tclass >= ARRAY_SIZE(secclass_map)); audit_log_format(ab, " tclass=%s", secclass_map[tclass-1].name); } /** * avc_init - Initialize the AVC. * * Initialize the access vector cache. */ void __init avc_init(void) { int i; for (i = 0; i < AVC_CACHE_SLOTS; i++) { INIT_HLIST_HEAD(&avc_cache.slots[i]); spin_lock_init(&avc_cache.slots_lock[i]); } atomic_set(&avc_cache.active_nodes, 0); atomic_set(&avc_cache.lru_hint, 0); avc_node_cachep = kmem_cache_create("avc_node", sizeof(struct avc_node), 0, SLAB_PANIC, NULL); audit_log(current->audit_context, GFP_KERNEL, AUDIT_KERNEL, "AVC INITIALIZED\n"); } int avc_get_hash_stats(char *page) { int i, chain_len, max_chain_len, slots_used; struct avc_node *node; struct hlist_head *head; rcu_read_lock(); slots_used = 0; max_chain_len = 0; for (i = 0; i < AVC_CACHE_SLOTS; i++) { head = &avc_cache.slots[i]; if (!hlist_empty(head)) { struct hlist_node *next; slots_used++; chain_len = 0; hlist_for_each_entry_rcu(node, next, head, list) chain_len++; if (chain_len > max_chain_len) max_chain_len = chain_len; } } rcu_read_unlock(); return scnprintf(page, PAGE_SIZE, "entries: %d\nbuckets used: %d/%d\n" "longest chain: %d\n", atomic_read(&avc_cache.active_nodes), slots_used, AVC_CACHE_SLOTS, max_chain_len); } static void avc_node_free(struct rcu_head *rhead) { struct avc_node *node = container_of(rhead, struct avc_node, rhead); kmem_cache_free(avc_node_cachep, node); avc_cache_stats_incr(frees); } static void avc_node_delete(struct avc_node *node) { hlist_del_rcu(&node->list); call_rcu(&node->rhead, avc_node_free); atomic_dec(&avc_cache.active_nodes); } static void avc_node_kill(struct avc_node *node) { kmem_cache_free(avc_node_cachep, node); avc_cache_stats_incr(frees); atomic_dec(&avc_cache.active_nodes); } static void avc_node_replace(struct avc_node *new, struct avc_node *old) { hlist_replace_rcu(&old->list, &new->list); call_rcu(&old->rhead, avc_node_free); atomic_dec(&avc_cache.active_nodes); } static inline int avc_reclaim_node(void) { struct avc_node *node; int hvalue, try, ecx; unsigned long flags; struct hlist_head *head; struct hlist_node *next; spinlock_t *lock; for (try = 0, ecx = 0; try < AVC_CACHE_SLOTS; try++) { hvalue = atomic_inc_return(&avc_cache.lru_hint) & (AVC_CACHE_SLOTS - 1); head = &avc_cache.slots[hvalue]; lock = &avc_cache.slots_lock[hvalue]; if (!spin_trylock_irqsave(lock, flags)) continue; rcu_read_lock(); hlist_for_each_entry(node, next, head, list) { avc_node_delete(node); avc_cache_stats_incr(reclaims); ecx++; if (ecx >= AVC_CACHE_RECLAIM) { rcu_read_unlock(); spin_unlock_irqrestore(lock, flags); goto out; } } rcu_read_unlock(); spin_unlock_irqrestore(lock, flags); } out: return ecx; } static struct avc_node *avc_alloc_node(void) { struct avc_node *node; node = kmem_cache_zalloc(avc_node_cachep, GFP_ATOMIC); if (!node) goto out; INIT_HLIST_NODE(&node->list); avc_cache_stats_incr(allocations); if (atomic_inc_return(&avc_cache.active_nodes) > avc_cache_threshold) avc_reclaim_node(); out: return node; } static void avc_node_populate(struct avc_node *node, u32 ssid, u32 tsid, u16 tclass, struct av_decision *avd) { node->ae.ssid = ssid; node->ae.tsid = tsid; node->ae.tclass = tclass; memcpy(&node->ae.avd, avd, sizeof(node->ae.avd)); } static inline struct avc_node *avc_search_node(u32 ssid, u32 tsid, u16 tclass) { struct avc_node *node, *ret = NULL; int hvalue; struct hlist_head *head; struct hlist_node *next; hvalue = avc_hash(ssid, tsid, tclass); head = &avc_cache.slots[hvalue]; hlist_for_each_entry_rcu(node, next, head, list) { if (ssid == node->ae.ssid && tclass == node->ae.tclass && tsid == node->ae.tsid) { ret = node; break; } } return ret; } /** * avc_lookup - Look up an AVC entry. * @ssid: source security identifier * @tsid: target security identifier * @tclass: target security class * * Look up an AVC entry that is valid for the * (@ssid, @tsid), interpreting the permissions * based on @tclass. If a valid AVC entry exists, * then this function returns the avc_node. * Otherwise, this function returns NULL. */ static struct avc_node *avc_lookup(u32 ssid, u32 tsid, u16 tclass) { struct avc_node *node; avc_cache_stats_incr(lookups); node = avc_search_node(ssid, tsid, tclass); if (node) avc_cache_stats_incr(hits); else avc_cache_stats_incr(misses); return node; } static int avc_latest_notif_update(int seqno, int is_insert) { int ret = 0; static DEFINE_SPINLOCK(notif_lock); unsigned long flag; spin_lock_irqsave(¬if_lock, flag); if (is_insert) { if (seqno < avc_cache.latest_notif) { printk(KERN_WARNING "SELinux: avc: seqno %d < latest_notif %d\n", seqno, avc_cache.latest_notif); ret = -EAGAIN; } } else { if (seqno > avc_cache.latest_notif) avc_cache.latest_notif = seqno; } spin_unlock_irqrestore(¬if_lock, flag); return ret; } /** * avc_insert - Insert an AVC entry. * @ssid: source security identifier * @tsid: target security identifier * @tclass: target security class * @avd: resulting av decision * * Insert an AVC entry for the SID pair * (@ssid, @tsid) and class @tclass. * The access vectors and the sequence number are * normally provided by the security server in * response to a security_compute_av() call. If the * sequence number @avd->seqno is not less than the latest * revocation notification, then the function copies * the access vectors into a cache entry, returns * avc_node inserted. Otherwise, this function returns NULL. */ static struct avc_node *avc_insert(u32 ssid, u32 tsid, u16 tclass, struct av_decision *avd) { struct avc_node *pos, *node = NULL; int hvalue; unsigned long flag; if (avc_latest_notif_update(avd->seqno, 1)) goto out; node = avc_alloc_node(); if (node) { struct hlist_head *head; struct hlist_node *next; spinlock_t *lock; hvalue = avc_hash(ssid, tsid, tclass); avc_node_populate(node, ssid, tsid, tclass, avd); head = &avc_cache.slots[hvalue]; lock = &avc_cache.slots_lock[hvalue]; spin_lock_irqsave(lock, flag); hlist_for_each_entry(pos, next, head, list) { if (pos->ae.ssid == ssid && pos->ae.tsid == tsid && pos->ae.tclass == tclass) { avc_node_replace(node, pos); goto found; } } hlist_add_head_rcu(&node->list, head); found: spin_unlock_irqrestore(lock, flag); } out: return node; } /** * avc_audit_pre_callback - SELinux specific information * will be called by generic audit code * @ab: the audit buffer * @a: audit_data */ static void avc_audit_pre_callback(struct audit_buffer *ab, void *a) { struct common_audit_data *ad = a; audit_log_format(ab, "avc: %s ", ad->selinux_audit_data.denied ? "denied" : "granted"); avc_dump_av(ab, ad->selinux_audit_data.tclass, ad->selinux_audit_data.audited); audit_log_format(ab, " for "); } /** * avc_audit_post_callback - SELinux specific information * will be called by generic audit code * @ab: the audit buffer * @a: audit_data */ static void avc_audit_post_callback(struct audit_buffer *ab, void *a) { struct common_audit_data *ad = a; audit_log_format(ab, " "); avc_dump_query(ab, ad->selinux_audit_data.ssid, ad->selinux_audit_data.tsid, ad->selinux_audit_data.tclass); } /** * avc_audit - Audit the granting or denial of permissions. * @ssid: source security identifier * @tsid: target security identifier * @tclass: target security class * @requested: requested permissions * @avd: access vector decisions * @result: result from avc_has_perm_noaudit * @a: auxiliary audit data * @flags: VFS walk flags * * Audit the granting or denial of permissions in accordance * with the policy. This function is typically called by * avc_has_perm() after a permission check, but can also be * called directly by callers who use avc_has_perm_noaudit() * in order to separate the permission check from the auditing. * For example, this separation is useful when the permission check must * be performed under a lock, to allow the lock to be released * before calling the auditing code. */ int avc_audit(u32 ssid, u32 tsid, u16 tclass, u32 requested, struct av_decision *avd, int result, struct common_audit_data *a, unsigned flags) { struct common_audit_data stack_data; u32 denied, audited; denied = requested & ~avd->allowed; if (denied) { audited = denied & avd->auditdeny; /* * a->selinux_audit_data.auditdeny is TRICKY! Setting a bit in * this field means that ANY denials should NOT be audited if * the policy contains an explicit dontaudit rule for that * permission. Take notice that this is unrelated to the * actual permissions that were denied. As an example lets * assume: * * denied == READ * avd.auditdeny & ACCESS == 0 (not set means explicit rule) * selinux_audit_data.auditdeny & ACCESS == 1 * * We will NOT audit the denial even though the denied * permission was READ and the auditdeny checks were for * ACCESS */ if (a && a->selinux_audit_data.auditdeny && !(a->selinux_audit_data.auditdeny & avd->auditdeny)) audited = 0; } else if (result) audited = denied = requested; else audited = requested & avd->auditallow; if (!audited) return 0; if (!a) { a = &stack_data; COMMON_AUDIT_DATA_INIT(a, NONE); } /* * When in a RCU walk do the audit on the RCU retry. This is because * the collection of the dname in an inode audit message is not RCU * safe. Note this may drop some audits when the situation changes * during retry. However this is logically just as if the operation * happened a little later. */ if ((a->type == LSM_AUDIT_DATA_FS) && (flags & IPERM_FLAG_RCU)) return -ECHILD; a->selinux_audit_data.tclass = tclass; a->selinux_audit_data.requested = requested; a->selinux_audit_data.ssid = ssid; a->selinux_audit_data.tsid = tsid; a->selinux_audit_data.audited = audited; a->selinux_audit_data.denied = denied; a->lsm_pre_audit = avc_audit_pre_callback; a->lsm_post_audit = avc_audit_post_callback; common_lsm_audit(a); return 0; } /** * avc_add_callback - Register a callback for security events. * @callback: callback function * @events: security events * @ssid: source security identifier or %SECSID_WILD * @tsid: target security identifier or %SECSID_WILD * @tclass: target security class * @perms: permissions * * Register a callback function for events in the set @events * related to the SID pair (@ssid, @tsid) * and the permissions @perms, interpreting * @perms based on @tclass. Returns %0 on success or * -%ENOMEM if insufficient memory exists to add the callback. */ int avc_add_callback(int (*callback)(u32 event, u32 ssid, u32 tsid, u16 tclass, u32 perms, u32 *out_retained), u32 events, u32 ssid, u32 tsid, u16 tclass, u32 perms) { struct avc_callback_node *c; int rc = 0; c = kmalloc(sizeof(*c), GFP_ATOMIC); if (!c) { rc = -ENOMEM; goto out; } c->callback = callback; c->events = events; c->ssid = ssid; c->tsid = tsid; c->perms = perms; c->next = avc_callbacks; avc_callbacks = c; out: return rc; } static inline int avc_sidcmp(u32 x, u32 y) { return (x == y || x == SECSID_WILD || y == SECSID_WILD); } /** * avc_update_node Update an AVC entry * @event : Updating event * @perms : Permission mask bits * @ssid,@tsid,@tclass : identifier of an AVC entry * @seqno : sequence number when decision was made * * if a valid AVC entry doesn't exist,this function returns -ENOENT. * if kmalloc() called internal returns NULL, this function returns -ENOMEM. * otherwise, this function updates the AVC entry. The original AVC-entry object * will release later by RCU. */ static int avc_update_node(u32 event, u32 perms, u32 ssid, u32 tsid, u16 tclass, u32 seqno) { int hvalue, rc = 0; unsigned long flag; struct avc_node *pos, *node, *orig = NULL; struct hlist_head *head; struct hlist_node *next; spinlock_t *lock; node = avc_alloc_node(); if (!node) { rc = -ENOMEM; goto out; } /* Lock the target slot */ hvalue = avc_hash(ssid, tsid, tclass); head = &avc_cache.slots[hvalue]; lock = &avc_cache.slots_lock[hvalue]; spin_lock_irqsave(lock, flag); hlist_for_each_entry(pos, next, head, list) { if (ssid == pos->ae.ssid && tsid == pos->ae.tsid && tclass == pos->ae.tclass && seqno == pos->ae.avd.seqno){ orig = pos; break; } } if (!orig) { rc = -ENOENT; avc_node_kill(node); goto out_unlock; } /* * Copy and replace original node. */ avc_node_populate(node, ssid, tsid, tclass, &orig->ae.avd); switch (event) { case AVC_CALLBACK_GRANT: node->ae.avd.allowed |= perms; break; case AVC_CALLBACK_TRY_REVOKE: case AVC_CALLBACK_REVOKE: node->ae.avd.allowed &= ~perms; break; case AVC_CALLBACK_AUDITALLOW_ENABLE: node->ae.avd.auditallow |= perms; break; case AVC_CALLBACK_AUDITALLOW_DISABLE: node->ae.avd.auditallow &= ~perms; break; case AVC_CALLBACK_AUDITDENY_ENABLE: node->ae.avd.auditdeny |= perms; break; case AVC_CALLBACK_AUDITDENY_DISABLE: node->ae.avd.auditdeny &= ~perms; break; } avc_node_replace(node, orig); out_unlock: spin_unlock_irqrestore(lock, flag); out: return rc; } /** * avc_flush - Flush the cache */ static void avc_flush(void) { struct hlist_head *head; struct hlist_node *next; struct avc_node *node; spinlock_t *lock; unsigned long flag; int i; for (i = 0; i < AVC_CACHE_SLOTS; i++) { head = &avc_cache.slots[i]; lock = &avc_cache.slots_lock[i]; spin_lock_irqsave(lock, flag); /* * With preemptable RCU, the outer spinlock does not * prevent RCU grace periods from ending. */ rcu_read_lock(); hlist_for_each_entry(node, next, head, list) avc_node_delete(node); rcu_read_unlock(); spin_unlock_irqrestore(lock, flag); } } /** * avc_ss_reset - Flush the cache and revalidate migrated permissions. * @seqno: policy sequence number */ int avc_ss_reset(u32 seqno) { struct avc_callback_node *c; int rc = 0, tmprc; avc_flush(); for (c = avc_callbacks; c; c = c->next) { if (c->events & AVC_CALLBACK_RESET) { tmprc = c->callback(AVC_CALLBACK_RESET, 0, 0, 0, 0, NULL); /* save the first error encountered for the return value and continue processing the callbacks */ if (!rc) rc = tmprc; } } avc_latest_notif_update(seqno, 0); return rc; } /** * avc_has_perm_noaudit - Check permissions but perform no auditing. * @ssid: source security identifier * @tsid: target security identifier * @tclass: target security class * @requested: requested permissions, interpreted based on @tclass * @flags: AVC_STRICT or 0 * @avd: access vector decisions * * Check the AVC to determine whether the @requested permissions are granted * for the SID pair (@ssid, @tsid), interpreting the permissions * based on @tclass, and call the security server on a cache miss to obtain * a new decision and add it to the cache. Return a copy of the decisions * in @avd. Return %0 if all @requested permissions are granted, * -%EACCES if any permissions are denied, or another -errno upon * other errors. This function is typically called by avc_has_perm(), * but may also be called directly to separate permission checking from * auditing, e.g. in cases where a lock must be held for the check but * should be released for the auditing. */ int avc_has_perm_noaudit(u32 ssid, u32 tsid, u16 tclass, u32 requested, unsigned flags, struct av_decision *in_avd) { struct avc_node *node; struct av_decision avd_entry, *avd; int rc = 0; u32 denied; BUG_ON(!requested); rcu_read_lock(); node = avc_lookup(ssid, tsid, tclass); if (!node) { rcu_read_unlock(); if (in_avd) avd = in_avd; else avd = &avd_entry; security_compute_av(ssid, tsid, tclass, avd); rcu_read_lock(); node = avc_insert(ssid, tsid, tclass, avd); } else { if (in_avd) memcpy(in_avd, &node->ae.avd, sizeof(*in_avd)); avd = &node->ae.avd; } denied = requested & ~(avd->allowed); if (denied) { if (flags & AVC_STRICT) rc = -EACCES; else if (!selinux_enforcing || (avd->flags & AVD_FLAGS_PERMISSIVE)) avc_update_node(AVC_CALLBACK_GRANT, requested, ssid, tsid, tclass, avd->seqno); else rc = -EACCES; } rcu_read_unlock(); return rc; } /** * avc_has_perm - Check permissions and perform any appropriate auditing. * @ssid: source security identifier * @tsid: target security identifier * @tclass: target security class * @requested: requested permissions, interpreted based on @tclass * @auditdata: auxiliary audit data * @flags: VFS walk flags * * Check the AVC to determine whether the @requested permissions are granted * for the SID pair (@ssid, @tsid), interpreting the permissions * based on @tclass, and call the security server on a cache miss to obtain * a new decision and add it to the cache. Audit the granting or denial of * permissions in accordance with the policy. Return %0 if all @requested * permissions are granted, -%EACCES if any permissions are denied, or * another -errno upon other errors. */ int avc_has_perm_flags(u32 ssid, u32 tsid, u16 tclass, u32 requested, struct common_audit_data *auditdata, unsigned flags) { struct av_decision avd; int rc, rc2; rc = avc_has_perm_noaudit(ssid, tsid, tclass, requested, 0, &avd); rc2 = avc_audit(ssid, tsid, tclass, requested, &avd, rc, auditdata, flags); if (rc2) return rc2; return rc; } u32 avc_policy_seqno(void) { return avc_cache.latest_notif; } void avc_disable(void) { /* * If you are looking at this because you have realized that we are * not destroying the avc_node_cachep it might be easy to fix, but * I don't know the memory barrier semantics well enough to know. It's * possible that some other task dereferenced security_ops when * it still pointed to selinux operations. If that is the case it's * possible that it is about to use the avc and is about to need the * avc_node_cachep. I know I could wrap the security.c security_ops call * in an rcu_lock, but seriously, it's not worth it. Instead I just flush * the cache and get that memory back. */ if (avc_node_cachep) { avc_flush(); /* kmem_cache_destroy(avc_node_cachep); */ } }