#include <linux/kernel.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/module.h> #include <linux/proc_fs.h> #include <linux/skbuff.h> #include <linux/netfilter.h> #include <linux/seq_file.h> #include <linux/rcupdate.h> #include <net/protocol.h> #include <net/netfilter/nf_queue.h> #include <net/dst.h> #include "nf_internals.h" /* * A queue handler may be registered for each protocol. Each is protected by * long term mutex. The handler must provide an an outfn() to accept packets * for queueing and must reinject all packets it receives, no matter what. */ static const struct nf_queue_handler __rcu *queue_handler[NFPROTO_NUMPROTO] __read_mostly; static DEFINE_MUTEX(queue_handler_mutex); /* return EBUSY when somebody else is registered, return EEXIST if the * same handler is registered, return 0 in case of success. */ int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) { int ret; const struct nf_queue_handler *old; if (pf >= ARRAY_SIZE(queue_handler)) return -EINVAL; mutex_lock(&queue_handler_mutex); old = rcu_dereference_protected(queue_handler[pf], lockdep_is_held(&queue_handler_mutex)); if (old == qh) ret = -EEXIST; else if (old) ret = -EBUSY; else { rcu_assign_pointer(queue_handler[pf], qh); ret = 0; } mutex_unlock(&queue_handler_mutex); return ret; } EXPORT_SYMBOL(nf_register_queue_handler); /* The caller must flush their queue before this */ int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) { const struct nf_queue_handler *old; if (pf >= ARRAY_SIZE(queue_handler)) return -EINVAL; mutex_lock(&queue_handler_mutex); old = rcu_dereference_protected(queue_handler[pf], lockdep_is_held(&queue_handler_mutex)); if (old && old != qh) { mutex_unlock(&queue_handler_mutex); return -EINVAL; } rcu_assign_pointer(queue_handler[pf], NULL); mutex_unlock(&queue_handler_mutex); synchronize_rcu(); return 0; } EXPORT_SYMBOL(nf_unregister_queue_handler); void nf_unregister_queue_handlers(const struct nf_queue_handler *qh) { u_int8_t pf; mutex_lock(&queue_handler_mutex); for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++) { if (rcu_dereference_protected( queue_handler[pf], lockdep_is_held(&queue_handler_mutex) ) == qh) rcu_assign_pointer(queue_handler[pf], NULL); } mutex_unlock(&queue_handler_mutex); synchronize_rcu(); } EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers); static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) { /* Release those devices we held, or Alexey will kill me. */ if (entry->indev) dev_put(entry->indev); if (entry->outdev) dev_put(entry->outdev); #ifdef CONFIG_BRIDGE_NETFILTER if (entry->skb->nf_bridge) { struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge; if (nf_bridge->physindev) dev_put(nf_bridge->physindev); if (nf_bridge->physoutdev) dev_put(nf_bridge->physoutdev); } #endif /* Drop reference to owner of hook which queued us. */ module_put(entry->elem->owner); } /* * Any packet that leaves via this function must come back * through nf_reinject(). */ static int __nf_queue(struct sk_buff *skb, struct list_head *elem, u_int8_t pf, unsigned int hook, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), unsigned int queuenum) { int status = -ENOENT; struct nf_queue_entry *entry = NULL; #ifdef CONFIG_BRIDGE_NETFILTER struct net_device *physindev; struct net_device *physoutdev; #endif const struct nf_afinfo *afinfo; const struct nf_queue_handler *qh; /* QUEUE == DROP if no one is waiting, to be safe. */ rcu_read_lock(); qh = rcu_dereference(queue_handler[pf]); if (!qh) { status = -ESRCH; goto err_unlock; } afinfo = nf_get_afinfo(pf); if (!afinfo) goto err_unlock; entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC); if (!entry) { status = -ENOMEM; goto err_unlock; } *entry = (struct nf_queue_entry) { .skb = skb, .elem = list_entry(elem, struct nf_hook_ops, list), .pf = pf, .hook = hook, .indev = indev, .outdev = outdev, .okfn = okfn, }; /* If it's going away, ignore hook. */ if (!try_module_get(entry->elem->owner)) { status = -ECANCELED; goto err_unlock; } /* Bump dev refs so they don't vanish while packet is out */ if (indev) dev_hold(indev); if (outdev) dev_hold(outdev); #ifdef CONFIG_BRIDGE_NETFILTER if (skb->nf_bridge) { physindev = skb->nf_bridge->physindev; if (physindev) dev_hold(physindev); physoutdev = skb->nf_bridge->physoutdev; if (physoutdev) dev_hold(physoutdev); } #endif skb_dst_force(skb); afinfo->saveroute(skb, entry); status = qh->outfn(entry, queuenum); rcu_read_unlock(); if (status < 0) { nf_queue_entry_release_refs(entry); goto err; } return 0; err_unlock: rcu_read_unlock(); err: kfree(entry); return status; } int nf_queue(struct sk_buff *skb, struct list_head *elem, u_int8_t pf, unsigned int hook, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), unsigned int queuenum) { struct sk_buff *segs; int err; unsigned int queued; if (!skb_is_gso(skb)) return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn, queuenum); switch (pf) { case NFPROTO_IPV4: skb->protocol = htons(ETH_P_IP); break; case NFPROTO_IPV6: skb->protocol = htons(ETH_P_IPV6); break; } segs = skb_gso_segment(skb, 0); /* Does not use PTR_ERR to limit the number of error codes that can be * returned by nf_queue. For instance, callers rely on -ECANCELED to mean * 'ignore this hook'. */ if (IS_ERR(segs)) return -EINVAL; queued = 0; err = 0; do { struct sk_buff *nskb = segs->next; segs->next = NULL; if (err == 0) err = __nf_queue(segs, elem, pf, hook, indev, outdev, okfn, queuenum); if (err == 0) queued++; else kfree_skb(segs); segs = nskb; } while (segs); /* also free orig skb if only some segments were queued */ if (unlikely(err && queued)) err = 0; if (err == 0) kfree_skb(skb); return err; } void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) { struct sk_buff *skb = entry->skb; struct list_head *elem = &entry->elem->list; const struct nf_afinfo *afinfo; int err; rcu_read_lock(); nf_queue_entry_release_refs(entry); /* Continue traversal iff userspace said ok... */ if (verdict == NF_REPEAT) { elem = elem->prev; verdict = NF_ACCEPT; } if (verdict == NF_ACCEPT) { afinfo = nf_get_afinfo(entry->pf); if (!afinfo || afinfo->reroute(skb, entry) < 0) verdict = NF_DROP; } if (verdict == NF_ACCEPT) { next_hook: verdict = nf_iterate(&nf_hooks[entry->pf][entry->hook], skb, entry->hook, entry->indev, entry->outdev, &elem, entry->okfn, INT_MIN); } switch (verdict & NF_VERDICT_MASK) { case NF_ACCEPT: case NF_STOP: local_bh_disable(); entry->okfn(skb); local_bh_enable(); break; case NF_QUEUE: err = __nf_queue(skb, elem, entry->pf, entry->hook, entry->indev, entry->outdev, entry->okfn, verdict >> NF_VERDICT_QBITS); if (err < 0) { if (err == -ECANCELED) goto next_hook; if (err == -ESRCH && (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) goto next_hook; kfree_skb(skb); } break; case NF_STOLEN: default: kfree_skb(skb); } rcu_read_unlock(); kfree(entry); } EXPORT_SYMBOL(nf_reinject); #ifdef CONFIG_PROC_FS static void *seq_start(struct seq_file *seq, loff_t *pos) { if (*pos >= ARRAY_SIZE(queue_handler)) return NULL; return pos; } static void *seq_next(struct seq_file *s, void *v, loff_t *pos) { (*pos)++; if (*pos >= ARRAY_SIZE(queue_handler)) return NULL; return pos; } static void seq_stop(struct seq_file *s, void *v) { } static int seq_show(struct seq_file *s, void *v) { int ret; loff_t *pos = v; const struct nf_queue_handler *qh; rcu_read_lock(); qh = rcu_dereference(queue_handler[*pos]); if (!qh) ret = seq_printf(s, "%2lld NONE\n", *pos); else ret = seq_printf(s, "%2lld %s\n", *pos, qh->name); rcu_read_unlock(); return ret; } static const struct seq_operations nfqueue_seq_ops = { .start = seq_start, .next = seq_next, .stop = seq_stop, .show = seq_show, }; static int nfqueue_open(struct inode *inode, struct file *file) { return seq_open(file, &nfqueue_seq_ops); } static const struct file_operations nfqueue_file_ops = { .owner = THIS_MODULE, .open = nfqueue_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, }; #endif /* PROC_FS */ int __init netfilter_queue_init(void) { #ifdef CONFIG_PROC_FS if (!proc_create("nf_queue", S_IRUGO, proc_net_netfilter, &nfqueue_file_ops)) return -1; #endif return 0; }