/* * ipvrf.c "ip vrf" * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. * * Authors: David Ahern <dsa@cumulusnetworks.com> * */ #include <sys/types.h> #include <sys/stat.h> #include <sys/socket.h> #include <sys/mount.h> #include <linux/bpf.h> #include <linux/if.h> #include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <string.h> #include <dirent.h> #include <errno.h> #include <limits.h> #include "rt_names.h" #include "utils.h" #include "ip_common.h" #include "bpf_util.h" #define CGRP_PROC_FILE "/cgroup.procs" static struct link_filter vrf_filter; static void usage(void) { fprintf(stderr, "Usage: ip vrf show [NAME] ...\n"); fprintf(stderr, " ip vrf exec [NAME] cmd ...\n"); fprintf(stderr, " ip vrf identify [PID]\n"); fprintf(stderr, " ip vrf pids [NAME]\n"); exit(-1); } /* * parse process based cgroup file looking for PATH/vrf/NAME where * NAME is the name of the vrf the process is associated with */ static int vrf_identify(pid_t pid, char *name, size_t len) { char path[PATH_MAX]; char buf[4096]; char *vrf, *end; FILE *fp; snprintf(path, sizeof(path), "/proc/%d/cgroup", pid); fp = fopen(path, "r"); if (!fp) return -1; memset(name, 0, len); while (fgets(buf, sizeof(buf), fp)) { /* want the controller-less cgroup */ if (strstr(buf, "::/") == NULL) continue; vrf = strstr(buf, "/vrf/"); if (vrf) { vrf += 5; /* skip past "/vrf/" */ end = strchr(vrf, '\n'); if (end) *end = '\0'; strlcpy(name, vrf, len); break; } } fclose(fp); return 0; } static int ipvrf_identify(int argc, char **argv) { char vrf[32]; int rc; unsigned int pid; if (argc < 1) pid = getpid(); else if (argc > 1) invarg("Extra arguments specified\n", argv[1]); else if (get_unsigned(&pid, argv[0], 10)) invarg("Invalid pid\n", argv[0]); rc = vrf_identify(pid, vrf, sizeof(vrf)); if (!rc) { if (vrf[0] != '\0') printf("%s\n", vrf); } else { fprintf(stderr, "Failed to lookup vrf association: %s\n", strerror(errno)); } return rc; } /* read PATH/vrf/NAME/cgroup.procs file */ static void read_cgroup_pids(const char *base_path, char *name) { char path[PATH_MAX]; char buf[4096]; FILE *fp; if (snprintf(path, sizeof(path), "%s/vrf/%s%s", base_path, name, CGRP_PROC_FILE) >= sizeof(path)) return; fp = fopen(path, "r"); if (!fp) return; /* no cgroup file, nothing to show */ /* dump contents (pids) of cgroup.procs */ while (fgets(buf, sizeof(buf), fp)) { char *nl, comm[32]; nl = strchr(buf, '\n'); if (nl) *nl = '\0'; if (get_command_name(buf, comm, sizeof(comm))) strcpy(comm, "<terminated?>"); printf("%5s %s\n", buf, comm); } fclose(fp); } /* recurse path looking for PATH[/NETNS]/vrf/NAME */ static int recurse_dir(char *base_path, char *name, const char *netns) { char path[PATH_MAX]; struct dirent *de; struct stat fstat; int rc; DIR *d; d = opendir(base_path); if (!d) return -1; while ((de = readdir(d)) != NULL) { if (!strcmp(de->d_name, ".") || !strcmp(de->d_name, "..")) continue; if (!strcmp(de->d_name, "vrf")) { const char *pdir = strrchr(base_path, '/'); /* found a 'vrf' directory. if it is for the given * namespace then dump the cgroup pids */ if (*netns == '\0' || (pdir && !strcmp(pdir+1, netns))) read_cgroup_pids(base_path, name); continue; } /* is this a subdir that needs to be walked */ if (snprintf(path, sizeof(path), "%s/%s", base_path, de->d_name) >= sizeof(path)) continue; if (lstat(path, &fstat) < 0) continue; if (S_ISDIR(fstat.st_mode)) { rc = recurse_dir(path, name, netns); if (rc != 0) goto out; } } rc = 0; out: closedir(d); return rc; } static int ipvrf_get_netns(char *netns, int len) { if (netns_identify_pid("self", netns, len-3)) { fprintf(stderr, "Failed to get name of network namespace: %s\n", strerror(errno)); return -1; } if (*netns != '\0') strcat(netns, "-ns"); return 0; } static int ipvrf_pids(int argc, char **argv) { char *mnt, *vrf; char netns[256]; int ret = -1; if (argc != 1) { fprintf(stderr, "Invalid arguments\n"); return -1; } vrf = argv[0]; if (!name_is_vrf(vrf)) { fprintf(stderr, "Invalid VRF name\n"); return -1; } mnt = find_cgroup2_mount(); if (!mnt) return -1; if (ipvrf_get_netns(netns, sizeof(netns)) < 0) goto out; ret = recurse_dir(mnt, vrf, netns); out: free(mnt); return ret; } /* load BPF program to set sk_bound_dev_if for sockets */ static char bpf_log_buf[256*1024]; static int prog_load(int idx) { struct bpf_insn prog[] = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), BPF_MOV64_IMM(BPF_REG_3, idx), BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, bound_dev_if)), BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, bound_dev_if)), BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */ BPF_EXIT_INSN(), }; return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, prog, sizeof(prog), "GPL", bpf_log_buf, sizeof(bpf_log_buf)); } static int vrf_configure_cgroup(const char *path, int ifindex) { int rc = -1, cg_fd, prog_fd = -1; cg_fd = open(path, O_DIRECTORY | O_RDONLY); if (cg_fd < 0) { fprintf(stderr, "Failed to open cgroup path: '%s'\n", strerror(errno)); goto out; } /* * Load bpf program into kernel and attach to cgroup to affect * socket creates */ prog_fd = prog_load(ifindex); if (prog_fd < 0) { fprintf(stderr, "Failed to load BPF prog: '%s'\n", strerror(errno)); if (errno != EPERM) { fprintf(stderr, "Kernel compiled with CGROUP_BPF enabled?\n"); } goto out; } if (bpf_prog_attach_fd(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE)) { fprintf(stderr, "Failed to attach prog to cgroup: '%s'\n", strerror(errno)); goto out; } rc = 0; out: close(cg_fd); close(prog_fd); return rc; } /* get base path for controller-less cgroup for a process. * path returned does not include /vrf/NAME if it exists */ static int vrf_path(char *vpath, size_t len) { char path[PATH_MAX]; char buf[4096]; char *vrf; FILE *fp; snprintf(path, sizeof(path), "/proc/%d/cgroup", getpid()); fp = fopen(path, "r"); if (!fp) return -1; vpath[0] = '\0'; while (fgets(buf, sizeof(buf), fp)) { char *start, *nl; start = strstr(buf, "::/"); if (!start) continue; /* advance past '::' */ start += 2; nl = strchr(start, '\n'); if (nl) *nl = '\0'; vrf = strstr(start, "/vrf"); if (vrf) *vrf = '\0'; strlcpy(vpath, start, len); /* if vrf path is just / then return nothing */ if (!strcmp(vpath, "/")) vpath[0] = '\0'; break; } fclose(fp); return 0; } static int vrf_switch(const char *name) { char path[PATH_MAX], *mnt, pid[16]; char vpath[PATH_MAX], netns[256]; int ifindex = 0; int rc = -1, len, fd = -1; if (strcmp(name, "default")) { ifindex = name_is_vrf(name); if (!ifindex) { fprintf(stderr, "Invalid VRF name\n"); return -1; } } mnt = find_cgroup2_mount(); if (!mnt) return -1; /* -1 on length to add '/' to the end */ if (ipvrf_get_netns(netns, sizeof(netns) - 1) < 0) goto out; if (vrf_path(vpath, sizeof(vpath)) < 0) { fprintf(stderr, "Failed to get base cgroup path: %s\n", strerror(errno)); goto out; } /* if path already ends in netns then don't add it again */ if (*netns != '\0') { char *pdir = strrchr(vpath, '/'); if (!pdir) pdir = vpath; else pdir++; if (strcmp(pdir, netns) == 0) *pdir = '\0'; strcat(netns, "/"); } /* path to cgroup; make sure buffer has room to cat "/cgroup.procs" * to the end of the path */ len = snprintf(path, sizeof(path) - sizeof(CGRP_PROC_FILE), "%s%s/%svrf/%s", mnt, vpath, netns, ifindex ? name : ""); if (len > sizeof(path) - sizeof(CGRP_PROC_FILE)) { fprintf(stderr, "Invalid path to cgroup2 mount\n"); goto out; } if (make_path(path, 0755)) { fprintf(stderr, "Failed to setup vrf cgroup2 directory\n"); goto out; } if (ifindex && vrf_configure_cgroup(path, ifindex)) goto out; /* * write pid to cgroup.procs making process part of cgroup */ strcat(path, CGRP_PROC_FILE); fd = open(path, O_RDWR | O_APPEND); if (fd < 0) { fprintf(stderr, "Failed to open cgroups.procs file: %s.\n", strerror(errno)); goto out; } snprintf(pid, sizeof(pid), "%d", getpid()); if (write(fd, pid, strlen(pid)) < 0) { fprintf(stderr, "Failed to join cgroup\n"); goto out2; } rc = 0; out2: close(fd); out: free(mnt); return rc; } static int ipvrf_exec(int argc, char **argv) { if (argc < 1) { fprintf(stderr, "No VRF name specified\n"); return -1; } if (argc < 2) { fprintf(stderr, "No command specified\n"); return -1; } if (vrf_switch(argv[0])) return -1; return -cmd_exec(argv[1], argv + 1, !!batch_mode); } /* reset VRF association of current process to default VRF; * used by netns_exec */ void vrf_reset(void) { char vrf[32]; if (vrf_identify(getpid(), vrf, sizeof(vrf)) || (vrf[0] == '\0')) return; vrf_switch("default"); } static int ipvrf_filter_req(struct nlmsghdr *nlh, int reqlen) { struct rtattr *linkinfo; int err; if (vrf_filter.kind) { linkinfo = addattr_nest(nlh, reqlen, IFLA_LINKINFO); err = addattr_l(nlh, reqlen, IFLA_INFO_KIND, vrf_filter.kind, strlen(vrf_filter.kind)); if (err) return err; addattr_nest_end(nlh, linkinfo); } return 0; } /* input arg is linkinfo */ static __u32 vrf_table_linkinfo(struct rtattr *li[]) { struct rtattr *attr[IFLA_VRF_MAX + 1]; if (li[IFLA_INFO_DATA]) { parse_rtattr_nested(attr, IFLA_VRF_MAX, li[IFLA_INFO_DATA]); if (attr[IFLA_VRF_TABLE]) return rta_getattr_u32(attr[IFLA_VRF_TABLE]); } return 0; } static int ipvrf_print(struct nlmsghdr *n) { struct ifinfomsg *ifi = NLMSG_DATA(n); struct rtattr *tb[IFLA_MAX+1]; struct rtattr *li[IFLA_INFO_MAX+1]; int len = n->nlmsg_len; const char *name; __u32 tb_id; len -= NLMSG_LENGTH(sizeof(*ifi)); if (len < 0) return 0; if (vrf_filter.ifindex && vrf_filter.ifindex != ifi->ifi_index) return 0; parse_rtattr(tb, IFLA_MAX, IFLA_RTA(ifi), len); /* kernel does not support filter by master device */ if (tb[IFLA_MASTER]) { int master = *(int *)RTA_DATA(tb[IFLA_MASTER]); if (vrf_filter.master && master != vrf_filter.master) return 0; } if (!tb[IFLA_IFNAME]) { fprintf(stderr, "BUG: device with ifindex %d has nil ifname\n", ifi->ifi_index); return 0; } name = rta_getattr_str(tb[IFLA_IFNAME]); /* missing LINKINFO means not VRF. e.g., kernel does not * support filtering on kind, so userspace needs to handle */ if (!tb[IFLA_LINKINFO]) return 0; parse_rtattr_nested(li, IFLA_INFO_MAX, tb[IFLA_LINKINFO]); if (!li[IFLA_INFO_KIND]) return 0; if (strcmp(RTA_DATA(li[IFLA_INFO_KIND]), "vrf")) return 0; tb_id = vrf_table_linkinfo(li); if (!tb_id) { fprintf(stderr, "BUG: VRF %s is missing table id\n", name); return 0; } printf("%-16s %5u", name, tb_id); printf("\n"); return 1; } static int ipvrf_show(int argc, char **argv) { struct nlmsg_chain linfo = { NULL, NULL}; int rc = 0; vrf_filter.kind = "vrf"; if (argc > 1) usage(); if (argc == 1) { __u32 tb_id; tb_id = ipvrf_get_table(argv[0]); if (!tb_id) { fprintf(stderr, "Invalid VRF\n"); return 1; } printf("%s %u\n", argv[0], tb_id); return 0; } if (ip_linkaddr_list(0, ipvrf_filter_req, &linfo, NULL) == 0) { struct nlmsg_list *l; unsigned nvrf = 0; int n; n = printf("%-16s %5s\n", "Name", "Table"); printf("%.*s\n", n-1, "-----------------------"); for (l = linfo.head; l; l = l->next) nvrf += ipvrf_print(&l->h); if (!nvrf) printf("No VRF has been configured\n"); } else rc = 1; free_nlmsg_chain(&linfo); return rc; } int do_ipvrf(int argc, char **argv) { if (argc == 0) return ipvrf_show(0, NULL); if (matches(*argv, "identify") == 0) return ipvrf_identify(argc-1, argv+1); if (matches(*argv, "pids") == 0) return ipvrf_pids(argc-1, argv+1); if (matches(*argv, "exec") == 0) return ipvrf_exec(argc-1, argv+1); if (matches(*argv, "show") == 0 || matches(*argv, "lst") == 0 || matches(*argv, "list") == 0) return ipvrf_show(argc-1, argv+1); if (matches(*argv, "help") == 0) usage(); fprintf(stderr, "Command \"%s\" is unknown, try \"ip vrf help\".\n", *argv); exit(-1); }