/*
* Copyright (C) 2016 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <errno.h>
#include <netdb.h>
#include <string.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <linux/netlink.h>
#include <linux/sock_diag.h>
#include <linux/inet_diag.h>
#define LOG_TAG "Netd"
#include <android-base/strings.h>
#include <cutils/log.h>
#include "Fwmark.h"
#include "NetdConstants.h"
#include "Permission.h"
#include "SockDiag.h"
#include <chrono>
#ifndef SOCK_DESTROY
#define SOCK_DESTROY 21
#endif
#define INET_DIAG_BC_MARK_COND 10
namespace {
// Checks, without blocking, whether an NLMSG_ERROR message is waiting on |fd|.
//
// Returns:
//   0        if nothing is queued (EAGAIN), or if the queued message is not a full-sized
//            NLMSG_ERROR; in the latter case the message is left unread for the caller.
//   -errno   if peeking at the socket failed for any other reason.
//   <error>  the error field of the queued NLMSG_ERROR message, which is consumed.
int checkError(int fd) {
    struct {
        nlmsghdr h;
        nlmsgerr err;
    } __attribute__((__packed__)) ack;

    // Peek first so that anything that is not an error stays queued.
    const ssize_t peeked = recv(fd, &ack, sizeof(ack), MSG_DONTWAIT | MSG_PEEK);
    if (peeked == -1) {
        // Either nothing to read (good) or the read itself failed (error).
        if (errno == EAGAIN) {
            return 0;
        }
        return -errno;
    }

    const bool isErrorAck = (peeked == static_cast<ssize_t>(sizeof(ack))) &&
                            (ack.h.nlmsg_type == NLMSG_ERROR);
    if (!isErrorAck) {
        // The kernel replied with something else. Leave it to the caller.
        return 0;
    }

    // It is an error: take it off the queue and report it.
    recv(fd, &ack, sizeof(ack), 0);
    return ack.err.error;
}
} // namespace
// Creates and connects the two NETLINK_INET_DIAG sockets used by this object: mSock, on which
// dump requests are sent and dump replies are read, and mWriteSock, on which SOCK_DESTROY
// requests are written.
//
// Returns false if hasSocks() already reports sockets (presumably: already open), or if creating
// or connecting either socket fails; on failure closeSocks() is called. Returns true on success.
bool SockDiag::open() {
    if (hasSocks()) {
        return false;
    }

    // SOCK_CLOEXEC: do not leak these descriptors into processes exec'd by this one.
    mSock = socket(PF_NETLINK, SOCK_DGRAM | SOCK_CLOEXEC, NETLINK_INET_DIAG);
    mWriteSock = socket(PF_NETLINK, SOCK_DGRAM | SOCK_CLOEXEC, NETLINK_INET_DIAG);
    if (!hasSocks()) {
        // One of the two may have been created; release whatever we got.
        closeSocks();
        return false;
    }

    // Connect both sockets to the kernel (all other sockaddr_nl fields are zero).
    sockaddr_nl nl = { .nl_family = AF_NETLINK };
    if ((connect(mSock, reinterpret_cast<sockaddr *>(&nl), sizeof(nl)) == -1) ||
        (connect(mWriteSock, reinterpret_cast<sockaddr *>(&nl), sizeof(nl)) == -1)) {
        closeSocks();
        return false;
    }

    return true;
}
// Sends a SOCK_DIAG_BY_FAMILY dump request for |proto|/|family| sockets in |states| on mSock.
//
// |iov| must have at least one element: element 0 is overwritten here to point at the request
// header built by this function. Any further elements are sent after it as part of the same
// netlink message and are counted in nlmsg_len.
//
// Returns -errno if the write does not transfer the whole message; otherwise the result of
// checking mSock for an immediate error reply.
int SockDiag::sendDumpRequest(uint8_t proto, uint8_t family, uint32_t states,
                              iovec *iov, int iovcnt) {
    struct {
        nlmsghdr nlh;
        inet_diag_req_v2 req;
    } __attribute__((__packed__)) request = {
        .nlh = {
            .nlmsg_type = SOCK_DIAG_BY_FAMILY,
            .nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
        },
        .req = {
            .sdiag_family = family,
            .sdiag_protocol = proto,
            .idiag_states = states,
        },
    };

    // The first slot is reserved for the header.
    iov[0].iov_base = &request;
    iov[0].iov_len = sizeof(request);

    // The netlink length covers the header plus every caller-supplied segment.
    size_t totalLen = 0;
    for (int idx = 0; idx < iovcnt; ++idx) {
        totalLen += iov[idx].iov_len;
    }
    request.nlh.nlmsg_len = totalLen;

    const ssize_t written = writev(mSock, iov, iovcnt);
    if (written != static_cast<ssize_t>(totalLen)) {
        return -errno;
    }

    return checkError(mSock);
}
// Sends an unfiltered dump request for |proto|/|family| sockets in |states|: the message is just
// the request header, with no extra attributes.
int SockDiag::sendDumpRequest(uint8_t proto, uint8_t family, uint32_t states) {
    // A single placeholder slot; the iovec overload fills it in with the request header.
    iovec headerOnly[] = {
        { nullptr, 0 },
    };
    const int status = sendDumpRequest(proto, family, states, headerOnly, ARRAY_SIZE(headerOnly));
    return status;
}
// Sends a dump request for |proto|/|family| sockets, in every state except TIME_WAIT, carrying an
// INET_DIAG_REQ_BYTECODE attribute with a single INET_DIAG_BC_S_COND host condition on |addrstr|.
//
// |addrstr| must be a numeric IPv4 or IPv6 address. An IPv4 address combined with
// family == AF_INET6 is matched as the corresponding IPv4-mapped IPv6 address.
//
// Returns -EINVAL if |addrstr| cannot be parsed, -EAFNOSUPPORT if the parsed address cannot be
// used with |family|, and otherwise whatever the iovec overload of sendDumpRequest returns.
int SockDiag::sendDumpRequest(uint8_t proto, uint8_t family, const char *addrstr) {
    addrinfo hints = { .ai_flags = AI_NUMERICHOST };
    addrinfo *res;
    // Template for an IPv4-mapped address (::ffff:a.b.c.d); the last word is filled in below.
    in6_addr mapped = { .s6_addr32 = { 0, 0, htonl(0xffff), 0 } };

    // TODO: refactor the netlink parsing code out of system/core, bring it into netd, and stop
    // doing string conversions when they're not necessary.
    if (getaddrinfo(addrstr, nullptr, &hints, &res) != 0) {
        return -EINVAL;
    }

    // So we don't have to call freeaddrinfo on every failure path.
    ScopedAddrinfo resP(res);

    // Pick the raw address bytes to match on, based on what was parsed and what was asked for.
    void *matchAddr;
    uint8_t matchAddrLen;
    if (res->ai_family == AF_INET6 && family == AF_INET6) {
        in6_addr& v6 = reinterpret_cast<sockaddr_in6*>(res->ai_addr)->sin6_addr;
        matchAddr = &v6;
        matchAddrLen = sizeof(v6);
    } else if (res->ai_family == AF_INET && (family == AF_INET || family == AF_INET6)) {
        in_addr& v4 = reinterpret_cast<sockaddr_in*>(res->ai_addr)->sin_addr;
        if (family == AF_INET) {
            matchAddr = &v4;
            matchAddrLen = sizeof(v4);
        } else {
            // IPv4 address on an IPv6 dump: match the IPv4-mapped form.
            mapped.s6_addr32[3] = v4.s_addr;
            matchAddr = &mapped;
            matchAddrLen = sizeof(mapped);
        }
    } else {
        return -EAFNOSUPPORT;
    }

    // Match the whole address. The "yes" offset is the length of the whole one-instruction
    // program (op + condition + address bytes); the "no" offset lies 4 bytes beyond that.
    uint8_t prefixlen = matchAddrLen * 8;
    uint8_t yesjump = sizeof(inet_diag_bc_op) + sizeof(inet_diag_hostcond) + matchAddrLen;
    uint8_t nojump = yesjump + 4;

    struct {
        nlattr nla;
        inet_diag_bc_op op;
        inet_diag_hostcond cond;
    } __attribute__((__packed__)) attrs = {
        .nla = {
            .nla_type = INET_DIAG_REQ_BYTECODE,
        },
        .op = {
            INET_DIAG_BC_S_COND,
            yesjump,
            nojump,
        },
        .cond = {
            family,
            prefixlen,
            -1,
            {}
        },
    };

    // The address bytes are sent as a separate iovec right after attrs, but they belong to the
    // same attribute, so they are counted in its length.
    attrs.nla.nla_len = sizeof(attrs) + matchAddrLen;

    iovec iov[] = {
        { nullptr, 0 },  // Filled in with the request header by the callee.
        { &attrs, sizeof(attrs) },
        { matchAddr, matchAddrLen },
    };

    const uint32_t states = ~(1 << TCP_TIME_WAIT);
    return sendDumpRequest(proto, family, states, iov, ARRAY_SIZE(iov));
}
// Reads the replies to a dump request from mSock and runs |callback| on every socket reported.
//
// For each inet_diag_msg, callback(proto, msg) is invoked; if it returns true the socket is
// passed to sockDestroy(). When NLMSG_DONE is seen, callback(proto, nullptr) is invoked once
// (its return value is ignored) and 0 is returned.
//
// Returns 0 on NLMSG_DONE or when read() returns 0, -errno if read() fails, or the error field of
// an NLMSG_ERROR message if one is received.
int SockDiag::readDiagMsg(uint8_t proto, SockDiag::DumpCallback callback) {
    char buf[kBufferSize];

    for (;;) {
        const ssize_t bytesread = read(mSock, buf, sizeof(buf));
        if (bytesread < 0) {
            return -errno;
        }

        // Walk every netlink message contained in this read.
        uint32_t remaining = bytesread;
        nlmsghdr *nlh = reinterpret_cast<nlmsghdr *>(buf);
        while (NLMSG_OK(nlh, remaining)) {
            if (nlh->nlmsg_type == NLMSG_DONE) {
                // End of dump: let the callback know there is nothing more.
                callback(proto, nullptr);
                return 0;
            }

            if (nlh->nlmsg_type == NLMSG_ERROR) {
                nlmsgerr *err = reinterpret_cast<nlmsgerr *>(NLMSG_DATA(nlh));
                return err->error;
            }

            inet_diag_msg *msg = reinterpret_cast<inet_diag_msg *>(NLMSG_DATA(nlh));
            if (callback(proto, msg)) {
                sockDestroy(proto, msg);
            }

            nlh = NLMSG_NEXT(nlh, remaining);
        }

        if (bytesread == 0) {
            // Nothing more to read.
            return 0;
        }
    }
}
// Determines whether a socket is a loopback socket. Does not check socket state.
//
// A socket is treated as loopback when either endpoint is in 127.0.0.0/8 (as a plain IPv4
// address, or as an IPv4-mapped IPv6 address), when either endpoint is ::1, or when the source
// and destination addresses are identical. Returns false for any family other than AF_INET and
// AF_INET6.
bool SockDiag::isLoopbackSocket(const inet_diag_msg *msg) {
    switch (msg->idiag_family) {
        case AF_INET:
            // Old kernels only copy the IPv4 address and leave the other 12 bytes uninitialized.
            // Addresses are in network byte order; IN_LOOPBACK wants host byte order.
            return IN_LOOPBACK(ntohl(msg->id.idiag_src[0])) ||
                   IN_LOOPBACK(ntohl(msg->id.idiag_dst[0])) ||
                   msg->id.idiag_src[0] == msg->id.idiag_dst[0];

        case AF_INET6: {
            const in6_addr *src = reinterpret_cast<const in6_addr *>(&msg->id.idiag_src);
            const in6_addr *dst = reinterpret_cast<const in6_addr *>(&msg->id.idiag_dst);
            // The embedded IPv4 address of a mapped address is also in network byte order, so it
            // needs the same conversion as the AF_INET case before being handed to IN_LOOPBACK.
            return (IN6_IS_ADDR_V4MAPPED(src) && IN_LOOPBACK(ntohl(src->s6_addr32[3]))) ||
                   (IN6_IS_ADDR_V4MAPPED(dst) && IN_LOOPBACK(ntohl(dst->s6_addr32[3]))) ||
                   IN6_IS_ADDR_LOOPBACK(src) || IN6_IS_ADDR_LOOPBACK(dst) ||
                   !memcmp(src, dst, sizeof(*src));
        }

        default:
            return false;
    }
}
// Asks the kernel, via a SOCK_DESTROY request written to mWriteSock, to destroy the socket
// described by |msg|. A null |msg| is a no-op that returns 0.
//
// Returns -errno if the request could not be written in full, otherwise the result of checking
// mWriteSock for an error reply. mSocketsDestroyed is incremented when no error is reported.
int SockDiag::sockDestroy(uint8_t proto, const inet_diag_msg *msg) {
    if (msg == nullptr) {
        return 0;
    }

    // Identify the socket by the family, state and id that the dump reported for it.
    DestroyRequest request = {
        .nlh = {
            .nlmsg_type = SOCK_DESTROY,
            .nlmsg_flags = NLM_F_REQUEST,
        },
        .req = {
            .sdiag_family = msg->idiag_family,
            .sdiag_protocol = proto,
            .idiag_states = (uint32_t) (1 << msg->idiag_state),
            .id = msg->id,
        },
    };
    request.nlh.nlmsg_len = sizeof(request);

    const ssize_t written = write(mWriteSock, &request, sizeof(request));
    if (written < static_cast<ssize_t>(sizeof(request))) {
        return -errno;
    }

    const int status = checkError(mWriteSock);
    if (status == 0) {
        mSocketsDestroyed++;
    }
    return status;
}
// Destroys every |proto| socket of the given |family| that the kernel reports for |addrstr|.
//
// Returns -EBADFD if hasSocks() is false, the error from sending the dump request if that fails,
// and otherwise the result of reading the dump.
int SockDiag::destroySockets(uint8_t proto, int family, const char *addrstr) {
    if (!hasSocks()) {
        return -EBADFD;
    }

    const int sendStatus = sendDumpRequest(proto, family, addrstr);
    if (sendStatus != 0) {
        return sendStatus;
    }

    // The request itself already filters by address, so everything dumped gets destroyed.
    auto destroyAll = [] (uint8_t, const inet_diag_msg*) { return true; };
    return readDiagMsg(proto, destroyAll);
}
// Destroys all TCP sockets on the address |addrstr|.
//
// If |addrstr| contains no ':' it is first used for an AF_INET pass; the AF_INET6 pass is always
// run afterwards. Returns the number of sockets destroyed, or the (non-zero) error from the first
// pass that fails.
int SockDiag::destroySockets(const char *addrstr) {
    Stopwatch s;
    mSocketsDestroyed = 0;

    const bool hasColon = (strchr(addrstr, ':') != nullptr);
    if (!hasColon) {
        const int v4Status = destroySockets(IPPROTO_TCP, AF_INET, addrstr);
        if (v4Status != 0) {
            ALOGE("Failed to destroy IPv4 sockets on %s: %s", addrstr, strerror(-v4Status));
            return v4Status;
        }
    }

    const int v6Status = destroySockets(IPPROTO_TCP, AF_INET6, addrstr);
    if (v6Status != 0) {
        ALOGE("Failed to destroy IPv6 sockets on %s: %s", addrstr, strerror(-v6Status));
        return v6Status;
    }

    // Only log when something actually happened.
    if (mSocketsDestroyed > 0) {
        ALOGI("Destroyed %d sockets on %s in %.1f ms", mSocketsDestroyed, addrstr, s.timeTaken());
    }
    return mSocketsDestroyed;
}
// Dumps TCP sockets in the ESTABLISHED, SYN_SENT and SYN_RECV states, first for IPv4 and then for
// IPv6, and destroys every socket for which |destroyFilter| returns true.
//
// |iov|/|iovcnt| are handed to sendDumpRequest for each family, so element 0 must be a spare slot
// for the request header and any further elements are sent as part of the request.
// |what| only appears in error log messages.
//
// Returns 0 on success, or the first error from sending a request or reading its replies.
int SockDiag::destroyLiveSockets(DumpCallback destroyFilter, const char *what,
                                 iovec *iov, int iovcnt) {
    int proto = IPPROTO_TCP;
    const uint32_t liveStates =
            (1 << TCP_ESTABLISHED) | (1 << TCP_SYN_SENT) | (1 << TCP_SYN_RECV);
    const int families[] = {AF_INET, AF_INET6};

    for (const int family : families) {
        const char *familyName = "IPv6";
        if (family == AF_INET) {
            familyName = "IPv4";
        }

        const int dumpStatus = sendDumpRequest(proto, family, liveStates, iov, iovcnt);
        if (dumpStatus != 0) {
            ALOGE("Failed to dump %s sockets for %s: %s", familyName, what,
                  strerror(-dumpStatus));
            return dumpStatus;
        }

        const int readStatus = readDiagMsg(proto, destroyFilter);
        if (readStatus != 0) {
            ALOGE("Failed to destroy %s sockets for %s: %s", familyName, what,
                  strerror(-readStatus));
            return readStatus;
        }
    }

    return 0;
}
// Destroys the |proto| sockets owned by |uid| that are in the ESTABLISHED, SYN_SENT or SYN_RECV
// state, for IPv4 and then IPv6. If |excludeLoopback| is set, sockets that isLoopbackSocket()
// classifies as loopback are left alone.
//
// Returns 0 on success, or the first error from sending a dump request or reading its replies.
int SockDiag::destroySockets(uint8_t proto, const uid_t uid, bool excludeLoopback) {
    mSocketsDestroyed = 0;
    Stopwatch s;

    auto shouldDestroy = [uid, excludeLoopback] (uint8_t, const inet_diag_msg *msg) {
        if (msg == nullptr || msg->idiag_uid != uid) {
            return false;
        }
        return !(excludeLoopback && isLoopbackSocket(msg));
    };

    const uint32_t liveStates =
            (1 << TCP_ESTABLISHED) | (1 << TCP_SYN_SENT) | (1 << TCP_SYN_RECV);
    const int families[] = {AF_INET, AF_INET6};

    for (const int family : families) {
        const char *familyName = "IPv6";
        if (family == AF_INET) {
            familyName = "IPv4";
        }

        const int dumpStatus = sendDumpRequest(proto, family, liveStates);
        if (dumpStatus != 0) {
            ALOGE("Failed to dump %s sockets for UID: %s", familyName, strerror(-dumpStatus));
            return dumpStatus;
        }

        const int readStatus = readDiagMsg(proto, shouldDestroy);
        if (readStatus != 0) {
            ALOGE("Failed to destroy %s sockets for UID: %s", familyName,
                  strerror(-readStatus));
            return readStatus;
        }
    }

    // Only log when something actually happened.
    if (mSocketsDestroyed > 0) {
        ALOGI("Destroyed %d sockets for UID in %.1f ms", mSocketsDestroyed, s.timeTaken());
    }
    return 0;
}
// Destroys all "live" TCP sockets (see destroyLiveSockets) whose owning UID is contained in
// |uidRanges| and is not listed in |skipUids|. If |excludeLoopback| is set, sockets that
// isLoopbackSocket() classifies as loopback are left alone.
//
// Returns 0 on success, or the error reported by destroyLiveSockets.
int SockDiag::destroySockets(const UidRanges& uidRanges, const std::set<uid_t>& skipUids,
                             bool excludeLoopback) {
    mSocketsDestroyed = 0;
    Stopwatch s;

    auto shouldDestroy = [&] (uint8_t, const inet_diag_msg *msg) {
        return msg != nullptr &&
               uidRanges.hasUid(msg->idiag_uid) &&
               skipUids.find(msg->idiag_uid) == skipUids.end() &&
               !(excludeLoopback && isLoopbackSocket(msg));
    };

    // No extra attributes: a single slot for the request header.
    iovec iov[] = {
        { nullptr, 0 },
    };

    if (int ret = destroyLiveSockets(shouldDestroy, "UID", iov, ARRAY_SIZE(iov))) {
        return ret;
    }

    if (mSocketsDestroyed > 0) {
        // Built only when we actually log. std::set already iterates in ascending order, so the
        // copy needs no additional sorting.
        const std::vector<uid_t> skipUidList(skipUids.begin(), skipUids.end());
        ALOGI("Destroyed %d sockets for %s skip={%s} in %.1f ms",
              mSocketsDestroyed, uidRanges.toString().c_str(),
              android::base::Join(skipUidList, " ").c_str(), s.timeTaken());
    }

    return 0;
}
// Destroys all "live" (ESTABLISHED, SYN_SENT, SYN_RECV) TCP sockets on the specified netId where:
// 1. The opening app no longer has permission to use this network, or:
// 2. The opening app does have permission, but did not explicitly select this network.
//
// We destroy sockets without the explicit bit because we want to avoid the situation where a
// privileged app uses its privileges without knowing it is doing so. For example, a privileged app
// might have opened a socket on this network just because it was the default network at the
// time. If we don't kill these sockets, those apps could continue to use them without realizing
// that they are now sending and receiving traffic on a network that is now restricted.
//
// The selection is done with a bytecode filter on the socket mark that is attached to the dump
// request. If |excludeLoopback| is set, sockets that isLoopbackSocket() classifies as loopback
// are left alone.
//
// Returns 0 on success, or the error reported by destroyLiveSockets.
int SockDiag::destroySocketsLackingPermission(unsigned netId, Permission permission,
                                              bool excludeLoopback) {
    // One INET_DIAG_BC_MARK_COND instruction: the op followed by the mark and mask to compare.
    struct markmatch {
        inet_diag_bc_op op;
        // TODO: switch to inet_diag_markcond
        __u32 mark;
        __u32 mask;
    } __attribute__((packed));
    constexpr uint8_t matchlen = sizeof(markmatch);

    // Mark/mask pair selecting sockets whose mark carries this netId.
    Fwmark netIdMark, netIdMask;
    netIdMark.netId = netId;
    netIdMask.netId = 0xffff;

    // Used below as both mark and mask: matches sockets that have the explicitlySelected bit and
    // the given permission bits set.
    Fwmark controlMark;
    controlMark.explicitlySelected = true;
    controlMark.permission = permission;

    // A SOCK_DIAG bytecode program that accepts the sockets we intend to destroy.
    struct bytecode {
        markmatch netIdMatch;
        markmatch controlMatch;
        inet_diag_bc_op controlJump;
    } __attribute__((packed)) bytecode;

    // The length of the INET_DIAG_BC_JMP instruction.
    constexpr uint8_t jmplen = sizeof(inet_diag_bc_op);
    // Jump exactly this far past the end of the program to reject.
    constexpr uint8_t rejectoffset = sizeof(inet_diag_bc_op);
    // Total length of the program.
    constexpr uint8_t bytecodelen = sizeof(bytecode);

    bytecode = (struct bytecode) {
        // If netId matches, continue, otherwise, reject (i.e., leave socket alone).
        { { INET_DIAG_BC_MARK_COND, matchlen, bytecodelen + rejectoffset },
          netIdMark.intValue, netIdMask.intValue },

        // If explicit and permission bits match, go to the JMP below which rejects the socket
        // (i.e., we leave it alone). Otherwise, jump to the end of the program, which accepts the
        // socket (so we destroy it).
        { { INET_DIAG_BC_MARK_COND, matchlen, matchlen + jmplen },
          controlMark.intValue, controlMark.intValue },

        // This JMP unconditionally rejects the packet by jumping to the reject target. It is
        // necessary to keep the kernel bytecode verifier happy. If we don't have a JMP the bytecode
        // is invalid because the target of every no jump must always be reachable by yes jumps.
        // Without this JMP, the accept target is not reachable by yes jumps and the program will
        // be rejected by the validator.
        { INET_DIAG_BC_JMP, jmplen, jmplen + rejectoffset },

        // We have reached the end of the program. Accept the socket, and destroy it below.
    };

    // Attribute header for the program. Designators follow the declaration order of struct nlattr
    // (nla_len, then nla_type), as C++ requires.
    struct nlattr nla = {
        .nla_len = sizeof(struct nlattr) + bytecodelen,
        .nla_type = INET_DIAG_REQ_BYTECODE,
    };

    iovec iov[] = {
        { nullptr, 0 },  // Filled in with the request header by sendDumpRequest.
        { &nla, sizeof(nla) },
        { &bytecode, bytecodelen },
    };

    mSocketsDestroyed = 0;
    Stopwatch s;

    // The bytecode has already selected by mark; here we only apply the loopback exclusion.
    auto shouldDestroy = [&] (uint8_t, const inet_diag_msg *msg) {
        return msg != nullptr && !(excludeLoopback && isLoopbackSocket(msg));
    };

    if (int ret = destroyLiveSockets(shouldDestroy, "permission change", iov, ARRAY_SIZE(iov))) {
        return ret;
    }

    if (mSocketsDestroyed > 0) {
        // netId is unsigned, hence %u.
        ALOGI("Destroyed %d sockets for netId %u permission=%d in %.1f ms",
              mSocketsDestroyed, netId, permission, s.timeTaken());
    }

    return 0;
}