/* $NetBSD: res_send.c,v 1.9 2006/01/24 17:41:25 christos Exp $ */
/*
* Copyright (c) 1985, 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Portions Copyright (c) 1993 by Digital Equipment Corporation.
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies, and that
* the name of Digital Equipment Corporation not be used in advertising or
* publicity pertaining to distribution of the document or software without
* specific, written prior permission.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
* WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL DIGITAL EQUIPMENT
* CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
* DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
* PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*/
/*
* Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
* Portions Copyright (c) 1996-1999 by Internet Software Consortium.
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
* OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/*
* Send query to name server and wait for reply.
*/
#define LOG_TAG "res_send"
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <arpa/inet.h>
#include <arpa/nameser.h>
#include <netinet/in.h>
#include <errno.h>
#include <fcntl.h>
#include <netdb.h>
#include <poll.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <android-base/logging.h>
#include <android/multinetwork.h> // ResNsendFlags
#include <netdutils/Slice.h>
#include "DnsTlsDispatcher.h"
#include "DnsTlsTransport.h"
#include "PrivateDnsConfiguration.h"
#include "netd_resolv/resolv.h"
#include "netd_resolv/stats.h"
#include "private/android_filesystem_config.h"
#include "res_state_ext.h"
#include "resolv_cache.h"
#include "resolv_private.h"
// TODO: move this code into a dedicated namespace (e.g. android::netd_resolv) for libnetd_resolv.
using namespace android::net;
using android::netdutils::Slice;
// File-scope DnsTlsDispatcher instance.
// NOTE(review): presumably used by res_tls_send() to dispatch DNS-over-TLS queries; confirm
// against that function's body.
static DnsTlsDispatcher sDnsTlsDispatcher;
// Forward declarations of the file-local helpers defined further down in this file.
// Nameserver address helpers.
static int get_salen(const struct sockaddr*);
static struct sockaddr* get_nsaddr(res_state, size_t);
// Per-nameserver transports: send_vc() uses a stream socket, send_dg() a datagram socket.
static int send_vc(res_state, res_params* params, const u_char*, int, u_char*, int, int*, int,
time_t*, int*, int*);
static int send_dg(res_state, res_params* params, const u_char*, int, u_char*, int, int*, int, int*,
int*, time_t*, int*, int*);
// Debug logging helpers; both only log when RES_DEBUG is set in the resolver options.
static void Aerror(const res_state, const char*, int, const struct sockaddr*, int);
static void Perror(const res_state, const char*, int);
// Compares two socket addresses (family, port and address).
static int sock_eq(struct sockaddr*, struct sockaddr*);
// Non-blocking connect bounded by a timeout, and the deadline-based poll helper it relies on.
static int connect_with_timeout(int sock, const struct sockaddr* nsap, socklen_t salen,
const struct timespec timeout);
static int retrying_poll(const int sock, short events, const struct timespec* finish);
// Private-DNS send path tried by res_nsend() when statp->use_local_nameserver is false.
static int res_tls_send(res_state, const Slice query, const Slice answer, int* rcode,
bool* fallback);
/* BIONIC-BEGIN: implement source port randomization */
// BEGIN: Code copied from ISC eventlib
// TODO: move away from this code
// Nanoseconds per second; evAddTime()/evSubTime() use it to carry/borrow between tv_sec and tv_nsec.
#define BILLION 1000000000
// Builds a timespec from a seconds / nanoseconds pair. The values are stored exactly as
// given; no normalization of the nanosecond field is performed.
static struct timespec evConsTime(time_t sec, long nsec) {
    struct timespec result;
    result.tv_nsec = nsec;
    result.tv_sec = sec;
    return result;
}
// Returns addend1 + addend2. A nanosecond sum of one second or more is carried into
// tv_sec once, so the result is normalized as long as both inputs were.
static struct timespec evAddTime(struct timespec addend1, struct timespec addend2) {
    struct timespec sum;
    sum.tv_sec = addend1.tv_sec + addend2.tv_sec;
    sum.tv_nsec = addend1.tv_nsec + addend2.tv_nsec;
    const bool carry = (sum.tv_nsec >= BILLION);
    if (carry) {
        sum.tv_nsec -= BILLION;
        sum.tv_sec++;
    }
    return sum;
}
// Returns minuend - subtrahend. When the nanosecond part of the subtrahend is larger,
// one second is borrowed from tv_sec so that tv_nsec stays non-negative.
static struct timespec evSubTime(struct timespec minuend, struct timespec subtrahend) {
    struct timespec diff;
    diff.tv_sec = minuend.tv_sec - subtrahend.tv_sec;
    if (minuend.tv_nsec < subtrahend.tv_nsec) {
        // Borrow a full second to cover the nanosecond deficit.
        diff.tv_sec--;
        diff.tv_nsec = BILLION - subtrahend.tv_nsec + minuend.tv_nsec;
    } else {
        diff.tv_nsec = minuend.tv_nsec - subtrahend.tv_nsec;
    }
    return diff;
}
// Three-way comparison of two timespecs: returns -1 if a < b, 0 if equal, 1 if a > b.
// Seconds are compared first; nanoseconds only break a tie.
static int evCmpTime(struct timespec a, struct timespec b) {
#define SGN(x) ((x) < 0 ? (-1) : (x) > 0 ? (1) : (0));
    const time_t sec_diff = a.tv_sec - b.tv_sec;
    if (sec_diff == 0) {
        const long nsec_diff = a.tv_nsec - b.tv_nsec;
        return SGN(nsec_diff);
    }
    return SGN(sec_diff);
}
// Returns the current CLOCK_REALTIME reading. The return value of clock_gettime() is not
// checked.
static struct timespec evNowTime(void) {
    struct timespec current;
    clock_gettime(CLOCK_REALTIME, &current);
    return current;
}
// Builds an iovec describing `cnt` bytes starting at `buf`. The struct is first filled
// with the byte 0xf5 so that any bytes not covered by the two assigned fields hold a
// fixed pattern rather than stack garbage.
static struct iovec evConsIovec(void* buf, size_t cnt) {
    struct iovec vec;
    memset(&vec, 0xf5, sizeof(vec));
    vec.iov_len = cnt;
    vec.iov_base = buf;
    return vec;
}
// END: Code copied from ISC eventlib
// Binds socket `s` to the wildcard address of `family` on a randomly chosen source port.
// Up to 10 random ports in the range 1025..65534 are tried; if none can be bound, the
// port is set to 0 so the system picks one.
// Returns 0 on success, or -1 on failure (errno is EPROTO for an unsupported family,
// otherwise whatever the final bind() left).
static int random_bind(int s, int family) {
    sockaddr_union u;
    // Zeroing the whole union also leaves the IPv4/IPv6 address set to 'any'.
    memset(&u, 0, sizeof u);

    socklen_t slen;
    if (family == AF_INET) {
        u.sin.sin_family = family;
        slen = sizeof u.sin;
    } else if (family == AF_INET6) {
        u.sin6.sin6_family = family;
        slen = sizeof u.sin6;
    } else {
        errno = EPROTO;
        return -1;
    }
    const bool is_v4 = (family == AF_INET);

    // First try a handful of random source ports.
    for (int attempt = 0; attempt < 10; ++attempt) {
        // Pick a random port between 1025 .. 65534.
        const int port = 1025 + (arc4random_uniform(65535 - 1025));
        if (is_v4) {
            u.sin.sin_port = htons(port);
        } else {
            u.sin6.sin6_port = htons(port);
        }
        if (bind(s, &u.sa, slen) == 0) return 0;
    }

    // Nothing after 10 attempts: our network table is probably busy, so let the system
    // decide which port is best.
    if (is_v4) {
        u.sin.sin_port = 0;
    } else {
        u.sin6.sin6_port = 0;
    }
    return bind(s, &u.sa, slen);
}
/* BIONIC-END */
// Disables every nameserver except the `selectedServer`-th usable one.
// `selectedServer` is 1-based and counts only entries of usable_servers[] that are true on
// entry; all other true entries among the first `nscount` are cleared. Entries that are
// already false are left untouched.
static void res_set_usable_server(int selectedServer, int nscount, bool usable_servers[]) {
    int usableSeen = 0;
    for (int idx = 0; idx < nscount; ++idx) {
        if (!usable_servers[idx]) continue;  // already disabled, does not count
        ++usableSeen;
        usable_servers[idx] = (usableSeen == selectedServer);
    }
}
/* int
 * res_ourserver_p(statp, sa)
 *	checks whether "sa" is one of the nameserver addresses configured in "statp"
 *	(as returned by get_nsaddr() for each index below statp->nscount).
 *	A configured entry matches when family and port are equal and its address is
 *	either the unspecified address or equal to the address in "sa".
 * returns:
 *	0  : not found
 *	>0 : found
 * author:
 *	paul vixie, 29may94
 */
static int res_ourserver_p(const res_state statp, const sockaddr* sa) {
    if (sa->sa_family == AF_INET) {
        const sockaddr_in* from4 = (const struct sockaddr_in*) (const void*) sa;
        for (int idx = 0; idx < statp->nscount; ++idx) {
            const sockaddr_in* cand =
                    (struct sockaddr_in*) (void*) get_nsaddr(statp, (size_t) idx);
            if (cand->sin_family != from4->sin_family) continue;
            if (cand->sin_port != from4->sin_port) continue;
            if (cand->sin_addr.s_addr == INADDR_ANY ||
                cand->sin_addr.s_addr == from4->sin_addr.s_addr) {
                return 1;
            }
        }
        return 0;
    }

    if (sa->sa_family == AF_INET6) {
        // Without the extended state no IPv6 lookup is attempted.
        if (statp->_u._ext.ext == NULL) return 0;
        const sockaddr_in6* from6 = (const struct sockaddr_in6*) (const void*) sa;
        for (int idx = 0; idx < statp->nscount; ++idx) {
            const sockaddr_in6* cand6 =
                    (struct sockaddr_in6*) (void*) get_nsaddr(statp, (size_t) idx);
            if (cand6->sin6_family != from6->sin6_family) continue;
            if (cand6->sin6_port != from6->sin6_port) continue;
#ifdef HAVE_SIN6_SCOPE_ID
            if (cand6->sin6_scope_id != 0 && cand6->sin6_scope_id != from6->sin6_scope_id) {
                continue;
            }
#endif
            if (IN6_IS_ADDR_UNSPECIFIED(&cand6->sin6_addr) ||
                IN6_ARE_ADDR_EQUAL(&cand6->sin6_addr, &from6->sin6_addr)) {
                return 1;
            }
        }
        return 0;
    }

    // Any other address family never matches.
    return 0;
}
/* int
 * res_nameinquery(name, type, cl, buf, eom)
 *	look for (name, type, cl) in the query section of packet (buf, eom)
 * requires:
 *	buf + HFIXEDSZ <= eom (a shorter packet is reported as a format error)
 * returns:
 *	-1 : format error
 *	0  : not found
 *	>0 : found
 * author:
 *	paul vixie, 29may94
 */
int res_nameinquery(const char* name, int type, int cl, const u_char* buf, const u_char* eom) {
    // The fixed header must be present before qdcount can be read from it.
    if (buf + HFIXEDSZ > eom) return (-1);

    const u_char* cp = buf + HFIXEDSZ;
    int qdcount = ntohs(((const HEADER*) (const void*) buf)->qdcount);
    while (qdcount-- > 0) {
        char tname[MAXDNAME + 1];
        const int n = dn_expand(buf, eom, cp, tname, sizeof tname);
        if (n < 0) return (-1);
        cp += n;
        if (cp + 2 * INT16SZ > eom) return (-1);
        // QTYPE and QCLASS follow the name at an arbitrary byte offset, so assemble the
        // big-endian 16-bit values byte by byte instead of dereferencing a possibly
        // unaligned uint16_t pointer.
        const int ttype = (cp[0] << 8) | cp[1];
        cp += INT16SZ;
        const int tclass = (cp[0] << 8) | cp[1];
        cp += INT16SZ;
        if (ttype == type && tclass == cl && ns_samename(tname, name) == 1) return (1);
    }
    return (0);
}
/* int
 * res_queriesmatch(buf1, eom1, buf2, eom2)
 *	is there a 1:1 mapping of (name,type,class)
 *	in (buf1,eom1) and (buf2,eom2)?
 * returns:
 *	-1 : format error
 *	0  : not a 1:1 mapping
 *	>0 : is a 1:1 mapping
 * author:
 *	paul vixie, 29may94
 */
int res_queriesmatch(const u_char* buf1, const u_char* eom1, const u_char* buf2,
                     const u_char* eom2) {
    // Make sure both packets contain a full header before reading any header field.
    if (buf1 + HFIXEDSZ > eom1 || buf2 + HFIXEDSZ > eom2) return (-1);

    const HEADER* hp1 = (const HEADER*) (const void*) buf1;
    const HEADER* hp2 = (const HEADER*) (const void*) buf2;

    /*
     * Only header section present in replies to
     * dynamic update packets.
     */
    if ((hp1->opcode == ns_o_update) && (hp2->opcode == ns_o_update)) return (1);

    int qdcount = ntohs(hp1->qdcount);
    if (qdcount != ntohs(hp2->qdcount)) return (0);

    // Every question of the first packet must also appear in the second one.
    const u_char* cp = buf1 + HFIXEDSZ;
    while (qdcount-- > 0) {
        char tname[MAXDNAME + 1];
        const int n = dn_expand(buf1, eom1, cp, tname, sizeof tname);
        if (n < 0) return (-1);
        cp += n;
        if (cp + 2 * INT16SZ > eom1) return (-1);
        // QTYPE/QCLASS sit at an arbitrary byte offset; read the big-endian values byte
        // by byte rather than through a possibly unaligned uint16_t pointer.
        const int ttype = (cp[0] << 8) | cp[1];
        cp += INT16SZ;
        const int tclass = (cp[0] << 8) | cp[1];
        cp += INT16SZ;
        if (!res_nameinquery(tname, ttype, tclass, buf2, eom2)) return (0);
    }
    return (1);
}
/*
 * Sends the query in (buf, buflen) and stores the answer in (ans, anssiz).
 *
 * The resolver cache for statp->netid is consulted first. On a miss the query is sent to
 * the configured nameservers: first through res_tls_send() unless
 * statp->use_local_nameserver is set, then over TCP (send_vc) or UDP (send_dg).
 * With ANDROID_RESOLV_NO_RETRY in `flags`, only one attempt is made and, when several
 * servers are usable, a single server is selected from the query id.
 *
 * returns:
 *	>0 : length of the answer; *rcode holds the response code
 *	<0 : negated errno value (-EINVAL, -ESRCH, -ETIMEDOUT, -ECONNREFUSED, ...)
 */
int res_nsend(res_state statp, const u_char* buf, int buflen, u_char* ans, int anssiz, int* rcode,
              uint32_t flags) {
    int gotsomewhere, terrno, v_circuit, resplen, n;
    ResolvCacheStatus cache_status = RESOLV_CACHE_UNSUPPORTED;

    if (anssiz < HFIXEDSZ) {
        // TODO: Remove errno once callers stop using it
        errno = EINVAL;
        return -EINVAL;
    }
    LOG(DEBUG) << __func__;
    res_pquery(buf, buflen);

    // Queries larger than PACKETSZ, or RES_USEVC, force the stream transport.
    v_circuit = (statp->options & RES_USEVC) || buflen > PACKETSZ;
    gotsomewhere = 0;
    terrno = ETIMEDOUT;

    int anslen = 0;
    cache_status = _resolv_cache_lookup(statp->netid, buf, buflen, ans, anssiz, &anslen, flags);

    if (cache_status == RESOLV_CACHE_FOUND) {
        HEADER* hp = (HEADER*)(void*)ans;
        *rcode = hp->rcode;
        return anslen;
    } else if (cache_status != RESOLV_CACHE_UNSUPPORTED) {
        // had a cache miss for a known network, so populate the thread private
        // data so the normal resolve path can do its thing
        _resolv_populate_res_for_net(statp);
    }
    if (statp->nscount == 0) {
        // We have no nameservers configured, so there's no point trying.
        // Tell the cache the query failed, or any retries and anyone else asking the same
        // question will block for PENDING_REQUEST_TIMEOUT seconds instead of failing fast.
        _resolv_cache_query_failed(statp->netid, buf, buflen, flags);

        // TODO: Remove errno once callers stop using it
        errno = ESRCH;
        return -ESRCH;
    }

    /*
     * If the ns_addr_list in the resolver context has changed, then
     * invalidate our cached copy and the associated timing data.
     */
    if (statp->_u._ext.nscount != 0) {
        int needclose = 0;
        struct sockaddr_storage peer;
        socklen_t peerlen;

        if (statp->_u._ext.nscount != statp->nscount) {
            needclose++;
        } else {
            for (int ns = 0; ns < statp->nscount; ns++) {
                if (statp->nsaddr_list[ns].sin_family &&
                    !sock_eq((struct sockaddr*) (void*) &statp->nsaddr_list[ns],
                             (struct sockaddr*) (void*) &statp->_u._ext.ext->nsaddrs[ns])) {
                    needclose++;
                    break;
                }

                // A cached socket must still be connected to the server in this slot.
                if (statp->_u._ext.nssocks[ns] == -1) continue;
                peerlen = sizeof(peer);
                if (getpeername(statp->_u._ext.nssocks[ns], (struct sockaddr*) (void*) &peer,
                                &peerlen) < 0) {
                    needclose++;
                    break;
                }
                if (!sock_eq((struct sockaddr*) (void*) &peer, get_nsaddr(statp, (size_t) ns))) {
                    needclose++;
                    break;
                }
            }
        }
        if (needclose) {
            res_nclose(statp);
            statp->_u._ext.nscount = 0;
        }
    }

    /*
     * Maybe initialize our private copy of the ns_addr_list.
     */
    if (statp->_u._ext.nscount == 0) {
        for (int ns = 0; ns < statp->nscount; ns++) {
            statp->_u._ext.nstimes[ns] = RES_MAXTIME;
            statp->_u._ext.nssocks[ns] = -1;
            if (!statp->nsaddr_list[ns].sin_family) continue;
            statp->_u._ext.ext->nsaddrs[ns].sin = statp->nsaddr_list[ns];
        }
        statp->_u._ext.nscount = statp->nscount;
    }

    /*
     * Some resolvers want to even out the load on their nameservers.
     * Note that RES_BLAST overrides RES_ROTATE.
     */
    if ((statp->options & RES_ROTATE) != 0U && (statp->options & RES_BLAST) == 0U) {
        // Rotate every per-server array left by one; entry 0 moves to the end.
        sockaddr_union inu;
        struct sockaddr_in ina;
        int lastns = statp->nscount - 1;
        int fd;
        u_int16_t nstime;

        if (statp->_u._ext.ext != NULL) inu = statp->_u._ext.ext->nsaddrs[0];
        ina = statp->nsaddr_list[0];
        fd = statp->_u._ext.nssocks[0];
        nstime = statp->_u._ext.nstimes[0];
        for (int ns = 0; ns < lastns; ns++) {
            if (statp->_u._ext.ext != NULL)
                statp->_u._ext.ext->nsaddrs[ns] = statp->_u._ext.ext->nsaddrs[ns + 1];
            statp->nsaddr_list[ns] = statp->nsaddr_list[ns + 1];
            statp->_u._ext.nssocks[ns] = statp->_u._ext.nssocks[ns + 1];
            statp->_u._ext.nstimes[ns] = statp->_u._ext.nstimes[ns + 1];
        }
        if (statp->_u._ext.ext != NULL) statp->_u._ext.ext->nsaddrs[lastns] = inu;
        statp->nsaddr_list[lastns] = ina;
        statp->_u._ext.nssocks[lastns] = fd;
        statp->_u._ext.nstimes[lastns] = nstime;
    }

    res_stats stats[MAXNS];
    res_params params;
    int revision_id = resolv_cache_get_resolver_stats(statp->netid, &params, stats);
    if (revision_id < 0) {
        // TODO: Remove errno once callers stop using it
        errno = ESRCH;
        return -ESRCH;
    }
    bool usable_servers[MAXNS];
    int usableServersCount = android_net_res_stats_get_usable_servers(
            &params, stats, statp->nscount, usable_servers);

    if ((flags & ANDROID_RESOLV_NO_RETRY) && usableServersCount > 1) {
        auto hp = reinterpret_cast<const HEADER*>(buf);

        // Select a random server based on the query id
        int selectedServer = (hp->id % usableServersCount) + 1;
        res_set_usable_server(selectedServer, statp->nscount, usable_servers);
    }

    /*
     * Send request, RETRY times, or until successful.
     */
    int retryTimes = (flags & ANDROID_RESOLV_NO_RETRY) ? 1 : params.retry_count;

    for (int attempt = 0; attempt < retryTimes; ++attempt) {
        for (int ns = 0; ns < statp->nscount; ns++) {
            if (!usable_servers[ns]) continue;
            struct sockaddr* nsap;
            int nsaplen;
            time_t now = 0;
            int delay = 0;
            *rcode = RCODE_INTERNAL_ERROR;
            nsap = get_nsaddr(statp, (size_t) ns);
            nsaplen = get_salen(nsap);

        same_ns:
            // TODO: Since we expect there is only one DNS server being queried here while this
            // function tries to query all of private DNS servers. Consider moving it to other
            // reasonable place. In addition, maybe add stats for private DNS.
            if (!statp->use_local_nameserver) {
                bool fallback = false;
                resplen = res_tls_send(statp, Slice(const_cast<u_char*>(buf), buflen),
                                       Slice(ans, anssiz), rcode, &fallback);
                if (resplen > 0) {
                    if (cache_status == RESOLV_CACHE_NOTFOUND) {
                        _resolv_cache_add(statp->netid, buf, buflen, ans, resplen);
                    }
                    return resplen;
                }
                // Without permission to fall back, a failed TLS query fails the lookup.
                if (!fallback) {
                    _resolv_cache_query_failed(statp->netid, buf, buflen, flags);
                    res_nclose(statp);
                    return -terrno;
                }
            }

            [[maybe_unused]] static const int niflags = NI_NUMERICHOST | NI_NUMERICSERV;
            [[maybe_unused]] char abuf[NI_MAXHOST];

            if (getnameinfo(nsap, (socklen_t)nsaplen, abuf, sizeof(abuf), NULL, 0, niflags) == 0)
                LOG(DEBUG) << __func__ << ": Querying server (# " << ns + 1
                           << ") address = " << abuf;

            if (v_circuit) {
                /* Use VC; at most one attempt per server. */
                bool shouldRecordStats = (attempt == 0);
                attempt = retryTimes;

                n = send_vc(statp, &params, buf, buflen, ans, anssiz, &terrno, ns, &now, rcode,
                            &delay);

                /*
                 * Only record stats the first time we try a query. This ensures that
                 * queries that deterministically fail (e.g., a name that always returns
                 * SERVFAIL or times out) do not unduly affect the stats.
                 */
                if (shouldRecordStats) {
                    res_sample sample;
                    _res_stats_set_sample(&sample, now, *rcode, delay);
                    _resolv_cache_add_resolver_stats_sample(statp->netid, revision_id, ns, &sample,
                                                            params.max_samples);
                }

                LOG(INFO) << __func__ << ": used send_vc " << n;

                if (n < 0) {
                    _resolv_cache_query_failed(statp->netid, buf, buflen, flags);
                    res_nclose(statp);
                    return -terrno;
                }
                if (n == 0) goto next_ns;
                resplen = n;
            } else {
                /* Use datagrams. */
                LOG(INFO) << __func__ << ": using send_dg";

                n = send_dg(statp, &params, buf, buflen, ans, anssiz, &terrno, ns, &v_circuit,
                            &gotsomewhere, &now, rcode, &delay);

                /* Only record stats the first time we try a query. See above. */
                if (attempt == 0) {
                    res_sample sample;
                    _res_stats_set_sample(&sample, now, *rcode, delay);
                    _resolv_cache_add_resolver_stats_sample(statp->netid, revision_id, ns, &sample,
                                                            params.max_samples);
                }

                LOG(INFO) << __func__ << ": used send_dg " << n;

                if (n < 0) {
                    _resolv_cache_query_failed(statp->netid, buf, buflen, flags);
                    res_nclose(statp);
                    return -terrno;
                }
                if (n == 0) goto next_ns;
                // send_dg() sets v_circuit when the answer was truncated: retry the same
                // server over TCP.
                if (v_circuit) goto same_ns;
                resplen = n;
            }

            LOG(DEBUG) << __func__ << ": got answer:";
            res_pquery(ans, (resplen > anssiz) ? anssiz : resplen);

            if (cache_status == RESOLV_CACHE_NOTFOUND) {
                _resolv_cache_add(statp->netid, buf, buflen, ans, resplen);
            }
            /*
             * If we have temporarily opened a virtual circuit,
             * or if we haven't been asked to keep a socket open,
             * close the socket.
             */
            if ((v_circuit && (statp->options & RES_USEVC) == 0U) ||
                (statp->options & RES_STAYOPEN) == 0U) {
                res_nclose(statp);
            }
            return (resplen);
        next_ns:;
        }  // for each ns
    }  // for each retry

    res_nclose(statp);
    if (!v_circuit) {
        if (!gotsomewhere) {
            // TODO: Remove errno once callers stop using it
            errno = ECONNREFUSED; /* no nameservers found */
            terrno = ECONNREFUSED;
        } else {
            // TODO: Remove errno once callers stop using it
            errno = ETIMEDOUT; /* no answer obtained */
            terrno = ETIMEDOUT;
        }
    } else {
        errno = terrno;
    }

    _resolv_cache_query_failed(statp->netid, buf, buflen, flags);

    return -terrno;
}
/* Private */
// Returns the size of the concrete sockaddr structure for the family stored in `sa`:
// sizeof(sockaddr_in) for AF_INET, sizeof(sockaddr_in6) for AF_INET6, and 0 for any
// other family.
static int get_salen(const struct sockaddr* sa) {
    switch (sa->sa_family) {
        case AF_INET:
            return (sizeof(struct sockaddr_in));
        case AF_INET6:
            return (sizeof(struct sockaddr_in6));
        default:
            return (0); /* unknown, die on connect */
    }
}
/*
 * Picks the address to use for nameserver slot `n`. See res_init() for initialization.
 * Returns a pointer into `statp`; nothing is copied.
 */
static struct sockaddr* get_nsaddr(res_state statp, size_t n) {
    const bool legacy_slot_empty = !statp->nsaddr_list[n].sin_family;
    if (legacy_slot_empty && statp->_u._ext.ext) {
        /*
         * - statp->_u._ext.ext->nsaddrs[n] holds an address that is larger
         *   than struct sockaddr, and
         * - user code did not update statp->nsaddr_list[n].
         */
        return (struct sockaddr*) (void*) &statp->_u._ext.ext->nsaddrs[n];
    }
    /*
     * - user code updated statp->nsaddr_list[n], or
     * - statp->nsaddr_list[n] has the same content as
     *   statp->_u._ext.ext->nsaddrs[n].
     */
    return (struct sockaddr*) (void*) &statp->nsaddr_list[n];
}
// Computes the per-query timeout for nameserver index `ns`.
// The base timeout (params->base_timeout_msec) is doubled for each server index and, for
// every server but the first, divided by the number of configured servers. The result is
// never shorter than one second.
static struct timespec get_timeout(const res_state statp, const res_params* params, const int ns) {
    // Legacy algorithm which scales the timeout by nameserver number.
    // For instance, with 4 nameservers: 5s, 2.5s, 5s, 10s
    // This has no effect with 1 or 2 nameservers
    int msec = params->base_timeout_msec << ns;
    if (ns > 0) msec /= statp->nscount;

    // For safety, don't allow OEMs and experiments to configure a timeout shorter than 1s.
    msec = (msec < 1000) ? 1000 : msec;

    LOG(INFO) << __func__ << ": using timeout of " << msec << " msec";

    // Split the millisecond value into whole seconds and leftover nanoseconds.
    struct timespec result;
    result.tv_nsec = (msec % 1000) * 1000000;
    result.tv_sec = msec / 1000;
    return result;
}
/*
 * Sends the query to nameserver `ns` over a stream socket ("virtual circuit") and reads
 * the length-prefixed reply into (ans, anssiz).
 *
 * An existing statp->_vcsock is reused only if it is still connected to this server and
 * carries the current socket mark; otherwise a new socket is created, bound to a random
 * source port and connected with a timeout.
 *
 * Out-parameters: *at is the wall-clock start time, *delay the measured round-trip time
 * (0 unless an answer was received), *rcode the response code (or RCODE_TIMEOUT on a
 * failed connect), *terrno the errno of the failing operation.
 *
 * returns:
 *	>0 : length of the response as announced by the server (may exceed anssiz, in
 *	     which case only anssiz bytes were stored and the TC bit is set)
 *	0  : this server failed; the caller may try the next one
 *	<0 : fatal error
 */
static int send_vc(res_state statp, res_params* params, const u_char* buf, int buflen, u_char* ans,
                   int anssiz, int* terrno, int ns, time_t* at, int* rcode, int* delay) {
    *at = time(NULL);
    *delay = 0;
    const HEADER* hp = (const HEADER*) (const void*) buf;
    HEADER* anhp = (HEADER*) (void*) ans;
    struct sockaddr* nsap;
    int nsaplen;
    int truncating, connreset, n;
    struct iovec iov[2];
    u_char* cp;

    LOG(INFO) << __func__ << ": using send_vc";

    nsap = get_nsaddr(statp, (size_t) ns);
    nsaplen = get_salen(nsap);

    connreset = 0;
same_ns:
    truncating = 0;

    struct timespec now = evNowTime();

    /* Are we still talking to whom we want to talk to? */
    if (statp->_vcsock >= 0 && (statp->_flags & RES_F_VC) != 0) {
        struct sockaddr_storage peer;
        socklen_t size = sizeof peer;
        unsigned old_mark;
        socklen_t mark_size = sizeof(old_mark);
        if (getpeername(statp->_vcsock, (struct sockaddr*) (void*) &peer, &size) < 0 ||
            !sock_eq((struct sockaddr*) (void*) &peer, nsap) ||
            getsockopt(statp->_vcsock, SOL_SOCKET, SO_MARK, &old_mark, &mark_size) < 0 ||
            old_mark != statp->_mark) {
            res_nclose(statp);
            statp->_flags &= ~RES_F_VC;
        }
    }

    if (statp->_vcsock < 0 || (statp->_flags & RES_F_VC) == 0) {
        if (statp->_vcsock >= 0) res_nclose(statp);

        statp->_vcsock = socket(nsap->sa_family, SOCK_STREAM | SOCK_CLOEXEC, 0);
        if (statp->_vcsock < 0) {
            switch (errno) {
                // An unsupported address family only rules out this server.
                case EPROTONOSUPPORT:
                case EPFNOSUPPORT:
                case EAFNOSUPPORT:
                    Perror(statp, "socket(vc)", errno);
                    return 0;
                default:
                    *terrno = errno;
                    Perror(statp, "socket(vc)", errno);
                    return -1;
            }
        }
        fchown(statp->_vcsock, AID_DNS, -1);
        if (statp->_mark != MARK_UNSET) {
            if (setsockopt(statp->_vcsock, SOL_SOCKET, SO_MARK, &statp->_mark,
                           sizeof(statp->_mark)) < 0) {
                *terrno = errno;
                Perror(statp, "setsockopt", errno);
                return -1;
            }
        }
        errno = 0;
        if (random_bind(statp->_vcsock, nsap->sa_family) < 0) {
            *terrno = errno;
            Aerror(statp, "bind/vc", errno, nsap, nsaplen);
            res_nclose(statp);
            return (0);
        }
        if (connect_with_timeout(statp->_vcsock, nsap, (socklen_t) nsaplen,
                                 get_timeout(statp, params, ns)) < 0) {
            *terrno = errno;
            Aerror(statp, "connect/vc", errno, nsap, nsaplen);
            res_nclose(statp);
            /*
             * The way connect_with_timeout() is implemented prevents us from reliably
             * determining whether this was really a timeout or e.g. ECONNREFUSED. Since
             * currently both cases are handled in the same way, there is no need to
             * change this (yet). If we ever need to reliably distinguish between these
             * cases, both connect_with_timeout() and retrying_poll() need to be
             * modified, though.
             */
            *rcode = RCODE_TIMEOUT;
            return (0);
        }
        statp->_flags |= RES_F_VC;
    }

    /*
     * Send length & message
     */
    uint16_t len = htons(static_cast<uint16_t>(buflen));
    iov[0] = evConsIovec(&len, INT16SZ);
    iov[1] = evConsIovec((void*) buf, (size_t) buflen);
    if (writev(statp->_vcsock, iov, 2) != (INT16SZ + buflen)) {
        *terrno = errno;
        Perror(statp, "write failed", errno);
        res_nclose(statp);
        return (0);
    }
    /*
     * Receive length & response
     */
read_len:
    // First read the two-byte length prefix into the start of the answer buffer.
    cp = ans;
    len = INT16SZ;
    while ((n = read(statp->_vcsock, (char*) cp, (size_t) len)) > 0) {
        cp += n;
        if ((len -= n) == 0) break;
    }
    if (n <= 0) {
        *terrno = errno;
        Perror(statp, "read failed", errno);
        // The socket is closed once here; both paths below rely on it being closed.
        res_nclose(statp);
        /*
         * A long running process might get its TCP
         * connection reset if the remote server was
         * restarted.  Requery the server instead of
         * trying a new one.  When there is only one
         * server, this means that a query might work
         * instead of failing.  We only allow one reset
         * per query to prevent looping.
         */
        if (*terrno == ECONNRESET && !connreset) {
            connreset = 1;
            goto same_ns;
        }
        return (0);
    }
    uint16_t resplen = ntohs(*reinterpret_cast<const uint16_t*>(ans));
    if (resplen > anssiz) {
        LOG(DEBUG) << __func__ << ": response truncated";
        truncating = 1;
        len = anssiz;
    } else
        len = resplen;
    if (len < HFIXEDSZ) {
        /*
         * Undersized message.
         */
        LOG(DEBUG) << __func__ << ": undersized: " << len;
        *terrno = EMSGSIZE;
        res_nclose(statp);
        return (0);
    }
    // Now read the message itself, overwriting the length prefix.
    cp = ans;
    while (len != 0 && (n = read(statp->_vcsock, (char*) cp, (size_t) len)) > 0) {
        cp += n;
        len -= n;
    }
    if (n <= 0) {
        *terrno = errno;
        Perror(statp, "read(vc)", errno);
        res_nclose(statp);
        return (0);
    }

    if (truncating) {
        /*
         * Flush rest of answer so connection stays in synch.
         */
        anhp->tc = 1;
        len = resplen - anssiz;
        while (len != 0) {
            char junk[PACKETSZ];

            n = read(statp->_vcsock, junk, (len > sizeof junk) ? sizeof junk : len);
            if (n > 0)
                len -= n;
            else
                break;
        }
    }
    /*
     * If the calling application has bailed out of
     * a previous call and failed to arrange to have
     * the circuit closed or the server has got
     * itself confused, then drop the packet and
     * wait for the correct one.
     */
    if (hp->id != anhp->id) {
        LOG(DEBUG) << __func__ << ": old answer (unexpected):";
        res_pquery(ans, (resplen > anssiz) ? anssiz : resplen);
        goto read_len;
    }

    /*
     * All is well, or the error is fatal.  Signal that the
     * next nameserver ought not be tried.
     */
    if (resplen > 0) {
        struct timespec done = evNowTime();
        *delay = _res_stats_calculate_rtt(&done, &now);
        *rcode = anhp->rcode;
    }
    return (resplen);
}
/*
 * Connects `sock` to `nsap` without blocking for longer than `timeout`.
 * The socket is switched to non-blocking mode for the duration of the call and its
 * original file status flags are restored before returning.
 *
 * Returns -1 on error (errno set), a non-negative value on success: 0 when connect()
 * completed immediately, otherwise the positive value returned by retrying_poll().
 */
static int connect_with_timeout(int sock, const struct sockaddr* nsap, socklen_t salen,
                                const struct timespec timeout) {
    const int origflags = fcntl(sock, F_GETFL, 0);
    fcntl(sock, F_SETFL, origflags | O_NONBLOCK);

    int res = connect(sock, nsap, salen);
    if (res < 0 && errno != EINPROGRESS) {
        // Immediate failure; errno from connect() is reported to the caller.
        res = -1;
    } else if (res != 0) {
        // Connection in progress: wait for it to complete until the deadline.
        struct timespec now = evNowTime();
        struct timespec finish = evAddTime(now, timeout);
        LOG(INFO) << __func__ << ": " << sock << " send_vc";
        res = retrying_poll(sock, POLLIN | POLLOUT, &finish);
        if (res <= 0) {
            res = -1;
        }
    }

    // The caller reads errno right after a failure, so the cleanup below must not
    // overwrite it.
    const int saved_errno = errno;
    fcntl(sock, F_SETFL, origflags);
    LOG(INFO) << __func__ << ": " << sock << " connect_with_timeout returning " << res;
    errno = saved_errno;
    return res;
}
/*
 * Polls `sock` for `events` until the absolute deadline `finish`, restarting the wait
 * (with a recomputed remaining time) whenever ppoll() is interrupted by a signal.
 *
 * returns:
 *	>0 : the socket is ready and has no pending socket error
 *	0  : the deadline passed; errno is set to ETIMEDOUT
 *	<0 : ppoll() failed, or the socket reported an error (errno holds that error)
 */
static int retrying_poll(const int sock, const short events, const struct timespec* finish) {
    for (;;) {
        LOG(INFO) << __func__ << ": " << sock << " retrying_poll";

        // Time left until the deadline, clamped to zero once it has passed.
        const struct timespec now = evNowTime();
        struct timespec timeout;
        if (evCmpTime(*finish, now) > 0)
            timeout = evSubTime(*finish, now);
        else
            timeout = evConsTime(0L, 0L);

        struct pollfd fds = {.fd = sock, .events = events};
        const int n = ppoll(&fds, 1, &timeout, /*sigmask=*/NULL);
        if (n == 0) {
            LOG(INFO) << __func__ << ": " << sock << " retrying_poll timeout";
            errno = ETIMEDOUT;
            return 0;
        }
        if (n < 0) {
            if (errno == EINTR) continue;
            PLOG(INFO) << __func__ << ": " << sock << " retrying_poll failed";
            return n;
        }
        if (fds.revents & (POLLIN | POLLOUT | POLLERR)) {
            int error = 0;
            socklen_t len = sizeof(error);
            if (getsockopt(sock, SOL_SOCKET, SO_ERROR, &error, &len) < 0) {
                // getsockopt() itself failed: `error` was not filled in, so keep the errno
                // it set instead of overwriting it.
                PLOG(INFO) << __func__ << ": " << sock << " retrying_poll getsockopt failed";
                return -1;
            }
            if (error != 0) {
                // The socket has a pending error; report it through errno.
                errno = error;
                PLOG(INFO) << __func__ << ": " << sock << " retrying_poll getsockopt failed";
                return -1;
            }
        }
        LOG(INFO) << __func__ << ": " << sock << " retrying_poll returning " << n;
        return n;
    }
}
/*
 * Sends the query to nameserver `ns` over a datagram socket and waits for a matching
 * reply in (ans, anssiz).
 *
 * The per-server socket statp->_u._ext.nssocks[ns] is created on first use. Replies with
 * a different query id, from an address that is not one of our servers, or that do not
 * carry the question we asked are ignored and the wait continues until the deadline.
 *
 * Out-parameters: *at is the wall-clock start time, *delay the measured round-trip time,
 * *rcode the response code (RCODE_TIMEOUT on timeout), *gotsomewhere is set once a
 * timeout or any reply was observed, *v_circuit is set when the answer was truncated and
 * the caller should retry over TCP, *terrno receives the errno of fatal failures.
 *
 * returns:
 *	>0 : length of the reply (or 1 when *v_circuit was set)
 *	0  : this server failed or timed out; the caller may try the next one
 *	<0 : fatal error
 */
static int send_dg(res_state statp, res_params* params, const u_char* buf, int buflen, u_char* ans,
                   int anssiz, int* terrno, int ns, int* v_circuit, int* gotsomewhere, time_t* at,
                   int* rcode, int* delay) {
    *at = time(NULL);
    *delay = 0;
    const HEADER* hp = (const HEADER*) (const void*) buf;
    HEADER* anhp = (HEADER*) (void*) ans;
    const struct sockaddr* nsap;
    int nsaplen;
    struct timespec now, timeout, finish, done;
    struct sockaddr_storage from;
    socklen_t fromlen;
    int resplen, n, s;

    nsap = get_nsaddr(statp, (size_t) ns);
    nsaplen = get_salen(nsap);
    if (statp->_u._ext.nssocks[ns] == -1) {
        statp->_u._ext.nssocks[ns] = socket(nsap->sa_family, SOCK_DGRAM | SOCK_CLOEXEC, 0);
        if (statp->_u._ext.nssocks[ns] < 0) {
            switch (errno) {
                // An unsupported address family only rules out this server.
                case EPROTONOSUPPORT:
                case EPFNOSUPPORT:
                case EAFNOSUPPORT:
                    Perror(statp, "socket(dg)", errno);
                    return (0);
                default:
                    *terrno = errno;
                    Perror(statp, "socket(dg)", errno);
                    return (-1);
            }
        }

        fchown(statp->_u._ext.nssocks[ns], AID_DNS, -1);
        if (statp->_mark != MARK_UNSET) {
            if (setsockopt(statp->_u._ext.nssocks[ns], SOL_SOCKET, SO_MARK, &(statp->_mark),
                           sizeof(statp->_mark)) < 0) {
                // Report the real failure to the caller, which returns -*terrno.
                *terrno = errno;
                res_nclose(statp);
                return -1;
            }
        }
#ifndef CANNOT_CONNECT_DGRAM
        /*
         * On a 4.3BSD+ machine (client and server,
         * actually), sending to a nameserver datagram
         * port with no nameserver will cause an
         * ICMP port unreachable message to be returned.
         * If our datagram socket is "connected" to the
         * server, we get an ECONNREFUSED error on the next
         * socket operation, and select returns if the
         * error message is received.  We can thus detect
         * the absence of a nameserver without timing out.
         */
        if (random_bind(statp->_u._ext.nssocks[ns], nsap->sa_family) < 0) {
            Aerror(statp, "bind(dg)", errno, nsap, nsaplen);
            res_nclose(statp);
            return (0);
        }
        if (connect(statp->_u._ext.nssocks[ns], nsap, (socklen_t) nsaplen) < 0) {
            Aerror(statp, "connect(dg)", errno, nsap, nsaplen);
            res_nclose(statp);
            return (0);
        }
#endif /* !CANNOT_CONNECT_DGRAM */
        LOG(DEBUG) << __func__ << ": new DG socket";
    }
    s = statp->_u._ext.nssocks[ns];
#ifndef CANNOT_CONNECT_DGRAM
    if (send(s, (const char*) buf, (size_t) buflen, 0) != buflen) {
        Perror(statp, "send", errno);
        res_nclose(statp);
        return 0;
    }
#else  /* !CANNOT_CONNECT_DGRAM */
    if (sendto(s, (const char*) buf, buflen, 0, nsap, nsaplen) != buflen) {
        Aerror(statp, "sendto", errno, nsap, nsaplen);
        res_nclose(statp);
        return 0;
    }
#endif /* !CANNOT_CONNECT_DGRAM */

    // Wait for reply.
    timeout = get_timeout(statp, params, ns);
    now = evNowTime();
    finish = evAddTime(now, timeout);
retry:
    // Ignored replies jump back here; the deadline `finish` is not extended.
    n = retrying_poll(s, POLLIN, &finish);

    if (n == 0) {
        *rcode = RCODE_TIMEOUT;
        LOG(DEBUG) << __func__ << ": timeout";
        *gotsomewhere = 1;
        return 0;
    }
    if (n < 0) {
        Perror(statp, "poll", errno);
        res_nclose(statp);
        return 0;
    }
    errno = 0;
    fromlen = sizeof(from);
    resplen = recvfrom(s, (char*) ans, (size_t) anssiz, 0, (struct sockaddr*) (void*) &from,
                       &fromlen);
    if (resplen <= 0) {
        Perror(statp, "recvfrom", errno);
        res_nclose(statp);
        return 0;
    }
    *gotsomewhere = 1;
    if (resplen < HFIXEDSZ) {
        /*
         * Undersized message.
         */
        LOG(DEBUG) << __func__ << ": undersized: " << resplen;
        *terrno = EMSGSIZE;
        res_nclose(statp);
        return 0;
    }
    if (hp->id != anhp->id) {
        /*
         * response from old query, ignore it.
         * XXX - potential security hazard could
         *	 be detected here.
         */
        LOG(DEBUG) << __func__ << ": old answer:";
        res_pquery(ans, (resplen > anssiz) ? anssiz : resplen);
        goto retry;
    }
    if (!(statp->options & RES_INSECURE1) &&
        !res_ourserver_p(statp, (struct sockaddr*) (void*) &from)) {
        /*
         * response from wrong server? ignore it.
         * XXX - potential security hazard could
         *	 be detected here.
         */
        LOG(DEBUG) << __func__ << ": not our server:";
        res_pquery(ans, (resplen > anssiz) ? anssiz : resplen);
        goto retry;
    }
    if (anhp->rcode == FORMERR && (statp->options & RES_USE_EDNS0) != 0U) {
        /*
         * Do not retry if the server do not understand EDNS0.
         * The case has to be captured here, as FORMERR packet do not
         * carry query section, hence res_queriesmatch() returns 0.
         */
        LOG(DEBUG) << __func__ << ": server rejected query with EDNS0:";
        res_pquery(ans, (resplen > anssiz) ? anssiz : resplen);
        /* record the error */
        statp->_flags |= RES_F_EDNS0ERR;
        res_nclose(statp);
        return 0;
    }
    if (!(statp->options & RES_INSECURE2) &&
        !res_queriesmatch(buf, buf + buflen, ans, ans + anssiz)) {
        /*
         * response contains wrong query? ignore it.
         * XXX - potential security hazard could
         *	 be detected here.
         */
        LOG(DEBUG) << __func__ << ": wrong query name:";
        res_pquery(ans, (resplen > anssiz) ? anssiz : resplen);
        goto retry;
    }
    done = evNowTime();
    *delay = _res_stats_calculate_rtt(&done, &now);
    if (anhp->rcode == SERVFAIL || anhp->rcode == NOTIMP || anhp->rcode == REFUSED) {
        LOG(DEBUG) << __func__ << ": server rejected query:";
        res_pquery(ans, (resplen > anssiz) ? anssiz : resplen);
        res_nclose(statp);
        *rcode = anhp->rcode;
        return 0;
    }
    if (!(statp->options & RES_IGNTC) && anhp->tc) {
        /*
         * To get the rest of answer,
         * use TCP with same server.
         */
        LOG(DEBUG) << __func__ << ": truncated answer";
        *v_circuit = 1;
        res_nclose(statp);
        return 1;
    }
    /*
     * All is well, or the error is fatal.  Signal that the
     * next nameserver ought not be tried.
     */
    if (resplen > 0) {
        *rcode = anhp->rcode;
    }
    return resplen;
}
/*
 * Debug-log a failure message together with the numeric host and service
 * of the given socket address.
 *
 * statp   - resolver state; nothing is logged unless RES_DEBUG is set in
 *           statp->options.
 * string  - caller-supplied text placed at the front of the log line.
 * error   - error number, rendered through strerror().
 * address - socket address to render via getnameinfo().
 * alen    - length of *address, passed to getnameinfo() as a socklen_t.
 *
 * errno is captured on entry and restored on exit, so the value the caller
 * observes is not disturbed by the calls made here.
 */
static void Aerror(const res_state statp, const char* string, int error,
                   const struct sockaddr* address, int alen) {
    const int savedErrno = errno;

    if ((statp->options & RES_DEBUG) != 0U) {
        char host[NI_MAXHOST];
        char service[NI_MAXSERV];

        // Numeric forms only: never trigger a name lookup from the error path.
        const int lookupResult =
                getnameinfo(address, static_cast<socklen_t>(alen), host, sizeof(host), service,
                            sizeof(service), NI_NUMERICHOST | NI_NUMERICSERV);
        if (lookupResult != 0) {
            // The address could not be rendered; log placeholders instead.
            strncpy(host, "?", sizeof(host) - 1);
            host[sizeof(host) - 1] = '\0';
            strncpy(service, "?", sizeof(service) - 1);
            service[sizeof(service) - 1] = '\0';
        }

        LOG(DEBUG) << __func__ << ": " << string << " ([" << host << "]." << service
                   << "): " << strerror(error);
    }

    errno = savedErrno;
}
/*
 * Debug-log a failure message followed by the strerror() text for `error`.
 *
 * statp  - resolver state; nothing is logged unless RES_DEBUG is set in
 *          statp->options.
 * string - caller-supplied text placed at the front of the log line.
 * error  - error number, rendered through strerror().
 */
static void Perror(const res_state statp, const char* string, int error) {
    const bool debugEnabled = (statp->options & RES_DEBUG) != 0U;
    if (!debugEnabled) {
        return;
    }
    LOG(DEBUG) << __func__ << ": " << string << ": " << strerror(error);
}
/*
 * Compare two socket addresses for equality.
 *
 * Returns 1 when both addresses have the same family and, for that family,
 * the same port and address; returns 0 otherwise.
 *
 *   AF_INET  - sin_port and sin_addr.s_addr must match.
 *   AF_INET6 - sin6_port and sin6_addr must match; sin6_scope_id is also
 *              compared, but only when HAVE_SIN6_SCOPE_ID is defined.
 *   other    - never considered equal.
 */
static int sock_eq(struct sockaddr* a, struct sockaddr* b) {
    const auto family = a->sa_family;
    if (family != b->sa_family) {
        return 0;
    }

    if (family == AF_INET) {
        const auto* lhs = reinterpret_cast<const struct sockaddr_in*>(a);
        const auto* rhs = reinterpret_cast<const struct sockaddr_in*>(b);
        if (lhs->sin_port != rhs->sin_port) {
            return 0;
        }
        return lhs->sin_addr.s_addr == rhs->sin_addr.s_addr ? 1 : 0;
    }

    if (family == AF_INET6) {
        const auto* lhs = reinterpret_cast<const struct sockaddr_in6*>(a);
        const auto* rhs = reinterpret_cast<const struct sockaddr_in6*>(b);
        if (lhs->sin6_port != rhs->sin6_port) {
            return 0;
        }
#ifdef HAVE_SIN6_SCOPE_ID
        if (lhs->sin6_scope_id != rhs->sin6_scope_id) {
            return 0;
        }
#endif
        return IN6_ARE_ADDR_EQUAL(&lhs->sin6_addr, &rhs->sin6_addr) ? 1 : 0;
    }

    // Unknown address family: treat as not equal.
    return 0;
}
/*
 * Send `query` over DNS-over-TLS according to the private DNS configuration
 * of the network identified by statp->netid.
 *
 * statp    - resolver state; supplies the network id and the socket mark.
 * query    - the query to send.
 * answer   - buffer that receives the response.
 * rcode    - on success, set to the rcode field of the response header;
 *            set to RCODE_TIMEOUT on a strict-mode network error.
 * fallback - set to true when this function decides the query should be
 *            retried over cleartext; otherwise left untouched.
 *
 * Returns the response length reported by the TLS dispatcher on success,
 * or -1 on any failure (with or without *fallback being set).
 */
static int res_tls_send(res_state statp, const Slice query, const Slice answer, int* rcode,
                        bool* fallback) {
    const unsigned netId = statp->netid;
    const unsigned mark = statp->_mark;

    PrivateDnsStatus status = gPrivateDnsConfiguration.getStatus(netId);

    // Private DNS disabled: let the caller use cleartext.
    if (status.mode == PrivateDnsMode::OFF) {
        *fallback = true;
        return -1;
    }

    if (status.validatedServers.empty()) {
        // Opportunistic mode without a validated server: use cleartext.
        if (status.mode == PrivateDnsMode::OPPORTUNISTIC) {
            *fallback = true;
            return -1;
        }

        // Strict mode: poll a bounded number of times for resolved and
        // validated server IP addresses rather than failing immediately.
        //
        // As soon as a network becomes the default network, apps will send
        // DNS queries on it. If no server has validated yet and those queries
        // were not blocked, they would fail at once and cause
        // application-visible errors. This can happen even before the network
        // itself validates, since an unvalidated network can become the
        // default when no validated network is available.
        //
        // TODO: see if there is a better way to address this problem, such as
        // buffering the queries in a queue or only blocking queries for the
        // first few seconds after a default network change.
        constexpr int kMaxValidationPolls = 42;
        constexpr std::chrono::milliseconds kValidationPollInterval(100);
        for (int attempt = 0; attempt < kMaxValidationPolls; ++attempt) {
            std::this_thread::sleep_for(kValidationPollInterval);
            if (gPrivateDnsConfiguration.getStatus(netId).validatedServers.empty()) {
                continue;
            }
            // Take a fresh snapshot; it is re-checked below because it may
            // differ from the one just inspected.
            status = gPrivateDnsConfiguration.getStatus(netId);
            break;
        }
        if (status.validatedServers.empty()) {
            return -1;
        }
    }

    int resplen = 0;
    LOG(INFO) << __func__ << ": performing query over TLS";
    const auto response =
            sDnsTlsDispatcher.query(status.validatedServers, mark, query, answer, &resplen);
    LOG(INFO) << __func__ << ": TLS query result: " << static_cast<int>(response);

    // Success is handled identically in opportunistic and strict mode.
    if (response == DnsTlsTransport::Response::success) {
        *rcode = reinterpret_cast<HEADER*>(answer.base())->rcode;
        return resplen;
    }

    const bool networkError = (response == DnsTlsTransport::Response::network_error);
    if (status.mode == PrivateDnsMode::OPPORTUNISTIC) {
        // In opportunistic mode, fall back to cleartext in some cases (DNS
        // shouldn't fail if a validated opportunistic mode server becomes
        // unreachable for some reason). No error timeout is recorded for a
        // network error since the query will fall back to UDP.
        //
        // Note: this will cause cleartext queries to be emitted, with all of
        // the EDNS0 goodness enabled. Fingers crossed. :-/
        if (networkError || response == DnsTlsTransport::Response::internal_error) {
            *fallback = true;
        }
    } else if (networkError) {
        // Strict mode: this case happens when the query stored in
        // DnsTlsTransport has expired, because either 1) the query has been
        // tried 3 times without a response or 2) the connection to the
        // server could not be established.
        *rcode = RCODE_TIMEOUT;
    }
    return -1;
}
int resolv_res_nsend(const android_net_context* netContext, const uint8_t* msg, int msgLen,
uint8_t* ans, int ansLen, int* rcode, uint32_t flags) {
res_state res = res_get_state();
res_setnetcontext(res, netContext);
_resolv_populate_res_for_net(res);
*rcode = NOERROR;
return res_nsend(res, msg, msgLen, ans, ansLen, rcode, flags);
}