summary refs log tree commit diff
path: root/services
diff options
context:
space:
mode:
Diffstat (limited to 'services')
-rw-r--r-- services/authzone.c 39
-rw-r--r-- services/cache/dns.c 59
-rw-r--r-- services/cache/dns.h 3
-rw-r--r-- services/cache/rrset.c 10
-rw-r--r-- services/listen_dnsport.c 2534
-rw-r--r-- services/listen_dnsport.h 405
-rw-r--r-- services/mesh.c 13
-rw-r--r-- services/mesh.h 3
-rw-r--r-- services/modstack.c 2
-rw-r--r-- services/rpz.c 19
10 files changed, 3009 insertions, 78 deletions
diff --git a/services/authzone.c b/services/authzone.c
index 580a681f57ce..6f6c55d4397d 100644
--- a/services/authzone.c
+++ b/services/authzone.c
@@ -3684,6 +3684,29 @@ auth_zone_parse_notify_serial(sldns_buffer* pkt, uint32_t *serial)
return 1;
}
+/**
+ * Print addr to str for log messages, and if the port is not the default
+ * DNS port (53), append "@port_number".
+ * The port is taken from the sockaddr and converted with ntohs;
+ * addr_is_ip6 decides whether it is read as a sockaddr_in6 or a
+ * sockaddr_in.
+ * @param addr: the socket address to print.
+ * @param addrlen: length of addr.
+ * @param buf: output buffer that receives the string.
+ * @param len: size of buf in bytes. The "address@port" form is written
+ *	with snprintf bounded by len.
+ */
+static void addr_port_to_str(struct sockaddr_storage* addr, socklen_t addrlen,
+ char* buf, size_t len)
+{
+ uint16_t port = 0;
+ if(addr_is_ip6(addr, addrlen)) {
+ struct sockaddr_in6* sa = (struct sockaddr_in6*)addr;
+ port = ntohs((uint16_t)sa->sin6_port);
+ } else { /* every address that is not IPv6 is read as a sockaddr_in */
+ struct sockaddr_in* sa = (struct sockaddr_in*)addr;
+ port = ntohs((uint16_t)sa->sin_port);
+ }
+ if(port == UNBOUND_DNS_PORT) {
+ /* If it is port 53, print it plainly. */
+ addr_to_str(addr, addrlen, buf, len);
+ } else {
+ char a[256]; /* scratch for the bare address before the port is appended */
+ a[0]=0; /* start from an empty string before addr_to_str fills it */
+ addr_to_str(addr, addrlen, a, sizeof(a));
+ snprintf(buf, len, "%s@%d", a, (int)port);
+ }
+}
+
/** see if addr appears in the list */
static int
addr_in_list(struct auth_addr* list, struct sockaddr_storage* addr,
@@ -5516,7 +5539,7 @@ xfr_transfer_init_fetch(struct auth_xfer* xfr, struct module_env* env)
if(!xfr->task_transfer->cp) {
char zname[255+1], as[256];
dname_str(xfr->name, zname);
- addr_to_str(&addr, addrlen, as, sizeof(as));
+ addr_port_to_str(&addr, addrlen, as, sizeof(as));
verbose(VERB_ALGO, "cannot create http cp "
"connection for %s to %s", zname, as);
return 0;
@@ -5525,7 +5548,7 @@ xfr_transfer_init_fetch(struct auth_xfer* xfr, struct module_env* env)
if(verbosity >= VERB_ALGO) {
char zname[255+1], as[256];
dname_str(xfr->name, zname);
- addr_to_str(&addr, addrlen, as, sizeof(as));
+ addr_port_to_str(&addr, addrlen, as, sizeof(as));
verbose(VERB_ALGO, "auth zone %s transfer next HTTP fetch from %s started", zname, as);
}
/* Create or refresh the list of allow_notify addrs */
@@ -5548,7 +5571,7 @@ xfr_transfer_init_fetch(struct auth_xfer* xfr, struct module_env* env)
if(!xfr->task_transfer->cp) {
char zname[255+1], as[256];
dname_str(xfr->name, zname);
- addr_to_str(&addr, addrlen, as, sizeof(as));
+ addr_port_to_str(&addr, addrlen, as, sizeof(as));
verbose(VERB_ALGO, "cannot create tcp cp connection for "
"xfr %s to %s", zname, as);
return 0;
@@ -5557,7 +5580,7 @@ xfr_transfer_init_fetch(struct auth_xfer* xfr, struct module_env* env)
if(verbosity >= VERB_ALGO) {
char zname[255+1], as[256];
dname_str(xfr->name, zname);
- addr_to_str(&addr, addrlen, as, sizeof(as));
+ addr_port_to_str(&addr, addrlen, as, sizeof(as));
verbose(VERB_ALGO, "auth zone %s transfer next %s fetch from %s started", zname,
(xfr->task_transfer->on_ixfr?"IXFR":"AXFR"), as);
}
@@ -5660,7 +5683,7 @@ xfr_master_add_addrs(struct auth_master* m, struct ub_packed_rrset_key* rrset,
}
if(verbosity >= VERB_ALGO) {
char s[64];
- addr_to_str(&a->addr, a->addrlen, s, sizeof(s));
+ addr_port_to_str(&a->addr, a->addrlen, s, sizeof(s));
verbose(VERB_ALGO, "auth host %s lookup %s",
m->host, s);
}
@@ -6406,7 +6429,7 @@ xfr_probe_send_probe(struct auth_xfer* xfr, struct module_env* env,
if(!xfr->task_probe->cp) {
char zname[255+1], as[256];
dname_str(xfr->name, zname);
- addr_to_str(&addr, addrlen, as, sizeof(as));
+ addr_port_to_str(&addr, addrlen, as, sizeof(as));
verbose(VERB_ALGO, "cannot create udp cp for "
"probe %s to %s", zname, as);
return 0;
@@ -6426,7 +6449,7 @@ xfr_probe_send_probe(struct auth_xfer* xfr, struct module_env* env,
(struct sockaddr*)&addr, addrlen, 0)) {
char zname[255+1], as[256];
dname_str(xfr->name, zname);
- addr_to_str(&addr, addrlen, as, sizeof(as));
+ addr_port_to_str(&addr, addrlen, as, sizeof(as));
verbose(VERB_ALGO, "failed to send soa probe for %s to %s",
zname, as);
return 0;
@@ -6434,7 +6457,7 @@ xfr_probe_send_probe(struct auth_xfer* xfr, struct module_env* env,
if(verbosity >= VERB_ALGO) {
char zname[255+1], as[256];
dname_str(xfr->name, zname);
- addr_to_str(&addr, addrlen, as, sizeof(as));
+ addr_port_to_str(&addr, addrlen, as, sizeof(as));
verbose(VERB_ALGO, "auth zone %s soa probe sent to %s", zname,
as);
}
diff --git a/services/cache/dns.c b/services/cache/dns.c
index 5e74c31693b3..7ab63bacf492 100644
--- a/services/cache/dns.c
+++ b/services/cache/dns.c
@@ -88,7 +88,7 @@ store_rrsets(struct module_env* env, struct reply_info* rep, time_t now,
/* update ref if it was in the cache */
switch(rrset_cache_update(env->rrset_cache, &rep->ref[i],
env->alloc, ((ntohs(rep->ref[i].key->rk.type)==
- LDNS_RR_TYPE_NS && !pside)?qstarttime:now + leeway))) {
+ LDNS_RR_TYPE_NS && !pside)?qstarttime:now) + leeway)) {
case 0: /* ref unchanged, item inserted */
break;
case 2: /* ref updated, cache is superior */
@@ -162,7 +162,7 @@ dns_cache_store_msg(struct module_env* env, struct query_info* qinfo,
size_t i;
/* store RRsets */
- for(i=0; i<rep->rrset_count; i++) {
+ for(i=0; i<rep->rrset_count; i++) {
rep->ref[i].key = rep->rrsets[i];
rep->ref[i].id = rep->rrsets[i]->id;
}
@@ -197,6 +197,7 @@ dns_cache_store_msg(struct module_env* env, struct query_info* qinfo,
reply_info_sortref(rep);
if(!(e = query_info_entrysetup(qinfo, rep, hash))) {
log_err("store_msg: malloc failed");
+ reply_info_delete(rep, NULL);
return;
}
slabhash_insert(env->msg_cache, hash, &e->entry, rep, env->alloc);
@@ -365,7 +366,7 @@ find_add_addrs(struct module_env* env, uint16_t qclass,
/** find and add A and AAAA records for missing nameservers in delegpt */
int
cache_fill_missing(struct module_env* env, uint16_t qclass,
- struct regional* region, struct delegpt* dp)
+ struct regional* region, struct delegpt* dp, uint32_t flags)
{
struct delegpt_ns* ns;
struct msgreply_entry* neg;
@@ -376,7 +377,7 @@ cache_fill_missing(struct module_env* env, uint16_t qclass,
continue;
ns->cache_lookup_count++;
akey = rrset_cache_lookup(env->rrset_cache, ns->name,
- ns->namelen, LDNS_RR_TYPE_A, qclass, 0, now, 0);
+ ns->namelen, LDNS_RR_TYPE_A, qclass, flags, now, 0);
if(akey) {
if(!delegpt_add_rrset_A(dp, region, akey, ns->lame,
NULL)) {
@@ -397,7 +398,7 @@ cache_fill_missing(struct module_env* env, uint16_t qclass,
}
}
akey = rrset_cache_lookup(env->rrset_cache, ns->name,
- ns->namelen, LDNS_RR_TYPE_AAAA, qclass, 0, now, 0);
+ ns->namelen, LDNS_RR_TYPE_AAAA, qclass, flags, now, 0);
if(akey) {
if(!delegpt_add_rrset_AAAA(dp, region, akey, ns->lame,
NULL)) {
@@ -607,22 +608,8 @@ tomsg(struct module_env* env, struct query_info* q, struct reply_info* r,
time_t now_control = now;
if(now > r->ttl) {
/* Check if we are allowed to serve expired */
- if(allow_expired) {
- if(env->cfg->serve_expired_ttl &&
- r->serve_expired_ttl < now) {
- return NULL;
- }
- /* Ignore expired failure answers */
- if(FLAGS_GET_RCODE(r->flags) !=
- LDNS_RCODE_NOERROR &&
- FLAGS_GET_RCODE(r->flags) !=
- LDNS_RCODE_NXDOMAIN &&
- FLAGS_GET_RCODE(r->flags) !=
- LDNS_RCODE_YXDOMAIN)
- return 0;
- } else {
+ if(!allow_expired || !reply_info_can_answer_expired(r, now))
return NULL;
- }
/* Change the current time so we can pass the below TTL checks when
* serving expired data. */
now_control = r->ttl - env->cfg->serve_expired_reply_ttl;
@@ -641,6 +628,7 @@ tomsg(struct module_env* env, struct query_info* q, struct reply_info* r,
else
msg->rep->prefetch_ttl = PREFETCH_TTL_CALC(msg->rep->ttl);
msg->rep->serve_expired_ttl = msg->rep->ttl + SERVE_EXPIRED_TTL;
+ msg->rep->serve_expired_norec_ttl = 0;
msg->rep->security = r->security;
msg->rep->an_numrrsets = r->an_numrrsets;
msg->rep->ns_numrrsets = r->ns_numrrsets;
@@ -724,6 +712,7 @@ rrset_msg(struct ub_packed_rrset_key* rrset, struct regional* region,
msg->rep->ttl = d->ttl - now;
msg->rep->prefetch_ttl = PREFETCH_TTL_CALC(msg->rep->ttl);
msg->rep->serve_expired_ttl = msg->rep->ttl + SERVE_EXPIRED_TTL;
+ msg->rep->serve_expired_norec_ttl = 0;
msg->rep->security = sec_status_unchecked;
msg->rep->an_numrrsets = 1;
msg->rep->ns_numrrsets = 0;
@@ -763,6 +752,7 @@ synth_dname_msg(struct ub_packed_rrset_key* rrset, struct regional* region,
msg->rep->ttl = d->ttl - now;
msg->rep->prefetch_ttl = PREFETCH_TTL_CALC(msg->rep->ttl);
msg->rep->serve_expired_ttl = msg->rep->ttl + SERVE_EXPIRED_TTL;
+ msg->rep->serve_expired_norec_ttl = 0;
msg->rep->security = sec_status_unchecked;
msg->rep->an_numrrsets = 1;
msg->rep->ns_numrrsets = 0;
@@ -1070,6 +1060,35 @@ dns_cache_store(struct module_env* env, struct query_info* msgqinf,
struct regional* region, uint32_t flags, time_t qstarttime)
{
struct reply_info* rep = NULL;
+ if(SERVE_EXPIRED) {
+ /* We are serving expired records. Before caching, check if a
+ * useful expired record exists. */
+ struct msgreply_entry* e = msg_cache_lookup(env,
+ msgqinf->qname, msgqinf->qname_len, msgqinf->qtype,
+ msgqinf->qclass, flags, 0, 0);
+ if(e) {
+ struct reply_info* cached = e->entry.data;
+ if(cached->ttl < *env->now
+ && reply_info_could_use_expired(cached, *env->now)
+ /* If we are validating make sure only
+ * validating modules can update such messages.
+ * In that case don't cache it and let a
+ * subsequent module handle the caching. For
+ * example, the iterator should not replace an
+ * expired secure answer with a fresh unchecked
+ * one and let the validator manage caching. */
+ && cached->security != sec_status_bogus
+ && (env->need_to_validate &&
+ msgrep->security == sec_status_unchecked)) {
+ verbose(VERB_ALGO, "a validated expired entry "
+ "could be overwritten, skip caching "
+ "the new message at this stage");
+ lock_rw_unlock(&e->entry.lock);
+ return 1;
+ }
+ lock_rw_unlock(&e->entry.lock);
+ }
+ }
/* alloc, malloc properly (not in region, like msg is) */
rep = reply_info_copy(msgrep, env->alloc, NULL);
if(!rep)
diff --git a/services/cache/dns.h b/services/cache/dns.h
index c2bf23c6de54..1dd537d2bd5d 100644
--- a/services/cache/dns.h
+++ b/services/cache/dns.h
@@ -202,10 +202,11 @@ struct dns_msg* dns_cache_lookup(struct module_env* env,
* @param qclass: which class to look in.
* @param region: where to store new dp info.
* @param dp: delegation point to fill missing entries.
+ * @param flags: rrset flags, or 0.
* @return false on alloc failure.
*/
int cache_fill_missing(struct module_env* env, uint16_t qclass,
- struct regional* region, struct delegpt* dp);
+ struct regional* region, struct delegpt* dp, uint32_t flags);
/**
* Utility, create new, unpacked data structure for cache response.
diff --git a/services/cache/rrset.c b/services/cache/rrset.c
index 2c03214c8fe2..a05ae5a56b78 100644
--- a/services/cache/rrset.c
+++ b/services/cache/rrset.c
@@ -128,8 +128,8 @@ need_to_update_rrset(void* nd, void* cd, time_t timenow, int equal, int ns)
{
struct packed_rrset_data* newd = (struct packed_rrset_data*)nd;
struct packed_rrset_data* cached = (struct packed_rrset_data*)cd;
- /* o if new data is expired, current data is better */
- if( newd->ttl < timenow && cached->ttl >= timenow)
+ /* o if new data is expired, cached data is better */
+ if( newd->ttl < timenow && timenow <= cached->ttl)
return 0;
/* o store if rrset has been validated
* everything better than bogus data
@@ -140,9 +140,9 @@ need_to_update_rrset(void* nd, void* cd, time_t timenow, int equal, int ns)
if( cached->security == sec_status_bogus &&
newd->security != sec_status_bogus && !equal)
return 1;
- /* o if current RRset is more trustworthy - insert it */
+ /* o if new RRset is more trustworthy - insert it */
if( newd->trust > cached->trust ) {
- /* if the cached rrset is bogus, and this one equal,
+ /* if the cached rrset is bogus, and new is equal,
* do not update the TTL - let it expire. */
if(equal && cached->ttl >= timenow &&
cached->security == sec_status_bogus)
@@ -155,7 +155,7 @@ need_to_update_rrset(void* nd, void* cd, time_t timenow, int equal, int ns)
/* o same trust, but different in data - insert it */
if( newd->trust == cached->trust && !equal ) {
/* if this is type NS, do not 'stick' to owner that changes
- * the NS RRset, but use the old TTL for the new data, and
+ * the NS RRset, but use the cached TTL for the new data, and
* update to fetch the latest data. ttl is not expired, because
* that check was before this one. */
if(ns) {
diff --git a/services/listen_dnsport.c b/services/listen_dnsport.c
index 6c0691f2a73c..5dbac3650aaf 100644
--- a/services/listen_dnsport.c
+++ b/services/listen_dnsport.c
@@ -56,9 +56,11 @@
#include "util/net_help.h"
#include "sldns/sbuffer.h"
#include "sldns/parseutil.h"
+#include "sldns/wire2str.h"
#include "services/mesh.h"
#include "util/fptr_wlist.h"
#include "util/locks.h"
+#include "util/timeval_func.h"
#ifdef HAVE_NETDB_H
#include <netdb.h>
@@ -79,9 +81,30 @@
#ifdef HAVE_NET_IF_H
#include <net/if.h>
#endif
+
+#ifdef HAVE_TIME_H
+#include <time.h>
+#endif
+#include <sys/time.h>
+
+#ifdef HAVE_NGTCP2
+#include <ngtcp2/ngtcp2.h>
+#include <ngtcp2/ngtcp2_crypto.h>
+#ifdef HAVE_NGTCP2_NGTCP2_CRYPTO_QUICTLS_H
+#include <ngtcp2/ngtcp2_crypto_quictls.h>
+#else
+#include <ngtcp2/ngtcp2_crypto_openssl.h>
+#endif
+#endif
+
+#ifdef HAVE_OPENSSL_SSL_H
+#include <openssl/ssl.h>
+#endif
+
#ifdef HAVE_LINUX_NET_TSTAMP_H
#include <linux/net_tstamp.h>
#endif
+
/** number of queued TCP connections for listen() */
#define TCP_BACKLOG 256
@@ -109,9 +132,11 @@ static int http2_response_buffer_lock_inited = 0;
/**
* Debug print of the getaddrinfo returned address.
* @param addr: the address returned.
+ * @param additional: additional text that describes the type of socket,
+ * or NULL for no text.
*/
static void
-verbose_print_addr(struct addrinfo *addr)
+verbose_print_addr(struct addrinfo *addr, const char* additional)
{
if(verbosity >= VERB_ALGO) {
char buf[100];
@@ -126,13 +151,14 @@ verbose_print_addr(struct addrinfo *addr)
(void)strlcpy(buf, "(null)", sizeof(buf));
}
buf[sizeof(buf)-1] = 0;
- verbose(VERB_ALGO, "creating %s%s socket %s %d",
+ verbose(VERB_ALGO, "creating %s%s socket %s %d%s%s",
addr->ai_socktype==SOCK_DGRAM?"udp":
addr->ai_socktype==SOCK_STREAM?"tcp":"otherproto",
addr->ai_family==AF_INET?"4":
addr->ai_family==AF_INET6?"6":
"_otherfam", buf,
- ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port));
+ ntohs(((struct sockaddr_in*)addr->ai_addr)->sin_port),
+ (additional?" ":""), (additional?additional:""));
}
}
@@ -673,7 +699,7 @@ create_udp_sock(int family, int socktype, struct sockaddr* addr,
int
create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto,
int* reuseport, int transparent, int mss, int nodelay, int freebind,
- int use_systemd, int dscp)
+ int use_systemd, int dscp, const char* additional)
{
int s = -1;
char* err;
@@ -692,7 +718,7 @@ create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto,
#if !defined(IP_FREEBIND)
(void)freebind;
#endif
- verbose_print_addr(addr);
+ verbose_print_addr(addr, additional);
*noproto = 0;
#ifdef HAVE_SYSTEMD
if (!use_systemd ||
@@ -1008,7 +1034,8 @@ static int
make_sock(int stype, const char* ifname, const char* port,
struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind,
- int use_systemd, int dscp, struct unbound_socket* ub_sock)
+ int use_systemd, int dscp, struct unbound_socket* ub_sock,
+ const char* additional)
{
struct addrinfo *res = NULL;
int r, s, inuse, noproto;
@@ -1032,7 +1059,7 @@ make_sock(int stype, const char* ifname, const char* port,
return -1;
}
if(stype == SOCK_DGRAM) {
- verbose_print_addr(res);
+ verbose_print_addr(res, additional);
s = create_udp_sock(res->ai_family, res->ai_socktype,
(struct sockaddr*)res->ai_addr, res->ai_addrlen,
v6only, &inuse, &noproto, (int)rcv, (int)snd, 1,
@@ -1045,7 +1072,7 @@ make_sock(int stype, const char* ifname, const char* port,
} else {
s = create_tcp_accept_sock(res, v6only, &noproto, reuseport,
transparent, tcp_mss, nodelay, freebind, use_systemd,
- dscp);
+ dscp, additional);
if(s == -1 && noproto && hints->ai_family == AF_INET6){
*noip6 = 1;
}
@@ -1079,7 +1106,8 @@ static int
make_sock_port(int stype, const char* ifname, const char* port,
struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd,
int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind,
- int use_systemd, int dscp, struct unbound_socket* ub_sock)
+ int use_systemd, int dscp, struct unbound_socket* ub_sock,
+ const char* additional)
{
char* s = strchr(ifname, '@');
if(s) {
@@ -1102,11 +1130,11 @@ make_sock_port(int stype, const char* ifname, const char* port,
p[strlen(s+1)]=0;
return make_sock(stype, newif, p, hints, v6only, noip6, rcv,
snd, reuseport, transparent, tcp_mss, nodelay, freebind,
- use_systemd, dscp, ub_sock);
+ use_systemd, dscp, ub_sock, additional);
}
return make_sock(stype, ifname, port, hints, v6only, noip6, rcv, snd,
reuseport, transparent, tcp_mss, nodelay, freebind, use_systemd,
- dscp, ub_sock);
+ dscp, ub_sock, additional);
}
/**
@@ -1254,6 +1282,8 @@ if_is_ssl(const char* ifname, const char* port, int ssl_port,
* @param use_systemd: if true, fetch sockets from systemd.
* @param dnscrypt_port: dnscrypt service port number
* @param dscp: DSCP to use.
+ * @param quic_port: dns over quic port number.
+ * @param http_notls_downstream: if no tls is used for https downstream.
* @param sock_queue_timeout: the sock_queue_timeout from config. Seconds to
* wait to discard if UDP packets have waited for long in the socket
* buffer.
@@ -1267,7 +1297,7 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
struct config_strlist* proxy_protocol_port,
int* reuseport, int transparent, int tcp_mss, int freebind,
int http2_nodelay, int use_systemd, int dnscrypt_port, int dscp,
- int sock_queue_timeout)
+ int quic_port, int http_notls_downstream, int sock_queue_timeout)
{
int s, noip6=0;
int is_https = if_is_https(ifname, port, https_port);
@@ -1275,6 +1305,8 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
int is_pp2 = if_is_pp2(ifname, port, proxy_protocol_port);
int nodelay = is_https && http2_nodelay;
struct unbound_socket* ub_sock;
+ int is_doq = if_is_quic(ifname, port, quic_port);
+ const char* add = NULL;
if(!do_udp && !do_tcp)
return 0;
@@ -1286,6 +1318,9 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
} else if(is_https) {
fatal_exit("PROXYv2 and DoH combination not "
"supported!");
+ } else if(is_doq) {
+ fatal_exit("PROXYv2 and DoQ combination not "
+ "supported!");
}
}
@@ -1295,7 +1330,8 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
return 0;
if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
&noip6, rcv, snd, reuseport, transparent,
- tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock)) == -1) {
+ tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock,
+ (is_dnscrypt?"udpancil_dnscrypt":"udpancil"))) == -1) {
free(ub_sock->addr);
free(ub_sock);
if(noip6) {
@@ -1323,13 +1359,36 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
return 0;
}
} else if(do_udp) {
+ enum listen_type udp_port_type;
ub_sock = calloc(1, sizeof(struct unbound_socket));
if(!ub_sock)
return 0;
+ if(is_dnscrypt) {
+ udp_port_type = listen_type_udp_dnscrypt;
+ add = "dnscrypt";
+ } else if(is_doq) {
+ udp_port_type = listen_type_doq;
+ add = "doq";
+ if(((strchr(ifname, '@') &&
+ atoi(strchr(ifname, '@')+1) == 53) ||
+ (!strchr(ifname, '@') && atoi(port) == 53))) {
+ log_err("DNS over QUIC is not allowed on "
+ "port 53. Port 53 is for DNS "
+ "datagrams. Error for "
+ "interface '%s'.", ifname);
+ free(ub_sock->addr);
+ free(ub_sock);
+ return 0;
+ }
+ } else {
+ udp_port_type = listen_type_udp;
+ add = NULL;
+ }
/* regular udp socket */
if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1,
&noip6, rcv, snd, reuseport, transparent,
- tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock)) == -1) {
+ tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock,
+ add)) == -1) {
free(ub_sock->addr);
free(ub_sock);
if(noip6) {
@@ -1338,14 +1397,25 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
}
return 0;
}
- if (sock_queue_timeout && !set_recvtimestamp(s)) {
- log_warn("socket timestamping is not available");
+ if(udp_port_type == listen_type_doq) {
+ if(!set_recvpktinfo(s, hints->ai_family)) {
+ sock_close(s);
+ free(ub_sock->addr);
+ free(ub_sock);
+ return 0;
+ }
}
- if(!port_insert(list, s, is_dnscrypt
- ?listen_type_udp_dnscrypt :
- (sock_queue_timeout ?
- listen_type_udpancil:listen_type_udp),
- is_pp2, ub_sock)) {
+ if(udp_port_type == listen_type_udp && sock_queue_timeout)
+ udp_port_type = listen_type_udpancil;
+ if (sock_queue_timeout) {
+ if(!set_recvtimestamp(s)) {
+ log_warn("socket timestamping is not available");
+ } else {
+ if(udp_port_type == listen_type_udp)
+ udp_port_type = listen_type_udpancil;
+ }
+ }
+ if(!port_insert(list, s, udp_port_type, is_pp2, ub_sock)) {
sock_close(s);
free(ub_sock->addr);
free(ub_sock);
@@ -1359,17 +1429,24 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp,
ub_sock = calloc(1, sizeof(struct unbound_socket));
if(!ub_sock)
return 0;
- if(is_ssl)
+ if(is_ssl) {
port_type = listen_type_ssl;
- else if(is_https)
+ add = "tls";
+ } else if(is_https) {
port_type = listen_type_http;
- else if(is_dnscrypt)
+ add = "https";
+ if(http_notls_downstream)
+ add = "http";
+ } else if(is_dnscrypt) {
port_type = listen_type_tcp_dnscrypt;
- else
+ add = "dnscrypt";
+ } else {
port_type = listen_type_tcp;
+ add = NULL;
+ }
if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1,
&noip6, 0, 0, reuseport, transparent, tcp_mss, nodelay,
- freebind, use_systemd, dscp, ub_sock)) == -1) {
+ freebind, use_systemd, dscp, ub_sock, add)) == -1) {
free(ub_sock->addr);
free(ub_sock);
if(noip6) {
@@ -1446,8 +1523,10 @@ listen_create(struct comm_base* base, struct listen_port* ports,
size_t bufsize, int tcp_accept_count, int tcp_idle_timeout,
int harden_large_queries, uint32_t http_max_streams,
char* http_endpoint, int http_notls, struct tcl_list* tcp_conn_limit,
- void* sslctx, struct dt_env* dtenv, comm_point_callback_type* cb,
- void *cb_arg)
+ void* sslctx, struct dt_env* dtenv, struct doq_table* doq_table,
+ struct ub_randstate* rnd, const char* ssl_service_key,
+ const char* ssl_service_pem, struct config_file* cfg,
+ comm_point_callback_type* cb, void *cb_arg)
{
struct listen_dnsport* front = (struct listen_dnsport*)
malloc(sizeof(struct listen_dnsport));
@@ -1471,6 +1550,16 @@ listen_create(struct comm_base* base, struct listen_port* ports,
cp = comm_point_create_udp(base, ports->fd,
front->udp_buff, ports->pp2_enabled, cb,
cb_arg, ports->socket);
+ } else if(ports->ftype == listen_type_doq) {
+#ifndef HAVE_NGTCP2
+ log_warn("Unbound is not compiled with "
+ "ngtcp2. This is required to use DNS "
+ "over QUIC.");
+#endif
+ cp = comm_point_create_doq(base, ports->fd,
+ front->udp_buff, cb, cb_arg, ports->socket,
+ doq_table, rnd, ssl_service_key,
+ ssl_service_pem, cfg);
} else if(ports->ftype == listen_type_tcp ||
ports->ftype == listen_type_tcp_dnscrypt) {
cp = comm_point_create_tcp(base, ports->fd,
@@ -1858,7 +1947,9 @@ listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs,
reuseport, cfg->ip_transparent,
cfg->tcp_mss, cfg->ip_freebind,
cfg->http_nodelay, cfg->use_systemd,
- cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) {
+ cfg->dnscrypt_port, cfg->ip_dscp,
+ cfg->quic_port, cfg->http_notls_downstream,
+ cfg->sock_queue_timeout)) {
listening_ports_free(list);
return NULL;
}
@@ -1875,7 +1966,9 @@ listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs,
reuseport, cfg->ip_transparent,
cfg->tcp_mss, cfg->ip_freebind,
cfg->http_nodelay, cfg->use_systemd,
- cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) {
+ cfg->dnscrypt_port, cfg->ip_dscp,
+ cfg->quic_port, cfg->http_notls_downstream,
+ cfg->sock_queue_timeout)) {
listening_ports_free(list);
return NULL;
}
@@ -1894,7 +1987,9 @@ listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs,
reuseport, cfg->ip_transparent,
cfg->tcp_mss, cfg->ip_freebind,
cfg->http_nodelay, cfg->use_systemd,
- cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) {
+ cfg->dnscrypt_port, cfg->ip_dscp,
+ cfg->quic_port, cfg->http_notls_downstream,
+ cfg->sock_queue_timeout)) {
listening_ports_free(list);
return NULL;
}
@@ -1910,7 +2005,9 @@ listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs,
reuseport, cfg->ip_transparent,
cfg->tcp_mss, cfg->ip_freebind,
cfg->http_nodelay, cfg->use_systemd,
- cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) {
+ cfg->dnscrypt_port, cfg->ip_dscp,
+ cfg->quic_port, cfg->http_notls_downstream,
+ cfg->sock_queue_timeout)) {
listening_ports_free(list);
return NULL;
}
@@ -1928,7 +2025,9 @@ listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs,
reuseport, cfg->ip_transparent,
cfg->tcp_mss, cfg->ip_freebind,
cfg->http_nodelay, cfg->use_systemd,
- cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) {
+ cfg->dnscrypt_port, cfg->ip_dscp,
+ cfg->quic_port, cfg->http_notls_downstream,
+ cfg->sock_queue_timeout)) {
listening_ports_free(list);
return NULL;
}
@@ -1944,7 +2043,9 @@ listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs,
reuseport, cfg->ip_transparent,
cfg->tcp_mss, cfg->ip_freebind,
cfg->http_nodelay, cfg->use_systemd,
- cfg->dnscrypt_port, cfg->ip_dscp, cfg->sock_queue_timeout)) {
+ cfg->dnscrypt_port, cfg->ip_dscp,
+ cfg->quic_port, cfg->http_notls_downstream,
+ cfg->sock_queue_timeout)) {
listening_ports_free(list);
return NULL;
}
@@ -3154,3 +3255,2368 @@ nghttp2_session_callbacks* http2_req_callbacks_create(void)
return callbacks;
}
#endif /* HAVE_NGHTTP2 */
+
+#ifdef HAVE_NGTCP2
+/* Allocate and set up a doq table: idle timeout from the config, a random
+ * static secret, the connection, connection id and timer trees, and the
+ * locks with their protected areas. Returns NULL on allocation failure. */
+struct doq_table*
+doq_table_create(struct config_file* cfg, struct ub_randstate* rnd)
+{
+	/* calloc zeroes the struct, so the pointers released at the fail
+	 * label are NULL until they have been assigned. */
+	struct doq_table* tbl = calloc(1, sizeof(*tbl));
+	if(!tbl)
+		return NULL;
+	tbl->idle_timeout = ((uint64_t)cfg->tcp_idle_timeout)*
+		NGTCP2_MILLISECONDS;
+	tbl->sv_scidlen = 16;
+	tbl->static_secret_len = 16;
+	tbl->static_secret = malloc(tbl->static_secret_len);
+	if(!tbl->static_secret)
+		goto fail;
+	doq_fill_rand(rnd, tbl->static_secret, tbl->static_secret_len);
+
+	tbl->conn_tree = rbtree_create(doq_conn_cmp);
+	if(!tbl->conn_tree)
+		goto fail;
+	tbl->conid_tree = rbtree_create(doq_conid_cmp);
+	if(!tbl->conid_tree)
+		goto fail;
+	tbl->timer_tree = rbtree_create(doq_timer_cmp);
+	if(!tbl->timer_tree)
+		goto fail;
+
+	lock_rw_init(&tbl->lock);
+	lock_rw_init(&tbl->conid_lock);
+	lock_basic_init(&tbl->size_lock);
+
+	/* areas registered with the table lock */
+	lock_protect(&tbl->lock, &tbl->static_secret,
+		sizeof(tbl->static_secret));
+	lock_protect(&tbl->lock, &tbl->static_secret_len,
+		sizeof(tbl->static_secret_len));
+	lock_protect(&tbl->lock, tbl->static_secret,
+		tbl->static_secret_len);
+	lock_protect(&tbl->lock, &tbl->sv_scidlen,
+		sizeof(tbl->sv_scidlen));
+	lock_protect(&tbl->lock, &tbl->idle_timeout,
+		sizeof(tbl->idle_timeout));
+	lock_protect(&tbl->lock, &tbl->conn_tree, sizeof(tbl->conn_tree));
+	lock_protect(&tbl->lock, tbl->conn_tree, sizeof(*tbl->conn_tree));
+	/* the connection id tree has its own lock */
+	lock_protect(&tbl->conid_lock, tbl->conid_tree,
+		sizeof(*tbl->conid_tree));
+	lock_protect(&tbl->lock, tbl->timer_tree,
+		sizeof(*tbl->timer_tree));
+	/* and so has the size counter */
+	lock_protect(&tbl->size_lock, &tbl->current_size,
+		sizeof(tbl->current_size));
+	return tbl;
+
+fail:
+	free(tbl->static_secret);
+	free(tbl->conn_tree);
+	free(tbl->conid_tree);
+	free(tbl);
+	return NULL;
+}
+
+/** traversal callback: release one connection of the connection tree,
+ * arg is the doq table */
+static void
+conn_tree_del(rbnode_type* node, void* arg)
+{
+	struct doq_table* table = (struct doq_table*)arg;
+	struct doq_conn* c;
+	struct doq_timer* t;
+	if(node == NULL)
+		return;
+	c = (struct doq_conn*)node->key;
+	t = &c->timer;
+	/* Take the timer off the setlist before touching the tree: the
+	 * list removal looks up the rbnode element that holds the setlist
+	 * of equal timeouts, and that lookup needs the tree unchanged. */
+	if(t->timer_in_list)
+		doq_timer_list_remove(c->table, t);
+	if(t->timer_in_tree)
+		doq_timer_tree_remove(c->table, t);
+	doq_table_quic_size_subtract(table, sizeof(*c)+c->key.dcidlen);
+	doq_conn_delete(c, table);
+}
+
+/** traversal callback: release one element of the connection id tree */
+static void
+conid_tree_del(rbnode_type* node, void* ATTR_UNUSED(arg))
+{
+	if(node != NULL)
+		doq_conid_delete((struct doq_conid*)node->key);
+}
+
+/* Tear down a doq table: destroy its locks, delete every connection that
+ * is still in the connection tree, release the trees and the static
+ * secret, and free the table itself. A NULL table is ignored. */
+void
+doq_table_delete(struct doq_table* table)
+{
+ if(!table)
+ return;
+ lock_rw_destroy(&table->lock);
+ free(table->static_secret);
+ if(table->conn_tree) {
+ /* conn_tree_del receives the table as its argument and
+ * deletes the connection stored in each node. */
+ traverse_postorder(table->conn_tree, conn_tree_del, table);
+ free(table->conn_tree);
+ }
+ /* Destroyed only after the connections are gone, because
+ * doq_conn_delete takes the conid_lock. */
+ lock_rw_destroy(&table->conid_lock);
+ if(table->conid_tree) {
+ /* The tree should be empty, because the doq_conn_delete calls
+ * above should have also removed their conid elements. */
+ traverse_postorder(table->conid_tree, conid_tree_del, NULL);
+ free(table->conid_tree);
+ }
+ lock_basic_destroy(&table->size_lock);
+ if(table->timer_tree) {
+ /* The tree should be empty, because the conn_tree_del calls
+ * above should also have removed them. Also the doq_timer
+ * is part of the doq_conn struct, so is already freed. */
+ free(table->timer_tree);
+ }
+ /* The write list is only reset, its elements are not freed here. */
+ table->write_list_first = NULL;
+ table->write_list_last = NULL;
+ free(table);
+}
+
+/* Look up the timer tree element for the exact time tv.
+ * Returns the timer stored in the tree for that time, or NULL if the
+ * tree has no element with that time. */
+struct doq_timer*
+doq_timer_find_time(struct doq_table* table, struct timeval* tv)
+{
+	/* A zeroed search key with only the time filled in. */
+	struct doq_timer lookup;
+	struct rbnode_type* found;
+	memset(&lookup, 0, sizeof(lookup));
+	lookup.time.tv_sec = tv->tv_sec;
+	lookup.time.tv_usec = tv->tv_usec;
+	found = rbtree_search(table->timer_tree, &lookup);
+	return found ? (struct doq_timer*)found->key : NULL;
+}
+
+/* Remove a timer that is an element of the timer tree.
+ * Does nothing if the timer is not in the tree. If the timer carries a
+ * setlist of other timers with the same time, the first timer of that
+ * list is promoted: it is inserted into the tree in place of the removed
+ * timer and inherits the remainder of the setlist and the
+ * worker_doq_socket pointer. */
+void
+doq_timer_tree_remove(struct doq_table* table, struct doq_timer* timer)
+{
+ if(!timer->timer_in_tree)
+ return;
+ rbtree_delete(table->timer_tree, timer);
+ timer->timer_in_tree = 0;
+ /* This item could have more timers in the same set. */
+ if(timer->setlist_first) {
+ struct doq_timer* rb_timer = timer->setlist_first;
+ /* del first element from setlist */
+ if(rb_timer->setlist_next)
+ rb_timer->setlist_next->setlist_prev = NULL;
+ else
+ timer->setlist_last = NULL;
+ timer->setlist_first = rb_timer->setlist_next;
+ rb_timer->setlist_prev = NULL;
+ rb_timer->setlist_next = NULL;
+ rb_timer->timer_in_list = 0;
+ /* insert it into the tree as new rb element */
+ memset(&rb_timer->node, 0, sizeof(rb_timer->node));
+ rb_timer->node.key = rb_timer;
+ rbtree_insert(table->timer_tree, &rb_timer->node);
+ rb_timer->timer_in_tree = 1;
+ /* the setlist, if any remainder, moves to the rb element */
+ rb_timer->setlist_first = timer->setlist_first;
+ rb_timer->setlist_last = timer->setlist_last;
+ timer->setlist_first = NULL;
+ timer->setlist_last = NULL;
+ rb_timer->worker_doq_socket = timer->worker_doq_socket;
+ }
+ /* The removed timer no longer refers to a worker socket. */
+ timer->worker_doq_socket = NULL;
+}
+
+/* Unlink a timer from the setlist of timers that share its time.
+ * Does nothing if the timer is not on a setlist. The list head and tail
+ * are kept by the tree element for that time, which is looked up here;
+ * if no such element is found only the in-list flag is cleared. */
+void
+doq_timer_list_remove(struct doq_table* table, struct doq_timer* timer)
+{
+	struct doq_timer* head;
+	struct doq_timer* before;
+	struct doq_timer* after;
+	if(!timer->timer_in_list)
+		return;
+	/* The tree element with the same time owns first and last. */
+	head = doq_timer_find_time(table, &timer->time);
+	if(head) {
+		before = timer->setlist_prev;
+		after = timer->setlist_next;
+		if(before)
+			before->setlist_next = after;
+		else
+			head->setlist_first = after;
+		if(after)
+			after->setlist_prev = before;
+		else
+			head->setlist_last = before;
+		timer->setlist_prev = NULL;
+		timer->setlist_next = NULL;
+	}
+	timer->timer_in_list = 0;
+}
+
+/** doq put timer at the tail of the setlist that rb_timer keeps */
+static void
+doq_timer_list_append(struct doq_timer* rb_timer, struct doq_timer* timer)
+{
+	struct doq_timer* tail = rb_timer->setlist_last;
+	log_assert(timer->timer_in_list == 0);
+	timer->timer_in_list = 1;
+	timer->setlist_prev = tail;
+	timer->setlist_next = NULL;
+	if(tail == NULL)
+		rb_timer->setlist_first = timer; /* list was empty */
+	else
+		tail->setlist_next = timer;
+	rb_timer->setlist_last = timer;
+}
+
+/* Disable a timer: take it off the setlist and out of the timer tree,
+ * whichever it is on, and drop its worker socket pointer. */
+void
+doq_timer_unset(struct doq_table* table, struct doq_timer* timer)
+{
+	/* The setlist removal comes first. It finds the rbnode element
+	 * that holds the setlist of equal timeouts with a tree lookup, so
+	 * the tree structure must only be edited afterwards. */
+	if(timer->timer_in_list)
+		doq_timer_list_remove(table, timer);
+	if(timer->timer_in_tree)
+		doq_timer_tree_remove(table, timer);
+	timer->worker_doq_socket = NULL;
+}
+
+/* Set a timer to expire at the absolute time tv.
+ * If the timer is already set on exactly that time nothing changes.
+ * Otherwise the timer is unset first and then either appended to the
+ * setlist of an existing tree element with the same time, or inserted
+ * into the timer tree as a new element. Only in the latter case is
+ * worker_doq_socket stored in the timer. */
+void doq_timer_set(struct doq_table* table, struct doq_timer* timer,
+ struct doq_server_socket* worker_doq_socket, struct timeval* tv)
+{
+ struct doq_timer* rb_timer;
+ if(verbosity >= VERB_ALGO && timer->conn) {
+ /* Log the absolute time and the time relative to the
+ * socket's now_tv. */
+ char a[256];
+ struct timeval rel;
+ addr_to_str((void*)&timer->conn->key.paddr.addr,
+ timer->conn->key.paddr.addrlen, a, sizeof(a));
+ timeval_subtract(&rel, tv, worker_doq_socket->now_tv);
+ verbose(VERB_ALGO, "doq %s timer set %d.%6.6d in %d.%6.6d",
+ a, (int)tv->tv_sec, (int)tv->tv_usec,
+ (int)rel.tv_sec, (int)rel.tv_usec);
+ }
+ if(timer->timer_in_tree || timer->timer_in_list) {
+ if(timer->time.tv_sec == tv->tv_sec &&
+ timer->time.tv_usec == tv->tv_usec)
+ return; /* already set on that time */
+ doq_timer_unset(table, timer);
+ }
+ timer->time.tv_sec = tv->tv_sec;
+ timer->time.tv_usec = tv->tv_usec;
+ rb_timer = doq_timer_find_time(table, tv);
+ if(rb_timer) {
+ /* There is a timeout already with this value. Timer is
+ * added to the setlist. */
+ doq_timer_list_append(rb_timer, timer);
+ } else {
+ /* There is no timeout with this value. Make timer a new
+ * tree element. */
+ memset(&timer->node, 0, sizeof(timer->node));
+ timer->node.key = timer;
+ rbtree_insert(table->timer_tree, &timer->node);
+ timer->timer_in_tree = 1;
+ timer->setlist_first = NULL;
+ timer->setlist_last = NULL;
+ timer->worker_doq_socket = worker_doq_socket;
+ }
+}
+
+/* Allocate a new doq connection for the comm point c.
+ * The connection key is filled from paddr (remote address, local address
+ * and ifindex) and a private copy of the dcid. The stream tree, the
+ * timer back pointer and the connection lock are initialised.
+ * The connection is not inserted into any tree here.
+ * Returns NULL on allocation failure. */
+struct doq_conn*
+doq_conn_create(struct comm_point* c, struct doq_pkt_addr* paddr,
+ const uint8_t* dcid, size_t dcidlen, uint32_t version)
+{
+ struct doq_conn* conn = calloc(1, sizeof(*conn));
+ if(!conn)
+ return NULL;
+ conn->node.key = conn;
+ conn->doq_socket = c->doq_socket;
+ conn->table = c->doq_socket->table;
+ memmove(&conn->key.paddr.addr, &paddr->addr, paddr->addrlen);
+ conn->key.paddr.addrlen = paddr->addrlen;
+ memmove(&conn->key.paddr.localaddr, &paddr->localaddr,
+ paddr->localaddrlen);
+ conn->key.paddr.localaddrlen = paddr->localaddrlen;
+ conn->key.paddr.ifindex = paddr->ifindex;
+ /* The dcid is duplicated; doq_conn_delete frees the copy. */
+ conn->key.dcid = memdup((void*)dcid, dcidlen);
+ if(!conn->key.dcid) {
+ free(conn);
+ return NULL;
+ }
+ conn->key.dcidlen = dcidlen;
+ conn->version = version;
+ /* Which close-error initialiser exists depends on the ngtcp2
+ * version that was detected at configure time. */
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+ ngtcp2_ccerr_default(&conn->ccerr);
+#else
+ ngtcp2_connection_close_error_default(&conn->last_error);
+#endif
+ rbtree_init(&conn->stream_tree, &doq_stream_cmp);
+ conn->timer.conn = conn;
+ lock_basic_init(&conn->lock);
+ /* Register the members that are accessed under conn->lock. */
+ lock_protect(&conn->lock, &conn->key, sizeof(conn->key));
+ lock_protect(&conn->lock, &conn->doq_socket, sizeof(conn->doq_socket));
+ lock_protect(&conn->lock, &conn->table, sizeof(conn->table));
+ lock_protect(&conn->lock, &conn->is_deleted, sizeof(conn->is_deleted));
+ lock_protect(&conn->lock, &conn->version, sizeof(conn->version));
+ lock_protect(&conn->lock, &conn->conn, sizeof(conn->conn));
+ lock_protect(&conn->lock, &conn->conid_list, sizeof(conn->conid_list));
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+ lock_protect(&conn->lock, &conn->ccerr, sizeof(conn->ccerr));
+#else
+ lock_protect(&conn->lock, &conn->last_error, sizeof(conn->last_error));
+#endif
+ lock_protect(&conn->lock, &conn->tls_alert, sizeof(conn->tls_alert));
+ lock_protect(&conn->lock, &conn->ssl, sizeof(conn->ssl));
+ lock_protect(&conn->lock, &conn->close_pkt, sizeof(conn->close_pkt));
+ lock_protect(&conn->lock, &conn->close_pkt_len, sizeof(conn->close_pkt_len));
+ lock_protect(&conn->lock, &conn->close_ecn, sizeof(conn->close_ecn));
+ lock_protect(&conn->lock, &conn->stream_tree, sizeof(conn->stream_tree));
+ lock_protect(&conn->lock, &conn->stream_write_first, sizeof(conn->stream_write_first));
+ lock_protect(&conn->lock, &conn->stream_write_last, sizeof(conn->stream_write_last));
+ lock_protect(&conn->lock, &conn->write_interest, sizeof(conn->write_interest));
+ lock_protect(&conn->lock, &conn->on_write_list, sizeof(conn->on_write_list));
+ lock_protect(&conn->lock, &conn->write_prev, sizeof(conn->write_prev));
+ lock_protect(&conn->lock, &conn->write_next, sizeof(conn->write_next));
+ return conn;
+}
+
+/** traversal callback: release one stream of a connection and take its
+ * memory off the table size accounting, arg is the doq table */
+static void
+stream_tree_del(rbnode_type* node, void* arg)
+{
+	struct doq_table* table = (struct doq_table*)arg;
+	/* The node pointer itself is used as the stream pointer;
+	 * presumably the rbnode is the first member of the stream. */
+	struct doq_stream* s = (struct doq_stream*)node;
+	if(s == NULL)
+		return;
+	if(s->in)
+		doq_table_quic_size_subtract(table, s->inlen);
+	if(s->out)
+		doq_table_quic_size_subtract(table, s->outlen);
+	doq_table_quic_size_subtract(table, sizeof(*s));
+	doq_stream_delete(s);
+}
+
+/* Delete a doq connection and everything it owns: its connection ids,
+ * the ngtcp2 connection, all streams in the stream tree, the dcid copy,
+ * the SSL object and the stored close packet. A NULL conn is ignored.
+ * The table is passed on to the stream deletion for size accounting.
+ * The connection is not removed from the table's connection tree here. */
+void
+doq_conn_delete(struct doq_conn* conn, struct doq_table* table)
+{
+ if(!conn)
+ return;
+ lock_basic_destroy(&conn->lock);
+ /* The connection ids are cleared with the table's conid tree
+ * write-locked. */
+ lock_rw_wrlock(&conn->table->conid_lock);
+ doq_conn_clear_conids(conn);
+ lock_rw_unlock(&conn->table->conid_lock);
+ ngtcp2_conn_del(conn->conn);
+ if(conn->stream_tree.count != 0) {
+ traverse_postorder(&conn->stream_tree, stream_tree_del, table);
+ }
+ free(conn->key.dcid);
+ SSL_free(conn->ssl);
+ free(conn->close_pkt);
+ free(conn);
+}
+
+/* Compare two doq connections by their connection key, for the
+ * connection tree. Returns <0, 0 or >0. */
+int
+doq_conn_cmp(const void* key1, const void* key2)
+{
+	const struct doq_conn_key* a = &((struct doq_conn*)key1)->key;
+	const struct doq_conn_key* b = &((struct doq_conn*)key2)->key;
+	int r;
+	/* The sort order is destination address first, then local
+	 * address, then ifindex and finally the dcid. A findlessorequal
+	 * search on the destination address therefore lands among the
+	 * connections to that address, whatever their dcids are, and a
+	 * sorted printout groups the connections by destination IP
+	 * address with the different dcids listed under it. */
+	if(a->paddr.addrlen != b->paddr.addrlen)
+		return (a->paddr.addrlen < b->paddr.addrlen) ? -1 : 1;
+	r = memcmp(&a->paddr.addr, &b->paddr.addr, a->paddr.addrlen);
+	if(r != 0)
+		return r;
+
+	if(a->paddr.localaddrlen != b->paddr.localaddrlen)
+		return (a->paddr.localaddrlen < b->paddr.localaddrlen)
+			? -1 : 1;
+	r = memcmp(&a->paddr.localaddr, &b->paddr.localaddr,
+		a->paddr.localaddrlen);
+	if(r != 0)
+		return r;
+
+	if(a->paddr.ifindex != b->paddr.ifindex)
+		return (a->paddr.ifindex < b->paddr.ifindex) ? -1 : 1;
+
+	if(a->dcidlen != b->dcidlen)
+		return (a->dcidlen < b->dcidlen) ? -1 : 1;
+	return memcmp(a->dcid, b->dcid, a->dcidlen);
+}
+
+/* Compare two connection ids: shorter ids sort first, ids of equal
+ * length are ordered by their bytes. */
+int doq_conid_cmp(const void* key1, const void* key2)
+{
+	struct doq_conid* a = (struct doq_conid*)key1;
+	struct doq_conid* b = (struct doq_conid*)key2;
+	if(a->cidlen == b->cidlen)
+		return memcmp(a->cid, b->cid, a->cidlen);
+	return (a->cidlen < b->cidlen) ? -1 : 1;
+}
+
+/* Compare two doq timers by their time: seconds first, then
+ * microseconds. */
+int doq_timer_cmp(const void* key1, const void* key2)
+{
+	struct doq_timer* a = (struct doq_timer*)key1;
+	struct doq_timer* b = (struct doq_timer*)key2;
+	if(a->time.tv_sec != b->time.tv_sec)
+		return (a->time.tv_sec < b->time.tv_sec) ? -1 : 1;
+	if(a->time.tv_usec != b->time.tv_usec)
+		return (a->time.tv_usec < b->time.tv_usec) ? -1 : 1;
+	return 0;
+}
+
+/* Compare two doq streams by their stream id. */
+int doq_stream_cmp(const void* key1, const void* key2)
+{
+	struct doq_stream* a = (struct doq_stream*)key1;
+	struct doq_stream* b = (struct doq_stream*)key2;
+	if(a->stream_id < b->stream_id)
+		return -1;
+	if(a->stream_id > b->stream_id)
+		return 1;
+	return 0;
+}
+
+/** doq store a local address in repinfo.
+ * The IP address goes into the pktinfo member, the port into doq_srcport
+ * (copied as it is in the sockaddr, without byte order conversion) and
+ * srctype is set to 6 or 4. */
+static void
+doq_repinfo_store_localaddr(struct comm_reply* repinfo,
+ struct doq_addr_storage* localaddr, socklen_t localaddrlen)
+{
+ /* use the pktinfo that we have for ancillary udp data otherwise,
+ * this saves space for a sockaddr */
+ memset(&repinfo->pktinfo, 0, sizeof(repinfo->pktinfo));
+ if(addr_is_ip6((void*)localaddr, localaddrlen)) {
+ /* Without IPV6_PKTINFO only srctype is set; address and port
+ * are not stored. */
+#ifdef IPV6_PKTINFO
+ struct sockaddr_in6* sa6 = (struct sockaddr_in6*)localaddr;
+ memmove(&repinfo->pktinfo.v6info.ipi6_addr,
+ &sa6->sin6_addr, sizeof(struct in6_addr));
+ repinfo->doq_srcport = sa6->sin6_port;
+#endif
+ repinfo->srctype = 6;
+ } else {
+ /* Without IP_PKTINFO and IP_RECVDSTADDR only srctype is set. */
+#ifdef IP_PKTINFO
+ struct sockaddr_in* sa = (struct sockaddr_in*)localaddr;
+ memmove(&repinfo->pktinfo.v4info.ipi_addr,
+ &sa->sin_addr, sizeof(struct in_addr));
+ repinfo->doq_srcport = sa->sin_port;
+#elif defined(IP_RECVDSTADDR)
+ struct sockaddr_in* sa = (struct sockaddr_in*)localaddr;
+ memmove(&repinfo->pktinfo.v4addr, &sa->sin_addr,
+ sizeof(struct in_addr));
+ repinfo->doq_srcport = sa->sin_port;
+#endif
+ repinfo->srctype = 4;
+ }
+}
+
+/** doq retrieve localaddr from repinfo.
+ * Rebuilds the sockaddr that doq_repinfo_store_localaddr packed into the
+ * pktinfo and doq_srcport members of the repinfo.
+ * @param repinfo: reply info, srctype 6 selects IPv6, otherwise IPv4.
+ * @param localaddr: output, zeroed and filled with family, address, port.
+ * @param localaddrlen: output, length of the sockaddr that was written.
+ * If the platform has none of the pktinfo options for the address
+ * family, the outputs are left untouched. */
+static void
+doq_repinfo_retrieve_localaddr(struct comm_reply* repinfo,
+	struct doq_addr_storage* localaddr, socklen_t* localaddrlen)
+{
+	if(repinfo->srctype == 6) {
+#ifdef IPV6_PKTINFO
+		struct sockaddr_in6* sa6 = (struct sockaddr_in6*)localaddr;
+		*localaddrlen = (socklen_t)sizeof(struct sockaddr_in6);
+		memset(sa6, 0, *localaddrlen);
+		sa6->sin6_family = AF_INET6;
+		/* Copy the address only. Using the sockaddr length here
+		 * reads past ipi6_addr and writes past sin6_addr, beyond
+		 * the end of the sockaddr_in6. */
+		memmove(&sa6->sin6_addr, &repinfo->pktinfo.v6info.ipi6_addr,
+			sizeof(struct in6_addr));
+		sa6->sin6_port = repinfo->doq_srcport;
+#endif
+	} else {
+#ifdef IP_PKTINFO
+		struct sockaddr_in* sa = (struct sockaddr_in*)localaddr;
+		*localaddrlen = (socklen_t)sizeof(struct sockaddr_in);
+		memset(sa, 0, *localaddrlen);
+		sa->sin_family = AF_INET;
+		/* Copy the address only, so that the bytes after sin_addr
+		 * stay zero and nothing is read past ipi_addr. */
+		memmove(&sa->sin_addr, &repinfo->pktinfo.v4info.ipi_addr,
+			sizeof(struct in_addr));
+		sa->sin_port = repinfo->doq_srcport;
+#elif defined(IP_RECVDSTADDR)
+		struct sockaddr_in* sa = (struct sockaddr_in*)localaddr;
+		*localaddrlen = (socklen_t)sizeof(struct sockaddr_in);
+		memset(sa, 0, *localaddrlen);
+		sa->sin_family = AF_INET;
+		memmove(&sa->sin_addr, &repinfo->pktinfo.v4addr,
+			sizeof(struct in_addr));
+		sa->sin_port = repinfo->doq_srcport;
+#endif
+	}
+}
+
+/** doq write a connection key into repinfo, false if it does not fit.
+ * The peer address is stored as both the remote and the client address,
+ * and is_proxied is cleared. The dcid is copied into the fixed size
+ * doq_dcid buffer; if it is too long for that buffer 0 is returned, at
+ * which point the address members have already been written. */
+static int
+doq_conn_key_store_repinfo(struct doq_conn_key* key,
+ struct comm_reply* repinfo)
+{
+ repinfo->is_proxied = 0;
+ repinfo->doq_ifindex = key->paddr.ifindex;
+ repinfo->remote_addrlen = key->paddr.addrlen;
+ memmove(&repinfo->remote_addr, &key->paddr.addr,
+ repinfo->remote_addrlen);
+ repinfo->client_addrlen = key->paddr.addrlen;
+ memmove(&repinfo->client_addr, &key->paddr.addr,
+ repinfo->client_addrlen);
+ doq_repinfo_store_localaddr(repinfo, &key->paddr.localaddr,
+ key->paddr.localaddrlen);
+ if(key->dcidlen > sizeof(repinfo->doq_dcid))
+ return 0;
+ repinfo->doq_dcidlen = key->dcidlen;
+ memmove(repinfo->doq_dcid, key->dcid, key->dcidlen);
+ return 1;
+}
+
+/* Fill a connection key from the values stored in a repinfo.
+ * The addresses are copied, but key->dcid is set to point at the dcid
+ * buffer inside repinfo; it is not a copy, so the key is only usable
+ * while repinfo is, and its dcid must not be freed. */
+void
+doq_conn_key_from_repinfo(struct doq_conn_key* key, struct comm_reply* repinfo)
+{
+ key->paddr.ifindex = repinfo->doq_ifindex;
+ key->paddr.addrlen = repinfo->remote_addrlen;
+ memmove(&key->paddr.addr, &repinfo->remote_addr,
+ repinfo->remote_addrlen);
+ doq_repinfo_retrieve_localaddr(repinfo, &key->paddr.localaddr,
+ &key->paddr.localaddrlen);
+ key->dcidlen = repinfo->doq_dcidlen;
+ key->dcid = repinfo->doq_dcid;
+}
+
+/** doq link a stream into the stream tree of the connection; the result
+ * of the tree insert is ignored */
+static void
+doq_conn_add_stream(struct doq_conn* conn, struct doq_stream* stream)
+{
+	rbnode_type* n = &stream->node;
+	(void)rbtree_insert(&conn->stream_tree, n);
+}
+
+/** doq unlink a stream from the stream tree of the connection; the
+ * stream itself is not freed */
+static void
+doq_conn_del_stream(struct doq_conn* conn, struct doq_stream* stream)
+{
+	rbnode_type* n = &stream->node;
+	(void)rbtree_delete(&conn->stream_tree, n);
+}
+
+/** doq allocate a zeroed stream with the given stream id, NULL on
+ * allocation failure */
+static struct doq_stream*
+doq_stream_create(int64_t stream_id)
+{
+	struct doq_stream* s = calloc(1, sizeof(*s));
+	if(s != NULL) {
+		/* the tree node refers back to the stream */
+		s->node.key = s;
+		s->stream_id = stream_id;
+	}
+	return s;
+}
+
+/* Free a stream together with its in and out buffers. NULL is ignored.
+ * No size accounting and no tree removal is done here. */
+void doq_stream_delete(struct doq_stream* stream)
+{
+	if(stream == NULL)
+		return;
+	free(stream->out);
+	free(stream->in);
+	free(stream);
+}
+
+/* Find the stream with the given id in the stream tree of the
+ * connection. Returns NULL if there is no such stream. */
+struct doq_stream*
+doq_stream_find(struct doq_conn* conn, int64_t stream_id)
+{
+	/* search key on the stack, only the id and node key are set */
+	struct doq_stream lookup;
+	rbnode_type* hit;
+	lookup.stream_id = stream_id;
+	lookup.node.key = &lookup;
+	hit = rbtree_search(&conn->stream_tree, &lookup);
+	if(hit == NULL)
+		return NULL;
+	return (struct doq_stream*)hit->key;
+}
+
+/** doq append the stream to the tail of the write list of the
+ * connection, unless it is on the list already */
+static void
+doq_stream_on_write_list(struct doq_conn* conn, struct doq_stream* stream)
+{
+	struct doq_stream* tail;
+	if(stream->on_write_list)
+		return;
+	tail = conn->stream_write_last;
+	stream->write_prev = tail;
+	stream->write_next = NULL;
+	if(tail != NULL)
+		tail->write_next = stream;
+	else
+		conn->stream_write_first = stream; /* list was empty */
+	conn->stream_write_last = stream;
+	stream->on_write_list = 1;
+}
+
+/** doq unlink the stream from the write list of the connection, if it
+ * is on that list */
+static void
+doq_stream_off_write_list(struct doq_conn* conn, struct doq_stream* stream)
+{
+	struct doq_stream* before;
+	struct doq_stream* after;
+	if(!stream->on_write_list)
+		return;
+	before = stream->write_prev;
+	after = stream->write_next;
+	if(after != NULL)
+		after->write_prev = before;
+	else
+		conn->stream_write_last = before;
+	if(before != NULL)
+		before->write_next = after;
+	else
+		conn->stream_write_first = after;
+	stream->write_prev = NULL;
+	stream->write_next = NULL;
+	stream->on_write_list = 0;
+}
+
+/** doq free the input buffer of the stream, if any, and take its length
+ * off the table size accounting */
+static void
+doq_stream_remove_in_buffer(struct doq_stream* stream, struct doq_table* table)
+{
+	if(stream->in == NULL)
+		return;
+	doq_table_quic_size_subtract(table, stream->inlen);
+	free(stream->in);
+	stream->in = NULL;
+	stream->inlen = 0;
+}
+
+/** doq free the output buffer of the stream, if any, and take its length
+ * off the table size accounting */
+static void
+doq_stream_remove_out_buffer(struct doq_stream* stream,
+	struct doq_table* table)
+{
+	if(stream->out == NULL)
+		return;
+	doq_table_quic_size_subtract(table, stream->outlen);
+	free(stream->out);
+	stream->out = NULL;
+	stream->outlen = 0;
+}
+
+/* Close a stream and free it.
+ * If send_shutdown is set, ngtcp2 is asked to shut down the stream with
+ * DOQ_APP_ERROR_CODE first. Then the bidi stream limit is extended by
+ * one, the stream's buffers and accounting are released and the stream
+ * is removed from the connection and deleted.
+ * Returns 1 on success, also when the stream was already marked closed.
+ * Returns 0 if the shutdown call failed; in that case the stream has
+ * been marked closed and taken off the write list, but is not freed.
+ * After a return of 1 for a stream that was not closed before, the
+ * stream pointer is no longer valid. */
+int
+doq_stream_close(struct doq_conn* conn, struct doq_stream* stream,
+ int send_shutdown)
+{
+ int ret;
+ if(stream->is_closed)
+ return 1;
+ stream->is_closed = 1;
+ doq_stream_off_write_list(conn, stream);
+ if(send_shutdown) {
+ verbose(VERB_ALGO, "doq: shutdown stream_id %d with app_error_code %d",
+ (int)stream->stream_id, (int)DOQ_APP_ERROR_CODE);
+ /* Newer ngtcp2 versions take an extra flags argument. */
+ ret = ngtcp2_conn_shutdown_stream(conn->conn,
+#ifdef HAVE_NGTCP2_CONN_SHUTDOWN_STREAM4
+ 0,
+#endif
+ stream->stream_id, DOQ_APP_ERROR_CODE);
+ if(ret != 0) {
+ log_err("doq ngtcp2_conn_shutdown_stream %d failed: %s",
+ (int)stream->stream_id, ngtcp2_strerror(ret));
+ return 0;
+ }
+ doq_conn_write_enable(conn);
+ }
+ verbose(VERB_ALGO, "doq: conn extend max streams bidi by 1");
+ ngtcp2_conn_extend_max_streams_bidi(conn->conn, 1);
+ doq_conn_write_enable(conn);
+ doq_stream_remove_in_buffer(stream, conn->doq_socket->table);
+ doq_stream_remove_out_buffer(stream, conn->doq_socket->table);
+ doq_table_quic_size_subtract(conn->doq_socket->table, sizeof(*stream));
+ doq_conn_del_stream(conn, stream);
+ doq_stream_delete(stream);
+ return 1;
+}
+
+/** doq copy the answer in buf into the out buffer of the stream.
+ * An older out buffer is freed first. Returns 0 if the copy could not
+ * be allocated. Size accounting is left to the caller. */
+static int
+doq_stream_pickup_answer(struct doq_stream* stream, struct sldns_buffer* buf)
+{
+	size_t answer_len = sldns_buffer_limit(buf);
+	stream->is_answer_available = 1;
+	/* drop a previous answer; free of NULL is harmless */
+	free(stream->out);
+	stream->out = NULL;
+	stream->nwrite = 0;
+	stream->outlen = answer_len;
+	/* QUIC may have to resend, so the bytes must stay allocated and
+	 * reachable until the peer has acknowledged them. That holds for
+	 * the leading uint16_t tcplen too, which is kept in outlen_wire. */
+	stream->outlen_wire = htons(stream->outlen);
+	stream->out = memdup(sldns_buffer_begin(buf), answer_len);
+	if(stream->out != NULL)
+		return 1;
+	log_err("doq could not send answer: out of memory");
+	return 0;
+}
+
+/* Queue the reply in buf for sending on the stream.
+ * The reply is copied into the stream's out buffer, the table size
+ * accounting is adjusted for the old and new buffer, the stream is put
+ * on the connection write list and writing is enabled for the
+ * connection. Returns 0 if the reply could not be copied. */
+int
+doq_stream_send_reply(struct doq_conn* conn, struct doq_stream* stream,
+ struct sldns_buffer* buf)
+{
+ if(verbosity >= VERB_ALGO) {
+ char* s = sldns_wire2str_pkt(sldns_buffer_begin(buf),
+ sldns_buffer_limit(buf));
+ verbose(VERB_ALGO, "doq stream %d response\n%s",
+ (int)stream->stream_id, (s?s:"null"));
+ free(s);
+ }
+ /* The pickup below frees an existing out buffer, so its size is
+ * taken off the accounting first. */
+ if(stream->out)
+ doq_table_quic_size_subtract(conn->doq_socket->table,
+ stream->outlen);
+ if(!doq_stream_pickup_answer(stream, buf))
+ return 0;
+ doq_table_quic_size_add(conn->doq_socket->table, stream->outlen);
+ doq_stream_on_write_list(conn, stream);
+ doq_conn_write_enable(conn);
+ return 1;
+}
+
+/** doq the length prefix of the stream is known, allocate the input
+ * buffer of that length and account for it. Returns 0 when the length
+ * is over the limit or the allocation fails. */
+static int
+doq_stream_datalen_complete(struct doq_stream* stream, struct doq_table* table)
+{
+	if(stream->inlen > 1024*1024) {
+		log_err("doq stream in length too large %d",
+			(int)stream->inlen);
+		return 0;
+	}
+	stream->in = calloc(1, stream->inlen);
+	if(stream->in != NULL) {
+		doq_table_quic_size_add(table, stream->inlen);
+		return 1;
+	}
+	log_err("doq could not read stream, calloc failed: "
+		"out of memory");
+	return 0;
+}
+
+/** doq stream data is complete, the input data has been received.
+ * The query is copied into the buffer of the socket's comm point, the
+ * connection key and stream id are stored in its repinfo and the comm
+ * point callback is invoked. If the callback returns nonzero, the
+ * contents of the comm point buffer are sent as the reply on the stream.
+ * Returns 0 on failure (no input buffer, query larger than the comm
+ * point buffer, dcid too long, or the reply could not be queued). */
+static int
+doq_stream_data_complete(struct doq_conn* conn, struct doq_stream* stream)
+{
+ struct comm_point* c;
+ if(verbosity >= VERB_ALGO) {
+ char* s = sldns_wire2str_pkt(stream->in, stream->inlen);
+ char a[128];
+ addr_to_str((void*)&conn->key.paddr.addr,
+ conn->key.paddr.addrlen, a, sizeof(a));
+ verbose(VERB_ALGO, "doq %s stream %d incoming query\n%s",
+ a, (int)stream->stream_id, (s?s:"null"));
+ free(s);
+ }
+ stream->is_query_complete = 1;
+ c = conn->doq_socket->cp;
+ if(!stream->in) {
+ verbose(VERB_ALGO, "doq_stream_data_complete: no in buffer");
+ return 0;
+ }
+ if(stream->inlen > sldns_buffer_capacity(c->buffer)) {
+ verbose(VERB_ALGO, "doq_stream_data_complete: query too long");
+ return 0;
+ }
+ sldns_buffer_clear(c->buffer);
+ sldns_buffer_write(c->buffer, stream->in, stream->inlen);
+ sldns_buffer_flip(c->buffer);
+ c->repinfo.c = c;
+ if(!doq_conn_key_store_repinfo(&conn->key, &c->repinfo)) {
+ verbose(VERB_ALGO, "doq_stream_data_complete: connection "
+ "DCID too long");
+ return 0;
+ }
+ c->repinfo.doq_streamid = stream->stream_id;
+ /* current_conn is set only for the duration of the callback. */
+ conn->doq_socket->current_conn = conn;
+ fptr_ok(fptr_whitelist_comm_point(c->callback));
+ if( (*c->callback)(c, c->cb_arg, NETEVENT_NOERROR, &c->repinfo)) {
+ conn->doq_socket->current_conn = NULL;
+ if(!doq_stream_send_reply(conn, stream, c->buffer)) {
+ verbose(VERB_ALGO, "doq: failed to send_reply");
+ return 0;
+ }
+ return 1;
+ }
+ /* The callback returned zero: no reply is sent from here. */
+ conn->doq_socket->current_conn = NULL;
+ return 1;
+}
+
+/** doq receive data for a stream, more bytes of the incoming data.
+ * The stream data starts with a two byte length in network byte order,
+ * followed by that many bytes of query. stream->nread counts all bytes
+ * consumed so far, including the two length bytes.
+ * *recv_done is set to 1 when this call stored data and the query is now
+ * complete; it is not written otherwise.
+ * Returns 0 on failure (buffer allocation failed or no buffer). */
+static int
+doq_stream_recv_data(struct doq_stream* stream, const uint8_t* data,
+ size_t datalen, int* recv_done, struct doq_table* table)
+{
+ int got_data = 0;
+ /* read the tcplength uint16_t at the start */
+ if(stream->nread < 2) {
+ uint16_t tcplen = 0;
+ size_t todolen = 2 - stream->nread;
+
+ if(stream->nread > 0) {
+ /* put in the already read byte if there is one */
+ tcplen = stream->inlen;
+ }
+ if(datalen < todolen)
+ todolen = datalen;
+ /* The length bytes are assembled in wire order inside tcplen. */
+ memmove(((uint8_t*)&tcplen)+stream->nread, data, todolen);
+ stream->nread += todolen;
+ data += todolen;
+ datalen -= todolen;
+ if(stream->nread == 2) {
+ /* the initial length value is completed */
+ stream->inlen = ntohs(tcplen);
+ if(!doq_stream_datalen_complete(stream, table))
+ return 0;
+ } else {
+ /* store for later */
+ /* inlen temporarily holds the unconverted partial value */
+ stream->inlen = tcplen;
+ return 1;
+ }
+ }
+ /* if there are more data bytes */
+ if(datalen > 0) {
+ size_t to_write = datalen;
+ if(stream->nread-2 > stream->inlen) {
+ verbose(VERB_ALGO, "doq stream buffer too small");
+ return 0;
+ }
+ /* Never store more than the announced length. */
+ if(datalen > stream->inlen - (stream->nread-2))
+ to_write = stream->inlen - (stream->nread-2);
+ if(to_write > 0) {
+ if(!stream->in) {
+ verbose(VERB_ALGO, "doq: stream has "
+ "no buffer");
+ return 0;
+ }
+ memmove(stream->in+(stream->nread-2), data, to_write);
+ stream->nread += to_write;
+ data += to_write;
+ datalen -= to_write;
+ got_data = 1;
+ }
+ }
+ /* Are there extra bytes received after the end? If so, log them. */
+ if(datalen > 0) {
+ if(verbosity >= VERB_ALGO)
+ log_hex("doq stream has extra bytes received after end",
+ (void*)data, datalen);
+ }
+ /* Is the input data complete? */
+ if(got_data && stream->nread >= stream->inlen+2) {
+ if(!stream->in) {
+ verbose(VERB_ALGO, "doq: completed stream has "
+ "no buffer");
+ return 0;
+ }
+ *recv_done = 1;
+ }
+ return 1;
+}
+
+/** doq the FIN for a stream has arrived, so no further bytes follow.
+ * A stream whose query is not complete at that point is closed with a
+ * shutdown. Returns 0 if that close fails. */
+static int
+doq_stream_recv_fin(struct doq_conn* conn, struct doq_stream* stream, int
+	recv_done)
+{
+	if(stream->is_query_complete || recv_done)
+		return 1;
+	verbose(VERB_ALGO, "doq: stream recv FIN, but is "
+		"not complete, have %d of %d bytes",
+		((int)stream->nread)-2, (int)stream->inlen);
+	return doq_stream_close(conn, stream, 1) ? 1 : 0;
+}
+
+/* Fill buf with len bytes taken from the random state, one ub_random
+ * call per byte. */
+void doq_fill_rand(struct ub_randstate* rnd, uint8_t* buf, size_t len)
+{
+	size_t pos = 0;
+	while(pos < len) {
+		buf[pos] = ub_random(rnd)&0xff;
+		pos++;
+	}
+}
+
+/** fill data with a random connection id that is not present in the
+ * table yet, giving up after a fixed number of attempts.
+ * caller must hold lock on conid tree. */
+static int
+doq_conn_generate_new_conid(struct doq_conn* conn, uint8_t* data,
+	size_t datalen)
+{
+	int max_try = 100;
+	int attempt = 0;
+	while(attempt < max_try) {
+		doq_fill_rand(conn->doq_socket->rnd, data, datalen);
+		/* not in use by any connection, so it can be handed out */
+		if(!doq_conid_find(conn->table, data, datalen))
+			return 1;
+		attempt++;
+	}
+	verbose(VERB_ALGO, "doq_conn_generate_new_conid failed: could not "
+		"generate random unused connection id value in %d attempts.",
+		max_try);
+	return 0;
+}
+
+/** ngtcp2 rand callback function, the native_handle of the rand context
+ * is the ub_randstate to draw from */
+static void
+doq_rand_cb(uint8_t* dest, size_t destlen, const ngtcp2_rand_ctx* rand_ctx)
+{
+	doq_fill_rand((struct ub_randstate*)rand_ctx->native_handle,
+		dest, destlen);
+}
+
+/** ngtcp2 get_new_connection_id callback function, user_data is the
+ * doq_conn. Produces an unused connection id, its stateless reset
+ * token, and associates the id with the connection. */
+static int
+doq_get_new_connection_id_cb(ngtcp2_conn* ATTR_UNUSED(conn), ngtcp2_cid* cid,
+	uint8_t* token, size_t cidlen, void* user_data)
+{
+	struct doq_conn* doq_conn = (struct doq_conn*)user_data;
+	int result = NGTCP2_ERR_CALLBACK_FAILURE;
+	/* The conid tree stays write-locked from the duplicate check
+	 * during generation up to and including the insert, so that no
+	 * other modification can slip in and the id is guaranteed to be
+	 * unique. */
+	lock_rw_wrlock(&doq_conn->table->conid_lock);
+	if(!doq_conn_generate_new_conid(doq_conn, cid->data, cidlen))
+		goto done;
+	cid->datalen = cidlen;
+	if(ngtcp2_crypto_generate_stateless_reset_token(token,
+		doq_conn->doq_socket->static_secret,
+		doq_conn->doq_socket->static_secret_len, cid) != 0)
+		goto done;
+	if(!doq_conn_associate_conid(doq_conn, cid->data, cid->datalen))
+		goto done;
+	result = 0;
+done:
+	lock_rw_unlock(&doq_conn->table->conid_lock);
+	return result;
+}
+
+/** ngtcp2 remove_connection_id callback function, user_data is the
+ * doq_conn. The id is dissociated with the conid tree write-locked. */
+static int
+doq_remove_connection_id_cb(ngtcp2_conn* ATTR_UNUSED(conn),
+	const ngtcp2_cid* cid, void* user_data)
+{
+	struct doq_conn* doq_conn = (struct doq_conn*)user_data;
+	struct doq_table* table = doq_conn->table;
+	lock_rw_wrlock(&table->conid_lock);
+	doq_conn_dissociate_conid(doq_conn, cid->data, cid->datalen);
+	lock_rw_unlock(&table->conid_lock);
+	return 0;
+}
+
+/** doq submit a new token.
+ * Generates a regular token from the socket's static secret, the remote
+ * address of the connection's current path and the current timestamp,
+ * and submits it to the ngtcp2 connection.
+ * Returns 0 only if the submit call fails. */
+static int
+doq_submit_new_token(struct doq_conn* conn)
+{
+ uint8_t token[NGTCP2_CRYPTO_MAX_REGULAR_TOKENLEN];
+ ngtcp2_ssize tokenlen;
+ int ret;
+ const ngtcp2_path* path = ngtcp2_conn_get_path(conn->conn);
+ ngtcp2_tstamp ts = doq_get_timestamp_nanosec();
+
+ tokenlen = ngtcp2_crypto_generate_regular_token(token,
+ conn->doq_socket->static_secret,
+ conn->doq_socket->static_secret_len, path->remote.addr,
+ path->remote.addrlen, ts);
+ if(tokenlen < 0) {
+ log_err("doq ngtcp2_crypto_generate_regular_token failed");
+ /* NOTE(review): a failed token generation is logged but
+ * reported as success, so no token is submitted and the
+ * caller carries on; presumably intentional, confirm. */
+ return 1;
+ }
+
+ verbose(VERB_ALGO, "doq submit new token");
+ ret = ngtcp2_conn_submit_new_token(conn->conn, token, tokenlen);
+ if(ret != 0) {
+ log_err("doq ngtcp2_conn_submit_new_token failed: %s",
+ ngtcp2_strerror(ret));
+ return 0;
+ }
+ return 1;
+}
+
+/** ngtcp2 handshake_completed callback function, user_data is the
+ * doq_conn. Logs connection limits and the negotiated cipher and then
+ * submits a new token. Returns -1 if the token could not be submitted. */
+static int
+doq_handshake_completed_cb(ngtcp2_conn* ATTR_UNUSED(conn), void* user_data)
+{
+ struct doq_conn* doq_conn = (struct doq_conn*)user_data;
+ verbose(VERB_ALGO, "doq handshake_completed callback");
+ verbose(VERB_ALGO, "ngtcp2_conn_get_max_data_left is %d",
+ (int)ngtcp2_conn_get_max_data_left(doq_conn->conn));
+#ifdef HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI
+ verbose(VERB_ALGO, "ngtcp2_conn_get_max_local_streams_uni is %d",
+ (int)ngtcp2_conn_get_max_local_streams_uni(doq_conn->conn));
+#endif
+ verbose(VERB_ALGO, "ngtcp2_conn_get_streams_uni_left is %d",
+ (int)ngtcp2_conn_get_streams_uni_left(doq_conn->conn));
+ verbose(VERB_ALGO, "ngtcp2_conn_get_streams_bidi_left is %d",
+ (int)ngtcp2_conn_get_streams_bidi_left(doq_conn->conn));
+ verbose(VERB_ALGO, "negotiated cipher name is %s",
+ SSL_get_cipher_name(doq_conn->ssl));
+ /* NOTE(review): this block runs only for verbosity above VERB_ALGO,
+ * while the message inside is logged at VERB_ALGO; confirm that
+ * the stricter gate is intended. */
+ if(verbosity > VERB_ALGO) {
+ const unsigned char* alpn = NULL;
+ unsigned int alpnlen = 0;
+ char alpnstr[128];
+ SSL_get0_alpn_selected(doq_conn->ssl, &alpn, &alpnlen);
+ /* Truncate to fit, the ALPN bytes are not zero terminated. */
+ if(alpnlen > sizeof(alpnstr)-1)
+ alpnlen = sizeof(alpnstr)-1;
+ memmove(alpnstr, alpn, alpnlen);
+ alpnstr[alpnlen]=0;
+ verbose(VERB_ALGO, "negotiated ALPN is '%s'", alpnstr);
+ }
+
+ if(!doq_submit_new_token(doq_conn))
+ return -1;
+ return 0;
+}
+
+/** ngtcp2 stream_open callback function, user_data is the doq_conn.
+ * Creates the doq_stream for the new stream id and adds it to the
+ * connection. If the table has no room for it, the stream is shut down
+ * with NGTCP2_CONNECTION_REFUSED instead and the callback still returns
+ * success. Returns NGTCP2_ERR_CALLBACK_FAILURE if that shutdown fails
+ * or the stream cannot be allocated. */
+static int
+doq_stream_open_cb(ngtcp2_conn* ATTR_UNUSED(conn), int64_t stream_id,
+ void* user_data)
+{
+ struct doq_conn* doq_conn = (struct doq_conn*)user_data;
+ struct doq_stream* stream;
+ verbose(VERB_ALGO, "doq new stream %x", (int)stream_id);
+ if(doq_stream_find(doq_conn, stream_id)) {
+ verbose(VERB_ALGO, "doq: stream with this id already exists");
+ return 0;
+ }
+ /* Stream ids 0 and 4 skip the size check below. */
+ if(stream_id != 0 && stream_id != 4 && /* allow one stream on a new connection */
+ !doq_table_quic_size_available(doq_conn->doq_socket->table,
+ doq_conn->doq_socket->cfg, sizeof(*stream)
+ + 100 /* estimated query in */
+ + 512 /* estimated response out */
+ )) {
+ int rv;
+ verbose(VERB_ALGO, "doq: no mem for new stream");
+ /* Newer ngtcp2 versions take an extra flags argument. */
+ rv = ngtcp2_conn_shutdown_stream(doq_conn->conn,
+#ifdef HAVE_NGTCP2_CONN_SHUTDOWN_STREAM4
+ 0,
+#endif
+ stream_id, NGTCP2_CONNECTION_REFUSED);
+ if(rv != 0) {
+ log_err("ngtcp2_conn_shutdown_stream failed: %s",
+ ngtcp2_strerror(rv));
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+ return 0;
+ }
+ stream = doq_stream_create(stream_id);
+ if(!stream) {
+ log_err("doq: could not doq_stream_create: out of memory");
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+ /* Only the stream struct is accounted here; the in and out
+ * buffers are accounted when they are allocated. */
+ doq_table_quic_size_add(doq_conn->doq_socket->table, sizeof(*stream));
+ doq_conn_add_stream(doq_conn, stream);
+ return 0;
+}
+
+/** ngtcp2 recv_stream_data callback function.
+ * Passes the received bytes to the doq_stream with this stream id,
+ * handles the FIN flag, and extends the stream and connection flow
+ * control windows by datalen. When the receive is flagged as done,
+ * doq_stream_data_complete is called for the stream.
+ * Data for an unknown stream or for a closed stream is ignored.
+ * @return 0, or NGTCP2_ERR_CALLBACK_FAILURE when a stream routine fails.
+ */
+static int
+doq_recv_stream_data_cb(ngtcp2_conn* ATTR_UNUSED(conn), uint32_t flags,
+ int64_t stream_id, uint64_t offset, const uint8_t* data,
+ size_t datalen, void* user_data, void* ATTR_UNUSED(stream_user_data))
+{
+ int recv_done = 0; /* filled in through doq_stream_recv_data */
+ struct doq_conn* doq_conn = (struct doq_conn*)user_data;
+ struct doq_stream* stream;
+ verbose(VERB_ALGO, "doq recv stream data stream id %d offset %d "
+ "datalen %d%s%s", (int)stream_id, (int)offset, (int)datalen,
+ ((flags&NGTCP2_STREAM_DATA_FLAG_FIN)!=0?" FIN":""),
+#ifdef NGTCP2_STREAM_DATA_FLAG_0RTT
+ ((flags&NGTCP2_STREAM_DATA_FLAG_0RTT)!=0?" 0RTT":"")
+#else
+ ((flags&NGTCP2_STREAM_DATA_FLAG_EARLY)!=0?" EARLY":"")
+#endif
+ );
+ stream = doq_stream_find(doq_conn, stream_id);
+ if(!stream) {
+ verbose(VERB_ALGO, "doq: received stream data for "
+ "unknown stream %d", (int)stream_id);
+ return 0;
+ }
+ if(stream->is_closed) {
+ verbose(VERB_ALGO, "doq: stream is closed, ignore recv data");
+ return 0;
+ }
+ if(datalen != 0) { /* a callback can carry only the FIN, without data */
+ if(!doq_stream_recv_data(stream, data, datalen, &recv_done,
+ doq_conn->doq_socket->table))
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+ if((flags&NGTCP2_STREAM_DATA_FLAG_FIN)!=0) {
+ if(!doq_stream_recv_fin(doq_conn, stream, recv_done))
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+ ngtcp2_conn_extend_max_stream_offset(doq_conn->conn, stream_id,
+ datalen); /* return value is not checked */
+ ngtcp2_conn_extend_max_offset(doq_conn->conn, datalen);
+ if(recv_done) {
+ if(!doq_stream_data_complete(doq_conn, stream))
+ return NGTCP2_ERR_CALLBACK_FAILURE;
+ }
+ return 0;
+}
+
+/** ngtcp2 stream_close callback function.
+ * Logs the close, with the application error code if the flags say it
+ * is set, and closes the doq_stream with this id when it exists.
+ * @return 0, or NGTCP2_ERR_CALLBACK_FAILURE when doq_stream_close fails.
+ */
+static int
+doq_stream_close_cb(ngtcp2_conn* ATTR_UNUSED(conn), uint32_t flags,
+	int64_t stream_id, uint64_t app_error_code, void* user_data,
+	void* ATTR_UNUSED(stream_user_data))
+{
+	struct doq_conn* doq_conn = (struct doq_conn*)user_data;
+	struct doq_stream* stream;
+	int code_is_set =
+		(flags&NGTCP2_STREAM_CLOSE_FLAG_APP_ERROR_CODE_SET)!=0;
+
+	if(code_is_set) {
+		/* the flag is set in this branch, so the marker text is
+		 * always printed here */
+		verbose(VERB_ALGO, "doq stream close for stream id %d %sapp_error_code %d",
+			(int)stream_id, "APP_ERROR_CODE_SET ",
+			(int)app_error_code);
+	} else {
+		verbose(VERB_ALGO, "doq stream close for stream id %d",
+			(int)stream_id);
+	}
+
+	stream = doq_stream_find(doq_conn, stream_id);
+	if(stream) {
+		if(!doq_stream_close(doq_conn, stream, 0))
+			return NGTCP2_ERR_CALLBACK_FAILURE;
+		return 0;
+	}
+	verbose(VERB_ALGO, "doq: stream close for "
+		"unknown stream %d", (int)stream_id);
+	return 0;
+}
+
+/** ngtcp2 stream_reset callback function.
+ * Logs the reset and closes the doq_stream with this id when it exists.
+ * A reset for an unknown stream is only logged.
+ * @return 0, or NGTCP2_ERR_CALLBACK_FAILURE when doq_stream_close fails.
+ */
+static int
+doq_stream_reset_cb(ngtcp2_conn* ATTR_UNUSED(conn), int64_t stream_id,
+	uint64_t final_size, uint64_t app_error_code, void* user_data,
+	void* ATTR_UNUSED(stream_user_data))
+{
+	struct doq_conn* doq_conn = (struct doq_conn*)user_data;
+	struct doq_stream* stream;
+
+	verbose(VERB_ALGO, "doq stream reset for stream id %d final_size %d "
+		"app_error_code %d", (int)stream_id, (int)final_size,
+		(int)app_error_code);
+
+	stream = doq_stream_find(doq_conn, stream_id);
+	if(stream) {
+		if(!doq_stream_close(doq_conn, stream, 0))
+			return NGTCP2_ERR_CALLBACK_FAILURE;
+		return 0;
+	}
+	verbose(VERB_ALGO, "doq: stream reset for "
+		"unknown stream %d", (int)stream_id);
+	return 0;
+}
+
+/** ngtcp2 acked_stream_data_offset callback function.
+ * The peer acked the stream data from [offset .. offset+datalen).
+ * When the acked range reaches stream->outlen on a stream that is not
+ * closed, the in and out buffers of the stream are removed.
+ * NOTE(review): the write routine counts outlen+2 bytes per stream
+ * (a 2 byte length and then the data); confirm that comparing with
+ * outlen here, and not outlen+2, is intended.
+ * @return 0 always.
+ */
+static int
+doq_acked_stream_data_offset_cb(ngtcp2_conn* ATTR_UNUSED(conn),
+	int64_t stream_id, uint64_t offset, uint64_t datalen, void* user_data,
+	void* ATTR_UNUSED(stream_user_data))
+{
+	struct doq_conn* doq_conn = (struct doq_conn*)user_data;
+	struct doq_stream* stream;
+
+	verbose(VERB_ALGO, "doq stream acked data for stream id %d offset %d "
+		"datalen %d", (int)stream_id, (int)offset, (int)datalen);
+
+	stream = doq_stream_find(doq_conn, stream_id);
+	if(!stream) {
+		verbose(VERB_ALGO, "doq: stream acked data for "
+			"unknown stream %d", (int)stream_id);
+		return 0;
+	}
+	/* Nothing to release for a closed stream. */
+	if(!stream->is_closed && offset+datalen >= stream->outlen) {
+		doq_stream_remove_in_buffer(stream,
+			doq_conn->doq_socket->table);
+		doq_stream_remove_out_buffer(stream,
+			doq_conn->doq_socket->table);
+	}
+	return 0;
+}
+
+/** ngtcp2 log_printf callback function.
+ * Formats the library message into a 1024 byte buffer with vsnprintf
+ * and logs it at VERB_ALGO with a "libngtcp2: " prefix. */
+static void
+doq_log_printf_cb(void* ATTR_UNUSED(user_data), const char* fmt, ...)
+{
+	char msg[1024];
+	va_list args;
+
+	va_start(args, fmt);
+	vsnprintf(msg, sizeof(msg), fmt, args);
+	va_end(args);
+	verbose(VERB_ALGO, "libngtcp2: %s", msg);
+}
+
+#ifndef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT
+/** the doq application tx key callback, false on failure.
+ * It only logs the data and stream limits of the connection, and
+ * as written it always returns 1.
+ * @param conn: the doq connection whose limits are logged.
+ */
+static int
+doq_application_tx_key_cb(struct doq_conn* conn)
+{
+ verbose(VERB_ALGO, "doq application tx key cb");
+ /* The server does not want to open streams to the client,
+ * the client instead initiates by opening bidi streams. */
+ verbose(VERB_ALGO, "doq ngtcp2_conn_get_max_data_left is %d",
+ (int)ngtcp2_conn_get_max_data_left(conn->conn));
+#ifdef HAVE_NGTCP2_CONN_GET_MAX_LOCAL_STREAMS_UNI
+ verbose(VERB_ALGO, "doq ngtcp2_conn_get_max_local_streams_uni is %d",
+ (int)ngtcp2_conn_get_max_local_streams_uni(conn->conn));
+#endif
+ verbose(VERB_ALGO, "doq ngtcp2_conn_get_streams_uni_left is %d",
+ (int)ngtcp2_conn_get_streams_uni_left(conn->conn));
+ verbose(VERB_ALGO, "doq ngtcp2_conn_get_streams_bidi_left is %d",
+ (int)ngtcp2_conn_get_streams_bidi_left(conn->conn));
+ return 1;
+}
+
+/** quic_method set_encryption_secrets function.
+ * Converts the OpenSSL encryption level to the ngtcp2 level, then
+ * derives and installs the rx key from read_secret and the tx key from
+ * write_secret on the ngtcp2 connection; each secret is skipped when it
+ * is NULL. The doq_conn is taken from the SSL app data.
+ * After a tx key at the application level, the application tx key
+ * callback is called.
+ * @return 1 on success, 0 on failure.
+ */
+static int
+doq_set_encryption_secrets(SSL *ssl, OSSL_ENCRYPTION_LEVEL ossl_level,
+ const uint8_t *read_secret, const uint8_t *write_secret,
+ size_t secret_len)
+{
+ struct doq_conn* doq_conn = (struct doq_conn*)SSL_get_app_data(ssl);
+#ifdef HAVE_NGTCP2_ENCRYPTION_LEVEL
+ ngtcp2_encryption_level
+#else
+ ngtcp2_crypto_level
+#endif
+ level =
+#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_FROM_OSSL_ENCRYPTION_LEVEL
+ ngtcp2_crypto_quictls_from_ossl_encryption_level(ossl_level);
+#else
+ ngtcp2_crypto_openssl_from_ossl_encryption_level(ossl_level);
+#endif
+
+ if(read_secret) {
+ verbose(VERB_ALGO, "doq: ngtcp2_crypto_derive_and_install_rx_key for level %d ossl %d", (int)level, (int)ossl_level);
+ if(ngtcp2_crypto_derive_and_install_rx_key(doq_conn->conn,
+ NULL, NULL, NULL, level, read_secret, secret_len)
+ != 0) {
+ log_err("ngtcp2_crypto_derive_and_install_rx_key "
+ "failed");
+ return 0;
+ }
+ }
+
+ if(write_secret) {
+ verbose(VERB_ALGO, "doq: ngtcp2_crypto_derive_and_install_tx_key for level %d ossl %d", (int)level, (int)ossl_level);
+ if(ngtcp2_crypto_derive_and_install_tx_key(doq_conn->conn,
+ NULL, NULL, NULL, level, write_secret, secret_len)
+ != 0) {
+ log_err("ngtcp2_crypto_derive_and_install_tx_key "
+ "failed");
+ return 0;
+ }
+ if(level == NGTCP2_CRYPTO_LEVEL_APPLICATION) { /* NOTE(review): the type of level depends on HAVE_NGTCP2_ENCRYPTION_LEVEL but this constant does not; confirm it exists in every build that compiles this code */
+ if(!doq_application_tx_key_cb(doq_conn))
+ return 0;
+ }
+ }
+ return 1;
+}
+
+/** quic_method add_handshake_data function.
+ * Submits the handshake bytes from the TLS library as crypto data to
+ * the ngtcp2 connection, at the ngtcp2 level that corresponds to
+ * ossl_level. The doq_conn is taken from the SSL app data.
+ * On failure the ngtcp2 error code is stored as the tls error of the
+ * connection.
+ * @return 1 on success, 0 on failure.
+ */
+static int
+doq_add_handshake_data(SSL *ssl, OSSL_ENCRYPTION_LEVEL ossl_level,
+ const uint8_t *data, size_t len)
+{
+ struct doq_conn* doq_conn = (struct doq_conn*)SSL_get_app_data(ssl);
+#ifdef HAVE_NGTCP2_ENCRYPTION_LEVEL
+ ngtcp2_encryption_level
+#else
+ ngtcp2_crypto_level
+#endif
+ level =
+#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_FROM_OSSL_ENCRYPTION_LEVEL
+ ngtcp2_crypto_quictls_from_ossl_encryption_level(ossl_level);
+#else
+ ngtcp2_crypto_openssl_from_ossl_encryption_level(ossl_level);
+#endif
+ int rv;
+
+ verbose(VERB_ALGO, "doq_add_handshake_data: "
+ "ngtcp2_con_submit_crypto_data level %d", (int)level);
+ rv = ngtcp2_conn_submit_crypto_data(doq_conn->conn, level, data, len);
+ if(rv != 0) {
+ log_err("ngtcp2_conn_submit_crypto_data failed: %s",
+ ngtcp2_strerror(rv));
+ ngtcp2_conn_set_tls_error(doq_conn->conn, rv); /* keep the error on the conn */
+ return 0;
+ }
+ return 1;
+}
+
+/** quic_method flush_flight function.
+ * Nothing is flushed here, it returns 1 without using the ssl. */
+static int
+doq_flush_flight(SSL* ATTR_UNUSED(ssl))
+{
+ return 1;
+}
+
+/** quic_method send_alert function.
+ * Stores the alert in the tls_alert member of the doq_conn that is the
+ * SSL app data; the level is not used. Returns 1. */
+static int
+doq_send_alert(SSL *ssl, enum ssl_encryption_level_t ATTR_UNUSED(level),
+ uint8_t alert)
+{
+ struct doq_conn* doq_conn = (struct doq_conn*)SSL_get_app_data(ssl);
+ doq_conn->tls_alert = alert; /* read back when a crypto error is handled */
+ return 1;
+}
+#endif /* HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT */
+
+/** ALPN select callback for the doq SSL context.
+ * Selects the "doq" protocol from the list offered by the client.
+ * @return SSL_TLSEXT_ERR_OK when "doq" is negotiated, otherwise
+ *	SSL_TLSEXT_ERR_ALERT_FATAL.
+ */
+static int
+doq_alpn_select_cb(SSL* ATTR_UNUSED(ssl), const unsigned char** out,
+	unsigned char* outlen, const unsigned char* in, unsigned int inlen,
+	void* ATTR_UNUSED(arg))
+{
+	/* select "doq", in wire format: a length byte and then the name */
+	if(SSL_select_next_proto((void*)out, outlen,
+		(const unsigned char*)"\x03""doq", 4, in, inlen)
+		!= OPENSSL_NPN_NEGOTIATED) {
+		verbose(VERB_ALGO, "doq alpn_select_cb: ALPN from client does "
+			"not have 'doq'");
+		return SSL_TLSEXT_ERR_ALERT_FATAL;
+	}
+	return SSL_TLSEXT_ERR_OK;
+}
+
+/** create new tls context (SSL_CTX) for the server doq socket.
+ * The context is limited to TLS 1.3, gets the ALPN select callback
+ * when available, and loads the certificate chain and private key
+ * named in the doq_socket. When ssl_verify_pem is set and not empty,
+ * client certificates are required and verified against that file.
+ * Finally the context is set up for QUIC, by the ngtcp2 crypto helper
+ * or by an allocated SSL_QUIC_METHOD stored in doq_socket->quic_method.
+ * @param doq_socket: provides the pem file names, receives quic_method.
+ * @return the new SSL_CTX, or NULL on failure; on failure the partially
+ *	set up context is freed.
+ */
+static SSL_CTX*
+doq_ctx_server_setup(struct doq_server_socket* doq_socket)
+{
+ char* sid_ctx = "unbound server";
+#ifndef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT
+ SSL_QUIC_METHOD* quic_method;
+#endif
+ SSL_CTX* ctx = SSL_CTX_new(TLS_server_method());
+ if(!ctx) {
+ log_crypto_err("Could not SSL_CTX_new");
+ return NULL;
+ }
+ SSL_CTX_set_options(ctx,
+ (SSL_OP_ALL & ~SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS) |
+ SSL_OP_SINGLE_ECDH_USE |
+ SSL_OP_CIPHER_SERVER_PREFERENCE |
+ SSL_OP_NO_ANTI_REPLAY);
+ SSL_CTX_set_mode(ctx, SSL_MODE_RELEASE_BUFFERS);
+ SSL_CTX_set_min_proto_version(ctx, TLS1_3_VERSION); /* min and max equal: TLS 1.3 only */
+ SSL_CTX_set_max_proto_version(ctx, TLS1_3_VERSION);
+#ifdef HAVE_SSL_CTX_SET_ALPN_SELECT_CB
+ SSL_CTX_set_alpn_select_cb(ctx, doq_alpn_select_cb, NULL);
+#endif
+ SSL_CTX_set_default_verify_paths(ctx);
+ if(!SSL_CTX_use_certificate_chain_file(ctx,
+ doq_socket->ssl_service_pem)) {
+ log_err("doq: error for cert file: %s",
+ doq_socket->ssl_service_pem);
+ log_crypto_err("doq: error in "
+ "SSL_CTX_use_certificate_chain_file");
+ SSL_CTX_free(ctx);
+ return NULL;
+ }
+ if(!SSL_CTX_use_PrivateKey_file(ctx, doq_socket->ssl_service_key,
+ SSL_FILETYPE_PEM)) {
+ log_err("doq: error for private key file: %s",
+ doq_socket->ssl_service_key);
+ log_crypto_err("doq: error in SSL_CTX_use_PrivateKey_file");
+ SSL_CTX_free(ctx);
+ return NULL;
+ }
+ if(!SSL_CTX_check_private_key(ctx)) {
+ log_err("doq: error for key file: %s",
+ doq_socket->ssl_service_key);
+ log_crypto_err("doq: error in SSL_CTX_check_private_key");
+ SSL_CTX_free(ctx);
+ return NULL;
+ }
+ SSL_CTX_set_session_id_context(ctx, (void*)sid_ctx, strlen(sid_ctx));
+ if(doq_socket->ssl_verify_pem && doq_socket->ssl_verify_pem[0]) { /* client verification is optional */
+ if(!SSL_CTX_load_verify_locations(ctx,
+ doq_socket->ssl_verify_pem, NULL)) {
+ log_err("doq: error for verify pem file: %s",
+ doq_socket->ssl_verify_pem);
+ log_crypto_err("doq: error in "
+ "SSL_CTX_load_verify_locations");
+ SSL_CTX_free(ctx);
+ return NULL;
+ }
+ SSL_CTX_set_client_CA_list(ctx, SSL_load_client_CA_file(
+ doq_socket->ssl_verify_pem)); /* NOTE(review): the result of SSL_load_client_CA_file is not checked */
+ SSL_CTX_set_verify(ctx, SSL_VERIFY_PEER|
+ SSL_VERIFY_CLIENT_ONCE|
+ SSL_VERIFY_FAIL_IF_NO_PEER_CERT, NULL);
+ }
+
+ SSL_CTX_set_max_early_data(ctx, 0xffffffff);
+#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT
+ if(ngtcp2_crypto_quictls_configure_server_context(ctx) != 0) {
+ log_err("ngtcp2_crypto_quictls_configure_server_context failed");
+ SSL_CTX_free(ctx);
+ return NULL;
+ }
+#else
+ /* The quic_method needs to remain valid during the SSL_CTX
+ * lifetime, so we allocate it. It is freed with the
+ * doq_server_socket. */
+ quic_method = calloc(1, sizeof(SSL_QUIC_METHOD));
+ if(!quic_method) {
+ log_err("calloc failed: out of memory");
+ SSL_CTX_free(ctx);
+ return NULL;
+ }
+ doq_socket->quic_method = quic_method;
+ quic_method->set_encryption_secrets = doq_set_encryption_secrets;
+ quic_method->add_handshake_data = doq_add_handshake_data;
+ quic_method->flush_flight = doq_flush_flight;
+ quic_method->send_alert = doq_send_alert;
+ SSL_CTX_set_quic_method(ctx, doq_socket->quic_method);
+#endif
+ return ctx;
+}
+
+/** Get the ngtcp2_conn from ssl userdata of type ngtcp2_conn_ref.
+ * The user_data of the conn_ref is the struct doq_conn. */
+static ngtcp2_conn* doq_conn_ref_get_conn(ngtcp2_crypto_conn_ref* conn_ref)
+{
+	return ((struct doq_conn*)conn_ref->user_data)->conn;
+}
+
+/** create new SSL session for server connection.
+ * The SSL app data is set to &conn->conn_ref when
+ * HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT is defined, and
+ * to the conn itself otherwise. The session is put in accept state
+ * with QUIC early data enabled.
+ * @param ctx: the SSL_CTX to create the session from.
+ * @param conn: the doq connection that the session belongs to.
+ * @return the new SSL, or NULL when SSL_new fails.
+ */
+static SSL*
+doq_ssl_server_setup(SSL_CTX* ctx, struct doq_conn* conn)
+{
+ SSL* ssl = SSL_new(ctx);
+ if(!ssl) {
+ log_crypto_err("doq: SSL_new failed");
+ return NULL;
+ }
+#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT
+ conn->conn_ref.get_conn = &doq_conn_ref_get_conn;
+ conn->conn_ref.user_data = conn;
+ SSL_set_app_data(ssl, &conn->conn_ref);
+#else
+ SSL_set_app_data(ssl, conn); /* the quic_method functions cast it back to doq_conn */
+#endif
+ SSL_set_accept_state(ssl);
+ SSL_set_quic_early_data_enabled(ssl, 1);
+ return ssl;
+}
+
+/** setup the doq_socket server tls context.
+ * Stores the created context in doq_socket->ctx.
+ * @return 1 on success, 0 when the context could not be created. */
+int
+doq_socket_setup_ctx(struct doq_server_socket* doq_socket)
+{
+	doq_socket->ctx = doq_ctx_server_setup(doq_socket);
+	return doq_socket->ctx ? 1 : 0;
+}
+
+int
+doq_conn_setup(struct doq_conn* conn, uint8_t* scid, size_t scidlen,
+ uint8_t* ocid, size_t ocidlen, const uint8_t* token, size_t tokenlen)
+{ /* Set up the server side ngtcp2 connection and the SSL for conn.
+ * Fills in the path from conn->key.paddr, the ngtcp2 callbacks,
+ * settings (with the token) and transport parameters, generates a
+ * new server connection id, creates the ngtcp2_conn, registers the
+ * connection ids, creates the SSL and enables writing.
+ * If ocid is not NULL it is used as original_dcid and conn->key.dcid
+ * is set as the retry_scid.
+ * Returns 1 on success, 0 on failure. */
+ int rv;
+ struct ngtcp2_cid dcid, sv_scid, scid_cid;
+ struct ngtcp2_path path;
+ struct ngtcp2_callbacks callbacks;
+ struct ngtcp2_settings settings;
+ struct ngtcp2_transport_params params;
+ memset(&dcid, 0, sizeof(dcid));
+ memset(&sv_scid, 0, sizeof(sv_scid));
+ memset(&scid_cid, 0, sizeof(scid_cid));
+ memset(&path, 0, sizeof(path));
+ memset(&callbacks, 0, sizeof(callbacks));
+ memset(&settings, 0, sizeof(settings));
+ memset(&params, 0, sizeof(params));
+
+ ngtcp2_cid_init(&scid_cid, scid, scidlen);
+ ngtcp2_cid_init(&dcid, conn->key.dcid, conn->key.dcidlen); /* dcid is initialised but not passed on below */
+
+ path.remote.addr = (struct sockaddr*)&conn->key.paddr.addr;
+ path.remote.addrlen = conn->key.paddr.addrlen;
+ path.local.addr = (struct sockaddr*)&conn->key.paddr.localaddr;
+ path.local.addrlen = conn->key.paddr.localaddrlen;
+
+ callbacks.recv_client_initial = ngtcp2_crypto_recv_client_initial_cb;
+ callbacks.recv_crypto_data = ngtcp2_crypto_recv_crypto_data_cb;
+ callbacks.encrypt = ngtcp2_crypto_encrypt_cb;
+ callbacks.decrypt = ngtcp2_crypto_decrypt_cb;
+ callbacks.hp_mask = ngtcp2_crypto_hp_mask;
+ callbacks.update_key = ngtcp2_crypto_update_key_cb;
+ callbacks.delete_crypto_aead_ctx =
+ ngtcp2_crypto_delete_crypto_aead_ctx_cb;
+ callbacks.delete_crypto_cipher_ctx =
+ ngtcp2_crypto_delete_crypto_cipher_ctx_cb;
+ callbacks.get_path_challenge_data =
+ ngtcp2_crypto_get_path_challenge_data_cb;
+ callbacks.version_negotiation = ngtcp2_crypto_version_negotiation_cb;
+ callbacks.rand = doq_rand_cb;
+ callbacks.get_new_connection_id = doq_get_new_connection_id_cb;
+ callbacks.remove_connection_id = doq_remove_connection_id_cb;
+ callbacks.handshake_completed = doq_handshake_completed_cb;
+ callbacks.stream_open = doq_stream_open_cb;
+ callbacks.stream_close = doq_stream_close_cb;
+ callbacks.stream_reset = doq_stream_reset_cb;
+ callbacks.acked_stream_data_offset = doq_acked_stream_data_offset_cb;
+ callbacks.recv_stream_data = doq_recv_stream_data_cb;
+
+ ngtcp2_settings_default(&settings);
+ if(verbosity >= VERB_ALGO) { /* library logging only when it would be printed */
+ settings.log_printf = doq_log_printf_cb;
+ }
+ settings.rand_ctx.native_handle = conn->doq_socket->rnd;
+ settings.initial_ts = doq_get_timestamp_nanosec();
+ settings.max_stream_window = 6*1024*1024;
+ settings.max_window = 6*1024*1024;
+#ifdef HAVE_STRUCT_NGTCP2_SETTINGS_TOKENLEN
+ settings.token = (void*)token;
+ settings.tokenlen = tokenlen;
+#else
+ settings.token.base = (void*)token;
+ settings.token.len = tokenlen;
+#endif
+
+ ngtcp2_transport_params_default(&params);
+ params.max_idle_timeout = conn->doq_socket->idle_timeout;
+ params.active_connection_id_limit = 7;
+ params.initial_max_stream_data_bidi_local = 256*1024;
+ params.initial_max_stream_data_bidi_remote = 256*1024;
+ params.initial_max_data = 1024*1024;
+ /* DoQ uses bidi streams, so we allow 0 uni streams. */
+ params.initial_max_streams_uni = 0;
+ /* Initial max on number of bidi streams the remote end can open.
+ * That is the number of queries it can make, at first. */
+ params.initial_max_streams_bidi = 10;
+ if(ocid) { /* an ocid was passed: conn->key.dcid becomes the retry_scid */
+ ngtcp2_cid_init(&params.original_dcid, ocid, ocidlen);
+ ngtcp2_cid_init(&params.retry_scid, conn->key.dcid,
+ conn->key.dcidlen);
+ params.retry_scid_present = 1;
+ } else {
+ ngtcp2_cid_init(&params.original_dcid, conn->key.dcid,
+ conn->key.dcidlen);
+ }
+#ifdef HAVE_STRUCT_NGTCP2_TRANSPORT_PARAMS_ORIGINAL_DCID_PRESENT
+ params.original_dcid_present = 1;
+#endif
+ doq_fill_rand(conn->doq_socket->rnd, params.stateless_reset_token,
+ sizeof(params.stateless_reset_token));
+ sv_scid.datalen = conn->doq_socket->sv_scidlen;
+ lock_rw_wrlock(&conn->table->conid_lock); /* held until the connection ids are registered */
+ if(!doq_conn_generate_new_conid(conn, sv_scid.data, sv_scid.datalen)) {
+ lock_rw_unlock(&conn->table->conid_lock);
+ return 0;
+ }
+
+ rv = ngtcp2_conn_server_new(&conn->conn, &scid_cid, &sv_scid, &path,
+ conn->version, &callbacks, &settings, &params, NULL, conn);
+ if(rv != 0) {
+ lock_rw_unlock(&conn->table->conid_lock);
+ log_err("ngtcp2_conn_server_new failed: %s",
+ ngtcp2_strerror(rv));
+ return 0;
+ }
+ if(!doq_conn_setup_conids(conn)) { /* NOTE(review): conn->conn is not deleted on this and the next failure path; presumably the caller cleans up the doq_conn, confirm */
+ lock_rw_unlock(&conn->table->conid_lock);
+ log_err("doq_conn_setup_conids failed: out of memory");
+ return 0;
+ }
+ lock_rw_unlock(&conn->table->conid_lock);
+ conn->ssl = doq_ssl_server_setup((SSL_CTX*)conn->doq_socket->ctx,
+ conn);
+ if(!conn->ssl) {
+ log_err("doq_ssl_server_setup failed");
+ return 0;
+ }
+ ngtcp2_conn_set_tls_native_handle(conn->conn, conn->ssl);
+ doq_conn_write_enable(conn);
+ return 1;
+}
+
+/** Find the doq_conid with the given connection id bytes in the
+ * conid tree of the table. The data is only referenced for the lookup.
+ * @return the tree element, or NULL if it is not in the tree. */
+struct doq_conid*
+doq_conid_find(struct doq_table* table, const uint8_t* data, size_t datalen)
+{
+	struct doq_conid lookup;
+	struct rbnode_type* found;
+
+	/* only the members used for the search are filled in */
+	lookup.node.key = &lookup;
+	lookup.cid = (void*)data;
+	lookup.cidlen = datalen;
+	found = rbtree_search(table->conid_tree, &lookup);
+	return found ? (struct doq_conid*)found->key : NULL;
+}
+
+/** insert conid in the conid list, at the front of the list */
+static void
+doq_conid_list_insert(struct doq_conn* conn, struct doq_conid* conid)
+{
+	struct doq_conid* head = conn->conid_list;
+
+	conid->prev = NULL;
+	conid->next = head;
+	if(head)
+		head->prev = conid;
+	conn->conid_list = conid;
+}
+
+/** remove conid from the conid list; the conid itself is not freed */
+static void
+doq_conid_list_remove(struct doq_conn* conn, struct doq_conid* conid)
+{
+	struct doq_conid* before = conid->prev;
+	struct doq_conid* after = conid->next;
+
+	if(before)
+		before->next = after;
+	else	conn->conid_list = after; /* it was the first element */
+	if(after)
+		after->prev = before;
+}
+
+/** create a doq_conid.
+ * The connection id bytes and the dcid of the key are copied, so the
+ * new element does not reference the caller's buffers.
+ * @return the new element, or NULL on allocation failure. */
+static struct doq_conid*
+doq_conid_create(uint8_t* data, size_t datalen, struct doq_conn_key* key)
+{
+	struct doq_conid* conid = calloc(1, sizeof(*conid));
+	if(!conid)
+		return NULL;
+
+	conid->cid = memdup(data, datalen);
+	if(!conid->cid) {
+		free(conid);
+		return NULL;
+	}
+	conid->cidlen = datalen;
+	conid->node.key = conid;
+
+	/* struct copy of the key, then replace the dcid with a copy */
+	conid->key = *key;
+	conid->key.dcid = memdup(key->dcid, key->dcidlen);
+	if(conid->key.dcid)
+		return conid;
+
+	free(conid->cid);
+	free(conid);
+	return NULL;
+}
+
+/** Delete a doq_conid and the dcid and cid copies that it holds.
+ * A NULL conid is ignored. */
+void
+doq_conid_delete(struct doq_conid* conid)
+{
+	if(conid) {
+		free(conid->key.dcid);
+		free(conid->cid);
+		free(conid);
+	}
+}
+
+/** return true if the conid is for the conn.
+ * That is when the dcid, the remote address, the local address and
+ * the ifindex in the key of the conid equal those of the conn. */
+static int
+conid_is_for_conn(struct doq_conn* conn, struct doq_conid* conid)
+{
+	const struct doq_conn_key* a = &conid->key;
+	const struct doq_conn_key* b = &conn->key;
+
+	/* every length is compared before the bytes of that length */
+	if(a->dcidlen != b->dcidlen)
+		return 0;
+	if(memcmp(a->dcid, b->dcid, a->dcidlen) != 0)
+		return 0;
+	if(a->paddr.addrlen != b->paddr.addrlen)
+		return 0;
+	if(memcmp(&a->paddr.addr, &b->paddr.addr, a->paddr.addrlen) != 0)
+		return 0;
+	if(a->paddr.localaddrlen != b->paddr.localaddrlen)
+		return 0;
+	if(memcmp(&a->paddr.localaddr, &b->paddr.localaddr,
+		a->paddr.localaddrlen) != 0)
+		return 0;
+	return a->paddr.ifindex == b->paddr.ifindex;
+}
+
+/** Associate the connection id with the conn: a copy is put in the
+ * conid list of the conn and in the conid tree of the table.
+ * An id that is already in the tree is left as it is.
+ * @return 1 on success (also if already present), 0 on malloc failure. */
+int
+doq_conn_associate_conid(struct doq_conn* conn, uint8_t* data, size_t datalen)
+{
+	struct doq_conid* conid = doq_conid_find(conn->table, data, datalen);
+
+	if(conid) {
+		if(!conid_is_for_conn(conn, conid)) {
+			verbose(VERB_ALGO, "doq connection id already exists for "
+				"another doq_conn. Ignoring second connection id.");
+			/* Already exists to another conn, ignore it.
+			 * This works, in that the conid is listed in the
+			 * doq_conn conid_list element, and removed from
+			 * there. So our conid tree and list are fine, when
+			 * created and removed. The tree now does not have
+			 * the lookup element pointing to this connection. */
+		}
+		/* already inserted, for this conn or for another one */
+		return 1;
+	}
+
+	conid = doq_conid_create(data, datalen, &conn->key);
+	if(!conid)
+		return 0;
+	doq_conid_list_insert(conn, conid);
+	(void)rbtree_insert(conn->table->conid_tree, &conid->node);
+	return 1;
+}
+
+/** Remove the connection id from the conid tree and from the conid
+ * list of the conn, and delete it. Nothing is done when the id is not
+ * in the tree, or when the tree element is for another connection. */
+void
+doq_conn_dissociate_conid(struct doq_conn* conn, const uint8_t* data,
+	size_t datalen)
+{
+	struct doq_conid* conid = doq_conid_find(conn->table, data, datalen);
+
+	if(!conid || !conid_is_for_conn(conn, conid))
+		return;
+	(void)rbtree_delete(conn->table->conid_tree, conid->node.key);
+	doq_conid_list_remove(conn, conid);
+	doq_conid_delete(conid);
+}
+
+/** associate the scid array and also the dcid.
+ * caller must hold the locks on conn and doq_table.conid_lock.
+ * @return 1 on success, 0 as soon as one of the associations fails. */
+static int
+doq_conn_setup_id_array_and_dcid(struct doq_conn* conn,
+	struct ngtcp2_cid* scids, size_t num_scid)
+{
+	size_t i = 0;
+
+	while(i < num_scid) {
+		if(!doq_conn_associate_conid(conn, scids[i].data,
+			scids[i].datalen))
+			return 0;
+		i++;
+	}
+	/* the dcid from the connection key is associated last */
+	return doq_conn_associate_conid(conn, conn->key.dcid,
+		conn->key.dcidlen) ? 1 : 0;
+}
+
+/** Fetch the scids of the ngtcp2 connection and associate them, and
+ * the dcid, with the conn. Up to 4 scids are fetched into an array on
+ * the stack, for more a temporary array is allocated.
+ * @return 1 on success, 0 on failure. */
+int
+doq_conn_setup_conids(struct doq_conn* conn)
+{
+	struct ngtcp2_cid stack_ids[4];
+	struct ngtcp2_cid* heap_ids;
+	int ok;
+	size_t num_scid =
+#ifndef HAVE_NGTCP2_CONN_GET_NUM_SCID
+		ngtcp2_conn_get_scid(conn->conn, NULL);
+#else
+		ngtcp2_conn_get_num_scid(conn->conn);
+#endif
+
+	if(num_scid <= 4) {
+		/* Usually there are not that many scids when just accepted,
+		 * like only 2. */
+		ngtcp2_conn_get_scid(conn->conn, stack_ids);
+		return doq_conn_setup_id_array_and_dcid(conn, stack_ids,
+			num_scid);
+	}
+
+	heap_ids = calloc(num_scid, sizeof(struct ngtcp2_cid));
+	if(!heap_ids)
+		return 0;
+	ngtcp2_conn_get_scid(conn->conn, heap_ids);
+	ok = doq_conn_setup_id_array_and_dcid(conn, heap_ids, num_scid);
+	free(heap_ids);
+	return ok ? 1 : 0;
+}
+
+/** Remove all the connection ids of the conn from the conid tree and
+ * delete them; the conid list of the conn is empty afterwards.
+ * A NULL conn is ignored. */
+void
+doq_conn_clear_conids(struct doq_conn* conn)
+{
+	struct doq_conid* cur, *following;
+
+	if(!conn)
+		return;
+	for(cur = conn->conid_list; cur; cur = following) {
+		/* pick up the next pointer before the element is freed */
+		following = cur->next;
+		(void)rbtree_delete(conn->table->conid_tree, cur->node.key);
+		doq_conid_delete(cur);
+	}
+	conn->conid_list = NULL;
+}
+
+/** Get the current realtime clock value as a nanosecond timestamp.
+ * Uses clock_gettime(CLOCK_REALTIME) when CLOCK_REALTIME is defined,
+ * and gettimeofday otherwise. A failure of the clock call is logged,
+ * and the result is then computed from a zeroed time value.
+ * @return seconds*1000000000 plus the sub-second part in nanoseconds.
+ */
+ngtcp2_tstamp doq_get_timestamp_nanosec(void)
+{
+#ifdef CLOCK_REALTIME
+	struct timespec tp;
+	memset(&tp, 0, sizeof(tp));
+	/* Get a nanosecond time, that can be compared with the event base. */
+	if(clock_gettime(CLOCK_REALTIME, &tp) == -1) {
+		log_err("clock_gettime failed: %s", strerror(errno));
+	}
+	return ((uint64_t)tp.tv_sec)*((uint64_t)1000000000) +
+		((uint64_t)tp.tv_nsec);
+#else
+	struct timeval tv;
+	/* Zero it, like in the clock_gettime case, so that a failed
+	 * gettimeofday does not make us read an uninitialized value. */
+	memset(&tv, 0, sizeof(tv));
+	if(gettimeofday(&tv, NULL) < 0) {
+		log_err("gettimeofday failed: %s", strerror(errno));
+	}
+	return ((uint64_t)tv.tv_sec)*((uint64_t)1000000000) +
+		((uint64_t)tv.tv_usec)*((uint64_t)1000);
+#endif /* CLOCK_REALTIME */
+}
+
+/** doq start the closing period for the connection.
+ * Writes the connection close packet into the pkt_buf of the socket
+ * and stores a copy in conn->close_pkt, with its length and ecn, so
+ * that it can be sent, and sent again later.
+ * Returns 1 without doing that when conn is NULL or when the
+ * connection is already in the closing period or in the draining
+ * period; in the draining period writes are disabled as well.
+ * @param c: comm point, its doq_socket pkt_buf is used as scratch.
+ * @param conn: the connection to close, may be NULL.
+ * @return 1 on success, 0 on failure or if no close packet is written.
+ */
+static int
+doq_conn_start_closing_period(struct comm_point* c, struct doq_conn* conn)
+{
+ struct ngtcp2_path_storage ps;
+ struct ngtcp2_pkt_info pi;
+ ngtcp2_ssize ret;
+ if(!conn)
+ return 1;
+ if(
+#ifdef HAVE_NGTCP2_CONN_IN_CLOSING_PERIOD
+ ngtcp2_conn_in_closing_period(conn->conn)
+#else
+ ngtcp2_conn_is_in_closing_period(conn->conn)
+#endif
+ )
+ return 1;
+ if(
+#ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD
+ ngtcp2_conn_in_draining_period(conn->conn)
+#else
+ ngtcp2_conn_is_in_draining_period(conn->conn)
+#endif
+ ) {
+ doq_conn_write_disable(conn);
+ return 1;
+ }
+ ngtcp2_path_storage_zero(&ps);
+ sldns_buffer_clear(c->doq_socket->pkt_buf);
+ /* the call to ngtcp2_conn_write_connection_close causes the
+ * conn to be closed. It is now in the closing period. */
+ ret = ngtcp2_conn_write_connection_close(conn->conn, &ps.path,
+ &pi, sldns_buffer_begin(c->doq_socket->pkt_buf),
+ sldns_buffer_remaining(c->doq_socket->pkt_buf),
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+ &conn->ccerr
+#else
+ &conn->last_error
+#endif
+ , doq_get_timestamp_nanosec());
+ if(ret < 0) {
+ log_err("doq ngtcp2_conn_write_connection_close failed: %s",
+ ngtcp2_strerror(ret));
+ return 0;
+ }
+ if(ret == 0) { /* no packet was written, there is nothing to keep */
+ return 0;
+ }
+ sldns_buffer_set_position(c->doq_socket->pkt_buf, ret);
+ sldns_buffer_flip(c->doq_socket->pkt_buf);
+
+ /* The close packet is allocated, because it may have to be repeated.
+ * When incoming packets have this connection dcid. */
+ conn->close_pkt = memdup(sldns_buffer_begin(c->doq_socket->pkt_buf),
+ sldns_buffer_limit(c->doq_socket->pkt_buf));
+ if(!conn->close_pkt) {
+ log_err("doq: could not allocate close packet: out of memory");
+ return 0;
+ }
+ conn->close_pkt_len = sldns_buffer_limit(c->doq_socket->pkt_buf);
+ conn->close_ecn = pi.ecn; /* the stored packet is sent with this ecn */
+ return 1;
+}
+
+/** doq send the close packet for the connection, perhaps again.
+ * The stored close packet is copied into the pkt_buf of the socket
+ * and sent to the address of the connection, then writes are disabled.
+ * @return 1 if sent, 0 when there is no conn, no stored close packet,
+ *	or the packet does not fit in the pkt_buf. */
+int
+doq_conn_send_close(struct comm_point* c, struct doq_conn* conn)
+{
+	if(!conn || !conn->close_pkt ||
+		conn->close_pkt_len >
+		sldns_buffer_capacity(c->doq_socket->pkt_buf))
+		return 0;
+
+	sldns_buffer_clear(c->doq_socket->pkt_buf);
+	sldns_buffer_write(c->doq_socket->pkt_buf, conn->close_pkt,
+		conn->close_pkt_len);
+	sldns_buffer_flip(c->doq_socket->pkt_buf);
+
+	verbose(VERB_ALGO, "doq send connection close");
+	doq_send_pkt(c, &conn->key.paddr, conn->close_ecn);
+	doq_conn_write_disable(conn);
+	return 1;
+}
+
+/** doq close the connection on error. If it returns a failure, it
+ * does not wait to send a close, and the connection can be dropped.
+ * For an idle close error type no close packet is made and it returns
+ * 0. Otherwise the closing period is started; if the connection is in
+ * the draining period writes are disabled, else the close packet is
+ * sent.
+ * @return 1 if the close is handled, 0 if the connection can be dropped.
+ */
+static int
+doq_conn_close_error(struct comm_point* c, struct doq_conn* conn)
+{
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+ if(conn->ccerr.type == NGTCP2_CCERR_TYPE_IDLE_CLOSE)
+ return 0;
+#else
+ if(conn->last_error.type ==
+ NGTCP2_CONNECTION_CLOSE_ERROR_CODE_TYPE_TRANSPORT_IDLE_CLOSE)
+ return 0;
+#endif
+ if(!doq_conn_start_closing_period(c, conn))
+ return 0;
+ if(
+#ifdef HAVE_NGTCP2_CONN_IN_DRAINING_PERIOD
+ ngtcp2_conn_in_draining_period(conn->conn)
+#else
+ ngtcp2_conn_is_in_draining_period(conn->conn)
+#endif
+ ) {
+ doq_conn_write_disable(conn);
+ return 1;
+ }
+ doq_conn_write_enable(conn); /* doq_conn_send_close disables writes again after sending */
+ if(!doq_conn_send_close(c, conn))
+ return 0;
+ return 1;
+}
+
+int
+doq_conn_recv(struct comm_point* c, struct doq_pkt_addr* paddr,
+ struct doq_conn* conn, struct ngtcp2_pkt_info* pi, int* err_retry,
+ int* err_drop)
+{ /* Pass the packet in the pkt_buf of the socket, received for the
+ * addresses in paddr, to ngtcp2_conn_read_pkt for this connection.
+ * On success writes are enabled and 1 is returned.
+ * On failure 0 is returned; err_retry and err_drop (when not NULL)
+ * are first cleared, and then: for NGTCP2_ERR_DRAINING writes are
+ * disabled, for NGTCP2_ERR_DROP_CONN err_drop is set, for
+ * NGTCP2_ERR_RETRY both are set, and for other errors the close
+ * error is recorded (if none is set yet) and the connection is
+ * closed, with err_drop set only if that close fails. */
+ int ret;
+ ngtcp2_tstamp ts;
+ struct ngtcp2_path path;
+ memset(&path, 0, sizeof(path));
+ path.remote.addr = (struct sockaddr*)&paddr->addr;
+ path.remote.addrlen = paddr->addrlen;
+ path.local.addr = (struct sockaddr*)&paddr->localaddr;
+ path.local.addrlen = paddr->localaddrlen;
+ ts = doq_get_timestamp_nanosec();
+
+ ret = ngtcp2_conn_read_pkt(conn->conn, &path, pi,
+ sldns_buffer_begin(c->doq_socket->pkt_buf),
+ sldns_buffer_limit(c->doq_socket->pkt_buf), ts);
+ if(ret != 0) {
+ if(err_retry)
+ *err_retry = 0;
+ if(err_drop)
+ *err_drop = 0;
+ if(ret == NGTCP2_ERR_DRAINING) {
+ verbose(VERB_ALGO, "ngtcp2_conn_read_pkt returned %s",
+ ngtcp2_strerror(ret));
+ doq_conn_write_disable(conn);
+ return 0;
+ } else if(ret == NGTCP2_ERR_DROP_CONN) {
+ verbose(VERB_ALGO, "ngtcp2_conn_read_pkt returned %s",
+ ngtcp2_strerror(ret));
+ if(err_drop)
+ *err_drop = 1;
+ return 0;
+ } else if(ret == NGTCP2_ERR_RETRY) {
+ verbose(VERB_ALGO, "ngtcp2_conn_read_pkt returned %s",
+ ngtcp2_strerror(ret));
+ if(err_retry)
+ *err_retry = 1;
+ if(err_drop)
+ *err_drop = 1;
+ return 0;
+ } else if(ret == NGTCP2_ERR_CRYPTO) {
+ if(
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+ !conn->ccerr.error_code
+#else
+ !conn->last_error.error_code
+#endif
+ ) {
+ /* in picotls the tls alert may need to be
+ * copied, but this is with openssl. And there
+ * is conn->tls_alert. */
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+ ngtcp2_ccerr_set_tls_alert(&conn->ccerr,
+ conn->tls_alert, NULL, 0);
+#else
+ ngtcp2_connection_close_error_set_transport_error_tls_alert(
+ &conn->last_error, conn->tls_alert,
+ NULL, 0);
+#endif
+ }
+ } else {
+ if(
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+ !conn->ccerr.error_code
+#else
+ !conn->last_error.error_code
+#endif
+ ) {
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+ ngtcp2_ccerr_set_liberr(&conn->ccerr, ret,
+ NULL, 0);
+#else
+ ngtcp2_connection_close_error_set_transport_error_liberr(
+ &conn->last_error, ret, NULL, 0);
+#endif
+ }
+ }
+ log_err("ngtcp2_conn_read_pkt failed: %s",
+ ngtcp2_strerror(ret));
+ if(!doq_conn_close_error(c, conn)) { /* no close to wait for, the caller may drop the conn */
+ if(err_drop)
+ *err_drop = 1;
+ }
+ return 0;
+ }
+ doq_conn_write_enable(conn);
+ return 1;
+}
+
+/** doq stream write is done.
+ * The stream is taken off the write list of the connection, its
+ * buffers are kept. */
+static void
+doq_stream_write_is_done(struct doq_conn* conn, struct doq_stream* stream)
+{
+ /* Cannot deallocate, the buffer may be needed for resends. */
+ doq_stream_off_write_list(conn, stream);
+}
+
+/**
+ * Write packets for the connection.
+ * Walks the stream write list of the connection. For every iteration
+ * ngtcp2_conn_writev_stream fills c->doq_socket->pkt_buf and the result
+ * is sent with doq_send_pkt. When there is no stream to write,
+ * ngtcp2_conn_writev_stream is called with stream id -1 and no data.
+ * @param c: comm point, its doq_socket has the packet buffer that is used.
+ * @param conn: the connection to write packets for.
+ * @param err_drop: if not NULL, it is set on failure. It is 0 if
+ *	doq_conn_close_error succeeded and 1 if that failed as well.
+ * @return 0 on failure. 1 otherwise, also when the write stopped because
+ *	ngtcp2_conn_writev_stream returned 0, the socket has a blocked
+ *	packet, or max_packets were sent.
+ */
+int
+doq_conn_write_streams(struct comm_point* c, struct doq_conn* conn,
+ int* err_drop)
+{
+ struct doq_stream* stream = conn->stream_write_first;
+ ngtcp2_path_storage ps;
+ ngtcp2_tstamp ts = doq_get_timestamp_nanosec();
+ /* max_packets bounds the number of packets sent in one call */
+ size_t num_packets = 0, max_packets = 65535;
+ ngtcp2_path_storage_zero(&ps);
+
+ for(;;) {
+ int64_t stream_id;
+ uint32_t flags = 0;
+ ngtcp2_pkt_info pi;
+ ngtcp2_vec datav[2];
+ size_t datav_count = 0;
+ ngtcp2_ssize ret, ndatalen = 0;
+ int fin;
+
+ if(stream) {
+ /* data to send */
+ verbose(VERB_ALGO, "doq: doq_conn write stream %d",
+ (int)stream->stream_id);
+ stream_id = stream->stream_id;
+ fin = 1;
+ /* nwrite counts the two bytes of outlen_wire first and
+ * then the bytes of out. Below 2, the remainder of the
+ * length prefix goes ahead of the whole answer. */
+ if(stream->nwrite < 2) {
+ datav[0].base = ((uint8_t*)&stream->
+ outlen_wire) + stream->nwrite;
+ datav[0].len = 2 - stream->nwrite;
+ datav[1].base = stream->out;
+ datav[1].len = stream->outlen;
+ datav_count = 2;
+ } else {
+ /* the length prefix is done, continue in the answer
+ * at offset nwrite-2 */
+ datav[0].base = stream->out +
+ (stream->nwrite-2);
+ datav[0].len = stream->outlen -
+ (stream->nwrite-2);
+ datav_count = 1;
+ }
+ } else {
+ /* no data to send */
+ verbose(VERB_ALGO, "doq: doq_conn write stream -1");
+ stream_id = -1;
+ fin = 0;
+ datav[0].base = NULL;
+ datav[0].len = 0;
+ datav_count = 1;
+ }
+
+ /* if more streams, set it to write more */
+ if(stream && stream->write_next)
+ flags |= NGTCP2_WRITE_STREAM_FLAG_MORE;
+ if(fin)
+ flags |= NGTCP2_WRITE_STREAM_FLAG_FIN;
+
+ sldns_buffer_clear(c->doq_socket->pkt_buf);
+ ret = ngtcp2_conn_writev_stream(conn->conn, &ps.path, &pi,
+ sldns_buffer_begin(c->doq_socket->pkt_buf),
+ sldns_buffer_remaining(c->doq_socket->pkt_buf),
+ &ndatalen, flags, stream_id, datav, datav_count, ts);
+ if(ret < 0) {
+ if(ret == NGTCP2_ERR_WRITE_MORE) {
+ /* No packet is sent in this branch. The accepted
+ * bytes are accounted and the loop continues with
+ * the next stream on the write list. */
+ verbose(VERB_ALGO, "doq: write more, ndatalen %d", (int)ndatalen);
+ if(stream) {
+ if(ndatalen >= 0)
+ stream->nwrite += ndatalen;
+ if(stream->nwrite >= stream->outlen+2)
+ doq_stream_write_is_done(
+ conn, stream);
+ stream = stream->write_next;
+ }
+ continue;
+ } else if(ret == NGTCP2_ERR_STREAM_DATA_BLOCKED) {
+ /* set an application error and close the connection */
+ verbose(VERB_ALGO, "doq: ngtcp2_conn_writev_stream returned NGTCP2_ERR_STREAM_DATA_BLOCKED");
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+ ngtcp2_ccerr_set_application_error(
+ &conn->ccerr, -1, NULL, 0);
+#else
+ ngtcp2_connection_close_error_set_application_error(&conn->last_error, -1, NULL, 0);
+#endif
+ if(err_drop)
+ *err_drop = 0;
+ if(!doq_conn_close_error(c, conn)) {
+ if(err_drop)
+ *err_drop = 1;
+ }
+ return 0;
+ } else if(ret == NGTCP2_ERR_STREAM_SHUT_WR) {
+ /* set an application error and close the connection */
+ verbose(VERB_ALGO, "doq: ngtcp2_conn_writev_stream returned NGTCP2_ERR_STREAM_SHUT_WR");
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+ ngtcp2_ccerr_set_application_error(
+ &conn->ccerr, -1, NULL, 0);
+#else
+ ngtcp2_connection_close_error_set_application_error(&conn->last_error, -1, NULL, 0);
+#endif
+ if(err_drop)
+ *err_drop = 0;
+ if(!doq_conn_close_error(c, conn)) {
+ if(err_drop)
+ *err_drop = 1;
+ }
+ return 0;
+ }
+
+ /* other errors: log, set the library error and close
+ * the connection */
+ log_err("doq: ngtcp2_conn_writev_stream failed: %s",
+ ngtcp2_strerror(ret));
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+ ngtcp2_ccerr_set_liberr(&conn->ccerr, ret, NULL, 0);
+#else
+ ngtcp2_connection_close_error_set_transport_error_liberr(
+ &conn->last_error, ret, NULL, 0);
+#endif
+ if(err_drop)
+ *err_drop = 0;
+ if(!doq_conn_close_error(c, conn)) {
+ if(err_drop)
+ *err_drop = 1;
+ }
+ return 0;
+ }
+ verbose(VERB_ALGO, "doq: writev_stream pkt size %d ndatawritten %d",
+ (int)ret, (int)ndatalen);
+
+ /* account the stream bytes that were accepted for this packet */
+ if(ndatalen >= 0 && stream) {
+ stream->nwrite += ndatalen;
+ if(stream->nwrite >= stream->outlen+2)
+ doq_stream_write_is_done(conn, stream);
+ }
+ if(ret == 0) {
+ /* congestion limited */
+ doq_conn_write_disable(conn);
+ ngtcp2_conn_update_pkt_tx_time(conn->conn, ts);
+ return 1;
+ }
+ /* ret is the packet size; make those bytes the contents of
+ * pkt_buf for doq_send_pkt */
+ sldns_buffer_set_position(c->doq_socket->pkt_buf, ret);
+ sldns_buffer_flip(c->doq_socket->pkt_buf);
+ doq_send_pkt(c, &conn->key.paddr, pi.ecn);
+
+ /* stop when the socket holds a blocked packet, or when the
+ * packet limit for this call is reached */
+ if(c->doq_socket->have_blocked_pkt)
+ break;
+ if(++num_packets == max_packets)
+ break;
+ if(stream)
+ stream = stream->write_next;
+ }
+ ngtcp2_conn_update_pkt_tx_time(conn->conn, ts);
+ return 1;
+}
+
+/** Set the write interest flag of the connection. The flag is read by
+ * doq_conn_set_write_list to decide about the write list membership. */
+void
+doq_conn_write_enable(struct doq_conn* conn)
+{
+ conn->write_interest = 1;
+}
+
+/** Clear the write interest flag of the connection. The flag is read by
+ * doq_conn_set_write_list to decide about the write list membership. */
+void
+doq_conn_write_disable(struct doq_conn* conn)
+{
+ conn->write_interest = 0;
+}
+
+/** doq link the connection at the tail of the table write list.
+ * Nothing happens if the connection is already on the list. */
+static void
+doq_conn_write_list_append(struct doq_table* table, struct doq_conn* conn)
+{
+	struct doq_conn* tail;
+	if(conn->on_write_list)
+		return;
+	tail = table->write_list_last;
+	conn->write_prev = tail;
+	conn->write_next = NULL;
+	if(tail == NULL) {
+		/* the list was empty, conn is also the first element */
+		table->write_list_first = conn;
+	} else {
+		tail->write_next = conn;
+	}
+	table->write_list_last = conn;
+	conn->on_write_list = 1;
+}
+
+/** doq unlink the connection from the table write list.
+ * Nothing happens if the connection is not on the list. */
+void
+doq_conn_write_list_remove(struct doq_table* table, struct doq_conn* conn)
+{
+	struct doq_conn* before;
+	struct doq_conn* after;
+	if(!conn->on_write_list)
+		return;
+	before = conn->write_prev;
+	after = conn->write_next;
+
+	/* fix the backward link, or the list tail if conn was last */
+	if(after != NULL)
+		after->write_prev = before;
+	else
+		table->write_list_last = before;
+
+	/* fix the forward link, or the list head if conn was first */
+	if(before != NULL)
+		before->write_next = after;
+	else
+		table->write_list_first = after;
+
+	conn->on_write_list = 0;
+	conn->write_next = NULL;
+	conn->write_prev = NULL;
+}
+
+/** Bring the write list membership of the connection in line with its
+ * write interest: on the list with interest, off the list without. */
+void
+doq_conn_set_write_list(struct doq_table* table, struct doq_conn* conn)
+{
+	int wants_write = (conn->write_interest != 0);
+	int is_listed = (conn->on_write_list != 0);
+
+	/* membership already matches the interest, nothing to change */
+	if(wants_write == is_listed)
+		return;
+	if(wants_write) {
+		doq_conn_write_list_append(table, conn);
+	} else {
+		doq_conn_write_list_remove(table, conn);
+	}
+}
+
+/** Take the first connection off the table write list.
+ * The returned connection has its lock held. Returns NULL if the list
+ * is empty. */
+struct doq_conn*
+doq_table_pop_first(struct doq_table* table)
+{
+	struct doq_conn* head = table->write_list_first;
+	struct doq_conn* rest;
+	if(head == NULL)
+		return NULL;
+	/* the lock is taken before the list links of head are touched */
+	lock_basic_lock(&head->lock);
+	rest = head->write_next;
+	table->write_list_first = rest;
+	if(rest == NULL) {
+		/* head was the only element */
+		table->write_list_last = NULL;
+	} else {
+		rest->write_prev = NULL;
+	}
+	head->write_next = NULL;
+	head->write_prev = NULL;
+	head->on_write_list = 0;
+	return head;
+}
+
+/**
+ * Compute the absolute time for the connection timer and see if the
+ * timer has to be changed.
+ * @param conn: the connection.
+ * @param tv: the time value for the timer is returned here.
+ * @return 0 if the timer is in the tree or in a list and already has the
+ *	value of tv. 1 otherwise, the timer needs to be set to tv.
+ */
+int
+doq_conn_check_timer(struct doq_conn* conn, struct timeval* tv)
+{
+	ngtcp2_tstamp expiry = ngtcp2_conn_get_expiry(conn->conn);
+	ngtcp2_tstamp now = doq_get_timestamp_nanosec();
+	/* If the timer has expired already, use the current time. That
+	 * is a zero timeout, the callback is then called from the event
+	 * callbacks and not from here. The event callbacks always have
+	 * the same locks and so on, this routine only has the conn.lock. */
+	ngtcp2_tstamp when = (expiry <= now) ? now : expiry;
+	int timer_is_set;
+
+	/* convert the nanosecond timestamp to a timeval */
+	memset(tv, 0, sizeof(*tv));
+	tv->tv_sec = when / NGTCP2_SECONDS;
+	tv->tv_usec = (when / NGTCP2_MICROSECONDS)%1000000;
+
+	/* an existing timer with the same value needs no change */
+	timer_is_set = (conn->timer.timer_in_tree ||
+		conn->timer.timer_in_list);
+	if(timer_is_set &&
+		conn->timer.time.tv_sec == tv->tv_sec &&
+		conn->timer.time.tv_usec == tv->tv_usec)
+		return 0;
+	return 1;
+}
+
+/** doq log a line for the connection, with the remote address, the
+ * local address and then the string s. */
+static void
+doq_conn_log_line(struct doq_conn* conn, char* s)
+{
+	char peer[256];
+	char local[256];
+	struct doq_pkt_addr* paddr = &conn->key.paddr;
+
+	addr_to_str((void*)&paddr->addr, paddr->addrlen, peer, sizeof(peer));
+	addr_to_str((void*)&paddr->localaddr, paddr->localaddrlen, local,
+		sizeof(local));
+	log_info("doq conn %s %s %s", peer, local, s);
+}
+
+/**
+ * Handle a timeout for the connection. Lets ngtcp2 handle the expiry and
+ * then writes packets for the connection.
+ * @param conn: the connection, its doq_socket->cp is used for writes.
+ * @return 0 if the connection has to be deleted, 1 otherwise.
+ */
+int
+doq_conn_handle_timeout(struct doq_conn* conn)
+{
+	ngtcp2_tstamp now = doq_get_timestamp_nanosec();
+	int rv;
+
+	if(verbosity >= VERB_ALGO)
+		doq_conn_log_line(conn, "timeout");
+
+	rv = ngtcp2_conn_handle_expiry(conn->conn, now);
+	if(rv == 0) {
+		/* Write packets, a failure there means deletion. */
+		doq_conn_write_enable(conn);
+		return doq_conn_write_streams(conn->doq_socket->cp, conn, NULL)
+			? 1 : 0;
+	}
+
+	/* The expiry handling failed, close the connection with that
+	 * error. If the close fails too, the connection is deleted. */
+	verbose(VERB_ALGO, "ngtcp2_conn_handle_expiry failed: %s",
+		ngtcp2_strerror(rv));
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+	ngtcp2_ccerr_set_liberr(&conn->ccerr, rv, NULL, 0);
+#else
+	ngtcp2_connection_close_error_set_transport_error_liberr(
+		&conn->last_error, rv, NULL, 0);
+#endif
+	return doq_conn_close_error(conn->doq_socket->cp, conn) ? 1 : 0;
+}
+
+/** Add to the current_size counter of the table. The counter is changed
+ * while the table size_lock is held. */
+void
+doq_table_quic_size_add(struct doq_table* table, size_t add)
+{
+ lock_basic_lock(&table->size_lock);
+ table->current_size += add;
+ lock_basic_unlock(&table->size_lock);
+}
+
+/** Subtract from the current_size counter of the table, with the table
+ * size_lock held. The counter stops at 0 and does not wrap around. */
+void
+doq_table_quic_size_subtract(struct doq_table* table, size_t subtract)
+{
+	lock_basic_lock(&table->size_lock);
+	table->current_size = (table->current_size < subtract)
+		? 0 : table->current_size - subtract;
+	lock_basic_unlock(&table->size_lock);
+}
+
+/**
+ * Check if mem more bytes fit in the quic buffer limit.
+ * @param table: the doq table, current_size is read under the size_lock.
+ * @param cfg: config, with the quic_size limit.
+ * @param mem: the number of bytes that is going to be added.
+ * @return 1 if current_size plus mem stays within cfg->quic_size, else 0.
+ */
+int
+doq_table_quic_size_available(struct doq_table* table,
+	struct config_file* cfg, size_t mem)
+{
+	size_t cur;
+	/* NOTE(review): assumes cfg->quic_size is a size_t - confirm */
+	size_t limit = cfg->quic_size;
+	lock_basic_lock(&table->size_lock);
+	cur = table->current_size;
+	lock_basic_unlock(&table->size_lock);
+
+	/* The sum cur + mem can wrap around for large values and then
+	 * looks like it fits. Compare without computing the sum. */
+	if(mem > limit)
+		return 0;
+	if(cur > limit - mem)
+		return 0;
+	return 1;
+}
+
+/** Read the current_size counter of the table, with the size_lock held.
+ * Returns 0 if there is no table. */
+size_t doq_table_quic_size_get(struct doq_table* table)
+{
+	size_t cur = 0;
+	if(table != NULL) {
+		lock_basic_lock(&table->size_lock);
+		cur = table->current_size;
+		lock_basic_unlock(&table->size_lock);
+	}
+	return cur;
+}
+#endif /* HAVE_NGTCP2 */
diff --git a/services/listen_dnsport.h b/services/listen_dnsport.h
index 84ac4b068b1b..c29f4d72b0a2 100644
--- a/services/listen_dnsport.h
+++ b/services/listen_dnsport.h
@@ -43,10 +43,16 @@
#define LISTEN_DNSPORT_H
#include "util/netevent.h"
+#include "util/rbtree.h"
+#include "util/locks.h"
#include "daemon/acl_list.h"
#ifdef HAVE_NGHTTP2_NGHTTP2_H
#include <nghttp2/nghttp2.h>
#endif
+#ifdef HAVE_NGTCP2
+#include <ngtcp2/ngtcp2.h>
+#include <ngtcp2/ngtcp2_crypto.h>
+#endif
struct listen_list;
struct config_file;
struct addrinfo;
@@ -100,7 +106,9 @@ enum listen_type {
/** udp ipv6 (v4mapped) for use with ancillary data + dnscrypt*/
listen_type_udpancil_dnscrypt,
/** HTTP(2) over TLS over TCP */
- listen_type_http
+ listen_type_http,
+ /** DNS over QUIC */
+ listen_type_doq
};
/*
@@ -188,6 +196,11 @@ int resolve_interface_names(char** ifs, int num_ifs,
* @param tcp_conn_limit: TCP connection limit info.
* @param sslctx: nonNULL if ssl context.
* @param dtenv: nonNULL if dnstap enabled.
+ * @param doq_table: the doq connection table, with shared information.
+ * @param rnd: random state.
+ * @param ssl_service_key: the SSL service key file.
+ * @param ssl_service_pem: the SSL service pem file.
+ * @param cfg: config file struct.
* @param cb: callback function when a request arrives. It is passed
* the packet and user argument. Return true to send a reply.
* @param cb_arg: user data argument for callback function.
@@ -198,8 +211,10 @@ listen_create(struct comm_base* base, struct listen_port* ports,
size_t bufsize, int tcp_accept_count, int tcp_idle_timeout,
int harden_large_queries, uint32_t http_max_streams,
char* http_endpoint, int http_notls, struct tcl_list* tcp_conn_limit,
- void* sslctx, struct dt_env* dtenv, comm_point_callback_type* cb,
- void *cb_arg);
+ void* sslctx, struct dt_env* dtenv, struct doq_table* doq_table,
+ struct ub_randstate* rnd, const char* ssl_service_key,
+ const char* ssl_service_pem, struct config_file* cfg,
+ comm_point_callback_type* cb, void *cb_arg);
/**
* delete the listening structure
@@ -278,11 +293,12 @@ int create_udp_sock(int family, int socktype, struct sockaddr* addr,
* @param freebind: set IP_FREEBIND socket option.
* @param use_systemd: if true, fetch sockets from systemd.
* @param dscp: DSCP to use.
+ * @param additional: additional log information for the socket type.
* @return: the socket. -1 on error.
*/
int create_tcp_accept_sock(struct addrinfo *addr, int v6only, int* noproto,
int* reuseport, int transparent, int mss, int nodelay, int freebind,
- int use_systemd, int dscp);
+ int use_systemd, int dscp, const char* additional);
/**
* Create and bind local listening socket
@@ -452,6 +468,377 @@ int http2_submit_dns_response(struct http2_session* h2_session);
int http2_submit_dns_response(void* v);
#endif /* HAVE_NGHTTP2 */
+#ifdef HAVE_NGTCP2
+struct doq_conid;
+struct doq_server_socket;
+
+/**
+ * DoQ shared connection table. It holds the connections for the host,
+ * and some config parameter values for connections. The host has to
+ * respond on that ip,port for those connections, so they are shared
+ * between threads.
+ */
+struct doq_table {
+ /** the lock on the tree and config elements. Insertion, deletion
+ * and also lookup in the tree need to hold the lock. */
+ lock_rw_type lock;
+ /** rbtree of doq_conn, the connections to different destination
+ * addresses, and can be found by dcid. */
+ struct rbtree_type* conn_tree;
+ /** lock for the conid tree, needed for the conid tree and also
+ * the conid elements */
+ lock_rw_type conid_lock;
+ /** rbtree of doq_conid, connections can be found by their
+ * connection ids. Lookup by connection id, finds doq_conn. */
+ struct rbtree_type* conid_tree;
+ /** the server scid length */
+ int sv_scidlen;
+ /** the static secret for the server */
+ uint8_t* static_secret;
+ /** length of the static secret */
+ size_t static_secret_len;
+ /** the idle timeout in nanoseconds */
+ uint64_t idle_timeout;
+ /** the list of write interested connections, linked with the
+ * doq_conn write_prev and write_next. Hold the doq_table.lock
+ * to change them */
+ struct doq_conn* write_list_first, *write_list_last;
+ /** rbtree of doq_timer, sorted by timeout value. */
+ struct rbtree_type* timer_tree;
+ /** lock on the current_size counter. */
+ lock_basic_type size_lock;
+ /** current use, in bytes, of QUIC buffers.
+ * The doq_conn ngtcp2_conn structure, SSL structure and conid structs
+ * are not counted. Changed with doq_table_quic_size_add and
+ * doq_table_quic_size_subtract. */
+ size_t current_size;
+};
+
+/** create doq table */
+struct doq_table* doq_table_create(struct config_file* cfg,
+ struct ub_randstate* rnd);
+
+/** delete doq table */
+void doq_table_delete(struct doq_table* table);
+
+/**
+ * Timer information for doq timer.
+ */
+struct doq_timer {
+ /** The rbnode in the tree sorted by timeout value. The key is this
+ * struct. */
+ struct rbnode_type node;
+ /** The timeout value. Absolute time value. */
+ struct timeval time;
+ /** If the timer is in the time tree, with the node. */
+ int timer_in_tree;
+ /** If there are more timers with the exact same timeout value,
+ * they form a set of timers. The rbnode timer has a link to the list
+ * with the other timers in the set. The rbnode timer is not a
+ * member of the list with the other timers. The other timers are not
+ * linked into the tree. */
+ struct doq_timer* setlist_first, *setlist_last;
+ /** If the timer is on the setlist. */
+ int timer_in_list;
+ /** If in the setlist, the next and prev element. */
+ struct doq_timer* setlist_next, *setlist_prev;
+ /** The connection that this timeout is for. */
+ struct doq_conn* conn;
+ /** The worker that is waiting for the timeout event.
+ * Set for the rbnode tree linked element, if a worker is waiting
+ * for the event. If NULL, no worker is waiting for this timeout. */
+ struct doq_server_socket* worker_doq_socket;
+};
+
+/**
+ * Key information for a doq_conn node, used for the lookup in the tree.
+ * It consists of the addresses and the dcid.
+ */
+struct doq_conn_key {
+ /** the remote endpoint and local endpoint and ifindex */
+ struct doq_pkt_addr paddr;
+ /** the doq connection dcid, a byte string of dcidlen bytes */
+ uint8_t* dcid;
+ /** length of dcid */
+ size_t dcidlen;
+};
+
+/**
+ * DoQ connection, for DNS over QUIC. One connection to a remote endpoint
+ * with a number of streams in it. Every stream is like a tcp stream with
+ * a uint16_t length, query read, and a uint16_t length and answer written.
+ */
+struct doq_conn {
+ /** rbtree node, key is addresses and dcid */
+ struct rbnode_type node;
+ /** lock on the connection */
+ lock_basic_type lock;
+ /** the key information, with dcid and address endpoint */
+ struct doq_conn_key key;
+ /** the doq server socket for inside callbacks */
+ struct doq_server_socket* doq_socket;
+ /** the doq table this connection is part of */
+ struct doq_table* table;
+ /** if the connection is about to be deleted. */
+ uint8_t is_deleted;
+ /** the version, the client chosen version of QUIC */
+ uint32_t version;
+ /** the ngtcp2 connection, a server connection */
+ struct ngtcp2_conn* conn;
+ /** the connection ids that are associated with this doq_conn.
+ * There can be a number of them, and that number can change. They
+ * are linked here, so that upon removal, the list of actually
+ * associated conid elements can be removed as well. */
+ struct doq_conid* conid_list;
+ /** the ngtcp2 last error for the connection. The type depends on
+ * the ngtcp2 version that is detected at configure time. */
+#ifdef HAVE_NGTCP2_CCERR_DEFAULT
+ struct ngtcp2_ccerr ccerr;
+#else
+ struct ngtcp2_connection_close_error last_error;
+#endif
+ /** the recent tls alert error code */
+ uint8_t tls_alert;
+ /** the ssl context, SSL* */
+ void* ssl;
+#ifdef HAVE_NGTCP2_CRYPTO_QUICTLS_CONFIGURE_SERVER_CONTEXT
+ /** the connection reference for ngtcp2_conn and userdata in ssl */
+ struct ngtcp2_crypto_conn_ref conn_ref;
+#endif
+ /** closure packet, if any */
+ uint8_t* close_pkt;
+ /** length of closure packet. */
+ size_t close_pkt_len;
+ /** closure ecn */
+ uint32_t close_ecn;
+ /** the streams for this connection, of type doq_stream */
+ struct rbtree_type stream_tree;
+ /** the streams that want write, they have something to write.
+ * The list is ordered, the last have to wait for the first to
+ * get their data written. */
+ struct doq_stream* stream_write_first, *stream_write_last;
+ /** the conn has write interest if true, no write interest if false.
+ * Set by doq_conn_write_enable and doq_conn_write_disable. */
+ uint8_t write_interest;
+ /** if the conn is on the connection write list of the doq_table */
+ uint8_t on_write_list;
+ /** the connection write list prev and next, if on the write list */
+ struct doq_conn* write_prev, *write_next;
+ /** The timer for the connection. If unused, it is not in the tree
+ * and not in the list. It is alloced here, so that it is prealloced.
+ * It has to be set after every read and write on the connection, so
+ * this improves performance, but also the allocation does not fail. */
+ struct doq_timer timer;
+};
+
+/**
+ * Connection ID and the doq_conn that is that connection. A connection
+ * has an original dcid, and then more connection ids associated.
+ */
+struct doq_conid {
+ /** rbtree node, key is the connection id. */
+ struct rbnode_type node;
+ /** the next and prev in the list of conids for the doq_conn */
+ struct doq_conid* next, *prev;
+ /** key of the doq_conn that this connection id belongs to */
+ struct doq_conn_key key;
+ /** the connection id, a byte string of cidlen bytes */
+ uint8_t* cid;
+ /** the length of cid */
+ size_t cidlen;
+};
+
+/**
+ * DoQ stream, for DNS over QUIC.
+ */
+struct doq_stream {
+ /** the rbtree node for the stream, key is the stream_id */
+ rbnode_type node;
+ /** the stream id */
+ int64_t stream_id;
+ /** if the stream is closed */
+ uint8_t is_closed;
+ /** if the query is complete */
+ uint8_t is_query_complete;
+ /** the number of bytes read on the stream, up to querylen+2. */
+ size_t nread;
+ /** the length of the input query bytes */
+ size_t inlen;
+ /** the input bytes */
+ uint8_t* in;
+ /** does the stream have an answer to send */
+ uint8_t is_answer_available;
+ /** the answer bytes sent, up to outlen+2. The first two count the
+ * bytes of outlen_wire, the remainder counts the bytes of out. */
+ size_t nwrite;
+ /** the length of the output answer bytes */
+ size_t outlen;
+ /** the output length in network wireformat, it is written on the
+ * stream before the output packet bytes. */
+ uint16_t outlen_wire;
+ /** the output packet bytes */
+ uint8_t* out;
+ /** if the stream is on the write list of the doq_conn */
+ uint8_t on_write_list;
+ /** the prev and next on the write list, if on the list */
+ struct doq_stream* write_prev, *write_next;
+};
+
+/** doq application error code that is sent when a stream is closed */
+#define DOQ_APP_ERROR_CODE 1
+
+/**
+ * Create the doq connection.
+ * @param c: the comm point for the listening doq socket.
+ * @param paddr: with remote and local address and ifindex for the
+ * connection destination. This is where packets are sent.
+ * @param dcid: the dcid, Destination Connection ID.
+ * @param dcidlen: length of dcid.
+ * @param version: client chosen version.
+ * @return new doq connection or NULL on allocation failure.
+ */
+struct doq_conn* doq_conn_create(struct comm_point* c,
+ struct doq_pkt_addr* paddr, const uint8_t* dcid, size_t dcidlen,
+ uint32_t version);
+
+/**
+ * Delete the doq connection structure.
+ * @param conn: to delete.
+ * @param table: with memory size.
+ */
+void doq_conn_delete(struct doq_conn* conn, struct doq_table* table);
+
+/** compare function of doq_conn */
+int doq_conn_cmp(const void* key1, const void* key2);
+
+/** compare function of doq_conid */
+int doq_conid_cmp(const void* key1, const void* key2);
+
+/** compare function of doq_timer */
+int doq_timer_cmp(const void* key1, const void* key2);
+
+/** compare function of doq_stream */
+int doq_stream_cmp(const void* key1, const void* key2);
+
+/** setup the doq_socket server tls context */
+int doq_socket_setup_ctx(struct doq_server_socket* doq_socket);
+
+/** setup the doq connection callbacks, and settings. */
+int doq_conn_setup(struct doq_conn* conn, uint8_t* scid, size_t scidlen,
+ uint8_t* ocid, size_t ocidlen, const uint8_t* token, size_t tokenlen);
+
+/** fill a buffer with random data */
+void doq_fill_rand(struct ub_randstate* rnd, uint8_t* buf, size_t len);
+
+/** delete a doq_conid */
+void doq_conid_delete(struct doq_conid* conid);
+
+/** add a connection id to the doq_conn.
+ * caller must hold doq_table.conid_lock. */
+int doq_conn_associate_conid(struct doq_conn* conn, uint8_t* data,
+ size_t datalen);
+
+/** remove a connection id from the doq_conn.
+ * caller must hold doq_table.conid_lock. */
+void doq_conn_dissociate_conid(struct doq_conn* conn, const uint8_t* data,
+ size_t datalen);
+
+/** initial setup to link current connection ids to the doq_conn */
+int doq_conn_setup_conids(struct doq_conn* conn);
+
+/** remove the connection ids from the doq_conn.
+ * caller must hold doq_table.conid_lock. */
+void doq_conn_clear_conids(struct doq_conn* conn);
+
+/** find a conid in the doq_conn connection.
+ * caller must hold table.conid_lock. */
+struct doq_conid* doq_conid_find(struct doq_table* doq_table,
+ const uint8_t* data, size_t datalen);
+
+/** receive a packet for a connection */
+int doq_conn_recv(struct comm_point* c, struct doq_pkt_addr* paddr,
+ struct doq_conn* conn, struct ngtcp2_pkt_info* pi, int* err_retry,
+ int* err_drop);
+
+/** send packets for a connection */
+int doq_conn_write_streams(struct comm_point* c, struct doq_conn* conn,
+ int* err_drop);
+
+/** send the close packet for the connection, perhaps again. */
+int doq_conn_send_close(struct comm_point* c, struct doq_conn* conn);
+
+/** delete doq stream */
+void doq_stream_delete(struct doq_stream* stream);
+
+/** doq read a connection key from repinfo. It is not malloced, but points
+ * into the repinfo for the dcid. */
+void doq_conn_key_from_repinfo(struct doq_conn_key* key,
+ struct comm_reply* repinfo);
+
+/** doq find a stream in the connection */
+struct doq_stream* doq_stream_find(struct doq_conn* conn, int64_t stream_id);
+
+/** doq shutdown the stream. */
+int doq_stream_close(struct doq_conn* conn, struct doq_stream* stream,
+ int send_shutdown);
+
+/** send reply for a connection */
+int doq_stream_send_reply(struct doq_conn* conn, struct doq_stream* stream,
+ struct sldns_buffer* buf);
+
+/** the connection has write interest, wants to write packets */
+void doq_conn_write_enable(struct doq_conn* conn);
+
+/** the connection has no write interest, does not want to write packets */
+void doq_conn_write_disable(struct doq_conn* conn);
+
+/** set the connection on or off the write list, depending on write interest */
+void doq_conn_set_write_list(struct doq_table* table, struct doq_conn* conn);
+
+/** doq remove the connection from the write list */
+void doq_conn_write_list_remove(struct doq_table* table,
+ struct doq_conn* conn);
+
+/** doq get the first conn from the write list, if any, popped from list.
+ * Locks the conn that is returned. */
+struct doq_conn* doq_table_pop_first(struct doq_table* table);
+
+/**
+ * doq check if the timer for the conn needs to be changed.
+ * @param conn: connection, caller must hold lock on it.
+ * @param tv: time value, absolute time, returned.
+ * @return true if timer needs to be set to tv, false if no change is needed
+ * to the timer. The timer is already set to the right time in that case.
+ */
+int doq_conn_check_timer(struct doq_conn* conn, struct timeval* tv);
+
+/** doq remove timer from tree */
+void doq_timer_tree_remove(struct doq_table* table, struct doq_timer* timer);
+
+/** doq remove timer from list */
+void doq_timer_list_remove(struct doq_table* table, struct doq_timer* timer);
+
+/** doq unset the timer if it was set. */
+void doq_timer_unset(struct doq_table* table, struct doq_timer* timer);
+
+/** doq set the timer and add it. */
+void doq_timer_set(struct doq_table* table, struct doq_timer* timer,
+ struct doq_server_socket* worker_doq_socket, struct timeval* tv);
+
+/** doq find a timeout in the timer tree */
+struct doq_timer* doq_timer_find_time(struct doq_table* table,
+ struct timeval* tv);
+
+/** doq handle timeout for a connection. Pass conn locked. Returns false for
+ * deletion. */
+int doq_conn_handle_timeout(struct doq_conn* conn);
+
+/** doq add size to the current quic buffer counter */
+void doq_table_quic_size_add(struct doq_table* table, size_t add);
+
+/** doq subtract size from the current quic buffer counter */
+void doq_table_quic_size_subtract(struct doq_table* table, size_t subtract);
+
+/** doq check if mem is available for quic. */
+int doq_table_quic_size_available(struct doq_table* table,
+ struct config_file* cfg, size_t mem);
+
+/** doq get the quic size value */
+size_t doq_table_quic_size_get(struct doq_table* table);
+#endif /* HAVE_NGTCP2 */
+
char* set_ip_dscp(int socket, int addrfamily, int ds);
/** for debug and profiling purposes only
@@ -459,4 +846,14 @@ char* set_ip_dscp(int socket, int addrfamily, int ds);
*/
void verbose_print_unbound_socket(struct unbound_socket* ub_sock);
+/** event callback for testcode/doqclient */
+void doq_client_event_cb(int fd, short event, void* arg);
+
+/** timer event callback for testcode/doqclient */
+void doq_client_timer_cb(int fd, short event, void* arg);
+
+#ifdef HAVE_NGTCP2
+/** get a timestamp in nanoseconds */
+ngtcp2_tstamp doq_get_timestamp_nanosec(void);
+#endif
#endif /* LISTEN_DNSPORT_H */
diff --git a/services/mesh.c b/services/mesh.c
index 522118844b44..d512ab3d32d4 100644
--- a/services/mesh.c
+++ b/services/mesh.c
@@ -311,7 +311,7 @@ int mesh_make_new_space(struct mesh_area* mesh, sldns_buffer* qbuf)
struct dns_msg*
mesh_serve_expired_lookup(struct module_qstate* qstate,
- struct query_info* lookup_qinfo)
+ struct query_info* lookup_qinfo, int* is_expired)
{
hashvalue_type h;
struct lruhash_entry* e;
@@ -321,6 +321,7 @@ mesh_serve_expired_lookup(struct module_qstate* qstate,
time_t timenow = *qstate->env->now;
int must_validate = (!(qstate->query_flags&BIT_CD)
|| qstate->env->cfg->ignore_cd) && qstate->env->need_to_validate;
+ *is_expired = 0;
/* Lookup cache */
h = query_info_hash(lookup_qinfo, qstate->query_flags);
e = slabhash_lookup(qstate->env->msg_cache, h, lookup_qinfo, 0);
@@ -328,6 +329,7 @@ mesh_serve_expired_lookup(struct module_qstate* qstate,
key = (struct msgreply_entry*)e->key;
data = (struct reply_info*)e->data;
+ if(data->ttl < timenow) *is_expired = 1;
msg = tomsg(qstate->env, &key->key, data, qstate->region, timenow,
qstate->env->cfg->serve_expired, qstate->env->scratch);
if(!msg)
@@ -2176,6 +2178,7 @@ mesh_serve_expired_callback(void* arg)
int must_validate = (!(qstate->query_flags&BIT_CD)
|| qstate->env->cfg->ignore_cd) && qstate->env->need_to_validate;
int i = 0;
+ int is_expired;
if(!qstate->serve_expired_data) return;
verbose(VERB_ALGO, "Serve expired: Trying to reply with expired data");
comm_timer_delete(qstate->serve_expired_data->timer);
@@ -2193,7 +2196,7 @@ mesh_serve_expired_callback(void* arg)
fptr_ok(fptr_whitelist_serve_expired_lookup(
qstate->serve_expired_data->get_cached_answer));
msg = (*qstate->serve_expired_data->get_cached_answer)(qstate,
- lookup_qinfo);
+ lookup_qinfo, &is_expired);
if(!msg)
return;
/* Reset these in case we pass a second time from here. */
@@ -2285,8 +2288,10 @@ mesh_serve_expired_callback(void* arg)
/* Add EDE Stale Answer (RCF8914). Ignore global ede as this is
* warning instead of an error */
- if (r->edns.edns_present && qstate->env->cfg->ede_serve_expired &&
- qstate->env->cfg->ede) {
+ if(r->edns.edns_present &&
+ qstate->env->cfg->ede_serve_expired &&
+ qstate->env->cfg->ede &&
+ is_expired) {
edns_opt_list_append_ede(&r->edns.opt_list_out,
mstate->s.region, LDNS_EDE_STALE_ANSWER, NULL);
}
diff --git a/services/mesh.h b/services/mesh.h
index 5bd53e065e8f..26ececbe6210 100644
--- a/services/mesh.h
+++ b/services/mesh.h
@@ -673,11 +673,12 @@ void mesh_serve_expired_callback(void* arg);
* the same behavior as when replying from cache.
* @param qstate: the module qstate.
* @param lookup_qinfo: the query info to look for in the cache.
+ * @param is_expired: set if the cached answer is expired.
* @return dns_msg if a cached answer was found, otherwise NULL.
*/
struct dns_msg*
mesh_serve_expired_lookup(struct module_qstate* qstate,
- struct query_info* lookup_qinfo);
+ struct query_info* lookup_qinfo, int* is_expired);
/**
* See if the mesh has space for more queries. You can allocate queries
diff --git a/services/modstack.c b/services/modstack.c
index 6c8af0505b69..fa68cc71d2ff 100644
--- a/services/modstack.c
+++ b/services/modstack.c
@@ -265,7 +265,7 @@ modstack_call_init(struct module_stack* stack, const char* module_conf,
int i, changed = 0;
env->need_to_validate = 0; /* set by module init below */
for(i=0; i<stack->num; i++) {
- while(*module_conf && isspace(*module_conf))
+ while(*module_conf && isspace((unsigned char)*module_conf))
module_conf++;
if(strncmp(stack->mod[i]->name, module_conf,
strlen(stack->mod[i]->name))) {
diff --git a/services/rpz.c b/services/rpz.c
index d8999a8a55eb..3b92ee53837e 100644
--- a/services/rpz.c
+++ b/services/rpz.c
@@ -1969,6 +1969,7 @@ rpz_synthesize_nodata(struct rpz* ATTR_UNUSED(r), struct module_qstate* ms,
0, /* ttl */
0, /* prettl */
0, /* expttl */
+ 0, /* norecttl */
0, /* an */
0, /* ns */
0, /* ar */
@@ -1999,6 +2000,7 @@ rpz_synthesize_nxdomain(struct rpz* r, struct module_qstate* ms,
0, /* ttl */
0, /* prettl */
0, /* expttl */
+ 0, /* norecttl */
0, /* an */
0, /* ns */
0, /* ar */
@@ -2031,6 +2033,7 @@ rpz_synthesize_localdata_from_rrset(struct rpz* ATTR_UNUSED(r), struct module_qs
0, /* ttl */
0, /* prettl */
0, /* expttl */
+ 0, /* norecttl */
1, /* an */
0, /* ns */
0, /* ar */
@@ -2176,6 +2179,7 @@ rpz_synthesize_cname_override_msg(struct rpz* r, struct module_qstate* ms,
0, /* ttl */
0, /* prettl */
0, /* expttl */
+ 0, /* norecttl */
1, /* an */
0, /* ns */
0, /* ar */
@@ -2288,15 +2292,18 @@ rpz_apply_nsip_trigger(struct module_qstate* ms, struct query_info* qchase,
if(action == RPZ_LOCAL_DATA_ACTION && raddr->data == NULL) {
verbose(VERB_ALGO, "rpz: bug: nsip local data action but no local data");
ret = rpz_synthesize_nodata(r, ms, qchase, az);
+ ms->rpz_applied = 1;
goto done;
}
switch(action) {
case RPZ_NXDOMAIN_ACTION:
ret = rpz_synthesize_nxdomain(r, ms, qchase, az);
+ ms->rpz_applied = 1;
break;
case RPZ_NODATA_ACTION:
ret = rpz_synthesize_nodata(r, ms, qchase, az);
+ ms->rpz_applied = 1;
break;
case RPZ_TCP_ONLY_ACTION:
/* basically a passthru here but the tcp-only will be
@@ -2306,11 +2313,13 @@ rpz_apply_nsip_trigger(struct module_qstate* ms, struct query_info* qchase,
break;
case RPZ_DROP_ACTION:
ret = rpz_synthesize_nodata(r, ms, qchase, az);
+ ms->rpz_applied = 1;
ms->is_drop = 1;
break;
case RPZ_LOCAL_DATA_ACTION:
ret = rpz_synthesize_nsip_localdata(r, ms, qchase, raddr, az);
if(ret == NULL) { ret = rpz_synthesize_nodata(r, ms, qchase, az); }
+ ms->rpz_applied = 1;
break;
case RPZ_PASSTHRU_ACTION:
ret = NULL;
@@ -2318,6 +2327,7 @@ rpz_apply_nsip_trigger(struct module_qstate* ms, struct query_info* qchase,
break;
case RPZ_CNAME_OVERRIDE_ACTION:
ret = rpz_synthesize_cname_override_msg(r, ms, qchase);
+ ms->rpz_applied = 1;
break;
default:
verbose(VERB_ALGO, "rpz: nsip: bug: unhandled or invalid action: '%s'",
@@ -2352,9 +2362,11 @@ rpz_apply_nsdname_trigger(struct module_qstate* ms, struct query_info* qchase,
switch(action) {
case RPZ_NXDOMAIN_ACTION:
ret = rpz_synthesize_nxdomain(r, ms, qchase, az);
+ ms->rpz_applied = 1;
break;
case RPZ_NODATA_ACTION:
ret = rpz_synthesize_nodata(r, ms, qchase, az);
+ ms->rpz_applied = 1;
break;
case RPZ_TCP_ONLY_ACTION:
/* basically a passthru here but the tcp-only will be
@@ -2364,11 +2376,13 @@ rpz_apply_nsdname_trigger(struct module_qstate* ms, struct query_info* qchase,
break;
case RPZ_DROP_ACTION:
ret = rpz_synthesize_nodata(r, ms, qchase, az);
+ ms->rpz_applied = 1;
ms->is_drop = 1;
break;
case RPZ_LOCAL_DATA_ACTION:
ret = rpz_synthesize_nsdname_localdata(r, ms, qchase, z, match, az);
if(ret == NULL) { ret = rpz_synthesize_nodata(r, ms, qchase, az); }
+ ms->rpz_applied = 1;
break;
case RPZ_PASSTHRU_ACTION:
ret = NULL;
@@ -2376,6 +2390,7 @@ rpz_apply_nsdname_trigger(struct module_qstate* ms, struct query_info* qchase,
break;
case RPZ_CNAME_OVERRIDE_ACTION:
ret = rpz_synthesize_cname_override_msg(r, ms, qchase);
+ ms->rpz_applied = 1;
break;
default:
verbose(VERB_ALGO, "rpz: nsdname: bug: unhandled or invalid action: '%s'",
@@ -2579,9 +2594,11 @@ struct dns_msg* rpz_callback_from_iterator_cname(struct module_qstate* ms,
switch(localzone_type_to_rpz_action(lzt)) {
case RPZ_NXDOMAIN_ACTION:
ret = rpz_synthesize_nxdomain(r, ms, &is->qchase, a);
+ ms->rpz_applied = 1;
break;
case RPZ_NODATA_ACTION:
ret = rpz_synthesize_nodata(r, ms, &is->qchase, a);
+ ms->rpz_applied = 1;
break;
case RPZ_TCP_ONLY_ACTION:
/* basically a passthru here but the tcp-only will be
@@ -2591,11 +2608,13 @@ struct dns_msg* rpz_callback_from_iterator_cname(struct module_qstate* ms,
break;
case RPZ_DROP_ACTION:
ret = rpz_synthesize_nodata(r, ms, &is->qchase, a);
+ ms->rpz_applied = 1;
ms->is_drop = 1;
break;
case RPZ_LOCAL_DATA_ACTION:
ret = rpz_synthesize_qname_localdata_msg(r, ms, &is->qchase, z, a);
if(ret == NULL) { ret = rpz_synthesize_nodata(r, ms, &is->qchase, a); }
+ ms->rpz_applied = 1;
break;
case RPZ_PASSTHRU_ACTION:
ret = NULL;