diff options
Diffstat (limited to 'services')
| -rw-r--r-- | services/authzone.c | 1405 | ||||
| -rw-r--r-- | services/authzone.h | 96 | ||||
| -rw-r--r-- | services/cache/dns.c | 5 | ||||
| -rw-r--r-- | services/cache/dns.h | 4 | ||||
| -rw-r--r-- | services/cache/infra.c | 3 | ||||
| -rw-r--r-- | services/listen_dnsport.c | 263 | ||||
| -rw-r--r-- | services/listen_dnsport.h | 28 | ||||
| -rw-r--r-- | services/localzone.c | 16 | ||||
| -rw-r--r-- | services/localzone.h | 2 | ||||
| -rw-r--r-- | services/mesh.c | 17 | ||||
| -rw-r--r-- | services/modstack.c | 65 | ||||
| -rw-r--r-- | services/outside_network.c | 463 | ||||
| -rw-r--r-- | services/outside_network.h | 45 | ||||
| -rw-r--r-- | services/rpz.c | 26 |
14 files changed, 2186 insertions, 252 deletions
diff --git a/services/authzone.c b/services/authzone.c index 2ef782c1f45c..e6e3a8cff9b4 100644 --- a/services/authzone.c +++ b/services/authzone.c @@ -67,7 +67,11 @@ #include "sldns/parseutil.h" #include "sldns/keyraw.h" #include "validator/val_nsec3.h" +#include "validator/val_nsec.h" #include "validator/val_secalgo.h" +#include "validator/val_sigcrypt.h" +#include "validator/val_anchor.h" +#include "validator/val_utils.h" #include <ctype.h> /** bytes to use for NSEC3 hash buffer. 20 for sha1 */ @@ -1741,9 +1745,45 @@ int auth_zone_write_file(struct auth_zone* z, const char* fname) return 1; } +/** offline verify for zonemd, while reading a zone file to immediately + * spot bad hashes in zonefile as they are read. + * Creates temp buffers, but uses anchors and validation environment + * from the module_env. */ +static void +zonemd_offline_verify(struct auth_zone* z, struct module_env* env_for_val, + struct module_stack* mods) +{ + struct module_env env; + time_t now = 0; + if(!z->zonemd_check) + return; + env = *env_for_val; + env.scratch_buffer = sldns_buffer_new(env.cfg->msg_buffer_size); + if(!env.scratch_buffer) { + log_err("out of memory"); + goto clean_exit; + } + env.scratch = regional_create(); + if(!env.now) { + env.now = &now; + now = time(NULL); + } + if(!env.scratch) { + log_err("out of memory"); + goto clean_exit; + } + auth_zone_verify_zonemd(z, &env, mods, NULL, 1, 0); + +clean_exit: + /* clean up and exit */ + sldns_buffer_free(env.scratch_buffer); + regional_destroy(env.scratch); +} + /** read all auth zones from file (if they have) */ static int -auth_zones_read_zones(struct auth_zones* az, struct config_file* cfg) +auth_zones_read_zones(struct auth_zones* az, struct config_file* cfg, + struct module_env* env, struct module_stack* mods) { struct auth_zone* z; lock_rw_wrlock(&az->lock); @@ -1754,12 +1794,162 @@ auth_zones_read_zones(struct auth_zones* az, struct config_file* cfg) lock_rw_unlock(&az->lock); return 0; } + if(z->zonefile && z->zonefile[0]!=0 && env) + zonemd_offline_verify(z, env, mods); lock_rw_unlock(&z->lock); } lock_rw_unlock(&az->lock); return 1; } +/** fetch the content of a ZONEMD RR from the rdata */ +static int zonemd_fetch_parameters(struct auth_rrset* zonemd_rrset, size_t i, + uint32_t* serial, int* scheme, int* hashalgo, uint8_t** hash, + size_t* hashlen) +{ + size_t rr_len; + uint8_t* rdata; + if(i >= zonemd_rrset->data->count) + return 0; + rr_len = zonemd_rrset->data->rr_len[i]; + if(rr_len < 2+4+1+1) + return 0; /* too short, for rdlen+serial+scheme+algo */ + rdata = zonemd_rrset->data->rr_data[i]; + *serial = sldns_read_uint32(rdata+2); + *scheme = rdata[6]; + *hashalgo = rdata[7]; + *hashlen = rr_len - 8; + if(*hashlen == 0) + *hash = NULL; + else *hash = rdata+8; + return 1; +} + +/** + * See if the ZONEMD scheme, hash occurs more than once. + * @param zonemd_rrset: the zonemd rrset to check with the RRs in it. + * @param index: index of the original, this is allowed to have that + * scheme and hashalgo, but other RRs should not have it. + * @param scheme: the scheme to check for. + * @param hashalgo: the hash algorithm to check for. + * @return true if it occurs more than once. + */ +static int zonemd_is_duplicate_scheme_hash(struct auth_rrset* zonemd_rrset, + size_t index, int scheme, int hashalgo) +{ + size_t j; + for(j=0; j<zonemd_rrset->data->count; j++) { + uint32_t serial2 = 0; + int scheme2 = 0, hashalgo2 = 0; + uint8_t* hash2 = NULL; + size_t hashlen2 = 0; + if(index == j) { + /* this is the original */ + continue; + } + if(!zonemd_fetch_parameters(zonemd_rrset, j, &serial2, + &scheme2, &hashalgo2, &hash2, &hashlen2)) { + /* malformed, skip it */ + continue; + } + if(scheme == scheme2 && hashalgo == hashalgo2) { + /* duplicate scheme, hash */ + verbose(VERB_ALGO, "zonemd duplicate for scheme %d " + "and hash %d", scheme, hashalgo); + return 1; + } + } + return 0; +} + +/** + * Check ZONEMDs if present for the auth zone. Depending on config + * it can warn or fail on that. Checks the hash of the ZONEMD. + * @param z: auth zone to check for. + * caller must hold lock on zone. + * @param env: module env for temp buffers. + * @param reason: returned on failure. + * @return false on failure, true if hash checks out. + */ +static int auth_zone_zonemd_check_hash(struct auth_zone* z, + struct module_env* env, char** reason) +{ + /* loop over ZONEMDs and see which one is valid. if not print + * failure (depending on config) */ + struct auth_data* apex; + struct auth_rrset* zonemd_rrset; + size_t i; + struct regional* region = NULL; + struct sldns_buffer* buf = NULL; + uint32_t soa_serial = 0; + region = env->scratch; + regional_free_all(region); + buf = env->scratch_buffer; + if(!auth_zone_get_serial(z, &soa_serial)) { + *reason = "zone has no SOA serial"; + return 0; + } + + apex = az_find_name(z, z->name, z->namelen); + if(!apex) { + *reason = "zone has no apex"; + return 0; + } + zonemd_rrset = az_domain_rrset(apex, LDNS_RR_TYPE_ZONEMD); + if(!zonemd_rrset || zonemd_rrset->data->count==0) { + *reason = "zone has no ZONEMD"; + return 0; /* no RRset or no RRs in rrset */ + } + + /* we have a ZONEMD, check if it is correct */ + for(i=0; i<zonemd_rrset->data->count; i++) { + uint32_t serial = 0; + int scheme = 0, hashalgo = 0; + uint8_t* hash = NULL; + size_t hashlen = 0; + if(!zonemd_fetch_parameters(zonemd_rrset, i, &serial, &scheme, + &hashalgo, &hash, &hashlen)) { + /* malformed RR */ + *reason = "ZONEMD rdata malformed"; + continue; + } + /* check for duplicates */ + if(zonemd_is_duplicate_scheme_hash(zonemd_rrset, i, scheme, + hashalgo)) { + /* duplicate hash of the same scheme,hash + * is not allowed. */ + *reason = "ZONEMD RRSet contains more than one RR " + "with the same scheme and hash algorithm"; + continue; + } + regional_free_all(region); + if(serial != soa_serial) { + *reason = "ZONEMD serial is wrong"; + continue; + } + if(auth_zone_generate_zonemd_check(z, scheme, hashalgo, + hash, hashlen, region, buf, reason)) { + /* success */ + if(verbosity >= VERB_ALGO) { + char zstr[255+1]; + dname_str(z->name, zstr); + verbose(VERB_ALGO, "auth-zone %s ZONEMD hash is correct", zstr); + } + return 1; + } + /* try next one */ + } + /* fail, we may have reason */ + if(!*reason) + *reason = "no ZONEMD records found"; + if(verbosity >= VERB_ALGO) { + char zstr[255+1]; + dname_str(z->name, zstr); + verbose(VERB_ALGO, "auth-zone %s ZONEMD failed: %s", zstr, *reason); + } + return 0; +} + /** find serial number of zone or false if none */ int auth_zone_get_serial(struct auth_zone* z, uint32_t* serial) @@ -1779,7 +1969,7 @@ auth_zone_get_serial(struct auth_zone* z, uint32_t* serial) } /** Find auth_zone SOA and populate the values in xfr(soa values). */ -static int +int xfr_find_soa(struct auth_zone* z, struct auth_xfer* xfr) { struct auth_data* apex; @@ -1908,6 +2098,8 @@ auth_zones_cfg(struct auth_zones* az, struct config_auth* c) z->for_downstream = c->for_downstream; z->for_upstream = c->for_upstream; z->fallback_enabled = c->fallback_enabled; + z->zonemd_check = c->zonemd_check; + z->zonemd_reject_absence = c->zonemd_reject_absence; if(c->isrpz && !z->rpz){ if(!(z->rpz = rpz_create(c))){ fatal_exit("Could not setup RPZ zones"); @@ -2000,7 +2192,8 @@ az_delete_deleted_zones(struct auth_zones* az) } int auth_zones_apply_cfg(struct auth_zones* az, struct config_file* cfg, - int setup, int* is_rpz) + int setup, int* is_rpz, struct module_env* env, + struct module_stack* mods) { struct config_auth* p; az_setall_deleted(az); @@ -2016,7 +2209,7 @@ int auth_zones_apply_cfg(struct auth_zones* az, struct config_file* cfg, } } az_delete_deleted_zones(az); - if(!auth_zones_read_zones(az, cfg)) + if(!auth_zones_read_zones(az, cfg, env, mods)) return 0; if(setup) { if(!auth_zones_setup_zones(az)) @@ -4959,6 +5152,9 @@ xfr_write_after_update(struct auth_xfer* xfr, struct module_env* env) lock_rw_unlock(&z->lock); return; } +#ifdef UB_ON_WINDOWS + (void)unlink(zfilename); /* windows does not replace file with rename() */ +#endif if(rename(tmpfile, zfilename) < 0) { log_err("could not rename(%s, %s): %s", tmpfile, zfilename, strerror(errno)); @@ -4969,6 +5165,28 @@ xfr_write_after_update(struct auth_xfer* xfr, struct module_env* env) lock_rw_unlock(&z->lock); } +/** reacquire locks and structures. Starts with no locks, ends + * with xfr and z locks, if fail, no z lock */ +static int xfr_process_reacquire_locks(struct auth_xfer* xfr, + struct module_env* env, struct auth_zone** z) +{ + /* release xfr lock, then, while holding az->lock grab both + * z->lock and xfr->lock */ + lock_rw_rdlock(&env->auth_zones->lock); + *z = auth_zone_find(env->auth_zones, xfr->name, xfr->namelen, + xfr->dclass); + if(!*z) { + lock_rw_unlock(&env->auth_zones->lock); + lock_basic_lock(&xfr->lock); + *z = NULL; + return 0; + } + lock_rw_wrlock(&(*z)->lock); + lock_basic_lock(&xfr->lock); + lock_rw_unlock(&env->auth_zones->lock); + return 1; +} + /** process chunk list and update zone in memory, * return false if it did not work */ static int @@ -4978,21 +5196,12 @@ xfr_process_chunk_list(struct auth_xfer* xfr, struct module_env* env, struct auth_zone* z; /* obtain locks and structures */ - /* release xfr lock, then, while holding az->lock grab both - * z->lock and xfr->lock */ lock_basic_unlock(&xfr->lock); - lock_rw_rdlock(&env->auth_zones->lock); - z = auth_zone_find(env->auth_zones, xfr->name, xfr->namelen, - xfr->dclass); - if(!z) { - lock_rw_unlock(&env->auth_zones->lock); + if(!xfr_process_reacquire_locks(xfr, env, &z)) { /* the zone is gone, ignore xfr results */ - lock_basic_lock(&xfr->lock); return 0; } - lock_rw_wrlock(&z->lock); - lock_basic_lock(&xfr->lock); - lock_rw_unlock(&env->auth_zones->lock); + /* holding xfr and z locks */ /* apply data */ if(xfr->task_transfer->master->http) { @@ -5027,6 +5236,35 @@ xfr_process_chunk_list(struct auth_xfer* xfr, struct module_env* env, " (or malformed RR)", xfr->task_transfer->master->host); return 0; } + + /* release xfr lock while verifying zonemd because it may have + * to spawn lookups in the state machines */ + lock_basic_unlock(&xfr->lock); + /* holding z lock */ + auth_zone_verify_zonemd(z, env, &env->mesh->mods, NULL, 0, 0); + if(z->zone_expired) { + char zname[256]; + /* ZONEMD must have failed */ + /* reacquire locks, so we hold xfr lock on exit of routine, + * and both xfr and z again after releasing xfr for potential + * state machine mesh callbacks */ + lock_rw_unlock(&z->lock); + if(!xfr_process_reacquire_locks(xfr, env, &z)) + return 0; + dname_str(xfr->name, zname); + verbose(VERB_ALGO, "xfr from %s: ZONEMD failed for %s, transfer is failed", xfr->task_transfer->master->host, zname); + xfr->zone_expired = 1; + lock_rw_unlock(&z->lock); + return 0; + } + /* reacquire locks, so we hold xfr lock on exit of routine, + * and both xfr and z again after releasing xfr for potential + * state machine mesh callbacks */ + lock_rw_unlock(&z->lock); + if(!xfr_process_reacquire_locks(xfr, env, &z)) + return 0; + /* holding xfr and z locks */ + if(xfr->have_zone) xfr->lease_time = *env->now; @@ -5188,7 +5426,7 @@ xfr_transfer_init_fetch(struct auth_xfer* xfr, struct module_env* env) xfr->task_transfer->cp = outnet_comm_point_for_http( env->outnet, auth_xfer_transfer_http_callback, xfr, &addr, addrlen, -1, master->ssl, master->host, - master->file); + master->file, env->cfg); if(!xfr->task_transfer->cp) { char zname[255+1], as[256]; dname_str(xfr->name, zname); @@ -5210,7 +5448,7 @@ xfr_transfer_init_fetch(struct auth_xfer* xfr, struct module_env* env) /* perform AXFR/IXFR */ /* set the packet to be written */ /* create new ID */ - xfr->task_transfer->id = (uint16_t)(ub_random(env->rnd)&0xffff); + xfr->task_transfer->id = GET_RANDOM_ID(env->rnd); xfr_create_ixfr_packet(xfr, env->scratch_buffer, xfr->task_transfer->id, master); @@ -6060,7 +6298,7 @@ xfr_probe_send_probe(struct auth_xfer* xfr, struct module_env* env, /* create new ID for new probes, but not on timeout retries, * this means we'll accept replies to previous retries to same ip */ if(timeout == AUTH_PROBE_TIMEOUT) - xfr->task_probe->id = (uint16_t)(ub_random(env->rnd)&0xffff); + xfr->task_probe->id = GET_RANDOM_ID(env->rnd); xfr_create_soa_probe_packet(xfr, env->scratch_buffer, xfr->task_probe->id); /* we need to remove the cp if we have a different ip4/ip6 type now */ @@ -6933,12 +7171,14 @@ xfer_set_masters(struct auth_master** list, struct config_auth* c, if(with_http) for(p = c->urls; p; p = p->next) { m = auth_master_new(&list); + if(!m) return 0; m->http = 1; if(!parse_url(p->str, &m->host, &m->file, &m->port, &m->ssl)) return 0; } for(p = c->masters; p; p = p->next) { m = auth_master_new(&list); + if(!m) return 0; m->ixfr = 1; /* this flag is not configurable */ m->host = strdup(p->str); if(!m->host) { @@ -6948,6 +7188,7 @@ xfer_set_masters(struct auth_master** list, struct config_auth* c, } for(p = c->allow_notify; p; p = p->next) { m = auth_master_new(&list); + if(!m) return 0; m->allow_notify = 1; m->host = strdup(p->str); if(!m->host) { @@ -6972,3 +7213,1131 @@ compare_serial(uint32_t a, uint32_t b) return 1; } } + +int zonemd_hashalgo_supported(int hashalgo) +{ + if(hashalgo == ZONEMD_ALGO_SHA384) return 1; + if(hashalgo == ZONEMD_ALGO_SHA512) return 1; + return 0; +} + +int zonemd_scheme_supported(int scheme) +{ + if(scheme == ZONEMD_SCHEME_SIMPLE) return 1; + return 0; +} + +/** initialize hash for hashing with zonemd hash algo */ +static struct secalgo_hash* zonemd_digest_init(int hashalgo, char** reason) +{ + struct secalgo_hash *h; + if(hashalgo == ZONEMD_ALGO_SHA384) { + /* sha384 */ + h = secalgo_hash_create_sha384(); + if(!h) + *reason = "digest sha384 could not be created"; + return h; + } else if(hashalgo == ZONEMD_ALGO_SHA512) { + /* sha512 */ + h = secalgo_hash_create_sha512(); + if(!h) + *reason = "digest sha512 could not be created"; + return h; + } + /* unknown hash algo */ + *reason = "unsupported algorithm"; + return NULL; +} + +/** update the hash for zonemd */ +static int zonemd_digest_update(int hashalgo, struct secalgo_hash* h, + uint8_t* data, size_t len, char** reason) +{ + if(hashalgo == ZONEMD_ALGO_SHA384) { + if(!secalgo_hash_update(h, data, len)) { + *reason = "digest sha384 failed"; + return 0; + } + return 1; + } else if(hashalgo == ZONEMD_ALGO_SHA512) { + if(!secalgo_hash_update(h, data, len)) { + *reason = "digest sha512 failed"; + return 0; + } + return 1; + } + /* unknown hash algo */ + *reason = "unsupported algorithm"; + return 0; +} + +/** finish the hash for zonemd */ +static int zonemd_digest_finish(int hashalgo, struct secalgo_hash* h, + uint8_t* result, size_t hashlen, size_t* resultlen, char** reason) +{ + if(hashalgo == ZONEMD_ALGO_SHA384) { + if(hashlen < 384/8) { + *reason = "digest buffer too small for sha384"; + return 0; + } + if(!secalgo_hash_final(h, result, hashlen, resultlen)) { + *reason = "digest sha384 finish failed"; + return 0; + } + return 1; + } else if(hashalgo == ZONEMD_ALGO_SHA512) { + if(hashlen < 512/8) { + *reason = "digest buffer too small for sha512"; + return 0; + } + if(!secalgo_hash_final(h, result, hashlen, resultlen)) { + *reason = "digest sha512 finish failed"; + return 0; + } + return 1; + } + /* unknown algo */ + *reason = "unsupported algorithm"; + return 0; +} + +/** add rrsets from node to the list */ +static size_t authdata_rrsets_to_list(struct auth_rrset** array, + size_t arraysize, struct auth_rrset* first) +{ + struct auth_rrset* rrset = first; + size_t num = 0; + while(rrset) { + if(num >= arraysize) + return num; + array[num] = rrset; + num++; + rrset = rrset->next; + } + return num; +} + +/** compare rr list entries */ +static int rrlist_compare(const void* arg1, const void* arg2) +{ + struct auth_rrset* r1 = *(struct auth_rrset**)arg1; + struct auth_rrset* r2 = *(struct auth_rrset**)arg2; + uint16_t t1, t2; + if(r1 == NULL) t1 = LDNS_RR_TYPE_RRSIG; + else t1 = r1->type; + if(r2 == NULL) t2 = LDNS_RR_TYPE_RRSIG; + else t2 = r2->type; + if(t1 < t2) + return -1; + if(t1 > t2) + return 1; + return 0; +} + +/** add type RRSIG to rr list if not one there already, + * this is to perform RRSIG collate processing at that point. */ +static void addrrsigtype_if_needed(struct auth_rrset** array, + size_t arraysize, size_t* rrnum, struct auth_data* node) +{ + if(az_domain_rrset(node, LDNS_RR_TYPE_RRSIG)) + return; /* already one there */ + if((*rrnum) >= arraysize) + return; /* array too small? */ + array[*rrnum] = NULL; /* nothing there, but need entry in list */ + (*rrnum)++; +} + +/** collate the RRs in an RRset using the simple scheme */ +static int zonemd_simple_rrset(struct auth_zone* z, int hashalgo, + struct secalgo_hash* h, struct auth_data* node, + struct auth_rrset* rrset, struct regional* region, + struct sldns_buffer* buf, char** reason) +{ + /* canonicalize */ + struct ub_packed_rrset_key key; + memset(&key, 0, sizeof(key)); + key.entry.key = &key; + key.entry.data = rrset->data; + key.rk.dname = node->name; + key.rk.dname_len = node->namelen; + key.rk.type = htons(rrset->type); + key.rk.rrset_class = htons(z->dclass); + if(!rrset_canonicalize_to_buffer(region, buf, &key)) { + *reason = "out of memory"; + return 0; + } + regional_free_all(region); + + /* hash */ + if(!zonemd_digest_update(hashalgo, h, sldns_buffer_begin(buf), + sldns_buffer_limit(buf), reason)) { + return 0; + } + return 1; +} + +/** count number of RRSIGs in a domain name rrset list */ +static size_t zonemd_simple_count_rrsig(struct auth_rrset* rrset, + struct auth_rrset** rrlist, size_t rrnum, + struct auth_zone* z, struct auth_data* node) +{ + size_t i, count = 0; + if(rrset) { + size_t j; + for(j = 0; j<rrset->data->count; j++) { + if(rrsig_rdata_get_type_covered(rrset->data-> + rr_data[j], rrset->data->rr_len[j]) == + LDNS_RR_TYPE_ZONEMD && + query_dname_compare(z->name, node->name)==0) { + /* omit RRSIGs over type ZONEMD at apex */ + continue; + } + count++; + } + } + for(i=0; i<rrnum; i++) { + if(rrlist[i] && rrlist[i]->type == LDNS_RR_TYPE_ZONEMD && + query_dname_compare(z->name, node->name)==0) { + /* omit RRSIGs over type ZONEMD at apex */ + continue; + } + count += (rrlist[i]?rrlist[i]->data->rrsig_count:0); + } + return count; +} + +/** allocate sparse rrset data for the number of entries in tepm region */ +static int zonemd_simple_rrsig_allocs(struct regional* region, + struct packed_rrset_data* data, size_t count) +{ + data->rr_len = regional_alloc(region, sizeof(*data->rr_len) * count); + if(!data->rr_len) { + return 0; + } + data->rr_ttl = regional_alloc(region, sizeof(*data->rr_ttl) * count); + if(!data->rr_ttl) { + return 0; + } + data->rr_data = regional_alloc(region, sizeof(*data->rr_data) * count); + if(!data->rr_data) { + return 0; + } + return 1; +} + +/** add the RRSIGs from the rrs in the domain into the data */ +static void add_rrlist_rrsigs_into_data(struct packed_rrset_data* data, + size_t* done, struct auth_rrset** rrlist, size_t rrnum, + struct auth_zone* z, struct auth_data* node) +{ + size_t i; + for(i=0; i<rrnum; i++) { + size_t j; + if(!rrlist[i]) + continue; + if(rrlist[i] && rrlist[i]->type == LDNS_RR_TYPE_ZONEMD && + query_dname_compare(z->name, node->name)==0) { + /* omit RRSIGs over type ZONEMD at apex */ + continue; + } + for(j = 0; j<rrlist[i]->data->rrsig_count; j++) { + data->rr_len[*done] = rrlist[i]->data->rr_len[rrlist[i]->data->count + j]; + data->rr_ttl[*done] = rrlist[i]->data->rr_ttl[rrlist[i]->data->count + j]; + /* reference the rdata in the rrset, no need to + * copy it, it is no longer needed at the end of + * the routine */ + data->rr_data[*done] = rrlist[i]->data->rr_data[rrlist[i]->data->count + j]; + (*done)++; + } + } +} + +static void add_rrset_into_data(struct packed_rrset_data* data, + size_t* done, struct auth_rrset* rrset, + struct auth_zone* z, struct auth_data* node) +{ + if(rrset) { + size_t j; + for(j = 0; j<rrset->data->count; j++) { + if(rrsig_rdata_get_type_covered(rrset->data-> + rr_data[j], rrset->data->rr_len[j]) == + LDNS_RR_TYPE_ZONEMD && + query_dname_compare(z->name, node->name)==0) { + /* omit RRSIGs over type ZONEMD at apex */ + continue; + } + data->rr_len[*done] = rrset->data->rr_len[j]; + data->rr_ttl[*done] = rrset->data->rr_ttl[j]; + /* reference the rdata in the rrset, no need to + * copy it, it is no longer need at the end of + * the routine */ + data->rr_data[*done] = rrset->data->rr_data[j]; + (*done)++; + } + } +} + +/** collate the RRSIGs using the simple scheme */ +static int zonemd_simple_rrsig(struct auth_zone* z, int hashalgo, + struct secalgo_hash* h, struct auth_data* node, + struct auth_rrset* rrset, struct auth_rrset** rrlist, size_t rrnum, + struct regional* region, struct sldns_buffer* buf, char** reason) +{ + /* the rrset pointer can be NULL, this means it is type RRSIG and + * there is no ordinary type RRSIG there. The RRSIGs are stored + * with the RRsets in their data. + * + * The RRset pointer can be nonNULL. This happens if there is + * no RR that is covered by the RRSIG for the domain. Then this + * RRSIG RR is stored in an rrset of type RRSIG. The other RRSIGs + * are stored in the rrset entries for the RRs in the rr list for + * the domain node. We need to collate the rrset's data, if any, and + * the rrlist's rrsigs */ + /* if this is the apex, omit RRSIGs that cover type ZONEMD */ + /* build rrsig rrset */ + size_t done = 0; + struct ub_packed_rrset_key key; + struct packed_rrset_data data; + memset(&key, 0, sizeof(key)); + memset(&data, 0, sizeof(data)); + key.entry.key = &key; + key.entry.data = &data; + key.rk.dname = node->name; + key.rk.dname_len = node->namelen; + key.rk.type = htons(LDNS_RR_TYPE_RRSIG); + key.rk.rrset_class = htons(z->dclass); + data.count = zonemd_simple_count_rrsig(rrset, rrlist, rrnum, z, node); + if(!zonemd_simple_rrsig_allocs(region, &data, data.count)) { + *reason = "out of memory"; + regional_free_all(region); + return 0; + } + /* all the RRSIGs stored in the other rrsets for this domain node */ + add_rrlist_rrsigs_into_data(&data, &done, rrlist, rrnum, z, node); + /* plus the RRSIGs stored in an rrset of type RRSIG for this node */ + add_rrset_into_data(&data, &done, rrset, z, node); + + /* canonicalize */ + if(!rrset_canonicalize_to_buffer(region, buf, &key)) { + *reason = "out of memory"; + regional_free_all(region); + return 0; + } + regional_free_all(region); + + /* hash */ + if(!zonemd_digest_update(hashalgo, h, sldns_buffer_begin(buf), + sldns_buffer_limit(buf), reason)) { + return 0; + } + return 1; +} + +/** collate a domain's rrsets using the simple scheme */ +static int zonemd_simple_domain(struct auth_zone* z, int hashalgo, + struct secalgo_hash* h, struct auth_data* node, + struct regional* region, struct sldns_buffer* buf, char** reason) +{ + const size_t rrlistsize = 65536; + struct auth_rrset* rrlist[rrlistsize]; + size_t i, rrnum = 0; + /* see if the domain is out of scope, the zone origin, + * that would be omitted */ + if(!dname_subdomain_c(node->name, z->name)) + return 1; /* continue */ + /* loop over the rrsets in ascending order. */ + rrnum = authdata_rrsets_to_list(rrlist, rrlistsize, node->rrsets); + addrrsigtype_if_needed(rrlist, rrlistsize, &rrnum, node); + qsort(rrlist, rrnum, sizeof(*rrlist), rrlist_compare); + for(i=0; i<rrnum; i++) { + if(rrlist[i] && rrlist[i]->type == LDNS_RR_TYPE_ZONEMD && + query_dname_compare(z->name, node->name) == 0) { + /* omit type ZONEMD at apex */ + continue; + } + if(rrlist[i] == NULL || rrlist[i]->type == + LDNS_RR_TYPE_RRSIG) { + if(!zonemd_simple_rrsig(z, hashalgo, h, node, + rrlist[i], rrlist, rrnum, region, buf, reason)) + return 0; + } else if(!zonemd_simple_rrset(z, hashalgo, h, node, + rrlist[i], region, buf, reason)) { + return 0; + } + } + return 1; +} + +/** collate the zone using the simple scheme */ +static int zonemd_simple_collate(struct auth_zone* z, int hashalgo, + struct secalgo_hash* h, struct regional* region, + struct sldns_buffer* buf, char** reason) +{ + /* our tree is sorted in canonical order, so we can just loop over + * the tree */ + struct auth_data* n; + RBTREE_FOR(n, struct auth_data*, &z->data) { + if(!zonemd_simple_domain(z, hashalgo, h, n, region, buf, + reason)) + return 0; + } + return 1; +} + +int auth_zone_generate_zonemd_hash(struct auth_zone* z, int scheme, + int hashalgo, uint8_t* hash, size_t hashlen, size_t* resultlen, + struct regional* region, struct sldns_buffer* buf, char** reason) +{ + struct secalgo_hash* h = zonemd_digest_init(hashalgo, reason); + if(!h) { + if(!*reason) + *reason = "digest init fail"; + return 0; + } + if(scheme == ZONEMD_SCHEME_SIMPLE) { + if(!zonemd_simple_collate(z, hashalgo, h, region, buf, reason)) { + if(!*reason) *reason = "scheme simple collate fail"; + secalgo_hash_delete(h); + return 0; + } + } + if(!zonemd_digest_finish(hashalgo, h, hash, hashlen, resultlen, + reason)) { + secalgo_hash_delete(h); + *reason = "digest finish fail"; + return 0; + } + secalgo_hash_delete(h); + return 1; +} + +int auth_zone_generate_zonemd_check(struct auth_zone* z, int scheme, + int hashalgo, uint8_t* hash, size_t hashlen, struct regional* region, + struct sldns_buffer* buf, char** reason) +{ + uint8_t gen[512]; + size_t genlen = 0; + if(!zonemd_hashalgo_supported(hashalgo)) { + *reason = "unsupported algorithm"; + return 0; + } + if(!zonemd_scheme_supported(scheme)) { + *reason = "unsupported scheme"; + return 0; + } + if(hashlen < 12) { + /* the ZONEMD draft requires digests to fail if too small */ + *reason = "digest length too small, less than 12"; + return 0; + } + /* generate digest */ + if(!auth_zone_generate_zonemd_hash(z, scheme, hashalgo, gen, + sizeof(gen), &genlen, region, buf, reason)) { + /* reason filled in by zonemd hash routine */ + return 0; + } + /* check digest length */ + if(hashlen != genlen) { + *reason = "incorrect digest length"; + if(verbosity >= VERB_ALGO) { + verbose(VERB_ALGO, "zonemd scheme=%d hashalgo=%d", + scheme, hashalgo); + log_hex("ZONEMD should be ", gen, genlen); + log_hex("ZONEMD to check is", hash, hashlen); + } + return 0; + } + /* check digest */ + if(memcmp(hash, gen, genlen) != 0) { + *reason = "incorrect digest"; + if(verbosity >= VERB_ALGO) { + verbose(VERB_ALGO, "zonemd scheme=%d hashalgo=%d", + scheme, hashalgo); + log_hex("ZONEMD should be ", gen, genlen); + log_hex("ZONEMD to check is", hash, hashlen); + } + return 0; + } + return 1; +} + +/** log auth zone message with zone name in front. */ +static void auth_zone_log(uint8_t* name, enum verbosity_value level, + const char* format, ...) ATTR_FORMAT(printf, 3, 4); +static void auth_zone_log(uint8_t* name, enum verbosity_value level, + const char* format, ...) +{ + va_list args; + va_start(args, format); + if(verbosity >= level) { + char str[255+1]; + char msg[MAXSYSLOGMSGLEN]; + dname_str(name, str); + vsnprintf(msg, sizeof(msg), format, args); + verbose(level, "auth zone %s %s", str, msg); + } + va_end(args); +} + +/** ZONEMD, dnssec verify the rrset with the dnskey */ +static int zonemd_dnssec_verify_rrset(struct auth_zone* z, + struct module_env* env, struct module_stack* mods, + struct ub_packed_rrset_key* dnskey, struct auth_data* node, + struct auth_rrset* rrset, char** why_bogus) +{ + struct ub_packed_rrset_key pk; + enum sec_status sec; + struct val_env* ve; + int m; + m = modstack_find(mods, "validator"); + if(m == -1) { + auth_zone_log(z->name, VERB_ALGO, "zonemd dnssec verify: have " + "DNSKEY chain of trust, but no validator module"); + return 0; + } + ve = (struct val_env*)env->modinfo[m]; + + memset(&pk, 0, sizeof(pk)); + pk.entry.key = &pk; + pk.entry.data = rrset->data; + pk.rk.dname = node->name; + pk.rk.dname_len = node->namelen; + pk.rk.type = htons(rrset->type); + pk.rk.rrset_class = htons(z->dclass); + if(verbosity >= VERB_ALGO) { + char typestr[32]; + typestr[0]=0; + sldns_wire2str_type_buf(rrset->type, typestr, sizeof(typestr)); + auth_zone_log(z->name, VERB_ALGO, + "zonemd: verify %s RRset with DNSKEY", typestr); + } + sec = dnskeyset_verify_rrset(env, ve, &pk, dnskey, NULL, why_bogus, + LDNS_SECTION_ANSWER, NULL); + if(sec == sec_status_secure) { + return 1; + } + if(why_bogus) + auth_zone_log(z->name, VERB_ALGO, "DNSSEC verify was bogus: %s", *why_bogus); + return 0; +} + +/** check for nsec3, the RR with params equal, if bitmap has the type */ +static int nsec3_of_param_has_type(struct auth_rrset* nsec3, int algo, + size_t iter, uint8_t* salt, size_t saltlen, uint16_t rrtype) +{ + int i, count = (int)nsec3->data->count; + struct ub_packed_rrset_key pk; + memset(&pk, 0, sizeof(pk)); + pk.entry.data = nsec3->data; + for(i=0; i<count; i++) { + int rralgo; + size_t rriter, rrsaltlen; + uint8_t* rrsalt; + if(!nsec3_get_params(&pk, i, &rralgo, &rriter, &rrsalt, + &rrsaltlen)) + continue; /* no parameters, malformed */ + if(rralgo != algo || rriter != iter || rrsaltlen != saltlen) + continue; /* different parameters */ + if(saltlen != 0) { + if(rrsalt == NULL || salt == NULL) + continue; + if(memcmp(rrsalt, salt, saltlen) != 0) + continue; /* different salt parameters */ + } + if(nsec3_has_type(&pk, i, rrtype)) + return 1; + } + return 0; +} + +/** Verify the absence of ZONEMD with DNSSEC by checking NSEC, NSEC3 type flag. + * return false on failure, reason contains description of failure. */ +static int zonemd_check_dnssec_absence(struct auth_zone* z, + struct module_env* env, struct module_stack* mods, + struct ub_packed_rrset_key* dnskey, struct auth_data* apex, + char** reason, char** why_bogus) +{ + struct auth_rrset* nsec = NULL; + if(!apex) { + *reason = "zone has no apex domain but ZONEMD missing"; + return 0; + } + nsec = az_domain_rrset(apex, LDNS_RR_TYPE_NSEC); + if(nsec) { + struct ub_packed_rrset_key pk; + /* dnssec verify the NSEC */ + if(!zonemd_dnssec_verify_rrset(z, env, mods, dnskey, apex, + nsec, why_bogus)) { + *reason = "DNSSEC verify failed for NSEC RRset"; + return 0; + } + /* check type bitmap */ + memset(&pk, 0, sizeof(pk)); + pk.entry.data = nsec->data; + if(nsec_has_type(&pk, LDNS_RR_TYPE_ZONEMD)) { + *reason = "DNSSEC NSEC bitmap says type ZONEMD exists"; + return 0; + } + auth_zone_log(z->name, VERB_ALGO, "zonemd DNSSEC NSEC verification of absence of ZONEMD secure"); + } else { + /* NSEC3 perhaps ? */ + int algo; + size_t iter, saltlen; + uint8_t* salt; + struct auth_rrset* nsec3param = az_domain_rrset(apex, + LDNS_RR_TYPE_NSEC3PARAM); + struct auth_data* match; + struct auth_rrset* nsec3; + if(!nsec3param) { + *reason = "zone has no NSEC information but ZONEMD missing"; + return 0; + } + if(!az_nsec3_param(z, &algo, &iter, &salt, &saltlen)) { + *reason = "zone has no NSEC information but ZONEMD missing"; + return 0; + } + /* find the NSEC3 record */ + match = az_nsec3_find_exact(z, z->name, z->namelen, algo, + iter, salt, saltlen); + if(!match) { + *reason = "zone has no NSEC3 domain for the apex but ZONEMD missing"; + return 0; + } + nsec3 = az_domain_rrset(match, LDNS_RR_TYPE_NSEC3); + if(!nsec3) { + *reason = "zone has no NSEC3 RRset for the apex but ZONEMD missing"; + return 0; + } + /* dnssec verify the NSEC3 */ + if(!zonemd_dnssec_verify_rrset(z, env, mods, dnskey, match, + nsec3, why_bogus)) { + *reason = "DNSSEC verify failed for NSEC3 RRset"; + return 0; + } + /* check type bitmap */ + if(nsec3_of_param_has_type(nsec3, algo, iter, salt, saltlen, + LDNS_RR_TYPE_ZONEMD)) { + *reason = "DNSSEC NSEC3 bitmap says type ZONEMD exists"; + return 0; + } + auth_zone_log(z->name, VERB_ALGO, "zonemd DNSSEC NSEC3 verification of absence of ZONEMD secure"); + } + + return 1; +} + +/** Verify the SOA and ZONEMD DNSSEC signatures. + * return false on failure, reason contains description of failure. */ +static int zonemd_check_dnssec_soazonemd(struct auth_zone* z, + struct module_env* env, struct module_stack* mods, + struct ub_packed_rrset_key* dnskey, struct auth_data* apex, + struct auth_rrset* zonemd_rrset, char** reason, char** why_bogus) +{ + struct auth_rrset* soa; + if(!apex) { + *reason = "zone has no apex domain"; + return 0; + } + soa = az_domain_rrset(apex, LDNS_RR_TYPE_SOA); + if(!soa) { + *reason = "zone has no SOA RRset"; + return 0; + } + if(!zonemd_dnssec_verify_rrset(z, env, mods, dnskey, apex, soa, + why_bogus)) { + *reason = "DNSSEC verify failed for SOA RRset"; + return 0; + } + if(!zonemd_dnssec_verify_rrset(z, env, mods, dnskey, apex, + zonemd_rrset, why_bogus)) { + *reason = "DNSSEC verify failed for ZONEMD RRset"; + return 0; + } + auth_zone_log(z->name, VERB_ALGO, "zonemd DNSSEC verification of SOA and ZONEMD RRsets secure"); + return 1; +} + +/** + * Fail the ZONEMD verification. + * @param z: auth zone that fails. + * @param env: environment with config, to ignore failure or not. + * @param reason: failure string description. + * @param why_bogus: failure string for DNSSEC verification failure. + * @param result: strdup result in here if not NULL. + */ +static void auth_zone_zonemd_fail(struct auth_zone* z, struct module_env* env, + char* reason, char* why_bogus, char** result) +{ + char zstr[255+1]; + /* if fail: log reason, and depending on config also take action + * and drop the zone, eg. it is gone from memory, set zone_expired */ + dname_str(z->name, zstr); + if(!reason) reason = "verification failed"; + if(result) { + if(why_bogus) { + char res[1024]; + snprintf(res, sizeof(res), "%s: %s", reason, + why_bogus); + *result = strdup(res); + } else { + *result = strdup(reason); + } + if(!*result) log_err("out of memory"); + } else { + log_warn("auth zone %s: ZONEMD verification failed: %s", zstr, reason); + } + + if(env->cfg->zonemd_permissive_mode) { + verbose(VERB_ALGO, "zonemd-permissive-mode enabled, " + "not blocking zone %s", zstr); + return; + } + + /* expired means the zone gives servfail and is not used by + * lookup if fallback_enabled*/ + z->zone_expired = 1; +} + +/** + * Verify the zonemd with DNSSEC and hash check, with given key. + * @param z: auth zone. + * @param env: environment with config and temp buffers. + * @param mods: module stack with validator env for verification. + * @param dnskey: dnskey that we can use, or NULL. If nonnull, the key + * has been verified and is the start of the chain of trust. + * @param is_insecure: if true, the dnskey is not used, the zone is insecure. + * And dnssec is not used. It is DNSSEC secure insecure or not under + * a trust anchor. + * @param result: if not NULL result reason copied here. + */ +static void +auth_zone_verify_zonemd_with_key(struct auth_zone* z, struct module_env* env, + struct module_stack* mods, struct ub_packed_rrset_key* dnskey, + int is_insecure, char** result) +{ + char* reason = NULL, *why_bogus = NULL; + struct auth_data* apex = NULL; + struct auth_rrset* zonemd_rrset = NULL; + int zonemd_absent = 0, zonemd_absence_dnssecok = 0; + + /* see if ZONEMD is present or absent. */ + apex = az_find_name(z, z->name, z->namelen); + if(!apex) { + zonemd_absent = 1; + } else { + zonemd_rrset = az_domain_rrset(apex, LDNS_RR_TYPE_ZONEMD); + if(!zonemd_rrset || zonemd_rrset->data->count==0) { + zonemd_absent = 1; + zonemd_rrset = NULL; + } + } + + /* if no DNSSEC, done. */ + /* if no ZONEMD, and DNSSEC, use DNSKEY to verify NSEC or NSEC3 for + * zone apex. Check ZONEMD bit is turned off or else fail */ + /* if ZONEMD, and DNSSEC, check DNSSEC signature on SOA and ZONEMD, + * or else fail */ + if(!dnskey && !is_insecure) { + auth_zone_zonemd_fail(z, env, "DNSKEY missing", NULL, result); + return; + } else if(!zonemd_rrset && dnskey && !is_insecure) { + /* fetch, DNSSEC verify, and check NSEC/NSEC3 */ + if(!zonemd_check_dnssec_absence(z, env, mods, dnskey, apex, + &reason, &why_bogus)) { + auth_zone_zonemd_fail(z, env, reason, why_bogus, result); + return; + } + zonemd_absence_dnssecok = 1; + } else if(zonemd_rrset && dnskey && !is_insecure) { + /* check DNSSEC verify of SOA and ZONEMD */ + if(!zonemd_check_dnssec_soazonemd(z, env, mods, dnskey, apex, + zonemd_rrset, &reason, &why_bogus)) { + auth_zone_zonemd_fail(z, env, reason, why_bogus, result); + return; + } + } + + if(zonemd_absent && z->zonemd_reject_absence) { + auth_zone_zonemd_fail(z, env, "ZONEMD absent and that is not allowed by config", NULL, result); + return; + } + if(zonemd_absent && zonemd_absence_dnssecok) { + auth_zone_log(z->name, VERB_ALGO, "DNSSEC verified nonexistence of ZONEMD"); + if(result) { + *result = strdup("DNSSEC verified nonexistence of ZONEMD"); + if(!*result) log_err("out of memory"); + } + return; + } + if(zonemd_absent) { + auth_zone_log(z->name, VERB_ALGO, "no ZONEMD present"); + if(result) { + *result = strdup("no ZONEMD present"); + if(!*result) log_err("out of memory"); + } + return; + } + + /* check ZONEMD checksum and report or else fail. */ + if(!auth_zone_zonemd_check_hash(z, env, &reason)) { + auth_zone_zonemd_fail(z, env, reason, NULL, result); + return; + } + + /* success! log the success */ + auth_zone_log(z->name, VERB_ALGO, "ZONEMD verification successful"); + if(result) { + *result = strdup("ZONEMD verification successful"); + if(!*result) log_err("out of memory"); + } +} + +/** + * verify the zone DNSKEY rrset from the trust anchor + * This is possible because the anchor is for the zone itself, and can + * thus apply straight to the zone DNSKEY set. + * @param z: the auth zone. + * @param env: environment with time and temp buffers. + * @param mods: module stack for validator environment for dnssec validation. + * @param anchor: trust anchor to use + * @param is_insecure: returned, true if the zone is securely insecure. + * @param why_bogus: if the routine fails, returns the failure reason. + * @param keystorage: where to store the ub_packed_rrset_key that is created + * on success. A pointer to it is returned on success. + * @return the dnskey RRset, reference to zone data and keystorage, or + * NULL on failure. + */ +static struct ub_packed_rrset_key* +zonemd_get_dnskey_from_anchor(struct auth_zone* z, struct module_env* env, + struct module_stack* mods, struct trust_anchor* anchor, + int* is_insecure, char** why_bogus, + struct ub_packed_rrset_key* keystorage) +{ + struct auth_data* apex; + struct auth_rrset* dnskey_rrset; + enum sec_status sec; + struct val_env* ve; + int m; + + apex = az_find_name(z, z->name, z->namelen); + if(!apex) { + *why_bogus = "have trust anchor, but zone has no apex domain for DNSKEY"; + return 0; + } + dnskey_rrset = az_domain_rrset(apex, LDNS_RR_TYPE_DNSKEY); + if(!dnskey_rrset || dnskey_rrset->data->count==0) { + *why_bogus = "have trust anchor, but zone has no DNSKEY"; + return 0; + } + + m = modstack_find(mods, "validator"); + if(m == -1) { + *why_bogus = "have trust anchor, but no validator module"; + return 0; + } + ve = (struct val_env*)env->modinfo[m]; + + memset(keystorage, 0, sizeof(*keystorage)); + keystorage->entry.key = keystorage; + keystorage->entry.data = dnskey_rrset->data; + keystorage->rk.dname = apex->name; + keystorage->rk.dname_len = apex->namelen; + keystorage->rk.type = htons(LDNS_RR_TYPE_DNSKEY); + keystorage->rk.rrset_class = htons(z->dclass); + auth_zone_log(z->name, VERB_QUERY, + "zonemd: verify DNSKEY RRset with trust anchor"); + sec = val_verify_DNSKEY_with_TA(env, ve, keystorage, anchor->ds_rrset, + anchor->dnskey_rrset, NULL, why_bogus, NULL); + regional_free_all(env->scratch); + if(sec == sec_status_secure) { + /* success */ + *is_insecure = 0; + return keystorage; + } else if(sec == sec_status_insecure) { + /* insecure */ + *is_insecure = 1; + } else { + /* bogus */ + *is_insecure = 0; + auth_zone_log(z->name, VERB_ALGO, + "zonemd: verify DNSKEY RRset with trust anchor failed: %s", *why_bogus); + } + return NULL; +} + +/** callback for ZONEMD lookup of DNSKEY */ +void auth_zonemd_dnskey_lookup_callback(void* arg, int rcode, sldns_buffer* buf, + enum sec_status sec, char* why_bogus, int ATTR_UNUSED(was_ratelimited)) +{ + struct auth_zone* z = (struct auth_zone*)arg; + struct module_env* env; + char* reason = NULL; + struct ub_packed_rrset_key* dnskey = NULL; + int is_insecure = 0; + + lock_rw_wrlock(&z->lock); + env = z->zonemd_callback_env; + /* release the env variable so another worker can pick up the + * ZONEMD verification task if it wants to */ + z->zonemd_callback_env = NULL; + if(!env || env->outnet->want_to_quit || z->zone_deleted) { + lock_rw_unlock(&z->lock); + return; /* stop on quit */ + } + + /* process result */ + if(sec == sec_status_bogus) { + reason = why_bogus; + if(!reason) + reason = "lookup of DNSKEY was bogus"; + auth_zone_log(z->name, VERB_ALGO, + "zonemd lookup of DNSKEY was bogus: %s", reason); + } else if(rcode == LDNS_RCODE_NOERROR) { + uint16_t wanted_qtype = LDNS_RR_TYPE_DNSKEY; + struct regional* temp = env->scratch; + struct query_info rq; + struct reply_info* rep; + memset(&rq, 0, sizeof(rq)); + rep = parse_reply_in_temp_region(buf, temp, &rq); + if(rep && rq.qtype == wanted_qtype && + query_dname_compare(z->name, rq.qname) == 0 && + FLAGS_GET_RCODE(rep->flags) == LDNS_RCODE_NOERROR) { + /* parsed successfully */ + struct ub_packed_rrset_key* answer = + reply_find_answer_rrset(&rq, rep); + if(answer && sec == sec_status_secure) { + dnskey = answer; + auth_zone_log(z->name, VERB_ALGO, + "zonemd lookup of DNSKEY was secure"); + } else if(sec == sec_status_secure && !answer) { + is_insecure = 1; + auth_zone_log(z->name, VERB_ALGO, + "zonemd lookup of DNSKEY has no content, but is secure, treat as insecure"); + } else if(sec == sec_status_insecure) { + is_insecure = 1; + auth_zone_log(z->name, VERB_ALGO, + "zonemd lookup of DNSKEY was insecure"); + } else if(sec == sec_status_indeterminate) { + is_insecure = 1; + auth_zone_log(z->name, VERB_ALGO, + "zonemd lookup of DNSKEY was indeterminate, treat as insecure"); + } else { + auth_zone_log(z->name, VERB_ALGO, + "zonemd lookup of DNSKEY has nodata"); + reason = "lookup of DNSKEY has nodata"; + } + } else if(rep && rq.qtype == wanted_qtype && + query_dname_compare(z->name, rq.qname) == 0 && + FLAGS_GET_RCODE(rep->flags) == LDNS_RCODE_NXDOMAIN && + sec == sec_status_secure) { + /* secure nxdomain, so the zone is like some RPZ zone + * that does not exist in the wider internet, with + * a secure nxdomain answer outside of it. So we + * treat the zonemd zone without a dnssec chain of + * trust, as insecure. */ + is_insecure = 1; + auth_zone_log(z->name, VERB_ALGO, + "zonemd lookup of DNSKEY was secure NXDOMAIN, treat as insecure"); + } else if(rep && rq.qtype == wanted_qtype && + query_dname_compare(z->name, rq.qname) == 0 && + FLAGS_GET_RCODE(rep->flags) == LDNS_RCODE_NXDOMAIN && + sec == sec_status_insecure) { + is_insecure = 1; + auth_zone_log(z->name, VERB_ALGO, + "zonemd lookup of DNSKEY was insecure NXDOMAIN, treat as insecure"); + } else if(rep && rq.qtype == wanted_qtype && + query_dname_compare(z->name, rq.qname) == 0 && + FLAGS_GET_RCODE(rep->flags) == LDNS_RCODE_NXDOMAIN && + sec == sec_status_indeterminate) { + is_insecure = 1; + auth_zone_log(z->name, VERB_ALGO, + "zonemd lookup of DNSKEY was indeterminate NXDOMAIN, treat as insecure"); + } else { + auth_zone_log(z->name, VERB_ALGO, + "zonemd lookup of DNSKEY has no answer"); + reason = "lookup of DNSKEY has no answer"; + } + } else { + auth_zone_log(z->name, VERB_ALGO, + "zonemd lookup of DNSKEY failed"); + reason = "lookup of DNSKEY failed"; + } + + if(reason) { + auth_zone_zonemd_fail(z, env, reason, NULL, NULL); + lock_rw_unlock(&z->lock); + return; + } + + auth_zone_verify_zonemd_with_key(z, env, &env->mesh->mods, dnskey, + is_insecure, NULL); + regional_free_all(env->scratch); + lock_rw_unlock(&z->lock); +} + +/** lookup DNSKEY for ZONEMD verification */ +static int +zonemd_lookup_dnskey(struct auth_zone* z, struct module_env* env) +{ + struct query_info qinfo; + uint16_t qflags = BIT_RD; + struct edns_data edns; + sldns_buffer* buf = env->scratch_buffer; + + if(z->zonemd_callback_env) { + /* another worker is already working on the callback + * for the DNSKEY lookup for ZONEMD verification. + * We do not also have to do ZONEMD verification, let that + * worker do it */ + auth_zone_log(z->name, VERB_ALGO, + "zonemd needs lookup of DNSKEY and that already worked on by another worker"); + return 1; + } + + /* use mesh_new_callback to lookup the DNSKEY, + * and then wait for them to be looked up (in cache, or query) */ + qinfo.qname_len = z->namelen; + qinfo.qname = z->name; + qinfo.qclass = z->dclass; + qinfo.qtype = LDNS_RR_TYPE_DNSKEY; + qinfo.local_alias = NULL; + if(verbosity >= VERB_ALGO) { + char buf1[512]; + char buf2[LDNS_MAX_DOMAINLEN+1]; + dname_str(z->name, buf2); + snprintf(buf1, sizeof(buf1), "auth zone %s: lookup DNSKEY " + "for zonemd verification", buf2); + log_query_info(VERB_ALGO, buf1, &qinfo); + } + edns.edns_present = 1; + edns.ext_rcode = 0; + edns.edns_version = 0; + edns.bits = EDNS_DO; + edns.opt_list = NULL; + if(sldns_buffer_capacity(buf) < 65535) + edns.udp_size = (uint16_t)sldns_buffer_capacity(buf); + else edns.udp_size = 65535; + + /* store the worker-specific module env for the callback. + * We can then reference this when the callback executes */ + z->zonemd_callback_env = env; + /* the callback can be called straight away */ + lock_rw_unlock(&z->lock); + if(!mesh_new_callback(env->mesh, &qinfo, qflags, &edns, buf, 0, + &auth_zonemd_dnskey_lookup_callback, z)) { + lock_rw_wrlock(&z->lock); + log_err("out of memory lookup up dnskey for zonemd"); + return 0; + } + lock_rw_wrlock(&z->lock); + return 1; +} + +void auth_zone_verify_zonemd(struct auth_zone* z, struct module_env* env, + struct module_stack* mods, char** result, int offline, int only_online) +{ + char* reason = NULL, *why_bogus = NULL; + struct trust_anchor* anchor = NULL; + struct ub_packed_rrset_key* dnskey = NULL; + struct ub_packed_rrset_key keystorage; + int is_insecure = 0; + /* verify the ZONEMD if present. + * If not present check if absence is allowed by DNSSEC */ + if(!z->zonemd_check) + return; + + /* if zone is under a trustanchor */ + /* is it equal to trustanchor - get dnskey's verified */ + /* else, find chain of trust by fetching DNSKEYs lookup for zone */ + /* result if that, if insecure, means no DNSSEC for the ZONEMD, + * otherwise we have the zone DNSKEY for the DNSSEC verification. */ + if(env->anchors) + anchor = anchors_lookup(env->anchors, z->name, z->namelen, + z->dclass); + if(anchor && anchor->numDS == 0 && anchor->numDNSKEY == 0) { + /* domain-insecure trust anchor for unsigned zones */ + lock_basic_unlock(&anchor->lock); + if(only_online) + return; + dnskey = NULL; + is_insecure = 1; + } else if(anchor && query_dname_compare(z->name, anchor->name) == 0) { + if(only_online) { + lock_basic_unlock(&anchor->lock); + return; + } + /* equal to trustanchor, no need for online lookups */ + dnskey = zonemd_get_dnskey_from_anchor(z, env, mods, anchor, + &is_insecure, &why_bogus, &keystorage); + lock_basic_unlock(&anchor->lock); + if(!dnskey && !reason && !is_insecure) { + reason = "verify DNSKEY RRset with trust anchor failed"; + } + } else if(anchor) { + lock_basic_unlock(&anchor->lock); + /* perform online lookups */ + if(offline) + return; + /* setup online lookups, and wait for them */ + if(zonemd_lookup_dnskey(z, env)) { + /* wait for the lookup */ + return; + } + reason = "could not lookup DNSKEY for chain of trust"; + } else { + /* the zone is not under a trust anchor */ + if(only_online) + return; + dnskey = NULL; + is_insecure = 1; + } + + if(reason) { + auth_zone_zonemd_fail(z, env, reason, why_bogus, result); + return; + } + + auth_zone_verify_zonemd_with_key(z, env, mods, dnskey, is_insecure, + result); + regional_free_all(env->scratch); +} + +void auth_zones_pickup_zonemd_verify(struct auth_zones* az, + struct module_env* env) +{ + struct auth_zone key; + uint8_t savezname[255+1]; + size_t savezname_len; + struct auth_zone* z; + key.node.key = &key; + lock_rw_rdlock(&az->lock); + RBTREE_FOR(z, struct auth_zone*, &az->ztree) { + lock_rw_wrlock(&z->lock); + if(!z->zonemd_check) { + lock_rw_unlock(&z->lock); + continue; + } + key.dclass = z->dclass; + key.namelabs = z->namelabs; + if(z->namelen > sizeof(savezname)) { + lock_rw_unlock(&z->lock); + log_err("auth_zones_pickup_zonemd_verify: zone name too long"); + continue; + } + savezname_len = z->namelen; + memmove(savezname, z->name, z->namelen); + lock_rw_unlock(&az->lock); + auth_zone_verify_zonemd(z, env, &env->mesh->mods, NULL, 0, 1); + lock_rw_unlock(&z->lock); + lock_rw_rdlock(&az->lock); + /* find the zone we had before, it is not deleted, + * because we have a flag for that that is processed at + * apply_cfg time */ + key.namelen = savezname_len; + key.name = savezname; + z = (struct auth_zone*)rbtree_search(&az->ztree, &key); + if(!z) + break; + } + lock_rw_unlock(&az->lock); +} diff --git a/services/authzone.h b/services/authzone.h index 3d94f30d6202..ffe234d59b53 100644 --- a/services/authzone.h +++ b/services/authzone.h @@ -132,8 +132,17 @@ struct auth_zone { /** for upstream: this zone answers queries that unbound intends to * send upstream. */ int for_upstream; + /** check ZONEMD records */ + int zonemd_check; + /** reject absence of ZONEMD records */ + int zonemd_reject_absence; /** RPZ zones */ struct rpz* rpz; + /** store the env (worker thread specific) for the zonemd callbacks + * from the mesh with the results of the lookup, if nonNULL, some + * worker has already picked up the zonemd verification task and + * this worker does not have to do it as well. */ + struct module_env* zonemd_callback_env; /** zone has been deleted */ int zone_deleted; /** deletelist pointer, unused normally except during delete */ @@ -474,10 +483,13 @@ struct auth_zones* auth_zones_create(void); * @param cfg: config to apply. * @param setup: if true, also sets up values in the auth zones structure * @param is_rpz: set to 1 if at least one RPZ zone is configured. + * @param env: environment for offline verification. + * @param mods: modules in environment. * @return false on failure. */ int auth_zones_apply_cfg(struct auth_zones* az, struct config_file* cfg, - int setup, int* is_rpz); + int setup, int* is_rpz, struct module_env* env, + struct module_stack* mods); /** initial pick up of worker timeouts, ties events to worker event loop * @param az: auth zones structure @@ -625,6 +637,9 @@ int auth_zone_read_zonefile(struct auth_zone* z, struct config_file* cfg); /** find serial number of zone or false if none (no SOA record) */ int auth_zone_get_serial(struct auth_zone* z, uint32_t* serial); +/** Find auth_zone SOA and populate the values in xfr(soa values). */ +int xfr_find_soa(struct auth_zone* z, struct auth_xfer* xfr); + /** compare auth_zones for sorted rbtree */ int auth_zone_cmp(const void* z1, const void* z2); @@ -685,4 +700,83 @@ void auth_xfer_transfer_lookup_callback(void* arg, int rcode, */ int compare_serial(uint32_t a, uint32_t b); +/** + * Generate ZONEMD digest for the auth zone. + * @param z: the auth zone to digest. + * omits zonemd at apex and its RRSIG from the digest. + * @param scheme: the collation scheme to use. Numbers as defined for ZONEMD. + * @param hashalgo: the hash algo, from the registry defined for ZONEMD type. + * @param hash: the result buffer. + * @param buflen: size of the result buffer, must be large enough. or the + * routine fails. + * @param resultlen: size of the hash in the result buffer of the result. + * @param region: temp region for allocs during canonicalisation. + * @param buf: temp buffer during canonicalisation. + * @param reason: failure reason, returns a string, NULL on success. + * @return false on failure. + */ +int auth_zone_generate_zonemd_hash(struct auth_zone* z, int scheme, + int hashalgo, uint8_t* hash, size_t buflen, size_t* resultlen, + struct regional* region, struct sldns_buffer* buf, char** reason); + +/** ZONEMD scheme definitions */ +#define ZONEMD_SCHEME_SIMPLE 1 + +/** ZONEMD hash algorithm definition for SHA384 */ +#define ZONEMD_ALGO_SHA384 1 +/** ZONEMD hash algorithm definition for SHA512 */ +#define ZONEMD_ALGO_SHA512 2 + +/** returns true if a zonemd hash algo is supported */ +int zonemd_hashalgo_supported(int hashalgo); +/** returns true if a zonemd scheme is supported */ +int zonemd_scheme_supported(int scheme); + +/** + * Check ZONEMD digest for the auth zone. + * @param z: auth zone to digest. + * @param scheme: zonemd scheme. + * @param hashalgo: zonemd hash algorithm. + * @param hash: the hash to check. + * @param hashlen: length of hash buffer. + * @param region: temp region for allocs during canonicalisation. + * @param buf: temp buffer during canonicalisation. + * @param reason: string returned with failure reason. + * @return false on failure. + */ +int auth_zone_generate_zonemd_check(struct auth_zone* z, int scheme, + int hashalgo, uint8_t* hash, size_t hashlen, struct regional* region, + struct sldns_buffer* buf, char** reason); + +/** + * Perform ZONEMD checks and verification for the auth zone. + * This includes DNSSEC verification if applicable. + * @param z: auth zone to check. Caller holds lock. wrlock. + * @param env: with temp region, buffer and config. + * @param mods: module stack for validator env. + * @param result: if not NULL, result string strdupped in here. + * @param offline: if true, there is no spawned lookup when online is needed. + * Those zones are skipped for ZONEMD checking. + * @param only_online: if true, only for ZONEMD that need online lookup + * of DNSKEY chain of trust are processed. + */ +void auth_zone_verify_zonemd(struct auth_zone* z, struct module_env* env, + struct module_stack* mods, char** result, int offline, + int only_online); + +/** mesh callback for zonemd on lookup of dnskey */ +void auth_zonemd_dnskey_lookup_callback(void* arg, int rcode, + struct sldns_buffer* buf, enum sec_status sec, char* why_bogus, + int was_ratelimited); + +/** + * Check the ZONEMD records that need online DNSSEC chain lookups, + * for them spawn the lookup process to get it checked out. + * Attaches the lookup process to the worker event base and mesh state. + * @param az: auth zones, every zones is checked. + * @param env: env of the worker where the task is attached. + */ +void auth_zones_pickup_zonemd_verify(struct auth_zones* az, + struct module_env* env); + #endif /* SERVICES_AUTHZONE_H */ diff --git a/services/cache/dns.c b/services/cache/dns.c index f3149b614b54..5b64fe47520c 100644 --- a/services/cache/dns.c +++ b/services/cache/dns.c @@ -801,7 +801,7 @@ struct dns_msg* dns_cache_lookup(struct module_env* env, uint8_t* qname, size_t qnamelen, uint16_t qtype, uint16_t qclass, uint16_t flags, struct regional* region, struct regional* scratch, - int no_partial) + int no_partial, uint8_t* dpname, size_t dpnamelen) { struct lruhash_entry* e; struct query_info k; @@ -923,6 +923,9 @@ dns_cache_lookup(struct module_env* env, * the same. We search upwards for NXDOMAINs. */ if(env->cfg->harden_below_nxdomain) { while(!dname_is_root(k.qname)) { + if(dpname && dpnamelen + && !dname_subdomain_c(k.qname, dpname)) + break; /* no synth nxdomain above the stub */ dname_remove_label(&k.qname, &k.qname_len); h = query_info_hash(&k, flags); e = slabhash_lookup(env->msg_cache, h, &k, 0); diff --git a/services/cache/dns.h b/services/cache/dns.h index f1b77fb36c00..bece83702960 100644 --- a/services/cache/dns.h +++ b/services/cache/dns.h @@ -164,6 +164,8 @@ struct dns_msg* tomsg(struct module_env* env, struct query_info* q, * @param scratch: where to allocate temporary data. * @param no_partial: if true, only complete messages and not a partial * one (with only the start of the CNAME chain and not the rest). + * @param dpname: if not NULL, do not return NXDOMAIN above this name. + * @param dpnamelen: length of dpname. * @return new response message (alloced in region, rrsets do not have IDs). * or NULL on error or if not found in cache. * TTLs are made relative to the current time. @@ -171,7 +173,7 @@ struct dns_msg* tomsg(struct module_env* env, struct query_info* q, struct dns_msg* dns_cache_lookup(struct module_env* env, uint8_t* qname, size_t qnamelen, uint16_t qtype, uint16_t qclass, uint16_t flags, struct regional* region, struct regional* scratch, - int no_partial); + int no_partial, uint8_t* dpname, size_t dpnamelen); /** * find and add A and AAAA records for missing nameservers in delegpt diff --git a/services/cache/infra.c b/services/cache/infra.c index 2d16bcd6e405..518e69622f83 100644 --- a/services/cache/infra.c +++ b/services/cache/infra.c @@ -236,6 +236,9 @@ infra_create(struct config_file* cfg) sizeof(struct infra_cache)); size_t maxmem = cfg->infra_cache_numhosts * (sizeof(struct infra_key)+ sizeof(struct infra_data)+INFRA_BYTES_NAME); + if(!infra) { + return NULL; + } infra->hosts = slabhash_create(cfg->infra_cache_slabs, INFRA_HOST_STARTSIZE, maxmem, &infra_sizefunc, &infra_compfunc, &infra_delkeyfunc, &infra_deldatafunc, NULL); diff --git a/services/listen_dnsport.c b/services/listen_dnsport.c index b790660f2396..b43def567501 100644 --- a/services/listen_dnsport.c +++ b/services/listen_dnsport.c @@ -133,6 +133,16 @@ verbose_print_addr(struct addrinfo *addr) } } +void +verbose_print_unbound_socket(struct unbound_socket* ub_sock) +{ + if(verbosity >= VERB_ALGO) { + log_info("listing of unbound_socket structure:"); + verbose_print_addr(ub_sock->addr); + log_info("s is: %d, fam is: %s", ub_sock->s, ub_sock->fam == AF_INET?"AF_INET":"AF_INET6"); + } +} + #ifdef HAVE_SYSTEMD static int systemd_get_activated(int family, int socktype, int listen, @@ -442,6 +452,10 @@ create_udp_sock(int family, int socktype, struct sockaddr* addr, if(err != NULL) log_warn("error setting IP DiffServ codepoint %d on UDP socket: %s", dscp, err); if(family == AF_INET6) { +# if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) + int omit6_set = 0; + int action; +# endif # if defined(IPV6_V6ONLY) if(v6only) { int val=(v6only==2)?0:1; @@ -490,6 +504,39 @@ create_udp_sock(int family, int socktype, struct sockaddr* addr, return -1; } # endif /* IPv6 MTU */ +# if defined(IPV6_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) +# if defined(IP_PMTUDISC_OMIT) + action = IP_PMTUDISC_OMIT; + if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU_DISCOVER, + &action, (socklen_t)sizeof(action)) < 0) { + + if (errno != EINVAL) { + log_err("setsockopt(..., IPV6_MTU_DISCOVER, IP_PMTUDISC_OMIT...) failed: %s", + strerror(errno)); + sock_close(s); + *noproto = 0; + *inuse = 0; + return -1; + } + } + else + { + omit6_set = 1; + } +# endif + if (omit6_set == 0) { + action = IP_PMTUDISC_DONT; + if (setsockopt(s, IPPROTO_IPV6, IPV6_MTU_DISCOVER, + &action, (socklen_t)sizeof(action)) < 0) { + log_err("setsockopt(..., IPV6_MTU_DISCOVER, IP_PMTUDISC_DONT...) failed: %s", + strerror(errno)); + sock_close(s); + *noproto = 0; + *inuse = 0; + return -1; + } + } +# endif /* IPV6_MTU_DISCOVER */ } else if(family == AF_INET) { # if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) /* linux 3.15 has IP_PMTUDISC_OMIT, Hannes Frederic Sowa made it so that @@ -916,7 +963,7 @@ static int make_sock(int stype, const char* ifname, const char* port, struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd, int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind, - int use_systemd, int dscp) + int use_systemd, int dscp, struct unbound_socket* ub_sock) { struct addrinfo *res = NULL; int r, s, inuse, noproto; @@ -958,7 +1005,11 @@ make_sock(int stype, const char* ifname, const char* port, *noip6 = 1; } } - freeaddrinfo(res); + + ub_sock->addr = res; + ub_sock->s = s; + ub_sock->fam = hints->ai_family; + return s; } @@ -967,7 +1018,7 @@ static int make_sock_port(int stype, const char* ifname, const char* port, struct addrinfo *hints, int v6only, int* noip6, size_t rcv, size_t snd, int* reuseport, int transparent, int tcp_mss, int nodelay, int freebind, - int use_systemd, int dscp) + int use_systemd, int dscp, struct unbound_socket* ub_sock) { char* s = strchr(ifname, '@'); if(s) { @@ -990,11 +1041,11 @@ make_sock_port(int stype, const char* ifname, const char* port, p[strlen(s+1)]=0; return make_sock(stype, newif, p, hints, v6only, noip6, rcv, snd, reuseport, transparent, tcp_mss, nodelay, freebind, - use_systemd, dscp); + use_systemd, dscp, ub_sock); } return make_sock(stype, ifname, port, hints, v6only, noip6, rcv, snd, reuseport, transparent, tcp_mss, nodelay, freebind, use_systemd, - dscp); + dscp, ub_sock); } /** @@ -1002,10 +1053,11 @@ make_sock_port(int stype, const char* ifname, const char* port, * @param list: list head. changed. * @param s: fd. * @param ftype: if fd is UDP. + * @param ub_sock: socket with address. * @return false on failure. list in unchanged then. */ static int -port_insert(struct listen_port** list, int s, enum listen_type ftype) +port_insert(struct listen_port** list, int s, enum listen_type ftype, struct unbound_socket* ub_sock) { struct listen_port* item = (struct listen_port*)malloc( sizeof(struct listen_port)); @@ -1014,6 +1066,7 @@ port_insert(struct listen_port** list, int s, enum listen_type ftype) item->next = *list; item->fd = s; item->ftype = ftype; + item->socket = ub_sock; *list = item; return 1; } @@ -1043,7 +1096,7 @@ set_recvpktinfo(int s, int family) return 0; } # else - log_err("no IPV6_RECVPKTINFO and no IPV6_PKTINFO option, please " + log_err("no IPV6_RECVPKTINFO and IPV6_PKTINFO options, please " "disable interface-automatic or do-ip6 in config"); return 0; # endif /* defined IPV6_RECVPKTINFO */ @@ -1093,18 +1146,6 @@ if_is_ssl(const char* ifname, const char* port, int ssl_port, return 0; } -/** see if interface is https, its port number == the https port number */ -static int -if_is_https(const char* ifname, const char* port, int https_port) -{ - char* p = strchr(ifname, '@'); - if(!p && atoi(port) == https_port) - return 1; - if(p && atoi(p+1) == https_port) - return 1; - return 0; -} - /** * Helper for ports_open. Creates one interface (or NULL for default). * @param ifname: The interface ip address. @@ -1142,6 +1183,7 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, int s, noip6=0; int is_https = if_is_https(ifname, port, https_port); int nodelay = is_https && http2_nodelay; + struct unbound_socket* ub_sock; #ifdef USE_DNSCRYPT int is_dnscrypt = ((strchr(ifname, '@') && atoi(strchr(ifname, '@')+1) == dnscrypt_port) || @@ -1153,10 +1195,16 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, if(!do_udp && !do_tcp) return 0; + if(do_auto) { + ub_sock = calloc(1, sizeof(struct unbound_socket)); + if(!ub_sock) + return 0; if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1, &noip6, rcv, snd, reuseport, transparent, - tcp_mss, nodelay, freebind, use_systemd, dscp)) == -1) { + tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock)) == -1) { + freeaddrinfo(ub_sock->addr); + free(ub_sock); if(noip6) { log_warn("IPv6 protocol not available"); return 1; @@ -1166,18 +1214,27 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, /* getting source addr packet info is highly non-portable */ if(!set_recvpktinfo(s, hints->ai_family)) { sock_close(s); + freeaddrinfo(ub_sock->addr); + free(ub_sock); return 0; } if(!port_insert(list, s, - is_dnscrypt?listen_type_udpancil_dnscrypt:listen_type_udpancil)) { + is_dnscrypt?listen_type_udpancil_dnscrypt:listen_type_udpancil, ub_sock)) { sock_close(s); + freeaddrinfo(ub_sock->addr); + free(ub_sock); return 0; } } else if(do_udp) { + ub_sock = calloc(1, sizeof(struct unbound_socket)); + if(!ub_sock) + return 0; /* regular udp socket */ if((s = make_sock_port(SOCK_DGRAM, ifname, port, hints, 1, &noip6, rcv, snd, reuseport, transparent, - tcp_mss, nodelay, freebind, use_systemd, dscp)) == -1) { + tcp_mss, nodelay, freebind, use_systemd, dscp, ub_sock)) == -1) { + freeaddrinfo(ub_sock->addr); + free(ub_sock); if(noip6) { log_warn("IPv6 protocol not available"); return 1; @@ -1185,8 +1242,10 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, return 0; } if(!port_insert(list, s, - is_dnscrypt?listen_type_udp_dnscrypt:listen_type_udp)) { + is_dnscrypt?listen_type_udp_dnscrypt:listen_type_udp, ub_sock)) { sock_close(s); + freeaddrinfo(ub_sock->addr); + free(ub_sock); return 0; } } @@ -1194,6 +1253,9 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, int is_ssl = if_is_ssl(ifname, port, ssl_port, tls_additional_port); enum listen_type port_type; + ub_sock = calloc(1, sizeof(struct unbound_socket)); + if(!ub_sock) + return 0; if(is_ssl) port_type = listen_type_ssl; else if(is_https) @@ -1204,7 +1266,9 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, port_type = listen_type_tcp; if((s = make_sock_port(SOCK_STREAM, ifname, port, hints, 1, &noip6, 0, 0, reuseport, transparent, tcp_mss, nodelay, - freebind, use_systemd, dscp)) == -1) { + freebind, use_systemd, dscp, ub_sock)) == -1) { + freeaddrinfo(ub_sock->addr); + free(ub_sock); if(noip6) { /*log_warn("IPv6 protocol not available");*/ return 1; @@ -1213,8 +1277,10 @@ ports_create_if(const char* ifname, int do_auto, int do_udp, int do_tcp, } if(is_ssl) verbose(VERB_ALGO, "setup TCP for SSL service"); - if(!port_insert(list, s, port_type)) { + if(!port_insert(list, s, port_type, ub_sock)) { sock_close(s); + freeaddrinfo(ub_sock->addr); + free(ub_sock); return 0; } } @@ -1280,14 +1346,14 @@ listen_create(struct comm_base* base, struct listen_port* ports, if(ports->ftype == listen_type_udp || ports->ftype == listen_type_udp_dnscrypt) cp = comm_point_create_udp(base, ports->fd, - front->udp_buff, cb, cb_arg); + front->udp_buff, cb, cb_arg, ports->socket); else if(ports->ftype == listen_type_tcp || ports->ftype == listen_type_tcp_dnscrypt) cp = comm_point_create_tcp(base, ports->fd, tcp_accept_count, tcp_idle_timeout, harden_large_queries, 0, NULL, tcp_conn_limit, bufsize, front->udp_buff, - ports->ftype, cb, cb_arg); + ports->ftype, cb, cb_arg, ports->socket); else if(ports->ftype == listen_type_ssl || ports->ftype == listen_type_http) { cp = comm_point_create_tcp(base, ports->fd, @@ -1295,7 +1361,7 @@ listen_create(struct comm_base* base, struct listen_port* ports, harden_large_queries, http_max_streams, http_endpoint, tcp_conn_limit, bufsize, front->udp_buff, - ports->ftype, cb, cb_arg); + ports->ftype, cb, cb_arg, ports->socket); if(http_notls && ports->ftype == listen_type_http) cp->ssl = NULL; else @@ -1322,7 +1388,7 @@ listen_create(struct comm_base* base, struct listen_port* ports, } else if(ports->ftype == listen_type_udpancil || ports->ftype == listen_type_udpancil_dnscrypt) cp = comm_point_create_udp_ancil(base, ports->fd, - front->udp_buff, cb, cb_arg); + front->udp_buff, cb, cb_arg, ports->socket); if(!cp) { log_err("can't create commpoint"); listen_delete(front); @@ -1506,13 +1572,12 @@ resolve_ifa_name(struct ifaddrs *ifas, const char *search_ifa, char ***ip_addres } #endif /* HAVE_GETIFADDRS */ -int resolve_interface_names(struct config_file* cfg, char*** resif, - int* num_resif) +int resolve_interface_names(char** ifs, int num_ifs, + struct config_strlist* list, char*** resif, int* num_resif) { #ifdef HAVE_GETIFADDRS - int i; struct ifaddrs *addrs = NULL; - if(cfg->num_ifs == 0) { + if(num_ifs == 0 && list == NULL) { *resif = NULL; *num_resif = 0; return 1; @@ -1523,38 +1588,73 @@ int resolve_interface_names(struct config_file* cfg, char*** resif, freeifaddrs(addrs); return 0; } - for(i=0; i<cfg->num_ifs; i++) { - if(!resolve_ifa_name(addrs, cfg->ifs[i], resif, num_resif)) { - freeifaddrs(addrs); - config_del_strarray(*resif, *num_resif); - *resif = NULL; - *num_resif = 0; - return 0; + if(ifs) { + int i; + for(i=0; i<num_ifs; i++) { + if(!resolve_ifa_name(addrs, ifs[i], resif, num_resif)) { + freeifaddrs(addrs); + config_del_strarray(*resif, *num_resif); + *resif = NULL; + *num_resif = 0; + return 0; + } } } + if(list) { + struct config_strlist* p; + for(p = list; p; p = p->next) { + if(!resolve_ifa_name(addrs, p->str, resif, num_resif)) { + freeifaddrs(addrs); + config_del_strarray(*resif, *num_resif); + *resif = NULL; + *num_resif = 0; + return 0; + } +} + } freeifaddrs(addrs); return 1; #else - int i; - if(cfg->num_ifs == 0) { + struct config_strlist* p; + if(num_ifs == 0 && list == NULL) { *resif = NULL; *num_resif = 0; return 1; } - *num_resif = cfg->num_ifs; + *num_resif = num_ifs; + for(p = list; p; p = p->next) { + (*num_resif)++; + } *resif = calloc(*num_resif, sizeof(**resif)); if(!*resif) { log_err("out of memory"); return 0; } - for(i=0; i<*num_resif; i++) { - (*resif)[i] = strdup(cfg->ifs[i]); - if(!((*resif)[i])) { - log_err("out of memory"); - config_del_strarray(*resif, *num_resif); - *resif = NULL; - *num_resif = 0; - return 0; + if(ifs) { + int i; + for(i=0; i<num_ifs; i++) { + (*resif)[i] = strdup(ifs[i]); + if(!((*resif)[i])) { + log_err("out of memory"); + config_del_strarray(*resif, *num_resif); + *resif = NULL; + *num_resif = 0; + return 0; + } + } + } + if(list) { + int idx = num_ifs; + for(p = list; p; p = p->next) { + (*resif)[idx] = strdup(p->str); + if(!((*resif)[idx])) { + log_err("out of memory"); + config_del_strarray(*resif, *num_resif); + *resif = NULL; + *num_resif = 0; + return 0; + } + idx++; } } return 1; @@ -1656,6 +1756,7 @@ listening_ports_open(struct config_file* cfg, char** ifs, int num_ifs, } } } + return list; } @@ -1667,6 +1768,11 @@ void listening_ports_free(struct listen_port* list) if(list->fd != -1) { sock_close(list->fd); } + /* rc_ports don't have ub_socket */ + if(list->socket) { + freeaddrinfo(list->socket->addr); + free(list->socket); + } free(list); list = nx; } @@ -2371,6 +2477,10 @@ static int http2_query_read_done(struct http2_session* h2_session, "buffer already assigned to stream"); return -1; } + + /* the c->buffer might be used by mesh_send_reply and no be cleard + * need to be cleared before use */ + sldns_buffer_clear(h2_session->c->buffer); if(sldns_buffer_remaining(h2_session->c->buffer) < sldns_buffer_remaining(h2_stream->qbuffer)) { /* qbuffer will be free'd in frame close cb */ @@ -2572,18 +2682,45 @@ static int http2_buffer_uri_query(struct http2_session* h2_session, return 0; } - if(!(b64len = sldns_b64url_pton( - (char const *)start, length, - sldns_buffer_current(h2_stream->qbuffer), - expectb64len)) || b64len < 0) { - lock_basic_lock(&http2_query_buffer_count_lock); - http2_query_buffer_count -= expectb64len; - lock_basic_unlock(&http2_query_buffer_count_lock); - sldns_buffer_free(h2_stream->qbuffer); - h2_stream->qbuffer = NULL; - /* return without error, method can be an - * unknown POST */ - return 1; + if(sldns_b64_contains_nonurl((char const*)start, length)) { + char buf[65536+4]; + verbose(VERB_ALGO, "HTTP2 stream contains wrong b64 encoding"); + /* copy to the scratch buffer temporarily to terminate the + * string with a zero */ + if(length+1 > sizeof(buf)) { + /* too long */ + lock_basic_lock(&http2_query_buffer_count_lock); + http2_query_buffer_count -= expectb64len; + lock_basic_unlock(&http2_query_buffer_count_lock); + sldns_buffer_free(h2_stream->qbuffer); + h2_stream->qbuffer = NULL; + return 1; + } + memmove(buf, start, length); + buf[length] = 0; + if(!(b64len = sldns_b64_pton(buf, sldns_buffer_current( + h2_stream->qbuffer), expectb64len)) || b64len < 0) { + lock_basic_lock(&http2_query_buffer_count_lock); + http2_query_buffer_count -= expectb64len; + lock_basic_unlock(&http2_query_buffer_count_lock); + sldns_buffer_free(h2_stream->qbuffer); + h2_stream->qbuffer = NULL; + return 1; + } + } else { + if(!(b64len = sldns_b64url_pton( + (char const *)start, length, + sldns_buffer_current(h2_stream->qbuffer), + expectb64len)) || b64len < 0) { + lock_basic_lock(&http2_query_buffer_count_lock); + http2_query_buffer_count -= expectb64len; + lock_basic_unlock(&http2_query_buffer_count_lock); + sldns_buffer_free(h2_stream->qbuffer); + h2_stream->qbuffer = NULL; + /* return without error, method can be an + * unknown POST */ + return 1; + } } sldns_buffer_skip(h2_stream->qbuffer, (size_t)b64len); return 1; diff --git a/services/listen_dnsport.h b/services/listen_dnsport.h index f438ff4580f7..1e51be9bfcab 100644 --- a/services/listen_dnsport.h +++ b/services/listen_dnsport.h @@ -102,6 +102,18 @@ enum listen_type { listen_type_http }; +/* + * socket properties (just like NSD nsd_socket structure definition) + */ +struct unbound_socket { + /** socket-address structure */ + struct addrinfo * addr; + /** socket descriptor returned by socket() syscall */ + int s; + /** address family (AF_INET/IF_INET6) */ + int fam; +}; + /** * Single linked list to store shared ports that have been * opened for use by all threads. @@ -113,6 +125,8 @@ struct listen_port { int fd; /** type of file descriptor, udp or tcp */ enum listen_type ftype; + /** fill in unbpound_socket structure for every opened socket at Unbound startup */ + struct unbound_socket* socket; }; /** @@ -136,16 +150,19 @@ struct listen_port* listening_ports_open(struct config_file* cfg, */ void listening_ports_free(struct listen_port* list); +struct config_strlist; /** * Resolve interface names in config and store result IP addresses - * @param cfg: config + * @param ifs: array of interfaces. The list of interface names, if not NULL. + * @param num_ifs: length of ifs array. + * @param list: if not NULL, this is used as the list of interface names. * @param resif: string array (malloced array of malloced strings) with * result. NULL if cfg has none. * @param num_resif: length of resif. Zero if cfg has zero num_ifs. * @return 0 on failure. */ -int resolve_interface_names(struct config_file* cfg, char*** resif, - int* num_resif); +int resolve_interface_names(char** ifs, int num_ifs, + struct config_strlist* list, char*** resif, int* num_resif); /** * Create commpoints with for this thread for the shared ports. @@ -424,4 +441,9 @@ int http2_submit_dns_response(void* v); char* set_ip_dscp(int socket, int addrfamily, int ds); +/** for debug and profiling purposes only + * @param ub_sock: the structure containing created socket info we want to print or log for + */ +void verbose_print_unbound_socket(struct unbound_socket* ub_sock); + #endif /* LISTEN_DNSPORT_H */ diff --git a/services/localzone.c b/services/localzone.c index fd2ff2bb67f7..54f55ab810e4 100644 --- a/services/localzone.c +++ b/services/localzone.c @@ -745,9 +745,15 @@ static int lz_enter_zones(struct local_zones* zones, struct config_file* cfg) { struct config_str2list* p; +#ifndef THREADS_DISABLED struct local_zone* z; +#endif for(p = cfg->local_zones; p; p = p->next) { - if(!(z=lz_enter_zone(zones, p->str, p->str2, + if(!( +#ifndef THREADS_DISABLED + z= +#endif + lz_enter_zone(zones, p->str, p->str2, LDNS_RR_CLASS_IN))) return 0; lock_rw_unlock(&z->lock); @@ -1027,7 +1033,9 @@ lz_setup_implicit(struct local_zones* zones, struct config_file* cfg) } if(have_name) { uint8_t* n2; +#ifndef THREADS_DISABLED struct local_zone* z; +#endif /* allocate zone of smallest shared topdomain to contain em */ n2 = nm; dname_remove_labels(&n2, &nmlen, nmlabs - match); @@ -1039,7 +1047,11 @@ lz_setup_implicit(struct local_zones* zones, struct config_file* cfg) } log_nametypeclass(VERB_ALGO, "implicit transparent local-zone", n2, 0, dclass); - if(!(z=lz_enter_zone_dname(zones, n2, nmlen, match, + if(!( +#ifndef THREADS_DISABLED + z= +#endif + lz_enter_zone_dname(zones, n2, nmlen, match, local_zone_transparent, dclass))) { return 0; } diff --git a/services/localzone.h b/services/localzone.h index 3da5c8754bf3..b52d81dc72cb 100644 --- a/services/localzone.h +++ b/services/localzone.h @@ -158,7 +158,7 @@ struct local_zone { rbtree_type data; /** if data contains zone apex SOA data, this is a ptr to it. */ struct ub_packed_rrset_key* soa; - /** if data contains zone apex SOA data, this is a prt to an + /** if data contains zone apex SOA data, this is a ptr to an * artificial negative SOA rrset (TTL is the minimum of the TTL and the * SOA.MINIMUM). */ struct ub_packed_rrset_key* soa_negative; diff --git a/services/mesh.c b/services/mesh.c index 91d23debf351..5679a8b64e58 100644 --- a/services/mesh.c +++ b/services/mesh.c @@ -99,7 +99,7 @@ timeval_divide(struct timeval* avg, const struct timeval* sum, size_t d) { #ifndef S_SPLINT_S size_t leftover; - if(d == 0) { + if(d <= 0) { avg->tv_sec = 0; avg->tv_usec = 0; return; @@ -108,7 +108,13 @@ timeval_divide(struct timeval* avg, const struct timeval* sum, size_t d) avg->tv_usec = sum->tv_usec / d; /* handle fraction from seconds divide */ leftover = sum->tv_sec - avg->tv_sec*d; - avg->tv_usec += (leftover*1000000)/d; + if(leftover <= 0) + leftover = 0; + avg->tv_usec += (((long long)leftover)*((long long)1000000))/d; + if(avg->tv_sec < 0) + avg->tv_sec = 0; + if(avg->tv_usec < 0) + avg->tv_usec = 0; #endif } @@ -433,7 +439,7 @@ mesh_serve_expired_init(struct mesh_state* mstate, int timeout) mstate->s.serve_expired_data->get_cached_answer = mstate->s.serve_expired_data->get_cached_answer? mstate->s.serve_expired_data->get_cached_answer: - mesh_serve_expired_lookup; + &mesh_serve_expired_lookup; /* In case this timer already popped, start it again */ if(!mstate->s.serve_expired_data->timer) { @@ -1813,8 +1819,7 @@ mesh_detect_cycle(struct module_qstate* qstate, struct query_info* qinfo, { struct mesh_area* mesh = qstate->env->mesh; struct mesh_state* dep_m = NULL; - if(!mesh_state_is_unique(qstate->mesh_info)) - dep_m = mesh_area_find(mesh, NULL, qinfo, flags, prime, valrec); + dep_m = mesh_area_find(mesh, NULL, qinfo, flags, prime, valrec); return mesh_detect_cycle_found(qstate, dep_m); } @@ -1941,7 +1946,7 @@ mesh_serve_expired_callback(void* arg) while(1) { fptr_ok(fptr_whitelist_serve_expired_lookup( qstate->serve_expired_data->get_cached_answer)); - msg = qstate->serve_expired_data->get_cached_answer(qstate, + msg = (*qstate->serve_expired_data->get_cached_answer)(qstate, lookup_qinfo); if(!msg) return; diff --git a/services/modstack.c b/services/modstack.c index a600549b16c3..da8e623c16da 100644 --- a/services/modstack.c +++ b/services/modstack.c @@ -88,57 +88,56 @@ count_modules(const char* s) return num; } -void +void modstack_init(struct module_stack* stack) { stack->num = 0; stack->mod = NULL; } -int +int modstack_config(struct module_stack* stack, const char* module_conf) { - int i; - verbose(VERB_QUERY, "module config: \"%s\"", module_conf); - stack->num = count_modules(module_conf); - if(stack->num == 0) { - log_err("error: no modules specified"); - return 0; - } - if(stack->num > MAX_MODULE) { - log_err("error: too many modules (%d max %d)", - stack->num, MAX_MODULE); - return 0; - } - stack->mod = (struct module_func_block**)calloc((size_t) - stack->num, sizeof(struct module_func_block*)); - if(!stack->mod) { - log_err("out of memory"); - return 0; - } - for(i=0; i<stack->num; i++) { - stack->mod[i] = module_factory(&module_conf); - if(!stack->mod[i]) { + int i; + verbose(VERB_QUERY, "module config: \"%s\"", module_conf); + stack->num = count_modules(module_conf); + if(stack->num == 0) { + log_err("error: no modules specified"); + return 0; + } + if(stack->num > MAX_MODULE) { + log_err("error: too many modules (%d max %d)", + stack->num, MAX_MODULE); + return 0; + } + stack->mod = (struct module_func_block**)calloc((size_t) + stack->num, sizeof(struct module_func_block*)); + if(!stack->mod) { + log_err("out of memory"); + return 0; + } + for(i=0; i<stack->num; i++) { + stack->mod[i] = module_factory(&module_conf); + if(!stack->mod[i]) { char md[256]; snprintf(md, sizeof(md), "%s", module_conf); if(strchr(md, ' ')) *(strchr(md, ' ')) = 0; if(strchr(md, '\t')) *(strchr(md, '\t')) = 0; - log_err("Unknown value in module-config, module: '%s'." + log_err("Unknown value in module-config, module: '%s'." " This module is not present (not compiled in)," - " See the list of linked modules with unbound -h", - md); - return 0; - } - } - return 1; + " See the list of linked modules with unbound -V", md); + return 0; + } + } + return 1; } /** The list of module names */ const char** module_list_avail(void) { - /* these are the modules available */ - static const char* names[] = { + /* these are the modules available */ + static const char* names[] = { "dns64", #ifdef WITH_PYTHONMODULE "python", @@ -156,7 +155,7 @@ module_list_avail(void) "subnetcache", #endif #ifdef USE_IPSET - "ipset", + "ipset", #endif "respip", "validator", diff --git a/services/outside_network.c b/services/outside_network.c index 6c6b42ccbdb8..a3f982e72185 100644 --- a/services/outside_network.c +++ b/services/outside_network.c @@ -90,8 +90,8 @@ static int randomize_and_send_udp(struct pending* pend, sldns_buffer* packet, static void waiting_list_remove(struct outside_network* outnet, struct waiting_tcp* w); -/** remove reused element from tree and lru list */ -static void reuse_tcp_remove_tree_list(struct outside_network* outnet, +/** select a DNS ID for a TCP stream */ +static uint16_t tcp_select_id(struct outside_network* outnet, struct reuse_tcp* reuse); int @@ -198,15 +198,17 @@ waiting_tcp_delete(struct waiting_tcp* w) * Pick random outgoing-interface of that family, and bind it. * port set to 0 so OS picks a port number for us. * if it is the ANY address, do not bind. + * @param pend: pending tcp structure, for storing the local address choice. * @param w: tcp structure with destination address. * @param s: socket fd. * @return false on error, socket closed. */ static int -pick_outgoing_tcp(struct waiting_tcp* w, int s) +pick_outgoing_tcp(struct pending_tcp* pend, struct waiting_tcp* w, int s) { struct port_if* pi = NULL; int num; + pend->pi = NULL; #ifdef INET6 if(addr_is_ip6(&w->addr, w->addrlen)) num = w->outnet->num_ip6; @@ -226,6 +228,7 @@ pick_outgoing_tcp(struct waiting_tcp* w, int s) #endif pi = &w->outnet->ip4_ifs[ub_random_max(w->outnet->rnd, num)]; log_assert(pi); + pend->pi = pi; if(addr_is_any(&pi->addr, pi->addrlen)) { /* binding to the ANY interface is for listening sockets */ return 1; @@ -235,7 +238,14 @@ pick_outgoing_tcp(struct waiting_tcp* w, int s) ((struct sockaddr_in6*)&pi->addr)->sin6_port = 0; else ((struct sockaddr_in*)&pi->addr)->sin_port = 0; if(bind(s, (struct sockaddr*)&pi->addr, pi->addrlen) != 0) { - log_err("outgoing tcp: bind: %s", sock_strerror(errno)); +#ifndef USE_WINSOCK +#ifdef EADDRNOTAVAIL + if(!(verbosity < 4 && errno == EADDRNOTAVAIL)) +#endif +#else /* USE_WINSOCK */ + if(!(verbosity < 4 && WSAGetLastError() == WSAEADDRNOTAVAIL)) +#endif + log_err("outgoing tcp: bind: %s", sock_strerror(errno)); sock_close(s); return 0; } @@ -337,6 +347,8 @@ log_reuse_tcp(enum verbosity_value v, const char* msg, struct reuse_tcp* reuse) uint16_t port; char addrbuf[128]; if(verbosity < v) return; + if(!reuse || !reuse->pending || !reuse->pending->c) + return; addr_to_str(&reuse->addr, reuse->addrlen, addrbuf, sizeof(addrbuf)); port = ntohs(((struct sockaddr_in*)&reuse->addr)->sin_port); verbose(v, "%s %s#%u fd %d", msg, addrbuf, (unsigned)port, @@ -356,6 +368,8 @@ static struct waiting_tcp* reuse_write_wait_pop(struct reuse_tcp* reuse) w->write_wait_next->write_wait_prev = NULL; else reuse->write_wait_last = NULL; w->write_wait_queued = 0; + w->write_wait_next = NULL; + w->write_wait_prev = NULL; return w; } @@ -363,6 +377,8 @@ static struct waiting_tcp* reuse_write_wait_pop(struct reuse_tcp* reuse) static void reuse_write_wait_remove(struct reuse_tcp* reuse, struct waiting_tcp* w) { + log_assert(w); + log_assert(w->write_wait_queued); if(!w) return; if(!w->write_wait_queued) @@ -370,10 +386,16 @@ static void reuse_write_wait_remove(struct reuse_tcp* reuse, if(w->write_wait_prev) w->write_wait_prev->write_wait_next = w->write_wait_next; else reuse->write_wait_first = w->write_wait_next; + log_assert(!w->write_wait_prev || + w->write_wait_prev->write_wait_next != w->write_wait_prev); if(w->write_wait_next) w->write_wait_next->write_wait_prev = w->write_wait_prev; else reuse->write_wait_last = w->write_wait_prev; + log_assert(!w->write_wait_next + || w->write_wait_next->write_wait_prev != w->write_wait_next); w->write_wait_queued = 0; + w->write_wait_next = NULL; + w->write_wait_prev = NULL; } /** push the element after the last on the writewait list */ @@ -384,6 +406,8 @@ static void reuse_write_wait_push_back(struct reuse_tcp* reuse, log_assert(!w->write_wait_queued); if(reuse->write_wait_last) { reuse->write_wait_last->write_wait_next = w; + log_assert(reuse->write_wait_last->write_wait_next != + reuse->write_wait_last); w->write_wait_prev = reuse->write_wait_last; } else { reuse->write_wait_first = w; @@ -396,9 +420,18 @@ static void reuse_write_wait_push_back(struct reuse_tcp* reuse, void reuse_tree_by_id_insert(struct reuse_tcp* reuse, struct waiting_tcp* w) { +#ifdef UNBOUND_DEBUG + rbnode_type* added; +#endif log_assert(w->id_node.key == NULL); w->id_node.key = w; +#ifdef UNBOUND_DEBUG + added = +#else + (void) +#endif rbtree_insert(&reuse->tree_by_id, &w->id_node); + log_assert(added); /* should have been added */ } /** find element in tree by id */ @@ -424,34 +457,45 @@ tree_by_id_get_id(rbnode_type* node) } /** insert into reuse tcp tree and LRU, false on failure (duplicate) */ -static int +int reuse_tcp_insert(struct outside_network* outnet, struct pending_tcp* pend_tcp) { log_reuse_tcp(VERB_CLIENT, "reuse_tcp_insert", &pend_tcp->reuse); if(pend_tcp->reuse.item_on_lru_list) { if(!pend_tcp->reuse.node.key) - log_err("internal error: reuse_tcp_insert: on lru list without key"); + log_err("internal error: reuse_tcp_insert: " + "in lru list without key"); return 1; } pend_tcp->reuse.node.key = &pend_tcp->reuse; pend_tcp->reuse.pending = pend_tcp; if(!rbtree_insert(&outnet->tcp_reuse, &pend_tcp->reuse.node)) { - /* this is a duplicate connection, close this one */ - verbose(VERB_CLIENT, "reuse_tcp_insert: duplicate connection"); - pend_tcp->reuse.node.key = NULL; - return 0; + /* We are not in the LRU list but we are already in the + * tcp_reuse tree, strange. + * Continue to add ourselves to the LRU list. */ + log_err("internal error: reuse_tcp_insert: in lru list but " + "not in the tree"); } /* insert into LRU, first is newest */ pend_tcp->reuse.lru_prev = NULL; if(outnet->tcp_reuse_first) { pend_tcp->reuse.lru_next = outnet->tcp_reuse_first; + log_assert(pend_tcp->reuse.lru_next != &pend_tcp->reuse); outnet->tcp_reuse_first->lru_prev = &pend_tcp->reuse; + log_assert(outnet->tcp_reuse_first->lru_prev != + outnet->tcp_reuse_first); } else { pend_tcp->reuse.lru_next = NULL; outnet->tcp_reuse_last = &pend_tcp->reuse; } outnet->tcp_reuse_first = &pend_tcp->reuse; pend_tcp->reuse.item_on_lru_list = 1; + log_assert((!outnet->tcp_reuse_first && !outnet->tcp_reuse_last) || + (outnet->tcp_reuse_first && outnet->tcp_reuse_last)); + log_assert(outnet->tcp_reuse_first != outnet->tcp_reuse_first->lru_next && + outnet->tcp_reuse_first != outnet->tcp_reuse_first->lru_prev); + log_assert(outnet->tcp_reuse_last != outnet->tcp_reuse_last->lru_next && + outnet->tcp_reuse_last != outnet->tcp_reuse_last->lru_prev); return 1; } @@ -511,7 +555,7 @@ reuse_tcp_find(struct outside_network* outnet, struct sockaddr_storage* addr, while(result && result != RBTREE_NULL && reuse_cmp_addrportssl(result->key, &key_p.reuse) == 0) { if(((struct reuse_tcp*)result)->tree_by_id.count < - MAX_REUSE_TCP_QUERIES) { + outnet->max_reuse_tcp_queries) { /* same address, port, ssl-yes-or-no, and has * space for another query */ return (struct reuse_tcp*)result; @@ -567,7 +611,7 @@ outnet_tcp_take_into_use(struct waiting_tcp* w) if(s == -1) return 0; - if(!pick_outgoing_tcp(w, s)) + if(!pick_outgoing_tcp(pend, w, s)) return 0; fd_set_nonblock(s); @@ -689,28 +733,65 @@ outnet_tcp_take_into_use(struct waiting_tcp* w) /** Touch the lru of a reuse_tcp element, it is in use. * This moves it to the front of the list, where it is not likely to * be closed. Items at the back of the list are closed to make space. */ -static void +void reuse_tcp_lru_touch(struct outside_network* outnet, struct reuse_tcp* reuse) { if(!reuse->item_on_lru_list) { log_err("internal error: we need to touch the lru_list but item not in list"); return; /* not on the list, no lru to modify */ } + log_assert(reuse->lru_prev || + (!reuse->lru_prev && outnet->tcp_reuse_first == reuse)); if(!reuse->lru_prev) return; /* already first in the list */ /* remove at current position */ /* since it is not first, there is a previous element */ reuse->lru_prev->lru_next = reuse->lru_next; + log_assert(reuse->lru_prev->lru_next != reuse->lru_prev); if(reuse->lru_next) reuse->lru_next->lru_prev = reuse->lru_prev; else outnet->tcp_reuse_last = reuse->lru_prev; + log_assert(!reuse->lru_next || reuse->lru_next->lru_prev != reuse->lru_next); + log_assert(outnet->tcp_reuse_last != outnet->tcp_reuse_last->lru_next && + outnet->tcp_reuse_last != outnet->tcp_reuse_last->lru_prev); /* insert at the front */ reuse->lru_prev = NULL; reuse->lru_next = outnet->tcp_reuse_first; + if(outnet->tcp_reuse_first) { + outnet->tcp_reuse_first->lru_prev = reuse; + } + log_assert(reuse->lru_next != reuse); /* since it is not first, it is not the only element and * lru_next is thus not NULL and thus reuse is now not the last in * the list, so outnet->tcp_reuse_last does not need to be modified */ outnet->tcp_reuse_first = reuse; + log_assert(outnet->tcp_reuse_first != outnet->tcp_reuse_first->lru_next && + outnet->tcp_reuse_first != outnet->tcp_reuse_first->lru_prev); + log_assert((!outnet->tcp_reuse_first && !outnet->tcp_reuse_last) || + (outnet->tcp_reuse_first && outnet->tcp_reuse_last)); +} + +/** Snip the last reuse_tcp element off of the LRU list */ +struct reuse_tcp* +reuse_tcp_lru_snip(struct outside_network* outnet) +{ + struct reuse_tcp* reuse = outnet->tcp_reuse_last; + if(!reuse) return NULL; + /* snip off of LRU */ + log_assert(reuse->lru_next == NULL); + if(reuse->lru_prev) { + outnet->tcp_reuse_last = reuse->lru_prev; + reuse->lru_prev->lru_next = NULL; + } else { + outnet->tcp_reuse_last = NULL; + outnet->tcp_reuse_first = NULL; + } + log_assert((!outnet->tcp_reuse_first && !outnet->tcp_reuse_last) || + (outnet->tcp_reuse_first && outnet->tcp_reuse_last)); + reuse->item_on_lru_list = 0; + reuse->lru_next = NULL; + reuse->lru_prev = NULL; + return reuse; } /** call callback on waiting_tcp, if not NULL */ @@ -718,30 +799,89 @@ static void waiting_tcp_callback(struct waiting_tcp* w, struct comm_point* c, int error, struct comm_reply* reply_info) { - if(w->cb) { + if(w && w->cb) { fptr_ok(fptr_whitelist_pending_tcp(w->cb)); (void)(*w->cb)(c, w->cb_arg, error, reply_info); } } +/** add waiting_tcp element to the outnet tcp waiting list */ +static void +outnet_add_tcp_waiting(struct outside_network* outnet, struct waiting_tcp* w) +{ + struct timeval tv; + log_assert(!w->on_tcp_waiting_list); + if(w->on_tcp_waiting_list) + return; + w->next_waiting = NULL; + if(outnet->tcp_wait_last) + outnet->tcp_wait_last->next_waiting = w; + else outnet->tcp_wait_first = w; + outnet->tcp_wait_last = w; + w->on_tcp_waiting_list = 1; +#ifndef S_SPLINT_S + tv.tv_sec = w->timeout/1000; + tv.tv_usec = (w->timeout%1000)*1000; +#endif + comm_timer_set(w->timer, &tv); +} + +/** add waiting_tcp element as first to the outnet tcp waiting list */ +static void +outnet_add_tcp_waiting_first(struct outside_network* outnet, + struct waiting_tcp* w, int reset_timer) +{ + struct timeval tv; + log_assert(!w->on_tcp_waiting_list); + if(w->on_tcp_waiting_list) + return; + w->next_waiting = outnet->tcp_wait_first; + if(!outnet->tcp_wait_last) + outnet->tcp_wait_last = w; + outnet->tcp_wait_first = w; + w->on_tcp_waiting_list = 1; + if(reset_timer) { +#ifndef S_SPLINT_S + tv.tv_sec = w->timeout/1000; + tv.tv_usec = (w->timeout%1000)*1000; +#endif + comm_timer_set(w->timer, &tv); + } + log_assert( + (!outnet->tcp_reuse_first && !outnet->tcp_reuse_last) || + (outnet->tcp_reuse_first && outnet->tcp_reuse_last)); +} + /** see if buffers can be used to service TCP queries */ static void use_free_buffer(struct outside_network* outnet) { struct waiting_tcp* w; - while(outnet->tcp_free && outnet->tcp_wait_first - && !outnet->want_to_quit) { + while(outnet->tcp_wait_first && !outnet->want_to_quit) { +#ifdef USE_DNSTAP + struct pending_tcp* pend_tcp = NULL; +#endif struct reuse_tcp* reuse = NULL; w = outnet->tcp_wait_first; + log_assert(w->on_tcp_waiting_list); outnet->tcp_wait_first = w->next_waiting; if(outnet->tcp_wait_last == w) outnet->tcp_wait_last = NULL; + log_assert( + (!outnet->tcp_reuse_first && !outnet->tcp_reuse_last) || + (outnet->tcp_reuse_first && outnet->tcp_reuse_last)); w->on_tcp_waiting_list = 0; reuse = reuse_tcp_find(outnet, &w->addr, w->addrlen, w->ssl_upstream); + /* re-select an ID when moving to a new TCP buffer */ + w->id = tcp_select_id(outnet, reuse); + LDNS_ID_SET(w->pkt, w->id); if(reuse) { log_reuse_tcp(VERB_CLIENT, "use free buffer for waiting tcp: " "found reuse", reuse); +#ifdef USE_DNSTAP + pend_tcp = reuse->pending; +#endif reuse_tcp_lru_touch(outnet, reuse); comm_timer_disable(w->timer); w->next_waiting = (void*)reuse->pending; @@ -758,7 +898,7 @@ use_free_buffer(struct outside_network* outnet) reuse->pending->c->fd, reuse->pending, w); } - } else { + } else if(outnet->tcp_free) { struct pending_tcp* pend = w->outnet->tcp_free; rbtree_init(&pend->reuse.tree_by_id, reuse_id_cmp); pend->reuse.pending = pend; @@ -768,37 +908,47 @@ use_free_buffer(struct outside_network* outnet) waiting_tcp_callback(w, NULL, NETEVENT_CLOSED, NULL); waiting_tcp_delete(w); +#ifdef USE_DNSTAP + w = NULL; +#endif } +#ifdef USE_DNSTAP + pend_tcp = pend; +#endif + } else { + /* no reuse and no free buffer, put back at the start */ + outnet_add_tcp_waiting_first(outnet, w, 0); + break; + } +#ifdef USE_DNSTAP + if(outnet->dtenv && pend_tcp && w && w->sq && + (outnet->dtenv->log_resolver_query_messages || + outnet->dtenv->log_forwarder_query_messages)) { + sldns_buffer tmp; + sldns_buffer_init_frm_data(&tmp, w->pkt, w->pkt_len); + dt_msg_send_outside_query(outnet->dtenv, &w->sq->addr, + &pend_tcp->pi->addr, comm_tcp, w->sq->zone, + w->sq->zonelen, &tmp); } - } -} - -/** add waiting_tcp element to the outnet tcp waiting list */ -static void -outnet_add_tcp_waiting(struct outside_network* outnet, struct waiting_tcp* w) -{ - struct timeval tv; - if(w->on_tcp_waiting_list) - return; - w->next_waiting = NULL; - if(outnet->tcp_wait_last) - outnet->tcp_wait_last->next_waiting = w; - else outnet->tcp_wait_first = w; - outnet->tcp_wait_last = w; - w->on_tcp_waiting_list = 1; -#ifndef S_SPLINT_S - tv.tv_sec = w->timeout/1000; - tv.tv_usec = (w->timeout%1000)*1000; #endif - comm_timer_set(w->timer, &tv); + } } /** delete element from tree by id */ static void reuse_tree_by_id_delete(struct reuse_tcp* reuse, struct waiting_tcp* w) { +#ifdef UNBOUND_DEBUG + rbnode_type* rem; +#endif log_assert(w->id_node.key != NULL); +#ifdef UNBOUND_DEBUG + rem = +#else + (void) +#endif rbtree_delete(&reuse->tree_by_id, w); + log_assert(rem); /* should have been there */ w->id_node.key = NULL; } @@ -857,15 +1007,24 @@ reuse_move_writewait_away(struct outside_network* outnet, } /** remove reused element from tree and lru list */ -static void +void reuse_tcp_remove_tree_list(struct outside_network* outnet, struct reuse_tcp* reuse) { verbose(VERB_CLIENT, "reuse_tcp_remove_tree_list"); if(reuse->node.key) { /* delete it from reuse tree */ - (void)rbtree_delete(&outnet->tcp_reuse, reuse); + if(!rbtree_delete(&outnet->tcp_reuse, reuse)) { + /* should not be possible, it should be there */ + char buf[256]; + addr_to_str(&reuse->addr, reuse->addrlen, buf, + sizeof(buf)); + log_err("reuse tcp delete: node not present, internal error, %s ssl %d lru %d", buf, reuse->is_ssl, reuse->item_on_lru_list); + } reuse->node.key = NULL; + /* defend against loops on broken tree by zeroing the + * rbnode structure */ + memset(&reuse->node, 0, sizeof(reuse->node)); } /* delete from reuse list */ if(reuse->item_on_lru_list) { @@ -874,21 +1033,38 @@ reuse_tcp_remove_tree_list(struct outside_network* outnet, * and thus have a pending pointer to the struct */ log_assert(reuse->lru_prev->pending); reuse->lru_prev->lru_next = reuse->lru_next; + log_assert(reuse->lru_prev->lru_next != reuse->lru_prev); } else { log_assert(!reuse->lru_next || reuse->lru_next->pending); outnet->tcp_reuse_first = reuse->lru_next; + log_assert(!outnet->tcp_reuse_first || + (outnet->tcp_reuse_first != + outnet->tcp_reuse_first->lru_next && + outnet->tcp_reuse_first != + outnet->tcp_reuse_first->lru_prev)); } if(reuse->lru_next) { /* assert that members of the lru list are waiting * and thus have a pending pointer to the struct */ log_assert(reuse->lru_next->pending); reuse->lru_next->lru_prev = reuse->lru_prev; + log_assert(reuse->lru_next->lru_prev != reuse->lru_next); } else { log_assert(!reuse->lru_prev || reuse->lru_prev->pending); outnet->tcp_reuse_last = reuse->lru_prev; - } + log_assert(!outnet->tcp_reuse_last || + (outnet->tcp_reuse_last != + outnet->tcp_reuse_last->lru_next && + outnet->tcp_reuse_last != + outnet->tcp_reuse_last->lru_prev)); + } + log_assert((!outnet->tcp_reuse_first && !outnet->tcp_reuse_last) || + (outnet->tcp_reuse_first && outnet->tcp_reuse_last)); reuse->item_on_lru_list = 0; + reuse->lru_next = NULL; + reuse->lru_prev = NULL; } + reuse->pending = NULL; } /** helper function that deletes an element from the tree of readwait @@ -915,8 +1091,12 @@ decommission_pending_tcp(struct outside_network* outnet, struct pending_tcp* pend) { verbose(VERB_CLIENT, "decommission_pending_tcp"); - pend->next_free = outnet->tcp_free; - outnet->tcp_free = pend; + /* A certain code path can lead here twice for the same pending_tcp + * creating a loop in the free pending_tcp list. */ + if(outnet->tcp_free != pend) { + pend->next_free = outnet->tcp_free; + outnet->tcp_free = pend; + } if(pend->reuse.node.key) { /* needs unlink from the reuse tree to get deleted */ reuse_tcp_remove_tree_list(outnet, &pend->reuse); @@ -977,22 +1157,22 @@ static void reuse_cb_and_decommission(struct outside_network* outnet, /** set timeout on tcp fd and setup read event to catch incoming dns msgs */ static void -reuse_tcp_setup_timeout(struct pending_tcp* pend_tcp) +reuse_tcp_setup_timeout(struct pending_tcp* pend_tcp, int tcp_reuse_timeout) { log_reuse_tcp(VERB_CLIENT, "reuse_tcp_setup_timeout", &pend_tcp->reuse); - comm_point_start_listening(pend_tcp->c, -1, REUSE_TIMEOUT); + comm_point_start_listening(pend_tcp->c, -1, tcp_reuse_timeout); } /** set timeout on tcp fd and setup read event to catch incoming dns msgs */ static void -reuse_tcp_setup_read_and_timeout(struct pending_tcp* pend_tcp) +reuse_tcp_setup_read_and_timeout(struct pending_tcp* pend_tcp, int tcp_reuse_timeout) { log_reuse_tcp(VERB_CLIENT, "reuse_tcp_setup_readtimeout", &pend_tcp->reuse); sldns_buffer_clear(pend_tcp->c->buffer); pend_tcp->c->tcp_is_reading = 1; pend_tcp->c->tcp_byte_count = 0; comm_point_stop_listening(pend_tcp->c); - comm_point_start_listening(pend_tcp->c, -1, REUSE_TIMEOUT); + comm_point_start_listening(pend_tcp->c, -1, tcp_reuse_timeout); } int @@ -1002,6 +1182,7 @@ outnet_tcp_cb(struct comm_point* c, void* arg, int error, struct pending_tcp* pend = (struct pending_tcp*)arg; struct outside_network* outnet = pend->reuse.outnet; struct waiting_tcp* w = NULL; + log_assert(pend->reuse.item_on_lru_list && pend->reuse.node.key); verbose(VERB_ALGO, "outnettcp cb"); if(error == NETEVENT_TIMEOUT) { if(pend->c->tcp_write_and_read) { @@ -1048,7 +1229,7 @@ outnet_tcp_cb(struct comm_point* c, void* arg, int error, pend->reuse.cp_more_write_again = 0; pend->c->tcp_is_reading = 1; comm_point_stop_listening(pend->c); - reuse_tcp_setup_timeout(pend); + reuse_tcp_setup_timeout(pend, outnet->tcp_reuse_timeout); } return 0; } else if(error != NETEVENT_NOERROR) { @@ -1101,7 +1282,7 @@ outnet_tcp_cb(struct comm_point* c, void* arg, int error, * and there could be more bytes to read on the input */ if(pend->reuse.tree_by_id.count != 0) pend->reuse.cp_more_read_again = 1; - reuse_tcp_setup_read_and_timeout(pend); + reuse_tcp_setup_read_and_timeout(pend, outnet->tcp_reuse_timeout); return 0; } verbose(VERB_CLIENT, "outnet_tcp_cb reuse after cb: decommission it"); @@ -1369,7 +1550,8 @@ outside_network_create(struct comm_base *base, size_t bufsize, int numavailports, size_t unwanted_threshold, int tcp_mss, void (*unwanted_action)(void*), void* unwanted_param, int do_udp, void* sslctx, int delayclose, int tls_use_sni, struct dt_env* dtenv, - int udp_connect) + int udp_connect, int max_reuse_tcp_queries, int tcp_reuse_timeout, + int tcp_auth_query_timeout) { struct outside_network* outnet = (struct outside_network*) calloc(1, sizeof(struct outside_network)); @@ -1381,6 +1563,9 @@ outside_network_create(struct comm_base *base, size_t bufsize, comm_base_timept(base, &outnet->now_secs, &outnet->now_tv); outnet->base = base; outnet->num_tcp = num_tcp; + outnet->max_reuse_tcp_queries = max_reuse_tcp_queries; + outnet->tcp_reuse_timeout= tcp_reuse_timeout; + outnet->tcp_auth_query_timeout = tcp_auth_query_timeout; outnet->num_tcp_outgoing = 0; outnet->infra = infra; outnet->rnd = rnd; @@ -1457,7 +1642,7 @@ outside_network_create(struct comm_base *base, size_t bufsize, return NULL; } pc->cp = comm_point_create_udp(outnet->base, -1, - outnet->udp_buff, outnet_udp_cb, outnet); + outnet->udp_buff, outnet_udp_cb, outnet, NULL); if(!pc->cp) { log_err("malloc failed"); free(pc); @@ -1609,22 +1794,19 @@ outside_network_delete(struct outside_network* outnet) size_t i; for(i=0; i<outnet->num_tcp; i++) if(outnet->tcp_conns[i]) { - if(outnet->tcp_conns[i]->query && - !outnet->tcp_conns[i]->query-> - on_tcp_waiting_list) { + struct pending_tcp* pend; + pend = outnet->tcp_conns[i]; + if(pend->reuse.item_on_lru_list) { /* delete waiting_tcp elements that * the tcp conn is working on */ - struct pending_tcp* pend = - (struct pending_tcp*)outnet-> - tcp_conns[i]->query-> - next_waiting; decommission_pending_tcp(outnet, pend); } comm_point_delete(outnet->tcp_conns[i]->c); - waiting_tcp_delete(outnet->tcp_conns[i]->query); free(outnet->tcp_conns[i]); + outnet->tcp_conns[i] = NULL; } free(outnet->tcp_conns); + outnet->tcp_conns = NULL; } if(outnet->tcp_wait_first) { struct waiting_tcp* p = outnet->tcp_wait_first, *np; @@ -1742,14 +1924,14 @@ select_id(struct outside_network* outnet, struct pending* pend, sldns_buffer* packet) { int id_tries = 0; - pend->id = ((unsigned)ub_random(outnet->rnd)>>8) & 0xffff; + pend->id = GET_RANDOM_ID(outnet->rnd); LDNS_ID_SET(sldns_buffer_begin(packet), pend->id); /* insert in tree */ pend->node.key = pend; while(!rbtree_insert(outnet->pending, &pend->node)) { /* change ID to avoid collision */ - pend->id = ((unsigned)ub_random(outnet->rnd)>>8) & 0xffff; + pend->id = GET_RANDOM_ID(outnet->rnd); LDNS_ID_SET(sldns_buffer_begin(packet), pend->id); id_tries++; if(id_tries == MAX_ID_RETRY) { @@ -1779,6 +1961,7 @@ static int udp_connect_needs_log(int err) # ifdef ENETDOWN case ENETDOWN: # endif + case EPERM: if(verbosity >= VERB_ALGO) return 1; return 0; @@ -1931,11 +2114,21 @@ randomize_and_send_udp(struct pending* pend, sldns_buffer* packet, int timeout) comm_timer_set(pend->timer, &tv); #ifdef USE_DNSTAP + /* + * sending src (local service)/dst (upstream) addresses over DNSTAP + * There are no chances to get the src (local service) addr if unbound + * is not configured with specific outgoing IP-addresses. So we will + * pass 0.0.0.0 (::) to argument for + * dt_msg_send_outside_query()/dt_msg_send_outside_response() calls. + */ if(outnet->dtenv && (outnet->dtenv->log_resolver_query_messages || - outnet->dtenv->log_forwarder_query_messages)) - dt_msg_send_outside_query(outnet->dtenv, &pend->addr, comm_udp, - pend->sq->zone, pend->sq->zonelen, packet); + outnet->dtenv->log_forwarder_query_messages)) { + log_addr(VERB_ALGO, "from local addr", &pend->pc->pif->addr, pend->pc->pif->addrlen); + log_addr(VERB_ALGO, "request to upstream", &pend->addr, pend->addrlen); + dt_msg_send_outside_query(outnet->dtenv, &pend->addr, &pend->pc->pif->addr, comm_udp, + pend->sq->zone, pend->sq->zonelen, packet); + } #endif return 1; } @@ -2011,24 +2204,20 @@ outnet_tcptimer(void* arg) static void reuse_tcp_close_oldest(struct outside_network* outnet) { - struct pending_tcp* pend; + struct reuse_tcp* reuse; verbose(VERB_CLIENT, "reuse_tcp_close_oldest"); - if(!outnet->tcp_reuse_last) return; - pend = outnet->tcp_reuse_last->pending; - - /* snip off of LRU */ - log_assert(pend->reuse.lru_next == NULL); - if(pend->reuse.lru_prev) { - outnet->tcp_reuse_last = pend->reuse.lru_prev; - pend->reuse.lru_prev->lru_next = NULL; - } else { - outnet->tcp_reuse_last = NULL; - outnet->tcp_reuse_first = NULL; - } - pend->reuse.item_on_lru_list = 0; - + reuse = reuse_tcp_lru_snip(outnet); + if(!reuse) return; /* free up */ - reuse_cb_and_decommission(outnet, pend, NETEVENT_CLOSED); + reuse_cb_and_decommission(outnet, reuse->pending, NETEVENT_CLOSED); +} + +static uint16_t +tcp_select_id(struct outside_network* outnet, struct reuse_tcp* reuse) +{ + if(reuse) + return reuse_tcp_select_id(reuse, outnet); + return GET_RANDOM_ID(outnet->rnd); } /** find spare ID value for reuse tcp stream. That is random and also does @@ -2044,13 +2233,13 @@ reuse_tcp_select_id(struct reuse_tcp* reuse, struct outside_network* outnet) /* make really sure the tree is not empty */ if(reuse->tree_by_id.count == 0) { - id = ((unsigned)ub_random(outnet->rnd)>>8) & 0xffff; + id = GET_RANDOM_ID(outnet->rnd); return id; } /* try to find random empty spots by picking them */ for(i = 0; i<try_random; i++) { - id = ((unsigned)ub_random(outnet->rnd)>>8) & 0xffff; + id = GET_RANDOM_ID(outnet->rnd); if(!reuse_tcp_by_id_find(reuse, id)) { return id; } @@ -2126,6 +2315,7 @@ pending_tcp_query(struct serviced_query* sq, sldns_buffer* packet, reuse_tcp_lru_touch(sq->outnet, reuse); } + log_assert(!reuse || (reuse && pend)); /* if !pend but we have reuse streams, close a reuse stream * to be able to open a new one to this target, no use waiting * to reuse a file descriptor while another query needs to use @@ -2133,6 +2323,7 @@ pending_tcp_query(struct serviced_query* sq, sldns_buffer* packet, if(!pend) { reuse_tcp_close_oldest(sq->outnet); pend = sq->outnet->tcp_free; + log_assert(!reuse || (pend == reuse->pending)); } /* allocate space to store query */ @@ -2148,9 +2339,7 @@ pending_tcp_query(struct serviced_query* sq, sldns_buffer* packet, w->pkt = (uint8_t*)w + sizeof(struct waiting_tcp); w->pkt_len = sldns_buffer_limit(packet); memmove(w->pkt, sldns_buffer_begin(packet), w->pkt_len); - if(reuse) - w->id = reuse_tcp_select_id(reuse, sq->outnet); - else w->id = ((unsigned)ub_random(sq->outnet->rnd)>>8) & 0xffff; + w->id = tcp_select_id(sq->outnet, reuse); LDNS_ID_SET(w->pkt, w->id); memcpy(&w->addr, &sq->addr, sq->addrlen); w->addrlen = sq->addrlen; @@ -2167,9 +2356,13 @@ pending_tcp_query(struct serviced_query* sq, sldns_buffer* packet, w->write_wait_next = NULL; w->write_wait_queued = 0; w->error_count = 0; +#ifdef USE_DNSTAP + w->sq = NULL; +#endif if(pend) { /* we have a buffer available right now */ if(reuse) { + log_assert(reuse == &pend->reuse); /* reuse existing fd, write query and continue */ /* store query in tree by id */ verbose(VERB_CLIENT, "pending_tcp_query: reuse, store"); @@ -2201,20 +2394,28 @@ pending_tcp_query(struct serviced_query* sq, sldns_buffer* packet, return NULL; } } +#ifdef USE_DNSTAP + if(sq->outnet->dtenv && + (sq->outnet->dtenv->log_resolver_query_messages || + sq->outnet->dtenv->log_forwarder_query_messages)) { + /* use w->pkt, because it has the ID value */ + sldns_buffer tmp; + sldns_buffer_init_frm_data(&tmp, w->pkt, w->pkt_len); + dt_msg_send_outside_query(sq->outnet->dtenv, &sq->addr, + &pend->pi->addr, comm_tcp, sq->zone, + sq->zonelen, &tmp); + } +#endif } else { /* queue up */ /* waiting for a buffer on the outside network buffer wait * list */ verbose(VERB_CLIENT, "pending_tcp_query: queue to wait"); - outnet_add_tcp_waiting(sq->outnet, w); - } #ifdef USE_DNSTAP - if(sq->outnet->dtenv && - (sq->outnet->dtenv->log_resolver_query_messages || - sq->outnet->dtenv->log_forwarder_query_messages)) - dt_msg_send_outside_query(sq->outnet->dtenv, &sq->addr, - comm_tcp, sq->zone, sq->zonelen, packet); + w->sq = sq; #endif + outnet_add_tcp_waiting(sq->outnet, w); + } return w; } @@ -2348,6 +2549,9 @@ waiting_list_remove(struct outside_network* outnet, struct waiting_tcp* w) prev = p; p = p->next_waiting; } + /* waiting_list_remove is currently called only with items that are + * already in the waiting list. */ + log_assert(0); } /** reuse tcp stream, remove serviced query from stream, @@ -2386,7 +2590,7 @@ reuse_tcp_remove_serviced_keep(struct waiting_tcp* w, if(!reuse_tcp_insert(sq->outnet, pend_tcp)) { return 0; } - reuse_tcp_setup_timeout(pend_tcp); + reuse_tcp_setup_timeout(pend_tcp, sq->outnet->tcp_reuse_timeout); return 1; } return 0; @@ -2720,6 +2924,15 @@ serviced_tcp_callback(struct comm_point* c, void* arg, int error, { struct serviced_query* sq = (struct serviced_query*)arg; struct comm_reply r2; +#ifdef USE_DNSTAP + struct waiting_tcp* w = (struct waiting_tcp*)sq->pending; + struct pending_tcp* pend_tcp = NULL; + struct port_if* pi = NULL; + if(!w->on_tcp_waiting_list && w->next_waiting) { + pend_tcp = (struct pending_tcp*)w->next_waiting; + pi = pend_tcp->pi; + } +#endif sq->pending = NULL; /* removed after this callback */ if(error != NETEVENT_NOERROR) log_addr(VERB_QUERY, "tcp error for address", @@ -2728,12 +2941,19 @@ serviced_tcp_callback(struct comm_point* c, void* arg, int error, infra_update_tcp_works(sq->outnet->infra, &sq->addr, sq->addrlen, sq->zone, sq->zonelen); #ifdef USE_DNSTAP - if(error==NETEVENT_NOERROR && sq->outnet->dtenv && + /* + * sending src (local service)/dst (upstream) addresses over DNSTAP + */ + if(error==NETEVENT_NOERROR && pi && sq->outnet->dtenv && (sq->outnet->dtenv->log_resolver_response_messages || - sq->outnet->dtenv->log_forwarder_response_messages)) + sq->outnet->dtenv->log_forwarder_response_messages)) { + log_addr(VERB_ALGO, "response from upstream", &sq->addr, sq->addrlen); + log_addr(VERB_ALGO, "to local addr", &pi->addr, pi->addrlen); dt_msg_send_outside_response(sq->outnet->dtenv, &sq->addr, - c->type, sq->zone, sq->zonelen, sq->qbuf, sq->qbuflen, - &sq->last_sent_time, sq->outnet->now_tv, c->buffer); + &pi->addr, c->type, sq->zone, sq->zonelen, sq->qbuf, + sq->qbuflen, &sq->last_sent_time, sq->outnet->now_tv, + c->buffer); + } #endif if(error==NETEVENT_NOERROR && sq->status == serviced_query_TCP_EDNS && (LDNS_RCODE_WIRE(sldns_buffer_begin(c->buffer)) == @@ -2804,7 +3024,7 @@ serviced_tcp_initiate(struct serviced_query* sq, sldns_buffer* buff) sq->status==serviced_query_TCP_EDNS?"EDNS":""); serviced_encode(sq, buff, sq->status == serviced_query_TCP_EDNS); sq->last_sent_time = *sq->outnet->now_tv; - sq->pending = pending_tcp_query(sq, buff, TCP_AUTH_QUERY_TIMEOUT, + sq->pending = pending_tcp_query(sq, buff, sq->outnet->tcp_auth_query_timeout, serviced_tcp_callback, sq); if(!sq->pending) { /* delete from tree so that a retry by above layer does not @@ -2832,10 +3052,10 @@ serviced_tcp_send(struct serviced_query* sq, sldns_buffer* buff) sq->last_sent_time = *sq->outnet->now_tv; if(sq->tcp_upstream || sq->ssl_upstream) { timeout = rtt; - if(rtt >= UNKNOWN_SERVER_NICENESS && rtt < TCP_AUTH_QUERY_TIMEOUT) - timeout = TCP_AUTH_QUERY_TIMEOUT; + if(rtt >= UNKNOWN_SERVER_NICENESS && rtt < sq->outnet->tcp_auth_query_timeout) + timeout = sq->outnet->tcp_auth_query_timeout; } else { - timeout = TCP_AUTH_QUERY_TIMEOUT; + timeout = sq->outnet->tcp_auth_query_timeout; } sq->pending = pending_tcp_query(sq, buff, timeout, serviced_tcp_callback, sq); @@ -2887,6 +3107,10 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error, struct serviced_query* sq = (struct serviced_query*)arg; struct outside_network* outnet = sq->outnet; struct timeval now = *sq->outnet->now_tv; +#ifdef USE_DNSTAP + struct pending* p = (struct pending*)sq->pending; + struct port_if* pi = p->pc->pif; +#endif sq->pending = NULL; /* removed after callback */ if(error == NETEVENT_TIMEOUT) { @@ -2924,12 +3148,18 @@ serviced_udp_callback(struct comm_point* c, void* arg, int error, return 0; } #ifdef USE_DNSTAP + /* + * sending src (local service)/dst (upstream) addresses over DNSTAP + */ if(error == NETEVENT_NOERROR && outnet->dtenv && (outnet->dtenv->log_resolver_response_messages || - outnet->dtenv->log_forwarder_response_messages)) - dt_msg_send_outside_response(outnet->dtenv, &sq->addr, c->type, - sq->zone, sq->zonelen, sq->qbuf, sq->qbuflen, - &sq->last_sent_time, sq->outnet->now_tv, c->buffer); + outnet->dtenv->log_forwarder_response_messages)) { + log_addr(VERB_ALGO, "response from upstream", &sq->addr, sq->addrlen); + log_addr(VERB_ALGO, "to local addr", &pi->addr, pi->addrlen); + dt_msg_send_outside_response(outnet->dtenv, &sq->addr, &pi->addr, c->type, + sq->zone, sq->zonelen, sq->qbuf, sq->qbuflen, + &sq->last_sent_time, sq->outnet->now_tv, c->buffer); + } #endif if( (sq->status == serviced_query_UDP_EDNS ||sq->status == serviced_query_UDP_EDNS_FRAG) @@ -3203,7 +3433,7 @@ outnet_comm_point_for_udp(struct outside_network* outnet, return NULL; } cp = comm_point_create_udp(outnet->base, fd, outnet->udp_buff, - cb, cb_arg); + cb, cb_arg, NULL); if(!cp) { log_err("malloc failure"); close(fd); @@ -3309,15 +3539,28 @@ outnet_comm_point_for_tcp(struct outside_network* outnet, return cp; } +/** setup the User-Agent HTTP header based on http-user-agent configuration */ +static void +setup_http_user_agent(sldns_buffer* buf, struct config_file* cfg) +{ + if(cfg->hide_http_user_agent) return; + if(cfg->http_user_agent==NULL || cfg->http_user_agent[0] == 0) { + sldns_buffer_printf(buf, "User-Agent: %s/%s\r\n", PACKAGE_NAME, + PACKAGE_VERSION); + } else { + sldns_buffer_printf(buf, "User-Agent: %s\r\n", cfg->http_user_agent); + } +} + /** setup http request headers in buffer for sending query to destination */ static int -setup_http_request(sldns_buffer* buf, char* host, char* path) +setup_http_request(sldns_buffer* buf, char* host, char* path, + struct config_file* cfg) { sldns_buffer_clear(buf); sldns_buffer_printf(buf, "GET /%s HTTP/1.1\r\n", path); sldns_buffer_printf(buf, "Host: %s\r\n", host); - sldns_buffer_printf(buf, "User-Agent: unbound/%s\r\n", - PACKAGE_VERSION); + setup_http_user_agent(buf, cfg); /* We do not really do multiple queries per connection, * but this header setting is also not needed. * sldns_buffer_printf(buf, "Connection: close\r\n") */ @@ -3333,7 +3576,7 @@ struct comm_point* outnet_comm_point_for_http(struct outside_network* outnet, comm_point_callback_type* cb, void* cb_arg, struct sockaddr_storage* to_addr, socklen_t to_addrlen, int timeout, - int ssl, char* host, char* path) + int ssl, char* host, char* path, struct config_file* cfg) { /* cp calls cb with err=NETEVENT_DONE when transfer is done */ struct comm_point* cp; @@ -3369,7 +3612,7 @@ outnet_comm_point_for_http(struct outside_network* outnet, comm_point_start_listening(cp, fd, timeout); /* setup http request in cp->buffer */ - if(!setup_http_request(cp->buffer, host, path)) { + if(!setup_http_request(cp->buffer, host, path, cfg)) { log_err("error setting up http request"); comm_point_delete(cp); return NULL; diff --git a/services/outside_network.h b/services/outside_network.h index fe287af4fcce..d0d532e6425f 100644 --- a/services/outside_network.h +++ b/services/outside_network.h @@ -63,6 +63,7 @@ struct edns_option; struct module_env; struct module_qstate; struct query_info; +struct config_file; /** * Send queries to outside servers and wait for answers from servers. @@ -158,6 +159,12 @@ struct outside_network { size_t num_tcp; /** number of tcp communication points in use. */ size_t num_tcp_outgoing; + /** max number of queries on a reuse connection */ + size_t max_reuse_tcp_queries; + /** timeout for REUSE entries in milliseconds. */ + int tcp_reuse_timeout; + /** timeout in milliseconds for TCP queries to auth servers. */ + int tcp_auth_query_timeout; /** * tree of still-open and waiting tcp connections for reuse. * can be closed and reopened to get a new tcp connection. @@ -295,11 +302,6 @@ struct reuse_tcp { struct outside_network* outnet; }; -/** max number of queries on a reuse connection */ -#define MAX_REUSE_TCP_QUERIES 200 -/** timeout for REUSE entries in milliseconds. */ -#define REUSE_TIMEOUT 60000 - /** * A query that has an answer pending for it. */ @@ -344,6 +346,8 @@ struct pending { struct pending_tcp { /** next in list of free tcp comm points, or NULL. */ struct pending_tcp* next_free; + /** port for of the outgoing interface that is used */ + struct port_if* pi; /** tcp comm point it was sent on (and reply must come back on). */ struct comm_point* c; /** the query being serviced, NULL if the pending_tcp is unused. */ @@ -408,6 +412,10 @@ struct waiting_tcp { char* tls_auth_name; /** the packet was involved in an error, to stop looping errors */ int error_count; +#ifdef USE_DNSTAP + /** serviced query pointer for dnstap to get logging info, if nonNULL*/ + struct serviced_query* sq; +#endif }; /** @@ -534,6 +542,9 @@ struct serviced_query { * @param tls_use_sni: if SNI is used for TLS connections. * @param dtenv: environment to send dnstap events with (if enabled). * @param udp_connect: if the udp_connect option is enabled. + * @param max_reuse_tcp_queries: max number of queries on a reuse connection. + * @param tcp_reuse_timeout: timeout for REUSE entries in milliseconds. + * @param tcp_auth_query_timeout: timeout in milliseconds for TCP queries to auth servers. * @return: the new structure (with no pending answers) or NULL on error. */ struct outside_network* outside_network_create(struct comm_base* base, @@ -543,7 +554,8 @@ struct outside_network* outside_network_create(struct comm_base* base, int numavailports, size_t unwanted_threshold, int tcp_mss, void (*unwanted_action)(void*), void* unwanted_param, int do_udp, void* sslctx, int delayclose, int tls_use_sni, struct dt_env *dtenv, - int udp_connect); + int udp_connect, int max_reuse_tcp_queries, int tcp_reuse_timeout, + int tcp_auth_query_timeout); /** * Delete outside_network structure. @@ -670,12 +682,28 @@ struct waiting_tcp* reuse_tcp_by_id_find(struct reuse_tcp* reuse, uint16_t id); /** insert element in tree by id */ void reuse_tree_by_id_insert(struct reuse_tcp* reuse, struct waiting_tcp* w); +/** insert element in tcp_reuse tree and LRU list */ +int reuse_tcp_insert(struct outside_network* outnet, + struct pending_tcp* pend_tcp); + +/** touch the LRU of the element */ +void reuse_tcp_lru_touch(struct outside_network* outnet, + struct reuse_tcp* reuse); + +/** remove element from tree and LRU list */ +void reuse_tcp_remove_tree_list(struct outside_network* outnet, + struct reuse_tcp* reuse); + +/** snip the last reuse_tcp element off of the LRU list if any */ +struct reuse_tcp* reuse_tcp_lru_snip(struct outside_network* outnet); + /** delete readwait waiting_tcp elements, deletes the elements in the list */ void reuse_del_readwait(rbtree_type* tree_by_id); /** get TCP file descriptor for address, returns -1 on failure, * tcp_mss is 0 or maxseg size to set for TCP packets. */ -int outnet_get_tcp_fd(struct sockaddr_storage* addr, socklen_t addrlen, int tcp_mss, int dscp); +int outnet_get_tcp_fd(struct sockaddr_storage* addr, socklen_t addrlen, + int tcp_mss, int dscp); /** * Create udp commpoint suitable for sending packets to the destination. @@ -729,12 +757,13 @@ struct comm_point* outnet_comm_point_for_tcp(struct outside_network* outnet, * @param ssl: set to true for https. * @param host: hostname to use for the destination. part of http request. * @param path: pathname to lookup, eg. name of the file on the destination. + * @param cfg: running configuration for User-Agent setup. * @return http_out commpoint, or NULL. */ struct comm_point* outnet_comm_point_for_http(struct outside_network* outnet, comm_point_callback_type* cb, void* cb_arg, struct sockaddr_storage* to_addr, socklen_t to_addrlen, int timeout, - int ssl, char* host, char* path); + int ssl, char* host, char* path, struct config_file* cfg); /** connect tcp connection to addr, 0 on failure */ int outnet_tcp_connect(int s, struct sockaddr_storage* addr, socklen_t addrlen); diff --git a/services/rpz.c b/services/rpz.c index 2b6b0ac3fccf..3a1ec00d7d38 100644 --- a/services/rpz.c +++ b/services/rpz.c @@ -162,6 +162,7 @@ rpz_rr_to_action(uint16_t rr_type, uint8_t* rdatawl, size_t rdatalen) case LDNS_RR_TYPE_RRSIG: case LDNS_RR_TYPE_NSEC: case LDNS_RR_TYPE_NSEC3: + case LDNS_RR_TYPE_NSEC3PARAM: return RPZ_INVALID_ACTION; case LDNS_RR_TYPE_CNAME: break; @@ -479,8 +480,21 @@ rpz_insert_qname_trigger(struct rpz* r, uint8_t* dname, size_t dnamelen, int newzone = 0; if(a == RPZ_TCP_ONLY_ACTION || a == RPZ_INVALID_ACTION) { - verbose(VERB_ALGO, "RPZ: skipping unsupported action: %s", - rpz_action_to_string(a)); + char str[255+1]; + if(rrtype == LDNS_RR_TYPE_SOA || rrtype == LDNS_RR_TYPE_NS || + rrtype == LDNS_RR_TYPE_DNAME || + rrtype == LDNS_RR_TYPE_DNSKEY || + rrtype == LDNS_RR_TYPE_RRSIG || + rrtype == LDNS_RR_TYPE_NSEC || + rrtype == LDNS_RR_TYPE_NSEC3PARAM || + rrtype == LDNS_RR_TYPE_NSEC3 || + rrtype == LDNS_RR_TYPE_DS) { + free(dname); + return; /* no need to log these types as unsupported */ + } + dname_str(dname, str); + verbose(VERB_ALGO, "RPZ: qname trigger, %s skipping unsupported action: %s", + str, rpz_action_to_string(a)); free(dname); return; } @@ -552,8 +566,10 @@ rpz_insert_response_ip_trigger(struct rpz* r, uint8_t* dname, size_t dnamelen, if(a == RPZ_TCP_ONLY_ACTION || a == RPZ_INVALID_ACTION || respa == respip_invalid) { - verbose(VERB_ALGO, "RPZ: skipping unsupported action: %s", - rpz_action_to_string(a)); + char str[255+1]; + dname_str(dname, str); + verbose(VERB_ALGO, "RPZ: respip trigger, %s skipping unsupported action: %s", + str, rpz_action_to_string(a)); return 0; } @@ -702,7 +718,7 @@ rpz_find_zone(struct rpz* r, uint8_t* qname, size_t qname_len, uint16_t qclass, * zone match, append '*' to that and do another lookup. */ ce = dname_get_shared_topdomain(z->name, qname); - if(!ce /* should not happen */ || !*ce /* root */) { + if(!ce /* should not happen */) { lock_rw_unlock(&z->lock); if(zones_keep_lock) { lock_rw_unlock(&r->local_zones->lock); |
