diff options
author | Doug Barton <dougb@FreeBSD.org> | 2009-01-09 09:58:33 +0000 |
---|---|---|
committer | Doug Barton <dougb@FreeBSD.org> | 2009-01-09 09:58:33 +0000 |
commit | 0da30d61e624fa6fbd4d6b2057244e0c9e0c213b (patch) | |
tree | 76e88c380de063d6c34d9c662df82c2ee84b3bf1 /lib | |
parent | 84c329fd83557f6415f6355007797dbcbaa685b0 (diff) |
Notes
Diffstat (limited to 'lib')
77 files changed, 4111 insertions, 1112 deletions
diff --git a/lib/bind/api b/lib/bind/api index d4b1ecd36b2ba..7ffeba810f243 100644 --- a/lib/bind/api +++ b/lib/bind/api @@ -1,3 +1,3 @@ -LIBINTERFACE = 4 -LIBREVISION = 9 -LIBAGE = 0 +LIBINTERFACE = 5 +LIBREVISION = 2 +LIBAGE = 1 diff --git a/lib/bind/bsd/Makefile.in b/lib/bind/bsd/Makefile.in index dd7b616e482d0..998cd637e2b11 100644 --- a/lib/bind/bsd/Makefile.in +++ b/lib/bind/bsd/Makefile.in @@ -1,7 +1,7 @@ -# Copyright (C) 2004 Internet Systems Consortium, Inc. ("ISC") +# Copyright (C) 2004, 2008 Internet Systems Consortium, Inc. ("ISC") # Copyright (C) 2001 Internet Software Consortium. # -# Permission to use, copy, modify, and distribute this software for any +# Permission to use, copy, modify, and/or distribute this software for any # purpose with or without fee is hereby granted, provided that the above # copyright notice and this permission notice appear in all copies. # @@ -13,7 +13,7 @@ # OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR # PERFORMANCE OF THIS SOFTWARE. -# $Id: Makefile.in,v 1.6.206.1 2004/03/06 08:13:22 marka Exp $ +# $Id: Makefile.in,v 1.6.206.3 2008/04/28 23:45:35 tbox Exp $ srcdir= @srcdir@ VPATH = @srcdir@ @@ -34,6 +34,6 @@ SRCS= daemon.c ftruncate.c gettimeofday.c mktemp.c putenv.c \ TARGETS= ${OBJS} -CINCLUDES= -I.. -I${srcdir}/../include +CINCLUDES= -I.. 
-I../include -I${srcdir}/../include @BIND9_MAKE_RULES@ diff --git a/lib/bind/bsd/strerror.c b/lib/bind/bsd/strerror.c index d13adbb03b4df..5fba248f36247 100644 --- a/lib/bind/bsd/strerror.c +++ b/lib/bind/bsd/strerror.c @@ -1,6 +1,6 @@ #if defined(LIBC_SCCS) && !defined(lint) static const char sccsid[] = "@(#)strerror.c 8.1 (Berkeley) 6/4/93"; -static const char rcsid[] = "$Id: strerror.c,v 1.3.2.1 2001/11/02 17:45:31 gson Exp $"; +static const char rcsid[] = "$Id: strerror.c,v 1.3.2.1.10.1 2008/04/28 04:25:42 marka Exp $"; #endif /* LIBC_SCCS and not lint */ /* @@ -60,12 +60,14 @@ isc_strerror(int num) { static char ebuf[40] = UPREFIX; /* 64-bit number + slop */ u_int errnum; char *p, *t; +#ifndef USE_SYSERROR_LIST const char *ret; +#endif char tmp[40]; errnum = num; /* convert to unsigned */ #ifdef USE_SYSERROR_LIST - if (errnum < sys_nerr) + if (errnum < (u_int)sys_nerr) return (sys_errlist[errnum]); #else #undef strerror diff --git a/lib/bind/bsd/strtoul.c b/lib/bind/bsd/strtoul.c index d110f30943dfa..0741fc5504a3d 100644 --- a/lib/bind/bsd/strtoul.c +++ b/lib/bind/bsd/strtoul.c @@ -1,6 +1,6 @@ #if defined(LIBC_SCCS) && !defined(lint) static const char sccsid[] = "@(#)strtoul.c 8.1 (Berkeley) 6/4/93"; -static const char rcsid[] = "$Id: strtoul.c,v 1.1.2.1 2003/06/27 03:51:35 marka Exp $"; +static const char rcsid[] = "$Id: strtoul.c,v 1.1.2.1.4.1 2008/04/28 04:25:42 marka Exp $"; #endif /* LIBC_SCCS and not lint */ /* @@ -70,7 +70,7 @@ strtoul(const char *nptr, char **endptr, int base) { * See strtol for comments as to the logic used. */ do { - c = *(unsigned char *)s++; + c = *(const unsigned char *)s++; } while (isspace(c)); if (c == '-') { neg = 1; @@ -87,7 +87,7 @@ strtoul(const char *nptr, char **endptr, int base) { base = c == '0' ? 
8 : 10; cutoff = (u_long)ULONG_MAX / (u_long)base; cutlim = (u_long)ULONG_MAX % (u_long)base; - for (acc = 0, any = 0;; c = *(unsigned char*)s++) { + for (acc = 0, any = 0;; c = *(const unsigned char*)s++) { if (isdigit(c)) c -= '0'; else if (isalpha(c)) @@ -96,7 +96,7 @@ strtoul(const char *nptr, char **endptr, int base) { break; if (c >= base) break; - if (any < 0 || acc > cutoff || acc == cutoff && c > cutlim) + if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim)) any = -1; else { any = 1; @@ -110,7 +110,7 @@ strtoul(const char *nptr, char **endptr, int base) { } else if (neg) acc = -acc; if (endptr != 0) - *endptr = (char *)(any ? s - 1 : nptr); + DE_CONST((any ? s - 1 : nptr), *endptr); return (acc); } diff --git a/lib/bind/configure.in b/lib/bind/configure.in index 4b752f93f1e1e..5115dc8eabfdd 100644 --- a/lib/bind/configure.in +++ b/lib/bind/configure.in @@ -13,7 +13,7 @@ # OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR # PERFORMANCE OF THIS SOFTWARE. 
-AC_REVISION($Revision: 1.83.2.5.2.38.2.1 $) +AC_REVISION($Revision: 1.83.2.5.2.42 $) AC_INIT(resolv/herror.c) AC_PREREQ(2.13) @@ -169,7 +169,7 @@ AC_PROG_CC AC_HEADER_STDC -AC_CHECK_HEADERS(fcntl.h db.h paths.h sys/time.h unistd.h sys/sockio.h sys/select.h sys/timers.h stropts.h) +AC_CHECK_HEADERS(fcntl.h db.h paths.h sys/time.h unistd.h sys/sockio.h sys/select.h sys/timers.h stropts.h memory.h) AC_C_CONST AC_C_INLINE @@ -461,6 +461,7 @@ AC_SUBST(WANT_THREADS_OBJS) AC_CHECK_FUNC(strlcat, AC_DEFINE(HAVE_STRLCAT)) AC_CHECK_FUNC(memmove, AC_DEFINE(HAVE_MEMMOVE)) AC_CHECK_FUNC(memchr, AC_DEFINE(HAVE_MEMCHR)) +AC_CHECK_FUNC(strtoul, , AC_DEFINE(NEED_STRTOUL)) AC_CHECK_FUNC(if_nametoindex, [USE_IFNAMELINKID="#define USE_IFNAMELINKID 1"], @@ -490,6 +491,16 @@ AC_CHECK_FUNC(strerror, [NEED_STRERROR="#undef NEED_STRERROR"], [NEED_STRERROR="#define NEED_STRERROR 1"]) AC_SUBST(NEED_STRERROR) +if test -n "$NEED_STRERROR" +then + AC_MSG_CHECKING([for extern char * sys_errlist[]]) + AC_TRY_LINK([ extern int sys_nerr; extern char *sys_errlist[]; ], + [ const char *p = sys_errlist[0]; ], + AC_MSG_RESULT(yes) + AC_DEFINE(USE_SYSERROR_LIST), + AC_MSG_RESULT(no)) +fi + # # flockfile is usually provided by pthreads, but we may want to use it # even if compiled with --disable-threads. @@ -666,6 +677,14 @@ AC_SUBST(PURIFY) # # GNU libtool support # +case $host in +sunos*) + # Just set the maximum command line length for sunos as it otherwise + # takes a exceptionally long time to work it out. Required for libtool. 
+ lt_cv_sys_max_cmd_len=4096; + ;; +esac + AC_ARG_WITH(libtool, [ --with-libtool use GNU libtool (following indented options supported)], use_libtool="$withval", use_libtool="no") @@ -1050,6 +1069,7 @@ case "$host" in *-qnx*) PORT_DIR="port/qnx";; *-rhapsody*) PORT_DIR="port/rhapsody";; *-sunos4*) + AC_DEFINE(NEED_SUN4PROTOS) PORT_NONBLOCK="#define PORT_NONBLOCK O_NDELAY" PORT_DIR="port/sunos";; *-solaris2.[[01234]]) @@ -1246,6 +1266,38 @@ found_rt_iflist AC_CHECK_FUNC(strsep, [ISC_PLATFORM_NEEDSTRSEP="#undef ISC_PLATFORM_NEEDSTRSEP"], [ISC_PLATFORM_NEEDSTRSEP="#define ISC_PLATFORM_NEEDSTRSEP 1"]) + + +AC_MSG_CHECKING(for char *sprintf) +AC_TRY_COMPILE([ +#include <stdio.h> +], +[ char buf[2]; return(*sprintf(buf,"x"));], +AC_DEFINE(SPRINTF_CHAR) +AC_MSG_RESULT(yes) +, +AC_MSG_RESULT(no) +) + +AC_MSG_CHECKING(for char *vsprintf) +case $host in +*sunos4*) # not decared in any header file. +AC_DEFINE(VSPRINTF_CHAR) +AC_MSG_RESULT(yes) +;; +*) +AC_TRY_COMPILE([ +#include <stdio.h> +], +[ char buf[2]; return(*vsprintf(buf,"x"));], +AC_DEFINE(VSPRINTF_CHAR) +AC_MSG_RESULT(yes) +, +AC_MSG_RESULT(no) +) +;; +esac + AC_CHECK_FUNC(vsnprintf, [ISC_PLATFORM_NEEDVSNPRINTF="#undef ISC_PLATFORM_NEEDVSNPRINTF"], [ISC_EXTRA_OBJS="$ISC_EXTRA_OBJS print.$O" @@ -1256,12 +1308,7 @@ AC_SUBST(ISC_PLATFORM_NEEDVSNPRINTF) AC_SUBST(ISC_EXTRA_OBJS) AC_SUBST(ISC_EXTRA_SRCS) -AC_CHECK_FUNC(strerror, - [USE_SYSERROR_LIST="#undef USE_SYSERROR_LIST"], - [USE_SYSERROR_LIST="#define USE_SYSERROR_LIST 1"]) -AC_SUBST(USE_SYSERROR_LIST) -# # Determine the printf format characters to use when printing # values of type isc_int64_t. 
We make the assumption that platforms # where a "long long" is the same size as a "long" (e.g., Alpha/OSF1) @@ -2590,7 +2637,7 @@ case "$host" in *-solaris2.9) hack_shutup_in6addr_init_macros=yes ;; - *-solaris2.1[0-9]) + *-solaris2.1[[0-9]]) hack_shutup_in6addr_init_macros=yes ;; esac diff --git a/lib/bind/dst/Makefile.in b/lib/bind/dst/Makefile.in index 8b306591708ea..dbc0265669fdb 100644 --- a/lib/bind/dst/Makefile.in +++ b/lib/bind/dst/Makefile.in @@ -1,7 +1,7 @@ -# Copyright (C) 2004 Internet Systems Consortium, Inc. ("ISC") +# Copyright (C) 2004, 2008 Internet Systems Consortium, Inc. ("ISC") # Copyright (C) 2001 Internet Software Consortium. # -# Permission to use, copy, modify, and distribute this software for any +# Permission to use, copy, modify, and/or distribute this software for any # purpose with or without fee is hereby granted, provided that the above # copyright notice and this permission notice appear in all copies. # @@ -13,7 +13,7 @@ # OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR # PERFORMANCE OF THIS SOFTWARE. -# $Id: Makefile.in,v 1.5.206.1 2004/03/06 08:13:22 marka Exp $ +# $Id: Makefile.in,v 1.5.206.3 2008/04/28 23:45:35 tbox Exp $ srcdir= @srcdir@ VPATH = @srcdir@ @@ -26,7 +26,7 @@ TARGETS= ${OBJS} CRYPTFLAGS= -DCYLINK_DSS -DHMAC_MD5 -DUSE_MD5 -DDNSSAFE -CINCLUDES= -I.. -I${srcdir}/../include ${CRYPTINCL} +CINCLUDES= -I.. 
-I../include -I${srcdir}/../include ${CRYPTINCL} CDEFINES= ${CRYPTFLAGS} @BIND9_MAKE_RULES@ diff --git a/lib/bind/dst/dst_api.c b/lib/bind/dst/dst_api.c index c1313075aeeec..f2bf7d2667710 100644 --- a/lib/bind/dst/dst_api.c +++ b/lib/bind/dst/dst_api.c @@ -1,5 +1,5 @@ #ifndef LINT -static const char rcsid[] = "$Header: /proj/cvs/prod/bind9/lib/bind/dst/dst_api.c,v 1.4.2.6.8.6 2007/09/24 17:26:10 each Exp $"; +static const char rcsid[] = "$Header: /proj/cvs/prod/bind9/lib/bind/dst/Attic/dst_api.c,v 1.4.2.6.8.6 2007/09/24 17:26:10 each Exp $"; #endif /* diff --git a/lib/bind/dst/hmac_link.c b/lib/bind/dst/hmac_link.c index efad2583f666b..6656226473146 100644 --- a/lib/bind/dst/hmac_link.c +++ b/lib/bind/dst/hmac_link.c @@ -1,6 +1,6 @@ #ifdef HMAC_MD5 #ifndef LINT -static const char rcsid[] = "$Header: /proj/cvs/prod/bind9/lib/bind/dst/hmac_link.c,v 1.2.2.1.4.4 2007/09/24 17:26:10 each Exp $"; +static const char rcsid[] = "$Header: /proj/cvs/prod/bind9/lib/bind/dst/Attic/hmac_link.c,v 1.2.2.1.4.4 2007/09/24 17:26:10 each Exp $"; #endif /* * Portions Copyright (c) 1995-1998 by Trusted Information Systems, Inc. 
diff --git a/lib/bind/dst/support.c b/lib/bind/dst/support.c index 8fe3cdb4780d8..8bcc4e0e6df15 100644 --- a/lib/bind/dst/support.c +++ b/lib/bind/dst/support.c @@ -1,4 +1,4 @@ -static const char rcsid[] = "$Header: /proj/cvs/prod/bind9/lib/bind/dst/support.c,v 1.2.2.1.10.2 2005/10/11 00:48:14 marka Exp $"; +static const char rcsid[] = "$Header: /proj/cvs/prod/bind9/lib/bind/dst/Attic/support.c,v 1.2.2.1.10.2 2005/10/11 00:48:14 marka Exp $"; /* diff --git a/lib/bind/include/arpa/nameser.h b/lib/bind/include/arpa/nameser.h index 23db49871dcfa..ab297a14c7967 100644 --- a/lib/bind/include/arpa/nameser.h +++ b/lib/bind/include/arpa/nameser.h @@ -49,7 +49,7 @@ */ /* - * $Id: nameser.h,v 1.2.2.4.4.1 2004/03/09 08:33:30 marka Exp $ + * $Id: nameser.h,v 1.2.2.4.4.2 2008/04/28 05:46:51 marka Exp $ */ #ifndef _ARPA_NAMESER_H_ @@ -430,9 +430,10 @@ typedef enum __ns_cert_types { #define NS_NXT_MAX 127 /* - * EDNS0 extended flags, host order. + * EDNS0 extended flags and option codes, host order. */ #define NS_OPT_DNSSEC_OK 0x8000U +#define NS_OPT_NSID 3 /* * Inline versions of get/put short/long. Pointer is advanced. 
diff --git a/lib/bind/include/isc/assertions.h b/lib/bind/include/isc/assertions.h index 9a9b9dec98559..4b3335368e871 100644 --- a/lib/bind/include/isc/assertions.h +++ b/lib/bind/include/isc/assertions.h @@ -16,7 +16,7 @@ */ /* - * $Id: assertions.h,v 1.1.206.1 2004/03/09 08:33:30 marka Exp $ + * $Id: assertions.h,v 1.1.206.2 2008/10/15 03:57:45 marka Exp $ */ #ifndef ASSERTIONS_H @@ -29,18 +29,19 @@ typedef enum { typedef void (*assertion_failure_callback)(const char *, int, assertion_type, const char *, int); +/* coverity[+kill] */ extern assertion_failure_callback __assertion_failed; void set_assertion_failure_callback(assertion_failure_callback f); const char *assertion_type_to_text(assertion_type type); -#ifdef CHECK_ALL +#if defined(CHECK_ALL) || defined(__COVERITY__) #define CHECK_REQUIRE 1 #define CHECK_ENSURE 1 #define CHECK_INSIST 1 #define CHECK_INVARIANT 1 #endif -#ifdef CHECK_NONE +#if defined(CHECK_NONE) && !defined(__COVERITY__) #define CHECK_REQUIRE 0 #define CHECK_ENSURE 0 #define CHECK_INSIST 0 diff --git a/lib/bind/include/isc/misc.h b/lib/bind/include/isc/misc.h index b08b02d2890e2..a391974c18cad 100644 --- a/lib/bind/include/isc/misc.h +++ b/lib/bind/include/isc/misc.h @@ -16,13 +16,14 @@ */ /* - * $Id: misc.h,v 1.2.2.1.4.1 2004/03/09 08:33:31 marka Exp $ + * $Id: misc.h,v 1.2.2.1.4.2 2008/04/28 04:25:42 marka Exp $ */ #ifndef _ISC_MISC_H #define _ISC_MISC_H #include <stdio.h> +#include <sys/types.h> #define bitncmp __bitncmp /*#define isc_movefile __isc_movefile */ diff --git a/lib/bind/include/resolv.h b/lib/bind/include/resolv.h index 87a95200bb952..0a1c7f26102b6 100644 --- a/lib/bind/include/resolv.h +++ b/lib/bind/include/resolv.h @@ -50,7 +50,7 @@ /* * @(#)resolv.h 8.1 (Berkeley) 6/2/93 - * $Id: resolv.h,v 1.7.2.11.4.3 2005/08/25 04:44:13 marka Exp $ + * $Id: resolv.h,v 1.7.2.11.4.4 2008/04/28 05:46:51 marka Exp $ */ #ifndef _RESOLV_H_ @@ -252,6 +252,7 @@ union res_sockaddr_union { #define RES_NOCHECKNAME 0x00008000 /* do not check names 
for sanity. */ #define RES_KEEPTSIG 0x00010000 /* do not strip TSIG records */ #define RES_BLAST 0x00020000 /* blast all recursive servers */ +#define RES_NSID 0x00040000 /*%< request name server ID */ #define RES_NOTLDQUERY 0x00100000 /* don't unqualified name as a tld */ #define RES_USE_DNSSEC 0x00200000 /* use DNSSEC using OK bit in OPT */ /* #define RES_DEBUG2 0x00400000 */ /* nslookup internal */ @@ -398,6 +399,7 @@ extern const struct res_sym __p_rcode_syms[]; #define sym_ntos __sym_ntos #define sym_ston __sym_ston #define res_nopt __res_nopt +#define res_nopt_rdata __res_nopt_rdata #define res_ndestroy __res_ndestroy #define res_nametoclass __res_nametoclass #define res_nametotype __res_nametotype @@ -484,6 +486,8 @@ int res_findzonecut2 __P((res_state, const char *, ns_class, int, union res_sockaddr_union *, int)); void res_nclose __P((res_state)); int res_nopt __P((res_state, int, u_char *, int, int)); +int res_nopt_rdata __P((res_state, int, u_char *, int, u_char *, + u_short, u_short, u_char *)); void res_send_setqhook __P((res_send_qhook)); void res_send_setrhook __P((res_send_rhook)); int __res_vinit __P((res_state, int)); diff --git a/lib/bind/inet/Makefile.in b/lib/bind/inet/Makefile.in index 96698fde7f8bf..9f79b92431548 100644 --- a/lib/bind/inet/Makefile.in +++ b/lib/bind/inet/Makefile.in @@ -1,7 +1,7 @@ -# Copyright (C) 2004 Internet Systems Consortium, Inc. ("ISC") +# Copyright (C) 2004, 2008 Internet Systems Consortium, Inc. ("ISC") # Copyright (C) 2001 Internet Software Consortium. # -# Permission to use, copy, modify, and distribute this software for any +# Permission to use, copy, modify, and/or distribute this software for any # purpose with or without fee is hereby granted, provided that the above # copyright notice and this permission notice appear in all copies. # @@ -13,7 +13,7 @@ # OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR # PERFORMANCE OF THIS SOFTWARE. 
-# $Id: Makefile.in,v 1.4.206.1 2004/03/06 08:13:23 marka Exp $ +# $Id: Makefile.in,v 1.4.206.3 2008/04/28 23:45:35 tbox Exp $ srcdir= @srcdir@ VPATH = @srcdir@ @@ -30,6 +30,6 @@ SRCS= inet_addr.c inet_cidr_ntop.c inet_cidr_pton.c inet_data.c \ TARGETS= ${OBJS} -CINCLUDES= -I.. -I${srcdir}/../include +CINCLUDES= -I.. -I../include -I${srcdir}/../include @BIND9_MAKE_RULES@ diff --git a/lib/bind/inet/inet_net_pton.c b/lib/bind/inet/inet_net_pton.c index abecfc79cd2c8..6a67379c0e65d 100644 --- a/lib/bind/inet/inet_net_pton.c +++ b/lib/bind/inet/inet_net_pton.c @@ -16,7 +16,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static const char rcsid[] = "$Id: inet_net_pton.c,v 1.4.2.1.8.2 2004/03/17 00:29:47 marka Exp $"; +static const char rcsid[] = "$Id: inet_net_pton.c,v 1.4.2.1.8.3 2008/08/26 04:50:32 marka Exp $"; #endif #include "port_before.h" @@ -133,11 +133,11 @@ inet_net_pton_ipv4(const char *src, u_char *dst, size_t size) { INSIST(n >= 0 && n <= 9); bits *= 10; bits += n; + if (bits > 32) + goto enoent; } while ((ch = *src++) != '\0' && isascii(ch) && isdigit(ch)); if (ch != '\0') goto enoent; - if (bits > 32) - goto emsgsize; } /* Firey death and destruction unless we prefetched EOS. */ diff --git a/lib/bind/irs/Makefile.in b/lib/bind/irs/Makefile.in index 9695435ba66fd..ba0092b9ae367 100644 --- a/lib/bind/irs/Makefile.in +++ b/lib/bind/irs/Makefile.in @@ -1,7 +1,7 @@ -# Copyright (C) 2004 Internet Systems Consortium, Inc. ("ISC") +# Copyright (C) 2004, 2008 Internet Systems Consortium, Inc. ("ISC") # Copyright (C) 2001 Internet Software Consortium. # -# Permission to use, copy, modify, and distribute this software for any +# Permission to use, copy, modify, and/or distribute this software for any # purpose with or without fee is hereby granted, provided that the above # copyright notice and this permission notice appear in all copies. # @@ -13,7 +13,7 @@ # OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR # PERFORMANCE OF THIS SOFTWARE. 
-# $Id: Makefile.in,v 1.7.206.3 2004/12/07 00:38:35 marka Exp $ +# $Id: Makefile.in,v 1.7.206.5 2008/04/28 23:45:35 tbox Exp $ srcdir= @srcdir@ VPATH = @srcdir@ @@ -65,6 +65,6 @@ WANT_IRS_THREADSGR_OBJS=getgrent_r.@O@ TARGETS= ${OBJS} -CINCLUDES= -I.. -I${srcdir}/../include +CINCLUDES= -I.. -I../include -I${srcdir}/../include @BIND9_MAKE_RULES@ diff --git a/lib/bind/irs/dns_ho.c b/lib/bind/irs/dns_ho.c index b1bd5f01e6794..d942ae1be64f5 100644 --- a/lib/bind/irs/dns_ho.c +++ b/lib/bind/irs/dns_ho.c @@ -52,7 +52,7 @@ /* BIND Id: gethnamaddr.c,v 8.15 1996/05/22 04:56:30 vixie Exp $ */ #if defined(LIBC_SCCS) && !defined(lint) -static const char rcsid[] = "$Id: dns_ho.c,v 1.5.2.7.4.9 2006/12/07 04:00:08 marka Exp $"; +static const char rcsid[] = "$Id: dns_ho.c,v 1.5.2.7.4.10 2008/09/24 06:04:13 marka Exp $"; #endif /* LIBC_SCCS and not lint */ /* Imports. */ @@ -962,7 +962,7 @@ gethostans(struct irs_ho *this, } if (m == 0) continue; - if (hap < &pvt->h_addr_ptrs[MAXADDRS-1]) + if (hap < &pvt->h_addr_ptrs[MAXADDRS]) hap++; *hap = NULL; bp += m; @@ -984,9 +984,10 @@ gethostans(struct irs_ho *this, *ap = NULL; *hap = NULL; - if (pvt->res->nsort && haveanswer > 1 && qtype == T_A) + if (pvt->res->nsort && hap != pvt->h_addr_ptrs && + qtype == T_A) addrsort(pvt->res, pvt->h_addr_ptrs, - haveanswer); + hap - pvt->h_addr_ptrs); if (pvt->host.h_name == NULL) { n = strlen(qname) + 1; /* for the \0 */ if (n > (ep - bp) || n >= MAXHOSTNAMELEN) @@ -1053,7 +1054,7 @@ add_hostent(struct pvt *pvt, char *bp, char **hap, struct addrinfo *ai) /* Avoid overflows. */ if (bp + addrlen > &pvt->hostbuf[sizeof(pvt->hostbuf) - 1]) return(-1); - if (hap >= &pvt->h_addr_ptrs[MAXADDRS-1]) + if (hap >= &pvt->h_addr_ptrs[MAXADDRS]) return(0); /* fail, but not treat it as an error. */ /* Suppress duplicates. 
*/ diff --git a/lib/bind/irs/irp.c b/lib/bind/irs/irp.c index 649079c31f1c2..4ddf2d883f0a3 100644 --- a/lib/bind/irs/irp.c +++ b/lib/bind/irs/irp.c @@ -16,7 +16,7 @@ */ #if !defined(LINT) && !defined(CODECENTER) -static const char rcsid[] = "$Id: irp.c,v 1.3.2.1.10.4 2006/03/10 00:17:21 marka Exp $"; +static const char rcsid[] = "$Id: irp.c,v 1.3.2.1.10.5 2008/04/28 04:25:42 marka Exp $"; #endif /* Imports */ @@ -48,6 +48,12 @@ static const char rcsid[] = "$Id: irp.c,v 1.3.2.1.10.4 2006/03/10 00:17:21 marka #include "port_after.h" +#ifdef VSPRINTF_CHAR +# define VSPRINTF(x) strlen(vsprintf/**/x) +#else +# define VSPRINTF(x) ((size_t)vsprintf x) +#endif + /* Forward. */ static void irp_close(struct irs_acc *); @@ -541,7 +547,7 @@ irs_irp_send_command(struct irp_p *pvt, const char *fmt, ...) { } va_start(ap, fmt); - todo = vsprintf(buffer, fmt, ap); + todo = VSPRINTF((buffer, fmt, ap)); va_end(ap); if (todo > (int)sizeof(buffer) - 3) { syslog(LOG_CRIT, "memory overrun in irs_irp_send_command()"); diff --git a/lib/bind/isc/Makefile.in b/lib/bind/isc/Makefile.in index d8e8889ab385b..b03a9f2c0e9c2 100644 --- a/lib/bind/isc/Makefile.in +++ b/lib/bind/isc/Makefile.in @@ -1,7 +1,7 @@ -# Copyright (C) 2004 Internet Systems Consortium, Inc. ("ISC") +# Copyright (C) 2004, 2008 Internet Systems Consortium, Inc. ("ISC") # Copyright (C) 2001 Internet Software Consortium. # -# Permission to use, copy, modify, and distribute this software for any +# Permission to use, copy, modify, and/or distribute this software for any # purpose with or without fee is hereby granted, provided that the above # copyright notice and this permission notice appear in all copies. # @@ -13,7 +13,7 @@ # OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR # PERFORMANCE OF THIS SOFTWARE. 
-# $Id: Makefile.in,v 1.6.206.1 2004/03/06 08:13:23 marka Exp $ +# $Id: Makefile.in,v 1.6.206.3 2008/04/28 23:45:35 tbox Exp $ srcdir= @srcdir@ VPATH = @srcdir@ @@ -30,6 +30,6 @@ SRCS= assertions.c base64.c bitncmp.c ctl_clnt.c ctl_p.c \ TARGETS= ${OBJS} -CINCLUDES= -I.. -I${srcdir}/../include +CINCLUDES= -I.. -I../include -I${srcdir}/../include @BIND9_MAKE_RULES@ diff --git a/lib/bind/isc/assertions.c b/lib/bind/isc/assertions.c index f1fb2efe95700..264670e1075e1 100644 --- a/lib/bind/isc/assertions.c +++ b/lib/bind/isc/assertions.c @@ -16,7 +16,7 @@ */ #if !defined(LINT) && !defined(CODECENTER) -static const char rcsid[] = "$Id: assertions.c,v 1.1.206.1 2004/03/09 08:33:39 marka Exp $"; +static const char rcsid[] = "$Id: assertions.c,v 1.1.206.2 2008/10/15 03:57:45 marka Exp $"; #endif #include "port_before.h" @@ -78,6 +78,7 @@ assertion_type_to_text(assertion_type type) { * Private. */ +/* coverity[+kill] */ static void default_assertion_failed(const char *file, int line, assertion_type type, const char *cond, int print_errno) diff --git a/lib/bind/isc/bitncmp.c b/lib/bind/isc/bitncmp.c index fcff9f71ed3f3..35b570b45fb02 100644 --- a/lib/bind/isc/bitncmp.c +++ b/lib/bind/isc/bitncmp.c @@ -16,7 +16,7 @@ */ #if defined(LIBC_SCCS) && !defined(lint) -static const char rcsid[] = "$Id: bitncmp.c,v 1.1.206.1 2004/03/09 08:33:39 marka Exp $"; +static const char rcsid[] = "$Id: bitncmp.c,v 1.1.206.2 2008/05/12 00:20:16 marka Exp $"; #endif #include "port_before.h" @@ -48,7 +48,7 @@ bitncmp(const void *l, const void *r, int n) { b = n / 8; x = memcmp(l, r, b); - if (x) + if (x || (n % 8) == 0) return (x); lb = ((const u_char *)l)[b]; diff --git a/lib/bind/isc/ctl_clnt.c b/lib/bind/isc/ctl_clnt.c index ddb2efbe660eb..d921b559c8758 100644 --- a/lib/bind/isc/ctl_clnt.c +++ b/lib/bind/isc/ctl_clnt.c @@ -1,5 +1,5 @@ #if !defined(lint) && !defined(SABER) -static const char rcsid[] = "$Id: ctl_clnt.c,v 1.4.2.1.4.4 2007/05/18 06:25:17 marka Exp $"; +static const char rcsid[] = 
"$Id: ctl_clnt.c,v 1.4.2.1.4.5 2008/04/28 04:25:42 marka Exp $"; #endif /* not lint */ /* @@ -38,6 +38,9 @@ static const char rcsid[] = "$Id: ctl_clnt.c,v 1.4.2.1.4.4 2007/05/18 06:25:17 m #include <string.h> #include <time.h> #include <unistd.h> +#ifdef HAVE_MEMORY_H +#include <memory.h> +#endif #include <isc/assertions.h> #include <isc/ctl.h> diff --git a/lib/bind/isc/ctl_srvr.c b/lib/bind/isc/ctl_srvr.c index 0d1b53dfef088..11d39c719275d 100644 --- a/lib/bind/isc/ctl_srvr.c +++ b/lib/bind/isc/ctl_srvr.c @@ -1,5 +1,5 @@ #if !defined(lint) && !defined(SABER) -static const char rcsid[] = "$Id: ctl_srvr.c,v 1.3.2.1.4.4 2006/12/07 04:52:50 marka Exp $"; +static const char rcsid[] = "$Id: ctl_srvr.c,v 1.3.2.1.4.5 2008/04/28 04:25:42 marka Exp $"; #endif /* not lint */ /* @@ -40,6 +40,9 @@ static const char rcsid[] = "$Id: ctl_srvr.c,v 1.3.2.1.4.4 2006/12/07 04:52:50 m #include <time.h> #include <unistd.h> #include <fcntl.h> +#ifdef HAVE_MEMORY_H +#include <memory.h> +#endif #include <isc/assertions.h> #include <isc/ctl.h> diff --git a/lib/bind/nameser/Makefile.in b/lib/bind/nameser/Makefile.in index aa4bc6cf6b4b7..b7b7bc7bad02c 100644 --- a/lib/bind/nameser/Makefile.in +++ b/lib/bind/nameser/Makefile.in @@ -1,7 +1,7 @@ -# Copyright (C) 2004 Internet Systems Consortium, Inc. ("ISC") +# Copyright (C) 2004, 2008 Internet Systems Consortium, Inc. ("ISC") # Copyright (C) 2001 Internet Software Consortium. # -# Permission to use, copy, modify, and distribute this software for any +# Permission to use, copy, modify, and/or distribute this software for any # purpose with or without fee is hereby granted, provided that the above # copyright notice and this permission notice appear in all copies. # @@ -13,7 +13,7 @@ # OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR # PERFORMANCE OF THIS SOFTWARE. 
-# $Id: Makefile.in,v 1.4.206.1 2004/03/15 01:02:45 marka Exp $ +# $Id: Makefile.in,v 1.4.206.3 2008/04/28 23:45:35 tbox Exp $ srcdir= @srcdir@ VPATH = @srcdir@ @@ -26,6 +26,6 @@ SRCS= ns_date.c ns_name.c ns_netint.c ns_parse.c ns_print.c \ TARGETS= ${OBJS} -CINCLUDES= -I.. -I${srcdir}/../include +CINCLUDES= -I.. -I../include -I${srcdir}/../include @BIND9_MAKE_RULES@ diff --git a/lib/bind/port_after.h.in b/lib/bind/port_after.h.in index 162535ee5067a..8ee135bcdde6e 100644 --- a/lib/bind/port_after.h.in +++ b/lib/bind/port_after.h.in @@ -22,6 +22,10 @@ @NEED_DAEMON@ @NEED_STRSEP@ @NEED_STRERROR@ +#ifdef NEED_STRERROR +const char *isc_strerror(int); +#define strerror isc_strerror +#endif @HAS_INET6_STRUCTS@ @HAVE_SIN6_SCOPE_ID@ @NEED_IN6ADDR_ANY@ @@ -30,7 +34,6 @@ @NEED_GETTIMEOFDAY@ @HAVE_STRNDUP@ @USE_FIONBIO_IOCTL@ -@USE_SYSERROR_LIST@ @INNETGR_ARGS@ @SETNETGRENT_ARGS@ @USE_IFNAMELINKID@ @@ -419,4 +422,80 @@ setnetgrent_r(const char *netgroup, NGR_R_ENT_ARGS); NGR_R_SET_RETURN setnetgrent_r(const char *netgroup); #endif + +#ifdef NEED_STRTOUL +unsigned long strtoul(const char *, char **, int); +#endif + +#ifdef NEED_SUN4PROTOS +#include <stdarg.h> +#ifndef __SIZE_TYPE__ +#define __SIZE_TYPE__ int +#endif +struct sockaddr; +struct iovec; +struct timeval; +struct timezone; +int fprintf(FILE *, const char *, ...); +int getsockname(int, struct sockaddr *, int *); +int getpeername(int, struct sockaddr *, int *); +int socket(int, int, int); +int connect(int, const struct sockaddr *, int); +int writev(int, struct iovec *, int); +int readv(int, struct iovec *, int); +int send(int, const char *, int, int); +void bzero(char *, int); +int recvfrom(int, char *, int, int, struct sockaddr *, int *); +int syslog(int, const char *, ... 
); +int printf(const char *, ...); +__SIZE_TYPE__ fread(void *, __SIZE_TYPE__, __SIZE_TYPE__, FILE *); +__SIZE_TYPE__ fwrite(const void *, __SIZE_TYPE__, __SIZE_TYPE__, FILE *); +int fclose(FILE *); +int ungetc(int, FILE *); +int scanf(const char *, ...); +int sscanf(const char *, const char *, ... ); +int tolower(int); +int toupper(int); +int strcasecmp(const char *, const char *); +int strncasecmp(const char *, const char *, int); +int select(int, fd_set *, fd_set *, fd_set *, struct timeval *); +#ifdef gettimeofday +#undef gettimeofday +int gettimeofday(struct timeval *, struct timezone *); +#define gettimeofday isc__gettimeofday +#else +int gettimeofday(struct timeval *, struct timezone *); +#endif +long strtol(const char*, char **, int); +int fseek(FILE *, long, int); +int setsockopt(int, int, int, const char *, int); +int bind(int, const struct sockaddr *, int); +void bcopy(char *, char *, int); +int fputc(char, FILE *); +int listen(int, int); +int accept(int, struct sockaddr *, int *); +int getsockopt(int, int, int, char *, int *); +int vfprintf(FILE *, const char *, va_list); +int fflush(FILE *); +int fgetc(FILE *); +int fputs(const char *, FILE *); +int fchown(int, int, int); +void setbuf(FILE *, char *); +int gethostname(char *, int); +int rename(const char *, const char *); +time_t time(time_t *); +int fscanf(FILE *, const char *, ...); +int sscanf(const char *, const char *, ...); +int ioctl(int, int, caddr_t); +void perror(const char *); + +#if !defined(__USE_FIXED_PROTOTYPES__) && !defined(__cplusplus) && !defined(__STRICT_ANSI__) +/* + * 'gcc -ansi' changes the prototype for vsprintf(). + * Use this prototype when 'gcc -ansi' is not in effect. 
+ */ +char *vsprintf(char *, const char *, va_list); +#endif +#endif + #endif diff --git a/lib/bind/resolv/Makefile.in b/lib/bind/resolv/Makefile.in index a235fbc7a5e39..06ceb96aeaea5 100644 --- a/lib/bind/resolv/Makefile.in +++ b/lib/bind/resolv/Makefile.in @@ -1,7 +1,7 @@ -# Copyright (C) 2004, 2005 Internet Systems Consortium, Inc. ("ISC") +# Copyright (C) 2004, 2005, 2008 Internet Systems Consortium, Inc. ("ISC") # Copyright (C) 2001 Internet Software Consortium. # -# Permission to use, copy, modify, and distribute this software for any +# Permission to use, copy, modify, and/or distribute this software for any # purpose with or without fee is hereby granted, provided that the above # copyright notice and this permission notice appear in all copies. # @@ -13,7 +13,7 @@ # OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR # PERFORMANCE OF THIS SOFTWARE. -# $Id: Makefile.in,v 1.3.206.3 2005/07/29 00:13:09 marka Exp $ +# $Id: Makefile.in,v 1.3.206.5 2008/04/28 23:45:35 tbox Exp $ srcdir= @srcdir@ VPATH = @srcdir@ @@ -28,7 +28,7 @@ SRCS= herror.c mtctxres.c res_comp.c res_data.c res_debug.c \ TARGETS= ${OBJS} -CINCLUDES= -I.. -I${srcdir}/../include +CINCLUDES= -I.. 
-I../include -I${srcdir}/../include CWARNINGS= @BIND9_MAKE_RULES@ diff --git a/lib/bind/resolv/res_debug.c b/lib/bind/resolv/res_debug.c index 8dda12c5e81cd..4c3cc7530fa78 100644 --- a/lib/bind/resolv/res_debug.c +++ b/lib/bind/resolv/res_debug.c @@ -95,7 +95,7 @@ #if defined(LIBC_SCCS) && !defined(lint) static const char sccsid[] = "@(#)res_debug.c 8.1 (Berkeley) 6/4/93"; -static const char rcsid[] = "$Id: res_debug.c,v 1.3.2.5.4.6 2005/07/28 07:43:22 marka Exp $"; +static const char rcsid[] = "$Id: res_debug.c,v 1.3.2.5.4.7 2008/04/28 05:46:51 marka Exp $"; #endif /* LIBC_SCCS and not lint */ #include "port_before.h" @@ -189,10 +189,56 @@ do_section(const res_state statp, p_type(ns_rr_type(rr)), p_class(ns_rr_class(rr))); else if (section == ns_s_ar && ns_rr_type(rr) == ns_t_opt) { + u_int16_t optcode, optlen, rdatalen = ns_rr_rdlen(rr); u_int32_t ttl = ns_rr_ttl(rr); + fprintf(file, "; EDNS: version: %u, udp=%u, flags=%04x\n", (ttl>>16)&0xff, ns_rr_class(rr), ttl&0xffff); + + while (rdatalen >= 4) { + const u_char *cp = ns_rr_rdata(rr); + int i; + + GETSHORT(optcode, cp); + GETSHORT(optlen, cp); + + if (optcode == NS_OPT_NSID) { + fputs("; NSID: ", file); + if (optlen == 0) { + fputs("; NSID\n", file); + } else { + fputs("; NSID: ", file); + for (i = 0; i < optlen; i++) + fprintf(file, "%02x ", + cp[i]); + fputs(" (",file); + for (i = 0; i < optlen; i++) + fprintf(file, "%c", + isprint(cp[i])? + cp[i] : '.'); + fputs(")\n", file); + } + } else { + if (optlen == 0) { + fprintf(file, "; OPT=%u\n", + optcode); + } else { + fprintf(file, "; OPT=%u: ", + optcode); + for (i = 0; i < optlen; i++) + fprintf(file, "%02x ", + cp[i]); + fputs(" (",file); + for (i = 0; i < optlen; i++) + fprintf(file, "%c", + isprint(cp[i]) ? 
+ cp[i] : '.'); + fputs(")\n", file); + } + } + rdatalen -= 4 + optlen; + } } else { n = ns_sprintrr(handle, &rr, NULL, NULL, buf, buflen); @@ -204,7 +250,7 @@ do_section(const res_state statp, buf = malloc(buflen += 1024); if (buf == NULL) { fprintf(file, - ";; memory allocation failure\n"); + ";; memory allocation failure\n"); return; } continue; @@ -380,7 +426,7 @@ const struct res_sym __p_default_section_syms[] = { {ns_s_an, "ANSWER", (char *)0}, {ns_s_ns, "AUTHORITY", (char *)0}, {ns_s_ar, "ADDITIONAL", (char *)0}, - {0, (char *)0, (char *)0} + {0, (char *)0, (char *)0} }; const struct res_sym __p_update_section_syms[] = { @@ -388,7 +434,7 @@ const struct res_sym __p_update_section_syms[] = { {S_PREREQ, "PREREQUISITE", (char *)0}, {S_UPDATE, "UPDATE", (char *)0}, {S_ADDT, "ADDITIONAL", (char *)0}, - {0, (char *)0, (char *)0} + {0, (char *)0, (char *)0} }; const struct res_sym __p_key_syms[] = { @@ -616,6 +662,7 @@ p_option(u_long option) { case RES_USE_INET6: return "inet6"; #ifdef RES_USE_EDNS0 /* KAME extension */ case RES_USE_EDNS0: return "edns0"; + case RES_NSID: return "nsid"; #endif #ifdef RES_USE_DNAME case RES_USE_DNAME: return "dname"; diff --git a/lib/bind/resolv/res_mkquery.c b/lib/bind/resolv/res_mkquery.c index 89000edf6ad42..0e450e9b53040 100644 --- a/lib/bind/resolv/res_mkquery.c +++ b/lib/bind/resolv/res_mkquery.c @@ -70,7 +70,7 @@ #if defined(LIBC_SCCS) && !defined(lint) static const char sccsid[] = "@(#)res_mkquery.c 8.1 (Berkeley) 6/4/93"; -static const char rcsid[] = "$Id: res_mkquery.c,v 1.1.2.2.4.2 2004/03/16 12:34:18 marka Exp $"; +static const char rcsid[] = "$Id: res_mkquery.c,v 1.1.2.2.4.3 2008/04/28 05:46:51 marka Exp $"; #endif /* LIBC_SCCS and not lint */ #include "port_before.h" @@ -203,9 +203,6 @@ res_nmkquery(res_state statp, #ifdef RES_USE_EDNS0 /* attach OPT pseudo-RR, as documented in RFC2671 (EDNS0). 
*/ -#ifndef T_OPT -#define T_OPT 41 -#endif int res_nopt(res_state statp, @@ -230,14 +227,14 @@ res_nopt(res_state statp, if ((ep - cp) < 1 + RRFIXEDSZ) return (-1); - *cp++ = 0; /* "." */ - - ns_put16(T_OPT, cp); /* TYPE */ + *cp++ = 0; /*%< "." */ + ns_put16(ns_t_opt, cp); /*%< TYPE */ cp += INT16SZ; - ns_put16(anslen & 0xffff, cp); /* CLASS = UDP payload size */ + ns_put16(anslen & 0xffff, cp); /*%< CLASS = UDP payload size */ cp += INT16SZ; - *cp++ = NOERROR; /* extended RCODE */ - *cp++ = 0; /* EDNS version */ + *cp++ = NOERROR; /*%< extended RCODE */ + *cp++ = 0; /*%< EDNS version */ + if (statp->options & RES_USE_DNSSEC) { #ifdef DEBUG if (statp->options & RES_DEBUG) @@ -247,10 +244,60 @@ res_nopt(res_state statp, } ns_put16(flags, cp); cp += INT16SZ; - ns_put16(0, cp); /* RDLEN */ + + ns_put16(0, cp); /*%< RDLEN */ cp += INT16SZ; + hp->arcount = htons(ntohs(hp->arcount) + 1); return (cp - buf); } + +/* + * Construct variable data (RDATA) block for OPT pseudo-RR, append it + * to the buffer, then update the RDLEN field (previously set to zero by + * res_nopt()) with the new RDATA length. + */ +int +res_nopt_rdata(res_state statp, + int n0, /*%< current offset in buffer */ + u_char *buf, /*%< buffer to put query */ + int buflen, /*%< size of buffer */ + u_char *rdata, /*%< ptr to start of opt rdata */ + u_short code, /*%< OPTION-CODE */ + u_short len, /*%< OPTION-LENGTH */ + u_char *data) /*%< OPTION_DATA */ +{ + register u_char *cp, *ep; + +#ifdef DEBUG + if ((statp->options & RES_DEBUG) != 0U) + printf(";; res_nopt_rdata()\n"); +#endif + + cp = buf + n0; + ep = buf + buflen; + + if ((ep - cp) < (4 + len)) + return (-1); + + if (rdata < (buf + 2) || rdata >= ep) + return (-1); + + ns_put16(code, cp); + cp += INT16SZ; + + ns_put16(len, cp); + cp += INT16SZ; + + memcpy(cp, data, len); + cp += len; + + len = cp - rdata; + ns_put16(len, rdata - 2); /* Update RDLEN field */ + + return (cp - buf); +} #endif + +/*!
\file */ diff --git a/lib/bind/resolv/res_query.c b/lib/bind/resolv/res_query.c index 5156ce84c0861..6855b73f12c03 100644 --- a/lib/bind/resolv/res_query.c +++ b/lib/bind/resolv/res_query.c @@ -70,7 +70,7 @@ #if defined(LIBC_SCCS) && !defined(lint) static const char sccsid[] = "@(#)res_query.c 8.1 (Berkeley) 6/4/93"; -static const char rcsid[] = "$Id: res_query.c,v 1.2.2.3.4.2 2004/03/16 12:34:19 marka Exp $"; +static const char rcsid[] = "$Id: res_query.c,v 1.2.2.3.4.3 2008/04/28 05:46:51 marka Exp $"; #endif /* LIBC_SCCS and not lint */ #include "port_before.h" @@ -116,8 +116,9 @@ res_nquery(res_state statp, { u_char buf[MAXPACKET]; HEADER *hp = (HEADER *) answer; - int n; u_int oflags; + u_char *rdata; + int n; oflags = statp->_flags; @@ -133,8 +134,14 @@ again: buf, sizeof(buf)); #ifdef RES_USE_EDNS0 if (n > 0 && (statp->_flags & RES_F_EDNS0ERR) == 0 && - (statp->options & (RES_USE_EDNS0|RES_USE_DNSSEC)) != 0U) + (statp->options & (RES_USE_EDNS0|RES_USE_DNSSEC|RES_NSID))) { n = res_nopt(statp, n, buf, sizeof(buf), anslen); + rdata = &buf[n]; + if (n > 0 && (statp->options & RES_NSID) != 0U) { + n = res_nopt_rdata(statp, n, buf, sizeof(buf), rdata, + NS_OPT_NSID, 0, NULL); + } + } #endif if (n <= 0) { #ifdef DEBUG @@ -144,6 +151,7 @@ again: RES_SET_H_ERRNO(statp, NO_RECOVERY); return (n); } + n = res_nsend(statp, buf, n, answer, anslen); if (n < 0) { #ifdef RES_USE_EDNS0 diff --git a/lib/bind9/api b/lib/bind9/api index cff58c8ed232f..1bdcb768ee2c2 100644 --- a/lib/bind9/api +++ b/lib/bind9/api @@ -1,3 +1,3 @@ LIBINTERFACE = 0 -LIBREVISION = 10 +LIBREVISION = 11 LIBAGE = 0 diff --git a/lib/bind9/check.c b/lib/bind9/check.c index fe9836ca4b429..4aaa37e38b66e 100644 --- a/lib/bind9/check.c +++ b/lib/bind9/check.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004-2007 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004-2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 2001-2003 Internet Software Consortium. 
* * Permission to use, copy, modify, and/or distribute this software for any @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: check.c,v 1.37.6.39 2007/12/14 01:28:26 marka Exp $ */ +/* $Id: check.c,v 1.37.6.41 2008/04/28 23:45:35 tbox Exp $ */ #include <config.h> @@ -213,13 +213,24 @@ check_dual_stack(const cfg_obj_t *options, isc_log_t *logctx) { } static isc_result_t -check_forward(const cfg_obj_t *options, isc_log_t *logctx) { +check_forward(const cfg_obj_t *options, const cfg_obj_t *global, + isc_log_t *logctx) +{ const cfg_obj_t *forward = NULL; const cfg_obj_t *forwarders = NULL; (void)cfg_map_get(options, "forward", &forward); (void)cfg_map_get(options, "forwarders", &forwarders); + if (forwarders != NULL && global != NULL) { + const char *file = cfg_obj_file(global); + unsigned int line = cfg_obj_line(global); + cfg_obj_log(forwarders, logctx, ISC_LOG_ERROR, + "forwarders declared in root zone and " + "in general configuration: %s:%u", + file, line); + return (ISC_R_FAILURE); + } if (forward != NULL && forwarders == NULL) { cfg_obj_log(forward, logctx, ISC_LOG_ERROR, "no matching 'forwarders' statement"); @@ -400,8 +411,8 @@ check_options(const cfg_obj_t *options, isc_log_t *logctx, isc_mem_t *mctx) { (void)cfg_map_get(options, "preferred-glue", &obj); if (obj != NULL) { const char *str; - str = cfg_obj_asstring(obj); - if (strcasecmp(str, "a") != 0 && + str = cfg_obj_asstring(obj); + if (strcasecmp(str, "a") != 0 && strcasecmp(str, "aaaa") != 0 && strcasecmp(str, "none") != 0) cfg_obj_log(obj, logctx, ISC_LOG_ERROR, @@ -430,7 +441,7 @@ check_options(const cfg_obj_t *options, isc_log_t *logctx, isc_mem_t *mctx) { isc_buffer_add(&b, strlen(str)); tresult = dns_name_fromtext(name, &b, dns_rootname, - ISC_FALSE, NULL); + ISC_FALSE, NULL); if (tresult != ISC_R_SUCCESS) { cfg_obj_log(obj, logctx, ISC_LOG_ERROR, "bad domain name '%s'", @@ -440,7 +451,7 @@ check_options(const cfg_obj_t *options, isc_log_t *logctx, isc_mem_t *mctx) { } } } - + /* * Set 
supported DNSSEC algorithms. */ @@ -586,7 +597,7 @@ get_masters_def(const cfg_obj_t *cctx, const char *name, const cfg_obj_t **ret) static isc_result_t validate_masters(const cfg_obj_t *obj, const cfg_obj_t *config, - isc_uint32_t *countp, isc_log_t *logctx, isc_mem_t *mctx) + isc_uint32_t *countp, isc_log_t *logctx, isc_mem_t *mctx) { isc_result_t result = ISC_R_SUCCESS; isc_result_t tresult; @@ -608,7 +619,7 @@ validate_masters(const cfg_obj_t *obj, const cfg_obj_t *config, newlist: list = cfg_tuple_get(obj, "addresses"); element = cfg_list_first(list); - resume: + resume: for ( ; element != NULL; element = cfg_list_next(element)) @@ -693,9 +704,9 @@ typedef struct { } optionstable; static isc_result_t -check_zoneconf(const cfg_obj_t *zconfig, const cfg_obj_t *config, - isc_symtab_t *symtab, dns_rdataclass_t defclass, - isc_log_t *logctx, isc_mem_t *mctx) +check_zoneconf(const cfg_obj_t *zconfig, const cfg_obj_t *voptions, + const cfg_obj_t *config, isc_symtab_t *symtab, + dns_rdataclass_t defclass, isc_log_t *logctx, isc_mem_t *mctx) { const char *zname; const char *typestr; @@ -708,6 +719,7 @@ check_zoneconf(const cfg_obj_t *zconfig, const cfg_obj_t *config, dns_rdataclass_t zclass; dns_fixedname_t fixedname; isc_buffer_t b; + isc_boolean_t root = ISC_FALSE; static optionstable options[] = { { "allow-query", MASTERZONE | SLAVEZONE | STUBZONE }, @@ -817,7 +829,7 @@ check_zoneconf(const cfg_obj_t *zconfig, const cfg_obj_t *config, isc_buffer_init(&b, zname, strlen(zname)); isc_buffer_add(&b, strlen(zname)); tresult = dns_name_fromtext(dns_fixedname_name(&fixedname), &b, - dns_rootname, ISC_TRUE, NULL); + dns_rootname, ISC_TRUE, NULL); if (tresult != ISC_R_SUCCESS) { cfg_obj_log(zconfig, logctx, ISC_LOG_ERROR, "zone '%s': is not a valid name", zname); @@ -832,6 +844,9 @@ check_zoneconf(const cfg_obj_t *zconfig, const cfg_obj_t *config, "previous definition: %s:%u", logctx, mctx); if (tresult != ISC_R_SUCCESS) result = tresult; + if 
(dns_name_equal(dns_fixedname_name(&fixedname), + dns_rootname)) + root = ISC_TRUE; } /* @@ -938,7 +953,18 @@ check_zoneconf(const cfg_obj_t *zconfig, const cfg_obj_t *config, /* * Check that forwarding is reasonable. */ - if (check_forward(zoptions, logctx) != ISC_R_SUCCESS) + obj = NULL; + if (root) { + if (voptions != NULL) + (void)cfg_map_get(voptions, "forwarders", &obj); + if (obj == NULL) { + const cfg_obj_t *options = NULL; + (void)cfg_map_get(config, "options", &options); + if (options != NULL) + (void)cfg_map_get(options, "forwarders", &obj); + } + } + if (check_forward(zoptions, obj, logctx) != ISC_R_SUCCESS) result = ISC_R_FAILURE; /* @@ -968,7 +994,7 @@ check_zoneconf(const cfg_obj_t *zconfig, const cfg_obj_t *config, result = tresult; } } - + return (result); } @@ -977,7 +1003,7 @@ bind9_check_key(const cfg_obj_t *key, isc_log_t *logctx) { const cfg_obj_t *algobj = NULL; const cfg_obj_t *secretobj = NULL; const char *keyname = cfg_obj_asstring(cfg_map_getname(key)); - + (void)cfg_map_get(key, "algorithm", &algobj); (void)cfg_map_get(key, "secret", &secretobj); if (secretobj == NULL || algobj == NULL) { @@ -1094,9 +1120,9 @@ check_servers(const cfg_obj_t *servers, isc_log_t *logctx) { } return (result); } - + static isc_result_t -check_viewconf(const cfg_obj_t *config, const cfg_obj_t *vconfig, +check_viewconf(const cfg_obj_t *config, const cfg_obj_t *voptions, dns_rdataclass_t vclass, isc_log_t *logctx, isc_mem_t *mctx) { const cfg_obj_t *servers = NULL; @@ -1116,8 +1142,8 @@ check_viewconf(const cfg_obj_t *config, const cfg_obj_t *vconfig, if (tresult != ISC_R_SUCCESS) return (ISC_R_NOMEMORY); - if (vconfig != NULL) - (void)cfg_map_get(vconfig, "zone", &zones); + if (voptions != NULL) + (void)cfg_map_get(voptions, "zone", &zones); else (void)cfg_map_get(config, "zone", &zones); @@ -1128,7 +1154,7 @@ check_viewconf(const cfg_obj_t *config, const cfg_obj_t *vconfig, isc_result_t tresult; const cfg_obj_t *zone = cfg_listelt_value(element); - tresult = 
check_zoneconf(zone, config, symtab, vclass, + tresult = check_zoneconf(zone, voptions, config, symtab, vclass, logctx, mctx); if (tresult != ISC_R_SUCCESS) result = ISC_R_FAILURE; @@ -1152,10 +1178,10 @@ check_viewconf(const cfg_obj_t *config, const cfg_obj_t *vconfig, isc_symtab_destroy(&symtab); return (tresult); } - - if (vconfig != NULL) { + + if (voptions != NULL) { keys = NULL; - (void)cfg_map_get(vconfig, "key", &keys); + (void)cfg_map_get(voptions, "key", &keys); tresult = check_keylist(keys, symtab, logctx); if (tresult == ISC_R_EXISTS) result = ISC_R_FAILURE; @@ -1170,47 +1196,48 @@ check_viewconf(const cfg_obj_t *config, const cfg_obj_t *vconfig, /* * Check that forwarding is reasonable. */ - if (vconfig == NULL) { + if (voptions == NULL) { const cfg_obj_t *options = NULL; (void)cfg_map_get(config, "options", &options); if (options != NULL) - if (check_forward(options, logctx) != ISC_R_SUCCESS) + if (check_forward(options, NULL, + logctx) != ISC_R_SUCCESS) result = ISC_R_FAILURE; } else { - if (check_forward(vconfig, logctx) != ISC_R_SUCCESS) + if (check_forward(voptions, NULL, logctx) != ISC_R_SUCCESS) result = ISC_R_FAILURE; } /* * Check that dual-stack-servers is reasonable. */ - if (vconfig == NULL) { + if (voptions == NULL) { const cfg_obj_t *options = NULL; (void)cfg_map_get(config, "options", &options); if (options != NULL) if (check_dual_stack(options, logctx) != ISC_R_SUCCESS) result = ISC_R_FAILURE; } else { - if (check_dual_stack(vconfig, logctx) != ISC_R_SUCCESS) + if (check_dual_stack(voptions, logctx) != ISC_R_SUCCESS) result = ISC_R_FAILURE; } /* * Check that rrset-order is reasonable. 
*/ - if (vconfig != NULL) { - if (check_order(vconfig, logctx) != ISC_R_SUCCESS) + if (voptions != NULL) { + if (check_order(voptions, logctx) != ISC_R_SUCCESS) result = ISC_R_FAILURE; } - if (vconfig != NULL) { - (void)cfg_map_get(vconfig, "server", &servers); + if (voptions != NULL) { + (void)cfg_map_get(voptions, "server", &servers); if (servers != NULL && check_servers(servers, logctx) != ISC_R_SUCCESS) result = ISC_R_FAILURE; } - if (vconfig != NULL) - tresult = check_options(vconfig, logctx, mctx); + if (voptions != NULL) + tresult = check_options(voptions, logctx, mctx); else tresult = check_options(config, logctx, mctx); if (tresult != ISC_R_SUCCESS) @@ -1249,7 +1276,7 @@ bind9_check_namedconf(const cfg_obj_t *config, isc_log_t *logctx, check_servers(servers, logctx) != ISC_R_SUCCESS) result = ISC_R_FAILURE; - if (options != NULL && + if (options != NULL && check_order(options, logctx) != ISC_R_SUCCESS) result = ISC_R_FAILURE; @@ -1311,7 +1338,7 @@ bind9_check_namedconf(const cfg_obj_t *config, isc_log_t *logctx, const char *file; unsigned int line; RUNTIME_CHECK(isc_symtab_lookup(symtab, key, - vclass, &symvalue) == ISC_R_SUCCESS); + vclass, &symvalue) == ISC_R_SUCCESS); file = cfg_obj_file(symvalue.as_cpointer); line = cfg_obj_line(symvalue.as_cpointer); cfg_obj_log(view, logctx, ISC_LOG_ERROR, @@ -1351,8 +1378,8 @@ bind9_check_namedconf(const cfg_obj_t *config, isc_log_t *logctx, } } - tresult = cfg_map_get(config, "acl", &acls); - if (tresult == ISC_R_SUCCESS) { + tresult = cfg_map_get(config, "acl", &acls); + if (tresult == ISC_R_SUCCESS) { const cfg_listelt_t *elt; const cfg_listelt_t *elt2; const char *aclname; @@ -1371,7 +1398,7 @@ bind9_check_namedconf(const cfg_obj_t *config, isc_log_t *logctx, cfg_obj_log(acl, logctx, ISC_LOG_ERROR, "attempt to redefine " "builtin acl '%s'", - aclname); + aclname); result = ISC_R_FAILURE; break; } @@ -1401,8 +1428,8 @@ bind9_check_namedconf(const cfg_obj_t *config, isc_log_t *logctx, } } - tresult = 
cfg_map_get(config, "kal", &kals); - if (tresult == ISC_R_SUCCESS) { + tresult = cfg_map_get(config, "kal", &kals); + if (tresult == ISC_R_SUCCESS) { const cfg_listelt_t *elt; const cfg_listelt_t *elt2; const char *aclname; diff --git a/lib/dns/adb.c b/lib/dns/adb.c index a6c6d8b1de2c3..49f60ecbf8875 100644 --- a/lib/dns/adb.c +++ b/lib/dns/adb.c @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: adb.c,v 1.181.2.11.2.34 2008/04/03 06:07:11 tbox Exp $ */ +/* $Id: adb.c,v 1.181.2.11.2.41 2008/10/17 03:34:53 marka Exp $ */ /* * Implementation notes @@ -116,6 +116,7 @@ struct dns_adb { isc_mutex_t lock; isc_mutex_t reflock; /* Covers irefcnt, erefcnt */ + isc_mutex_t overmemlock; /*%< Covers overmem */ isc_mem_t *mctx; dns_view_t *view; isc_timermgr_t *timermgr; @@ -488,6 +489,7 @@ import_rdataset(dns_adbname_t *adbname, dns_rdataset_t *rdataset, isc_boolean_t new_addresses_added; dns_rdatatype_t rdtype; unsigned int findoptions; + dns_adbnamehooklist_t *hookhead; INSIST(DNS_ADBNAME_VALID(adbname)); adb = adbname->adb; @@ -512,10 +514,12 @@ import_rdataset(dns_adbname_t *adbname, dns_rdataset_t *rdataset, INSIST(rdata.length == 4); memcpy(&ina.s_addr, rdata.data, 4); isc_sockaddr_fromin(&sockaddr, &ina, 0); + hookhead = &adbname->v4; } else { INSIST(rdata.length == 16); memcpy(in6a.s6_addr, rdata.data, 16); isc_sockaddr_fromin6(&sockaddr, &in6a, 0); + hookhead = &adbname->v6; } INSIST(nh == NULL); @@ -544,7 +548,7 @@ import_rdataset(dns_adbname_t *adbname, dns_rdataset_t *rdataset, link_entry(adb, addr_bucket, entry); } else { - for (anh = ISC_LIST_HEAD(adbname->v4); + for (anh = ISC_LIST_HEAD(*hookhead); anh != NULL; anh = ISC_LIST_NEXT(anh, plink)) if (anh->entry == foundentry) @@ -557,12 +561,8 @@ import_rdataset(dns_adbname_t *adbname, dns_rdataset_t *rdataset, } new_addresses_added = ISC_TRUE; - if (nh != NULL) { - if (rdtype == dns_rdatatype_a) - ISC_LIST_APPEND(adbname->v4, nh, plink); - else - ISC_LIST_APPEND(adbname->v6, nh, plink); - } + if (nh != 
NULL) + ISC_LIST_APPEND(*hookhead, nh, plink); nh = NULL; result = dns_rdataset_next(rdataset); } @@ -1731,8 +1731,11 @@ copy_namehook_lists(dns_adb_t *adb, dns_adbfind_t *find, dns_name_t *zone, bucket = entry->lock_bucket; LOCK(&adb->entrylocks[bucket]); - if (entry_is_bad_for_zone(adb, entry, zone, now)) + if (!FIND_RETURNLAME(find) + && entry_is_bad_for_zone(adb, entry, zone, now)) { + find->options |= DNS_ADBFIND_LAMEPRUNED; goto nextv6; + } addrinfo = new_adbaddrinfo(adb, entry, find->port); if (addrinfo == NULL) { find->partial_result |= DNS_ADBFIND_INET6; @@ -1766,12 +1769,15 @@ shutdown_task(isc_task_t *task, isc_event_t *ev) { INSIST(DNS_ADB_VALID(adb)); /* + * Wait for lock around check_exit() call to be released. + */ + LOCK(&adb->lock); + /* * Kill the timer, and then the ADB itself. Note that this implies * that this task was the one scheduled to get timer events. If * this is not true (and it is unfortunate there is no way to INSIST() * this) badness will occur. */ - LOCK(&adb->lock); isc_timer_detach(&adb->timer); UNLOCK(&adb->lock); isc_event_free(&ev); @@ -1983,6 +1989,7 @@ destroy(dns_adb_t *adb) { DESTROYLOCK(&adb->reflock); DESTROYLOCK(&adb->lock); DESTROYLOCK(&adb->mplock); + DESTROYLOCK(&adb->overmemlock); isc_mem_putanddetach(&adb->mctx, adb, sizeof(dns_adb_t)); } @@ -2053,6 +2060,10 @@ dns_adb_create(isc_mem_t *mem, dns_view_t *view, isc_timermgr_t *timermgr, if (result != ISC_R_SUCCESS) goto fail0d; + result = isc_mutex_init(&adb->overmemlock); + if (result != ISC_R_SUCCESS) + goto fail0e; + /* * Initialize the bucket locks for names and elements. * May as well initialize the list heads, too. 
@@ -2155,6 +2166,8 @@ dns_adb_create(isc_mem_t *mem, dns_view_t *view, isc_timermgr_t *timermgr, if (adb->afmp != NULL) isc_mempool_destroy(&adb->afmp); + DESTROYLOCK(&adb->overmemlock); + fail0e: DESTROYLOCK(&adb->reflock); fail0d: DESTROYLOCK(&adb->mplock); @@ -3122,8 +3135,10 @@ fetch_callback(isc_task_t *task, isc_event_t *ev) { address_type = DNS_ADBFIND_INET6; fetch = name->fetch_aaaa; name->fetch_aaaa = NULL; - } - INSIST(address_type != 0); + } else + fetch = NULL; + + INSIST(address_type != 0 && fetch != NULL); dns_resolver_destroyfetch(&fetch->fetch); dev->fetch = NULL; @@ -3570,12 +3585,21 @@ water(void *arg, int mark) { DP(ISC_LOG_DEBUG(1), "adb reached %s water mark", overmem ? "high" : "low"); - adb->overmem = overmem; - if (overmem) { - isc_interval_set(&interval, 0, 1); - (void)isc_timer_reset(adb->timer, isc_timertype_once, NULL, - &interval, ISC_TRUE); + /* + * We can't use adb->lock as there is potential for water + * to be called when adb->lock is held. + */ + LOCK(&adb->overmemlock); + if (adb->overmem != overmem) { + adb->overmem = overmem; + if (overmem) { + isc_interval_set(&interval, 0, 1); + (void)isc_timer_reset(adb->timer, isc_timertype_once, + NULL, &interval, ISC_TRUE); + } + isc_mem_waterack(adb->mctx, mark); } + UNLOCK(&adb->overmemlock); } void diff --git a/lib/dns/api b/lib/dns/api index efe79b507c4ae..6bbd1729bc153 100644 --- a/lib/dns/api +++ b/lib/dns/api @@ -1,3 +1,3 @@ -LIBINTERFACE = 25 -LIBREVISION = 1 +LIBINTERFACE = 26 +LIBREVISION = 2 LIBAGE = 0 diff --git a/lib/dns/cache.c b/lib/dns/cache.c index f45af90d08d3b..1212a73212a7b 100644 --- a/lib/dns/cache.c +++ b/lib/dns/cache.c @@ -1,8 +1,8 @@ /* - * Copyright (C) 2004-2006 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004-2006, 2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 1999-2003 Internet Software Consortium. 
* - * Permission to use, copy, modify, and distribute this software for any + * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: cache.c,v 1.45.2.4.8.15 2006/08/01 01:07:05 marka Exp $ */ +/* $Id: cache.c,v 1.45.2.4.8.17 2008/04/28 23:45:37 tbox Exp $ */ #include <config.h> @@ -466,7 +466,7 @@ dns_cache_setcleaninginterval(dns_cache_t *cache, unsigned int t) { isc_timertype_ticker, NULL, &interval, ISC_FALSE); } - if (result != ISC_R_SUCCESS) + if (result != ISC_R_SUCCESS) isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE, ISC_LOG_WARNING, "could not set cache cleaning interval: %s", @@ -558,7 +558,7 @@ cache_cleaner_init(dns_cache_t *cache, isc_taskmgr_t *taskmgr, result = ISC_R_NOMEMORY; goto cleanup; } - + cleaner->overmem_event = isc_event_allocate(cache->mctx, cleaner, DNS_EVENT_CACHEOVERMEM, @@ -596,7 +596,7 @@ begin_cleaning(cache_cleaner_t *cleaner) { /* * Create an iterator, if it does not already exist, and - * position it at the beginning of the cache. + * position it at the beginning of the cache. 
*/ if (cleaner->iterator == NULL) result = dns_db_createiterator(cleaner->cache->db, ISC_FALSE, @@ -635,7 +635,7 @@ begin_cleaning(cache_cleaner_t *cleaner) { isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1), "begin cache cleaning, mem inuse %lu", - (unsigned long)isc_mem_inuse(cleaner->cache->mctx)); + (unsigned long)isc_mem_inuse(cleaner->cache->mctx)); cleaner->state = cleaner_s_busy; isc_task_send(cleaner->task, &cleaner->resched_event); } @@ -695,7 +695,7 @@ static void overmem_cleaning_action(isc_task_t *task, isc_event_t *event) { cache_cleaner_t *cleaner = event->ev_arg; isc_boolean_t want_cleaning = ISC_FALSE; - + UNUSED(task); INSIST(task == cleaner->task); @@ -908,9 +908,12 @@ water(void *arg, int mark) { REQUIRE(VALID_CACHE(cache)); LOCK(&cache->cleaner.lock); - - dns_db_overmem(cache->db, overmem); - cache->cleaner.overmem = overmem; + + if (overmem != cache->cleaner.overmem) { + dns_db_overmem(cache->db, overmem); + cache->cleaner.overmem = overmem; + isc_mem_waterack(cache->mctx, mark); + } if (cache->cleaner.overmem_event != NULL) isc_task_send(cache->cleaner.task, @@ -1034,7 +1037,7 @@ dns_cache_flushname(dns_cache_t *cache, dns_name_t *name) { dns_rdatasetiter_t *iter = NULL; dns_dbnode_t *node = NULL; dns_db_t *db = NULL; - + LOCK(&cache->lock); if (cache->db != NULL) dns_db_attach(cache->db, &db); diff --git a/lib/dns/dispatch.c b/lib/dns/dispatch.c index 02a2f188d08ce..8fef86c36ffe0 100644 --- a/lib/dns/dispatch.c +++ b/lib/dns/dispatch.c @@ -15,17 +15,19 @@ * PERFORMANCE OF THIS SOFTWARE. 
*/ -/* $Id: dispatch.c,v 1.101.2.6.2.21.4.5 2008/07/23 23:16:26 marka Exp $ */ +/* $Id: dispatch.c,v 1.101.2.6.2.36 2008/09/04 00:25:40 jinmei Exp $ */ #include <config.h> #include <stdlib.h> #include <sys/types.h> #include <unistd.h> +#include <stdlib.h> #include <isc/entropy.h> #include <isc/mem.h> #include <isc/mutex.h> +#include <isc/portset.h> #include <isc/print.h> #include <isc/random.h> #include <isc/string.h> @@ -44,13 +46,8 @@ typedef ISC_LIST(dns_dispentry_t) dns_displist_t; -typedef struct dns_qid { - unsigned int magic; - unsigned int qid_nbuckets; /* hash table size */ - unsigned int qid_increment; /* id increment on collision */ - isc_mutex_t lock; - dns_displist_t *qid_table; /* the table itself */ -} dns_qid_t; +typedef struct dispsocket dispsocket_t; +typedef ISC_LIST(dispsocket_t) dispsocketlist_t; /* ARC4 Random generator state */ typedef struct arc4ctx { @@ -58,14 +55,26 @@ typedef struct arc4ctx { isc_uint8_t j; isc_uint8_t s[256]; int count; + isc_entropy_t *entropy; /* entropy source for ARC4 */ + isc_mutex_t *lock; } arc4ctx_t; +typedef struct dns_qid { + unsigned int magic; + unsigned int qid_nbuckets; /* hash table size */ + unsigned int qid_increment; /* id increment on collision */ + isc_mutex_t lock; + dns_displist_t *qid_table; /* the table itself */ + dispsocketlist_t *sock_table; /* socket table */ +} dns_qid_t; + struct dns_dispatchmgr { /* Unlocked. */ unsigned int magic; isc_mem_t *mctx; dns_acl_t *blackhole; dns_portlist_t *portlist; + isc_entropy_t *entropy; /* entropy source */ /* Locked by "lock". */ isc_mutex_t lock; @@ -74,7 +83,7 @@ struct dns_dispatchmgr { /* Locked by arc4_lock. 
*/ isc_mutex_t arc4_lock; - arc4ctx_t arc4ctx; /*%< ARC4 context for QID */ + arc4ctx_t arc4ctx; /* ARC4 context for QID */ /* locked by buffer lock */ dns_qid_t *qid; @@ -89,8 +98,27 @@ struct dns_dispatchmgr { isc_mempool_t *rpool; /* memory pool for replies */ isc_mempool_t *dpool; /* dispatch allocations */ isc_mempool_t *bpool; /* memory pool for buffers */ + isc_mempool_t *spool; /* memory pool for dispsocs */ - isc_entropy_t *entropy; /* entropy source */ + /* + * Locked by qid->lock if qid exists; otherwise, can be used without + * being locked. + * Memory footprint considerations: this is a simple implementation of + * available ports, i.e., an ordered array of the actual port numbers. + * This will require about 256KB of memory in the worst case (128KB for + * each of IPv4 and IPv6). We could reduce it by representing it as a + * more sophisticated way such as a list (or array) of ranges that are + * searched to identify a specific port. Our decision here is the saved + * memory isn't worth the implementation complexity, considering the + * fact that the whole BIND9 process (which is mainly named) already + * requires a pretty large memory footprint. We may, however, have to + * revisit the decision when we want to use it as a separate module for + * an environment where memory requirement is severer. + */ + in_port_t *v4ports; /* available ports for IPv4 */ + unsigned int nv4ports; /* # of available ports for IPv4 */ + in_port_t *v6ports; /* available ports for IPv6 */ + unsigned int nv6ports; /* # of available ports for IPv6 */ }; #define MGR_SHUTTINGDOWN 0x00000001U @@ -109,17 +137,65 @@ struct dns_dispatchmgr { isc_taskaction_t action; void *arg; isc_boolean_t item_out; + dispsocket_t *dispsocket; ISC_LIST(dns_dispatchevent_t) items; ISC_LINK(dns_dispentry_t) link; }; +/* + * Maximum number of dispatch sockets that can be pooled for reuse. The
The + * appropriate value may vary, but experiments have shown a busy caching server + * may need more than 1000 sockets concurrently opened. The maximum allowable + * number of dispatch sockets (per manager) will be set to the double of this + * value. + */ +#ifndef DNS_DISPATCH_POOLSOCKS +#define DNS_DISPATCH_POOLSOCKS 2048 +#endif + +/* + * Quota to control the number of dispatch sockets. If a dispatch has more + * than the quota of sockets, new queries will purge oldest ones, so that + * a massive number of outstanding queries won't prevent subsequent queries + * (especially if the older ones take longer time and result in timeout). + */ +#ifndef DNS_DISPATCH_SOCKSQUOTA +#define DNS_DISPATCH_SOCKSQUOTA 3072 +#endif + +struct dispsocket { + unsigned int magic; + isc_socket_t *socket; + dns_dispatch_t *disp; + isc_sockaddr_t host; + in_port_t localport; + dns_dispentry_t *resp; + isc_task_t *task; + ISC_LINK(dispsocket_t) link; + unsigned int bucket; + ISC_LINK(dispsocket_t) blink; +}; + #define INVALID_BUCKET (0xffffdead) +/* + * Number of tasks for each dispatch that use separate sockets for different + * transactions. This must be a power of 2 as it will divide 32 bit numbers + * to get an uniformly random tasks selection. See get_dispsocket(). + */ +#define MAX_INTERNAL_TASKS 64 + struct dns_dispatch { /* Unlocked. */ unsigned int magic; /* magic */ dns_dispatchmgr_t *mgr; /* dispatch manager */ - isc_task_t *task; /* internal task */ + int ntasks; + /* + * internal task buckets. We use multiple tasks to distribute various + * socket events well when using separate dispatch sockets. We use the + * 1st task (task[0]) for internal control events. + */ + isc_task_t *task[MAX_INTERNAL_TASKS]; isc_socket_t *socket; /* isc socket attached to */ isc_sockaddr_t local; /* local address */ in_port_t localport; /* local UDP port */ @@ -141,10 +217,14 @@ struct dns_dispatch { tcpmsg_valid : 1, recv_pending : 1; /* is a recv() pending? 
*/ isc_result_t shutdown_why; + ISC_LIST(dispsocket_t) activesockets; + ISC_LIST(dispsocket_t) inactivesockets; + unsigned int nsockets; unsigned int requests; /* how many requests we have */ unsigned int tcpbuffers; /* allocated buffers */ dns_tcpmsg_t tcpmsg; /* for tcp streams */ dns_qid_t *qid; + arc4ctx_t arc4ctx; /* for QID/UDP port num */ }; #define QID_MAGIC ISC_MAGIC('Q', 'i', 'd', ' ') @@ -153,6 +233,9 @@ struct dns_dispatch { #define RESPONSE_MAGIC ISC_MAGIC('D', 'r', 's', 'p') #define VALID_RESPONSE(e) ISC_MAGIC_VALID((e), RESPONSE_MAGIC) +#define DISPSOCK_MAGIC ISC_MAGIC('D', 's', 'o', 'c') +#define VALID_DISPSOCK(e) ISC_MAGIC_VALID((e), DISPSOCK_MAGIC) + #define DISPATCH_MAGIC ISC_MAGIC('D', 'i', 's', 'p') #define VALID_DISPATCH(e) ISC_MAGIC_VALID((e), DISPATCH_MAGIC) @@ -161,16 +244,33 @@ struct dns_dispatch { #define DNS_QID(disp) ((disp)->socktype == isc_sockettype_tcp) ? \ (disp)->qid : (disp)->mgr->qid +#define DISP_ARC4CTX(disp) ((disp)->socktype == isc_sockettype_udp) ? \ + (&(disp)->arc4ctx) : (&(disp)->mgr->arc4ctx) + +/* + * Locking a query port buffer is a bit tricky. We access the buffer without + * locking until qid is created. Technically, there is a possibility of race + * between the creation of qid and access to the port buffer; in practice, + * however, this should be safe because qid isn't created until the first + * dispatch is created and there should be no contending situation until then. + */ +#define PORTBUFLOCK(mgr) if ((mgr)->qid != NULL) LOCK(&((mgr)->qid->lock)) +#define PORTBUFUNLOCK(mgr) if ((mgr)->qid != NULL) UNLOCK((&(mgr)->qid->lock)) + /* * Statics. 
*/ -static dns_dispentry_t *bucket_search(dns_qid_t *, isc_sockaddr_t *, - dns_messageid_t, in_port_t, unsigned int); +static dns_dispentry_t *entry_search(dns_qid_t *, isc_sockaddr_t *, + dns_messageid_t, in_port_t, unsigned int); static isc_boolean_t destroy_disp_ok(dns_dispatch_t *); static void destroy_disp(isc_task_t *task, isc_event_t *event); -static void udp_recv(isc_task_t *, isc_event_t *); +static void destroy_dispsocket(dns_dispatch_t *, dispsocket_t **); +static void deactivate_dispsocket(dns_dispatch_t *, dispsocket_t *); +static void udp_exrecv(isc_task_t *, isc_event_t *); +static void udp_shrecv(isc_task_t *, isc_event_t *); +static void udp_recv(isc_event_t *, dns_dispatch_t *, dispsocket_t *); static void tcp_recv(isc_task_t *, isc_event_t *); -static void startrecv(dns_dispatch_t *); +static isc_result_t startrecv(dns_dispatch_t *, dispsocket_t *); static isc_uint32_t dns_hash(dns_qid_t *, isc_sockaddr_t *, dns_messageid_t, in_port_t); static void free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len); @@ -182,6 +282,11 @@ static dns_dispentry_t *linear_first(dns_qid_t *disp); static dns_dispentry_t *linear_next(dns_qid_t *disp, dns_dispentry_t *resp); static void dispatch_free(dns_dispatch_t **dispp); +static isc_result_t get_udpsocket(dns_dispatchmgr_t *mgr, + dns_dispatch_t *disp, + isc_socketmgr_t *sockmgr, + isc_sockaddr_t *localaddr, + isc_socket_t **sockp); static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, isc_taskmgr_t *taskmgr, @@ -192,8 +297,13 @@ static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr, static isc_boolean_t destroy_mgr_ok(dns_dispatchmgr_t *mgr); static void destroy_mgr(dns_dispatchmgr_t **mgrp); static isc_result_t qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets, - unsigned int increment, dns_qid_t **qidp); + unsigned int increment, dns_qid_t **qidp, + isc_boolean_t needaddrtable); static void qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp); +static 
isc_result_t open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local, + unsigned int options, isc_socket_t **sockp); +static isc_boolean_t portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock, + isc_sockaddr_t *sockaddrp); #define LVL(x) ISC_LOG_DEBUG(x) @@ -296,13 +406,15 @@ request_log(dns_dispatch_t *disp, dns_dispentry_t *resp, * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ static void -dispatch_arc4init(arc4ctx_t *actx) { +dispatch_arc4init(arc4ctx_t *actx, isc_entropy_t *entropy, isc_mutex_t *lock) { int n; for (n = 0; n < 256; n++) actx->s[n] = n; actx->i = 0; actx->j = 0; actx->count = 0; + actx->entropy = entropy; /* don't have to attach */ + actx->lock = lock; } static void @@ -346,7 +458,7 @@ dispatch_arc4get16(arc4ctx_t *actx) { } static void -dispatch_arc4stir(dns_dispatchmgr_t *mgr) { +dispatch_arc4stir(arc4ctx_t *actx) { int i; union { unsigned char rnd[128]; @@ -354,51 +466,55 @@ dispatch_arc4stir(dns_dispatchmgr_t *mgr) { } rnd; isc_result_t result; - if (mgr->entropy != NULL) { + if (actx->entropy != NULL) { /* * We accept any quality of random data to avoid blocking. */ - result = isc_entropy_getdata(mgr->entropy, rnd.rnd, + result = isc_entropy_getdata(actx->entropy, rnd.rnd, sizeof(rnd), NULL, 0); RUNTIME_CHECK(result == ISC_R_SUCCESS); } else { for (i = 0; i < 32; i++) isc_random_get(&rnd.rnd32[i]); } - dispatch_arc4addrandom(&mgr->arc4ctx, rnd.rnd, sizeof(rnd.rnd)); + dispatch_arc4addrandom(actx, rnd.rnd, sizeof(rnd.rnd)); /* * Discard early keystream, as per recommendations in: * http://www.wisdom.weizmann.ac.il/~itsik/RC4/Papers/Rc4_ksa.ps */ for (i = 0; i < 256; i++) - (void)dispatch_arc4get8(&mgr->arc4ctx); + (void)dispatch_arc4get8(actx); /* * Derived from OpenBSD's implementation. The rationale is not clear, * but should be conservative enough in safety, and reasonably large * for efficiency. 
*/ - mgr->arc4ctx.count = 1600000; + actx->count = 1600000; } static isc_uint16_t -dispatch_arc4random(dns_dispatchmgr_t *mgr) { +dispatch_arc4random(arc4ctx_t *actx) { isc_uint16_t result; - LOCK(&mgr->arc4_lock); - mgr->arc4ctx.count -= sizeof(isc_uint16_t); - if (mgr->arc4ctx.count <= 0) - dispatch_arc4stir(mgr); - result = dispatch_arc4get16(&mgr->arc4ctx); - UNLOCK(&mgr->arc4_lock); + if (actx->lock != NULL) + LOCK(actx->lock); + + actx->count -= sizeof(isc_uint16_t); + if (actx->count <= 0) + dispatch_arc4stir(actx); + result = dispatch_arc4get16(actx); + + if (actx->lock != NULL) + UNLOCK(actx->lock); + return (result); } static isc_uint16_t -dispatch_arc4uniformrandom(dns_dispatchmgr_t *mgr, isc_uint16_t upper_bound) { +dispatch_arc4uniformrandom(arc4ctx_t *actx, isc_uint16_t upper_bound) { isc_uint16_t min, r; - /* The caller must hold the manager lock. */ if (upper_bound < 2) return (0); @@ -420,7 +536,7 @@ dispatch_arc4uniformrandom(dns_dispatchmgr_t *mgr, isc_uint16_t upper_bound) { * to re-roll. */ for (;;) { - r = dispatch_arc4random(mgr); + r = dispatch_arc4random(actx); if (r >= min) break; } @@ -503,13 +619,15 @@ destroy_disp_ok(dns_dispatch_t *disp) if (disp->recv_pending != 0) return (ISC_FALSE); + if (!ISC_LIST_EMPTY(disp->activesockets)) + return (ISC_FALSE); + if (disp->shutting_down == 0) return (ISC_FALSE); return (ISC_TRUE); } - /* * Called when refcount reaches 0 (and safe to destroy). 
* @@ -521,6 +639,8 @@ destroy_disp(isc_task_t *task, isc_event_t *event) { dns_dispatch_t *disp; dns_dispatchmgr_t *mgr; isc_boolean_t killmgr; + dispsocket_t *dispsocket; + int i; INSIST(event->ev_type == DNS_EVENT_DISPATCHCONTROL); @@ -534,10 +654,16 @@ destroy_disp(isc_task_t *task, isc_event_t *event) { dispatch_log(disp, LVL(90), "shutting down; detaching from sock %p, task %p", - disp->socket, disp->task); + disp->socket, disp->task[0]); /* XXXX */ - isc_socket_detach(&disp->socket); - isc_task_detach(&disp->task); + if (disp->socket != NULL) + isc_socket_detach(&disp->socket); + while ((dispsocket = ISC_LIST_HEAD(disp->inactivesockets)) != NULL) { + ISC_LIST_UNLINK(disp->inactivesockets, dispsocket, link); + destroy_dispsocket(disp, &dispsocket); + } + for (i = 0; i < disp->ntasks; i++) + isc_task_detach(&disp->task[i]); isc_event_free(&event); dispatch_free(&disp); @@ -548,14 +674,210 @@ destroy_disp(isc_task_t *task, isc_event_t *event) { destroy_mgr(&mgr); } +/* + * Find a dispsocket for socket address 'dest', and port number 'port'. + * Return NULL if no such entry exists. + */ +static dispsocket_t * +socket_search(dns_qid_t *qid, isc_sockaddr_t *dest, in_port_t port, + unsigned int bucket) +{ + dispsocket_t *dispsock; + + REQUIRE(bucket < qid->qid_nbuckets); + + dispsock = ISC_LIST_HEAD(qid->sock_table[bucket]); + + while (dispsock != NULL) { + if (isc_sockaddr_equal(dest, &dispsock->host) && + dispsock->localport == port) + return (dispsock); + dispsock = ISC_LIST_NEXT(dispsock, blink); + } + + return (NULL); +} /* - * Find an entry for query ID 'id' and socket address 'dest' in 'qid'. + * Make a new socket for a single dispatch with a random port number. + * The caller must hold the disp->lock and qid->lock. 
+ */ +static isc_result_t +get_dispsocket(dns_dispatch_t *disp, isc_sockaddr_t *dest, + isc_socketmgr_t *sockmgr, dns_qid_t *qid, + dispsocket_t **dispsockp, in_port_t *portp) +{ + int i; + isc_uint32_t r; + dns_dispatchmgr_t *mgr = disp->mgr; + isc_socket_t *sock = NULL; + isc_result_t result = ISC_R_FAILURE; + in_port_t port; + isc_sockaddr_t localaddr; + unsigned int bucket = 0; + dispsocket_t *dispsock; + unsigned int nports; + in_port_t *ports; + + if (isc_sockaddr_pf(&disp->local) == AF_INET) { + nports = disp->mgr->nv4ports; + ports = disp->mgr->v4ports; + } else { + nports = disp->mgr->nv6ports; + ports = disp->mgr->v6ports; + } + if (nports == 0) + return (ISC_R_ADDRNOTAVAIL); + + dispsock = ISC_LIST_HEAD(disp->inactivesockets); + if (dispsock != NULL) { + ISC_LIST_UNLINK(disp->inactivesockets, dispsock, link); + sock = dispsock->socket; + dispsock->socket = NULL; + } else { + dispsock = isc_mempool_get(mgr->spool); + if (dispsock == NULL) + return (ISC_R_NOMEMORY); + + disp->nsockets++; + dispsock->socket = NULL; + dispsock->disp = disp; + dispsock->resp = NULL; + isc_random_get(&r); + dispsock->task = NULL; + isc_task_attach(disp->task[r % disp->ntasks], &dispsock->task); + ISC_LINK_INIT(dispsock, link); + ISC_LINK_INIT(dispsock, blink); + dispsock->magic = DISPSOCK_MAGIC; + } + + /* + * Pick up a random UDP port and open a new socket with it. Avoid + * choosing ports that share the same destination because it will be + * very likely to fail in bind(2) or connect(2). 
+ */ + localaddr = disp->local; + for (i = 0; i < 64; i++) { + port = ports[dispatch_arc4uniformrandom(DISP_ARC4CTX(disp), + nports)]; + isc_sockaddr_setport(&localaddr, port); + + bucket = dns_hash(qid, dest, 0, port); + if (socket_search(qid, dest, port, bucket) != NULL) + continue; + + result = open_socket(sockmgr, &localaddr, 0, &sock); + if (result == ISC_R_SUCCESS || result != ISC_R_ADDRINUSE) + break; + } + + if (result == ISC_R_SUCCESS) { + dispsock->socket = sock; + dispsock->host = *dest; + dispsock->localport = port; + dispsock->bucket = bucket; + ISC_LIST_APPEND(qid->sock_table[bucket], dispsock, blink); + *dispsockp = dispsock; + *portp = port; + } else { + /* + * We could keep it in the inactive list, but since this should + * be an exceptional case and might be resource shortage, we'd + * rather destroy it. + */ + if (sock != NULL) + isc_socket_detach(&sock); + destroy_dispsocket(disp, &dispsock); + } + + return (result); +} + +/* + * Destroy a dedicated dispatch socket. + */ +static void +destroy_dispsocket(dns_dispatch_t *disp, dispsocket_t **dispsockp) { + dispsocket_t *dispsock; + dns_qid_t *qid; + + /* + * The dispatch must be locked. + */ + + REQUIRE(dispsockp != NULL && *dispsockp != NULL); + dispsock = *dispsockp; + REQUIRE(!ISC_LINK_LINKED(dispsock, link)); + + disp->nsockets--; + dispsock->magic = 0; + if (dispsock->socket != NULL) + isc_socket_detach(&dispsock->socket); + if (ISC_LINK_LINKED(dispsock, blink)) { + qid = DNS_QID(disp); + LOCK(&qid->lock); + ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock, + blink); + UNLOCK(&qid->lock); + } + if (dispsock->task != NULL) + isc_task_detach(&dispsock->task); + isc_mempool_put(disp->mgr->spool, dispsock); + + *dispsockp = NULL; +} + +/* + * Deactivate a dedicated dispatch socket. Move it to the inactive list for + * future reuse unless the total number of sockets are exceeding the maximum. 
+ */ +static void +deactivate_dispsocket(dns_dispatch_t *disp, dispsocket_t *dispsock) { + isc_result_t result; + dns_qid_t *qid; + + /* + * The dispatch must be locked. + */ + ISC_LIST_UNLINK(disp->activesockets, dispsock, link); + if (dispsock->resp != NULL) { + INSIST(dispsock->resp->dispsocket == dispsock); + dispsock->resp->dispsocket = NULL; + } + + if (disp->nsockets > DNS_DISPATCH_POOLSOCKS) + destroy_dispsocket(disp, &dispsock); + else { + result = isc_socket_close(dispsock->socket); + + qid = DNS_QID(disp); + LOCK(&qid->lock); + ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock, + blink); + UNLOCK(&qid->lock); + + if (result == ISC_R_SUCCESS) + ISC_LIST_APPEND(disp->inactivesockets, dispsock, link); + else { + /* + * If the underlying system does not allow this + * optimization, destroy this temporary structure (and + * create a new one for a new transaction). + */ + INSIST(result == ISC_R_NOTIMPLEMENTED); + destroy_dispsocket(disp, &dispsock); + } + } +} + +/* + * Find an entry for query ID 'id', socket address 'dest', and port number + * 'port'. * Return NULL if no such entry exists. 
*/ static dns_dispentry_t * -bucket_search(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id, - in_port_t port, unsigned int bucket) +entry_search(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id, + in_port_t port, unsigned int bucket) { dns_dispentry_t *res; @@ -564,7 +886,7 @@ bucket_search(dns_qid_t *qid, isc_sockaddr_t *dest, dns_messageid_t id, res = ISC_LIST_HEAD(qid->qid_table[bucket]); while (res != NULL) { - if ((res->id == id) && isc_sockaddr_equal(dest, &res->host) && + if (res->id == id && isc_sockaddr_equal(dest, &res->host) && res->port == port) { return (res); } @@ -638,6 +960,26 @@ allocate_event(dns_dispatch_t *disp) { return (ev); } +static void +udp_exrecv(isc_task_t *task, isc_event_t *ev) { + dispsocket_t *dispsock = ev->ev_arg; + + UNUSED(task); + + REQUIRE(VALID_DISPSOCK(dispsock)); + udp_recv(ev, dispsock->disp, dispsock); +} + +static void +udp_shrecv(isc_task_t *task, isc_event_t *ev) { + dns_dispatch_t *disp = ev->ev_arg; + + UNUSED(task); + + REQUIRE(VALID_DISPATCH(disp)); + udp_recv(ev, disp, NULL); +} + /* * General flow: * @@ -653,14 +995,13 @@ allocate_event(dns_dispatch_t *disp) { * restart. 
*/ static void -udp_recv(isc_task_t *task, isc_event_t *ev_in) { +udp_recv(isc_event_t *ev_in, dns_dispatch_t *disp, dispsocket_t *dispsock) { isc_socketevent_t *ev = (isc_socketevent_t *)ev_in; - dns_dispatch_t *disp = ev_in->ev_arg; dns_messageid_t id; isc_result_t dres; isc_buffer_t source; unsigned int flags; - dns_dispentry_t *resp; + dns_dispentry_t *resp = NULL; dns_dispatchevent_t *rev; unsigned int bucket; isc_boolean_t killit; @@ -669,8 +1010,8 @@ udp_recv(isc_task_t *task, isc_event_t *ev_in) { dns_qid_t *qid; isc_netaddr_t netaddr; int match; - - UNUSED(task); + int result; + isc_boolean_t qidlocked = ISC_FALSE; LOCK(&disp->lock); @@ -681,7 +1022,7 @@ udp_recv(isc_task_t *task, isc_event_t *ev_in) { "got packet: requests %d, buffers %d, recvs %d", disp->requests, disp->mgr->buffers, disp->recv_pending); - if (ev->ev_type == ISC_SOCKEVENT_RECVDONE) { + if (dispsock == NULL && ev->ev_type == ISC_SOCKEVENT_RECVDONE) { /* * Unless the receive event was imported from a listening * interface, in which case the event type is @@ -691,6 +1032,19 @@ udp_recv(isc_task_t *task, isc_event_t *ev_in) { disp->recv_pending = 0; } + if (dispsock != NULL && + (ev->result == ISC_R_CANCELED || dispsock->resp == NULL)) { + /* + * dispsock->resp can be NULL if this transaction was canceled + * just after receiving a response. Since this socket is + * exclusively used and there should be at most one receive + * event the canceled event should have been no effect. So + * we can (and should) deactivate the socket right now. + */ + deactivate_dispsocket(disp, dispsock); + dispsock = NULL; + } + if (disp->shutting_down) { /* * This dispatcher is shutting down. 
@@ -703,12 +1057,32 @@ udp_recv(isc_task_t *task, isc_event_t *ev_in) { killit = destroy_disp_ok(disp); UNLOCK(&disp->lock); if (killit) - isc_task_send(disp->task, &disp->ctlevent); + isc_task_send(disp->task[0], &disp->ctlevent); return; } - if (ev->result != ISC_R_SUCCESS) { + if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) { + if (dispsock != NULL) { + resp = dispsock->resp; + id = resp->id; + if (ev->result != ISC_R_SUCCESS) { + /* + * This is most likely a network error on a + * connected socket. It makes no sense to + * check the address or parse the packet, but it + * will help to return the error to the caller. + */ + goto sendresponse; + } + } else { + free_buffer(disp, ev->region.base, ev->region.length); + + UNLOCK(&disp->lock); + isc_event_free(&ev_in); + return; + } + } else if (ev->result != ISC_R_SUCCESS) { free_buffer(disp, ev->region.base, ev->region.length); if (ev->result != ISC_R_CANCELED) @@ -769,15 +1143,29 @@ udp_recv(isc_task_t *task, isc_event_t *ev_in) { goto restart; } - /* response */ - bucket = dns_hash(qid, &ev->address, id, disp->localport); - LOCK(&qid->lock); - resp = bucket_search(qid, &ev->address, id, disp->localport, bucket); - dispatch_log(disp, LVL(90), - "search for response in bucket %d: %s", - bucket, (resp == NULL ? "not found" : "found")); - + /* + * Search for the corresponding response. If we are using an exclusive + * socket, we've already identified it and we can skip the search; but + * the ID and the address must match the expected ones. + */ if (resp == NULL) { + bucket = dns_hash(qid, &ev->address, id, disp->localport); + LOCK(&qid->lock); + qidlocked = ISC_TRUE; + resp = entry_search(qid, &ev->address, id, disp->localport, + bucket); + dispatch_log(disp, LVL(90), + "search for response in bucket %d: %s", + bucket, (resp == NULL ? 
"not found" : "found")); + + if (resp == NULL) { + free_buffer(disp, ev->region.base, ev->region.length); + goto unlock; + } + } else if (resp->id != id || !isc_sockaddr_equal(&ev->address, + &resp->host)) { + dispatch_log(disp, LVL(90), + "response to an exclusive socket doesn't match"); free_buffer(disp, ev->region.base, ev->region.length); goto unlock; } @@ -825,6 +1213,7 @@ udp_recv(isc_task_t *task, isc_event_t *ev_in) { } } + sendresponse: queue_response = resp->item_out; rev = allocate_event(resp->disp); if (rev == NULL) { @@ -839,7 +1228,7 @@ udp_recv(isc_task_t *task, isc_event_t *ev_in) { */ isc_buffer_init(&rev->buffer, ev->region.base, ev->region.length); isc_buffer_add(&rev->buffer, ev->n); - rev->result = ISC_R_SUCCESS; + rev->result = ev->result; rev->id = id; rev->addr = ev->address; rev->pktinfo = ev->pktinfo; @@ -858,14 +1247,23 @@ udp_recv(isc_task_t *task, isc_event_t *ev_in) { isc_task_send(resp->task, ISC_EVENT_PTR(&rev)); } unlock: - UNLOCK(&qid->lock); + if (qidlocked) + UNLOCK(&qid->lock); /* * Restart recv() to get the next packet. */ restart: - startrecv(disp); - + result = startrecv(disp, dispsock); + if (result != ISC_R_SUCCESS && dispsock != NULL) { + /* + * XXX: wired. There seems to be no recovery process other than + * deactivate this socket anyway (since we cannot start + * receiving, we won't be able to receive a cancel event + * from the user). 
+ */ + deactivate_dispsocket(disp, dispsock); + } UNLOCK(&disp->lock); isc_event_free(&ev_in); @@ -965,7 +1363,7 @@ tcp_recv(isc_task_t *task, isc_event_t *ev_in) { killit = destroy_disp_ok(disp); UNLOCK(&disp->lock); if (killit) - isc_task_send(disp->task, &disp->ctlevent); + isc_task_send(disp->task[0], &disp->ctlevent); return; } @@ -1008,8 +1406,7 @@ tcp_recv(isc_task_t *task, isc_event_t *ev_in) { */ bucket = dns_hash(qid, &tcpmsg->address, id, disp->localport); LOCK(&qid->lock); - resp = bucket_search(qid, &tcpmsg->address, id, disp->localport, - bucket); + resp = entry_search(qid, &tcpmsg->address, id, disp->localport, bucket); dispatch_log(disp, LVL(90), "search for response in bucket %d: %s", bucket, (resp == NULL ? "not found" : "found")); @@ -1050,7 +1447,7 @@ tcp_recv(isc_task_t *task, isc_event_t *ev_in) { * Restart recv() to get the next packet. */ restart: - startrecv(disp); + (void)startrecv(disp, NULL); UNLOCK(&disp->lock); @@ -1060,22 +1457,33 @@ tcp_recv(isc_task_t *task, isc_event_t *ev_in) { /* * disp must be locked. 
*/ -static void -startrecv(dns_dispatch_t *disp) { +static isc_result_t +startrecv(dns_dispatch_t *disp, dispsocket_t *dispsock) { isc_result_t res; isc_region_t region; + isc_socket_t *socket; if (disp->shutting_down == 1) - return; + return (ISC_R_SUCCESS); if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) - return; + return (ISC_R_SUCCESS); - if (disp->recv_pending != 0) - return; + if (disp->recv_pending != 0 && dispsock == NULL) + return (ISC_R_SUCCESS); if (disp->mgr->buffers >= disp->mgr->maxbuffers) - return; + return (ISC_R_NOMEMORY); + + if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 && + dispsock == NULL) + return (ISC_R_SUCCESS); + + if (dispsock != NULL) + socket = dispsock->socket; + else + socket = disp->socket; + INSIST(socket != NULL); switch (disp->socktype) { /* @@ -1085,33 +1493,45 @@ startrecv(dns_dispatch_t *disp) { region.length = disp->mgr->buffersize; region.base = allocate_udp_buffer(disp); if (region.base == NULL) - return; - res = isc_socket_recv(disp->socket, &region, 1, - disp->task, udp_recv, disp); - if (res != ISC_R_SUCCESS) { - free_buffer(disp, region.base, region.length); - disp->shutdown_why = res; - disp->shutting_down = 1; - do_cancel(disp); - return; + return (ISC_R_NOMEMORY); + if (dispsock != NULL) { + res = isc_socket_recv(socket, &region, 1, + dispsock->task, udp_exrecv, + dispsock); + if (res != ISC_R_SUCCESS) { + free_buffer(disp, region.base, region.length); + return (res); + } + } else { + res = isc_socket_recv(socket, &region, 1, + disp->task[0], udp_shrecv, disp); + if (res != ISC_R_SUCCESS) { + free_buffer(disp, region.base, region.length); + disp->shutdown_why = res; + disp->shutting_down = 1; + do_cancel(disp); + return (ISC_R_SUCCESS); /* recover by cancel */ + } + INSIST(disp->recv_pending == 0); + disp->recv_pending = 1; } - INSIST(disp->recv_pending == 0); - disp->recv_pending = 1; break; case isc_sockettype_tcp: - res = dns_tcpmsg_readmessage(&disp->tcpmsg, disp->task, + res = 
dns_tcpmsg_readmessage(&disp->tcpmsg, disp->task[0], tcp_recv, disp); if (res != ISC_R_SUCCESS) { disp->shutdown_why = res; disp->shutting_down = 1; do_cancel(disp); - return; + return (ISC_R_SUCCESS); /* recover by cancel */ } INSIST(disp->recv_pending == 0); disp->recv_pending = 1; break; } + + return (ISC_R_SUCCESS); } /* @@ -1164,6 +1584,7 @@ destroy_mgr(dns_dispatchmgr_t **mgrp) { isc_mempool_destroy(&mgr->rpool); isc_mempool_destroy(&mgr->dpool); isc_mempool_destroy(&mgr->bpool); + isc_mempool_destroy(&mgr->spool); DESTROYLOCK(&mgr->pool_lock); @@ -1177,32 +1598,46 @@ destroy_mgr(dns_dispatchmgr_t **mgrp) { if (mgr->blackhole != NULL) dns_acl_detach(&mgr->blackhole); - if (mgr->portlist != NULL) - dns_portlist_detach(&mgr->portlist); - + if (mgr->v4ports != NULL) { + isc_mem_put(mctx, mgr->v4ports, + mgr->nv4ports * sizeof(in_port_t)); + } + if (mgr->v6ports != NULL) { + isc_mem_put(mctx, mgr->v6ports, + mgr->nv6ports * sizeof(in_port_t)); + } isc_mem_put(mctx, mgr, sizeof(dns_dispatchmgr_t)); isc_mem_detach(&mctx); } static isc_result_t -create_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local, - unsigned int options, isc_socket_t **sockp) +open_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local, + unsigned int options, isc_socket_t **sockp) { isc_socket_t *sock; isc_result_t result; - sock = NULL; - result = isc_socket_create(mgr, isc_sockaddr_pf(local), - isc_sockettype_udp, &sock); - if (result != ISC_R_SUCCESS) - return (result); + sock = *sockp; + if (sock == NULL) { + result = isc_socket_create(mgr, isc_sockaddr_pf(local), + isc_sockettype_udp, &sock); + if (result != ISC_R_SUCCESS) + return (result); + } else { + result = isc_socket_open(sock); + if (result != ISC_R_SUCCESS) + return (result); + } #ifndef ISC_ALLOW_MAPPED isc_socket_ipv6only(sock, ISC_TRUE); #endif result = isc_socket_bind(sock, local, options); if (result != ISC_R_SUCCESS) { - isc_socket_detach(&sock); + if (*sockp == NULL) + isc_socket_detach(&sock); + else + 
isc_socket_close(sock); return (result); } @@ -1211,6 +1646,24 @@ create_socket(isc_socketmgr_t *mgr, isc_sockaddr_t *local, } /* + * Create a temporary port list to set the initial default set of dispatch + * ports: [1024, 65535]. This is almost meaningless as the application will + * normally set the ports explicitly, but is provided to fill some minor corner + * cases. + */ +static isc_result_t +create_default_portset(isc_mem_t *mctx, isc_portset_t **portsetp) { + isc_result_t result; + + result = isc_portset_create(mctx, portsetp); + if (result != ISC_R_SUCCESS) + return (result); + isc_portset_addrange(*portsetp, 1024, 65535); + + return (ISC_R_SUCCESS); +} + +/* * Publics. */ @@ -1220,6 +1673,8 @@ dns_dispatchmgr_create(isc_mem_t *mctx, isc_entropy_t *entropy, { dns_dispatchmgr_t *mgr; isc_result_t result; + isc_portset_t *v4portset = NULL; + isc_portset_t *v6portset = NULL; REQUIRE(mctx != NULL); REQUIRE(mgrp != NULL && *mgrp == NULL); @@ -1232,7 +1687,6 @@ dns_dispatchmgr_create(isc_mem_t *mctx, isc_entropy_t *entropy, isc_mem_attach(mctx, &mgr->mctx); mgr->blackhole = NULL; - mgr->portlist = NULL; result = isc_mutex_init(&mgr->lock); if (result != ISC_R_SUCCESS) @@ -1287,20 +1741,43 @@ dns_dispatchmgr_create(isc_mem_t *mctx, isc_entropy_t *entropy, mgr->buffersize = 0; mgr->maxbuffers = 0; mgr->bpool = NULL; + mgr->spool = NULL; mgr->entropy = NULL; mgr->qid = NULL; mgr->state = 0; ISC_LIST_INIT(mgr->list); + mgr->v4ports = NULL; + mgr->v6ports = NULL; + mgr->nv4ports = 0; + mgr->nv6ports = 0; mgr->magic = DNS_DISPATCHMGR_MAGIC; + result = create_default_portset(mctx, &v4portset); + if (result == ISC_R_SUCCESS) { + result = create_default_portset(mctx, &v6portset); + if (result == ISC_R_SUCCESS) { + result = dns_dispatchmgr_setavailports(mgr, + v4portset, + v6portset); + } + } + if (v4portset != NULL) + isc_portset_destroy(mctx, &v4portset); + if (v6portset != NULL) + isc_portset_destroy(mctx, &v6portset); + if (result != ISC_R_SUCCESS) + goto kill_dpool; 
+ if (entropy != NULL) isc_entropy_attach(entropy, &mgr->entropy); - dispatch_arc4init(&mgr->arc4ctx); + dispatch_arc4init(&mgr->arc4ctx, mgr->entropy, &mgr->arc4_lock); *mgrp = mgr; return (ISC_R_SUCCESS); + kill_dpool: + isc_mempool_destroy(&mgr->dpool); kill_rpool: isc_mempool_destroy(&mgr->rpool); kill_epool: @@ -1339,22 +1816,88 @@ dns_dispatchmgr_setblackportlist(dns_dispatchmgr_t *mgr, dns_portlist_t *portlist) { REQUIRE(VALID_DISPATCHMGR(mgr)); - if (mgr->portlist != NULL) - dns_portlist_detach(&mgr->portlist); - if (portlist != NULL) - dns_portlist_attach(portlist, &mgr->portlist); + UNUSED(portlist); + + /* This function is deprecated: use dns_dispatchmgr_setavailports(). */ + return; } dns_portlist_t * dns_dispatchmgr_getblackportlist(dns_dispatchmgr_t *mgr) { REQUIRE(VALID_DISPATCHMGR(mgr)); - return (mgr->portlist); + return (NULL); /* this function is deprecated */ +} + +isc_result_t +dns_dispatchmgr_setavailports(dns_dispatchmgr_t *mgr, isc_portset_t *v4portset, + isc_portset_t *v6portset) +{ + in_port_t *v4ports, *v6ports, p; + unsigned int nv4ports, nv6ports, i4, i6; + + REQUIRE(VALID_DISPATCHMGR(mgr)); + + nv4ports = isc_portset_nports(v4portset); + nv6ports = isc_portset_nports(v6portset); + + v4ports = NULL; + if (nv4ports != 0) { + v4ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv4ports); + if (v4ports == NULL) + return (ISC_R_NOMEMORY); + } + v6ports = NULL; + if (nv6ports != 0) { + v6ports = isc_mem_get(mgr->mctx, sizeof(in_port_t) * nv6ports); + if (v6ports == NULL) { + if (v4ports != NULL) { + isc_mem_put(mgr->mctx, v4ports, + sizeof(in_port_t) * + isc_portset_nports(v4portset)); + } + return (ISC_R_NOMEMORY); + } + } + + p = 0; + i4 = 0; + i6 = 0; + do { + if (isc_portset_isset(v4portset, p)) { + INSIST(i4 < nv4ports); + v4ports[i4++] = p; + } + if (isc_portset_isset(v6portset, p)) { + INSIST(i6 < nv6ports); + v6ports[i6++] = p; + } + } while (p++ < 65535); + INSIST(i4 == nv4ports && i6 == nv6ports); + + PORTBUFLOCK(mgr); + if 
(mgr->v4ports != NULL) { + isc_mem_put(mgr->mctx, mgr->v4ports, + mgr->nv4ports * sizeof(in_port_t)); + } + mgr->v4ports = v4ports; + mgr->nv4ports = nv4ports; + + if (mgr->v6ports != NULL) { + isc_mem_put(mgr->mctx, mgr->v6ports, + mgr->nv6ports * sizeof(in_port_t)); + } + mgr->v6ports = v6ports; + mgr->nv6ports = nv6ports; + PORTBUFUNLOCK(mgr); + + return (ISC_R_SUCCESS); } static isc_result_t dns_dispatchmgr_setudp(dns_dispatchmgr_t *mgr, - unsigned int buffersize, unsigned int maxbuffers, - unsigned int buckets, unsigned int increment) + unsigned int buffersize, unsigned int maxbuffers, + unsigned int maxrequests, unsigned int buckets, + unsigned int increment) { isc_result_t result; @@ -1381,24 +1924,39 @@ dns_dispatchmgr_setudp(dns_dispatchmgr_t *mgr, maxbuffers = 8; LOCK(&mgr->buffer_lock); + + /* Create or adjust buffer pool */ if (mgr->bpool != NULL) { isc_mempool_setmaxalloc(mgr->bpool, maxbuffers); mgr->maxbuffers = maxbuffers; + } else { + result = isc_mempool_create(mgr->mctx, buffersize, &mgr->bpool); + if (result != ISC_R_SUCCESS) { + UNLOCK(&mgr->buffer_lock); + return (result); + } + isc_mempool_setname(mgr->bpool, "dispmgr_bpool"); + isc_mempool_setmaxalloc(mgr->bpool, maxbuffers); + isc_mempool_associatelock(mgr->bpool, &mgr->pool_lock); + } + + /* Create or adjust socket pool */ + if (mgr->spool != NULL) { + isc_mempool_setmaxalloc(mgr->spool, DNS_DISPATCH_POOLSOCKS * 2); UNLOCK(&mgr->buffer_lock); return (ISC_R_SUCCESS); } - - if (isc_mempool_create(mgr->mctx, buffersize, - &mgr->bpool) != ISC_R_SUCCESS) { + result = isc_mempool_create(mgr->mctx, sizeof(dispsocket_t), + &mgr->spool); + if (result != ISC_R_SUCCESS) { UNLOCK(&mgr->buffer_lock); - return (ISC_R_NOMEMORY); + goto cleanup; } + isc_mempool_setname(mgr->spool, "dispmgr_spool"); + isc_mempool_setmaxalloc(mgr->spool, maxrequests); + isc_mempool_associatelock(mgr->spool, &mgr->pool_lock); - isc_mempool_setname(mgr->bpool, "dispmgr_bpool"); - isc_mempool_setmaxalloc(mgr->bpool, 
maxbuffers); - isc_mempool_associatelock(mgr->bpool, &mgr->pool_lock); - - result = qid_allocate(mgr, buckets, increment, &mgr->qid); + result = qid_allocate(mgr, buckets, increment, &mgr->qid, ISC_TRUE); if (result != ISC_R_SUCCESS) goto cleanup; @@ -1409,8 +1967,10 @@ dns_dispatchmgr_setudp(dns_dispatchmgr_t *mgr, cleanup: isc_mempool_destroy(&mgr->bpool); + if (mgr->spool != NULL) + isc_mempool_destroy(&mgr->spool); UNLOCK(&mgr->buffer_lock); - return (ISC_R_NOMEMORY); + return (result); } void @@ -1436,30 +1996,56 @@ dns_dispatchmgr_destroy(dns_dispatchmgr_t **mgrp) { destroy_mgr(&mgr); } +static int +port_cmp(const void *key, const void *ent) { + in_port_t p1 = *(const in_port_t *)key; + in_port_t p2 = *(const in_port_t *)ent; + + if (p1 < p2) + return (-1); + else if (p1 == p2) + return (0); + else + return (1); +} + static isc_boolean_t -blacklisted(dns_dispatchmgr_t *mgr, isc_socket_t *sock, - isc_sockaddr_t *sockaddrp) +portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock, + isc_sockaddr_t *sockaddrp) { isc_sockaddr_t sockaddr; isc_result_t result; + in_port_t *ports, port; + unsigned int nports; + isc_boolean_t available = ISC_FALSE; REQUIRE(sock != NULL || sockaddrp != NULL); - if (mgr->portlist == NULL) - return (ISC_FALSE); - + PORTBUFLOCK(mgr); if (sock != NULL) { sockaddrp = &sockaddr; result = isc_socket_getsockname(sock, sockaddrp); if (result != ISC_R_SUCCESS) - return (ISC_FALSE); + goto unlock; } - if (mgr->portlist != NULL && - dns_portlist_match(mgr->portlist, isc_sockaddr_pf(sockaddrp), - isc_sockaddr_getport(sockaddrp))) - return (ISC_TRUE); - return (ISC_FALSE); + if (isc_sockaddr_pf(sockaddrp) == AF_INET) { + ports = mgr->v4ports; + nports = mgr->nv4ports; + } else { + ports = mgr->v6ports; + nports = mgr->nv6ports; + } + if (ports == NULL) + goto unlock; + + port = isc_sockaddr_getport(sockaddrp); + if (bsearch(&port, ports, nports, sizeof(in_port_t), port_cmp) != NULL) + available = ISC_TRUE; + +unlock: + PORTBUFUNLOCK(mgr); + return 
(available); } #define ATTRMATCH(_a1, _a2, _mask) (((_a1) & (_mask)) == ((_a2) & (_mask))) @@ -1469,17 +2055,20 @@ local_addr_match(dns_dispatch_t *disp, isc_sockaddr_t *addr) { isc_sockaddr_t sockaddr; isc_result_t result; + REQUIRE(disp->socket != NULL); + if (addr == NULL) return (ISC_TRUE); /* - * Don't match wildcard ports against newly blacklisted ports. + * Don't match wildcard ports unless the port is available in the + * current configuration. */ - if (disp->mgr->portlist != NULL && - isc_sockaddr_getport(addr) == 0 && + if (isc_sockaddr_getport(addr) == 0 && isc_sockaddr_getport(&disp->local) == 0 && - blacklisted(disp->mgr, disp->socket, NULL)) + !portavailable(disp->mgr, disp->socket, NULL)) { return (ISC_FALSE); + } /* * Check if we match the binding <address,port>. @@ -1521,10 +2110,10 @@ dispatch_find(dns_dispatchmgr_t *mgr, isc_sockaddr_t *local, isc_result_t result; /* - * Make certain that we will not match a private dispatch. + * Make certain that we will not match a private or exclusive dispatch. 
*/ - attributes &= ~DNS_DISPATCHATTR_PRIVATE; - mask |= DNS_DISPATCHATTR_PRIVATE; + attributes &= ~(DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE); + mask |= (DNS_DISPATCHATTR_PRIVATE|DNS_DISPATCHATTR_EXCLUSIVE); disp = ISC_LIST_HEAD(mgr->list); while (disp != NULL) { @@ -1551,7 +2140,8 @@ dispatch_find(dns_dispatchmgr_t *mgr, isc_sockaddr_t *local, static isc_result_t qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets, - unsigned int increment, dns_qid_t **qidp) + unsigned int increment, dns_qid_t **qidp, + isc_boolean_t needsocktable) { dns_qid_t *qid; unsigned int i; @@ -1572,16 +2162,35 @@ qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets, return (ISC_R_NOMEMORY); } + qid->sock_table = NULL; + if (needsocktable) { + qid->sock_table = isc_mem_get(mgr->mctx, buckets * + sizeof(dispsocketlist_t)); + if (qid->sock_table == NULL) { + isc_mem_put(mgr->mctx, qid, sizeof(*qid)); + isc_mem_put(mgr->mctx, qid->qid_table, + buckets * sizeof(dns_displist_t)); + return (ISC_R_NOMEMORY); + } + } + if (isc_mutex_init(&qid->lock) != ISC_R_SUCCESS) { UNEXPECTED_ERROR(__FILE__, __LINE__, "isc_mutex_init failed"); + if (qid->sock_table != NULL) { + isc_mem_put(mgr->mctx, qid->sock_table, + buckets * sizeof(dispsocketlist_t)); + } isc_mem_put(mgr->mctx, qid->qid_table, buckets * sizeof(dns_displist_t)); isc_mem_put(mgr->mctx, qid, sizeof(*qid)); return (ISC_R_UNEXPECTED); } - for (i = 0; i < buckets; i++) + for (i = 0; i < buckets; i++) { ISC_LIST_INIT(qid->qid_table[i]); + if (qid->sock_table != NULL) + ISC_LIST_INIT(qid->sock_table[i]); + } qid->qid_nbuckets = buckets; qid->qid_increment = increment; @@ -1603,6 +2212,10 @@ qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp) { qid->magic = 0; isc_mem_put(mctx, qid->qid_table, qid->qid_nbuckets * sizeof(dns_displist_t)); + if (qid->sock_table != NULL) { + isc_mem_put(mctx, qid->sock_table, + qid->qid_nbuckets * sizeof(dispsocketlist_t)); + } DESTROYLOCK(&qid->lock); isc_mem_put(mctx, qid, sizeof(*qid)); } @@ 
-1646,6 +2259,10 @@ dispatch_allocate(dns_dispatchmgr_t *mgr, unsigned int maxrequests, disp->requests = 0; disp->tcpbuffers = 0; disp->qid = NULL; + ISC_LIST_INIT(disp->activesockets); + ISC_LIST_INIT(disp->inactivesockets); + disp->nsockets = 0; + dispatch_arc4init(&disp->arc4ctx, mgr->entropy, NULL); if (isc_mutex_init(&disp->lock) != ISC_R_SUCCESS) { res = ISC_R_UNEXPECTED; @@ -1700,6 +2317,8 @@ dispatch_free(dns_dispatch_t **dispp) INSIST(disp->tcpbuffers == 0); INSIST(disp->requests == 0); INSIST(disp->recv_pending == 0); + INSIST(ISC_LIST_EMPTY(disp->activesockets)); + INSIST(ISC_LIST_EMPTY(disp->inactivesockets)); isc_mempool_put(mgr->epool, disp->failsafe_ev); disp->failsafe_ev = NULL; @@ -1745,7 +2364,7 @@ dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock, return (result); } - result = qid_allocate(mgr, buckets, increment, &disp->qid); + result = qid_allocate(mgr, buckets, increment, &disp->qid, ISC_FALSE); if (result != ISC_R_SUCCESS) goto deallocate_dispatch; @@ -1753,8 +2372,9 @@ dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock, disp->socket = NULL; isc_socket_attach(sock, &disp->socket); - disp->task = NULL; - result = isc_task_create(taskmgr, 0, &disp->task); + disp->ntasks = 1; + disp->task[0] = NULL; + result = isc_task_create(taskmgr, 0, &disp->task[0]); if (result != ISC_R_SUCCESS) goto kill_socket; @@ -1767,7 +2387,7 @@ dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock, goto kill_task; } - isc_task_setname(disp->task, "tcpdispatch", disp); + isc_task_setname(disp->task[0], "tcpdispatch", disp); dns_tcpmsg_init(mgr->mctx, disp->socket, &disp->tcpmsg); disp->tcpmsg_valid = 1; @@ -1781,7 +2401,7 @@ dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock, UNLOCK(&mgr->lock); mgr_log(mgr, LVL(90), "created TCP dispatcher %p", disp); - dispatch_log(disp, LVL(90), "created task %p", disp->task); + dispatch_log(disp, LVL(90), "created task %p", disp->task[0]); *dispp = disp; @@ -1791,7 +2411,7 
@@ dns_dispatch_createtcp(dns_dispatchmgr_t *mgr, isc_socket_t *sock, * Error returns. */ kill_task: - isc_task_detach(&disp->task); + isc_task_detach(&disp->task[0]); kill_socket: isc_socket_detach(&disp->socket); deallocate_dispatch: @@ -1826,13 +2446,13 @@ dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, REQUIRE((attributes & DNS_DISPATCHATTR_TCP) == 0); result = dns_dispatchmgr_setudp(mgr, buffersize, maxbuffers, - buckets, increment); + maxrequests, buckets, increment); if (result != ISC_R_SUCCESS) return (result); LOCK(&mgr->lock); - if ((attributes & DNS_DISPATCHATTR_RANDOMPORT) != 0) { + if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) { REQUIRE(isc_sockaddr_getport(localaddr) == 0); goto createudp; } @@ -1853,7 +2473,7 @@ dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, { disp->attributes |= DNS_DISPATCHATTR_NOLISTEN; if (disp->recv_pending != 0) - isc_socket_cancel(disp->socket, disp->task, + isc_socket_cancel(disp->socket, disp->task[0], ISC_SOCKCANCEL_RECV); } @@ -1890,6 +2510,101 @@ dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, #endif static isc_result_t +get_udpsocket(dns_dispatchmgr_t *mgr, dns_dispatch_t *disp, + isc_socketmgr_t *sockmgr, isc_sockaddr_t *localaddr, + isc_socket_t **sockp) +{ + unsigned int i, j; + isc_socket_t *held[DNS_DISPATCH_HELD]; + isc_sockaddr_t localaddr_bound; + isc_socket_t *sock = NULL; + isc_result_t result = ISC_R_SUCCESS; + isc_boolean_t anyport; + + INSIST(sockp != NULL && *sockp == NULL); + + localaddr_bound = *localaddr; + anyport = ISC_TF(isc_sockaddr_getport(localaddr) == 0); + + if (anyport) { + unsigned int nports; + in_port_t *ports; + + /* + * If no port is specified, we first try to pick up a random + * port by ourselves. 
+ */ + if (isc_sockaddr_pf(&disp->local) == AF_INET) { + nports = disp->mgr->nv4ports; + ports = disp->mgr->v4ports; + } else { + nports = disp->mgr->nv6ports; + ports = disp->mgr->v6ports; + } + if (nports == 0) + return (ISC_R_ADDRNOTAVAIL); + + for (i = 0; i < 1024; i++) { + in_port_t prt; + + prt = ports[dispatch_arc4uniformrandom( + DISP_ARC4CTX(disp), + nports)]; + isc_sockaddr_setport(&localaddr_bound, prt); + result = open_socket(sockmgr, &localaddr_bound, + 0, &sock); + if (result == ISC_R_SUCCESS || + result != ISC_R_ADDRINUSE) { + disp->localport = prt; + *sockp = sock; + return (result); + } + } + + /* + * If this fails 1024 times, we then ask the kernel for + * choosing one. + */ + } + + memset(held, 0, sizeof(held)); + i = 0; + + for (j = 0; j < 0xffffU; j++) { + result = open_socket(sockmgr, localaddr, 0, &sock); + if (result != ISC_R_SUCCESS) + goto end; + else if (!anyport) + break; + else if (portavailable(mgr, sock, NULL)) + break; + if (held[i] != NULL) + isc_socket_detach(&held[i]); + held[i++] = sock; + sock = NULL; + if (i == DNS_DISPATCH_HELD) + i = 0; + } + if (j == 0xffffU) { + mgr_log(mgr, ISC_LOG_ERROR, + "avoid-v%s-udp-ports: unable to allocate " + "an available port", + isc_sockaddr_pf(localaddr) == AF_INET ? "4" : "6"); + result = ISC_R_FAILURE; + goto end; + } + *sockp = sock; + +end: + for (i = 0; i < DNS_DISPATCH_HELD; i++) { + if (held[i] != NULL) + isc_socket_detach(&held[i]); + } + + return (result); +} + +static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, isc_taskmgr_t *taskmgr, isc_sockaddr_t *localaddr, @@ -1900,10 +2615,7 @@ dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, isc_result_t result; dns_dispatch_t *disp; isc_socket_t *sock = NULL; - isc_socket_t *held[DNS_DISPATCH_HELD]; - unsigned int i = 0, j = 0, k = 0; - isc_sockaddr_t localaddr_bound; - in_port_t localport = 0; + int i = 0; /* * dispatch_allocate() checks mgr for us. 
@@ -1913,67 +2625,46 @@ dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, if (result != ISC_R_SUCCESS) return (result); - /* - * Try to allocate a socket that is not on the blacklist. - * Hold up to DNS_DISPATCH_HELD sockets to prevent the OS - * from returning the same port to us too quickly. - */ - memset(held, 0, sizeof(held)); - localaddr_bound = *localaddr; - getsocket: - if ((attributes & DNS_DISPATCHATTR_RANDOMPORT) != 0) { - in_port_t prt; - - /* XXX: should the range be configurable? */ - prt = 1024 + dispatch_arc4uniformrandom(mgr, 65535 - 1023); - isc_sockaddr_setport(&localaddr_bound, prt); - if (blacklisted(mgr, NULL, &localaddr_bound)) { - if (++k == 1024) - attributes &= ~DNS_DISPATCHATTR_RANDOMPORT; - goto getsocket; - } - result = create_socket(sockmgr, &localaddr_bound, 0, &sock); - if (result == ISC_R_ADDRINUSE) { - if (++k == 1024) - attributes &= ~DNS_DISPATCHATTR_RANDOMPORT; - goto getsocket; - } - localport = prt; - } else - result = create_socket(sockmgr, localaddr, - ISC_SOCKET_REUSEADDRESS, &sock); - if (result != ISC_R_SUCCESS) - goto deallocate_dispatch; - if ((attributes & DNS_DISPATCHATTR_RANDOMPORT) == 0 && - isc_sockaddr_getport(localaddr) == 0 && - blacklisted(mgr, sock, NULL)) - { - if (held[i] != NULL) - isc_socket_detach(&held[i]); - held[i++] = sock; - sock = NULL; - if (i == DNS_DISPATCH_HELD) - i = 0; - if (j++ == 0xffffU) { - mgr_log(mgr, ISC_LOG_ERROR, "avoid-v%s-udp-ports: " - "unable to allocate a non-blacklisted port", - isc_sockaddr_pf(localaddr) == AF_INET ? - "4" : "6"); - result = ISC_R_FAILURE; + if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0) { + result = get_udpsocket(mgr, disp, sockmgr, localaddr, &sock); + if (result != ISC_R_SUCCESS) goto deallocate_dispatch; + } else { + isc_sockaddr_t sa_any; + + /* + * For dispatches using exclusive sockets with a specific + * source address, we only check if the specified address is + * available on the system. 
Query sockets will be created later + * on demand. + */ + isc_sockaddr_anyofpf(&sa_any, isc_sockaddr_pf(localaddr)); + if (!isc_sockaddr_eqaddr(&sa_any, localaddr)) { + result = open_socket(sockmgr, localaddr, 0, &sock); + if (sock != NULL) + isc_socket_detach(&sock); + if (result != ISC_R_SUCCESS) + goto deallocate_dispatch; } - goto getsocket; } - disp->socktype = isc_sockettype_udp; disp->socket = sock; disp->local = *localaddr; - disp->localport = localport; - disp->task = NULL; - result = isc_task_create(taskmgr, 0, &disp->task); - if (result != ISC_R_SUCCESS) - goto kill_socket; + if ((attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) + disp->ntasks = MAX_INTERNAL_TASKS; + else + disp->ntasks = 1; + for (i = 0; i < disp->ntasks; i++) { + disp->task[i] = NULL; + result = isc_task_create(taskmgr, 0, &disp->task[i]); + if (result != ISC_R_SUCCESS) { + while (--i >= 0) + isc_task_destroy(&disp->task[i]); + goto kill_socket; + } + isc_task_setname(disp->task[i], "udpdispatch", disp); + } disp->ctlevent = isc_event_allocate(mgr->mctx, disp, DNS_EVENT_DISPATCHCONTROL, @@ -1984,8 +2675,6 @@ dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, goto kill_task; } - isc_task_setname(disp->task, "udpdispatch", disp); - attributes &= ~DNS_DISPATCHATTR_TCP; attributes |= DNS_DISPATCHATTR_UDP; disp->attributes = attributes; @@ -1996,26 +2685,25 @@ dispatch_createudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, ISC_LIST_APPEND(mgr->list, disp, link); mgr_log(mgr, LVL(90), "created UDP dispatcher %p", disp); - dispatch_log(disp, LVL(90), "created task %p", disp->task); - dispatch_log(disp, LVL(90), "created socket %p", disp->socket); + dispatch_log(disp, LVL(90), "created task %p", disp->task[0]); /* XXX */ + if (disp->socket != NULL) + dispatch_log(disp, LVL(90), "created socket %p", disp->socket); *dispp = disp; - - goto cleanheld; + return (result); /* * Error returns. 
*/ kill_task: - isc_task_detach(&disp->task); + for (i = 0; i < disp->ntasks; i++) + isc_task_detach(&disp->task[i]); kill_socket: - isc_socket_detach(&disp->socket); + if (disp->socket != NULL) + isc_socket_detach(&disp->socket); deallocate_dispatch: dispatch_free(&disp); - cleanheld: - for (i = 0; i < DNS_DISPATCH_HELD; i++) - if (held[i] != NULL) - isc_socket_detach(&held[i]); + return (result); } @@ -2041,6 +2729,7 @@ dns_dispatch_attach(dns_dispatch_t *disp, dns_dispatch_t **dispp) { void dns_dispatch_detach(dns_dispatch_t **dispp) { dns_dispatch_t *disp; + dispsocket_t *dispsock; isc_boolean_t killit; REQUIRE(dispp != NULL && VALID_DISPATCH(*dispp)); @@ -2055,8 +2744,14 @@ dns_dispatch_detach(dns_dispatch_t **dispp) { killit = ISC_FALSE; if (disp->refcount == 0) { if (disp->recv_pending > 0) - isc_socket_cancel(disp->socket, disp->task, + isc_socket_cancel(disp->socket, disp->task[0], + ISC_SOCKCANCEL_RECV); + for (dispsock = ISC_LIST_HEAD(disp->activesockets); + dispsock != NULL; + dispsock = ISC_LIST_NEXT(dispsock, link)) { + isc_socket_cancel(dispsock->socket, dispsock->task, ISC_SOCKCANCEL_RECV); + } disp->shutting_down = 1; } @@ -2065,26 +2760,32 @@ dns_dispatch_detach(dns_dispatch_t **dispp) { killit = destroy_disp_ok(disp); UNLOCK(&disp->lock); if (killit) - isc_task_send(disp->task, &disp->ctlevent); + isc_task_send(disp->task[0], &disp->ctlevent); } isc_result_t -dns_dispatch_addresponse(dns_dispatch_t *disp, isc_sockaddr_t *dest, - isc_task_t *task, isc_taskaction_t action, void *arg, - dns_messageid_t *idp, dns_dispentry_t **resp) +dns_dispatch_addresponse2(dns_dispatch_t *disp, isc_sockaddr_t *dest, + isc_task_t *task, isc_taskaction_t action, void *arg, + dns_messageid_t *idp, dns_dispentry_t **resp, + isc_socketmgr_t *sockmgr) { dns_dispentry_t *res; unsigned int bucket; + in_port_t localport = 0; dns_messageid_t id; int i; isc_boolean_t ok; dns_qid_t *qid; + dispsocket_t *dispsocket = NULL; + isc_result_t result; REQUIRE(VALID_DISPATCH(disp)); 
REQUIRE(task != NULL); REQUIRE(dest != NULL); REQUIRE(resp != NULL && *resp == NULL); REQUIRE(idp != NULL); + if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) + REQUIRE(sockmgr != NULL); LOCK(&disp->lock); @@ -2098,23 +2799,75 @@ dns_dispatch_addresponse(dns_dispatch_t *disp, isc_sockaddr_t *dest, return (ISC_R_QUOTA); } + if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0 && + disp->nsockets > DNS_DISPATCH_SOCKSQUOTA) { + dispsocket_t *oldestsocket; + dns_dispentry_t *oldestresp; + dns_dispatchevent_t *rev; + + /* + * Kill oldest outstanding query if the number of sockets + * exceeds the quota to keep the room for new queries. + */ + oldestsocket = ISC_LIST_HEAD(disp->activesockets); + oldestresp = oldestsocket->resp; + if (oldestresp != NULL && !oldestresp->item_out) { + rev = allocate_event(oldestresp->disp); + if (rev != NULL) { + rev->buffer.base = NULL; + rev->result = ISC_R_CANCELED; + rev->id = oldestresp->id; + ISC_EVENT_INIT(rev, sizeof(*rev), 0, + NULL, DNS_EVENT_DISPATCH, + oldestresp->action, + oldestresp->arg, oldestresp, + NULL, NULL); + oldestresp->item_out = ISC_TRUE; + isc_task_send(oldestresp->task, + ISC_EVENT_PTR(&rev)); + } + } + + /* + * Move this entry to the tail so that it won't (easily) be + * examined before actually being canceled. + */ + ISC_LIST_UNLINK(disp->activesockets, oldestsocket, link); + ISC_LIST_APPEND(disp->activesockets, oldestsocket, link); + } + + qid = DNS_QID(disp); + LOCK(&qid->lock); + + if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) { + /* + * Get a separate UDP socket with a random port number. + */ + result = get_dispsocket(disp, dest, sockmgr, qid, &dispsocket, + &localport); + if (result != ISC_R_SUCCESS) { + UNLOCK(&qid->lock); + UNLOCK(&disp->lock); + return (result); + } + } else { + localport = disp->localport; + } + /* * Try somewhat hard to find an unique ID. 
*/ - id = (dns_messageid_t)dispatch_arc4random(disp->mgr); - qid = DNS_QID(disp); - LOCK(&qid->lock); - bucket = dns_hash(qid, dest, id, disp->localport); + id = (dns_messageid_t)dispatch_arc4random(DISP_ARC4CTX(disp)); + bucket = dns_hash(qid, dest, id, localport); ok = ISC_FALSE; for (i = 0; i < 64; i++) { - if (bucket_search(qid, dest, id, disp->localport, bucket) == - NULL) { + if (entry_search(qid, dest, id, localport, bucket) == NULL) { ok = ISC_TRUE; break; } id += qid->qid_increment; id &= 0x0000ffff; - bucket = dns_hash(qid, dest, id, disp->localport); + bucket = dns_hash(qid, dest, id, localport); } if (!ok) { @@ -2127,6 +2880,8 @@ dns_dispatch_addresponse(dns_dispatch_t *disp, isc_sockaddr_t *dest, if (res == NULL) { UNLOCK(&qid->lock); UNLOCK(&disp->lock); + if (dispsocket != NULL) + destroy_dispsocket(disp, &dispsocket); return (ISC_R_NOMEMORY); } @@ -2136,11 +2891,14 @@ dns_dispatch_addresponse(dns_dispatch_t *disp, isc_sockaddr_t *dest, isc_task_attach(task, &res->task); res->disp = disp; res->id = id; - res->port = disp->localport; + res->port = localport; res->bucket = bucket; res->host = *dest; res->action = action; res->arg = arg; + res->dispsocket = dispsocket; + if (dispsocket != NULL) + dispsocket->resp = res; res->item_out = ISC_FALSE; ISC_LIST_INIT(res->items); ISC_LINK_INIT(res, link); @@ -2152,27 +2910,62 @@ dns_dispatch_addresponse(dns_dispatch_t *disp, isc_sockaddr_t *dest, "attached to task %p", res->task); if (((disp->attributes & DNS_DISPATCHATTR_UDP) != 0) || - ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) != 0)) - startrecv(disp); + ((disp->attributes & DNS_DISPATCHATTR_CONNECTED) != 0)) { + result = startrecv(disp, dispsocket); + if (result != ISC_R_SUCCESS) { + LOCK(&qid->lock); + ISC_LIST_UNLINK(qid->qid_table[bucket], res, link); + UNLOCK(&qid->lock); + + if (dispsocket != NULL) + destroy_dispsocket(disp, &dispsocket); + + disp->refcount--; + disp->requests--; + + UNLOCK(&disp->lock); + isc_task_detach(&res->task); + 
isc_mempool_put(disp->mgr->rpool, res); + return (result); + } + } + + if (dispsocket != NULL) + ISC_LIST_APPEND(disp->activesockets, dispsocket, link); UNLOCK(&disp->lock); *idp = id; *resp = res; + if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) + INSIST(res->dispsocket != NULL); + return (ISC_R_SUCCESS); } +isc_result_t +dns_dispatch_addresponse(dns_dispatch_t *disp, isc_sockaddr_t *dest, + isc_task_t *task, isc_taskaction_t action, void *arg, + dns_messageid_t *idp, dns_dispentry_t **resp) +{ + REQUIRE(VALID_DISPATCH(disp)); + REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0); + + return (dns_dispatch_addresponse2(disp, dest, task, action, arg, + idp, resp, NULL)); +} + void dns_dispatch_starttcp(dns_dispatch_t *disp) { REQUIRE(VALID_DISPATCH(disp)); - dispatch_log(disp, LVL(90), "starttcp %p", disp->task); + dispatch_log(disp, LVL(90), "starttcp %p", disp->task[0]); LOCK(&disp->lock); disp->attributes |= DNS_DISPATCHATTR_CONNECTED; - startrecv(disp); + (void)startrecv(disp, NULL); UNLOCK(&disp->lock); } @@ -2183,6 +2976,7 @@ dns_dispatch_removeresponse(dns_dispentry_t **resp, dns_dispatchmgr_t *mgr; dns_dispatch_t *disp; dns_dispentry_t *res; + dispsocket_t *dispsock; dns_dispatchevent_t *ev; unsigned int bucket; isc_boolean_t killit; @@ -2220,8 +3014,14 @@ dns_dispatch_removeresponse(dns_dispentry_t **resp, killit = ISC_FALSE; if (disp->refcount == 0) { if (disp->recv_pending > 0) - isc_socket_cancel(disp->socket, disp->task, + isc_socket_cancel(disp->socket, disp->task[0], + ISC_SOCKCANCEL_RECV); + for (dispsock = ISC_LIST_HEAD(disp->activesockets); + dispsock != NULL; + dispsock = ISC_LIST_NEXT(dispsock, link)) { + isc_socket_cancel(dispsock->socket, dispsock->task, ISC_SOCKCANCEL_RECV); + } disp->shutting_down = 1; } @@ -2257,6 +3057,12 @@ dns_dispatch_removeresponse(dns_dispentry_t **resp, request_log(disp, res, LVL(90), "detaching from task %p", res->task); isc_task_detach(&res->task); + if (res->dispsocket != NULL) { + 
isc_socket_cancel(res->dispsocket->socket, + res->dispsocket->task, ISC_SOCKCANCEL_RECV); + res->dispsocket->resp = NULL; + } + /* * Free any buffered requests as well */ @@ -2273,12 +3079,12 @@ dns_dispatch_removeresponse(dns_dispentry_t **resp, if (disp->shutting_down == 1) do_cancel(disp); else - startrecv(disp); + (void)startrecv(disp, NULL); killit = destroy_disp_ok(disp); UNLOCK(&disp->lock); if (killit) - isc_task_send(disp->task, &disp->ctlevent); + isc_task_send(disp->task[0], &disp->ctlevent); } static void @@ -2293,13 +3099,15 @@ do_cancel(dns_dispatch_t *disp) { qid = DNS_QID(disp); /* - * Search for the first response handler without packets outstanding. + * Search for the first response handler without packets outstanding + * unless a specific hander is given. */ LOCK(&qid->lock); for (resp = linear_first(qid); - resp != NULL && resp->item_out != ISC_FALSE; + resp != NULL && resp->item_out; /* Empty. */) resp = linear_next(qid, resp); + /* * No one to send the cancel event to, so nothing to do. */ @@ -2332,6 +3140,16 @@ dns_dispatch_getsocket(dns_dispatch_t *disp) { return (disp->socket); } +isc_socket_t * +dns_dispatch_getentrysocket(dns_dispentry_t *resp) { + REQUIRE(VALID_RESPONSE(resp)); + + if (resp->dispsocket != NULL) + return (resp->dispsocket->socket); + else + return (NULL); +} + isc_result_t dns_dispatch_getlocaladdress(dns_dispatch_t *disp, isc_sockaddr_t *addrp) { @@ -2365,11 +3183,27 @@ dns_dispatch_cancel(dns_dispatch_t *disp) { return; } +unsigned int +dns_dispatch_getattributes(dns_dispatch_t *disp) { + REQUIRE(VALID_DISPATCH(disp)); + + /* + * We don't bother locking disp here; it's the caller's responsibility + * to use only non volatile flags. 
+ */ + return (disp->attributes); +} + void dns_dispatch_changeattributes(dns_dispatch_t *disp, unsigned int attributes, unsigned int mask) { REQUIRE(VALID_DISPATCH(disp)); + /* Exclusive attribute can only be set on creation */ + REQUIRE((attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0); + /* Also, a dispatch with randomport specified cannot start listening */ + REQUIRE((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) == 0 || + (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0); /* XXXMLG * Should check for valid attributes here! @@ -2381,13 +3215,13 @@ dns_dispatch_changeattributes(dns_dispatch_t *disp, if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) != 0 && (attributes & DNS_DISPATCHATTR_NOLISTEN) == 0) { disp->attributes &= ~DNS_DISPATCHATTR_NOLISTEN; - startrecv(disp); + (void)startrecv(disp, NULL); } else if ((disp->attributes & DNS_DISPATCHATTR_NOLISTEN) == 0 && (attributes & DNS_DISPATCHATTR_NOLISTEN) != 0) { disp->attributes |= DNS_DISPATCHATTR_NOLISTEN; if (disp->recv_pending != 0) - isc_socket_cancel(disp->socket, disp->task, + isc_socket_cancel(disp->socket, disp->task[0], ISC_SOCKCANCEL_RECV); } } @@ -2411,7 +3245,7 @@ dns_dispatch_importrecv(dns_dispatch_t *disp, isc_event_t *event) { INSIST(sevent->n <= disp->mgr->buffersize); newsevent = (isc_socketevent_t *) isc_event_allocate(disp->mgr->mctx, NULL, - DNS_EVENT_IMPORTRECVDONE, udp_recv, + DNS_EVENT_IMPORTRECVDONE, udp_shrecv, disp, sizeof(isc_socketevent_t)); if (newsevent == NULL) return; @@ -2431,7 +3265,7 @@ dns_dispatch_importrecv(dns_dispatch_t *disp, isc_event_t *event) { newsevent->pktinfo = sevent->pktinfo; newsevent->attributes = sevent->attributes; - isc_task_send(disp->task, ISC_EVENT_PTR(&newsevent)); + isc_task_send(disp->task[0], ISC_EVENT_PTR(&newsevent)); } #if 0 diff --git a/lib/dns/include/dns/dispatch.h b/lib/dns/include/dns/dispatch.h index e3495aaa361f2..041e2b6acb012 100644 --- a/lib/dns/include/dns/dispatch.h +++ b/lib/dns/include/dns/dispatch.h @@ -15,7 +15,7 @@ * PERFORMANCE OF 
THIS SOFTWARE. */ -/* $Id: dispatch.h,v 1.45.2.2.4.5.4.2 2008/07/23 07:28:11 tbox Exp $ */ +/* $Id: dispatch.h,v 1.45.2.2.4.9 2008/06/25 23:45:37 tbox Exp $ */ #ifndef DNS_DISPATCH_H #define DNS_DISPATCH_H 1 @@ -104,7 +104,7 @@ struct dns_dispatchevent { * The dispatcher is a TCP or UDP socket. * * _IPV4, _IPV6 - * The dispatcher uses an ipv4 or ipv6 socket. + * The dispatcher uses an IPv4 or IPv6 socket. * * _NOLISTEN * The dispatcher should not listen on the socket. @@ -114,7 +114,12 @@ struct dns_dispatchevent { * accept replies from them. * * _RANDOMPORT - * Allocate UDP port randomly. + * Previously used to indicate that the port of a dispatch UDP must be + * chosen randomly. This behavior now always applies and the attribute + * is obsoleted. + * + * _EXCLUSIVE + * A separate socket will be used on-demand for each transaction. */ #define DNS_DISPATCHATTR_PRIVATE 0x00000001U #define DNS_DISPATCHATTR_TCP 0x00000002U @@ -124,7 +129,8 @@ struct dns_dispatchevent { #define DNS_DISPATCHATTR_NOLISTEN 0x00000020U #define DNS_DISPATCHATTR_MAKEQUERY 0x00000040U #define DNS_DISPATCHATTR_CONNECTED 0x00000080U -#define DNS_DISPATCHATTR_RANDOMPORT 0x00000100U +/*#define DNS_DISPATCHATTR_RANDOMPORT 0x00000100U*/ +#define DNS_DISPATCHATTR_EXCLUSIVE 0x00000200U isc_result_t dns_dispatchmgr_create(isc_mem_t *mctx, isc_entropy_t *entropy, @@ -187,24 +193,32 @@ void dns_dispatchmgr_setblackportlist(dns_dispatchmgr_t *mgr, dns_portlist_t *portlist); /* - * Sets a list of UDP ports that won't be used when creating a udp - * dispatch with a wildcard port. + * This function is deprecated. Use dns_dispatchmgr_setavailports() instead. * * Requires: * mgr is a valid dispatchmgr - * portlist to be NULL or a valid port list. */ dns_portlist_t * dns_dispatchmgr_getblackportlist(dns_dispatchmgr_t *mgr); /* - * Return the current port list. + * This function is deprecated and always returns NULL. 
* * Requires: * mgr is a valid dispatchmgr */ - +isc_result_t +dns_dispatchmgr_setavailports(dns_dispatchmgr_t *mgr, isc_portset_t *v4portset, + isc_portset_t *v6portset); +/* + * Sets a list of UDP ports that can be used for outgoing UDP messages. + * + * Requires: + * mgr is a valid dispatchmgr + * v4portset is NULL or a valid port set + * v6portset is NULL or a valid port set + */ isc_result_t dns_dispatch_getudp(dns_dispatchmgr_t *mgr, isc_socketmgr_t *sockmgr, @@ -317,6 +331,12 @@ dns_dispatch_starttcp(dns_dispatch_t *disp); */ isc_result_t +dns_dispatch_addresponse2(dns_dispatch_t *disp, isc_sockaddr_t *dest, + isc_task_t *task, isc_taskaction_t action, void *arg, + isc_uint16_t *idp, dns_dispentry_t **resp, + isc_socketmgr_t *sockmgr); + +isc_result_t dns_dispatch_addresponse(dns_dispatch_t *disp, isc_sockaddr_t *dest, isc_task_t *task, isc_taskaction_t action, void *arg, isc_uint16_t *idp, dns_dispentry_t **resp); @@ -339,6 +359,10 @@ dns_dispatch_addresponse(dns_dispatch_t *disp, isc_sockaddr_t *dest, * * "resp" be non-NULL and *resp be NULL * + *\li "sockmgr" be NULL or a valid socket manager. If 'disp' has + * the DNS_DISPATCHATTR_EXCLUSIVE attribute, this must not be NULL, + * which also means dns_dispatch_addresponse() cannot be used. + * * Ensures: * * <id, dest> is a unique tuple. That means incoming messages @@ -369,6 +393,8 @@ dns_dispatch_removeresponse(dns_dispentry_t **resp, * argument to dns_dispatch_addresponse() when allocating '*resp'. */ +isc_socket_t * +dns_dispatch_getentrysocket(dns_dispentry_t *resp); isc_socket_t * dns_dispatch_getsocket(dns_dispatch_t *disp); @@ -406,6 +432,16 @@ dns_dispatch_cancel(dns_dispatch_t *disp); * disp is valid. */ +unsigned int +dns_dispatch_getattributes(dns_dispatch_t *disp); +/* + * Return the attributes (DNS_DISPATCHATTR_xxx) of this dispatch. Only the + * non-changeable attributes are expected to be referenced by the caller. + * + * Requires: + *\li disp is valid. 
+ */ + void dns_dispatch_changeattributes(dns_dispatch_t *disp, unsigned int attributes, unsigned int mask); diff --git a/lib/dns/journal.c b/lib/dns/journal.c index b7e81f7ccec79..910e01ed44bd1 100644 --- a/lib/dns/journal.c +++ b/lib/dns/journal.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004, 2005, 2007 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004, 2005, 2007, 2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 1999-2003 Internet Software Consortium. * * Permission to use, copy, modify, and/or distribute this software for any @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: journal.c,v 1.77.2.1.10.22 2007/09/07 05:25:37 marka Exp $ */ +/* $Id: journal.c,v 1.77.2.1.10.24 2008/09/25 04:01:06 tbox Exp $ */ #include <config.h> @@ -71,7 +71,7 @@ static isc_boolean_t bind8_compat = ISC_TRUE; /* XXX config */ } while (0) #define CHECK(op) \ - do { result = (op); \ + do { result = (op); \ if (result != ISC_R_SUCCESS) goto failure; \ } while (0) @@ -113,11 +113,11 @@ dns_db_createsoatuple(dns_db_t *db, dns_dbversion_t *ver, isc_mem_t *mctx, dns_rdataset_init(&rdataset); result = dns_db_findrdataset(db, node, ver, dns_rdatatype_soa, 0, (isc_stdtime_t)0, &rdataset, NULL); - if (result != ISC_R_SUCCESS) + if (result != ISC_R_SUCCESS) goto freenode; result = dns_rdataset_first(&rdataset); - if (result != ISC_R_SUCCESS) + if (result != ISC_R_SUCCESS) goto freenode; dns_rdataset_current(&rdataset, &rdata); @@ -680,7 +680,7 @@ dns_journal_open(isc_mem_t *mctx, const char *filename, isc_boolean_t write, int namelen; char backup[1024]; size_t n; - + result = journal_open(mctx, filename, write, write, journalp); if (result == ISC_R_NOTFOUND) { namelen = strlen(filename); @@ -1372,7 +1372,7 @@ dns_journal_print(isc_mem_t *mctx, const char *filename, FILE *file) { if (result != ISC_R_SUCCESS) { isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, "journal open failure: %s: %s", - isc_result_totext(result), j->filename); + 
isc_result_totext(result), filename); return (result); } @@ -1410,9 +1410,9 @@ dns_journal_print(isc_mem_t *mctx, const char *filename, FILE *file) { if (n_soa == 3) n_soa = 1; if (n_soa == 0) { - isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, - "%s: journal file corrupt: missing " - "initial SOA", j->filename); + isc_log_write(JOURNAL_COMMON_LOGARGS, ISC_LOG_ERROR, + "%s: journal file corrupt: missing " + "initial SOA", j->filename); FAIL(ISC_R_UNEXPECTED); } CHECK(dns_difftuple_create(diff.mctx, n_soa == 1 ? @@ -1988,7 +1988,7 @@ dns_journal_compact(isc_mem_t *mctx, char *filename, isc_uint32_t serial, dns_journal_destroy(&j); return (ISC_R_SUCCESS); } - + if (DNS_SERIAL_GT(j->header.begin.serial, serial) || DNS_SERIAL_GT(serial, j->header.end.serial)) { dns_journal_destroy(&j); @@ -2012,7 +2012,7 @@ dns_journal_compact(isc_mem_t *mctx, char *filename, isc_uint32_t serial, } CHECK(journal_open(mctx, newname, ISC_TRUE, ISC_TRUE, &new)); - + /* * Remove overhead so space test below can succeed. */ @@ -2070,7 +2070,7 @@ dns_journal_compact(isc_mem_t *mctx, char *filename, isc_uint32_t serial, result = ISC_R_NOMEMORY; goto failure; } - + CHECK(journal_seek(j, best_guess.offset)); CHECK(journal_seek(new, indexend)); for (i = 0; i < copy_length; i += size) { @@ -2143,7 +2143,7 @@ dns_journal_compact(isc_mem_t *mctx, char *filename, isc_uint32_t serial, goto failure; } } - + dns_journal_destroy(&j); result = ISC_R_SUCCESS; diff --git a/lib/dns/masterdump.c b/lib/dns/masterdump.c index 0f4716d583d97..ed0834ac574da 100644 --- a/lib/dns/masterdump.c +++ b/lib/dns/masterdump.c @@ -1,8 +1,8 @@ /* - * Copyright (C) 2004, 2006 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004, 2006, 2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 1999-2003 Internet Software Consortium. 
* - * Permission to use, copy, modify, and distribute this software for any + * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: masterdump.c,v 1.56.2.5.2.15 2006/03/10 00:17:21 marka Exp $ */ +/* $Id: masterdump.c,v 1.56.2.5.2.17 2008/08/13 23:45:33 tbox Exp $ */ #include <config.h> @@ -173,7 +173,7 @@ struct dns_dumpctx { char *tmpfile; }; -#define NXDOMAIN(x) (((x)->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0) +#define NXDOMAIN(x) (((x)->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0) /* * Output tabs and spaces to go from column '*current' to @@ -920,13 +920,13 @@ dns_dumpctx_detach(dns_dumpctx_t **dctxp) { dns_dbversion_t * dns_dumpctx_version(dns_dumpctx_t *dctx) { - REQUIRE(DNS_DCTX_VALID(dctx)); + REQUIRE(DNS_DCTX_VALID(dctx)); return (dctx->version); } dns_db_t * dns_dumpctx_db(dns_dumpctx_t *dctx) { - REQUIRE(DNS_DCTX_VALID(dctx)); + REQUIRE(DNS_DCTX_VALID(dctx)); return (dctx->db); } @@ -1159,13 +1159,12 @@ dumptostreaminc(dns_dumpctx_t *dctx) { result = dns_dbiterator_next(dctx->dbiter); } - if (dctx->nodes != 0 && result == ISC_R_SUCCESS) { - result = dns_dbiterator_pause(dctx->dbiter); - RUNTIME_CHECK(result == ISC_R_SUCCESS); + if (dctx->nodes != 0 && result == ISC_R_SUCCESS) result = DNS_R_CONTINUE; - } else if (result == ISC_R_NOMORE) + else if (result == ISC_R_NOMORE) result = ISC_R_SUCCESS; fail: + RUNTIME_CHECK(dns_dbiterator_pause(dctx->dbiter) == ISC_R_SUCCESS); isc_mem_put(dctx->mctx, buffer.base, buffer.length); return (result); } @@ -1419,10 +1418,10 @@ dns_master_dumpnode(isc_mem_t *mctx, dns_db_t *db, dns_dbversion_t *version, isc_result_t dns_master_stylecreate(dns_master_style_t **stylep, unsigned int flags, - unsigned int ttl_column, unsigned int class_column, - unsigned int type_column, unsigned int 
rdata_column, - unsigned int line_length, unsigned int tab_width, - isc_mem_t *mctx) + unsigned int ttl_column, unsigned int class_column, + unsigned int type_column, unsigned int rdata_column, + unsigned int line_length, unsigned int tab_width, + isc_mem_t *mctx) { dns_master_style_t *style; diff --git a/lib/dns/message.c b/lib/dns/message.c index 915f587871542..5eacad02f9284 100644 --- a/lib/dns/message.c +++ b/lib/dns/message.c @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: message.c,v 1.194.2.10.2.28.4.2 2008/07/28 23:47:49 tbox Exp $ */ +/* $Id: message.c,v 1.194.2.10.2.30 2008/07/28 23:45:48 tbox Exp $ */ /*** *** Imports diff --git a/lib/dns/openssldsa_link.c b/lib/dns/openssldsa_link.c index df731e45a47df..fbfcfbad35ad3 100644 --- a/lib/dns/openssldsa_link.c +++ b/lib/dns/openssldsa_link.c @@ -16,7 +16,7 @@ * IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: openssldsa_link.c,v 1.1.4.7 2007/08/28 07:19:13 tbox Exp $ */ +/* $Id: openssldsa_link.c,v 1.1.4.8 2008/12/24 00:21:45 marka Exp $ */ #ifdef OPENSSL @@ -133,7 +133,7 @@ openssldsa_verify(dst_context_t *dctx, const isc_region_t *sig) { status = DSA_do_verify(digest, ISC_SHA1_DIGESTLENGTH, dsasig, dsa); DSA_SIG_free(dsasig); - if (status == 0) + if (status != 1) return (dst__openssl_toresult(DST_R_VERIFYFAILURE)); return (ISC_R_SUCCESS); diff --git a/lib/dns/opensslrsa_link.c b/lib/dns/opensslrsa_link.c index c33913ce3d07b..765d9ed2a575c 100644 --- a/lib/dns/opensslrsa_link.c +++ b/lib/dns/opensslrsa_link.c @@ -17,7 +17,7 @@ /* * Principal Author: Brian Wellington - * $Id: opensslrsa_link.c,v 1.1.4.9 2006/11/07 21:28:40 marka Exp $ + * $Id: opensslrsa_link.c,v 1.1.4.10 2008/12/24 00:21:45 marka Exp $ */ #ifdef OPENSSL @@ -246,7 +246,7 @@ opensslrsa_verify(dst_context_t *dctx, const isc_region_t *sig) { status = RSA_verify(type, digest, digestlen, sig->base, RSA_size(rsa), rsa); - if (status == 0) + if (status != 1) return (dst__openssl_toresult(DST_R_VERIFYFAILURE)); 
return (ISC_R_SUCCESS); diff --git a/lib/dns/rbt.c b/lib/dns/rbt.c index 46c317d262bce..14d5ea73849f5 100644 --- a/lib/dns/rbt.c +++ b/lib/dns/rbt.c @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: rbt.c,v 1.115.2.2.2.17 2008/04/03 00:17:07 each Exp $ */ +/* $Id: rbt.c,v 1.115.2.2.2.18 2008/04/18 19:03:00 each Exp $ */ /* Principal Authors: DCL */ @@ -2048,10 +2048,6 @@ dns_rbt_deletetreeflat(dns_rbt_t *rbt, unsigned int quantum, node = LEFT(node); goto traverse; } - if (RIGHT(node) != NULL) { - node = RIGHT(node); - goto traverse; - } if (DOWN(node) != NULL) { node = DOWN(node); goto traverse; @@ -2068,14 +2064,15 @@ dns_rbt_deletetreeflat(dns_rbt_t *rbt, unsigned int quantum, node->magic = 0; #endif parent = PARENT(node); + if (RIGHT(node) != NULL) + PARENT(RIGHT(node)) = parent; if (parent != NULL) { if (LEFT(parent) == node) - LEFT(parent) = NULL; + LEFT(parent) = RIGHT(node); else if (DOWN(parent) == node) - DOWN(parent) = NULL; - else if (RIGHT(parent) == node) - RIGHT(parent) = NULL; - } + DOWN(parent) = RIGHT(node); + } else + parent = RIGHT(node); isc_mem_put(rbt->mctx, node, NODE_SIZE(node)); rbt->nodecount--; node = parent; diff --git a/lib/dns/rdata/generic/nsec_47.c b/lib/dns/rdata/generic/nsec_47.c index 74b7806c7e112..307cac89e0811 100644 --- a/lib/dns/rdata/generic/nsec_47.c +++ b/lib/dns/rdata/generic/nsec_47.c @@ -1,8 +1,8 @@ /* - * Copyright (C) 2004 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004, 2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 2003 Internet Software Consortium. * - * Permission to use, copy, modify, and distribute this software for any + * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * @@ -15,11 +15,11 @@ * PERFORMANCE OF THIS SOFTWARE. 
*/ -/* $Id: nsec_47.c,v 1.7.2.1 2004/03/08 02:08:03 marka Exp $ */ +/* $Id: nsec_47.c,v 1.7.2.3 2008/07/15 23:45:44 tbox Exp $ */ /* reviewed: Wed Mar 15 18:21:15 PST 2000 by brister */ -/* draft-ietf-dnsext-nsec-rdata-01.txt */ +/* RFC 3845 */ #ifndef RDATA_GENERIC_NSEC_47_C #define RDATA_GENERIC_NSEC_47_C @@ -255,7 +255,7 @@ fromstruct_nsec(ARGS_FROMSTRUCT) { window = nsec->typebits[i]; len = nsec->typebits[i+1]; i += 2; - INSIST(first || window > lastwindow); + INSIST(first || window > lastwindow); INSIST(len > 0 && len <= 32); INSIST(i + len <= nsec->len); INSIST(nsec->typebits[i + len - 1] != 0); diff --git a/lib/dns/rdata/generic/nsec_47.h b/lib/dns/rdata/generic/nsec_47.h index d76a25cc43db5..0070bfc6e456c 100644 --- a/lib/dns/rdata/generic/nsec_47.h +++ b/lib/dns/rdata/generic/nsec_47.h @@ -1,8 +1,8 @@ /* - * Copyright (C) 2004 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004, 2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 2003 Internet Software Consortium. * - * Permission to use, copy, modify, and distribute this software for any + * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * @@ -18,9 +18,10 @@ #ifndef GENERIC_NSEC_47_H #define GENERIC_NSEC_47_H 1 -/* $Id: nsec_47.h,v 1.4.2.1 2004/03/08 02:08:03 marka Exp $ */ +/* $Id: nsec_47.h,v 1.4.2.3 2008/07/15 23:45:44 tbox Exp $ */ -/* draft-ietf-dnsext-nsec-rdata-01.txt */ +/*! + * \brief Per RFC 3845 */ typedef struct dns_rdata_nsec { dns_rdatacommon_t common; diff --git a/lib/dns/rdata/generic/txt_16.c b/lib/dns/rdata/generic/txt_16.c index 625fa2be8e7c0..eb511ba859f52 100644 --- a/lib/dns/rdata/generic/txt_16.c +++ b/lib/dns/rdata/generic/txt_16.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004, 2007 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004, 2007, 2008 Internet Systems Consortium, Inc. 
("ISC") * Copyright (C) 1998-2003 Internet Software Consortium. * * Permission to use, copy, modify, and/or distribute this software for any @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: txt_16.c,v 1.37.12.7 2007/08/28 07:19:15 tbox Exp $ */ +/* $Id: txt_16.c,v 1.37.12.9 2008/04/28 23:45:37 tbox Exp $ */ /* Reviewed: Thu Mar 16 15:40:00 PST 2000 by bwelling */ @@ -142,7 +142,7 @@ fromstruct_txt(ARGS_FROMSTRUCT) { while (region.length > 0) { length = uint8_fromregion(&region); isc_region_consume(&region, 1); - if (region.length <= length) + if (region.length < length) return (ISC_R_UNEXPECTEDEND); isc_region_consume(&region, length); } diff --git a/lib/dns/rdata/in_1/naptr_35.c b/lib/dns/rdata/in_1/naptr_35.c index f3c93c7c03d9f..fc6ee8cad9d17 100644 --- a/lib/dns/rdata/in_1/naptr_35.c +++ b/lib/dns/rdata/in_1/naptr_35.c @@ -1,8 +1,8 @@ /* - * Copyright (C) 2004 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004, 2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 1999-2001, 2003 Internet Software Consortium. * - * Permission to use, copy, modify, and distribute this software for any + * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: naptr_35.c,v 1.43.2.1.2.3 2004/03/06 08:14:17 marka Exp $ */ +/* $Id: naptr_35.c,v 1.43.2.1.2.5 2008/04/28 23:45:37 tbox Exp $ */ /* Reviewed: Thu Mar 16 16:52:50 PST 2000 by bwelling */ @@ -154,7 +154,7 @@ totext_in_naptr(ARGS_TOTEXT) { static inline isc_result_t fromwire_in_naptr(ARGS_FROMWIRE) { - dns_name_t name; + dns_name_t name; isc_region_t sr; REQUIRE(type == 35); @@ -165,7 +165,7 @@ fromwire_in_naptr(ARGS_FROMWIRE) { dns_decompress_setmethods(dctx, DNS_COMPRESS_NONE); - dns_name_init(&name, NULL); + dns_name_init(&name, NULL); /* * Order, preference.
@@ -321,8 +321,8 @@ fromstruct_in_naptr(ARGS_FROMSTRUCT) { REQUIRE(naptr->common.rdtype == type); REQUIRE(naptr->common.rdclass == rdclass); REQUIRE(naptr->flags != NULL || naptr->flags_len == 0); - REQUIRE(naptr->service != NULL && naptr->service_len == 0); - REQUIRE(naptr->regexp != NULL && naptr->regexp_len == 0); + REQUIRE(naptr->service != NULL || naptr->service_len == 0); + REQUIRE(naptr->regexp != NULL || naptr->regexp_len == 0); UNUSED(type); UNUSED(rdclass); diff --git a/lib/dns/request.c b/lib/dns/request.c index 6fe7b114ab047..cbc6714af454f 100644 --- a/lib/dns/request.c +++ b/lib/dns/request.c @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: request.c,v 1.64.2.1.10.12.4.2 2008/07/23 07:28:11 tbox Exp $ */ +/* $Id: request.c,v 1.64.2.1.10.15 2008/07/22 04:00:37 marka Exp $ */ #include <config.h> @@ -119,6 +119,7 @@ static isc_result_t req_render(dns_message_t *message, isc_buffer_t **buffer, static void req_senddone(isc_task_t *task, isc_event_t *event); static void req_response(isc_task_t *task, isc_event_t *event); static void req_timeout(isc_task_t *task, isc_event_t *event); +static isc_socket_t * req_getsocket(dns_request_t *request); static void req_connected(isc_task_t *task, isc_event_t *event); static void req_sendevent(dns_request_t *request, isc_result_t result); static void req_cancel(dns_request_t *request); @@ -144,6 +145,7 @@ dns_requestmgr_create(isc_mem_t *mctx, isc_socket_t *socket; isc_result_t result; int i; + unsigned int dispattr; req_log(ISC_LOG_DEBUG(3), "dns_requestmgr_create"); @@ -152,13 +154,14 @@ dns_requestmgr_create(isc_mem_t *mctx, REQUIRE(socketmgr != NULL); REQUIRE(taskmgr != NULL); REQUIRE(dispatchmgr != NULL); + UNUSED(socket); if (dispatchv4 != NULL) { - socket = dns_dispatch_getsocket(dispatchv4); - REQUIRE(isc_socket_gettype(socket) == isc_sockettype_udp); + dispattr = dns_dispatch_getattributes(dispatchv4); + REQUIRE((dispattr & DNS_DISPATCHATTR_UDP) != 0); } if (dispatchv6 != NULL) { - socket = 
dns_dispatch_getsocket(dispatchv6); - REQUIRE(isc_socket_gettype(socket) == isc_sockettype_udp); + dispattr = dns_dispatch_getattributes(dispatchv6); + REQUIRE((dispattr & DNS_DISPATCHATTR_UDP) != 0); } requestmgr = isc_mem_get(mctx, sizeof(*requestmgr)); @@ -423,12 +426,19 @@ req_send(dns_request_t *request, isc_task_t *task, isc_sockaddr_t *address) { isc_region_t r; isc_socket_t *socket; isc_result_t result; + unsigned int dispattr; req_log(ISC_LOG_DEBUG(3), "req_send: request %p", request); REQUIRE(VALID_REQUEST(request)); - socket = dns_dispatch_getsocket(request->dispatch); + dispattr = dns_dispatch_getattributes(request->dispatch); + socket = req_getsocket(request); isc_buffer_usedregion(request->query, &r); + /* + * We could connect the socket when we are using an exclusive dispatch + * as we do in resolver.c, but we prefer implementation simplicity + * at this moment. + */ result = isc_socket_sendto(socket, &r, task, req_senddone, request, address, NULL); if (result == ISC_R_SUCCESS) @@ -740,14 +750,16 @@ dns_request_createraw3(dns_requestmgr_t *requestmgr, isc_buffer_t *msgbuf, if (result != ISC_R_SUCCESS) goto cleanup; - socket = dns_dispatch_getsocket(request->dispatch); - INSIST(socket != NULL); - result = dns_dispatch_addresponse(request->dispatch, destaddr, task, - req_response, request, &id, - &request->dispentry); + result = dns_dispatch_addresponse2(request->dispatch, destaddr, task, + req_response, request, &id, + &request->dispentry, + requestmgr->socketmgr); if (result != ISC_R_SUCCESS) goto cleanup; + socket = req_getsocket(request); + INSIST(socket != NULL); + result = isc_buffer_allocate(mctx, &request->query, r.length + (tcp ? 
2 : 0)); if (result != ISC_R_SUCCESS) @@ -933,13 +945,14 @@ dns_request_createvia3(dns_requestmgr_t *requestmgr, dns_message_t *message, if (result != ISC_R_SUCCESS) goto cleanup; - socket = dns_dispatch_getsocket(request->dispatch); - INSIST(socket != NULL); - result = dns_dispatch_addresponse(request->dispatch, destaddr, task, - req_response, request, &id, - &request->dispentry); + result = dns_dispatch_addresponse2(request->dispatch, destaddr, task, + req_response, request, &id, + &request->dispentry, + requestmgr->socketmgr); if (result != ISC_R_SUCCESS) goto cleanup; + socket = req_getsocket(request); + INSIST(socket != NULL); message->id = id; if (setkey) { @@ -1224,6 +1237,21 @@ dns_request_destroy(dns_request_t **requestp) { *** Private: request. ***/ +static isc_socket_t * +req_getsocket(dns_request_t *request) { + unsigned int dispattr; + isc_socket_t *socket; + + dispattr = dns_dispatch_getattributes(request->dispatch); + if ((dispattr & DNS_DISPATCHATTR_EXCLUSIVE) != 0) { + INSIST(request->dispentry != NULL); + socket = dns_dispatch_getentrysocket(request->dispentry); + } else + socket = dns_dispatch_getsocket(request->dispatch); + + return (socket); +} + static void req_connected(isc_task_t *task, isc_event_t *event) { isc_socketevent_t *sevent = (isc_socketevent_t *)event; @@ -1423,6 +1451,7 @@ req_destroy(dns_request_t *request) { static void req_cancel(dns_request_t *request) { isc_socket_t *socket; + unsigned int dispattr; REQUIRE(VALID_REQUEST(request)); @@ -1435,16 +1464,23 @@ req_cancel(dns_request_t *request) { if (request->timer != NULL) isc_timer_detach(&request->timer); + dispattr = dns_dispatch_getattributes(request->dispatch); + socket = NULL; + if (DNS_REQUEST_CONNECTING(request) || DNS_REQUEST_SENDING(request)) { + if ((dispattr & DNS_DISPATCHATTR_EXCLUSIVE) != 0) { + if (request->dispentry != NULL) { + socket = dns_dispatch_getentrysocket( + request->dispentry); + } + } else + socket = dns_dispatch_getsocket(request->dispatch); + if 
(DNS_REQUEST_CONNECTING(request) && socket != NULL) + isc_socket_cancel(socket, NULL, ISC_SOCKCANCEL_CONNECT); + if (DNS_REQUEST_SENDING(request) && socket != NULL) + isc_socket_cancel(socket, NULL, ISC_SOCKCANCEL_SEND); + } if (request->dispentry != NULL) dns_dispatch_removeresponse(&request->dispentry, NULL); - if (DNS_REQUEST_CONNECTING(request)) { - socket = dns_dispatch_getsocket(request->dispatch); - isc_socket_cancel(socket, NULL, ISC_SOCKCANCEL_CONNECT); - } - if (DNS_REQUEST_SENDING(request)) { - socket = dns_dispatch_getsocket(request->dispatch); - isc_socket_cancel(socket, NULL, ISC_SOCKCANCEL_SEND); - } dns_dispatch_detach(&request->dispatch); } diff --git a/lib/dns/resolver.c b/lib/dns/resolver.c index 602dcf5f9632f..3a02ba798e736 100644 --- a/lib/dns/resolver.c +++ b/lib/dns/resolver.c @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: resolver.c,v 1.218.2.18.4.77.2.3 2008/07/24 05:00:46 jinmei Exp $ */ +/* $Id: resolver.c,v 1.218.2.18.4.85 2008/10/17 22:03:37 jinmei Exp $ */ #include <config.h> @@ -119,6 +119,7 @@ typedef struct query { isc_mem_t * mctx; dns_dispatchmgr_t * dispatchmgr; dns_dispatch_t * dispatch; + isc_boolean_t exclusivesocket; dns_adbaddrinfo_t * addrinfo; isc_socket_t * tcpsocket; isc_time_t start; @@ -301,7 +302,9 @@ struct dns_resolver { unsigned int options; dns_dispatchmgr_t * dispatchmgr; dns_dispatch_t * dispatchv4; + isc_boolean_t exclusivev4; dns_dispatch_t * dispatchv6; + isc_boolean_t exclusivev6; unsigned int nbuckets; fctxbucket_t * buckets; isc_uint32_t lame_ttl; @@ -360,6 +363,8 @@ static isc_result_t ncache_adderesult(dns_message_t *message, isc_result_t *eresultp); static void validated(isc_task_t *task, isc_event_t *event); static void maybe_destroy(fetchctx_t *fctx); +static void add_bad(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo, + isc_result_t reason); static isc_result_t valcreate(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo, dns_name_t *name, @@ -549,6 +554,7 @@ fctx_cancelquery(resquery_t 
**queryp, dns_dispatchevent_t **deventp, unsigned int factor; dns_adbfind_t *find; dns_adbaddrinfo_t *addrinfo; + isc_socket_t *socket; query = *queryp; fctx = query->fctx; @@ -631,35 +637,48 @@ fctx_cancelquery(resquery_t **queryp, dns_dispatchevent_t **deventp, 0, factor); } - if (query->dispentry != NULL) - dns_dispatch_removeresponse(&query->dispentry, deventp); - - ISC_LIST_UNLINK(fctx->queries, query, link); - - if (query->tsig != NULL) - isc_buffer_free(&query->tsig); - - if (query->tsigkey != NULL) - dns_tsigkey_detach(&query->tsigkey); - /* * Check for any outstanding socket events. If they exist, cancel * them and let the event handlers finish the cleanup. The resolver * only needs to worry about managing the connect and send events; * the dispatcher manages the recv events. */ - if (RESQUERY_CONNECTING(query)) + if (RESQUERY_CONNECTING(query)) { /* * Cancel the connect. */ - isc_socket_cancel(query->tcpsocket, NULL, - ISC_SOCKCANCEL_CONNECT); - else if (RESQUERY_SENDING(query)) + if (query->tcpsocket != NULL) { + isc_socket_cancel(query->tcpsocket, NULL, + ISC_SOCKCANCEL_CONNECT); + } else if (query->dispentry != NULL) { + INSIST(query->exclusivesocket); + socket = dns_dispatch_getentrysocket(query->dispentry); + if (socket != NULL) + isc_socket_cancel(socket, NULL, + ISC_SOCKCANCEL_CONNECT); + } + } else if (RESQUERY_SENDING(query)) { /* * Cancel the pending send. 
*/ - isc_socket_cancel(dns_dispatch_getsocket(query->dispatch), - NULL, ISC_SOCKCANCEL_SEND); + if (query->exclusivesocket && query->dispentry != NULL) + socket = dns_dispatch_getentrysocket(query->dispentry); + else + socket = dns_dispatch_getsocket(query->dispatch); + if (socket != NULL) + isc_socket_cancel(socket, NULL, ISC_SOCKCANCEL_SEND); + } + + if (query->dispentry != NULL) + dns_dispatch_removeresponse(&query->dispentry, deventp); + + ISC_LIST_UNLINK(fctx->queries, query, link); + + if (query->tsig != NULL) + isc_buffer_free(&query->tsig); + + if (query->tsigkey != NULL) + dns_tsigkey_detach(&query->tsigkey); if (query->dispatch != NULL) dns_dispatch_detach(&query->dispatch); @@ -824,43 +843,25 @@ fctx_done(fetchctx_t *fctx, isc_result_t result) { } static void -resquery_senddone(isc_task_t *task, isc_event_t *event) { +process_sendevent(resquery_t *query, isc_event_t *event) { isc_socketevent_t *sevent = (isc_socketevent_t *)event; - resquery_t *query = event->ev_arg; isc_boolean_t retry = ISC_FALSE; isc_result_t result; fetchctx_t *fctx; - REQUIRE(event->ev_type == ISC_SOCKEVENT_SENDDONE); - - QTRACE("senddone"); - - /* - * XXXRTH - * - * Currently we don't wait for the senddone event before retrying - * a query. This means that if we get really behind, we may end - * up doing extra work! - */ - - UNUSED(task); - - INSIST(RESQUERY_SENDING(query)); - - query->sends--; fctx = query->fctx; if (RESQUERY_CANCELED(query)) { - if (query->sends == 0) { + if (query->sends == 0 && query->connects == 0) { /* * This query was canceled while the - * isc_socket_sendto() was in progress. + * isc_socket_sendto/connect() was in progress. */ if (query->tcpsocket != NULL) isc_socket_detach(&query->tcpsocket); resquery_destroy(&query); } - } else + } else { switch (sevent->result) { case ISC_R_SUCCESS: break; @@ -874,6 +875,7 @@ resquery_senddone(isc_task_t *task, isc_event_t *event) { /* * No route to remote. 
*/ + add_bad(fctx, query->addrinfo, sevent->result); fctx_cancelquery(&query, NULL, NULL, ISC_TRUE); retry = ISC_TRUE; break; @@ -882,6 +884,7 @@ resquery_senddone(isc_task_t *task, isc_event_t *event) { fctx_cancelquery(&query, NULL, NULL, ISC_FALSE); break; } + } isc_event_free(&event); @@ -899,6 +902,48 @@ resquery_senddone(isc_task_t *task, isc_event_t *event) { } } +static void +resquery_udpconnected(isc_task_t *task, isc_event_t *event) { + resquery_t *query = event->ev_arg; + + REQUIRE(event->ev_type == ISC_SOCKEVENT_CONNECT); + + QTRACE("udpconnected"); + + UNUSED(task); + + INSIST(RESQUERY_CONNECTING(query)); + + query->connects--; + + process_sendevent(query, event); +} + +static void +resquery_senddone(isc_task_t *task, isc_event_t *event) { + resquery_t *query = event->ev_arg; + + REQUIRE(event->ev_type == ISC_SOCKEVENT_SENDDONE); + + QTRACE("senddone"); + + /* + * XXXRTH + * + * Currently we don't wait for the senddone event before retrying + * a query. This means that if we get really behind, we may end + * up doing extra work! + */ + + UNUSED(task); + + INSIST(RESQUERY_SENDING(query)); + + query->sends--; + + process_sendevent(query, event); +} + static inline isc_result_t fctx_addopt(dns_message_t *message, dns_resolver_t *res) { dns_rdataset_t *rdataset; @@ -1029,6 +1074,7 @@ fctx_query(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo, */ query->dispatchmgr = res->dispatchmgr; query->dispatch = NULL; + query->exclusivesocket = ISC_FALSE; query->tcpsocket = NULL; if ((query->options & DNS_FETCHOPT_TCP) != 0) { isc_sockaddr_t addr; @@ -1070,50 +1116,21 @@ fctx_query(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo, * A dispatch will be created once the connect succeeds. 
*/ } else { - isc_sockaddr_t localaddr; - unsigned int attrs, attrmask; - dns_dispatch_t *disp_base; - - attrs = 0; - attrs |= DNS_DISPATCHATTR_UDP; - attrs |= DNS_DISPATCHATTR_RANDOMPORT; - - attrmask = 0; - attrmask |= DNS_DISPATCHATTR_UDP; - attrmask |= DNS_DISPATCHATTR_TCP; - attrmask |= DNS_DISPATCHATTR_IPV4; - attrmask |= DNS_DISPATCHATTR_IPV6; - switch (isc_sockaddr_pf(&addrinfo->sockaddr)) { - case AF_INET: - disp_base = res->dispatchv4; - attrs |= DNS_DISPATCHATTR_IPV4; + case PF_INET: + dns_dispatch_attach(res->dispatchv4, + &query->dispatch); + query->exclusivesocket = res->exclusivev4; break; - case AF_INET6: - disp_base = res->dispatchv6; - attrs |= DNS_DISPATCHATTR_IPV6; + case PF_INET6: + dns_dispatch_attach(res->dispatchv6, + &query->dispatch); + query->exclusivesocket = res->exclusivev6; break; default: result = ISC_R_NOTIMPLEMENTED; goto cleanup_query; } - - result = dns_dispatch_getlocaladdress(disp_base, &localaddr); - if (result != ISC_R_SUCCESS) - goto cleanup_query; - if (isc_sockaddr_getport(&localaddr) == 0) { - result = dns_dispatch_getudp(res->dispatchmgr, - res->socketmgr, - res->taskmgr, - &localaddr, - 4096, 1000, 32768, - 16411, 16433, - attrs, attrmask, - &query->dispatch); - if (result != ISC_R_SUCCESS) - goto cleanup_query; - } else - dns_dispatch_attach(disp_base, &query->dispatch); /* * We should always have a valid dispatcher here. If we * don't support a protocol family, then its dispatcher @@ -1224,13 +1241,14 @@ resquery_send(resquery_t *query) { /* * Get a query id from the dispatch. 
*/ - result = dns_dispatch_addresponse(query->dispatch, - &query->addrinfo->sockaddr, - task, - resquery_response, - query, - &query->id, - &query->dispentry); + result = dns_dispatch_addresponse2(query->dispatch, + &query->addrinfo->sockaddr, + task, + resquery_response, + query, + &query->id, + &query->dispentry, + res->socketmgr); if (result != ISC_R_SUCCESS) goto cleanup_temps; @@ -1412,12 +1430,24 @@ resquery_send(resquery_t *query) { */ dns_message_reset(fctx->qmessage, DNS_MESSAGE_INTENTRENDER); - socket = dns_dispatch_getsocket(query->dispatch); + if (query->exclusivesocket) + socket = dns_dispatch_getentrysocket(query->dispentry); + else + socket = dns_dispatch_getsocket(query->dispatch); /* * Send the query! */ - if ((query->options & DNS_FETCHOPT_TCP) == 0) + if ((query->options & DNS_FETCHOPT_TCP) == 0) { address = &query->addrinfo->sockaddr; + if (query->exclusivesocket) { + result = isc_socket_connect(socket, address, task, + resquery_udpconnected, + query); + if (result != ISC_R_SUCCESS) + goto cleanup_message; + query->connects++; + } + } isc_buffer_usedregion(buffer, &r); /* @@ -1740,7 +1770,7 @@ add_bad(fetchctx_t *fctx, dns_adbaddrinfo_t *addrinfo, isc_result_t reason) { return; if (reason == DNS_R_UNEXPECTEDRCODE && - fctx->rmessage->opcode == dns_rcode_servfail && + fctx->rmessage->rcode == dns_rcode_servfail && ISFORWARDER(addrinfo)) return; @@ -2026,6 +2056,13 @@ fctx_getaddresses(fetchctx_t *fctx) { } while (sa != NULL) { + if ((isc_sockaddr_pf(sa) == AF_INET && + fctx->res->dispatchv4 == NULL) || + (isc_sockaddr_pf(sa) == AF_INET6 && + fctx->res->dispatchv6 == NULL)) { + sa = ISC_LIST_NEXT(sa, link); + continue; + } ai = NULL; result = dns_adb_findaddrinfo(fctx->adb, sa, &ai, 0); /* XXXMLG */ @@ -5315,6 +5352,19 @@ resquery_response(isc_task_t *task, isc_event_t *event) { * There's no hope for this query. 
*/ keep_trying = ISC_TRUE; + + /* + * If this is a network error on an exclusive query + * socket, mark the server as bad so that we won't try + * it for this fetch again. + */ + if (query->exclusivesocket && + (devent->result == ISC_R_HOSTUNREACH || + devent->result == ISC_R_NETUNREACH || + devent->result == ISC_R_CONNREFUSED || + devent->result == ISC_R_CANCELED)) { + broken_server = devent->result; + } } goto done; } @@ -5465,12 +5515,17 @@ resquery_response(isc_task_t *task, isc_event_t *event) { */ if (message->rcode != dns_rcode_noerror && message->rcode != dns_rcode_nxdomain) { - if ((message->rcode == dns_rcode_formerr || - message->rcode == dns_rcode_notimp || - message->rcode == dns_rcode_servfail) && + if (((message->rcode == dns_rcode_formerr || + message->rcode == dns_rcode_notimp) || + (message->rcode == dns_rcode_servfail && + dns_message_getopt(message) == NULL)) && (query->options & DNS_FETCHOPT_NOEDNS0) == 0) { /* * It's very likely they don't like EDNS0. + * If the response code is SERVFAIL, also check if the + * response contains an OPT RR and don't cache the + * failure since it can be returned for various other + * reasons. * * XXXRTH We should check if the question * we're asking requires EDNS0, and @@ -5936,6 +5991,7 @@ dns_resolver_create(dns_view_t *view, isc_result_t result = ISC_R_SUCCESS; unsigned int i, buckets_created = 0; char name[16]; + unsigned dispattr; /* * Create a resolver. 
@@ -5991,11 +6047,20 @@ dns_resolver_create(dns_view_t *view, } res->dispatchv4 = NULL; - if (dispatchv4 != NULL) + if (dispatchv4 != NULL) { dns_dispatch_attach(dispatchv4, &res->dispatchv4); + dispattr = dns_dispatch_getattributes(dispatchv4); + res->exclusivev4 = + ISC_TF((dispattr & DNS_DISPATCHATTR_EXCLUSIVE) != 0); + } + res->dispatchv6 = NULL; - if (dispatchv6 != NULL) + if (dispatchv6 != NULL) { dns_dispatch_attach(dispatchv6, &res->dispatchv6); + dispattr = dns_dispatch_getattributes(dispatchv6); + res->exclusivev6 = + ISC_TF((dispattr & DNS_DISPATCHATTR_EXCLUSIVE) != 0); + } res->references = 1; res->exiting = ISC_FALSE; @@ -6253,12 +6318,12 @@ dns_resolver_shutdown(dns_resolver_t *res) { fctx != NULL; fctx = ISC_LIST_NEXT(fctx, link)) fctx_shutdown(fctx); - if (res->dispatchv4 != NULL) { + if (res->dispatchv4 != NULL && !res->exclusivev4) { sock = dns_dispatch_getsocket(res->dispatchv4); isc_socket_cancel(sock, res->buckets[i].task, ISC_SOCKCANCEL_ALL); } - if (res->dispatchv6 != NULL) { + if (res->dispatchv6 != NULL && !res->exclusivev6) { sock = dns_dispatch_getsocket(res->dispatchv6); isc_socket_cancel(sock, res->buckets[i].task, ISC_SOCKCANCEL_ALL); diff --git a/lib/dns/validator.c b/lib/dns/validator.c index a32892f3b7d62..7c7aae1f17ec7 100644 --- a/lib/dns/validator.c +++ b/lib/dns/validator.c @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. 
*/ -/* $Id: validator.c,v 1.91.2.5.8.40 2008/02/19 17:10:55 each Exp $ */ +/* $Id: validator.c,v 1.91.2.5.8.41 2008/08/21 04:59:16 marka Exp $ */ #include <config.h> @@ -974,6 +974,7 @@ view_find(dns_validator_t *val, dns_name_t *name, dns_rdatatype_t type) { } else if (result != ISC_R_SUCCESS && result != DNS_R_NCACHENXDOMAIN && result != DNS_R_NCACHENXRRSET && + result != DNS_R_EMPTYNAME && result != DNS_R_NXRRSET && result != ISC_R_NOTFOUND) { goto notfound; @@ -1231,6 +1232,7 @@ get_key(dns_validator_t *val, dns_rdata_rrsig_t *siginfo) { return (DNS_R_WAIT); } else if (result == DNS_R_NCACHENXDOMAIN || result == DNS_R_NCACHENXRRSET || + result == DNS_R_EMPTYNAME || result == DNS_R_NXDOMAIN || result == DNS_R_NXRRSET) { @@ -1791,8 +1793,9 @@ validatezonekey(dns_validator_t *val) { if (result != ISC_R_SUCCESS) return (result); return (DNS_R_WAIT); - } else if (result == DNS_R_NCACHENXDOMAIN || + } else if (result == DNS_R_NCACHENXDOMAIN || result == DNS_R_NCACHENXRRSET || + result == DNS_R_EMPTYNAME || result == DNS_R_NXDOMAIN || result == DNS_R_NXRRSET) { @@ -2420,6 +2423,7 @@ finddlvsep(dns_validator_t *val, isc_boolean_t resume) { } if (result != DNS_R_NXRRSET && result != DNS_R_NXDOMAIN && + result != DNS_R_EMPTYNAME && result != DNS_R_NCACHENXRRSET && result != DNS_R_NCACHENXDOMAIN) return (result); diff --git a/lib/dns/view.c b/lib/dns/view.c index 90b7e938b3cb7..21effb6284a94 100644 --- a/lib/dns/view.c +++ b/lib/dns/view.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004, 2007 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004, 2007, 2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 1999-2003 Internet Software Consortium. * * Permission to use, copy, modify, and/or distribute this software for any @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. 
*/ -/* $Id: view.c,v 1.103.2.5.2.17 2007/08/28 07:19:14 tbox Exp $ */ +/* $Id: view.c,v 1.103.2.5.2.19 2008/06/17 23:45:32 tbox Exp $ */ #include <config.h> @@ -298,7 +298,7 @@ destroy(dns_view_t *view) { name = ISC_LIST_HEAD(view->rootexclude[i]); while (name != NULL) { ISC_LIST_UNLINK(view->rootexclude[i], - name, link); + name, link); dns_name_free(name, view->mctx); isc_mem_put(view->mctx, name, sizeof(*name)); name = ISC_LIST_HEAD(view->rootexclude[i]); @@ -822,17 +822,6 @@ dns_view_find(dns_view_t *view, dns_name_t *name, dns_rdatatype_t type, } cleanup: - if (result == DNS_R_NXDOMAIN || result == DNS_R_NXRRSET) { - /* - * We don't care about any DNSSEC proof data in these cases. - */ - if (dns_rdataset_isassociated(rdataset)) - dns_rdataset_disassociate(rdataset); - if (sigrdataset != NULL && - dns_rdataset_isassociated(sigrdataset)) - dns_rdataset_disassociate(sigrdataset); - } - if (dns_rdataset_isassociated(&zrdataset)) { dns_rdataset_disassociate(&zrdataset); if (dns_rdataset_isassociated(&zsigrdataset)) @@ -912,7 +901,7 @@ dns_view_simplefind(dns_view_t *view, dns_name_t *name, dns_rdatatype_t type, isc_result_t dns_view_findzonecut(dns_view_t *view, dns_name_t *name, dns_name_t *fname, isc_stdtime_t now, unsigned int options, - isc_boolean_t use_hints, + isc_boolean_t use_hints, dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset) { return(dns_view_findzonecut2(view, name, fname, now, options, @@ -1320,7 +1309,7 @@ dns_view_isdelegationonly(dns_view_t *view, dns_name_t *name) { return (ISC_TRUE); } -void +void dns_view_setrootdelonly(dns_view_t *view, isc_boolean_t value) { REQUIRE(DNS_VIEW_VALID(view)); view->rootdelonly = value; diff --git a/lib/dns/xfrin.c b/lib/dns/xfrin.c index 0e7a487b10574..ca2d8570d49e4 100644 --- a/lib/dns/xfrin.c +++ b/lib/dns/xfrin.c @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. 
*/ -/* $Id: xfrin.c,v 1.124.2.4.2.21.4.5 2008/07/28 23:47:49 tbox Exp $ */ +/* $Id: xfrin.c,v 1.124.2.4.2.28 2008/09/25 04:16:12 marka Exp $ */ #include <config.h> @@ -424,6 +424,10 @@ xfr_rr(dns_xfrin_ctx_t *xfr, dns_name_t *name, isc_uint32_t ttl, { isc_result_t result; + if (rdata->type == dns_rdatatype_none || + dns_rdatatype_ismeta(rdata->type)) + FAIL(DNS_R_FORMERR); + redo: switch (xfr->state) { case XFRST_SOAQUERY: diff --git a/lib/isc/Makefile.in b/lib/isc/Makefile.in index 7e53510c507db..78a820d767aa8 100644 --- a/lib/isc/Makefile.in +++ b/lib/isc/Makefile.in @@ -1,7 +1,7 @@ -# Copyright (C) 2004 Internet Systems Consortium, Inc. ("ISC") +# Copyright (C) 2004, 2008 Internet Systems Consortium, Inc. ("ISC") # Copyright (C) 1998-2003 Internet Software Consortium. # -# Permission to use, copy, modify, and distribute this software for any +# Permission to use, copy, modify, and/or distribute this software for any # purpose with or without fee is hereby granted, provided that the above # copyright notice and this permission notice appear in all copies. # @@ -13,7 +13,7 @@ # OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR # PERFORMANCE OF THIS SOFTWARE. 
-# $Id: Makefile.in,v 1.71.2.2.2.8 2004/07/20 07:01:58 marka Exp $ +# $Id: Makefile.in,v 1.71.2.2.2.10 2008/06/25 23:45:37 tbox Exp $ srcdir = @srcdir@ VPATH = @srcdir@ @@ -55,7 +55,7 @@ OBJS = @ISC_EXTRA_OBJS@ \ hash.@O@ heap.@O@ hex.@O@ hmacmd5.@O@ \ lex.@O@ lfsr.@O@ lib.@O@ log.@O@ md5.@O@ \ mem.@O@ mutexblock.@O@ netaddr.@O@ netscope.@O@ ondestroy.@O@ \ - parseint.@O@ quota.@O@ random.@O@ \ + parseint.@O@ portset.@O@ quota.@O@ random.@O@ \ ratelimiter.@O@ region.@O@ result.@O@ rwlock.@O@ \ serial.@O@ sha1.@O@ sockaddr.@O@ string.@O@ strtoul.@O@ \ symtab.@O@ task.@O@ taskpool.@O@ timer.@O@ version.@O@ \ @@ -68,7 +68,7 @@ SRCS = @ISC_EXTRA_SRCS@ \ heap.c hex.c hmacmd5.c \ lex.c lfsr.c lib.c log.c \ md5.c mem.c mutexblock.c netaddr.c netscope.c ondestroy.c \ - parseint.c quota.c random.c \ + parseint.c portset.c quota.c random.c \ ratelimiter.c result.c rwlock.c \ serial.c sha1.c sockaddr.c string.c strtoul.c symtab.c \ task.c taskpool.c timer.c version.c diff --git a/lib/isc/api b/lib/isc/api index a8be9070823bf..00b7f9242ef56 100644 --- a/lib/isc/api +++ b/lib/isc/api @@ -1,3 +1,3 @@ -LIBINTERFACE = 14 -LIBREVISION = 0 +LIBINTERFACE = 15 +LIBREVISION = 2 LIBAGE = 0 diff --git a/lib/isc/assertions.c b/lib/isc/assertions.c index 94c6732fd8bd1..ce4ea83c850ab 100644 --- a/lib/isc/assertions.c +++ b/lib/isc/assertions.c @@ -1,8 +1,8 @@ /* - * Copyright (C) 2004 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004, 2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 1997-2001 Internet Software Consortium. * - * Permission to use, copy, modify, and distribute this software for any + * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. 
*/ -/* $Id: assertions.c,v 1.16.206.1 2004/03/06 08:14:27 marka Exp $ */ +/* $Id: assertions.c,v 1.16.206.3 2008/10/15 23:45:34 tbox Exp $ */ #include <config.h> @@ -28,7 +28,7 @@ /* * Forward. */ - +/* coverity[+kill] */ static void default_callback(const char *, int, isc_assertiontype_t, const char *); diff --git a/lib/isc/include/isc/assertions.h b/lib/isc/include/isc/assertions.h index 6091de9a63389..b6f68a6dfd5b1 100644 --- a/lib/isc/include/isc/assertions.h +++ b/lib/isc/include/isc/assertions.h @@ -1,8 +1,8 @@ /* - * Copyright (C) 2004 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004, 2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 1997-2001 Internet Software Consortium. * - * Permission to use, copy, modify, and distribute this software for any + * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. 
* @@ -16,7 +16,7 @@ */ /* - * $Id: assertions.h,v 1.17.206.1 2004/03/06 08:14:38 marka Exp $ + * $Id: assertions.h,v 1.17.206.3 2008/10/15 23:45:34 tbox Exp $ */ #ifndef ISC_ASSERTIONS_H @@ -37,6 +37,7 @@ typedef enum { typedef void (*isc_assertioncallback_t)(const char *, int, isc_assertiontype_t, const char *); +/* coverity[+kill] */ LIBISC_EXTERNAL_DATA extern isc_assertioncallback_t isc_assertion_failed; void @@ -45,14 +46,14 @@ isc_assertion_setcallback(isc_assertioncallback_t); const char * isc_assertion_typetotext(isc_assertiontype_t type); -#ifdef ISC_CHECK_ALL +#if defined(ISC_CHECK_ALL) || defined(__COVERITY__) #define ISC_CHECK_REQUIRE 1 #define ISC_CHECK_ENSURE 1 #define ISC_CHECK_INSIST 1 #define ISC_CHECK_INVARIANT 1 #endif -#ifdef ISC_CHECK_NONE +#if defined(ISC_CHECK_NONE) && !defined(__COVERITY__) #define ISC_CHECK_REQUIRE 0 #define ISC_CHECK_ENSURE 0 #define ISC_CHECK_INSIST 0 diff --git a/lib/isc/include/isc/mem.h b/lib/isc/include/isc/mem.h index bb94f5236b571..979407d89c1c3 100644 --- a/lib/isc/include/isc/mem.h +++ b/lib/isc/include/isc/mem.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004, 2007 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004, 2007, 2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 1997-2001, 2003 Internet Software Consortium. * * Permission to use, copy, modify, and/or distribute this software for any @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. 
*/ -/* $Id: mem.h,v 1.54.12.7 2007/08/28 07:19:15 tbox Exp $ */ +/* $Id: mem.h,v 1.54.12.9 2008/04/28 23:45:38 tbox Exp $ */ #ifndef ISC_MEM_H #define ISC_MEM_H 1 @@ -170,11 +170,11 @@ LIBISC_EXTERNAL_DATA extern unsigned int isc_mem_debugging; #define isc_mempool_put(c, p) isc__mempool_put((c), (p) _ISC_MEM_FILELINE) #endif -isc_result_t +isc_result_t isc_mem_create(size_t max_size, size_t target_size, isc_mem_t **mctxp); -isc_result_t +isc_result_t isc_mem_createx(size_t max_size, size_t target_size, isc_memalloc_t memalloc, isc_memfree_t memfree, void *arg, isc_mem_t **mctxp); @@ -203,9 +203,9 @@ isc_mem_createx(size_t max_size, size_t target_size, * Requires: * mctxp != NULL && *mctxp == NULL */ -void +void isc_mem_attach(isc_mem_t *, isc_mem_t **); -void +void isc_mem_detach(isc_mem_t **); /* * Attach to / detach from a memory context. @@ -213,20 +213,20 @@ isc_mem_detach(isc_mem_t **); * This is intended for applications that use multiple memory contexts * in such a way that it is not obvious when the last allocations from * a given context has been freed and destroying the context is safe. - * + * * Most applications do not need to call these functions as they can * simply create a single memory context at the beginning of main() * and destroy it at the end of main(), thereby guaranteeing that it * is not destroyed while there are outstanding allocations. */ -void +void isc_mem_destroy(isc_mem_t **); /* * Destroy a memory context. */ -isc_result_t +isc_result_t isc_mem_ondestroy(isc_mem_t *ctx, isc_task_t *task, isc_event_t **event); @@ -235,13 +235,13 @@ isc_mem_ondestroy(isc_mem_t *ctx, * been successfully destroyed. */ -void +void isc_mem_stats(isc_mem_t *mctx, FILE *out); /* * Print memory usage statistics for 'mctx' on the stream 'out'. */ -void +void isc_mem_setdestroycheck(isc_mem_t *mctx, isc_boolean_t on); /* @@ -249,9 +249,9 @@ isc_mem_setdestroycheck(isc_mem_t *mctx, * destroyed and abort the program if any are present. 
*/ -void +void isc_mem_setquota(isc_mem_t *, size_t); -size_t +size_t isc_mem_getquota(isc_mem_t *); /* * Set/get the memory quota of 'mctx'. This is a hard limit @@ -259,7 +259,7 @@ isc_mem_getquota(isc_mem_t *); * if it is exceeded, allocations will fail. */ -size_t +size_t isc_mem_inuse(isc_mem_t *mctx); /* * Get an estimate of the number of memory in use in 'mctx', in bytes. @@ -271,10 +271,28 @@ void isc_mem_setwater(isc_mem_t *mctx, isc_mem_water_t water, void *water_arg, size_t hiwater, size_t lowater); /* - * Set high and low water marks for this memory context. When the memory - * usage of 'mctx' exceeds 'hiwater', '(water)(water_arg, ISC_MEM_HIWATER)' - * will be called. When the usage drops below 'lowater', 'water' will - * again be called, this time with ISC_MEM_LOWATER. + * Set high and low water marks for this memory context. + * When the memory usage of 'mctx' exceeds 'hiwater', + * '(water)(water_arg, #ISC_MEM_HIWATER)' will be called. 'water' needs to + * call isc_mem_waterack() with #ISC_MEM_HIWATER to acknowledge the state + * change. 'water' may be called multiple times. + * + * When the usage drops below 'lowater', 'water' will again be called, this + * time with #ISC_MEM_LOWATER. 'water' needs to call isc_mem_waterack() with + * #ISC_MEM_LOWATER to acknowledge the change. + * + * static void + * water(void *arg, int mark) { + * struct foo *foo = arg; + * + * LOCK(&foo->marklock); + * if (foo->mark != mark) { + * foo->mark = mark; + * .... + * isc_mem_waterack(foo->mctx, mark); + * } + * UNLOCK(&foo->marklock); + * } * * If 'water' is NULL then 'water_arg', 'hi_water' and 'lo_water' are * ignored and the state is reset. @@ -285,6 +303,12 @@ isc_mem_setwater(isc_mem_t *mctx, isc_mem_water_t water, void *water_arg, * hi_water >= lo_water */ +void +isc_mem_waterack(isc_mem_t *ctx, int mark); +/*%< + * Called to acknowledge changes signalled by calls to 'water'.
+ */ + /* * Memory pools */ @@ -429,22 +453,22 @@ isc_mempool_setfillcount(isc_mempool_t *mpctx, unsigned int limit); /* * Pseudo-private functions for use via macros. Do not call directly. */ -void * +void * isc__mem_get(isc_mem_t *, size_t _ISC_MEM_FLARG); -void +void isc__mem_putanddetach(isc_mem_t **, void *, size_t _ISC_MEM_FLARG); -void +void isc__mem_put(isc_mem_t *, void *, size_t _ISC_MEM_FLARG); -void * +void * isc__mem_allocate(isc_mem_t *, size_t _ISC_MEM_FLARG); -void +void isc__mem_free(isc_mem_t *, void * _ISC_MEM_FLARG); -char * +char * isc__mem_strdup(isc_mem_t *, const char *_ISC_MEM_FLARG); -void * +void * isc__mempool_get(isc_mempool_t * _ISC_MEM_FLARG); -void +void isc__mempool_put(isc_mempool_t *, void * _ISC_MEM_FLARG); ISC_LANG_ENDDECLS diff --git a/lib/isc/include/isc/msgs.h b/lib/isc/include/isc/msgs.h index 967005bf35317..584edcbd881c4 100644 --- a/lib/isc/include/isc/msgs.h +++ b/lib/isc/include/isc/msgs.h @@ -1,8 +1,8 @@ /* - * Copyright (C) 2004 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004, 2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 2000-2003 Internet Software Consortium. * - * Permission to use, copy, modify, and distribute this software for any + * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. 
*/ -/* $Id: msgs.h,v 1.5.2.2.8.3 2004/03/06 08:14:44 marka Exp $ */ +/* $Id: msgs.h,v 1.5.2.2.8.5 2008/08/08 06:27:25 tbox Exp $ */ #ifndef ISC_MSGS_H #define ISC_MSGS_H 1 @@ -146,7 +146,9 @@ #define ISC_MSG_ACCEPTRETURNED 1418 /* accept() returned %d/%s */ #define ISC_MSG_TOOMANYFDS 1419 /* %s: too many open file descriptors */ #define ISC_MSG_ZEROPORT 1420 /* dropping source port zero packet */ -#define ISC_MSG_FILTER 1420 /* setsockopt(SO_ACCEPTFILTER): %s */ +#define ISC_MSG_FILTER 1421 /* setsockopt(SO_ACCEPTFILTER): %s */ + +#define ISC_MSG_TOOMANYHANDLES 1422 /*%< %s: too many open WSA event handles: %s */ #define ISC_MSG_AWAKE 1502 /* "awake" */ #define ISC_MSG_WORKING 1503 /* "working" */ diff --git a/lib/isc/include/isc/platform.h.in b/lib/isc/include/isc/platform.h.in index 9c4edabb5facc..d1877931d4252 100644 --- a/lib/isc/include/isc/platform.h.in +++ b/lib/isc/include/isc/platform.h.in @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004, 2007 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004, 2007, 2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 1999-2003 Internet Software Consortium. * * Permission to use, copy, modify, and/or distribute this software for any @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: platform.h.in,v 1.24.2.1.10.13 2007/09/13 23:45:58 tbox Exp $ */ +/* $Id: platform.h.in,v 1.24.2.1.10.15 2008/06/25 23:45:37 tbox Exp $ */ #ifndef ISC_PLATFORM_H #define ISC_PLATFORM_H 1 @@ -142,6 +142,21 @@ *** Printing. ***/ +/*! \brief + * Define if the system supports kqueue multiplexing + */ +@ISC_PLATFORM_HAVEKQUEUE@ + +/*! \brief + * Define if the system supports epoll multiplexing + */ +@ISC_PLATFORM_HAVEEPOLL@ + +/*! \brief + * Define if the system supports /dev/poll multiplexing + */ +@ISC_PLATFORM_HAVEDEVPOLL@ + /* * If this system needs vsnprintf() and snprintf(), ISC_PLATFORM_NEEDVSNPRINTF * will be defined. 
diff --git a/lib/isc/include/isc/portset.h b/lib/isc/include/isc/portset.h new file mode 100644 index 0000000000000..60cbb790cbe8a --- /dev/null +++ b/lib/isc/include/isc/portset.h @@ -0,0 +1,141 @@ +/* + * Copyright (C) 2008 Internet Systems Consortium, Inc. ("ISC") + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH + * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, + * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM + * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE + * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ + +/* $Id: portset.h,v 1.3.6.2 2008/06/25 23:45:37 tbox Exp $ */ + +/*! \file isc/portset.h + * \brief Transport Protocol Port Manipulation Module + * + * This module provides simple utilities to handle a set of transport protocol + * (UDP or TCP) port numbers, e.g., for creating an ACL list. An isc_portset_t + * object is an opaque instance of a port set, for which the user can add or + * remove a specific port or a range of consecutive ports. This object is + * expected to be used as a temporary work space only, and does not protect + * simultaneous access from multiple threads. Therefore it must not be stored + * in a place that can be accessed from multiple threads. + */ + +#ifndef ISC_PORTSET_H +#define ISC_PORTSET_H 1 + +/*** + *** Imports + ***/ + +#include <isc/net.h> + +/*** + *** Functions + ***/ + +ISC_LANG_BEGINDECLS + +isc_result_t +isc_portset_create(isc_mem_t *mctx, isc_portset_t **portsetp); +/*%< + * Create a port set and initialize it as an empty set.
+ * + * Requires: + *\li 'mctx' to be valid. + *\li 'portsetp' to be non NULL and '*portsetp' to be NULL; + * + * Returns: + *\li #ISC_R_SUCCESS + *\li #ISC_R_NOMEMORY + */ + +void +isc_portset_destroy(isc_mem_t *mctx, isc_portset_t **portsetp); +/*%< + * Destroy a port set. + * + * Requires: + *\li 'mctx' to be valid and must be the same context given when the port set + * was created. + *\li '*portsetp' to be a valid set. + */ + +isc_boolean_t +isc_portset_isset(isc_portset_t *portset, in_port_t port); +/*%< + * Test whether the given port is stored in the portset. + * + * Requires: + *\li 'portset' to be a valid set. + * + * Returns + * \li #ISC_TRUE if the port is found, ISC_FALSE otherwise. + */ + +unsigned int +isc_portset_nports(isc_portset_t *portset); +/*%< + * Provides the number of ports stored in the given portset. + * + * Requires: + *\li 'portset' to be a valid set. + * + * Returns + * \li the number of ports stored in portset. + */ + +void +isc_portset_add(isc_portset_t *portset, in_port_t port); +/*%< + * Add the given port to the portset. The port may or may not be stored in + * the portset. + * + * Requires: + *\li 'portset' to be valid. + */ + +void +isc_portset_remove(isc_portset_t *portset, in_port_t port); +/*%< + * Remove the given port from the portset. The port may or may not be stored in + * the portset. + * + * Requires: + *\li 'portset' to be valid. + */ + +void +isc_portset_addrange(isc_portset_t *portset, in_port_t port_lo, + in_port_t port_hi); +/*%< + * Add a subset of [port_lo, port_hi] (inclusive) to the portset. Ports in the + * subset may or may not be stored in portset. + * + * Requires: + *\li 'portset' to be valid. + *\li port_lo <= port_hi + */ + +void +isc_portset_removerange(isc_portset_t *portset, in_port_t port_lo, + in_port_t port_hi); +/*%< + * Subtract a subset of [port_lo, port_hi] (inclusive) from the portset. Ports + * in the subset may or may not be stored in portset.
+ * + * Requires: + *\li 'portset' to be valid. + *\li port_lo <= port_hi + */ + +ISC_LANG_ENDDECLS + +#endif /* ISC_PORTSET_H */ diff --git a/lib/isc/include/isc/resource.h b/lib/isc/include/isc/resource.h index 1d4cb3023a4a3..fbd9e287ae849 100644 --- a/lib/isc/include/isc/resource.h +++ b/lib/isc/include/isc/resource.h @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: resource.h,v 1.4.206.1.34.2 2008/07/23 23:47:49 tbox Exp $ */ +/* $Id: resource.h,v 1.4.206.3 2008/08/01 23:45:29 tbox Exp $ */ #ifndef ISC_RESOURCE_H #define ISC_RESOURCE_H 1 @@ -80,16 +80,13 @@ isc_resource_getlimit(isc_resource_t resource, isc_resourcevalue_t *value); */ isc_result_t -isc_resource_curlimit(isc_resource_t resource, isc_resourcevalue_t *value); -/* - * Get the current limit on a resource. - * - * Requires: - * 'resource' is a valid member of the isc_resource_t enumeration. +isc_resource_getcurlimit(isc_resource_t resource, isc_resourcevalue_t *value); +/*%< + * Same as isc_resource_getlimit(), but returns the current (soft) limit. * * Returns: - * ISC_R_SUCCESS Success. - * ISC_R_NOTIMPLEMENTED 'resource' is not a type known by the OS. + *\li #ISC_R_SUCCESS Success. + *\li #ISC_R_NOTIMPLEMENTED 'resource' is not a type known by the OS. */ ISC_LANG_ENDDECLS diff --git a/lib/isc/include/isc/socket.h b/lib/isc/include/isc/socket.h index 0537a0d6693bf..73ffb2daffd91 100644 --- a/lib/isc/include/isc/socket.h +++ b/lib/isc/include/isc/socket.h @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE.
*/ -/* $Id: socket.h,v 1.54.12.7.4.4 2008/07/23 23:16:27 marka Exp $ */ +/* $Id: socket.h,v 1.54.12.16 2008/09/11 06:14:46 each Exp $ */ #ifndef ISC_SOCKET_H #define ISC_SOCKET_H 1 @@ -103,6 +103,7 @@ struct isc_socketevent { isc_time_t timestamp; /* timestamp of packet recv */ struct in6_pktinfo pktinfo; /* ipv6 pktinfo */ isc_uint32_t attributes; /* see below */ + isc_eventdestructor_t destroy; /* original destructor */ }; typedef struct isc_socket_newconnev isc_socket_newconnev_t; @@ -312,8 +313,53 @@ isc_socket_detach(isc_socket_t **socketp); */ isc_result_t +isc_socket_open(isc_socket_t *sock); +/* + * Open a new socket file descriptor of the given socket structure. It simply + * opens a new descriptor; all of the other parameters including the socket + * type are inherited from the existing socket. This function is provided to + * avoid overhead of destroying and creating sockets when many short-lived + * sockets are frequently opened and closed. When the efficiency is not an + * issue, it should be safer to detach the unused socket and re-create a new + * one. This optimization may not be available for some systems, in which + * case this function will return ISC_R_NOTIMPLEMENTED and must not be used. + * + * Requires: + * + * \li there must be no other reference to this socket. + * + * \li 'socket' is a valid and previously closed by isc_socket_close() + * + * Returns: + * Same as isc_socket_create(). + * \li ISC_R_NOTIMPLEMENTED + */ + +isc_result_t +isc_socket_close(isc_socket_t *sock); +/* + * Close a socket file descriptor of the given socket structure. This function + * is provided as an alternative to destroying an unused socket when overhead + * destroying/re-creating sockets can be significant, and is expected to be + * used with isc_socket_open(). This optimization may not be available for some + * systems, in which case this function will return ISC_R_NOTIMPLEMENTED and + * must not be used. 
+ * + * Requires: + * + * \li The socket must have a valid descriptor. + * + * \li There must be no other reference to this socket. + * + * \li There must be no pending I/O requests. + * + * Returns: + * \li #ISC_R_NOTIMPLEMENTED + */ + +isc_result_t isc_socket_bind(isc_socket_t *sock, isc_sockaddr_t *addressp, - unsigned int reuseaddr); + unsigned int options); /* * Bind 'socket' to '*addressp'. * @@ -624,6 +670,7 @@ isc_socket_sendto2(isc_socket_t *sock, isc_region_t *region, * ISC_R_INPROGRESS * ISC_R_NOMEMORY * ISC_R_UNEXPECTED + * ISC_R_NOTIMPLEMENTED * * Event results: * @@ -634,8 +681,14 @@ isc_socket_sendto2(isc_socket_t *sock, isc_region_t *region, isc_result_t isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp); + +isc_result_t +isc_socketmgr_create2(isc_mem_t *mctx, isc_socketmgr_t **managerp, + unsigned int maxsocks); /* - * Create a socket manager. + * Create a socket manager. If "maxsocks" is non-zero, it specifies the + * maximum number of sockets that the created manager should handle. + * isc_socketmgr_create() is equivalent of isc_socketmgr_create2() with * * Notes: * @@ -658,6 +711,22 @@ isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp); * ISC_R_UNEXPECTED */ +isc_result_t +isc_socketmgr_getmaxsockets(isc_socketmgr_t *manager, unsigned int *nsockp); +/*%< + * Returns in "*nsockp" the maximum number of sockets this manager may open. + * + * Requires: + * + *\li '*manager' is a valid isc_socketmgr_t. + *\li 'nsockp' is not NULL. + * + * Returns: + * + *\li #ISC_R_SUCCESS + *\li #ISC_R_NOTIMPLEMENTED + */ + void isc_socketmgr_destroy(isc_socketmgr_t **managerp); /* diff --git a/lib/isc/include/isc/timer.h b/lib/isc/include/isc/timer.h index 9d4cc00000a83..4b39c6220b609 100644 --- a/lib/isc/include/isc/timer.h +++ b/lib/isc/include/isc/timer.h @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. 
*/ -/* $Id: timer.h,v 1.28.12.9.4.2 2008/07/24 23:47:44 tbox Exp $ */ +/* $Id: timer.h,v 1.28.12.11 2008/06/25 23:45:37 tbox Exp $ */ #ifndef ISC_TIMER_H #define ISC_TIMER_H 1 diff --git a/lib/isc/include/isc/types.h b/lib/isc/include/isc/types.h index fad77da99e7be..d5f010092889e 100644 --- a/lib/isc/include/isc/types.h +++ b/lib/isc/include/isc/types.h @@ -1,8 +1,8 @@ /* - * Copyright (C) 2004 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004, 2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 1999-2003 Internet Software Consortium. * - * Permission to use, copy, modify, and distribute this software for any + * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: types.h,v 1.32.2.3.2.1 2004/03/06 08:14:50 marka Exp $ */ +/* $Id: types.h,v 1.32.2.3.2.3 2008/06/25 23:45:37 tbox Exp $ */ #ifndef ISC_TYPES_H #define ISC_TYPES_H 1 @@ -66,6 +66,7 @@ typedef struct isc_mempool isc_mempool_t; typedef struct isc_msgcat isc_msgcat_t; typedef struct isc_ondestroy isc_ondestroy_t; typedef struct isc_netaddr isc_netaddr_t; +typedef struct isc_portset isc_portset_t; typedef struct isc_quota isc_quota_t; typedef struct isc_random isc_random_t; typedef struct isc_ratelimiter isc_ratelimiter_t; diff --git a/lib/isc/mem.c b/lib/isc/mem.c index 8bfe967295c9b..69f6cab9ea7b5 100644 --- a/lib/isc/mem.c +++ b/lib/isc/mem.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004-2007 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004-2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 1997-2003 Internet Software Consortium. * * Permission to use, copy, modify, and/or distribute this software for any @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. 
*/ -/* $Id: mem.c,v 1.98.2.7.2.12 2007/11/26 23:45:51 tbox Exp $ */ +/* $Id: mem.c,v 1.98.2.7.2.14 2008/04/28 23:45:37 tbox Exp $ */ #include <config.h> @@ -604,7 +604,7 @@ mem_get(isc_mem_t *ctx, size_t size) { ret = (ctx->memalloc)(ctx->arg, size); if (ret == NULL) - ctx->memalloc_failures++; + ctx->memalloc_failures++; #if ISC_MEM_FILL if (ret != NULL) @@ -1036,7 +1036,6 @@ isc__mem_get(isc_mem_t *ctx, size_t size FLARG) { ADD_TRACE(ctx, ptr, size, file, line); if (ctx->hi_water != 0U && !ctx->hi_called && ctx->inuse > ctx->hi_water) { - ctx->hi_called = ISC_TRUE; call_water = ISC_TRUE; } if (ctx->inuse > ctx->maxinuse) { @@ -1078,10 +1077,8 @@ isc__mem_put(isc_mem_t *ctx, void *ptr, size_t size FLARG) * when the context was pushed over hi_water but then had * isc_mem_setwater() called with 0 for hi_water and lo_water. */ - if (ctx->hi_called && + if (ctx->hi_called && (ctx->inuse < ctx->lo_water || ctx->lo_water == 0U)) { - ctx->hi_called = ISC_FALSE; - if (ctx->water != NULL) call_water = ISC_TRUE; } @@ -1091,6 +1088,18 @@ isc__mem_put(isc_mem_t *ctx, void *ptr, size_t size FLARG) (ctx->water)(ctx->water_arg, ISC_MEM_LOWATER); } +void +isc_mem_waterack(isc_mem_t *ctx, int flag) { + REQUIRE(VALID_CONTEXT(ctx)); + + LOCK(&ctx->lock); + if (flag == ISC_MEM_LOWATER) + ctx->hi_called = ISC_FALSE; + else if (flag == ISC_MEM_HIWATER) + ctx->hi_called = ISC_TRUE; + UNLOCK(&ctx->lock); +} + #if ISC_MEM_TRACKLINES static void print_active(isc_mem_t *mctx, FILE *out) { @@ -1110,7 +1119,7 @@ print_active(isc_mem_t *mctx, FILE *out) { "\tptr %p size %u file %s line %u\n"); for (i = 0; i <= mctx->max_size; i++) { dl = ISC_LIST_HEAD(mctx->debuglist[i]); - + if (dl != NULL) found = ISC_TRUE; diff --git a/lib/isc/portset.c b/lib/isc/portset.c new file mode 100644 index 0000000000000..90e95968fc3b6 --- /dev/null +++ b/lib/isc/portset.c @@ -0,0 +1,143 @@ +/* + * Copyright (C) 2008 Internet Systems Consortium, Inc. 
("ISC") + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH + * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY + * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, + * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM + * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE + * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THIS SOFTWARE. + */ + +/* $Id: portset.c,v 1.4.2.1 2008/06/25 00:03:29 jinmei Exp $ */ + +/*! \file */ + +#include <config.h> + +#include <isc/mem.h> +#include <isc/portset.h> +#include <isc/string.h> +#include <isc/types.h> +#include <isc/util.h> + +#define ISC_PORTSET_BUFSIZE (65536 / (sizeof(isc_uint32_t) * 8)) + +/*% + * Internal representation of portset. It's an array of 32-bit integers, each + * bit corresponding to a single port in the ascending order. For example, + * the second least significant bit of buf[0] corresponds to port 1.
+ */ +struct isc_portset { + unsigned int nports; /*%< number of ports in the set */ + isc_uint32_t buf[ISC_PORTSET_BUFSIZE]; +}; + +static inline isc_boolean_t +portset_isset(isc_portset_t *portset, in_port_t port) { + return (ISC_TF((portset->buf[port >> 5] & (1 << (port & 31))) != 0)); +} + +static inline void +portset_add(isc_portset_t *portset, in_port_t port) { + if (!portset_isset(portset, port)) { + portset->nports++; + portset->buf[port >> 5] |= (1 << (port & 31)); + } +} + +static inline void +portset_remove(isc_portset_t *portset, in_port_t port) { + if (portset_isset(portset, port)) { + portset->nports--; + portset->buf[port >> 5] &= ~(1 << (port & 31)); + } +} + +isc_result_t +isc_portset_create(isc_mem_t *mctx, isc_portset_t **portsetp) { + isc_portset_t *portset; + + REQUIRE(portsetp != NULL && *portsetp == NULL); + + portset = isc_mem_get(mctx, sizeof(*portset)); + if (portset == NULL) + return (ISC_R_NOMEMORY); + + /* Make the set 'empty' by default */ + memset(portset, 0, sizeof(*portset)); + *portsetp = portset; + + return (ISC_R_SUCCESS); +} + +void +isc_portset_destroy(isc_mem_t *mctx, isc_portset_t **portsetp) { + isc_portset_t *portset; + + REQUIRE(portsetp != NULL); + portset = *portsetp; + + isc_mem_put(mctx, portset, sizeof(*portset)); +} + +isc_boolean_t +isc_portset_isset(isc_portset_t *portset, in_port_t port) { + REQUIRE(portset != NULL); + + return (portset_isset(portset, port)); +} + +unsigned int +isc_portset_nports(isc_portset_t *portset) { + REQUIRE(portset != NULL); + + return (portset->nports); +} + +void +isc_portset_add(isc_portset_t *portset, in_port_t port) { + REQUIRE(portset != NULL); + + portset_add(portset, port); +} + +void +isc_portset_remove(isc_portset_t *portset, in_port_t port) { + portset_remove(portset, port); +} + +void +isc_portset_addrange(isc_portset_t *portset, in_port_t port_lo, + in_port_t port_hi) +{ + in_port_t p; + + REQUIRE(portset != NULL); + REQUIRE(port_lo <= port_hi); + + p = port_lo; + do { + 
portset_add(portset, p); + } while (p++ < port_hi); +} + +void +isc_portset_removerange(isc_portset_t *portset, in_port_t port_lo, + in_port_t port_hi) +{ + in_port_t p; + + REQUIRE(portset != NULL); + REQUIRE(port_lo <= port_hi); + + p = port_lo; + do { + portset_remove(portset, p); + } while (p++ < port_hi); +} diff --git a/lib/isc/print.c b/lib/isc/print.c index ee50b29e5d6d1..e4e4a709f7c47 100644 --- a/lib/isc/print.c +++ b/lib/isc/print.c @@ -1,8 +1,8 @@ /* - * Copyright (C) 2004-2006 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004-2006, 2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 1999-2001, 2003 Internet Software Consortium. * - * Permission to use, copy, modify, and distribute this software for any + * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: print.c,v 1.22.2.3.2.4 2006/04/17 18:27:20 explorer Exp $ */ +/* $Id: print.c,v 1.22.2.3.2.6 2008/04/28 23:45:38 tbox Exp $ */ /*! \file */ @@ -246,8 +246,24 @@ isc_print_vsnprintf(char *str, size_t size, const char *format, va_list ap) { head = ""; tmpui = tmpi; } - sprintf(buf, "%" ISC_PRINT_QUADFORMAT "u", - tmpui); + if (tmpui <= 0xffffffffU) + sprintf(buf, "%lu", + (unsigned long)tmpui); + else { + unsigned long mid; + unsigned long lo; + unsigned long hi; + lo = tmpui % 1000000000; + tmpui /= 1000000000; + mid = tmpui % 1000000000; + hi = tmpui / 1000000000; + if (hi != 0) + sprintf(buf, "%lu", hi); + else + buf[0] = '\n'; + sprintf(buf + strlen(buf), "%lu", mid); + sprintf(buf + strlen(buf), "%lu", lo); + } goto printint; case 'o': if (q) @@ -256,10 +272,29 @@ isc_print_vsnprintf(char *str, size_t size, const char *format, va_list ap) { tmpui = va_arg(ap, long int); else tmpui = va_arg(ap, int); - sprintf(buf, - alt ? 
"%#" ISC_PRINT_QUADFORMAT "o" - : "%" ISC_PRINT_QUADFORMAT "o", - tmpui); + if (tmpui <= 0xffffffffU) + sprintf(buf, alt ? "%#lo" : "%lo", + (unsigned long)tmpui); + else { + unsigned long mid; + unsigned long lo; + unsigned long hi; + lo = tmpui % 010000000000; + tmpui /= 010000000000; + mid = tmpui % 010000000000; + hi = tmpui / 010000000000; + if (hi != 0) { + sprintf(buf, + alt ? "%#lo" : "%lo", + hi); + sprintf(buf + strlen(buf), + "%lo", mid); + } else + sprintf(buf, + alt ? "%#lo" : "%lo", + mid); + sprintf(buf + strlen(buf), "%lo", lo); + } goto printint; case 'u': if (q) @@ -268,8 +303,24 @@ isc_print_vsnprintf(char *str, size_t size, const char *format, va_list ap) { tmpui = va_arg(ap, unsigned long int); else tmpui = va_arg(ap, unsigned int); - sprintf(buf, "%" ISC_PRINT_QUADFORMAT "u", - tmpui); + if (tmpui <= 0xffffffffU) + sprintf(buf, "%lu", + (unsigned long)tmpui); + else { + unsigned long mid; + unsigned long lo; + unsigned long hi; + lo = tmpui % 1000000000; + tmpui /= 1000000000; + mid = tmpui % 1000000000; + hi = tmpui / 1000000000; + if (hi != 0) + sprintf(buf, "%lu", hi); + else + buf[0] = '\n'; + sprintf(buf + strlen(buf), "%lu", mid); + sprintf(buf + strlen(buf), "%lu", lo); + } goto printint; case 'x': if (q) @@ -283,8 +334,15 @@ isc_print_vsnprintf(char *str, size_t size, const char *format, va_list ap) { if (precision > 2) precision -= 2; } - sprintf(buf, "%" ISC_PRINT_QUADFORMAT "x", - tmpui); + if (tmpui <= 0xffffffffU) + sprintf(buf, "%lx", + (unsigned long)tmpui); + else { + unsigned long hi = tmpui>>32; + unsigned long lo = tmpui & 0xffffffff; + sprintf(buf, "%lx", hi); + sprintf(buf + strlen(buf), "%lx", lo); + } goto printint; case 'X': if (q) @@ -298,8 +356,15 @@ isc_print_vsnprintf(char *str, size_t size, const char *format, va_list ap) { if (precision > 2) precision -= 2; } - sprintf(buf, "%" ISC_PRINT_QUADFORMAT "X", - tmpui); + if (tmpui <= 0xffffffffU) + sprintf(buf, "%lX", + (unsigned long)tmpui); + else { + unsigned long hi 
= tmpui>>32; + unsigned long lo = tmpui & 0xffffffff; + sprintf(buf, "%lX", hi); + sprintf(buf + strlen(buf), "%lX", lo); + } goto printint; printint: if (precision != 0 || width != 0) { diff --git a/lib/isc/pthreads/mutex.c b/lib/isc/pthreads/mutex.c index 17cca53fc6a72..ef7db0de9709d 100644 --- a/lib/isc/pthreads/mutex.c +++ b/lib/isc/pthreads/mutex.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004, 2005, 2007 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004, 2005, 2007, 2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 2000-2003 Internet Software Consortium. * * Permission to use, copy, modify, and/or distribute this software for any @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: mutex.c,v 1.6.26.8 2007/08/28 07:19:17 tbox Exp $ */ +/* $Id: mutex.c,v 1.6.26.10 2008/04/04 23:45:32 tbox Exp $ */ #include <config.h> @@ -31,23 +31,23 @@ /* Operations on timevals; adapted from FreeBSD's sys/time.h */ #define timevalclear(tvp) ((tvp)->tv_sec = (tvp)->tv_usec = 0) #define timevaladd(vvp, uvp) \ - do { \ - (vvp)->tv_sec += (uvp)->tv_sec; \ - (vvp)->tv_usec += (uvp)->tv_usec; \ - if ((vvp)->tv_usec >= 1000000) { \ - (vvp)->tv_sec++; \ - (vvp)->tv_usec -= 1000000; \ - } \ - } while (0) + do { \ + (vvp)->tv_sec += (uvp)->tv_sec; \ + (vvp)->tv_usec += (uvp)->tv_usec; \ + if ((vvp)->tv_usec >= 1000000) { \ + (vvp)->tv_sec++; \ + (vvp)->tv_usec -= 1000000; \ + } \ + } while (0) #define timevalsub(vvp, uvp) \ - do { \ - (vvp)->tv_sec -= (uvp)->tv_sec; \ - (vvp)->tv_usec -= (uvp)->tv_usec; \ - if ((vvp)->tv_usec < 0) { \ - (vvp)->tv_sec--; \ - (vvp)->tv_usec += 1000000; \ - } \ - } while (0) + do { \ + (vvp)->tv_sec -= (uvp)->tv_sec; \ + (vvp)->tv_usec -= (uvp)->tv_usec; \ + if ((vvp)->tv_usec < 0) { \ + (vvp)->tv_sec--; \ + (vvp)->tv_usec += 1000000; \ + } \ + } while (0) #define ISC_MUTEX_MAX_LOCKERS 32 @@ -70,8 +70,11 @@ struct isc_mutexstats { isc_mutexlocker_t lockers[ISC_MUTEX_MAX_LOCKERS]; }; -#define TABLESIZE (8 * 1024) -static 
isc_mutexstats_t stats[TABLESIZE]; +#ifndef ISC_MUTEX_PROFTABLESIZE +#define ISC_MUTEX_PROFTABLESIZE (16 * 1024) +#endif +static isc_mutexstats_t stats[ISC_MUTEX_PROFTABLESIZE]; +static int stats_next = 0; static isc_boolean_t stats_init = ISC_FALSE; static pthread_mutex_t statslock = PTHREAD_MUTEX_INITIALIZER; @@ -85,21 +88,19 @@ isc_mutex_init_profile(isc_mutex_t *mp, const char *file, int line) { RUNTIME_CHECK(pthread_mutex_lock(&statslock) == 0); - if (stats_init == ISC_FALSE) { - for (i = 0; i < TABLESIZE; i++) { - stats[i].file = NULL; - } + if (stats_init == ISC_FALSE) stats_init = ISC_TRUE; - } - mp->stats = NULL; - for (i = 0; i < TABLESIZE; i++) { - if (stats[i].file == NULL) { - mp->stats = &stats[i]; - break; - } - } - RUNTIME_CHECK(mp->stats != NULL); + /* + * If all statistics entries have been used, give up and trigger an + * assertion failure. There would be no other way to deal with this + * because we'd like to keep record of all locks for the purpose of + * debugging and the number of necessary locks is unpredictable. + * If this failure is triggered while debugging, named should be + * rebuilt with an increased ISC_MUTEX_PROFTABLESIZE. 
+ */ + RUNTIME_CHECK(stats_next < ISC_MUTEX_PROFTABLESIZE); + mp->stats = &stats[stats_next++]; RUNTIME_CHECK(pthread_mutex_unlock(&statslock) == 0); @@ -186,10 +187,9 @@ void isc_mutex_statsprofile(FILE *fp) { isc_mutexlocker_t *locker; int i, j; + fprintf(fp, "Mutex stats (in us)\n"); - for (i = 0; i < TABLESIZE; i++) { - if (stats[i].file == NULL) - continue; + for (i = 0; i < stats_next; i++) { fprintf(fp, "%-12s %4d: %10u %lu.%06lu %lu.%06lu\n", stats[i].file, stats[i].line, stats[i].count, stats[i].locked_total.tv_sec, @@ -225,7 +225,7 @@ isc_mutex_init_errcheck(isc_mutex_t *mp) if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK) != 0) return ISC_R_UNEXPECTED; - + if (pthread_mutex_init(mp, &attr) != 0) return ISC_R_UNEXPECTED; diff --git a/lib/isc/timer.c b/lib/isc/timer.c index 990f2943cd6cf..75508563ca1df 100644 --- a/lib/isc/timer.c +++ b/lib/isc/timer.c @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: timer.c,v 1.64.12.17.4.3 2008/07/29 18:34:29 jinmei Exp $ */ +/* $Id: timer.c,v 1.64.12.20 2008/08/22 05:59:24 marka Exp $ */ #include <config.h> diff --git a/lib/isc/unix/app.c b/lib/isc/unix/app.c index 382e445ead5c9..daf4b2f495e76 100644 --- a/lib/isc/unix/app.c +++ b/lib/isc/unix/app.c @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: app.c,v 1.43.2.3.8.8.4.1 2008/07/29 04:43:57 each Exp $ */ +/* $Id: app.c,v 1.43.2.3.8.11 2008/10/15 03:41:17 marka Exp $ */ #include <config.h> @@ -28,6 +28,9 @@ #include <unistd.h> #include <signal.h> #include <sys/time.h> +#ifdef HAVE_EPOLL +#include <sys/epoll.h> +#endif #include <isc/app.h> #include <isc/boolean.h> @@ -295,14 +298,13 @@ isc_app_onrun(isc_mem_t *mctx, isc_task_t *task, isc_taskaction_t action, * Event loop for nonthreaded programs. 
*/ static isc_result_t -evloop() { +evloop(void) { isc_result_t result; while (!want_shutdown) { int n; isc_time_t when, now; struct timeval tv, *tvp; - fd_set *readfds, *writefds; - int maxfd; + isc_socketwait_t *swait; isc_boolean_t readytasks; isc_boolean_t call_timer_dispatch = ISC_FALSE; @@ -329,8 +331,8 @@ evloop() { } } - isc__socketmgr_getfdsets(&readfds, &writefds, &maxfd); - n = select(maxfd, readfds, writefds, NULL, tvp); + swait = NULL; + n = isc__socketmgr_waitevents(tvp, &swait); if (n == 0 || call_timer_dispatch) { /* @@ -350,8 +352,7 @@ evloop() { isc__timermgr_dispatch(); } if (n > 0) - (void)isc__socketmgr_dispatch(readfds, writefds, - maxfd); + (void)isc__socketmgr_dispatch(swait); (void)isc__taskmgr_dispatch(); if (want_reload) { @@ -432,10 +433,10 @@ isc_app_run(void) { #ifdef ISC_PLATFORM_USETHREADS sigset_t sset; char strbuf[ISC_STRERRORSIZE]; -#endif /* ISC_PLATFORM_USETHREADS */ #ifdef HAVE_SIGWAIT int sig; #endif +#endif /* ISC_PLATFORM_USETHREADS */ #ifdef HAVE_LINUXTHREADS REQUIRE(main_thread == pthread_self()); diff --git a/lib/isc/unix/include/isc/net.h b/lib/isc/unix/include/isc/net.h index f1a015f5bb1d7..01c808fea611e 100644 --- a/lib/isc/unix/include/isc/net.h +++ b/lib/isc/unix/include/isc/net.h @@ -1,8 +1,8 @@ /* - * Copyright (C) 2004 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004, 2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 1999-2003 Internet Software Consortium. * - * Permission to use, copy, modify, and distribute this software for any + * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. 
*/ -/* $Id: net.h,v 1.31.2.2.10.8 2004/04/29 01:31:23 marka Exp $ */ +/* $Id: net.h,v 1.31.2.2.10.10 2008/06/25 23:45:37 tbox Exp $ */ #ifndef ISC_NET_H #define ISC_NET_H 1 @@ -101,7 +101,7 @@ /* * Required for some pre RFC2133 implementations. * IN6ADDR_ANY_INIT and IN6ADDR_LOOPBACK_INIT were added in - * draft-ietf-ipngwg-bsd-api-04.txt or draft-ietf-ipngwg-bsd-api-05.txt. + * draft-ietf-ipngwg-bsd-api-04.txt or draft-ietf-ipngwg-bsd-api-05.txt. * If 's6_addr' is defined then assume that there is a union and three * levels otherwise assume two levels required. */ @@ -303,6 +303,23 @@ isc_net_enableipv4(void); void isc_net_enableipv6(void); +isc_result_t +isc_net_getudpportrange(int af, in_port_t *low, in_port_t *high); +/* + * Returns system's default range of ephemeral UDP ports, if defined. + * If the range is not available or unknown, ISC_NET_PORTRANGELOW and + * ISC_NET_PORTRANGEHIGH will be returned. + * + * Requires: + * + *\li 'low' and 'high' must be non NULL. + * + * Returns: + * + *\li *low and *high will be the ports specifying the low and high ends of + * the range. + */ + #ifdef ISC_PLATFORM_NEEDNTOP const char * isc_net_ntop(int af, const void *src, char *dst, size_t size); diff --git a/lib/isc/unix/net.c b/lib/isc/unix/net.c index 42cadec7d7617..cd3abc64ed00e 100644 --- a/lib/isc/unix/net.c +++ b/lib/isc/unix/net.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2004, 2005, 2007 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004, 2005, 2007, 2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 1999-2003 Internet Software Consortium. * * Permission to use, copy, modify, and/or distribute this software for any @@ -15,10 +15,19 @@ * PERFORMANCE OF THIS SOFTWARE. 
*/ -/* $Id: net.c,v 1.22.2.2.10.11 2007/09/13 23:45:58 tbox Exp $ */ +/* $Id: net.c,v 1.22.2.2.10.15 2008/07/04 23:45:39 tbox Exp $ */ #include <config.h> +#include <sys/types.h> + +#if defined(HAVE_SYS_SYSCTL_H) +#if defined(HAVE_SYS_PARAM_H) +#include <sys/param.h> +#endif +#include <sys/sysctl.h> +#endif + #include <errno.h> #include <unistd.h> @@ -30,6 +39,59 @@ #include <isc/string.h> #include <isc/util.h> +/* + * Definitions about UDP port range specification. This is a total mess of + * portability variants: some use sysctl (but the sysctl names vary), some use + * system-specific interfaces, some have the same interface for IPv4 and IPv6, + * some separate them, etc... + */ + +/* + * The last resort defaults: use all non well known port space + */ +#ifndef ISC_NET_PORTRANGELOW +#define ISC_NET_PORTRANGELOW 1024 +#endif /* ISC_NET_PORTRANGELOW */ +#ifndef ISC_NET_PORTRANGEHIGH +#define ISC_NET_PORTRANGEHIGH 65535 +#endif /* ISC_NET_PORTRANGEHIGH */ + +#ifdef HAVE_SYSCTLBYNAME + +/* + * sysctl variants + */ +#if defined(__FreeBSD__) || defined(__APPLE__) || defined(__DragonFly__) +#define USE_SYSCTL_PORTRANGE +#define SYSCTL_V4PORTRANGE_LOW "net.inet.ip.portrange.hifirst" +#define SYSCTL_V4PORTRANGE_HIGH "net.inet.ip.portrange.hilast" +#define SYSCTL_V6PORTRANGE_LOW "net.inet.ip.portrange.hifirst" +#define SYSCTL_V6PORTRANGE_HIGH "net.inet.ip.portrange.hilast" +#endif + +#ifdef __NetBSD__ +#define USE_SYSCTL_PORTRANGE +#define SYSCTL_V4PORTRANGE_LOW "net.inet.ip.anonportmin" +#define SYSCTL_V4PORTRANGE_HIGH "net.inet.ip.anonportmax" +#define SYSCTL_V6PORTRANGE_LOW "net.inet6.ip6.anonportmin" +#define SYSCTL_V6PORTRANGE_HIGH "net.inet6.ip6.anonportmax" +#endif + +#else /* !HAVE_SYSCTLBYNAME */ + +#ifdef __OpenBSD__ +#define USE_SYSCTL_PORTRANGE +#define SYSCTL_V4PORTRANGE_LOW { CTL_NET, PF_INET, IPPROTO_IP, \ + IPCTL_IPPORT_HIFIRSTAUTO } +#define SYSCTL_V4PORTRANGE_HIGH { CTL_NET, PF_INET, IPPROTO_IP, \ + IPCTL_IPPORT_HILASTAUTO } +/* Same for IPv6 */ +#define 
SYSCTL_V6PORTRANGE_LOW SYSCTL_V4PORTRANGE_LOW +#define SYSCTL_V6PORTRANGE_HIGH SYSCTL_V4PORTRANGE_HIGH +#endif + +#endif /* HAVE_SYSCTLBYNAME */ + #if defined(ISC_PLATFORM_HAVEIPV6) # if defined(ISC_PLATFORM_NEEDIN6ADDRANY) const struct in6_addr isc_net_in6addrany = IN6ADDR_ANY_INIT; @@ -328,6 +390,101 @@ isc_net_probe_ipv6pktinfo(void) { return (ipv6pktinfo_result); } +#if defined(USE_SYSCTL_PORTRANGE) +#if defined(HAVE_SYSCTLBYNAME) +static isc_result_t +getudpportrange_sysctl(int af, in_port_t *low, in_port_t *high) { + int port_low, port_high; + size_t portlen; + const char *sysctlname_lowport, *sysctlname_hiport; + + if (af == AF_INET) { + sysctlname_lowport = SYSCTL_V4PORTRANGE_LOW; + sysctlname_hiport = SYSCTL_V4PORTRANGE_HIGH; + } else { + sysctlname_lowport = SYSCTL_V6PORTRANGE_LOW; + sysctlname_hiport = SYSCTL_V6PORTRANGE_HIGH; + } + portlen = sizeof(portlen); + if (sysctlbyname(sysctlname_lowport, &port_low, &portlen, + NULL, 0) < 0) { + return (ISC_R_FAILURE); + } + portlen = sizeof(portlen); + if (sysctlbyname(sysctlname_hiport, &port_high, &portlen, + NULL, 0) < 0) { + return (ISC_R_FAILURE); + } + if ((port_low & ~0xffff) != 0 || (port_high & ~0xffff) != 0) + return (ISC_R_RANGE); + + *low = (in_port_t)port_low; + *high = (in_port_t)port_high; + + return (ISC_R_SUCCESS); +} +#else /* !HAVE_SYSCTLBYNAME */ +static isc_result_t +getudpportrange_sysctl(int af, in_port_t *low, in_port_t *high) { + int mib_lo4[4] = SYSCTL_V4PORTRANGE_LOW; + int mib_hi4[4] = SYSCTL_V4PORTRANGE_HIGH; + int mib_lo6[4] = SYSCTL_V6PORTRANGE_LOW; + int mib_hi6[4] = SYSCTL_V6PORTRANGE_HIGH; + int *mib_lo, *mib_hi, miblen; + int port_low, port_high; + size_t portlen; + + if (af == AF_INET) { + mib_lo = mib_lo4; + mib_hi = mib_hi4; + miblen = sizeof(mib_lo4) / sizeof(mib_lo4[0]); + } else { + mib_lo = mib_lo6; + mib_hi = mib_hi6; + miblen = sizeof(mib_lo6) / sizeof(mib_lo6[0]); + } + + portlen = sizeof(portlen); + if (sysctl(mib_lo, miblen, &port_low, &portlen, NULL, 0) < 0) { + 
return (ISC_R_FAILURE); + } + + portlen = sizeof(portlen); + if (sysctl(mib_hi, miblen, &port_high, &portlen, NULL, 0) < 0) { + return (ISC_R_FAILURE); + } + + if ((port_low & ~0xffff) != 0 || (port_high & ~0xffff) != 0) + return (ISC_R_RANGE); + + *low = (in_port_t) port_low; + *high = (in_port_t) port_high; + + return (ISC_R_SUCCESS); +} +#endif /* HAVE_SYSCTLBYNAME */ +#endif /* USE_SYSCTL_PORTRANGE */ + +isc_result_t +isc_net_getudpportrange(int af, in_port_t *low, in_port_t *high) { + int result = ISC_R_FAILURE; + + REQUIRE(low != NULL && high != NULL); + +#if defined(USE_SYSCTL_PORTRANGE) + result = getudpportrange_sysctl(af, low, high); +#else + UNUSED(af); +#endif + + if (result != ISC_R_SUCCESS) { + *low = ISC_NET_PORTRANGELOW; + *high = ISC_NET_PORTRANGEHIGH; + } + + return (ISC_R_SUCCESS); /* we currently never fail in this function */ +} + void isc_net_disableipv4(void) { initialize(); diff --git a/lib/isc/unix/resource.c b/lib/isc/unix/resource.c index a6280b7477e8a..dac9fcaa6c08c 100644 --- a/lib/isc/unix/resource.c +++ b/lib/isc/unix/resource.c @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: resource.c,v 1.11.206.3.4.3 2008/07/28 22:45:53 marka Exp $ */ +/* $Id: resource.c,v 1.11.206.7 2008/08/05 07:23:56 marka Exp $ */ #include <config.h> @@ -32,7 +32,7 @@ #include <linux/fs.h> /* To get the large NR_OPEN. 
*/ #endif -#ifdef __hpux +#if defined(__hpux) && defined(HAVE_SYS_DYNTUNE_H) #include <sys/dyntune.h> #endif @@ -170,7 +170,7 @@ isc_resource_setlimit(isc_resource_t resource, isc_resourcevalue_t value) { if (unixresult == 0) return (ISC_R_SUCCESS); } -#elif defined(__hpux) +#elif defined(__hpux) && defined(HAVE_SYS_DYNTUNE_H) if (resource == isc_resource_openfiles && rlim_value == RLIM_INFINITY) { uint64_t maxfiles; if (gettune("maxfiles_lim", &maxfiles) == 0) { @@ -210,7 +210,7 @@ isc_resource_getlimit(isc_resource_t resource, isc_resourcevalue_t *value) { } isc_result_t -isc_resource_curlimit(isc_resource_t resource, isc_resourcevalue_t *value) { +isc_resource_getcurlimit(isc_resource_t resource, isc_resourcevalue_t *value) { int unixresult; int unixresource; struct rlimit rl; diff --git a/lib/isc/unix/socket.c b/lib/isc/unix/socket.c index 3ee796c24bbaa..1508487945beb 100644 --- a/lib/isc/unix/socket.c +++ b/lib/isc/unix/socket.c @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: socket.c,v 1.207.2.19.2.35.4.6 2008/07/29 04:43:57 each Exp $ */ +/* $Id: socket.c,v 1.207.2.19.2.62 2008/11/20 00:07:38 jinmei Exp $ */ #include <config.h> @@ -52,20 +52,66 @@ #include <isc/thread.h> #include <isc/util.h> +#ifdef ISC_PLATFORM_HAVESYSUNH +#include <sys/un.h> +#endif +#ifdef ISC_PLATFORM_HAVEKQUEUE +#include <sys/event.h> +#endif +#ifdef ISC_PLATFORM_HAVEEPOLL +#include <sys/epoll.h> +#endif +#ifdef ISC_PLATFORM_HAVEDEVPOLL +#include <sys/devpoll.h> +#endif + #include "errno2result.h" #ifndef ISC_PLATFORM_USETHREADS #include "socket_p.h" #endif /* ISC_PLATFORM_USETHREADS */ -#if defined(SO_BSDCOMPAT) && defined(__linux__) -#include <sys/utsname.h> -#endif +/* + * Choose the most preferable multiplex method. 
+ */ +#ifdef ISC_PLATFORM_HAVEKQUEUE +#define USE_KQUEUE +#elif defined (ISC_PLATFORM_HAVEEPOLL) +#define USE_EPOLL +#elif defined (ISC_PLATFORM_HAVEDEVPOLL) +#define USE_DEVPOLL +typedef struct { + unsigned int want_read : 1, + want_write : 1; +} pollinfo_t; +#else +#define USE_SELECT +#endif /* ISC_PLATFORM_HAVEKQUEUE */ -/*% - * Max number of open sockets. In the vast majority of cases the default size - * of FD_SETSIZE should be fine, and this constant should be increased only - * when absolutely necessary and possible, i.e., the server is exhausting all +#ifndef ISC_PLATFORM_USETHREADS +#if defined(USE_KQUEUE) || defined(USE_EPOLL) || defined(USE_DEVPOLL) +struct isc_socketwait { + int nevents; +}; +#elif defined (USE_SELECT) +struct isc_socketwait { + fd_set *readset; + fd_set *writeset; + int nfds; + int maxfd; +}; +#endif /* USE_KQUEUE */ +#endif /* !ISC_PLATFORM_USETHREADS */ + +/* + * Maximum number of allowable open sockets. This is also the maximum + * allowable socket file descriptor. + * + * Care should be taken before modifying this value for select(): + * The API standard doesn't ensure select() accept more than (the system default + * of) FD_SETSIZE descriptors, and the default size should in fact be fine in + * the vast majority of cases. This constant should therefore be increased only + * when absolutely necessary and possible, i.e., the server is exhausting all * available file descriptors (up to FD_SETSIZE) and the select() function * and FD_xxx macros support larger values than FD_SETSIZE (which may not * always by true, but we keep using some of them to ensure as much @@ -76,21 +122,79 @@ * As a special note, this value shouldn't have to be touched if * this is a build for an authoritative only DNS server. 
*/ - -#ifndef ISC_SOCKET_FDSETSIZE -#define ISC_SOCKET_FDSETSIZE FD_SETSIZE -#endif - +#ifndef ISC_SOCKET_MAXSOCKETS +#if defined(USE_KQUEUE) || defined(USE_EPOLL) || defined(USE_DEVPOLL) +#define ISC_SOCKET_MAXSOCKETS 4096 +#elif defined(USE_SELECT) +#define ISC_SOCKET_MAXSOCKETS FD_SETSIZE +#endif /* USE_KQUEUE... */ +#endif /* ISC_SOCKET_MAXSOCKETS */ + +#ifdef USE_SELECT /*% - * Mac OS X needs a special definition to support larger values in select() + * Mac OS X needs a special definition to support larger values in select(). + * We always define this because a larger value can be specified run-time. */ -#if ISC_SOCKET_FDSETSIZE > FD_SETSIZE #ifdef __APPLE__ #define _DARWIN_UNLIMITED_SELECT #endif /* __APPLE__ */ +#endif /* USE_SELECT */ + +#ifdef ISC_SOCKET_USE_POLLWATCH +/* + * If this macro is defined, enable workaround for a Solaris /dev/poll kernel + * bug: DP_POLL ioctl could keep sleeping even if socket I/O is possible for + * some of the specified FD. The idea is based on the observation that it's + * likely for a busy server to keep receiving packets. It specifically works + * as follows: the socket watcher is first initialized with the state of + * "poll_idle". While it's in the idle state it keeps sleeping until a socket + * event occurs. When it wakes up for a socket I/O event, it moves to the + * poll_active state, and sets the poll timeout to a short period + * (ISC_SOCKET_POLLWATCH_TIMEOUT msec). If timeout occurs in this state, the + * watcher goes to the poll_checking state with the same timeout period. + * In this state, the watcher tries to detect whether this is a break + * during intermittent events or the kernel bug is triggered. If the next + * polling reports an event within the short period, the previous timeout is + * likely to be a kernel bug, and so the watcher goes back to the active state. + * Otherwise, it moves to the idle state again. 
+ * + * It's not clear whether this is a thread-related bug, but since we've only + * seen this with threads, this workaround is used only when enabling threads. + */ + +typedef enum { poll_idle, poll_active, poll_checking } pollstate_t; + +#ifndef ISC_SOCKET_POLLWATCH_TIMEOUT +#define ISC_SOCKET_POLLWATCH_TIMEOUT 10 +#endif /* ISC_SOCKET_POLLWATCH_TIMEOUT */ +#endif /* ISC_SOCKET_USE_POLLWATCH */ + +/* + * Size of per-FD lock buckets. + */ +#ifdef ISC_PLATFORM_USETHREADS +#define FDLOCK_COUNT 1024 +#define FDLOCK_ID(fd) ((fd) % FDLOCK_COUNT) +#else +#define FDLOCK_COUNT 1 +#define FDLOCK_ID(fd) 0 +#endif /* ISC_PLATFORM_USETHREADS */ + +/* + * Maximum number of events communicated with the kernel. There should normally + * be no need for having a large number. + */ +#if defined(USE_KQUEUE) || defined(USE_EPOLL) || defined(USE_DEVPOLL) +#ifndef ISC_SOCKET_MAXEVENTS +#define ISC_SOCKET_MAXEVENTS 64 +#endif #endif -/*% +#if defined(SO_BSDCOMPAT) && defined(__linux__) +#include <sys/utsname.h> +#endif + +/* * Some systems define the socket length argument as an int, some as size_t, * some as socklen_t. This is here so it can be easily changed if needed. */ @@ -218,22 +322,50 @@ struct isc_socketmgr { unsigned int magic; isc_mem_t *mctx; isc_mutex_t lock; + isc_mutex_t *fdlock; +#ifdef USE_KQUEUE + int kqueue_fd; + int nevents; + struct kevent *events; +#endif /* USE_KQUEUE */ +#ifdef USE_EPOLL + int epoll_fd; + int nevents; + struct epoll_event *events; +#endif /* USE_EPOLL */ +#ifdef USE_DEVPOLL + int devpoll_fd; + int nevents; + struct pollfd *events; +#endif /* USE_DEVPOLL */ +#ifdef USE_SELECT int fd_bufsize; - int fdsize; +#endif /* USE_SELECT */ + unsigned int maxsocks; +#ifdef ISC_PLATFORM_USETHREADS + int pipe_fds[2]; +#endif + + /* Locked by fdlock. */ + isc_socket_t **fds; + int *fdstate; +#ifdef USE_DEVPOLL + pollinfo_t *fdpollinfo; +#endif + /* Locked by manager lock. 
*/ ISC_LIST(isc_socket_t) socklist; +#ifdef USE_SELECT fd_set *read_fds; fd_set *read_fds_copy; fd_set *write_fds; fd_set *write_fds_copy; - isc_socket_t **fds; - int *fdstate; int maxfd; - int reserved; /* unlocked */ +#endif /* USE_SELECT */ + int reserved; /* unlocked */ #ifdef ISC_PLATFORM_USETHREADS isc_thread_t watcher; isc_condition_t shutdown_ok; - int pipe_fds[2]; #else /* ISC_PLATFORM_USETHREADS */ unsigned int refs; #endif /* ISC_PLATFORM_USETHREADS */ @@ -272,8 +404,9 @@ static void build_msghdr_send(isc_socket_t *, isc_socketevent_t *, struct msghdr *, struct iovec *, size_t *); static void build_msghdr_recv(isc_socket_t *, isc_socketevent_t *, struct msghdr *, struct iovec *, size_t *); -static void cleanup_fdsets(isc_socketmgr_t *, isc_mem_t *); -static isc_result_t create_fdsets(isc_socketmgr_t *, isc_mem_t *); +#ifdef ISC_PLATFORM_USETHREADS +static isc_boolean_t process_ctlfd(isc_socketmgr_t *manager); +#endif #define SELECT_POKE_SHUTDOWN (-1) #define SELECT_POKE_NOTHING (-2) @@ -342,9 +475,195 @@ socket_log(isc_socket_t *sock, isc_sockaddr_t *address, } } +#if defined(_AIX) && defined(ISC_NET_BSD44MSGHDR) && \ + defined(USE_CMSG) && defined(IPV6_RECVPKTINFO) +/* + * AIX has a kernel bug where IPV6_RECVPKTINFO gets cleared by + * setting IPV6_V6ONLY. 
+ */ +static void +FIX_IPV6_RECVPKTINFO(isc_socket_t *sock) +{ + char strbuf[ISC_STRERRORSIZE]; + int on = 1; + + if (sock->pf != AF_INET6 || sock->type != isc_sockettype_udp) + return; + + if (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, + (void *)&on, sizeof(on)) < 0) { + + UNEXPECTED_ERROR(__FILE__, __LINE__, + "setsockopt(%d, IPV6_RECVPKTINFO) " + "%s: %s", sock->fd, + isc_msgcat_get(isc_msgcat, + ISC_MSGSET_GENERAL, + ISC_MSG_FAILED, + "failed"), + strbuf); + } +} +#else +#define FIX_IPV6_RECVPKTINFO(sock) (void)0 +#endif + +static inline isc_result_t +watch_fd(isc_socketmgr_t *manager, int fd, int msg) { + isc_result_t result = ISC_R_SUCCESS; + +#ifdef USE_KQUEUE + struct kevent evchange; + + memset(&evchange, 0, sizeof(evchange)); + if (msg == SELECT_POKE_READ) + evchange.filter = EVFILT_READ; + else + evchange.filter = EVFILT_WRITE; + evchange.flags = EV_ADD; + evchange.ident = fd; + if (kevent(manager->kqueue_fd, &evchange, 1, NULL, 0, NULL) != 0) + result = isc__errno2result(errno); + + return (result); +#elif defined(USE_EPOLL) + struct epoll_event event; + + if (msg == SELECT_POKE_READ) + event.events = EPOLLIN; + else + event.events = EPOLLOUT; + event.data.fd = fd; + if (epoll_ctl(manager->epoll_fd, EPOLL_CTL_ADD, fd, &event) == -1 && + errno != EEXIST) { + result = isc__errno2result(errno); + } + + return (result); +#elif defined(USE_DEVPOLL) + struct pollfd pfd; + int lockid = FDLOCK_ID(fd); + + memset(&pfd, 0, sizeof(pfd)); + if (msg == SELECT_POKE_READ) + pfd.events = POLLIN; + else + pfd.events = POLLOUT; + pfd.fd = fd; + pfd.revents = 0; + LOCK(&manager->fdlock[lockid]); + if (write(manager->devpoll_fd, &pfd, sizeof(pfd)) == -1) + result = isc__errno2result(errno); + else { + if (msg == SELECT_POKE_READ) + manager->fdpollinfo[fd].want_read = 1; + else + manager->fdpollinfo[fd].want_write = 1; + } + UNLOCK(&manager->fdlock[lockid]); + + return (result); +#elif defined(USE_SELECT) + LOCK(&manager->lock); + if (msg == SELECT_POKE_READ) + 
FD_SET(fd, manager->read_fds); + if (msg == SELECT_POKE_WRITE) + FD_SET(fd, manager->write_fds); + UNLOCK(&manager->lock); + + return (result); +#endif +} + +static inline isc_result_t +unwatch_fd(isc_socketmgr_t *manager, int fd, int msg) { + isc_result_t result = ISC_R_SUCCESS; + +#ifdef USE_KQUEUE + struct kevent evchange; + + memset(&evchange, 0, sizeof(evchange)); + if (msg == SELECT_POKE_READ) + evchange.filter = EVFILT_READ; + else + evchange.filter = EVFILT_WRITE; + evchange.flags = EV_DELETE; + evchange.ident = fd; + if (kevent(manager->kqueue_fd, &evchange, 1, NULL, 0, NULL) != 0) + result = isc__errno2result(errno); + + return (result); +#elif defined(USE_EPOLL) + struct epoll_event event; + + if (msg == SELECT_POKE_READ) + event.events = EPOLLIN; + else + event.events = EPOLLOUT; + event.data.fd = fd; + if (epoll_ctl(manager->epoll_fd, EPOLL_CTL_DEL, fd, &event) == -1 && + errno != ENOENT) { + char strbuf[ISC_STRERRORSIZE]; + isc__strerror(errno, strbuf, sizeof(strbuf)); + UNEXPECTED_ERROR(__FILE__, __LINE__, + "epoll_ctl(DEL), %d: %s", fd, strbuf); + result = ISC_R_UNEXPECTED; + } + return (result); +#elif defined(USE_DEVPOLL) + struct pollfd pfds[2]; + size_t writelen = sizeof(pfds[0]); + int lockid = FDLOCK_ID(fd); + + memset(pfds, 0, sizeof(pfds)); + pfds[0].events = POLLREMOVE; + pfds[0].fd = fd; + + /* + * Canceling read or write polling via /dev/poll is tricky. Since it + * only provides a way of canceling per FD, we may need to re-poll the + * socket for the other operation. 
+ */ + LOCK(&manager->fdlock[lockid]); + if (msg == SELECT_POKE_READ && + manager->fdpollinfo[fd].want_write == 1) { + pfds[1].events = POLLOUT; + pfds[1].fd = fd; + writelen += sizeof(pfds[1]); + } + if (msg == SELECT_POKE_WRITE && + manager->fdpollinfo[fd].want_read == 1) { + pfds[1].events = POLLIN; + pfds[1].fd = fd; + writelen += sizeof(pfds[1]); + } + + if (write(manager->devpoll_fd, pfds, writelen) == -1) + result = isc__errno2result(errno); + else { + if (msg == SELECT_POKE_READ) + manager->fdpollinfo[fd].want_read = 0; + else + manager->fdpollinfo[fd].want_write = 0; + } + UNLOCK(&manager->fdlock[lockid]); + + return (result); +#elif defined(USE_SELECT) + LOCK(&manager->lock); + if (msg == SELECT_POKE_READ) + FD_CLR(fd, manager->read_fds); + else if (msg == SELECT_POKE_WRITE) + FD_CLR(fd, manager->write_fds); + UNLOCK(&manager->lock); + + return (result); +#endif +} + static void wakeup_socket(isc_socketmgr_t *manager, int fd, int msg) { - isc_socket_t *sock; + isc_result_t result; + int lockid = FDLOCK_ID(fd); /* * This is a wakeup on a socket. If the socket is not in the @@ -352,27 +671,54 @@ wakeup_socket(isc_socketmgr_t *manager, int fd, int msg) { * or writes. 
*/ - INSIST(fd >= 0 && fd < manager->fdsize); + INSIST(fd >= 0 && fd < (int)manager->maxsocks); - if (manager->fdstate[fd] == CLOSE_PENDING) { + if (msg == SELECT_POKE_CLOSE) { + /* No one should be updating fdstate, so no need to lock it */ + INSIST(manager->fdstate[fd] == CLOSE_PENDING); manager->fdstate[fd] = CLOSED; - FD_CLR(fd, manager->read_fds); - FD_CLR(fd, manager->write_fds); + (void)unwatch_fd(manager, fd, SELECT_POKE_READ); + (void)unwatch_fd(manager, fd, SELECT_POKE_WRITE); (void)close(fd); return; } - if (manager->fdstate[fd] != MANAGED) - return; - sock = manager->fds[fd]; + LOCK(&manager->fdlock[lockid]); + if (manager->fdstate[fd] == CLOSE_PENDING) { + UNLOCK(&manager->fdlock[lockid]); + /* + * We accept (and ignore) any error from unwatch_fd() as we are + * closing the socket, hoping it doesn't leave dangling state in + * the kernel. + * Note that unwatch_fd() must be called after releasing the + * fdlock; otherwise it could cause deadlock due to a lock order + * reversal. + */ + (void)unwatch_fd(manager, fd, SELECT_POKE_READ); + (void)unwatch_fd(manager, fd, SELECT_POKE_WRITE); + return; + } + if (manager->fdstate[fd] != MANAGED) { + UNLOCK(&manager->fdlock[lockid]); + return; + } + UNLOCK(&manager->fdlock[lockid]); /* * Set requested bit. */ - if (msg == SELECT_POKE_READ) - FD_SET(sock->fd, manager->read_fds); - if (msg == SELECT_POKE_WRITE) - FD_SET(sock->fd, manager->write_fds); + result = watch_fd(manager, fd, msg); + if (result != ISC_R_SUCCESS) { + /* + * XXXJT: what should we do? Ignoring the failure of watching + * a socket will make the application dysfunctional, but there + * seems to be no reasonable recovery process. 
+ */ + isc_log_write(isc_lctx, ISC_LOGCATEGORY_GENERAL, + ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, + "failed to start watching FD (%d): %s", + fd, isc_result_totext(result)); + } } #ifdef ISC_PLATFORM_USETHREADS @@ -667,7 +1013,7 @@ build_msghdr_send(isc_socket_t *sock, isc_socketevent_t *dev, memset(msg, 0, sizeof(*msg)); - if (sock->type == isc_sockettype_udp) { + if (!sock->connected) { msg->msg_name = (void *)&dev->address.type.sa; msg->msg_namelen = dev->address.length; } else { @@ -941,15 +1287,17 @@ dump_msg(struct msghdr *msg) { unsigned int i; printf("MSGHDR %p\n", msg); - printf("\tname %p, namelen %d\n", msg->msg_name, msg->msg_namelen); - printf("\tiov %p, iovlen %d\n", msg->msg_iov, msg->msg_iovlen); + printf("\tname %p, namelen %ld\n", msg->msg_name, + (long) msg->msg_namelen); + printf("\tiov %p, iovlen %ld\n", msg->msg_iov, + (long) msg->msg_iovlen); for (i = 0; i < (unsigned int)msg->msg_iovlen; i++) - printf("\t\t%d\tbase %p, len %d\n", i, + printf("\t\t%d\tbase %p, len %ld\n", i, msg->msg_iov[i].iov_base, - msg->msg_iov[i].iov_len); + (long) msg->msg_iov[i].iov_len); #ifdef ISC_NET_BSD44MSGHDR - printf("\tcontrol %p, controllen %d\n", msg->msg_control, - msg->msg_controllen); + printf("\tcontrol %p, controllen %ld\n", msg->msg_control, + (long) msg->msg_controllen); #endif } #endif @@ -1017,6 +1365,14 @@ doio_recv(isc_socket_t *sock, isc_socketevent_t *dev) { /* HPUX 11.11 can return EADDRNOTAVAIL. */ SOFT_OR_HARD(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL); ALWAYS_HARD(ENOBUFS, ISC_R_NORESOURCES); + /* + * HPUX returns EPROTO and EINVAL on receiving some ICMP/ICMPv6 + * errors. + */ +#ifdef EPROTO + SOFT_OR_HARD(EPROTO, ISC_R_HOSTUNREACH); +#endif + SOFT_OR_HARD(EINVAL, ISC_R_HOSTUNREACH); #undef SOFT_OR_HARD #undef ALWAYS_HARD @@ -1222,7 +1578,54 @@ doio_send(isc_socket_t *sock, isc_socketevent_t *dev) { * references exist. 
*/ static void +closesocket(isc_socketmgr_t *manager, isc_sockettype_t type, int fd) { + int lockid = FDLOCK_ID(fd); + + UNUSED(type); + + /* + * No one has this socket open, so the watcher doesn't have to be + * poked, and the socket doesn't have to be locked. + */ + LOCK(&manager->fdlock[lockid]); + manager->fds[fd] = NULL; + manager->fdstate[fd] = CLOSE_PENDING; + UNLOCK(&manager->fdlock[lockid]); + select_poke(manager, fd, SELECT_POKE_CLOSE); + + /* + * update manager->maxfd here (XXX: this should be implemented more + * efficiently) + */ +#ifdef USE_SELECT + LOCK(&manager->lock); + if (manager->maxfd == fd) { + int i; + + manager->maxfd = 0; + for (i = fd - 1; i >= 0; i--) { + lockid = FDLOCK_ID(i); + + LOCK(&manager->fdlock[lockid]); + if (manager->fdstate[i] == MANAGED) { + manager->maxfd = i; + UNLOCK(&manager->fdlock[lockid]); + break; + } + UNLOCK(&manager->fdlock[lockid]); + } +#ifdef ISC_PLATFORM_USETHREADS + if (manager->maxfd < manager->pipe_fds[0]) + manager->maxfd = manager->pipe_fds[0]; +#endif + } + UNLOCK(&manager->lock); +#endif /* USE_SELECT */ +} + +static void destroy(isc_socket_t **sockp) { + int fd; isc_socket_t *sock = *sockp; isc_socketmgr_t *manager = sock->manager; @@ -1233,17 +1636,16 @@ destroy(isc_socket_t **sockp) { INSIST(ISC_LIST_EMPTY(sock->recv_list)); INSIST(ISC_LIST_EMPTY(sock->send_list)); INSIST(sock->connect_ev == NULL); - REQUIRE(sock->fd >= 0 && sock->fd < (int)manager->fdsize); + REQUIRE(sock->fd == -1 || sock->fd < (int)manager->maxsocks); + + if (sock->fd >= 0) { + fd = sock->fd; + sock->fd = -1; + closesocket(manager, sock->type, fd); + } LOCK(&manager->lock); - /* - * No one has this socket open, so the watcher doesn't have to be - * poked, and the socket doesn't have to be locked. 
- */ - manager->fds[sock->fd] = NULL; - manager->fdstate[sock->fd] = CLOSE_PENDING; - select_poke(manager, sock->fd, SELECT_POKE_CLOSE); ISC_LIST_UNLINK(manager->socklist, sock, link); #ifdef ISC_PLATFORM_USETHREADS @@ -1251,10 +1653,6 @@ destroy(isc_socket_t **sockp) { SIGNAL(&manager->shutdown_ok); #endif /* ISC_PLATFORM_USETHREADS */ - /* - * XXX should reset manager->maxfd here - */ - UNLOCK(&manager->lock); free_socket(sockp); @@ -1446,40 +1844,22 @@ clear_bsdcompat(void) { } #endif -/*% - * Create a new 'type' socket managed by 'manager'. Events - * will be posted to 'task' and when dispatched 'action' will be - * called with 'arg' as the arg value. The new socket is returned - * in 'socketp'. - */ -isc_result_t -isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, - isc_socket_t **socketp) -{ - isc_socket_t *sock = NULL; - isc_result_t ret; -#if defined(USE_CMSG) || defined(SO_BSDCOMPAT) - int on = 1; -#endif +static isc_result_t +opensocket(isc_socketmgr_t *manager, isc_socket_t *sock) { char strbuf[ISC_STRERRORSIZE]; const char *err = "socket"; int tries = 0; +#if defined(USE_CMSG) || defined(SO_BSDCOMPAT) + int on = 1; +#endif - REQUIRE(VALID_MANAGER(manager)); - REQUIRE(socketp != NULL && *socketp == NULL); - - ret = allocate_socket(manager, type, &sock); - if (ret != ISC_R_SUCCESS) - return (ret); - - sock->pf = pf; again: - switch (type) { + switch (sock->type) { case isc_sockettype_udp: - sock->fd = socket(pf, SOCK_DGRAM, IPPROTO_UDP); + sock->fd = socket(sock->pf, SOCK_DGRAM, IPPROTO_UDP); break; case isc_sockettype_tcp: - sock->fd = socket(pf, SOCK_STREAM, IPPROTO_TCP); + sock->fd = socket(sock->pf, SOCK_STREAM, IPPROTO_TCP); break; } if (sock->fd == -1 && errno == EINTR && tries++ < 42) @@ -1489,7 +1869,7 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, /* * Leave a space for stdio and TCP to work in. 
*/ - if (manager->reserved != 0 && type == isc_sockettype_udp && + if (manager->reserved != 0 && sock->type == isc_sockettype_udp && sock->fd >= 0 && sock->fd < manager->reserved) { int new, tmp; new = fcntl(sock->fd, F_DUPFD, manager->reserved); @@ -1509,23 +1889,28 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, } #endif - if (sock->fd >= (int)manager->fdsize) { + if (sock->fd >= (int)manager->maxsocks) { (void)close(sock->fd); isc_log_iwrite(isc_lctx, ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_TOOMANYFDS, - "%s: too many open file descriptors", "socket"); - free_socket(&sock); + "socket: file descriptor exceeds limit (%d/%u)", + sock->fd, manager->maxsocks); return (ISC_R_NORESOURCES); } if (sock->fd < 0) { - free_socket(&sock); - switch (errno) { case EMFILE: case ENFILE: + isc__strerror(errno, strbuf, sizeof(strbuf)); + isc_log_iwrite(isc_lctx, ISC_LOGCATEGORY_GENERAL, + ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, + isc_msgcat, ISC_MSGSET_SOCKET, + ISC_MSG_TOOMANYFDS, + "%s: %s", err, strbuf); + /* fallthrough */ case ENOBUFS: return (ISC_R_NORESOURCES); @@ -1554,7 +1939,6 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, if (make_nonblock(sock->fd) != ISC_R_SUCCESS) { (void)close(sock->fd); - free_socket(&sock); return (ISC_R_UNEXPECTED); } @@ -1562,7 +1946,7 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, RUNTIME_CHECK(isc_once_do(&bsdcompat_once, clear_bsdcompat) == ISC_R_SUCCESS); if (bsdcompat && setsockopt(sock->fd, SOL_SOCKET, SO_BSDCOMPAT, - (void *)&on, sizeof(on)) < 0) { + (void *)&on, sizeof(on)) < 0) { isc__strerror(errno, strbuf, sizeof(strbuf)); UNEXPECTED_ERROR(__FILE__, __LINE__, "setsockopt(%d, SO_BSDCOMPAT) %s: %s", @@ -1588,9 +1972,10 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, } #endif -#if defined(USE_CMSG) - if (type == isc_sockettype_udp) { +#if defined(USE_CMSG) || 
defined(SO_RCVBUF) + if (sock->type == isc_sockettype_udp) { +#if defined(USE_CMSG) #if defined(SO_TIMESTAMP) if (setsockopt(sock->fd, SOL_SOCKET, SO_TIMESTAMP, (void *)&on, sizeof(on)) < 0 @@ -1609,7 +1994,7 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, #endif /* SO_TIMESTAMP */ #if defined(ISC_PLATFORM_HAVEIPV6) - if (pf == AF_INET6 && sock->recvcmsgbuflen == 0U) { + if (sock->pf == AF_INET6 && sock->recvcmsgbuflen == 0U) { /* * Warn explicitly because this anomaly can be hidden * in usual operation (and unexpectedly appear later). @@ -1621,7 +2006,7 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, #ifdef ISC_PLATFORM_HAVEIN6PKTINFO #ifdef IPV6_RECVPKTINFO /* RFC 3542 */ - if ((pf == AF_INET6) + if ((sock->pf == AF_INET6) && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, (void *)&on, sizeof(on)) < 0)) { isc__strerror(errno, strbuf, sizeof(strbuf)); @@ -1636,7 +2021,7 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, } #else /* RFC 2292 */ - if ((pf == AF_INET6) + if ((sock->pf == AF_INET6) && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_PKTINFO, (void *)&on, sizeof(on)) < 0)) { isc__strerror(errno, strbuf, sizeof(strbuf)); @@ -1653,33 +2038,93 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, #endif /* ISC_PLATFORM_HAVEIN6PKTINFO */ #ifdef IPV6_USE_MIN_MTU /* RFC 3542, not too common yet*/ /* use minimum MTU */ - if (pf == AF_INET6) { + if (sock->pf == AF_INET6) { (void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_USE_MIN_MTU, (void *)&on, sizeof(on)); } #endif #endif /* ISC_PLATFORM_HAVEIPV6 */ +#endif /* defined(USE_CMSG) */ +#if defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DONT) + /* + * Turn off Path MTU discovery on IPv4/UDP sockets. 
+ */ + if (sock->pf == AF_INET) { + int action = IP_PMTUDISC_DONT; + (void)setsockopt(sock->fd, IPPROTO_IP, IP_MTU_DISCOVER, + &action, sizeof(action)); + } +#endif +#if defined(IP_DONTFRAG) + /* + * Turn off Path MTU discovery on IPv4/UDP sockets. + */ + if (sock->pf == AF_INET) { + int off = 0; + (void)setsockopt(sock->fd, IPPROTO_IP, IP_DONTFRAG, + &off, sizeof(off)); + } +#endif + } +#endif /* defined(USE_CMSG) || defined(SO_RCVBUF) */ + + return (ISC_R_SUCCESS); +} + +/* + * Create a new 'type' socket managed by 'manager'. Events + * will be posted to 'task' and when dispatched 'action' will be + * called with 'arg' as the arg value. The new socket is returned + * in 'socketp'. + */ +isc_result_t +isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, + isc_socket_t **socketp) +{ + isc_socket_t *sock = NULL; + isc_result_t result; + int lockid; + + REQUIRE(VALID_MANAGER(manager)); + REQUIRE(socketp != NULL && *socketp == NULL); + + result = allocate_socket(manager, type, &sock); + if (result != ISC_R_SUCCESS) + return (result); + + sock->pf = pf; + result = opensocket(manager, sock); + if (result != ISC_R_SUCCESS) { + free_socket(&sock); + return (result); } -#endif /* USE_CMSG */ sock->references = 1; *socketp = sock; - LOCK(&manager->lock); - /* * Note we don't have to lock the socket like we normally would because * there are no external references to it yet. 
*/ + lockid = FDLOCK_ID(sock->fd); + LOCK(&manager->fdlock[lockid]); manager->fds[sock->fd] = sock; manager->fdstate[sock->fd] = MANAGED; +#ifdef USE_DEVPOLL + INSIST(sock->manager->fdpollinfo[sock->fd].want_read == 0 && + sock->manager->fdpollinfo[sock->fd].want_write == 0); +#endif + UNLOCK(&manager->fdlock[lockid]); + + LOCK(&manager->lock); ISC_LIST_APPEND(manager->socklist, sock, link); +#ifdef USE_SELECT if (manager->maxfd < sock->fd) manager->maxfd = sock->fd; - +#endif UNLOCK(&manager->lock); socket_log(sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET, @@ -1688,6 +2133,48 @@ isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, return (ISC_R_SUCCESS); } +isc_result_t +isc_socket_open(isc_socket_t *sock) { + isc_result_t result; + + REQUIRE(VALID_SOCKET(sock)); + + LOCK(&sock->lock); + REQUIRE(sock->references == 1); + UNLOCK(&sock->lock); + /* + * We don't need to retain the lock hereafter, since no one else has + * this socket. + */ + REQUIRE(sock->fd == -1); + + result = opensocket(sock->manager, sock); + if (result != ISC_R_SUCCESS) + sock->fd = -1; + + if (result == ISC_R_SUCCESS) { + int lockid = FDLOCK_ID(sock->fd); + + LOCK(&sock->manager->fdlock[lockid]); + sock->manager->fds[sock->fd] = sock; + sock->manager->fdstate[sock->fd] = MANAGED; +#ifdef USE_DEVPOLL + INSIST(sock->manager->fdpollinfo[sock->fd].want_read == 0 && + sock->manager->fdpollinfo[sock->fd].want_write == 0); +#endif + UNLOCK(&sock->manager->fdlock[lockid]); + +#ifdef USE_SELECT + LOCK(&sock->manager->lock); + if (sock->manager->maxfd < sock->fd) + sock->manager->maxfd = sock->fd; + UNLOCK(&sock->manager->lock); +#endif + } + + return (result); +} + /* * Attach to a socket. Caller must explicitly detach when it is done. 
*/ @@ -1729,6 +2216,44 @@ isc_socket_detach(isc_socket_t **socketp) { *socketp = NULL; } +isc_result_t +isc_socket_close(isc_socket_t *sock) { + int fd; + + REQUIRE(VALID_SOCKET(sock)); + + LOCK(&sock->lock); + REQUIRE(sock->references == 1); + UNLOCK(&sock->lock); + /* + * We don't need to retain the lock hereafter, since no one else has + * this socket. + */ + + REQUIRE(sock->fd >= 0 && sock->fd < (int)sock->manager->maxsocks); + + INSIST(!sock->connecting); + INSIST(!sock->pending_recv); + INSIST(!sock->pending_send); + INSIST(!sock->pending_accept); + INSIST(ISC_LIST_EMPTY(sock->recv_list)); + INSIST(ISC_LIST_EMPTY(sock->send_list)); + INSIST(ISC_LIST_EMPTY(sock->accept_list)); + INSIST(sock->connect_ev == NULL); + + fd = sock->fd; + sock->fd = -1; + sock->listener = 0; + sock->connected = 0; + sock->connecting = 0; + sock->bound = 0; + isc_sockaddr_any(&sock->address); + + closesocket(sock->manager, sock->type, fd); + + return (ISC_R_SUCCESS); +} + /* * I/O is possible on a given socket. Schedule an event to this task that * will call an internal function to do the I/O. This will charge the @@ -2041,13 +2566,14 @@ internal_accept(isc_task_t *me, isc_event_t *ev) { sock->pf); (void)close(fd); goto soft_error; - } else if (fd >= (int)manager->fdsize) { + } else if (fd >= (int)manager->maxsocks) { isc_log_iwrite(isc_lctx, ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_TOOMANYFDS, - "%s: too many open file descriptors", - "accept"); + "accept: " + "file descriptor exceeds limit (%d/%u)", + fd, manager->maxsocks); (void)close(fd); goto soft_error; } @@ -2081,6 +2607,13 @@ internal_accept(isc_task_t *me, isc_event_t *ev) { * -1 means the new socket didn't happen. 
*/ if (fd != -1) { + int lockid = FDLOCK_ID(fd); + + LOCK(&manager->fdlock[lockid]); + manager->fds[fd] = dev->newsocket; + manager->fdstate[fd] = MANAGED; + UNLOCK(&manager->fdlock[lockid]); + LOCK(&manager->lock); ISC_LIST_APPEND(manager->socklist, dev->newsocket, link); @@ -2093,10 +2626,10 @@ internal_accept(isc_task_t *me, isc_event_t *ev) { */ dev->address = dev->newsocket->address; - manager->fds[fd] = dev->newsocket; - manager->fdstate[fd] = MANAGED; +#ifdef USE_SELECT if (manager->maxfd < fd) manager->maxfd = fd; +#endif socket_log(sock, &dev->newsocket->address, CREATION, isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN, @@ -2245,77 +2778,256 @@ internal_send(isc_task_t *me, isc_event_t *ev) { UNLOCK(&sock->lock); } +/* + * Process read/writes on each fd here. Avoid locking + * and unlocking twice if both reads and writes are possible. + */ static void -process_fds(isc_socketmgr_t *manager, int maxfd, - fd_set *readfds, fd_set *writefds) +process_fd(isc_socketmgr_t *manager, int fd, isc_boolean_t readable, + isc_boolean_t writeable) { - int i; isc_socket_t *sock; isc_boolean_t unlock_sock; - - REQUIRE(maxfd <= (int)manager->fdsize); + int lockid = FDLOCK_ID(fd); /* - * Process read/writes on other fds here. Avoid locking - * and unlocking twice if both reads and writes are possible. + * If the socket is going to be closed, don't do more I/O. 
*/ - for (i = 0; i < maxfd; i++) { + LOCK(&manager->fdlock[lockid]); + if (manager->fdstate[fd] == CLOSE_PENDING) { + UNLOCK(&manager->fdlock[lockid]); + + (void)unwatch_fd(manager, fd, SELECT_POKE_READ); + (void)unwatch_fd(manager, fd, SELECT_POKE_WRITE); + return; + } + + sock = manager->fds[fd]; + UNLOCK(&manager->fdlock[lockid]); + unlock_sock = ISC_FALSE; + if (readable) { + if (sock == NULL) { + (void)unwatch_fd(manager, fd, SELECT_POKE_READ); + goto check_write; + } + unlock_sock = ISC_TRUE; + LOCK(&sock->lock); + if (!SOCK_DEAD(sock)) { + if (sock->listener) + dispatch_accept(sock); + else + dispatch_recv(sock); + } + (void)unwatch_fd(manager, fd, SELECT_POKE_READ); + } +check_write: + if (writeable) { + if (sock == NULL) { + (void)unwatch_fd(manager, fd, SELECT_POKE_WRITE); + return; + } + if (!unlock_sock) { + unlock_sock = ISC_TRUE; + LOCK(&sock->lock); + } + if (!SOCK_DEAD(sock)) { + if (sock->connecting) + dispatch_connect(sock); + else + dispatch_send(sock); + } + (void)unwatch_fd(manager, fd, SELECT_POKE_WRITE); + } + if (unlock_sock) + UNLOCK(&sock->lock); +} + +#ifdef USE_KQUEUE +static isc_boolean_t +process_fds(isc_socketmgr_t *manager, struct kevent *events, int nevents) { + int i; + isc_boolean_t readable, writable; + isc_boolean_t done = ISC_FALSE; #ifdef ISC_PLATFORM_USETHREADS - if (i == manager->pipe_fds[0] || i == manager->pipe_fds[1]) + isc_boolean_t have_ctlevent = ISC_FALSE; +#endif + + if (nevents == manager->nevents) { + /* + * This is not an error, but something unexpected. If this + * happens, it may indicate the need for increasing + * ISC_SOCKET_MAXEVENTS. 
+ */ + manager_log(manager, ISC_LOGCATEGORY_GENERAL, + ISC_LOGMODULE_SOCKET, ISC_LOG_INFO, + "maximum number of FD events (%d) received", + nevents); + } + + for (i = 0; i < nevents; i++) { + REQUIRE(events[i].ident < manager->maxsocks); +#ifdef ISC_PLATFORM_USETHREADS + if (events[i].ident == (uintptr_t)manager->pipe_fds[0]) { + have_ctlevent = ISC_TRUE; continue; -#endif /* ISC_PLATFORM_USETHREADS */ + } +#endif + readable = ISC_TF(events[i].filter == EVFILT_READ); + writable = ISC_TF(events[i].filter == EVFILT_WRITE); + process_fd(manager, events[i].ident, readable, writable); + } - if (manager->fdstate[i] == CLOSE_PENDING) { - manager->fdstate[i] = CLOSED; - FD_CLR(i, manager->read_fds); - FD_CLR(i, manager->write_fds); +#ifdef ISC_PLATFORM_USETHREADS + if (have_ctlevent) + done = process_ctlfd(manager); +#endif - (void)close(i); + return (done); +} +#elif defined(USE_EPOLL) +static isc_boolean_t +process_fds(isc_socketmgr_t *manager, struct epoll_event *events, int nevents) { + int i; + isc_boolean_t done = ISC_FALSE; +#ifdef ISC_PLATFORM_USETHREADS + isc_boolean_t have_ctlevent = ISC_FALSE; +#endif + if (nevents == manager->nevents) { + manager_log(manager, ISC_LOGCATEGORY_GENERAL, + ISC_LOGMODULE_SOCKET, ISC_LOG_INFO, + "maximum number of FD events (%d) received", + nevents); + } + + for (i = 0; i < nevents; i++) { + REQUIRE(events[i].data.fd < (int)manager->maxsocks); +#ifdef ISC_PLATFORM_USETHREADS + if (events[i].data.fd == manager->pipe_fds[0]) { + have_ctlevent = ISC_TRUE; continue; } - - sock = manager->fds[i]; - unlock_sock = ISC_FALSE; - if (FD_ISSET(i, readfds)) { - if (sock == NULL) { - FD_CLR(i, manager->read_fds); - goto check_write; - } - unlock_sock = ISC_TRUE; - LOCK(&sock->lock); - if (!SOCK_DEAD(sock)) { - if (sock->listener) - dispatch_accept(sock); - else - dispatch_recv(sock); - } - FD_CLR(i, manager->read_fds); +#endif + if ((events[i].events & EPOLLERR) != 0 || + (events[i].events & EPOLLHUP) != 0) { + /* + * epoll does not set IN/OUT 
bits on an erroneous + * condition, so we need to try both anyway. This is a + * bit inefficient, but should be okay for such rare + * events. Note also that the read or write attempt + * won't block because we use non-blocking sockets. + */ + events[i].events |= (EPOLLIN | EPOLLOUT); } - check_write: - if (FD_ISSET(i, writefds)) { - if (sock == NULL) { - FD_CLR(i, manager->write_fds); - continue; - } - if (!unlock_sock) { - unlock_sock = ISC_TRUE; - LOCK(&sock->lock); - } - if (!SOCK_DEAD(sock)) { - if (sock->connecting) - dispatch_connect(sock); - else - dispatch_send(sock); - } - FD_CLR(i, manager->write_fds); + process_fd(manager, events[i].data.fd, + (events[i].events & EPOLLIN) != 0, + (events[i].events & EPOLLOUT) != 0); + } + +#ifdef ISC_PLATFORM_USETHREADS + if (have_ctlevent) + done = process_ctlfd(manager); +#endif + + return (done); +} +#elif defined(USE_DEVPOLL) +static isc_boolean_t +process_fds(isc_socketmgr_t *manager, struct pollfd *events, int nevents) { + int i; + isc_boolean_t done = ISC_FALSE; +#ifdef ISC_PLATFORM_USETHREADS + isc_boolean_t have_ctlevent = ISC_FALSE; +#endif + + if (nevents == manager->nevents) { + manager_log(manager, ISC_LOGCATEGORY_GENERAL, + ISC_LOGMODULE_SOCKET, ISC_LOG_INFO, + "maximum number of FD events (%d) received", + nevents); + } + + for (i = 0; i < nevents; i++) { + REQUIRE(events[i].fd < (int)manager->maxsocks); +#ifdef ISC_PLATFORM_USETHREADS + if (events[i].fd == manager->pipe_fds[0]) { + have_ctlevent = ISC_TRUE; + continue; } - if (unlock_sock) - UNLOCK(&sock->lock); +#endif + process_fd(manager, events[i].fd, + (events[i].events & POLLIN) != 0, + (events[i].events & POLLOUT) != 0); + } + +#ifdef ISC_PLATFORM_USETHREADS + if (have_ctlevent) + done = process_ctlfd(manager); +#endif + + return (done); +} +#elif defined(USE_SELECT) +static void +process_fds(isc_socketmgr_t *manager, int maxfd, + fd_set *readfds, fd_set *writefds) +{ + int i; + + REQUIRE(maxfd <= (int)manager->maxsocks); + + for (i = 0; i < 
maxfd; i++) { +#ifdef ISC_PLATFORM_USETHREADS + if (i == manager->pipe_fds[0] || i == manager->pipe_fds[1]) + continue; +#endif /* ISC_PLATFORM_USETHREADS */ + process_fd(manager, i, FD_ISSET(i, readfds), + FD_ISSET(i, writefds)); } } +#endif #ifdef ISC_PLATFORM_USETHREADS +static isc_boolean_t +process_ctlfd(isc_socketmgr_t *manager) { + int msg, fd; + + for (;;) { + select_readmsg(manager, &fd, &msg); + + manager_log(manager, IOEVENT, + isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET, + ISC_MSG_WATCHERMSG, + "watcher got message %d " + "for socket %d"), msg, fd); + + /* + * Nothing to read? + */ + if (msg == SELECT_POKE_NOTHING) + break; + + /* + * Handle shutdown message. We really should + * jump out of this loop right away, but + * it doesn't matter if we have to do a little + * more work first. + */ + if (msg == SELECT_POKE_SHUTDOWN) + return (ISC_TRUE); + + /* + * This is a wakeup on a socket. Look + * at the event queue for both read and write, + * and decide if we need to watch on it now + * or not. + */ + wakeup_socket(manager, fd, msg); + } + + return (ISC_FALSE); +} + /* * This is the thread that will loop forever, always in a select or poll * call. @@ -2329,98 +3041,115 @@ watcher(void *uap) { isc_boolean_t done; int ctlfd; int cc; - int msg, fd; +#ifdef USE_KQUEUE + const char *fnname = "kevent()"; +#elif defined (USE_EPOLL) + const char *fnname = "epoll_wait()"; +#elif defined(USE_DEVPOLL) + const char *fnname = "ioctl(DP_POLL)"; + struct dvpoll dvp; +#elif defined (USE_SELECT) + const char *fnname = "select()"; int maxfd; +#endif char strbuf[ISC_STRERRORSIZE]; +#ifdef ISC_SOCKET_USE_POLLWATCH + pollstate_t pollstate = poll_idle; +#endif /* * Get the control fd here. This will never change. 
*/ - LOCK(&manager->lock); ctlfd = manager->pipe_fds[0]; - done = ISC_FALSE; while (!done) { do { +#ifdef USE_KQUEUE + cc = kevent(manager->kqueue_fd, NULL, 0, + manager->events, manager->nevents, NULL); +#elif defined(USE_EPOLL) + cc = epoll_wait(manager->epoll_fd, manager->events, + manager->nevents, -1); +#elif defined(USE_DEVPOLL) + dvp.dp_fds = manager->events; + dvp.dp_nfds = manager->nevents; +#ifndef ISC_SOCKET_USE_POLLWATCH + dvp.dp_timeout = -1; +#else + if (pollstate == poll_idle) + dvp.dp_timeout = -1; + else + dvp.dp_timeout = ISC_SOCKET_POLLWATCH_TIMEOUT; +#endif /* ISC_SOCKET_USE_POLLWATCH */ + cc = ioctl(manager->devpoll_fd, DP_POLL, &dvp); +#elif defined(USE_SELECT) + LOCK(&manager->lock); memcpy(manager->read_fds_copy, manager->read_fds, manager->fd_bufsize); memcpy(manager->write_fds_copy, manager->write_fds, manager->fd_bufsize); maxfd = manager->maxfd + 1; - UNLOCK(&manager->lock); cc = select(maxfd, manager->read_fds_copy, manager->write_fds_copy, NULL, NULL); - if (cc < 0) { - if (!SOFT_ERROR(errno)) { - isc__strerror(errno, strbuf, - sizeof(strbuf)); - FATAL_ERROR(__FILE__, __LINE__, - "select() %s: %s", - isc_msgcat_get(isc_msgcat, - ISC_MSGSET_GENERAL, - ISC_MSG_FAILED, - "failed"), - strbuf); - } +#endif /* USE_KQUEUE */ + + if (cc < 0 && !SOFT_ERROR(errno)) { + isc__strerror(errno, strbuf, sizeof(strbuf)); + FATAL_ERROR(__FILE__, __LINE__, + "%s %s: %s", fnname, + isc_msgcat_get(isc_msgcat, + ISC_MSGSET_GENERAL, + ISC_MSG_FAILED, + "failed"), strbuf); } - LOCK(&manager->lock); +#if defined(USE_DEVPOLL) && defined(ISC_SOCKET_USE_POLLWATCH) + if (cc == 0) { + if (pollstate == poll_active) + pollstate = poll_checking; + else if (pollstate == poll_checking) + pollstate = poll_idle; + } else if (cc > 0) { + if (pollstate == poll_checking) { + /* + * XXX: We'd like to use a more + * verbose log level as it's actually an + * unexpected event, but the kernel bug + * reportedly happens pretty frequently + * (and it can also be a false positive) + 
* so it would be just too noisy. + */ + manager_log(manager, + ISC_LOGCATEGORY_GENERAL, + ISC_LOGMODULE_SOCKET, + ISC_LOG_DEBUG(1), + "unexpected POLL timeout"); + } + pollstate = poll_active; + } +#endif } while (cc < 0); +#if defined(USE_KQUEUE) || defined (USE_EPOLL) || defined (USE_DEVPOLL) + done = process_fds(manager, manager->events, cc); +#elif defined(USE_SELECT) + process_fds(manager, maxfd, manager->read_fds_copy, + manager->write_fds_copy); /* * Process reads on internal, control fd. */ - if (FD_ISSET(ctlfd, manager->read_fds_copy)) { - for (;;) { - select_readmsg(manager, &fd, &msg); - - manager_log(manager, IOEVENT, - isc_msgcat_get(isc_msgcat, - ISC_MSGSET_SOCKET, - ISC_MSG_WATCHERMSG, - "watcher got message %d"), - msg); - - /* - * Nothing to read? - */ - if (msg == SELECT_POKE_NOTHING) - break; - - /* - * Handle shutdown message. We really should - * jump out of this loop right away, but - * it doesn't matter if we have to do a little - * more work first. - */ - if (msg == SELECT_POKE_SHUTDOWN) { - done = ISC_TRUE; - - break; - } - - /* - * This is a wakeup on a socket. Look - * at the event queue for both read and write, - * and decide if we need to watch on it now - * or not. - */ - wakeup_socket(manager, fd, msg); - } - } - - process_fds(manager, maxfd, manager->read_fds_copy, - manager->write_fds_copy); + if (FD_ISSET(ctlfd, manager->read_fds_copy)) + done = process_ctlfd(manager); +#endif } manager_log(manager, TRACE, isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, ISC_MSG_EXITING, "watcher exiting")); - UNLOCK(&manager->lock); return ((isc_threadresult_t)0); } #endif /* ISC_PLATFORM_USETHREADS */ @@ -2434,69 +3163,208 @@ isc__socketmgr_setreserved(isc_socketmgr_t *manager, isc_uint32_t reserved) { } /* - * Initialize fdsets in socketmgr structure. + * Create a new socket manager. 
*/ + static isc_result_t -create_fdsets(isc_socketmgr_t *manager, isc_mem_t *mctx) { -#if ISC_SOCKET_FDSETSIZE > FD_SETSIZE - manager->fdsize = ISC_SOCKET_FDSETSIZE; - manager->fd_bufsize = howmany(ISC_SOCKET_FDSETSIZE, NFDBITS) * +setup_watcher(isc_mem_t *mctx, isc_socketmgr_t *manager) { + isc_result_t result; +#if defined(USE_KQUEUE) || defined(USE_EPOLL) || defined(USE_DEVPOLL) + char strbuf[ISC_STRERRORSIZE]; +#endif + +#ifdef USE_KQUEUE + manager->nevents = ISC_SOCKET_MAXEVENTS; + manager->events = isc_mem_get(mctx, sizeof(struct kevent) * + manager->nevents); + if (manager->events == NULL) + return (ISC_R_NOMEMORY); + manager->kqueue_fd = kqueue(); + if (manager->kqueue_fd == -1) { + result = isc__errno2result(errno); + isc__strerror(errno, strbuf, sizeof(strbuf)); + UNEXPECTED_ERROR(__FILE__, __LINE__, + "kqueue %s: %s", + isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, + ISC_MSG_FAILED, "failed"), + strbuf); + isc_mem_put(mctx, manager->events, + sizeof(struct kevent) * manager->nevents); + return (result); + } + +#ifdef ISC_PLATFORM_USETHREADS + result = watch_fd(manager, manager->pipe_fds[0], SELECT_POKE_READ); + if (result != ISC_R_SUCCESS) { + close(manager->kqueue_fd); + isc_mem_put(mctx, manager->events, + sizeof(struct kevent) * manager->nevents); + return (result); + } +#endif /* ISC_PLATFORM_USETHREADS */ +#elif defined(USE_EPOLL) + manager->nevents = ISC_SOCKET_MAXEVENTS; + manager->events = isc_mem_get(mctx, sizeof(struct epoll_event) * + manager->nevents); + if (manager->events == NULL) + return (ISC_R_NOMEMORY); + manager->epoll_fd = epoll_create(manager->nevents); + if (manager->epoll_fd == -1) { + result = isc__errno2result(errno); + isc__strerror(errno, strbuf, sizeof(strbuf)); + UNEXPECTED_ERROR(__FILE__, __LINE__, + "epoll_create %s: %s", + isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, + ISC_MSG_FAILED, "failed"), + strbuf); + isc_mem_put(mctx, manager->events, + sizeof(struct epoll_event) * manager->nevents); + return (result); + } 
+#ifdef ISC_PLATFORM_USETHREADS + result = watch_fd(manager, manager->pipe_fds[0], SELECT_POKE_READ); + if (result != ISC_R_SUCCESS) { + close(manager->epoll_fd); + isc_mem_put(mctx, manager->events, + sizeof(struct epoll_event) * manager->nevents); + return (result); + } +#endif /* ISC_PLATFORM_USETHREADS */ +#elif defined(USE_DEVPOLL) + /* + * XXXJT: /dev/poll seems to reject large numbers of events, + * so we should be careful about redefining ISC_SOCKET_MAXEVENTS. + */ + manager->nevents = ISC_SOCKET_MAXEVENTS; + manager->events = isc_mem_get(mctx, sizeof(struct pollfd) * + manager->nevents); + if (manager->events == NULL) + return (ISC_R_NOMEMORY); + /* + * Note: fdpollinfo should be able to support all possible FDs, so + * it must have maxsocks entries (not nevents). + */ + manager->fdpollinfo = isc_mem_get(mctx, sizeof(pollinfo_t) * + manager->maxsocks); + if (manager->fdpollinfo == NULL) { + isc_mem_put(mctx, manager->events, + sizeof(pollinfo_t) * manager->maxsocks); + return (ISC_R_NOMEMORY); + } + memset(manager->fdpollinfo, 0, sizeof(pollinfo_t) * manager->maxsocks); + manager->devpoll_fd = open("/dev/poll", O_RDWR); + if (manager->devpoll_fd == -1) { + result = isc__errno2result(errno); + isc__strerror(errno, strbuf, sizeof(strbuf)); + UNEXPECTED_ERROR(__FILE__, __LINE__, + "open(/dev/poll) %s: %s", + isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, + ISC_MSG_FAILED, "failed"), + strbuf); + isc_mem_put(mctx, manager->events, + sizeof(struct pollfd) * manager->nevents); + isc_mem_put(mctx, manager->fdpollinfo, + sizeof(pollinfo_t) * manager->maxsocks); + return (result); + } +#ifdef ISC_PLATFORM_USETHREADS + result = watch_fd(manager, manager->pipe_fds[0], SELECT_POKE_READ); + if (result != ISC_R_SUCCESS) { + close(manager->devpoll_fd); + isc_mem_put(mctx, manager->events, + sizeof(struct pollfd) * manager->nevents); + isc_mem_put(mctx, manager->fdpollinfo, + sizeof(pollinfo_t) * manager->maxsocks); + return (result); + } +#endif /* 
ISC_PLATFORM_USETHREADS */ +#elif defined(USE_SELECT) + UNUSED(result); + +#if ISC_SOCKET_MAXSOCKETS > FD_SETSIZE + /* + * Note: this code should also cover the case of MAXSOCKETS <= + * FD_SETSIZE, but we separate the cases to avoid possible portability + * issues regarding howmany() and the actual representation of fd_set. + */ + manager->fd_bufsize = howmany(manager->maxsocks, NFDBITS) * sizeof(fd_mask); #else - manager->fdsize = FD_SETSIZE; manager->fd_bufsize = sizeof(fd_set); #endif - manager->fds = NULL; - manager->fdstate = NULL; manager->read_fds = NULL; manager->read_fds_copy = NULL; manager->write_fds = NULL; manager->write_fds_copy = NULL; - manager->fds = isc_mem_get(mctx, - manager->fdsize * sizeof(manager->fds[0])); - if (manager->fds == NULL) - goto fail; - - manager->fdstate = isc_mem_get(mctx, manager->fdsize * - sizeof(manager->fdstate[0])); - if (manager->fdstate == NULL) - goto fail; - manager->read_fds = isc_mem_get(mctx, manager->fd_bufsize); - if (manager->read_fds == NULL) - goto fail; - manager->read_fds_copy = isc_mem_get(mctx, manager->fd_bufsize); - if (manager->read_fds_copy == NULL) - goto fail; - manager->write_fds = isc_mem_get(mctx, manager->fd_bufsize); - if (manager->write_fds == NULL) - goto fail; - manager->write_fds_copy = isc_mem_get(mctx, manager->fd_bufsize); - if (manager->write_fds_copy == NULL) - goto fail; + if (manager->read_fds != NULL) + manager->read_fds_copy = isc_mem_get(mctx, manager->fd_bufsize); + if (manager->read_fds_copy != NULL) + manager->write_fds = isc_mem_get(mctx, manager->fd_bufsize); + if (manager->write_fds != NULL) { + manager->write_fds_copy = isc_mem_get(mctx, + manager->fd_bufsize); + } + if (manager->write_fds_copy == NULL) { + if (manager->write_fds != NULL) { + isc_mem_put(mctx, manager->write_fds, + manager->fd_bufsize); + } + if (manager->read_fds_copy != NULL) { + isc_mem_put(mctx, manager->read_fds_copy, + manager->fd_bufsize); + } + if (manager->read_fds != NULL) { + isc_mem_put(mctx, 
manager->read_fds, + manager->fd_bufsize); + } + return (ISC_R_NOMEMORY); + } + memset(manager->read_fds, 0, manager->fd_bufsize); + memset(manager->write_fds, 0, manager->fd_bufsize); - return (ISC_R_SUCCESS); +#ifdef ISC_PLATFORM_USETHREADS + (void)watch_fd(manager, manager->pipe_fds[0], SELECT_POKE_READ); + manager->maxfd = manager->pipe_fds[0]; +#else /* ISC_PLATFORM_USETHREADS */ + manager->maxfd = 0; +#endif /* ISC_PLATFORM_USETHREADS */ +#endif /* USE_KQUEUE */ - fail: - cleanup_fdsets(manager, mctx); - return (ISC_R_NOMEMORY); + return (ISC_R_SUCCESS); } -/* - * Clean up fdsets in socketmgr structure. - */ static void -cleanup_fdsets(isc_socketmgr_t *manager, isc_mem_t *mctx) { - if (manager->fds != NULL) { - isc_mem_put(mctx, manager->fds, - manager->fdsize * sizeof(manager->fds[0])); - } - if (manager->fdstate != NULL) { - isc_mem_put(mctx, manager->fdstate, - manager->fdsize * sizeof(manager->fdstate[0])); +cleanup_watcher(isc_mem_t *mctx, isc_socketmgr_t *manager) { +#ifdef ISC_PLATFORM_USETHREADS + isc_result_t result; + + result = unwatch_fd(manager, manager->pipe_fds[0], SELECT_POKE_READ); + if (result != ISC_R_SUCCESS) { + UNEXPECTED_ERROR(__FILE__, __LINE__, + "epoll_ctl(DEL) %s", + isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, + ISC_MSG_FAILED, "failed")); } +#endif /* ISC_PLATFORM_USETHREADS */ + +#ifdef USE_KQUEUE + close(manager->kqueue_fd); + isc_mem_put(mctx, manager->events, + sizeof(struct kevent) * manager->nevents); +#elif defined(USE_EPOLL) + close(manager->epoll_fd); + isc_mem_put(mctx, manager->events, + sizeof(struct epoll_event) * manager->nevents); +#elif defined(USE_DEVPOLL) + close(manager->devpoll_fd); + isc_mem_put(mctx, manager->events, + sizeof(struct pollfd) * manager->nevents); + isc_mem_put(mctx, manager->fdpollinfo, + sizeof(pollinfo_t) * manager->maxsocks); +#elif defined(USE_SELECT) if (manager->read_fds != NULL) isc_mem_put(mctx, manager->read_fds, manager->fd_bufsize); if (manager->read_fds_copy != NULL) @@ -2505,13 
+3373,19 @@ cleanup_fdsets(isc_socketmgr_t *manager, isc_mem_t *mctx) { isc_mem_put(mctx, manager->write_fds, manager->fd_bufsize); if (manager->write_fds_copy != NULL) isc_mem_put(mctx, manager->write_fds_copy, manager->fd_bufsize); +#endif /* USE_KQUEUE */ } -/* - * Create a new socket manager. - */ isc_result_t isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) { + return (isc_socketmgr_create2(mctx, managerp, 0)); +} + +isc_result_t +isc_socketmgr_create2(isc_mem_t *mctx, isc_socketmgr_t **managerp, + unsigned int maxsocks) +{ + int i; isc_socketmgr_t *manager; #ifdef ISC_PLATFORM_USETHREADS char strbuf[ISC_STRERRORSIZE]; @@ -2522,46 +3396,71 @@ isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) { #ifndef ISC_PLATFORM_USETHREADS if (socketmgr != NULL) { + /* Don't allow maxsocks to be updated */ + if (maxsocks > 0 && socketmgr->maxsocks != maxsocks) + return (ISC_R_EXISTS); + socketmgr->refs++; *managerp = socketmgr; return (ISC_R_SUCCESS); } #endif /* ISC_PLATFORM_USETHREADS */ + if (maxsocks == 0) + maxsocks = ISC_SOCKET_MAXSOCKETS; + manager = isc_mem_get(mctx, sizeof(*manager)); if (manager == NULL) return (ISC_R_NOMEMORY); - result = create_fdsets(manager, mctx); - if (result != ISC_R_SUCCESS) { - cleanup_fdsets(manager, mctx); - isc_mem_put(mctx, manager, sizeof(*manager)); - return (result); + /* zero-clear so that necessary cleanup on failure will be easy */ + memset(manager, 0, sizeof(*manager)); + manager->maxsocks = maxsocks; + manager->reserved = 0; + manager->fds = isc_mem_get(mctx, + manager->maxsocks * sizeof(isc_socket_t *)); + if (manager->fds == NULL) { + result = ISC_R_NOMEMORY; + goto free_manager; + } + manager->fdstate = isc_mem_get(mctx, manager->maxsocks * sizeof(int)); + if (manager->fds == NULL) { + result = ISC_R_NOMEMORY; + goto free_manager; } manager->magic = SOCKET_MANAGER_MAGIC; manager->mctx = NULL; + memset(manager->fds, 0, manager->maxsocks * sizeof(isc_socket_t *)); 
ISC_LIST_INIT(manager->socklist); - result = isc_mutex_init(&manager->lock); - if (result != ISC_R_SUCCESS) { - cleanup_fdsets(manager, mctx); - isc_mem_put(mctx, manager, sizeof(*manager)); - UNEXPECTED_ERROR(__FILE__, __LINE__, - "isc_mutex_init() %s", - isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, - ISC_MSG_FAILED, "failed")); - return (ISC_R_UNEXPECTED); + result = isc_mutex_init(&manager->lock); + if (result != ISC_R_SUCCESS) + goto free_manager; + manager->fdlock = isc_mem_get(mctx, FDLOCK_COUNT * sizeof(isc_mutex_t)); + if (manager->fdlock == NULL) { + result = ISC_R_NOMEMORY; + goto cleanup_lock; } + for (i = 0; i < FDLOCK_COUNT; i++) { + result = isc_mutex_init(&manager->fdlock[i]); + if (result != ISC_R_SUCCESS) { + while (--i >= 0) + DESTROYLOCK(&manager->fdlock[i]); + isc_mem_put(mctx, manager->fdlock, + FDLOCK_COUNT * sizeof(isc_mutex_t)); + manager->fdlock = NULL; + goto cleanup_lock; + } + } + #ifdef ISC_PLATFORM_USETHREADS if (isc_condition_init(&manager->shutdown_ok) != ISC_R_SUCCESS) { - cleanup_fdsets(manager, mctx); - DESTROYLOCK(&manager->lock); - isc_mem_put(mctx, manager, sizeof(*manager)); UNEXPECTED_ERROR(__FILE__, __LINE__, "isc_condition_init() %s", isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, ISC_MSG_FAILED, "failed")); - return (ISC_R_UNEXPECTED); + result = ISC_R_UNEXPECTED; + goto cleanup_lock; } /* @@ -2569,17 +3468,14 @@ isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) { * select/poll loop when something internal needs to be done. 
*/ if (pipe(manager->pipe_fds) != 0) { - cleanup_fdsets(manager, mctx); - DESTROYLOCK(&manager->lock); - isc_mem_put(mctx, manager, sizeof(*manager)); isc__strerror(errno, strbuf, sizeof(strbuf)); UNEXPECTED_ERROR(__FILE__, __LINE__, "pipe() %s: %s", isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, ISC_MSG_FAILED, "failed"), strbuf); - - return (ISC_R_UNEXPECTED); + result = ISC_R_UNEXPECTED; + goto cleanup_condition; } RUNTIME_CHECK(make_nonblock(manager->pipe_fds[0]) == ISC_R_SUCCESS); @@ -2593,33 +3489,23 @@ isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) { /* * Set up initial state for the select loop */ - memset(manager->read_fds, 0, manager->fd_bufsize); - memset(manager->write_fds, 0, manager->fd_bufsize); -#ifdef ISC_PLATFORM_USETHREADS - FD_SET(manager->pipe_fds[0], manager->read_fds); - manager->maxfd = manager->pipe_fds[0]; -#else /* ISC_PLATFORM_USETHREADS */ - manager->maxfd = 0; -#endif /* ISC_PLATFORM_USETHREADS */ - manager->reserved = 0; - memset(manager->fdstate, 0, - manager->fdsize * sizeof(manager->fdstate[0])); - + result = setup_watcher(mctx, manager); + if (result != ISC_R_SUCCESS) + goto cleanup; + memset(manager->fdstate, 0, manager->maxsocks * sizeof(int)); #ifdef ISC_PLATFORM_USETHREADS /* * Start up the select/poll thread. 
*/ if (isc_thread_create(watcher, manager, &manager->watcher) != ISC_R_SUCCESS) { - (void)close(manager->pipe_fds[0]); - (void)close(manager->pipe_fds[1]); - DESTROYLOCK(&manager->lock); - isc_mem_put(mctx, manager, sizeof(*manager)); UNEXPECTED_ERROR(__FILE__, __LINE__, "isc_thread_create() %s", isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, ISC_MSG_FAILED, "failed")); - return (ISC_R_UNEXPECTED); + cleanup_watcher(mctx, manager); + result = ISC_R_UNEXPECTED; + goto cleanup; } #endif /* ISC_PLATFORM_USETHREADS */ isc_mem_attach(mctx, &manager->mctx); @@ -2630,6 +3516,52 @@ isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) { *managerp = manager; return (ISC_R_SUCCESS); + +cleanup: +#ifdef ISC_PLATFORM_USETHREADS + (void)close(manager->pipe_fds[0]); + (void)close(manager->pipe_fds[1]); +#endif /* ISC_PLATFORM_USETHREADS */ + +#ifdef ISC_PLATFORM_USETHREADS +cleanup_condition: + (void)isc_condition_destroy(&manager->shutdown_ok); +#endif /* ISC_PLATFORM_USETHREADS */ + + +cleanup_lock: + if (manager->fdlock != NULL) { + for (i = 0; i < FDLOCK_COUNT; i++) + DESTROYLOCK(&manager->fdlock[i]); + } + DESTROYLOCK(&manager->lock); + +free_manager: + if (manager->fdlock != NULL) { + isc_mem_put(mctx, manager->fdlock, + FDLOCK_COUNT * sizeof(isc_mutex_t)); + } + if (manager->fdstate != NULL) { + isc_mem_put(mctx, manager->fdstate, + manager->maxsocks * sizeof(int)); + } + if (manager->fds != NULL) { + isc_mem_put(mctx, manager->fds, + manager->maxsocks * sizeof(isc_socket_t *)); + } + isc_mem_put(mctx, manager, sizeof(*manager)); + + return (result); +} + +isc_result_t +isc_socketmgr_getmaxsockets(isc_socketmgr_t *manager, unsigned int *nsockp) { + REQUIRE(VALID_MANAGER(manager)); + REQUIRE(nsockp != NULL); + + *nsockp = manager->maxsocks; + + return (ISC_R_SUCCESS); } void @@ -2703,18 +3635,30 @@ isc_socketmgr_destroy(isc_socketmgr_t **managerp) { /* * Clean up. 
*/ + cleanup_watcher(manager->mctx, manager); + #ifdef ISC_PLATFORM_USETHREADS (void)close(manager->pipe_fds[0]); (void)close(manager->pipe_fds[1]); (void)isc_condition_destroy(&manager->shutdown_ok); #endif /* ISC_PLATFORM_USETHREADS */ - for (i = 0; i < (int)manager->fdsize; i++) - if (manager->fdstate[i] == CLOSE_PENDING) + for (i = 0; i < (int)manager->maxsocks; i++) + if (manager->fdstate[i] == CLOSE_PENDING) /* no need to lock */ (void)close(i); + isc_mem_put(manager->mctx, manager->fds, + manager->maxsocks * sizeof(isc_socket_t *)); + isc_mem_put(manager->mctx, manager->fdstate, + manager->maxsocks * sizeof(int)); + + if (manager->fdlock != NULL) { + for (i = 0; i < FDLOCK_COUNT; i++) + DESTROYLOCK(&manager->fdlock[i]); + isc_mem_put(manager->mctx, manager->fdlock, + FDLOCK_COUNT * sizeof(isc_mutex_t)); + } DESTROYLOCK(&manager->lock); - cleanup_fdsets(manager, manager->mctx); manager->magic = 0; mctx= manager->mctx; isc_mem_put(mctx, manager, sizeof(*manager)); @@ -2767,7 +3711,7 @@ socket_recv(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task, * Enqueue the request. If the socket was previously not being * watched, poke the watcher to start paying attention to it. */ - if (ISC_LIST_EMPTY(sock->recv_list)) + if (ISC_LIST_EMPTY(sock->recv_list) && !sock->pending_recv) select_poke(sock->manager, sock->fd, SELECT_POKE_READ); ISC_LIST_ENQUEUE(sock->recv_list, dev, ev_link); @@ -2964,7 +3908,8 @@ socket_send(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task, * not being watched, poke the watcher to start * paying attention to it. 
*/ - if (ISC_LIST_EMPTY(sock->send_list)) + if (ISC_LIST_EMPTY(sock->send_list) && + !sock->pending_send) select_poke(sock->manager, sock->fd, SELECT_POKE_WRITE); ISC_LIST_ENQUEUE(sock->send_list, dev, ev_link); @@ -3222,7 +4167,7 @@ isc_socket_listen(isc_socket_t *sock, unsigned int backlog) { } /* - * This should try to do agressive accept() XXXMLG + * This should try to do aggressive accept() XXXMLG */ isc_result_t isc_socket_accept(isc_socket_t *sock, @@ -3333,6 +4278,16 @@ isc_socket_connect(isc_socket_t *sock, isc_sockaddr_t *addr, sock->address = *addr; cc = connect(sock->fd, &addr->type.sa, addr->length); if (cc < 0) { + /* + * HP-UX "fails" to connect a UDP socket and sets errno to + * EINPROGRESS if it's non-blocking. We'd rather regard this as + * a success and let the user detect it if it's really an error + * at the time of sending a packet on the socket. + */ + if (sock->type == isc_sockettype_udp && errno == EINPROGRESS) { + cc = 0; + goto success; + } if (SOFT_ERROR(errno) || errno == EINPROGRESS) goto queue; @@ -3374,6 +4329,7 @@ isc_socket_connect(isc_socket_t *sock, isc_sockaddr_t *addr, /* * If connect completed, fire off the done event. 
*/ + success: if (cc == 0) { sock->connected = 1; sock->bound = 1; @@ -3733,37 +4689,107 @@ isc_socket_ipv6only(isc_socket_t *sock, isc_boolean_t yes) { #ifdef IPV6_V6ONLY if (sock->pf == AF_INET6) { - (void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_V6ONLY, - (void *)&onoff, sizeof(onoff)); + if (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_V6ONLY, + (void *)&onoff, sizeof(int)) < 0) { + char strbuf[ISC_STRERRORSIZE]; + + UNEXPECTED_ERROR(__FILE__, __LINE__, + "setsockopt(%d, IPV6_V6ONLY) " + "%s: %s", sock->fd, + isc_msgcat_get(isc_msgcat, + ISC_MSGSET_GENERAL, + ISC_MSG_FAILED, + "failed"), + strbuf); + } } + FIX_IPV6_RECVPKTINFO(sock); /* AIX */ #endif } #ifndef ISC_PLATFORM_USETHREADS -void -isc__socketmgr_getfdsets(fd_set **readset, fd_set **writeset, int *maxfd) { +/* In our assumed scenario, we can simply use a single static object. */ +static isc_socketwait_t swait_private; + +int +isc__socketmgr_waitevents(struct timeval *tvp, isc_socketwait_t **swaitp) { + int n; +#ifdef USE_KQUEUE + struct timespec ts, *tsp; +#endif +#ifdef USE_EPOLL + int timeout; +#endif +#ifdef USE_DEVPOLL + struct dvpoll dvp; +#endif + + REQUIRE(swaitp != NULL && *swaitp == NULL); + if (socketmgr == NULL) - *maxfd = 0; - else { - /* Prepare duplicates of fd_sets, as select() will modify */ - memcpy(socketmgr->read_fds_copy, socketmgr->read_fds, - socketmgr->fd_bufsize); - memcpy(socketmgr->write_fds_copy, socketmgr->write_fds, - socketmgr->fd_bufsize); - *readset = socketmgr->read_fds_copy; - *writeset = socketmgr->write_fds_copy; - *maxfd = socketmgr->maxfd + 1; - } + return (0); + +#ifdef USE_KQUEUE + if (tvp != NULL) { + ts.tv_sec = tvp->tv_sec; + ts.tv_nsec = tvp->tv_usec * 1000; + tsp = &ts; + } else + tsp = NULL; + swait_private.nevents = kevent(socketmgr->kqueue_fd, NULL, 0, + socketmgr->events, socketmgr->nevents, + tsp); + n = swait_private.nevents; +#elif defined(USE_EPOLL) + if (tvp != NULL) + timeout = tvp->tv_sec * 1000 + (tvp->tv_usec + 999) / 1000; + else + timeout = -1; + 
swait_private.nevents = epoll_wait(socketmgr->epoll_fd, + socketmgr->events, + socketmgr->nevents, timeout); + n = swait_private.nevents; +#elif defined(USE_DEVPOLL) + dvp.dp_fds = socketmgr->events; + dvp.dp_nfds = socketmgr->nevents; + if (tvp != NULL) { + dvp.dp_timeout = tvp->tv_sec * 1000 + + (tvp->tv_usec + 999) / 1000; + } else + dvp.dp_timeout = -1; + swait_private.nevents = ioctl(socketmgr->devpoll_fd, DP_POLL, &dvp); + n = swait_private.nevents; +#elif defined(USE_SELECT) + memcpy(socketmgr->read_fds_copy, socketmgr->read_fds, + socketmgr->fd_bufsize); + memcpy(socketmgr->write_fds_copy, socketmgr->write_fds, + socketmgr->fd_bufsize); + + swait_private.readset = socketmgr->read_fds_copy; + swait_private.writeset = socketmgr->write_fds_copy; + swait_private.maxfd = socketmgr->maxfd + 1; + + n = select(swait_private.maxfd, swait_private.readset, + swait_private.writeset, NULL, tvp); +#endif + + *swaitp = &swait_private; + return (n); } isc_result_t -isc__socketmgr_dispatch(fd_set *readset, fd_set *writeset, int maxfd) { - isc_socketmgr_t *manager = socketmgr; +isc__socketmgr_dispatch(isc_socketwait_t *swait) { + REQUIRE(swait == &swait_private); - if (manager == NULL) + if (socketmgr == NULL) return (ISC_R_NOTFOUND); - process_fds(manager, maxfd, readset, writeset); +#if defined(USE_KQUEUE) || defined(USE_EPOLL) || defined(USE_DEVPOLL) + (void)process_fds(socketmgr, socketmgr->events, swait->nevents); return (ISC_R_SUCCESS); +#elif defined(USE_SELECT) + process_fds(socketmgr, swait->maxfd, swait->readset, swait->writeset); + return (ISC_R_SUCCESS); +#endif } #endif /* ISC_PLATFORM_USETHREADS */ diff --git a/lib/isc/unix/socket_p.h b/lib/isc/unix/socket_p.h index 07720eef656c7..5786facad9c89 100644 --- a/lib/isc/unix/socket_p.h +++ b/lib/isc/unix/socket_p.h @@ -1,8 +1,8 @@ /* - * Copyright (C) 2004 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004, 2008 Internet Systems Consortium, Inc. 
("ISC") * Copyright (C) 2000, 2001 Internet Software Consortium. * - * Permission to use, copy, modify, and distribute this software for any + * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: socket_p.h,v 1.6.206.1.34.1 2008/07/29 04:43:57 each Exp $ */ +/* $Id: socket_p.h,v 1.6.206.3 2008/06/25 23:45:37 tbox Exp $ */ #ifndef ISC_SOCKET_P_H #define ISC_SOCKET_P_H @@ -24,10 +24,7 @@ #include <sys/select.h> #endif -void -isc__socketmgr_getfdsets(fd_set **readset, fd_set **writeset, int *maxfd); - -isc_result_t -isc__socketmgr_dispatch(fd_set *readset, fd_set *writeset, int maxfd); - +typedef struct isc_socketwait isc_socketwait_t; +int isc__socketmgr_waitevents(struct timeval *, isc_socketwait_t **); +isc_result_t isc__socketmgr_dispatch(isc_socketwait_t *); #endif /* ISC_SOCKET_P_H */ diff --git a/lib/isc/unix/time.c b/lib/isc/unix/time.c index 39c851cebe9a9..c4b7f13600eba 100644 --- a/lib/isc/unix/time.c +++ b/lib/isc/unix/time.c @@ -1,8 +1,8 @@ /* - * Copyright (C) 2004 Internet Systems Consortium, Inc. ("ISC") + * Copyright (C) 2004, 2008 Internet Systems Consortium, Inc. ("ISC") * Copyright (C) 1998-2001, 2003 Internet Software Consortium. * - * Permission to use, copy, modify, and distribute this software for any + * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. 
*/ -/* $Id: time.c,v 1.34.2.6.2.4 2004/03/06 08:15:03 marka Exp $ */ +/* $Id: time.c,v 1.34.2.6.2.6 2008/04/28 23:45:41 tbox Exp $ */ #include <config.h> @@ -225,7 +225,7 @@ isc_time_nowplusinterval(isc_time_t *t, const isc_interval_t *i) { t->seconds = tv.tv_sec + i->seconds; t->nanoseconds = tv.tv_usec * NS_PER_US + i->nanoseconds; - if (t->nanoseconds > NS_PER_S) { + if (t->nanoseconds >= NS_PER_S) { t->seconds++; t->nanoseconds -= NS_PER_S; } @@ -408,5 +408,5 @@ isc_time_formattimestamp(const isc_time_t *t, char *buf, unsigned int len) { snprintf(buf + flen, len - flen, ".%03u", t->nanoseconds / 1000000); else - snprintf(buf, len, "99-Bad-9999 99:99:99.999"); + snprintf(buf, len, "99-Bad-9999 99:99:99.999"); } diff --git a/lib/isccfg/api b/lib/isccfg/api index 808217d1f364c..364ea9d2a3bc4 100644 --- a/lib/isccfg/api +++ b/lib/isccfg/api @@ -1,3 +1,3 @@ LIBINTERFACE = 1 -LIBREVISION = 9 +LIBREVISION = 10 LIBAGE = 0 diff --git a/lib/isccfg/namedconf.c b/lib/isccfg/namedconf.c index 622d76aef790a..65b4e4e57e722 100644 --- a/lib/isccfg/namedconf.c +++ b/lib/isccfg/namedconf.c @@ -15,7 +15,7 @@ * PERFORMANCE OF THIS SOFTWARE. */ -/* $Id: namedconf.c,v 1.21.44.36.2.1 2008/07/23 12:03:53 marka Exp $ */ +/* $Id: namedconf.c,v 1.21.44.40 2008/09/04 08:11:25 marka Exp $ */ #include <config.h> @@ -513,30 +513,80 @@ static cfg_type_t cfg_type_serverid = { /* * Port list. 
*/ +static cfg_tuplefielddef_t porttuple_fields[] = { + { "loport", &cfg_type_uint32, 0 }, + { "hiport", &cfg_type_uint32, 0 }, + { NULL, NULL, 0 } +}; +static cfg_type_t cfg_type_porttuple = { + "porttuple", cfg_parse_tuple, cfg_print_tuple, cfg_doc_tuple, + &cfg_rep_tuple, porttuple_fields +}; + static isc_result_t -parse_port(cfg_parser_t *pctx, const cfg_type_t *type, cfg_obj_t **ret) { +parse_port(cfg_parser_t *pctx, cfg_obj_t **ret) { isc_result_t result; - UNUSED(type); - CHECK(cfg_parse_uint32(pctx, NULL, ret)); if ((*ret)->value.uint32 > 0xffff) { cfg_parser_error(pctx, CFG_LOG_NEAR, "invalid port"); cfg_obj_destroy(pctx, ret); result = ISC_R_RANGE; } + + cleanup: + return (result); +} + +static isc_result_t +parse_portrange(cfg_parser_t *pctx, const cfg_type_t *type, cfg_obj_t **ret) { + isc_result_t result; + cfg_obj_t *obj = NULL; + + UNUSED(type); + + CHECK(cfg_peektoken(pctx, ISC_LEXOPT_NUMBER | ISC_LEXOPT_CNUMBER)); + if (pctx->token.type == isc_tokentype_number) + CHECK(parse_port(pctx, ret)); + else { + CHECK(cfg_gettoken(pctx, 0)); + if (pctx->token.type != isc_tokentype_string || + strcasecmp(TOKEN_STRING(pctx), "range") != 0) { + cfg_parser_error(pctx, CFG_LOG_NEAR, + "expected integer or 'range'"); + return (ISC_R_UNEXPECTEDTOKEN); + } + CHECK(cfg_create_tuple(pctx, &cfg_type_porttuple, &obj)); + CHECK(parse_port(pctx, &obj->value.tuple[0])); + CHECK(parse_port(pctx, &obj->value.tuple[1])); + if (obj->value.tuple[0]->value.uint32 > + obj->value.tuple[1]->value.uint32) { + cfg_parser_error(pctx, CFG_LOG_NOPREP, + "low port '%u' must not be larger " + "than high port", + obj->value.tuple[0]->value.uint32); + result = ISC_R_RANGE; + goto cleanup; + } + *ret = obj; + obj = NULL; + } + cleanup: + if (obj != NULL) + cfg_obj_destroy(pctx, &obj); return (result); } -static cfg_type_t cfg_type_port = { - "port", parse_port, NULL, cfg_doc_terminal, +static cfg_type_t cfg_type_portrange = { + "portrange", parse_portrange, NULL, cfg_doc_terminal, NULL, 
NULL }; static cfg_type_t cfg_type_bracketed_portlist = { - "bracketed_sockaddrlist", cfg_parse_bracketed_list, cfg_print_bracketed_list, cfg_doc_bracketed_list, - &cfg_rep_list, &cfg_type_port + "bracketed_sockaddrlist", cfg_parse_bracketed_list, + cfg_print_bracketed_list, cfg_doc_bracketed_list, + &cfg_rep_list, &cfg_type_portrange }; /* @@ -573,6 +623,8 @@ namedconf_or_view_clauses[] = { */ static cfg_clausedef_t options_clauses[] = { + { "use-v4-udp-ports", &cfg_type_bracketed_portlist, 0 }, + { "use-v6-udp-ports", &cfg_type_bracketed_portlist, 0 }, { "avoid-v4-udp-ports", &cfg_type_bracketed_portlist, 0 }, { "avoid-v6-udp-ports", &cfg_type_bracketed_portlist, 0 }, { "blackhole", &cfg_type_bracketed_aml, 0 }, @@ -1565,6 +1617,7 @@ static isc_result_t parse_logversions(cfg_parser_t *pctx, const cfg_type_t *type, cfg_obj_t **ret) { return (parse_enum_or_other(pctx, type, &cfg_type_uint32, ret)); } + static cfg_type_t cfg_type_logversions = { "logversions", parse_logversions, cfg_print_ustring, cfg_doc_terminal, &cfg_rep_string, logversions_enums @@ -1638,8 +1691,19 @@ print_logfile(cfg_printer_t *pctx, const cfg_obj_t *obj) { } } + +static void +doc_logfile(cfg_printer_t *pctx, const cfg_type_t *type) { + UNUSED(type); + cfg_print_cstr(pctx, "<quoted_string>"); + cfg_print_chars(pctx, " ", 1); + cfg_print_cstr(pctx, "[ versions ( \"unlimited\" | <integer> ) ]"); + cfg_print_chars(pctx, " ", 1); + cfg_print_cstr(pctx, "[ size <size> ]"); +} + static cfg_type_t cfg_type_logfile = { - "log_file", parse_logfile, print_logfile, cfg_doc_terminal, + "log_file", parse_logfile, print_logfile, doc_logfile, &cfg_rep_tuple, logfile_fields }; @@ -1671,8 +1735,8 @@ static cfg_type_t cfg_type_lwres_view = { }; static cfg_type_t cfg_type_lwres_searchlist = { - "lwres_searchlist", cfg_parse_bracketed_list, cfg_print_bracketed_list, cfg_doc_bracketed_list, - &cfg_rep_list, &cfg_type_astring }; + "lwres_searchlist", cfg_parse_bracketed_list, cfg_print_bracketed_list, + 
cfg_doc_bracketed_list, &cfg_rep_list, &cfg_type_astring }; static cfg_clausedef_t lwres_clauses[] = { @@ -1784,15 +1848,15 @@ doc_sockaddrnameport(cfg_printer_t *pctx, const cfg_type_t *type) { cfg_print_chars(pctx, "( ", 2); cfg_print_cstr(pctx, "<quoted_string>"); cfg_print_chars(pctx, " ", 1); - cfg_print_cstr(pctx, "[port <integer>]"); + cfg_print_cstr(pctx, "[ port <integer> ]"); cfg_print_chars(pctx, " | ", 3); cfg_print_cstr(pctx, "<ipv4_address>"); cfg_print_chars(pctx, " ", 1); - cfg_print_cstr(pctx, "[port <integer>]"); + cfg_print_cstr(pctx, "[ port <integer> ]"); cfg_print_chars(pctx, " | ", 3); cfg_print_cstr(pctx, "<ipv6_address>"); cfg_print_chars(pctx, " ", 1); - cfg_print_cstr(pctx, "[port <integer>]"); + cfg_print_cstr(pctx, "[ port <integer> ]"); cfg_print_chars(pctx, " )", 2); } @@ -1870,11 +1934,11 @@ doc_masterselement(cfg_printer_t *pctx, const cfg_type_t *type) { cfg_print_chars(pctx, " | ", 3); cfg_print_cstr(pctx, "<ipv4_address>"); cfg_print_chars(pctx, " ", 1); - cfg_print_cstr(pctx, "[port <integer>]"); + cfg_print_cstr(pctx, "[ port <integer> ]"); cfg_print_chars(pctx, " | ", 3); cfg_print_cstr(pctx, "<ipv6_address>"); cfg_print_chars(pctx, " ", 1); - cfg_print_cstr(pctx, "[port <integer>]"); + cfg_print_cstr(pctx, "[ port <integer> ]"); cfg_print_chars(pctx, " )", 2); } |