diff options
| author | Philip Paeps <philip@FreeBSD.org> | 2026-04-01 08:49:18 +0000 |
|---|---|---|
| committer | Philip Paeps <philip@FreeBSD.org> | 2026-04-01 08:49:18 +0000 |
| commit | f5b5e292795b65df1847b90ab5b23889828ee28b (patch) | |
| tree | 92e6f965b601ec1b4e5ca815e2fe0e99804be5aa | |
| parent | a8fa7ccb47307fbf67141b5a6611456e2698dade (diff) | |
| -rw-r--r-- | Changes | 159 | ||||
| -rw-r--r-- | Makefile.am | 3 | ||||
| -rw-r--r-- | Makefile.in | 6 | ||||
| -rw-r--r-- | README.md | 13 | ||||
| -rw-r--r-- | configure.ac | 66 | ||||
| -rw-r--r-- | doc/Makefile.in | 3 | ||||
| -rw-r--r-- | doc/reference.html | 4677 | ||||
| -rw-r--r-- | doc/xmlwf.1 | 15 | ||||
| -rw-r--r-- | doc/xmlwf.xml | 480 | ||||
| -rw-r--r-- | examples/Makefile.in | 3 | ||||
| -rw-r--r-- | expat_config.h.in | 3 | ||||
| -rwxr-xr-x | fix-xmltest-log.sh | 5 | ||||
| -rw-r--r-- | lib/Makefile.am | 6 | ||||
| -rw-r--r-- | lib/Makefile.in | 21 | ||||
| -rw-r--r-- | lib/expat.h | 4 | ||||
| -rw-r--r-- | lib/expat_external.h | 5 | ||||
| -rw-r--r-- | lib/internal.h | 2 | ||||
| -rw-r--r-- | lib/libexpat.map.in | 119 | ||||
| -rw-r--r-- | lib/xmlparse.c | 173 | ||||
| -rw-r--r-- | lib/xmlrole.c | 4 | ||||
| -rw-r--r-- | lib/xmltok.c | 4 | ||||
| -rw-r--r-- | lib/xmltok_ns.c | 7 | ||||
| -rw-r--r-- | tests/Makefile.in | 3 | ||||
| -rw-r--r-- | tests/basic_tests.c | 74 | ||||
| -rw-r--r-- | tests/benchmark/Makefile.in | 3 | ||||
| -rw-r--r-- | tests/handlers.c | 12 | ||||
| -rw-r--r-- | tests/handlers.h | 5 | ||||
| -rw-r--r-- | tests/misc_tests.c | 35 | ||||
| -rw-r--r-- | tests/nsalloc_tests.c | 27 | ||||
| -rw-r--r-- | xmlwf/Makefile.in | 3 | ||||
| -rw-r--r-- | xmlwf/xmlfile.c | 4 | ||||
| -rw-r--r-- | xmlwf/xmlwf.c | 13 | ||||
| -rwxr-xr-x | xmlwf/xmlwf_helpgen.py | 186 |
33 files changed, 3813 insertions, 2330 deletions
@@ -10,37 +10,160 @@ !! ~~~~~~~~~~~~ !! !! The following topics need *additional skilled C developers* to progress !! !! in a timely manner or at all (loosely ordered by descending priority): !! +!! _______________________ !! +!! - teaming up on fixing the UNFIXED SECURITY ISSUES listed at: !! +!! """"""""""""""""""""""" !! +!! https://github.com/libexpat/libexpat/issues/1160 !! !! !! !! - teaming up on researching and fixing future security reports and !! !! ClusterFuzz findings with few-days-max response times in communication !! !! in order to (1) have a sound fix ready before the end of a 90 days !! !! grace period and (2) in a sustainable manner, !! -!! - helping CPython Expat bindings with supporting Expat's amplification !! -!! attack protection API (https://github.com/python/cpython/issues/90949): !! -!! - XML_SetAllocTrackerActivationThreshold !! -!! - XML_SetAllocTrackerMaximumAmplification !! -!! - XML_SetBillionLaughsAttackProtectionActivationThreshold !! -!! - XML_SetBillionLaughsAttackProtectionMaximumAmplification !! -!! - helping Perl's XML::Parser Expat bindings with supporting Expat's !! -!! security API (https://github.com/cpan-authors/XML-Parser/issues/102): !! -!! - XML_SetAllocTrackerActivationThreshold !! -!! - XML_SetAllocTrackerMaximumAmplification !! -!! - XML_SetBillionLaughsAttackProtectionActivationThreshold !! -!! - XML_SetBillionLaughsAttackProtectionMaximumAmplification !! -!! - XML_SetReparseDeferralEnabled !! +!! !! !! - implementing and auto-testing XML 1.0r5 support !! !! (needs discussion before pull requests), !! -!! - smart ideas on fixing the Autotools CMake files generation issue !! -!! without breaking CI (needs discussion before pull requests), !! -!! - pushing migration from `int` to `size_t` further !! -!! including edge-cases test coverage (needs discussion before anything). !! !! !! !! For details, please reach out via e-mail to sebastian@pipping.org so we !! !! 
can schedule a voice call on the topic, in English or German. !! !! !! -!! THANK YOU! Sebastian Pipping -- Berlin, 2024-03-09 !! +!! THANK YOU! Sebastian Pipping -- Berlin, 2026-03-17 !! !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +Release 2.7.5 Tue March 17 2026 + Security fixes: + #1158 CVE-2026-32776 -- Fix NULL function pointer dereference for + empty external parameter entities; it takes use of both + functions XML_ExternalEntityParserCreate and + XML_SetParamEntityParsing for an application to be + vulnerable. + #1161 #1162 CVE-2026-32777 -- Protect from XML_TOK_INSTANCE_START + infinite loop in function entityValueProcessor; it takes + use of both functions XML_ExternalEntityParserCreate and + XML_SetParamEntityParsing for an application to be + vulnerable. + #1163 CVE-2026-32778 -- Fix NULL dereference in function setContext + on retry after an earlier out-of-memory condition; it takes + use of function XML_ParserCreateNS or XML_ParserCreate_MM + for an application to be vulnerable. 
+ #1160 Three more unfixed vulnerabilities left + + Other changes: + #1146 #1147 Autotools: Fix condition for symbol versioning check, in + particular when compiling with slibtool (not libtool) + #1156 Address Cppcheck >=2.20.0 warnings + #1153 tests: Make test_buffer_can_grow_to_max work for MinGW on + Ubuntu 24.04 + #1157 #1159 Version info bumped from 12:2:11 (libexpat*.so.1.11.2) + to 12:3:11 (libexpat*.so.1.11.3); see https://verbump.de/ + for what these numbers do + + Infrastructure: + #1148 CI: Fix FreeBSD and Solaris CI + #1149 CI: Bump to WASI SDK 30 + #1153 CI: Adapt to breaking changes with Ubuntu 22.04 + #1156 CI: Adapt to breaking changes in Cppcheck + + Special thanks to: + Berkay Eren Ürün + Christian Ng + Fabio Scaccabarozzi + Francesco Bertolaccini + Mark Brand + Rhodri James + and + AddressSanitizer + Buttercup + OSS-Fuzz / ClusterFuzz + Trail of Bits + +Release 2.7.4 Sat January 31 2026 + Security fixes: + #1131 CVE-2026-24515 -- Function XML_ExternalEntityParserCreate + failed to copy the encoding handler data passed to + XML_SetUnknownEncodingHandler from the parent to the new + subparser. This can cause a NULL dereference (CWE-476) from + external entities that declare use of an unknown encoding. + The expected impact is denial of service. It takes use of + both functions XML_ExternalEntityParserCreate and + XML_SetUnknownEncodingHandler for an application to be + vulnerable. 
+ #1075 CVE-2026-25210 -- Add missing check for integer overflow + related to buffer size determination in function doContent + + Bug fixes: + #1073 lib: Fix missing undoing of group size expansion in doProlog + failure cases + #1107 xmlwf: Fix a memory leak + #1104 WASI: Fix format specifiers for 32bit WASI SDK + + Other changes: + #1105 lib: Fix strict aliasing + #1106 lib: Leverage feature "flexible array member" of C99 + #1051 lib: Swap (size_t)(-1) for C99 equivalent SIZE_MAX + #1109 lib|xmlwf: Return NULL instead of 0 for pointers + #1068 lib|Windows: Clean up use of macro _MSC_EXTENSIONS with MSVC + #1112 lib: Remove unused import + #1110 xmlwf: Warn about XXE in --help output (and man page) + #1102 #1103 WASI: Stop using getpid + #1113 #1130 Autotools: Drop file expat.m4 that provided obsolete Autoconf + macro AM_WITH_EXPAT + #1123 Autotools: Limit -Wno-pedantic-ms-format to MinGW + #1129 #1134 .. + #1087 Autotools|macOS: Sync CMake templates with CMake 4.0 + #1139 #1140 Autotools|CMake: Introduce off-by-default symbol versioning + The related build system flags are: + - For Autotools, configure with --enable-symbol-versioning + - For CMake, configure with -DEXPAT_SYMBOL_VERSIONING=ON + Please double-check for consequences before activating + this inside distro packaging. Bug reports welcome! + #1117 Autotools|CMake: Remove libbsd support + #1105 Autotools|CMake: Stop using -fno-strict-aliasing, and use + -Wstrict-aliasing=3 instead + #1124 Autotools|CMake: Prefer command gsed (GNU sed) over sed + (e.g. for Solaris) inside fix-xmltest-log.sh + #1067 CMake: Detect and warn about unusable check_c_compiler_flag + #1137 CMake: Drop support for CMake <3.17 + #1138 CMake|Windows: Fix libexpat.def.cmake version comments + + #1086 #1110 docs: Add warning about external reference handlers and XXE + #1066 docs: Be explicit that parent parsers need to outlive + subparsers + #1089 .. + #1090 #1091 .. + #1092 #1093 .. + #1094 #1098 .. 
+ #1115 #1116 docs: Misc non-content improvements to doc/reference.html + #1132 #1133 Version info bumped from 12:1:11 (libexpat*.so.1.11.1) + to 12:2:11 (libexpat*.so.1.11.2); see https://verbump.de/ + for what these numbers do + + Infrastructure: + #1119 #1121 Document guidelines for contributing to Expat + #1120 Introduce a pull request template + #1074 CI: Stop using about-to-be-removed image "macos-13" + #1083 #1088 CI: Mitigate random Wine crashes + #1104 CI: Cover compilation with WASI SDK + #1116 CI: Enforce clean doc XML formatting + #1124 .. + #1135 #1136 CI: Cover Solaris 11.4 + #1125 CI: Extend CI coverage of FreeBSD + #1139 #1140 CI: Cover symbol versioning + #1114 xmlwf: Reformat helpgen code (using Black 25.12.0) + #1071 .gitignore: Add files CPackConfig.cmake and + CPackSourceConfig.cmake + + Special thanks to: + Alfonso Gregory + Bénédikt Tran + Gordon Messmer + Hanno Böck + Jakub Kulík + Matthew Fernandez + Neil Pang + Rosen Penev + and + Artiphishell Inc. + Release 2.7.3 Wed September 24 2025 Security fixes: #1046 #1048 Fix alignment of internal allocations for some non-amd64 diff --git a/Makefile.am b/Makefile.am index d612d432becb..72f2fca59d6f 100644 --- a/Makefile.am +++ b/Makefile.am @@ -6,7 +6,7 @@ # \___/_/\_\ .__/ \__,_|\__| # |_| XML parser # -# Copyright (c) 2017-2025 Sebastian Pipping <sebastian@pipping.org> +# Copyright (c) 2017-2026 Sebastian Pipping <sebastian@pipping.org> # Copyright (c) 2018 KangLin <kl222@126.com> # Copyright (c) 2022 Johnny Jazeix <jazeix@gmail.com> # Copyright (c) 2023 Sony Corporation / Snild Dolkow <snild@sony.com> @@ -94,7 +94,6 @@ EXTRA_DIST = \ $(_EXTRA_DIST_CMAKE) \ $(_EXTRA_DIST_WINDOWS) \ \ - conftools/expat.m4 \ conftools/get-version.sh \ \ fuzz/xml_lpm_fuzzer.cpp \ diff --git a/Makefile.in b/Makefile.in index b799591f2fc2..aa41b152525d 100644 --- a/Makefile.in +++ b/Makefile.in @@ -22,7 +22,7 @@ # \___/_/\_\ .__/ \__,_|\__| # |_| XML parser # -# Copyright (c) 2017-2025 Sebastian Pipping 
<sebastian@pipping.org> +# Copyright (c) 2017-2026 Sebastian Pipping <sebastian@pipping.org> # Copyright (c) 2018 KangLin <kl222@126.com> # Copyright (c) 2022 Johnny Jazeix <jazeix@gmail.com> # Copyright (c) 2023 Sony Corporation / Snild Dolkow <snild@sony.com> @@ -395,6 +395,9 @@ SO_MINOR = @SO_MINOR@ SO_PATCH = @SO_PATCH@ STRIP = @STRIP@ VERSION = @VERSION@ +VSCRIPT_LDFLAGS = @VSCRIPT_LDFLAGS@ +_EXPAT_COMMENT_ATTR_INFO = @_EXPAT_COMMENT_ATTR_INFO@ +_EXPAT_COMMENT_DTD_OR_GE = @_EXPAT_COMMENT_DTD_OR_GE@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ @@ -497,7 +500,6 @@ EXTRA_DIST = \ $(_EXTRA_DIST_CMAKE) \ $(_EXTRA_DIST_WINDOWS) \ \ - conftools/expat.m4 \ conftools/get-version.sh \ \ fuzz/xml_lpm_fuzzer.cpp \ diff --git a/README.md b/README.md index c2f288ca1242..a67548be7fc9 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ > at the top of the `Changes` file. -# Expat, Release 2.7.3 +# Expat, Release 2.7.5 This is Expat, a C99 library for parsing [XML 1.0 Fourth Edition](https://www.w3.org/TR/2006/REC-xml-20060816/), started by @@ -234,11 +234,6 @@ overrides the in-makefile set `DESTDIR`, because variable-setting priority is Note: This only applies to the Expat library itself, building UTF-16 versions of xmlwf and the tests is currently not supported. -When using Expat with a project using autoconf for configuration, you -can use the probing macro in `conftools/expat.m4` to determine how to -include Expat. See the comments at the top of that file for more -information. - A reference manual is available in the file `doc/reference.html` in this distribution. 
@@ -297,15 +292,15 @@ EXPAT_OSSFUZZ_BUILD:BOOL=OFF // Build a shared expat library EXPAT_SHARED_LIBS:BOOL=ON +// Define to provide symbol versioning for dependency generation +EXPAT_SYMBOL_VERSIONING:BOOL=OFF + // Treat all compiler warnings as errors EXPAT_WARNINGS_AS_ERRORS:BOOL=OFF // Make use of getrandom function (ON|OFF|AUTO) [default=AUTO] EXPAT_WITH_GETRANDOM:STRING=AUTO -// Utilize libbsd (for arc4random_buf) -EXPAT_WITH_LIBBSD:BOOL=OFF - // Make use of syscall SYS_getrandom (ON|OFF|AUTO) [default=AUTO] EXPAT_WITH_SYS_GETRANDOM:STRING=AUTO ``` diff --git a/configure.ac b/configure.ac index 072fea41ee8c..6d028b5f6658 100644 --- a/configure.ac +++ b/configure.ac @@ -11,7 +11,7 @@ dnl Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> dnl Copyright (c) 2000-2005 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> dnl Copyright (c) 2001-2003 Greg Stein <gstein@users.sourceforge.net> dnl Copyright (c) 2006-2012 Karl Waclawek <karl@waclawek.net> -dnl Copyright (c) 2016-2025 Sebastian Pipping <sebastian@pipping.org> +dnl Copyright (c) 2016-2026 Sebastian Pipping <sebastian@pipping.org> dnl Copyright (c) 2017 S. P. 
Zeidler <spz@netbsd.org> dnl Copyright (c) 2017 Stephen Groat <stephen@groat.us> dnl Copyright (c) 2017-2020 Joe Orton <jorton@redhat.com> @@ -25,6 +25,10 @@ dnl Copyright (c) 2020 Jeffrey Walton <noloader@gmail.com> dnl Copyright (c) 2024 Ferenc Géczi <ferenc.gm@gmail.com> dnl Copyright (c) 2024 Dag-Erling Smørgrav <des@des.dev> dnl Copyright (c) 2025 Matthew Fernandez <matthew.fernandez@gmail.com> +dnl Copyright (c) 2025 Alfonso Gregory <gfunni234@gmail.com> +dnl Copyright (c) 2026 Rosen Penev <rosenp@gmail.com> +dnl Copyright (c) 2026 Gordon Messmer <gordon.messmer@gmail.com> +dnl Copyright (c) 2026 Fabio Scaccabarozzi <fsvm88@gmail.com> dnl Licensed under the MIT license: dnl dnl Permission is hereby granted, free of charge, to any person obtaining @@ -86,7 +90,7 @@ dnl If the API changes incompatibly set LIBAGE back to 0 dnl LIBCURRENT=12 # sync -LIBREVISION=1 # with +LIBREVISION=3 # with LIBAGE=11 # CMakeLists.txt! AC_CONFIG_HEADERS([expat_config.h]) @@ -117,10 +121,12 @@ AS_IF([test "$GCC" = yes], dnl GCC don't support it and it causes extra warnings that are only dnl distracting; avoid. 
AX_APPEND_COMPILE_FLAGS([-fexceptions], [AM_CFLAGS]) - AX_APPEND_COMPILE_FLAGS([-fno-strict-aliasing -Wmissing-prototypes -Wstrict-prototypes], [AM_CFLAGS]) + AX_APPEND_COMPILE_FLAGS([-Wstrict-aliasing=3 -Wmissing-prototypes -Wstrict-prototypes], [AM_CFLAGS]) AX_APPEND_COMPILE_FLAGS([-pedantic -Wduplicated-cond -Wduplicated-branches -Wlogical-op], [AM_CFLAGS]) AX_APPEND_COMPILE_FLAGS([-Wrestrict -Wnull-dereference -Wjump-misses-init -Wdouble-promotion], [AM_CFLAGS]) - AX_APPEND_COMPILE_FLAGS([-Wshadow -Wformat=2 -Wno-pedantic-ms-format -Wmisleading-indentation], [AM_CFLAGS])]) + AX_APPEND_COMPILE_FLAGS([-Wshadow -Wformat=2 -Wmisleading-indentation], [AM_CFLAGS]) + AS_CASE(["${host_os}"], [mingw*], [AX_APPEND_COMPILE_FLAGS([-Wno-pedantic-ms-format], [AM_CFLAGS])]) + ]) AC_LANG_PUSH([C++]) AC_PROG_CXX @@ -131,11 +137,23 @@ AS_IF([test "$GCC" = yes], dnl GCC don't support it and it causes extra warnings that are only dnl distracting; avoid. AX_APPEND_COMPILE_FLAGS([-fexceptions], [AM_CXXFLAGS]) - AX_APPEND_COMPILE_FLAGS([-fno-strict-aliasing], [AM_CXXFLAGS])]) + AX_APPEND_COMPILE_FLAGS([-Wstrict-aliasing=3], [AM_CXXFLAGS])]) AC_LANG_POP([C++]) AS_IF([test "$GCC" = yes], - [AX_APPEND_LINK_FLAGS([-fno-strict-aliasing],[AM_LDFLAGS])]) + [AX_APPEND_LINK_FLAGS([-Wstrict-aliasing=3],[AM_LDFLAGS])]) + +AC_ARG_ENABLE([symbol-versioning], + [AS_HELP_STRING([--enable-symbol-versioning], + [provide symbol versioning for dependency generation @<:@default=no@:>@])], + [enable_symbol_versioning=$enableval], + [enable_symbol_versioning=no]) +AS_IF([test "x$enable_symbol_versioning" != xno], + [VSCRIPT_LDFLAGS="-Wl,--version-script" + AC_SUBST([VSCRIPT_LDFLAGS]) + ]) +AM_CONDITIONAL([HAVE_VSCRIPT], + [test "x$enable_symbol_versioning" != xno]) dnl patching ${archive_cmds} to affect generation of file "libtool" to fix linking with clang (issue #312) AS_CASE(["$LD"],[*clang*], @@ -199,23 +217,9 @@ AM_CONDITIONAL([_INTERNAL_LARGE_SIZE], [echo -- "${CPPFLAGS}${CFLAGS}" | ${FGREP LT_LIB_M 
-AC_ARG_WITH([libbsd], - [AS_HELP_STRING([--with-libbsd], [utilize libbsd (for arc4random_buf)])], - [], - [with_libbsd=no]) -AS_IF([test "x${with_libbsd}" != xno], - [AC_CHECK_LIB([bsd], - [arc4random_buf], - [], - [AS_IF([test "x${with_libbsd}" = xyes], - [AC_MSG_ERROR([Enforced use of libbsd cannot be satisfied.])])])]) -AC_MSG_CHECKING([for arc4random_buf (BSD, libbsd or glibc 2.36+)]) +AC_MSG_CHECKING([for arc4random_buf (BSD or glibc 2.36+)]) AC_LINK_IFELSE([AC_LANG_SOURCE([ - #if defined(HAVE_LIBBSD) - # include <bsd/stdlib.h> - #else - # include <stdlib.h> /* for arc4random_buf on BSD */ - #endif + #include <stdlib.h> int main(void) { char dummy[[123]]; // double brackets for m4 arc4random_buf(dummy, 0U); @@ -226,13 +230,9 @@ AC_LINK_IFELSE([AC_LANG_SOURCE([ AC_MSG_RESULT([yes])], [AC_MSG_RESULT([no]) - AC_MSG_CHECKING([for arc4random (BSD, macOS, libbsd or glibc 2.36+)]) + AC_MSG_CHECKING([for arc4random (BSD, macOS, or glibc 2.36+)]) AC_LINK_IFELSE([AC_LANG_SOURCE([ - #if defined(HAVE_LIBBSD) - # include <bsd/stdlib.h> - #else - # include <stdlib.h> - #endif + #include <stdlib.h> int main(void) { arc4random(); return 0; @@ -381,9 +381,14 @@ dnl NOTE: The *_TRUE variables read here are Automake conditionals dnl that are either set to "" when enabled or to "#" when disabled dnl (because they are used to dynamically comment out certain things) AS_IF([test "x${enable_xml_attr_info}" = xyes], - [EXPAT_ATTR_INFO=ON], - [EXPAT_ATTR_INFO=OFF]) + [EXPAT_ATTR_INFO=ON + _EXPAT_COMMENT_ATTR_INFO=" "], + [EXPAT_ATTR_INFO=OFF + _EXPAT_COMMENT_ATTR_INFO="#"]) +AC_SUBST([_EXPAT_COMMENT_ATTR_INFO]) EXPAT_DTD=ON +_EXPAT_COMMENT_DTD_OR_GE=" " +AC_SUBST([_EXPAT_COMMENT_DTD_OR_GE]) AS_IF([test "x${_INTERNAL_LARGE_SIZE_TRUE}" = x], [EXPAT_LARGE_SIZE=ON], [EXPAT_LARGE_SIZE=OFF]) @@ -461,6 +466,7 @@ AC_CONFIG_FILES([Makefile] [doc/Makefile] [examples/Makefile] [lib/Makefile] + [lib/libexpat.map] [tests/Makefile] [tests/benchmark/Makefile] [xmlwf/Makefile]) diff --git 
a/doc/Makefile.in b/doc/Makefile.in index 13be5107f89b..0bda758420f0 100644 --- a/doc/Makefile.in +++ b/doc/Makefile.in @@ -293,6 +293,9 @@ SO_MINOR = @SO_MINOR@ SO_PATCH = @SO_PATCH@ STRIP = @STRIP@ VERSION = @VERSION@ +VSCRIPT_LDFLAGS = @VSCRIPT_LDFLAGS@ +_EXPAT_COMMENT_ATTR_INFO = @_EXPAT_COMMENT_ATTR_INFO@ +_EXPAT_COMMENT_DTD_OR_GE = @_EXPAT_COMMENT_DTD_OR_GE@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ diff --git a/doc/reference.html b/doc/reference.html index d2dded499435..5faa8d6515af 100644 --- a/doc/reference.html +++ b/doc/reference.html @@ -1,9 +1,9 @@ -<?xml version="1.0" encoding="iso-8859-1"?> +<?xml version="1.0" encoding="utf-8"?> <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" - "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> -<html> -<head> -<!-- + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> + <head> + <!-- __ __ _ ___\ \/ /_ __ __ _| |_ / _ \\ /| '_ \ / _` | __| @@ -14,12 +14,12 @@ Copyright (c) 2000 Clark Cooper <coopercc@users.sourceforge.net> Copyright (c) 2000-2004 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> Copyright (c) 2002-2012 Karl Waclawek <karl@waclawek.net> - Copyright (c) 2017-2025 Sebastian Pipping <sebastian@pipping.org> + Copyright (c) 2017-2026 Sebastian Pipping <sebastian@pipping.org> Copyright (c) 2017 Jakub Wilk <jwilk@jwilk.net> Copyright (c) 2021 Tomas Korbar <tkorbar@redhat.com> Copyright (c) 2021 Nicolas Cavallari <nicolas.cavallari@green-communications.fr> Copyright (c) 2022 Thijs Schreijer <thijs@thijsschreijer.nl> - Copyright (c) 2023 Hanno Böck <hanno@gentoo.org> + Copyright (c) 2023-2025 Hanno Böck <hanno@gentoo.org> Copyright (c) 2023 Sony Corporation / Snild Dolkow <snild@sony.com> Licensed under the MIT license: @@ -42,205 +42,489 @@ OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
--> - <title>Expat XML Parser</title> - <meta name="author" content="Clark Cooper, coopercc@netheaven.com" /> - <meta http-equiv="Content-Style-Type" content="text/css" /> - <link href="ok.min.css" rel="stylesheet" type="text/css" /> - <link href="style.css" rel="stylesheet" type="text/css" /> -</head> -<body> - <div> - <h1> - The Expat XML Parser - <small>Release 2.7.3</small> - </h1> - </div> -<div class="content"> -<p>Expat is a library, written in C, for parsing XML documents. It's -the underlying XML parser for the open source Mozilla project, Perl's -<code>XML::Parser</code>, Python's <code>xml.parsers.expat</code>, and -other open-source XML parsers.</p> + <title> + Expat XML Parser + </title> + <meta name="author" content="Clark Cooper, coopercc@netheaven.com" /> + <link href="ok.min.css" rel="stylesheet" /> + <link href="style.css" rel="stylesheet" /> + </head> + <body> + <div> + <h1> + The Expat XML Parser <small>Release 2.7.5</small> + </h1> + </div> -<p>This library is the creation of James Clark, who's also given us -groff (an nroff look-alike), Jade (an implementation of ISO's DSSSL -stylesheet language for SGML), XP (a Java XML parser package), XT (a -Java XSL engine). James was also the technical lead on the XML -Working Group at W3C that produced the XML specification.</p> + <div class="content"> + <p> + Expat is a library, written in C, for parsing XML documents. It's the underlying + XML parser for the open source Mozilla project, Perl's <code>XML::Parser</code>, + Python's <code>xml.parsers.expat</code>, and other open-source XML parsers. + </p> -<p>This is free software, licensed under the <a -href="../COPYING">MIT/X Consortium license</a>. You may download it -from <a href="https://libexpat.github.io/">the Expat home page</a>. 
-</p> + <p> + This library is the creation of James Clark, who's also given us groff (an nroff + look-alike), Jade (an implementation of ISO's DSSSL stylesheet language for + SGML), XP (a Java XML parser package), XT (a Java XSL engine). James was also the + technical lead on the XML Working Group at W3C that produced the XML + specification. + </p> -<p>The bulk of this document was originally commissioned as an article -by <a href="https://www.xml.com/">XML.com</a>. They graciously allowed -Clark Cooper to retain copyright and to distribute it with Expat. -This version has been substantially extended to include documentation -on features which have been added since the original article was -published, and additional information on using the original -interface.</p> + <p> + This is free software, licensed under the <a href="../COPYING">MIT/X Consortium + license</a>. You may download it from <a href="https://libexpat.github.io/">the + Expat home page</a>. + </p> + + <p> + The bulk of this document was originally commissioned as an article by <a href= + "https://www.xml.com/">XML.com</a>. They graciously allowed Clark Cooper to + retain copyright and to distribute it with Expat. This version has been + substantially extended to include documentation on features which have been added + since the original article was published, and additional information on using the + original interface. 
+ </p> + + <hr /> + + <h2> + Table of Contents + </h2> -<hr /> -<h2>Table of Contents</h2> -<ul> - <li><a href="#overview">Overview</a></li> - <li><a href="#building">Building and Installing</a></li> - <li><a href="#using">Using Expat</a></li> - <li><a href="#reference">Reference</a> - <ul> - <li><a href="#creation">Parser Creation Functions</a> - <ul> - <li><a href="#XML_ParserCreate">XML_ParserCreate</a></li> - <li><a href="#XML_ParserCreateNS">XML_ParserCreateNS</a></li> - <li><a href="#XML_ParserCreate_MM">XML_ParserCreate_MM</a></li> - <li><a href="#XML_ExternalEntityParserCreate">XML_ExternalEntityParserCreate</a></li> - <li><a href="#XML_ParserFree">XML_ParserFree</a></li> - <li><a href="#XML_ParserReset">XML_ParserReset</a></li> - </ul> - </li> - <li><a href="#parsing">Parsing Functions</a> - <ul> - <li><a href="#XML_Parse">XML_Parse</a></li> - <li><a href="#XML_ParseBuffer">XML_ParseBuffer</a></li> - <li><a href="#XML_GetBuffer">XML_GetBuffer</a></li> - <li><a href="#XML_StopParser">XML_StopParser</a></li> - <li><a href="#XML_ResumeParser">XML_ResumeParser</a></li> - <li><a href="#XML_GetParsingStatus">XML_GetParsingStatus</a></li> - </ul> - </li> - <li><a href="#setting">Handler Setting Functions</a> - <ul> - <li><a href="#XML_SetStartElementHandler">XML_SetStartElementHandler</a></li> - <li><a href="#XML_SetEndElementHandler">XML_SetEndElementHandler</a></li> - <li><a href="#XML_SetElementHandler">XML_SetElementHandler</a></li> - <li><a href="#XML_SetCharacterDataHandler">XML_SetCharacterDataHandler</a></li> - <li><a href="#XML_SetProcessingInstructionHandler">XML_SetProcessingInstructionHandler</a></li> - <li><a href="#XML_SetCommentHandler">XML_SetCommentHandler</a></li> - <li><a href="#XML_SetStartCdataSectionHandler">XML_SetStartCdataSectionHandler</a></li> - <li><a href="#XML_SetEndCdataSectionHandler">XML_SetEndCdataSectionHandler</a></li> - <li><a href="#XML_SetCdataSectionHandler">XML_SetCdataSectionHandler</a></li> - <li><a 
href="#XML_SetDefaultHandler">XML_SetDefaultHandler</a></li> - <li><a href="#XML_SetDefaultHandlerExpand">XML_SetDefaultHandlerExpand</a></li> - <li><a href="#XML_SetExternalEntityRefHandler">XML_SetExternalEntityRefHandler</a></li> - <li><a href="#XML_SetExternalEntityRefHandlerArg">XML_SetExternalEntityRefHandlerArg</a></li> - <li><a href="#XML_SetSkippedEntityHandler">XML_SetSkippedEntityHandler</a></li> - <li><a href="#XML_SetUnknownEncodingHandler">XML_SetUnknownEncodingHandler</a></li> - <li><a href="#XML_SetStartNamespaceDeclHandler">XML_SetStartNamespaceDeclHandler</a></li> - <li><a href="#XML_SetEndNamespaceDeclHandler">XML_SetEndNamespaceDeclHandler</a></li> - <li><a href="#XML_SetNamespaceDeclHandler">XML_SetNamespaceDeclHandler</a></li> - <li><a href="#XML_SetXmlDeclHandler">XML_SetXmlDeclHandler</a></li> - <li><a href="#XML_SetStartDoctypeDeclHandler">XML_SetStartDoctypeDeclHandler</a></li> - <li><a href="#XML_SetEndDoctypeDeclHandler">XML_SetEndDoctypeDeclHandler</a></li> - <li><a href="#XML_SetDoctypeDeclHandler">XML_SetDoctypeDeclHandler</a></li> - <li><a href="#XML_SetElementDeclHandler">XML_SetElementDeclHandler</a></li> - <li><a href="#XML_SetAttlistDeclHandler">XML_SetAttlistDeclHandler</a></li> - <li><a href="#XML_SetEntityDeclHandler">XML_SetEntityDeclHandler</a></li> - <li><a href="#XML_SetUnparsedEntityDeclHandler">XML_SetUnparsedEntityDeclHandler</a></li> - <li><a href="#XML_SetNotationDeclHandler">XML_SetNotationDeclHandler</a></li> - <li><a href="#XML_SetNotStandaloneHandler">XML_SetNotStandaloneHandler</a></li> - </ul> - </li> - <li><a href="#position">Parse Position and Error Reporting Functions</a> - <ul> - <li><a href="#XML_GetErrorCode">XML_GetErrorCode</a></li> - <li><a href="#XML_ErrorString">XML_ErrorString</a></li> - <li><a href="#XML_GetCurrentByteIndex">XML_GetCurrentByteIndex</a></li> - <li><a href="#XML_GetCurrentLineNumber">XML_GetCurrentLineNumber</a></li> - <li><a 
href="#XML_GetCurrentColumnNumber">XML_GetCurrentColumnNumber</a></li> - <li><a href="#XML_GetCurrentByteCount">XML_GetCurrentByteCount</a></li> - <li><a href="#XML_GetInputContext">XML_GetInputContext</a></li> - </ul> - </li> - <li> - <a href="#attack-protection">Attack Protection</a> <ul> - <li><a href="#XML_SetBillionLaughsAttackProtectionMaximumAmplification">XML_SetBillionLaughsAttackProtectionMaximumAmplification</a></li> - <li><a href="#XML_SetBillionLaughsAttackProtectionActivationThreshold">XML_SetBillionLaughsAttackProtectionActivationThreshold</a></li> - <li><a href="#XML_SetAllocTrackerMaximumAmplification">XML_SetAllocTrackerMaximumAmplification</a></li> - <li><a href="#XML_SetAllocTrackerActivationThreshold">XML_SetAllocTrackerActivationThreshold</a></li> - <li><a href="#XML_SetReparseDeferralEnabled">XML_SetReparseDeferralEnabled</a></li> + <li> + <a href="#overview">Overview</a> + </li> + + <li> + <a href="#building">Building and Installing</a> + </li> + + <li> + <a href="#using">Using Expat</a> + </li> + + <li> + <a href="#reference">Reference</a> + <ul> + <li> + <a href="#creation">Parser Creation Functions</a> + <ul> + <li> + <a href="#XML_ParserCreate">XML_ParserCreate</a> + </li> + + <li> + <a href="#XML_ParserCreateNS">XML_ParserCreateNS</a> + </li> + + <li> + <a href="#XML_ParserCreate_MM">XML_ParserCreate_MM</a> + </li> + + <li> + <a href= + "#XML_ExternalEntityParserCreate">XML_ExternalEntityParserCreate</a> + </li> + + <li> + <a href="#XML_ParserFree">XML_ParserFree</a> + </li> + + <li> + <a href="#XML_ParserReset">XML_ParserReset</a> + </li> + </ul> + </li> + + <li> + <a href="#parsing">Parsing Functions</a> + <ul> + <li> + <a href="#XML_Parse">XML_Parse</a> + </li> + + <li> + <a href="#XML_ParseBuffer">XML_ParseBuffer</a> + </li> + + <li> + <a href="#XML_GetBuffer">XML_GetBuffer</a> + </li> + + <li> + <a href="#XML_StopParser">XML_StopParser</a> + </li> + + <li> + <a href="#XML_ResumeParser">XML_ResumeParser</a> + </li> + + <li> + <a 
href="#XML_GetParsingStatus">XML_GetParsingStatus</a> + </li> + </ul> + </li> + + <li> + <a href="#setting">Handler Setting Functions</a> + <ul> + <li> + <a href="#XML_SetStartElementHandler">XML_SetStartElementHandler</a> + </li> + + <li> + <a href="#XML_SetEndElementHandler">XML_SetEndElementHandler</a> + </li> + + <li> + <a href="#XML_SetElementHandler">XML_SetElementHandler</a> + </li> + + <li> + <a href="#XML_SetCharacterDataHandler">XML_SetCharacterDataHandler</a> + </li> + + <li> + <a href= + "#XML_SetProcessingInstructionHandler">XML_SetProcessingInstructionHandler</a> + </li> + + <li> + <a href="#XML_SetCommentHandler">XML_SetCommentHandler</a> + </li> + + <li> + <a href= + "#XML_SetStartCdataSectionHandler">XML_SetStartCdataSectionHandler</a> + </li> + + <li> + <a href= + "#XML_SetEndCdataSectionHandler">XML_SetEndCdataSectionHandler</a> + </li> + + <li> + <a href="#XML_SetCdataSectionHandler">XML_SetCdataSectionHandler</a> + </li> + + <li> + <a href="#XML_SetDefaultHandler">XML_SetDefaultHandler</a> + </li> + + <li> + <a href="#XML_SetDefaultHandlerExpand">XML_SetDefaultHandlerExpand</a> + </li> + + <li> + <a href= + "#XML_SetExternalEntityRefHandler">XML_SetExternalEntityRefHandler</a> + </li> + + <li> + <a href= + "#XML_SetExternalEntityRefHandlerArg">XML_SetExternalEntityRefHandlerArg</a> + </li> + + <li> + <a href="#XML_SetSkippedEntityHandler">XML_SetSkippedEntityHandler</a> + </li> + + <li> + <a href= + "#XML_SetUnknownEncodingHandler">XML_SetUnknownEncodingHandler</a> + </li> + + <li> + <a href= + "#XML_SetStartNamespaceDeclHandler">XML_SetStartNamespaceDeclHandler</a> + </li> + + <li> + <a href= + "#XML_SetEndNamespaceDeclHandler">XML_SetEndNamespaceDeclHandler</a> + </li> + + <li> + <a href="#XML_SetNamespaceDeclHandler">XML_SetNamespaceDeclHandler</a> + </li> + + <li> + <a href="#XML_SetXmlDeclHandler">XML_SetXmlDeclHandler</a> + </li> + + <li> + <a href= + "#XML_SetStartDoctypeDeclHandler">XML_SetStartDoctypeDeclHandler</a> + </li> + + <li> + 
<a href= + "#XML_SetEndDoctypeDeclHandler">XML_SetEndDoctypeDeclHandler</a> + </li> + + <li> + <a href="#XML_SetDoctypeDeclHandler">XML_SetDoctypeDeclHandler</a> + </li> + + <li> + <a href="#XML_SetElementDeclHandler">XML_SetElementDeclHandler</a> + </li> + + <li> + <a href="#XML_SetAttlistDeclHandler">XML_SetAttlistDeclHandler</a> + </li> + + <li> + <a href="#XML_SetEntityDeclHandler">XML_SetEntityDeclHandler</a> + </li> + + <li> + <a href= + "#XML_SetUnparsedEntityDeclHandler">XML_SetUnparsedEntityDeclHandler</a> + </li> + + <li> + <a href="#XML_SetNotationDeclHandler">XML_SetNotationDeclHandler</a> + </li> + + <li> + <a href="#XML_SetNotStandaloneHandler">XML_SetNotStandaloneHandler</a> + </li> + </ul> + </li> + + <li> + <a href="#position">Parse Position and Error Reporting Functions</a> + <ul> + <li> + <a href="#XML_GetErrorCode">XML_GetErrorCode</a> + </li> + + <li> + <a href="#XML_ErrorString">XML_ErrorString</a> + </li> + + <li> + <a href="#XML_GetCurrentByteIndex">XML_GetCurrentByteIndex</a> + </li> + + <li> + <a href="#XML_GetCurrentLineNumber">XML_GetCurrentLineNumber</a> + </li> + + <li> + <a href="#XML_GetCurrentColumnNumber">XML_GetCurrentColumnNumber</a> + </li> + + <li> + <a href="#XML_GetCurrentByteCount">XML_GetCurrentByteCount</a> + </li> + + <li> + <a href="#XML_GetInputContext">XML_GetInputContext</a> + </li> + </ul> + </li> + + <li> + <a href="#attack-protection">Attack Protection</a> + <ul> + <li> + <a href= + "#XML_SetBillionLaughsAttackProtectionMaximumAmplification">XML_SetBillionLaughsAttackProtectionMaximumAmplification</a> + </li> + + <li> + <a href= + "#XML_SetBillionLaughsAttackProtectionActivationThreshold">XML_SetBillionLaughsAttackProtectionActivationThreshold</a> + </li> + + <li> + <a href= + "#XML_SetAllocTrackerMaximumAmplification">XML_SetAllocTrackerMaximumAmplification</a> + </li> + + <li> + <a href= + "#XML_SetAllocTrackerActivationThreshold">XML_SetAllocTrackerActivationThreshold</a> + </li> + + <li> + <a href= + 
"#XML_SetReparseDeferralEnabled">XML_SetReparseDeferralEnabled</a> + </li> + </ul> + </li> + + <li> + <a href="#miscellaneous">Miscellaneous Functions</a> + <ul> + <li> + <a href="#XML_SetUserData">XML_SetUserData</a> + </li> + + <li> + <a href="#XML_GetUserData">XML_GetUserData</a> + </li> + + <li> + <a href="#XML_UseParserAsHandlerArg">XML_UseParserAsHandlerArg</a> + </li> + + <li> + <a href="#XML_SetBase">XML_SetBase</a> + </li> + + <li> + <a href="#XML_GetBase">XML_GetBase</a> + </li> + + <li> + <a href= + "#XML_GetSpecifiedAttributeCount">XML_GetSpecifiedAttributeCount</a> + </li> + + <li> + <a href="#XML_GetIdAttributeIndex">XML_GetIdAttributeIndex</a> + </li> + + <li> + <a href="#XML_GetAttributeInfo">XML_GetAttributeInfo</a> + </li> + + <li> + <a href="#XML_SetEncoding">XML_SetEncoding</a> + </li> + + <li> + <a href="#XML_SetParamEntityParsing">XML_SetParamEntityParsing</a> + </li> + + <li> + <a href="#XML_SetHashSalt">XML_SetHashSalt</a> + </li> + + <li> + <a href="#XML_UseForeignDTD">XML_UseForeignDTD</a> + </li> + + <li> + <a href="#XML_SetReturnNSTriplet">XML_SetReturnNSTriplet</a> + </li> + + <li> + <a href="#XML_DefaultCurrent">XML_DefaultCurrent</a> + </li> + + <li> + <a href="#XML_ExpatVersion">XML_ExpatVersion</a> + </li> + + <li> + <a href="#XML_ExpatVersionInfo">XML_ExpatVersionInfo</a> + </li> + + <li> + <a href="#XML_GetFeatureList">XML_GetFeatureList</a> + </li> + + <li> + <a href="#XML_FreeContentModel">XML_FreeContentModel</a> + </li> + + <li> + <a href="#XML_MemMalloc">XML_MemMalloc</a> + </li> + + <li> + <a href="#XML_MemRealloc">XML_MemRealloc</a> + </li> + + <li> + <a href="#XML_MemFree">XML_MemFree</a> + </li> + </ul> + </li> + </ul> + </li> </ul> - </li> - <li><a href="#miscellaneous">Miscellaneous Functions</a> - <ul> - <li><a href="#XML_SetUserData">XML_SetUserData</a></li> - <li><a href="#XML_GetUserData">XML_GetUserData</a></li> - <li><a href="#XML_UseParserAsHandlerArg">XML_UseParserAsHandlerArg</a></li> - <li><a 
href="#XML_SetBase">XML_SetBase</a></li> - <li><a href="#XML_GetBase">XML_GetBase</a></li> - <li><a href="#XML_GetSpecifiedAttributeCount">XML_GetSpecifiedAttributeCount</a></li> - <li><a href="#XML_GetIdAttributeIndex">XML_GetIdAttributeIndex</a></li> - <li><a href="#XML_GetAttributeInfo">XML_GetAttributeInfo</a></li> - <li><a href="#XML_SetEncoding">XML_SetEncoding</a></li> - <li><a href="#XML_SetParamEntityParsing">XML_SetParamEntityParsing</a></li> - <li><a href="#XML_SetHashSalt">XML_SetHashSalt</a></li> - <li><a href="#XML_UseForeignDTD">XML_UseForeignDTD</a></li> - <li><a href="#XML_SetReturnNSTriplet">XML_SetReturnNSTriplet</a></li> - <li><a href="#XML_DefaultCurrent">XML_DefaultCurrent</a></li> - <li><a href="#XML_ExpatVersion">XML_ExpatVersion</a></li> - <li><a href="#XML_ExpatVersionInfo">XML_ExpatVersionInfo</a></li> - <li><a href="#XML_GetFeatureList">XML_GetFeatureList</a></li> - <li><a href="#XML_FreeContentModel">XML_FreeContentModel</a></li> - <li><a href="#XML_MemMalloc">XML_MemMalloc</a></li> - <li><a href="#XML_MemRealloc">XML_MemRealloc</a></li> - <li><a href="#XML_MemFree">XML_MemFree</a></li> - </ul> - </li> - </ul> - </li> -</ul> -<hr /> -<h2><a name="overview">Overview</a></h2> + <hr /> + + <h2> + <a id="overview" name="overview">Overview</a> + </h2> + + <p> + Expat is a stream-oriented parser. You register callback (or handler) functions + with the parser and then start feeding it the document. As the parser recognizes + parts of the document, it will call the appropriate handler for that part (if + you've registered one.) The document is fed to the parser in pieces, so you can + start parsing before you have all the document. This also allows you to parse + really huge documents that won't fit into memory. + </p> + + <p> + Expat can be intimidating due to the many kinds of handlers and options you can + set. 
But you only need to learn four functions in order to do 90% of what you'll + want to do with it: + </p> -<p>Expat is a stream-oriented parser. You register callback (or -handler) functions with the parser and then start feeding it the -document. As the parser recognizes parts of the document, it will -call the appropriate handler for that part (if you've registered one.) -The document is fed to the parser in pieces, so you can start parsing -before you have all the document. This also allows you to parse really -huge documents that won't fit into memory.</p> + <dl> + <dt> + <code><a href="#XML_ParserCreate">XML_ParserCreate</a></code> + </dt> -<p>Expat can be intimidating due to the many kinds of handlers and -options you can set. But you only need to learn four functions in -order to do 90% of what you'll want to do with it:</p> + <dd> + Create a new parser object. + </dd> -<dl> + <dt> + <code><a href="#XML_SetElementHandler">XML_SetElementHandler</a></code> + </dt> -<dt><code><a href= "#XML_ParserCreate" - >XML_ParserCreate</a></code></dt> - <dd>Create a new parser object.</dd> + <dd> + Set handlers for start and end tags. + </dd> -<dt><code><a href= "#XML_SetElementHandler" - >XML_SetElementHandler</a></code></dt> - <dd>Set handlers for start and end tags.</dd> + <dt> + <code><a href= + "#XML_SetCharacterDataHandler">XML_SetCharacterDataHandler</a></code> + </dt> -<dt><code><a href= "#XML_SetCharacterDataHandler" - >XML_SetCharacterDataHandler</a></code></dt> - <dd>Set handler for text.</dd> + <dd> + Set handler for text. + </dd> -<dt><code><a href= "#XML_Parse" - >XML_Parse</a></code></dt> - <dd>Pass a buffer full of document to the parser</dd> -</dl> + <dt> + <code><a href="#XML_Parse">XML_Parse</a></code> + </dt> -<p>These functions and others are described in the <a -href="#reference">reference</a> part of this document. 
The reference -section also describes in detail the parameters passed to the -different types of handlers.</p> + <dd> + Pass a buffer full of document to the parser + </dd> + </dl> -<p>Let's look at a very simple example program that only uses 3 of the -above functions (it doesn't need to set a character handler.) The -program <a href="../examples/outline.c">outline.c</a> prints an -element outline, indenting child elements to distinguish them from the -parent element that contains them. The start handler does all the -work. It prints two indenting spaces for every level of ancestor -elements, then it prints the element and attribute -information. Finally it increments the global <code>Depth</code> -variable.</p> + <p> + These functions and others are described in the <a href= + "#reference">reference</a> part of this document. The reference section also + describes in detail the parameters passed to the different types of handlers. + </p> -<pre class="eg"> + <p> + Let's look at a very simple example program that only uses 3 of the above + functions (it doesn't need to set a character handler.) The program <a href= + "../examples/outline.c">outline.c</a> prints an element outline, indenting child + elements to distinguish them from the parent element that contains them. The + start handler does all the work. It prints two indenting spaces for every level + of ancestor elements, then it prints the element and attribute information. + Finally it increments the global <code>Depth</code> variable. + </p> + + <pre class="eg"> int Depth; void XMLCALL @@ -260,39 +544,41 @@ start(void *data, const char *el, const char **attr) { Depth++; } /* End of start handler */ </pre> + <p> + The end tag simply does the bookkeeping work of decrementing <code>Depth</code>. 
+ </p> -<p>The end tag simply does the bookkeeping work of decrementing -<code>Depth</code>.</p> -<pre class="eg"> + <pre class="eg"> void XMLCALL end(void *data, const char *el) { Depth--; } /* End of end handler */ </pre> + <p> + Note the <code>XMLCALL</code> annotation used for the callbacks. This is used to + ensure that the Expat and the callbacks are using the same calling convention in + case the compiler options used for Expat itself and the client code are + different. Expat tries not to care what the default calling convention is, though + it may require that it be compiled with a default convention of "cdecl" on some + platforms. For code which uses Expat, however, the calling convention is + specified by the <code>XMLCALL</code> annotation on most platforms; callbacks + should be defined using this annotation. + </p> -<p>Note the <code>XMLCALL</code> annotation used for the callbacks. -This is used to ensure that the Expat and the callbacks are using the -same calling convention in case the compiler options used for Expat -itself and the client code are different. Expat tries not to care -what the default calling convention is, though it may require that it -be compiled with a default convention of "cdecl" on some platforms. -For code which uses Expat, however, the calling convention is -specified by the <code>XMLCALL</code> annotation on most platforms; -callbacks should be defined using this annotation.</p> - -<p>The <code>XMLCALL</code> annotation was added in Expat 1.95.7, but -existing working Expat applications don't need to add it (since they -are already using the "cdecl" calling convention, or they wouldn't be -working). The annotation is only needed if the default calling -convention may be something other than "cdecl". 
To use the annotation -safely with older versions of Expat, you can conditionally define it -<em>after</em> including Expat's header file:</p> + <p> + The <code>XMLCALL</code> annotation was added in Expat 1.95.7, but existing + working Expat applications don't need to add it (since they are already using the + "cdecl" calling convention, or they wouldn't be working). The annotation is only + needed if the default calling convention may be something other than "cdecl". To + use the annotation safely with older versions of Expat, you can conditionally + define it <em>after</em> including Expat's header file: + </p> -<pre class="eg"> + <pre class="eg"> #include <expat.h> #ifndef XMLCALL -#if defined(_MSC_EXTENSIONS) && !defined(__BEOS__) && !defined(__CYGWIN__) +#if defined(_MSC_VER) && !defined(__BEOS__) && !defined(__CYGWIN__) #define XMLCALL __cdecl #elif defined(__GNUC__) #define XMLCALL __attribute__((cdecl)) @@ -301,186 +587,256 @@ safely with older versions of Expat, you can conditionally define it #endif #endif </pre> + <p> + After creating the parser, the main program just has the job of shoveling the + document to the parser so that it can do its work. + </p> -<p>After creating the parser, the main program just has the job of -shoveling the document to the parser so that it can do its work.</p> + <hr /> -<hr /> -<h2><a name="building">Building and Installing Expat</a></h2> + <h2> + <a id="building" name="building">Building and Installing Expat</a> + </h2> -<p>The Expat distribution comes as a compressed (with GNU gzip) tar -file. You may download the latest version from <a href= -"https://sourceforge.net/projects/expat/" >Source Forge</a>. After -unpacking this, cd into the directory. Then follow either the Win32 -directions or Unix directions below.</p> + <p> + The Expat distribution comes as a compressed (with GNU gzip) tar file. You may + download the latest version from <a href= + "https://sourceforge.net/projects/expat/">Source Forge</a>. 
After unpacking this, + cd into the directory. Then follow either the Win32 directions or Unix directions + below. + </p> -<h3>Building under Win32</h3> + <h3> + Building under Win32 + </h3> -<p>If you're using the GNU compiler under cygwin, follow the Unix -directions in the next section. Otherwise if you have Microsoft's -Developer Studio installed, -you can use CMake to generate a <code>.sln</code> file, e.g. -<code> -cmake -G"Visual Studio 17 2022" -DCMAKE_BUILD_TYPE=RelWithDebInfo . -</code>, and build Expat using <code>msbuild /m expat.sln</code> after.</p> + <p> + If you're using the GNU compiler under cygwin, follow the Unix directions in the + next section. Otherwise if you have Microsoft's Developer Studio installed, you + can use CMake to generate a <code>.sln</code> file, e.g. <code>cmake -G"Visual + Studio 17 2022" -DCMAKE_BUILD_TYPE=RelWithDebInfo .</code> , and build Expat + using <code>msbuild /m expat.sln</code> after. + </p> -<p>Alternatively, you may download the Win32 binary package that -contains the "expat.h" include file and a pre-built DLL.</p> + <p> + Alternatively, you may download the Win32 binary package that contains the + "expat.h" include file and a pre-built DLL. + </p> -<h3>Building under Unix (or GNU)</h3> + <h3> + Building under Unix (or GNU) + </h3> -<p>First you'll need to run the configure shell script in order to -configure the Makefiles and headers for your system.</p> + <p> + First you'll need to run the configure shell script in order to configure the + Makefiles and headers for your system. 
+ </p> -<p>If you're happy with all the defaults that configure picks for you, -and you have permission on your system to install into /usr/local, you -can install Expat with this sequence of commands:</p> + <p> + If you're happy with all the defaults that configure picks for you, and you have + permission on your system to install into /usr/local, you can install Expat with + this sequence of commands: + </p> -<pre class="eg"> + <pre class="eg"> ./configure make make install </pre> + <p> + There are some options that you can provide to this script, but the only one + we'll mention here is the <code>--prefix</code> option. You can find out all the + options available by running configure with just the <code>--help</code> option. + </p> + + <p> + By default, the configure script sets things up so that the library gets + installed in <code>/usr/local/lib</code> and the associated header file in + <code>/usr/local/include</code>. But if you were to give the option, + <code>--prefix=/home/me/mystuff</code>, then the library and header would get + installed in <code>/home/me/mystuff/lib</code> and + <code>/home/me/mystuff/include</code> respectively. + </p> + + <h3> + Configuring Expat Using the Pre-Processor + </h3> -<p>There are some options that you can provide to this script, but the -only one we'll mention here is the <code>--prefix</code> option. You -can find out all the options available by running configure with just -the <code>--help</code> option.</p> + <p> + Expat's feature set can be configured using a small number of pre-processor + definitions. The symbols are: + </p> -<p>By default, the configure script sets things up so that the library -gets installed in <code>/usr/local/lib</code> and the associated -header file in <code>/usr/local/include</code>. 
But if you were to -give the option, <code>--prefix=/home/me/mystuff</code>, then the -library and header would get installed in -<code>/home/me/mystuff/lib</code> and -<code>/home/me/mystuff/include</code> respectively.</p> + <dl class="cpp-symbols"> + <dt> + <a id="XML_GE" name="XML_GE">XML_GE</a> + </dt> -<h3>Configuring Expat Using the Pre-Processor</h3> + <dd> + Added in Expat 2.6.0. Include support for <a href= + "https://www.w3.org/TR/2006/REC-xml-20060816/#sec-physical-struct">general + entities</a> (syntax <code>&e1;</code> to reference and syntax + <code><!ENTITY e1 'value1'></code> (an internal general entity) or + <code><!ENTITY e2 SYSTEM 'file2'></code> (an external general entity) to + declare). With <code>XML_GE</code> enabled, general entities will be replaced + by their declared replacement text; for this to work for <em>external</em> + general entities, in addition an <code><a href= + "#XML_SetExternalEntityRefHandler">XML_ExternalEntityRefHandler</a></code> must + be set using <code><a href= + "#XML_SetExternalEntityRefHandler">XML_SetExternalEntityRefHandler</a></code>. 
+ Also, enabling <code>XML_GE</code> makes the functions <code><a href= + "#XML_SetBillionLaughsAttackProtectionMaximumAmplification">XML_SetBillionLaughsAttackProtectionMaximumAmplification</a></code> + and <code><a href= + "#XML_SetBillionLaughsAttackProtectionActivationThreshold">XML_SetBillionLaughsAttackProtectionActivationThreshold</a></code> + available.<br /> + With <code>XML_GE</code> disabled, Expat has a smaller memory footprint and can + be faster, but will not load external general entities and will replace all + general entities (except the <a href= + "https://www.w3.org/TR/2006/REC-xml-20060816/#sec-predefined-ent">predefined + five</a>: <code>amp</code>, <code>apos</code>, <code>gt</code>, + <code>lt</code>, <code>quot</code>) with a self-reference: for example, + referencing an entity <code>e1</code> via <code>&e1;</code> will be + replaced by text <code>&e1;</code>. + </dd> -<p>Expat's feature set can be configured using a small number of -pre-processor definitions. The symbols are:</p> + <dt> + <a id="XML_DTD" name="XML_DTD">XML_DTD</a> + </dt> -<dl class="cpp-symbols"> -<dt><a name="XML_GE">XML_GE</a></dt> -<dd> -Added in Expat 2.6.0. -Include support for -<a href="https://www.w3.org/TR/2006/REC-xml-20060816/#sec-physical-struct">general entities</a> -(syntax <code>&e1;</code> to reference and -syntax <code><!ENTITY e1 'value1'></code> (an internal general entity) or -<code><!ENTITY e2 SYSTEM 'file2'></code> (an external general entity) to declare). -With <code>XML_GE</code> enabled, general entities will be replaced by their declared replacement text; -for this to work for <em>external</em> general entities, in addition an -<code><a href="#XML_SetExternalEntityRefHandler">XML_ExternalEntityRefHandler</a></code> must be set using -<code><a href="#XML_SetExternalEntityRefHandler">XML_SetExternalEntityRefHandler</a></code>. 
-Also, enabling <code>XML_GE</code> makes -the functions <code><a href="#XML_SetBillionLaughsAttackProtectionMaximumAmplification"> -XML_SetBillionLaughsAttackProtectionMaximumAmplification</a></code> and <code> -<a href="#XML_SetBillionLaughsAttackProtectionActivationThreshold"> -XML_SetBillionLaughsAttackProtectionActivationThreshold</a></code> available. -<br/> -With <code>XML_GE</code> disabled, Expat has a smaller memory footprint and can be faster, but will -not load external general entities and will replace all general entities -(except the <a href="https://www.w3.org/TR/2006/REC-xml-20060816/#sec-predefined-ent">predefined five</a>: -<code>amp</code>, <code>apos</code>, <code>gt</code>, <code>lt</code>, <code>quot</code>) -with a self-reference: -for example, referencing an entity <code>e1</code> via <code>&e1;</code> will be replaced -by text <code>&e1;</code>. -</dd> + <dd> + Include support for using and reporting DTD-based content. If this is defined, + default attribute values from an external DTD subset are reported and attribute + value normalization occurs based on the type of attributes defined in the + external subset. Without this, Expat has a smaller memory footprint and can be + faster, but will not load external parameter entities or process conditional + sections. If defined, makes the functions <code><a href= + "#XML_SetBillionLaughsAttackProtectionMaximumAmplification">XML_SetBillionLaughsAttackProtectionMaximumAmplification</a></code> + and <code><a href= + "#XML_SetBillionLaughsAttackProtectionActivationThreshold">XML_SetBillionLaughsAttackProtectionActivationThreshold</a></code> + available. + </dd> -<dt><a name="XML_DTD">XML_DTD</a></dt> -<dd>Include support for using and reporting DTD-based content. If -this is defined, default attribute values from an external DTD subset -are reported and attribute value normalization occurs based on the -type of attributes defined in the external subset. 
Without -this, Expat has a smaller memory footprint and can be faster, but will -not load external parameter entities or process conditional sections. If defined, makes -the functions <code><a -href="#XML_SetBillionLaughsAttackProtectionMaximumAmplification"> -XML_SetBillionLaughsAttackProtectionMaximumAmplification</a></code> and <code> -<a href="#XML_SetBillionLaughsAttackProtectionActivationThreshold"> -XML_SetBillionLaughsAttackProtectionActivationThreshold</a></code> available.</dd> + <dt> + <a id="XML_NS" name="XML_NS">XML_NS</a> + </dt> -<dt><a name="XML_NS">XML_NS</a></dt> -<dd>When defined, support for the <cite><a href= -"https://www.w3.org/TR/REC-xml-names/" >Namespaces in XML</a></cite> -specification is included.</dd> + <dd> + When defined, support for the <cite><a href= + "https://www.w3.org/TR/REC-xml-names/">Namespaces in XML</a></cite> + specification is included. + </dd> -<dt><a name="XML_UNICODE">XML_UNICODE</a></dt> -<dd>When defined, character data reported to the application is -encoded in UTF-16 using wide characters of the type -<code>XML_Char</code>. This is implied if -<code>XML_UNICODE_WCHAR_T</code> is defined.</dd> + <dt> + <a id="XML_UNICODE" name="XML_UNICODE">XML_UNICODE</a> + </dt> -<dt><a name="XML_UNICODE_WCHAR_T">XML_UNICODE_WCHAR_T</a></dt> -<dd>If defined, causes the <code>XML_Char</code> character type to be -defined using the <code>wchar_t</code> type; otherwise, <code>unsigned -short</code> is used. Defining this implies -<code>XML_UNICODE</code>.</dd> + <dd> + When defined, character data reported to the application is encoded in UTF-16 + using wide characters of the type <code>XML_Char</code>. This is implied if + <code>XML_UNICODE_WCHAR_T</code> is defined. + </dd> -<dt><a name="XML_LARGE_SIZE">XML_LARGE_SIZE</a></dt> -<dd>If defined, causes the <code>XML_Size</code> and <code>XML_Index</code> -integer types to be at least 64 bits in size. 
This is intended to support -processing of very large input streams, where the return values of -<code><a href="#XML_GetCurrentByteIndex" >XML_GetCurrentByteIndex</a></code>, -<code><a href="#XML_GetCurrentLineNumber" >XML_GetCurrentLineNumber</a></code> and -<code><a href="#XML_GetCurrentColumnNumber" >XML_GetCurrentColumnNumber</a></code> -could overflow. It may not be supported by all compilers, and is turned -off by default.</dd> + <dt> + <a id="XML_UNICODE_WCHAR_T" name="XML_UNICODE_WCHAR_T">XML_UNICODE_WCHAR_T</a> + </dt> -<dt><a name="XML_CONTEXT_BYTES">XML_CONTEXT_BYTES</a></dt> -<dd>The number of input bytes of markup context which the parser will -ensure are available for reporting via <code><a href= -"#XML_GetInputContext" >XML_GetInputContext</a></code>. This is -normally set to 1024, and must be set to a positive integer to enable. -If this is set to zero, the input context will not be available and <code><a -href= "#XML_GetInputContext" >XML_GetInputContext</a></code> will -always report <code>NULL</code>. Without this, Expat has a smaller memory -footprint and can be faster.</dd> + <dd> + If defined, causes the <code>XML_Char</code> character type to be defined using + the <code>wchar_t</code> type; otherwise, <code>unsigned short</code> is used. + Defining this implies <code>XML_UNICODE</code>. + </dd> -<dt><a name="XML_STATIC">XML_STATIC</a></dt> -<dd>On Windows, this should be set if Expat is going to be linked -statically with the code that calls it; this is required to get all -the right MSVC magic annotations correct. 
This is ignored on other -platforms.</dd> + <dt> + <a id="XML_LARGE_SIZE" name="XML_LARGE_SIZE">XML_LARGE_SIZE</a> + </dt> -<dt><a name="XML_ATTR_INFO">XML_ATTR_INFO</a></dt> -<dd>If defined, makes the additional function <code><a href= -"#XML_GetAttributeInfo" >XML_GetAttributeInfo</a></code> available -for reporting attribute byte offsets.</dd> -</dl> + <dd> + If defined, causes the <code>XML_Size</code> and <code>XML_Index</code> integer + types to be at least 64 bits in size. This is intended to support processing of + very large input streams, where the return values of <code><a href= + "#XML_GetCurrentByteIndex">XML_GetCurrentByteIndex</a></code>, <code><a href= + "#XML_GetCurrentLineNumber">XML_GetCurrentLineNumber</a></code> and + <code><a href= + "#XML_GetCurrentColumnNumber">XML_GetCurrentColumnNumber</a></code> could + overflow. It may not be supported by all compilers, and is turned off by + default. + </dd> -<hr /> -<h2><a name="using">Using Expat</a></h2> + <dt> + <a id="XML_CONTEXT_BYTES" name="XML_CONTEXT_BYTES">XML_CONTEXT_BYTES</a> + </dt> -<h3>Compiling and Linking Against Expat</h3> + <dd> + The number of input bytes of markup context which the parser will ensure are + available for reporting via <code><a href= + "#XML_GetInputContext">XML_GetInputContext</a></code>. This is normally set to + 1024, and must be set to a positive integer to enable. If this is set to zero, + the input context will not be available and <code><a href= + "#XML_GetInputContext">XML_GetInputContext</a></code> will always report + <code>NULL</code>. Without this, Expat has a smaller memory footprint and can + be faster. + </dd> -<p>Unless you installed Expat in a location not expected by your -compiler and linker, all you have to do to use Expat in your programs -is to include the Expat header (<code>#include <expat.h></code>) -in your files that make calls to it and to tell the linker that it -needs to link against the Expat library. 
On Unix systems, this would -usually be done with the <code>-lexpat</code> argument. Otherwise, -you'll need to tell the compiler where to look for the Expat header -and the linker where to find the Expat library. You may also need to -take steps to tell the operating system where to find this library at -run time.</p> + <dt> + <a id="XML_STATIC" name="XML_STATIC">XML_STATIC</a> + </dt> -<p>On a Unix-based system, here's what a Makefile might look like when -Expat is installed in a standard location:</p> + <dd> + On Windows, this should be set if Expat is going to be linked statically with + the code that calls it; this is required to get all the right MSVC magic + annotations correct. This is ignored on other platforms. + </dd> -<pre class="eg"> + <dt> + <a id="XML_ATTR_INFO" name="XML_ATTR_INFO">XML_ATTR_INFO</a> + </dt> + + <dd> + If defined, makes the additional function <code><a href= + "#XML_GetAttributeInfo">XML_GetAttributeInfo</a></code> available for reporting + attribute byte offsets. + </dd> + </dl> + + <hr /> + + <h2> + <a id="using" name="using">Using Expat</a> + </h2> + + <h3> + Compiling and Linking Against Expat + </h3> + + <p> + Unless you installed Expat in a location not expected by your compiler and + linker, all you have to do to use Expat in your programs is to include the Expat + header (<code>#include <expat.h></code>) in your files that make calls to + it and to tell the linker that it needs to link against the Expat library. On + Unix systems, this would usually be done with the <code>-lexpat</code> argument. + Otherwise, you'll need to tell the compiler where to look for the Expat header + and the linker where to find the Expat library. You may also need to take steps + to tell the operating system where to find this library at run time. 
+ </p> + + <p> + On a Unix-based system, here's what a Makefile might look like when Expat is + installed in a standard location: + </p> + + <pre class="eg"> CC=cc LDFLAGS= LIBS= -lexpat xmlapp: xmlapp.o $(CC) $(LDFLAGS) -o xmlapp xmlapp.o $(LIBS) </pre> + <p> + If you installed Expat in, say, <code>/home/me/mystuff</code>, then the Makefile + would look like this: + </p> -<p>If you installed Expat in, say, <code>/home/me/mystuff</code>, then -the Makefile would look like this:</p> - -<pre class="eg"> + <pre class="eg"> CC=cc CFLAGS= -I/home/me/mystuff/include LDFLAGS= @@ -488,65 +844,71 @@ LIBS= -L/home/me/mystuff/lib -lexpat xmlapp: xmlapp.o $(CC) $(LDFLAGS) -o xmlapp xmlapp.o $(LIBS) </pre> + <p> + You'd also have to set the environment variable <code>LD_LIBRARY_PATH</code> to + <code>/home/me/mystuff/lib</code> (or to + <code>${LD_LIBRARY_PATH}:/home/me/mystuff/lib</code> if LD_LIBRARY_PATH already + has some directories in it) in order to run your application. + </p> -<p>You'd also have to set the environment variable -<code>LD_LIBRARY_PATH</code> to <code>/home/me/mystuff/lib</code> (or -to <code>${LD_LIBRARY_PATH}:/home/me/mystuff/lib</code> if -LD_LIBRARY_PATH already has some directories in it) in order to run -your application.</p> - -<h3>Expat Basics</h3> + <h3> + Expat Basics + </h3> -<p>As we saw in the example in the overview, the first step in parsing -an XML document with Expat is to create a parser object. There are <a -href="#creation">three functions</a> in the Expat API for creating a -parser object. However, only two of these (<code><a href= -"#XML_ParserCreate" >XML_ParserCreate</a></code> and <code><a href= -"#XML_ParserCreateNS" >XML_ParserCreateNS</a></code>) can be used for -constructing a parser for a top-level document. The object returned -by these functions is an opaque pointer (i.e. "expat.h" declares it as -void *) to data with further internal structure. 
In order to free the -memory associated with this object you must call <code><a href= -"#XML_ParserFree" >XML_ParserFree</a></code>. Note that if you have -provided any <a href="#userdata">user data</a> that gets stored in the -parser, then your application is responsible for freeing it prior to -calling <code>XML_ParserFree</code>.</p> + <p> + As we saw in the example in the overview, the first step in parsing an XML + document with Expat is to create a parser object. There are <a href= + "#creation">three functions</a> in the Expat API for creating a parser object. + However, only two of these (<code><a href= + "#XML_ParserCreate">XML_ParserCreate</a></code> and <code><a href= + "#XML_ParserCreateNS">XML_ParserCreateNS</a></code>) can be used for constructing + a parser for a top-level document. The object returned by these functions is an + opaque pointer (i.e. "expat.h" declares it as void *) to data with further + internal structure. In order to free the memory associated with this object you + must call <code><a href="#XML_ParserFree">XML_ParserFree</a></code>. Note that if + you have provided any <a href="#userdata">user data</a> that gets stored in the + parser, then your application is responsible for freeing it prior to calling + <code>XML_ParserFree</code>. + </p> -<p>The objects returned by the parser creation functions are good for -parsing only one XML document or external parsed entity. If your -application needs to parse many XML documents, then it needs to create -a parser object for each one. The best way to deal with this is to -create a higher level object that contains all the default -initialization you want for your parser objects.</p> + <p> + The objects returned by the parser creation functions are good for parsing only + one XML document or external parsed entity. If your application needs to parse + many XML documents, then it needs to create a parser object for each one. 
The + best way to deal with this is to create a higher level object that contains all + the default initialization you want for your parser objects. + </p> -<p>Walking through a document hierarchy with a stream oriented parser -will require a good stack mechanism in order to keep track of current -context. For instance, to answer the simple question, "What element -does this text belong to?" requires a stack, since the parser may have -descended into other elements that are children of the current one and -has encountered this text on the way out.</p> + <p> + Walking through a document hierarchy with a stream oriented parser will require a + good stack mechanism in order to keep track of current context. For instance, to + answer the simple question, "What element does this text belong to?" requires a + stack, since the parser may have descended into other elements that are children + of the current one and has encountered this text on the way out. + </p> -<p>The things you're likely to want to keep on a stack are the -currently opened element and it's attributes. You push this -information onto the stack in the start handler and you pop it off in -the end handler.</p> + <p> + The things you're likely to want to keep on a stack are the currently opened + element and it's attributes. You push this information onto the stack in the + start handler and you pop it off in the end handler. + </p> -<p>For some tasks, it is sufficient to just keep information on what -the depth of the stack is (or would be if you had one.) The outline -program shown above presents one example. Another such task would be -skipping over a complete element. When you see the start tag for the -element you want to skip, you set a skip flag and record the depth at -which the element started. When the end tag handler encounters the -same depth, the skipped element has ended and the flag may be -cleared. 
If you follow the convention that the root element starts at -1, then you can use the same variable for skip flag and skip -depth.</p> + <p> + For some tasks, it is sufficient to just keep information on what the depth of + the stack is (or would be if you had one.) The outline program shown above + presents one example. Another such task would be skipping over a complete + element. When you see the start tag for the element you want to skip, you set a + skip flag and record the depth at which the element started. When the end tag + handler encounters the same depth, the skipped element has ended and the flag may + be cleared. If you follow the convention that the root element starts at 1, then + you can use the same variable for skip flag and skip depth. + </p> -<pre class="eg"> + <pre class="eg"> void init_info(Parseinfo *info) { - info->skip = 0; - info->depth = 1; + info->skip = 0; + info->depth = 1; /* Other initializations here */ } /* End of init_info */ @@ -554,87 +916,91 @@ void XMLCALL rawstart(void *data, const char *el, const char **attr) { Parseinfo *inf = (Parseinfo *) data; - if (! inf->skip) { + if (! inf->skip) { if (should_skip(inf, el, attr)) { - inf->skip = inf->depth; + inf->skip = inf->depth; } else start(inf, el, attr); /* This does rest of start handling */ } - inf->depth++; + inf->depth++; } /* End of rawstart */ void XMLCALL rawend(void *data, const char *el) { Parseinfo *inf = (Parseinfo *) data; - inf->depth--; + inf->depth--; - if (! inf->skip) + if (! inf->skip) end(inf, el); /* This does rest of end handling */ - if (inf->skip == inf->depth) - inf->skip = 0; + if (inf->skip == inf->depth) + inf->skip = 0; } /* End rawend */ </pre> + <p> + Notice in the above example the difference in how depth is manipulated in the + start and end handlers. The end tag handler should be the mirror image of the + start tag handler. This is necessary to properly model containment. 
Since, in the + start tag handler, we incremented depth <em>after</em> the main body of start tag + code, then in the end handler, we need to manipulate it <em>before</em> the main + body. If we'd decided to increment it first thing in the start handler, then we'd + have had to decrement it last thing in the end handler. + </p> -<p>Notice in the above example the difference in how depth is -manipulated in the start and end handlers. The end tag handler should -be the mirror image of the start tag handler. This is necessary to -properly model containment. Since, in the start tag handler, we -incremented depth <em>after</em> the main body of start tag code, then -in the end handler, we need to manipulate it <em>before</em> the main -body. If we'd decided to increment it first thing in the start -handler, then we'd have had to decrement it last thing in the end -handler.</p> + <h3 id="userdata"> + Communicating between handlers + </h3> -<h3 id="userdata">Communicating between handlers</h3> + <p> + In order to be able to pass information between different handlers without using + globals, you'll need to define a data structure to hold the shared variables. You + can then tell Expat (with the <code><a href= + "#XML_SetUserData">XML_SetUserData</a></code> function) to pass a pointer to this + structure to the handlers. This is the first argument received by most handlers. + In the <a href="#reference">reference section</a>, an argument to a callback + function is named <code>userData</code> and have type <code>void *</code> if the + user data is passed; it will have the type <code>XML_Parser</code> if the parser + itself is passed. When the parser is passed, the user data may be retrieved using + <code><a href="#XML_GetUserData">XML_GetUserData</a></code>. + </p> -<p>In order to be able to pass information between different handlers -without using globals, you'll need to define a data structure to hold -the shared variables. 
You can then tell Expat (with the <code><a href= -"#XML_SetUserData" >XML_SetUserData</a></code> function) to pass a -pointer to this structure to the handlers. This is the first -argument received by most handlers. In the <a href="#reference" ->reference section</a>, an argument to a callback function is named -<code>userData</code> and have type <code>void *</code> if the user -data is passed; it will have the type <code>XML_Parser</code> if the -parser itself is passed. When the parser is passed, the user data may -be retrieved using <code><a href="#XML_GetUserData" ->XML_GetUserData</a></code>.</p> + <p> + One common case where multiple calls to a single handler may need to communicate + using an application data structure is the case when content passed to the + character data handler (set by <code><a href= + "#XML_SetCharacterDataHandler">XML_SetCharacterDataHandler</a></code>) needs to + be accumulated. A common first-time mistake with any of the event-oriented + interfaces to an XML parser is to expect all the text contained in an element to + be reported by a single call to the character data handler. Expat, like many + other XML parsers, reports such data as a sequence of calls; there's no way to + know when the end of the sequence is reached until a different callback is made. + A buffer referenced by the user data structure proves both an effective and + convenient place to accumulate character data. + </p> + <!-- XXX example needed here --> -<p>One common case where multiple calls to a single handler may need -to communicate using an application data structure is the case when -content passed to the character data handler (set by <code><a href= -"#XML_SetCharacterDataHandler" ->XML_SetCharacterDataHandler</a></code>) needs to be accumulated. A -common first-time mistake with any of the event-oriented interfaces to -an XML parser is to expect all the text contained in an element to be -reported by a single call to the character data handler. 
Expat, like -many other XML parsers, reports such data as a sequence of calls; -there's no way to know when the end of the sequence is reached until a -different callback is made. A buffer referenced by the user data -structure proves both an effective and convenient place to accumulate -character data.</p> + <h3> + XML Version + </h3> -<!-- XXX example needed here --> + <p> + Expat is an XML 1.0 parser, and as such never complains based on the value of the + <code>version</code> pseudo-attribute in the XML declaration, if present. + </p> + <p> + If an application needs to check the version number (to support alternate + processing), it should use the <code><a href= + "#XML_SetXmlDeclHandler">XML_SetXmlDeclHandler</a></code> function to set a + handler that uses the information in the XML declaration to determine what to do. + This example shows how to check that only a version number of <code>"1.0"</code> + is accepted: + </p> -<h3>XML Version</h3> - -<p>Expat is an XML 1.0 parser, and as such never complains based on -the value of the <code>version</code> pseudo-attribute in the XML -declaration, if present.</p> - -<p>If an application needs to check the version number (to support -alternate processing), it should use the <code><a href= -"#XML_SetXmlDeclHandler" >XML_SetXmlDeclHandler</a></code> function to -set a handler that uses the information in the XML declaration to -determine what to do. This example shows how to check that only a -version number of <code>"1.0"</code> is accepted:</p> - -<pre class="eg"> + <pre class="eg"> static int wrong_version; static XML_Parser parser; @@ -660,201 +1026,272 @@ xmldecl_handler(void *userData, ... } </pre> + <h3> + Namespace Processing + </h3> -<h3>Namespace Processing</h3> + <p> + When the parser is created using the <code><a href= + "#XML_ParserCreateNS">XML_ParserCreateNS</a></code>, function, Expat performs + namespace processing. 
Under namespace processing, Expat consumes + <code>xmlns</code> and <code>xmlns:...</code> attributes, which declare + namespaces for the scope of the element in which they occur. This means that your + start handler will not see these attributes. Your application can still be + informed of these declarations by setting namespace declaration handlers with + <a href= + "#XML_SetNamespaceDeclHandler"><code>XML_SetNamespaceDeclHandler</code></a>. + </p> -<p>When the parser is created using the <code><a href= -"#XML_ParserCreateNS" >XML_ParserCreateNS</a></code>, function, Expat -performs namespace processing. Under namespace processing, Expat -consumes <code>xmlns</code> and <code>xmlns:...</code> attributes, -which declare namespaces for the scope of the element in which they -occur. This means that your start handler will not see these -attributes. Your application can still be informed of these -declarations by setting namespace declaration handlers with <a href= -"#XML_SetNamespaceDeclHandler" -><code>XML_SetNamespaceDeclHandler</code></a>.</p> + <p> + Element type and attribute names that belong to a given namespace are passed to + the appropriate handler in expanded form. By default this expanded form is a + concatenation of the namespace URI, the separator character (which is the 2nd + argument to <code><a href="#XML_ParserCreateNS">XML_ParserCreateNS</a></code>), + and the local name (i.e. the part after the colon). Names with undeclared + prefixes are not well-formed when namespace processing is enabled, and will + trigger an error. Unprefixed attribute names are never expanded, and unprefixed + element names are only expanded when they are in the scope of a default + namespace. + </p> -<p>Element type and attribute names that belong to a given namespace -are passed to the appropriate handler in expanded form. 
By default -this expanded form is a concatenation of the namespace URI, the -separator character (which is the 2nd argument to <code><a href= -"#XML_ParserCreateNS" >XML_ParserCreateNS</a></code>), and the local -name (i.e. the part after the colon). Names with undeclared prefixes -are not well-formed when namespace processing is enabled, and will -trigger an error. Unprefixed attribute names are never expanded, -and unprefixed element names are only expanded when they are in the -scope of a default namespace.</p> + <p> + However if <code><a href= + "#XML_SetReturnNSTriplet">XML_SetReturnNSTriplet</a></code> has been called with + a non-zero <code>do_nst</code> parameter, then the expanded form for names with + an explicit prefix is a concatenation of: URI, separator, local name, separator, + prefix. + </p> -<p>However if <code><a href= "#XML_SetReturnNSTriplet" ->XML_SetReturnNSTriplet</a></code> has been called with a non-zero -<code>do_nst</code> parameter, then the expanded form for names with -an explicit prefix is a concatenation of: URI, separator, local name, -separator, prefix.</p> + <p> + You can set handlers for the start of a namespace declaration and for the end of + a scope of a declaration with the <code><a href= + "#XML_SetNamespaceDeclHandler">XML_SetNamespaceDeclHandler</a></code> function. + The StartNamespaceDeclHandler is called prior to the start tag handler and the + EndNamespaceDeclHandler is called after the corresponding end tag that ends the + namespace's scope. The namespace start handler gets passed the prefix and URI for + the namespace. For a default namespace declaration (xmlns='...'), the prefix will + be <code>NULL</code>. The URI will be <code>NULL</code> for the case where the + default namespace is being unset. The namespace end handler just gets the prefix + for the closing scope. 
+ </p> -<p>You can set handlers for the start of a namespace declaration and -for the end of a scope of a declaration with the <code><a href= -"#XML_SetNamespaceDeclHandler" >XML_SetNamespaceDeclHandler</a></code> -function. The StartNamespaceDeclHandler is called prior to the start -tag handler and the EndNamespaceDeclHandler is called after the -corresponding end tag that ends the namespace's scope. The namespace -start handler gets passed the prefix and URI for the namespace. For a -default namespace declaration (xmlns='...'), the prefix will be -<code>NULL</code>. -The URI will be <code>NULL</code> for the case where the default namespace is being -unset. The namespace end handler just gets the prefix for the closing -scope.</p> + <p> + These handlers are called for each declaration. So if, for instance, a start tag + had three namespace declarations, then the StartNamespaceDeclHandler would be + called three times before the start tag handler is called, once for each + declaration. + </p> -<p>These handlers are called for each declaration. So if, for -instance, a start tag had three namespace declarations, then the -StartNamespaceDeclHandler would be called three times before the start -tag handler is called, once for each declaration.</p> + <h3> + Character Encodings + </h3> -<h3>Character Encodings</h3> + <p> + While XML is based on Unicode, and every XML processor is required to recognize + UTF-8 and UTF-16 (1 and 2 byte encodings of Unicode), other encodings may be + declared in XML documents or entities. For the main document, an XML declaration + may contain an encoding declaration: + </p> -<p>While XML is based on Unicode, and every XML processor is required -to recognize UTF-8 and UTF-16 (1 and 2 byte encodings of Unicode), -other encodings may be declared in XML documents or entities. 
For the -main document, an XML declaration may contain an encoding -declaration:</p> -<pre> + <pre> <?xml version="1.0" encoding="ISO-8859-2"?> </pre> + <p> + External parsed entities may begin with a text declaration, which looks like an + XML declaration with just an encoding declaration: + </p> -<p>External parsed entities may begin with a text declaration, which -looks like an XML declaration with just an encoding declaration:</p> -<pre> + <pre> <?xml encoding="Big5"?> </pre> + <p> + With Expat, you may also specify an encoding at the time of creating a parser. + This is useful when the encoding information may come from a source outside the + document itself (like a higher level protocol.) + </p> + + <p> + <a id="builtin_encodings" name="builtin_encodings"></a>There are four built-in + encodings in Expat: + </p> -<p>With Expat, you may also specify an encoding at the time of -creating a parser. This is useful when the encoding information may -come from a source outside the document itself (like a higher level -protocol.)</p> + <ul> + <li>UTF-8 + </li> + + <li>UTF-16 + </li> -<p><a name="builtin_encodings"></a>There are four built-in encodings -in Expat:</p> -<ul> -<li>UTF-8</li> -<li>UTF-16</li> -<li>ISO-8859-1</li> -<li>US-ASCII</li> -</ul> + <li>ISO-8859-1 + </li> + + <li>US-ASCII + </li> + </ul> -<p>Anything else discovered in an encoding declaration or in the -protocol encoding specified in the parser constructor, triggers a call -to the <code>UnknownEncodingHandler</code>. This handler gets passed -the encoding name and a pointer to an <code>XML_Encoding</code> data -structure. Your handler must fill in this structure and return -<code>XML_STATUS_OK</code> if it knows how to deal with the -encoding. Otherwise the handler should return -<code>XML_STATUS_ERROR</code>. 
The handler also gets passed a pointer -to an optional application data structure that you may indicate when -you set the handler.</p> + <p> + Anything else discovered in an encoding declaration or in the protocol encoding + specified in the parser constructor, triggers a call to the + <code>UnknownEncodingHandler</code>. This handler gets passed the encoding name + and a pointer to an <code>XML_Encoding</code> data structure. Your handler must + fill in this structure and return <code>XML_STATUS_OK</code> if it knows how to + deal with the encoding. Otherwise the handler should return + <code>XML_STATUS_ERROR</code>. The handler also gets passed a pointer to an + optional application data structure that you may indicate when you set the + handler. + </p> -<p>Expat places restrictions on character encodings that it can -support by filling in the <code>XML_Encoding</code> structure. -include file:</p> -<ol> -<li>Every ASCII character that can appear in a well-formed XML document -must be represented by a single byte, and that byte must correspond to -it's ASCII encoding (except for the characters $@\^'{}~)</li> -<li>Characters must be encoded in 4 bytes or less.</li> -<li>All characters encoded must have Unicode scalar values less than or -equal to 65535 (0xFFFF)<em>This does not apply to the built-in support -for UTF-16 and UTF-8</em></li> -<li>No character may be encoded by more that one distinct sequence of -bytes</li> -</ol> + <p> + Expat places restrictions on character encodings that it can support by filling + in the <code>XML_Encoding</code> structure. include file: + </p> -<p><code>XML_Encoding</code> contains an array of integers that -correspond to the 1st byte of an encoding sequence. If the value in -the array for a byte is zero or positive, then the byte is a single -byte encoding that encodes the Unicode scalar value contained in the -array. A -1 in this array indicates a malformed byte. 
If the value is --2, -3, or -4, then the byte is the beginning of a 2, 3, or 4 byte -sequence respectively. Multi-byte sequences are sent to the convert -function pointed at in the <code>XML_Encoding</code> structure. This -function should return the Unicode scalar value for the sequence or -1 -if the sequence is malformed.</p> + <ol> + <li>Every ASCII character that can appear in a well-formed XML document must be + represented by a single byte, and that byte must correspond to it's ASCII + encoding (except for the characters $@\^'{}~) + </li> -<p>One pitfall that novice Expat users are likely to fall into is that -although Expat may accept input in various encodings, the strings that -it passes to the handlers are always encoded in UTF-8 or UTF-16 -(depending on how Expat was compiled). Your application is responsible -for any translation of these strings into other encodings.</p> + <li>Characters must be encoded in 4 bytes or less. + </li> -<h3>Handling External Entity References</h3> + <li>All characters encoded must have Unicode scalar values less than or equal to + 65535 (0xFFFF)<em>This does not apply to the built-in support for UTF-16 and + UTF-8</em> + </li> -<p>Expat does not read or parse external entities directly. Note that -any external DTD is a special case of an external entity. If you've -set no <code>ExternalEntityRefHandler</code>, then external entity -references are silently ignored. Otherwise, it calls your handler with -the information needed to read and parse the external entity.</p> + <li>No character may be encoded by more that one distinct sequence of bytes + </li> + </ol> -<p>Your handler isn't actually responsible for parsing the entity, but -it is responsible for creating a subsidiary parser with <code><a href= -"#XML_ExternalEntityParserCreate" ->XML_ExternalEntityParserCreate</a></code> that will do the job. 
This -returns an instance of <code>XML_Parser</code> that has handlers and -other data structures initialized from the parent parser. You may then -use <code><a href= "#XML_Parse" >XML_Parse</a></code> or <code><a -href= "#XML_ParseBuffer">XML_ParseBuffer</a></code> calls against this -parser. Since external entities my refer to other external entities, -your handler should be prepared to be called recursively.</p> + <p> + <code>XML_Encoding</code> contains an array of integers that correspond to the + 1st byte of an encoding sequence. If the value in the array for a byte is zero or + positive, then the byte is a single byte encoding that encodes the Unicode scalar + value contained in the array. A -1 in this array indicates a malformed byte. If + the value is -2, -3, or -4, then the byte is the beginning of a 2, 3, or 4 byte + sequence respectively. Multi-byte sequences are sent to the convert function + pointed at in the <code>XML_Encoding</code> structure. This function should + return the Unicode scalar value for the sequence or -1 if the sequence is + malformed. + </p> -<h3>Parsing DTDs</h3> + <p> + One pitfall that novice Expat users are likely to fall into is that although + Expat may accept input in various encodings, the strings that it passes to the + handlers are always encoded in UTF-8 or UTF-16 (depending on how Expat was + compiled). Your application is responsible for any translation of these strings + into other encodings. 
+ </p> -<p>In order to parse parameter entities, before starting the parse, -you must call <code><a href= "#XML_SetParamEntityParsing" ->XML_SetParamEntityParsing</a></code> with one of the following -arguments:</p> -<dl> -<dt><code>XML_PARAM_ENTITY_PARSING_NEVER</code></dt> -<dd>Don't parse parameter entities or the external subset</dd> -<dt><code>XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE</code></dt> -<dd>Parse parameter entities and the external subset unless -<code>standalone</code> was set to "yes" in the XML declaration.</dd> -<dt><code>XML_PARAM_ENTITY_PARSING_ALWAYS</code></dt> -<dd>Always parse parameter entities and the external subset</dd> -</dl> + <h3> + Handling External Entity References + </h3> -<p>In order to read an external DTD, you also have to set an external -entity reference handler as described above.</p> + <p> + Expat does not read or parse external entities directly. Note that any external + DTD is a special case of an external entity. If you've set no + <code>ExternalEntityRefHandler</code>, then external entity references are + silently ignored. Otherwise, it calls your handler with the information needed to + read and parse the external entity. + </p> -<h3 id="stop-resume">Temporarily Stopping Parsing</h3> + <p> + Your handler isn't actually responsible for parsing the entity, but it is + responsible for creating a subsidiary parser with <code><a href= + "#XML_ExternalEntityParserCreate">XML_ExternalEntityParserCreate</a></code> that + will do the job. This returns an instance of <code>XML_Parser</code> that has + handlers and other data structures initialized from the parent parser. You may + then use <code><a href="#XML_Parse">XML_Parse</a></code> or <code><a href= + "#XML_ParseBuffer">XML_ParseBuffer</a></code> calls against this parser. Since + external entities may refer to other external entities, your handler should be + prepared to be called recursively. 
+ </p> -<p>Expat 1.95.8 introduces a new feature: its now possible to stop -parsing temporarily from within a handler function, even if more data -has already been passed into the parser. Applications for this -include</p> + <h3> + Parsing DTDs + </h3> -<ul> - <li>Supporting the <a href= "https://www.w3.org/TR/xinclude/" - >XInclude</a> specification.</li> + <p> + In order to parse parameter entities, before starting the parse, you must call + <code><a href="#XML_SetParamEntityParsing">XML_SetParamEntityParsing</a></code> + with one of the following arguments: + </p> - <li>Delaying further processing until additional information is - available from some other source.</li> + <dl> + <dt> + <code>XML_PARAM_ENTITY_PARSING_NEVER</code> + </dt> - <li>Adjusting processor load as task priorities shift within an - application.</li> + <dd> + Don't parse parameter entities or the external subset + </dd> - <li>Stopping parsing completely (simply free or reset the parser - instead of resuming in the outer parsing loop). This can be useful - if an application-domain error is found in the XML being parsed or if - the result of the parse is determined not to be useful after - all.</li> -</ul> + <dt> + <code>XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE</code> + </dt> -<p>To take advantage of this feature, the main parsing loop of an -application needs to support this specifically. It cannot be -supported with a parsing loop compatible with Expat 1.95.7 or -earlier (though existing loops will continue to work without -supporting the stop/resume feature).</p> + <dd> + Parse parameter entities and the external subset unless <code>standalone</code> + was set to "yes" in the XML declaration. 
+ </dd> -<p>An application that uses this feature for a single parser will have -the rough structure (in pseudo-code):</p> + <dt> + <code>XML_PARAM_ENTITY_PARSING_ALWAYS</code> + </dt> -<pre class="pseudocode"> + <dd> + Always parse parameter entities and the external subset + </dd> + </dl> + + <p> + In order to read an external DTD, you also have to set an external entity + reference handler as described above. + </p> + + <h3 id="stop-resume"> + Temporarily Stopping Parsing + </h3> + + <p> + Expat 1.95.8 introduces a new feature: it's now possible to stop parsing + temporarily from within a handler function, even if more data has already been + passed into the parser. Applications for this include + </p> + + <ul> + <li>Supporting the <a href="https://www.w3.org/TR/xinclude/">XInclude</a> + specification. + </li> + + <li>Delaying further processing until additional information is available from + some other source. + </li> + + <li>Adjusting processor load as task priorities shift within an application. + </li> + + <li>Stopping parsing completely (simply free or reset the parser instead of + resuming in the outer parsing loop). This can be useful if an application-domain + error is found in the XML being parsed or if the result of the parse is + determined not to be useful after all. + </li> + </ul> + + <p> + To take advantage of this feature, the main parsing loop of an application needs + to support this specifically. It cannot be supported with a parsing loop + compatible with Expat 1.95.7 or earlier (though existing loops will continue to + work without supporting the stop/resume feature). 
+ </p> + + <p> + An application that uses this feature for a single parser will have the rough + structure (in pseudo-code): + </p> + + <pre class="pseudocode"> fd = open_input() p = create_parser() @@ -871,15 +1308,18 @@ if parse_xml(p, fd) { } } </pre> + <p> + An application that may resume any of several parsers based on input (either from + the XML being parsed or some other source) will certainly have more interesting + control structures. + </p> -<p>An application that may resume any of several parsers based on -input (either from the XML being parsed or some other source) will -certainly have more interesting control structures.</p> - -<p>This C function could be used for the <code>parse_xml</code> -function mentioned in the pseudo-code above:</p> + <p> + This C function could be used for the <code>parse_xml</code> function mentioned + in the pseudo-code above: + </p> -<pre class="eg"> + <pre class="eg"> #define BUFF_SIZE 10240 /* Parse a document from the open file descriptor 'fd' until the parse @@ -918,14 +1358,14 @@ parse_xml(XML_Parser p, int fd) } } </pre> + <p> + The corresponding <code>continue_parsing</code> function is somewhat simpler, + since it only need deal with the return code from <code><a href= + "#XML_ResumeParser">XML_ResumeParser</a></code>; it can delegate the input + handling to the <code>parse_xml</code> function: + </p> -<p>The corresponding <code>continue_parsing</code> function is -somewhat simpler, since it only need deal with the return code from -<code><a href= "#XML_ResumeParser">XML_ResumeParser</a></code>; it can -delegate the input handling to the <code>parse_xml</code> -function:</p> - -<pre class="eg"> + <pre class="eg"> /* Continue parsing a document which had been suspended. The 'p' and 'fd' arguments are the same as passed to parse_xml(). Return non-zero when the parse is suspended. 
@@ -947,274 +1387,343 @@ continue_parsing(XML_Parser p, int fd) return parse_xml(p, fd); } </pre> + <p> + Now that we've seen what a mess the top-level parsing loop can become, what have + we gained? Very simply, we can now use the <code><a href= + "#XML_StopParser">XML_StopParser</a></code> function to stop parsing, without + having to go to great lengths to avoid additional processing that we're expecting + to ignore. As a bonus, we get to stop parsing <em>temporarily</em>, and come back + to it when we're ready. + </p> -<p>Now that we've seen what a mess the top-level parsing loop can -become, what have we gained? Very simply, we can now use the <code><a -href= "#XML_StopParser" >XML_StopParser</a></code> function to stop -parsing, without having to go to great lengths to avoid additional -processing that we're expecting to ignore. As a bonus, we get to stop -parsing <em>temporarily</em>, and come back to it when we're -ready.</p> - -<p>To stop parsing from a handler function, use the <code><a href= -"#XML_StopParser" >XML_StopParser</a></code> function. This function -takes two arguments; the parser being stopped and a flag indicating -whether the parse can be resumed in the future.</p> + <p> + To stop parsing from a handler function, use the <code><a href= + "#XML_StopParser">XML_StopParser</a></code> function. This function takes two + arguments; the parser being stopped and a flag indicating whether the parse can + be resumed in the future. 
+ </p> + <!-- XXX really need more here --> -<!-- XXX really need more here --> + <hr /> + <!-- ================================================================ --> + <h2> + <a id="reference" name="reference">Expat Reference</a> + </h2> -<hr /> -<!-- ================================================================ --> + <h3> + <a id="creation" name="creation">Parser Creation</a> + </h3> -<h2><a name="reference">Expat Reference</a></h2> + <h4 id="XML_ParserCreate"> + XML_ParserCreate + </h4> -<h3><a name="creation">Parser Creation</a></h3> - -<h4 id="XML_ParserCreate">XML_ParserCreate</h4> -<pre class="fcndec"> + <pre class="fcndec"> XML_Parser XMLCALL XML_ParserCreate(const XML_Char *encoding); </pre> -<div class="fcndef"> -<p> -Construct a new parser. If encoding is non-<code>NULL</code>, it specifies a -character encoding to use for the document. This overrides the document -encoding declaration. There are four built-in encodings: -</p> -<ul> -<li>US-ASCII</li> -<li>UTF-8</li> -<li>UTF-16</li> -<li>ISO-8859-1</li> -</ul> -<p> -Any other value will invoke a call to the UnknownEncodingHandler. -</p> -</div> + <div class="fcndef"> + <p> + Construct a new parser. If encoding is non-<code>NULL</code>, it specifies a + character encoding to use for the document. This overrides the document + encoding declaration. There are four built-in encodings: + </p> + + <ul> + <li>US-ASCII + </li> -<h4 id="XML_ParserCreateNS">XML_ParserCreateNS</h4> -<pre class="fcndec"> + <li>UTF-8 + </li> + + <li>UTF-16 + </li> + + <li>ISO-8859-1 + </li> + </ul> + + <p> + Any other value will invoke a call to the UnknownEncodingHandler. + </p> + </div> + + <h4 id="XML_ParserCreateNS"> + XML_ParserCreateNS + </h4> + + <pre class="fcndec"> XML_Parser XMLCALL XML_ParserCreateNS(const XML_Char *encoding, XML_Char sep); </pre> -<div class="fcndef"> -Constructs a new parser that has namespace processing in effect. 
Namespace -expanded element names and attribute names are returned as a concatenation -of the namespace URI, <em>sep</em>, and the local part of the name. This -means that you should pick a character for <em>sep</em> that can't be part -of an URI. Since Expat does not check namespace URIs for conformance, the -only safe choice for a namespace separator is a character that is illegal -in XML. For instance, <code>'\xFF'</code> is not legal in UTF-8, and -<code>'\xFFFF'</code> is not legal in UTF-16. There is a special case when -<em>sep</em> is the null character <code>'\0'</code>: the namespace URI and -the local part will be concatenated without any separator - this is intended -to support RDF processors. It is a programming error to use the null separator -with <a href= "#XML_SetReturnNSTriplet">namespace triplets</a>.</div> + <div class="fcndef"> + Constructs a new parser that has namespace processing in effect. Namespace + expanded element names and attribute names are returned as a concatenation of the + namespace URI, <em>sep</em>, and the local part of the name. This means that you + should pick a character for <em>sep</em> that can't be part of a URI. Since + Expat does not check namespace URIs for conformance, the only safe choice for a + namespace separator is a character that is illegal in XML. For instance, + <code>'\xFF'</code> is not legal in UTF-8, and <code>'\xFFFF'</code> is not legal + in UTF-16. There is a special case when <em>sep</em> is the null character + <code>'\0'</code>: the namespace URI and the local part will be concatenated + without any separator - this is intended to support RDF processors. It is a + programming error to use the null separator with <a href= + "#XML_SetReturnNSTriplet">namespace triplets</a>. 
+ </div> + + <p> + <strong>Note:</strong> Expat does not validate namespace URIs (beyond encoding) + against RFC 3986 today (and is not required to do so with regard to the XML 1.0 + namespaces specification) but it may start doing that in future releases. Before + that, an application using Expat must be ready to receive namespace URIs + containing non-URI characters. + </p> -<p><strong>Note:</strong> -Expat does not validate namespace URIs (beyond encoding) -against RFC 3986 today (and is not required to do so with regard to -the XML 1.0 namespaces specification) but it may start doing that -in future releases. Before that, an application using Expat must -be ready to receive namespace URIs containing non-URI characters. -</p> + <h4 id="XML_ParserCreate_MM"> + XML_ParserCreate_MM + </h4> -<h4 id="XML_ParserCreate_MM">XML_ParserCreate_MM</h4> -<pre class="fcndec"> + <pre class="fcndec"> XML_Parser XMLCALL XML_ParserCreate_MM(const XML_Char *encoding, const XML_Memory_Handling_Suite *ms, - const XML_Char *sep); + const XML_Char *sep); </pre> -<pre class="signature"> + + <pre class="signature"> typedef struct { void *(XMLCALL *malloc_fcn)(size_t size); void *(XMLCALL *realloc_fcn)(void *ptr, size_t size); void (XMLCALL *free_fcn)(void *ptr); } XML_Memory_Handling_Suite; </pre> -<div class="fcndef"> -<p>Construct a new parser using the suite of memory handling functions -specified in <code>ms</code>. If <code>ms</code> is <code>NULL</code>, then use the -standard set of memory management functions. If <code>sep</code> is -non-<code>NULL</code>, then namespace processing is enabled in the created parser -and the character pointed at by sep is used as the separator between -the namespace URI and the local part of the name.</p> -</div> + <div class="fcndef"> + <p> + Construct a new parser using the suite of memory handling functions specified + in <code>ms</code>. If <code>ms</code> is <code>NULL</code>, then use the + standard set of memory management functions. 
If <code>sep</code> is + non-<code>NULL</code>, then namespace processing is enabled in the created + parser and the character pointed at by sep is used as the separator between the + namespace URI and the local part of the name. + </p> + </div> + + <h4 id="XML_ExternalEntityParserCreate"> + XML_ExternalEntityParserCreate + </h4> -<h4 id="XML_ExternalEntityParserCreate">XML_ExternalEntityParserCreate</h4> -<pre class="fcndec"> + <pre class="fcndec"> XML_Parser XMLCALL XML_ExternalEntityParserCreate(XML_Parser p, const XML_Char *context, const XML_Char *encoding); </pre> -<div class="fcndef"> -Construct a new <code>XML_Parser</code> object for parsing an external -general entity. Context is the context argument passed in a call to a -ExternalEntityRefHandler. Other state information such as handlers, -user data, namespace processing is inherited from the parser passed as -the 1st argument. So you shouldn't need to call any of the behavior -changing functions on this parser (unless you want it to act -differently than the parent parser). -</div> + <div class="fcndef"> + <p> + Construct a new <code>XML_Parser</code> object for parsing an external general + entity. Context is the context argument passed in a call to an + ExternalEntityRefHandler. Other state information such as handlers, user data, + namespace processing is inherited from the parser passed as the 1st argument. + So you shouldn't need to call any of the behavior changing functions on this + parser (unless you want it to act differently than the parent parser). + </p> -<h4 id="XML_ParserFree">XML_ParserFree</h4> -<pre class="fcndec"> + <p> + <strong>Note:</strong> Please be sure to free subparsers created by + <code><a href= + "#XML_ExternalEntityParserCreate">XML_ExternalEntityParserCreate</a></code> + <em>prior to</em> freeing their related parent parser, as subparsers reference + and use parts of their respective parent parser, internally. Parent parsers + must outlive subparsers. 
+ </p> + </div> + + <h4 id="XML_ParserFree"> + XML_ParserFree + </h4> + + <pre class="fcndec"> void XMLCALL XML_ParserFree(XML_Parser p); </pre> -<div class="fcndef"> -Free memory used by the parser. Your application is responsible for -freeing any memory associated with <a href="#userdata">user data</a>. -</div> + <div class="fcndef"> + <p> + Free memory used by the parser. + </p> + + <p> + <strong>Note:</strong> Your application is responsible for freeing any memory + associated with <a href="#userdata">user data</a>. + </p> + + <p> + <strong>Note:</strong> Please be sure to free subparsers created by + <code><a href= + "#XML_ExternalEntityParserCreate">XML_ExternalEntityParserCreate</a></code> + <em>prior to</em> freeing their related parent parser, as subparsers reference + and use parts of their respective parent parser, internally. Parent parsers + must outlive subparsers. + </p> + </div> -<h4 id="XML_ParserReset">XML_ParserReset</h4> -<pre class="fcndec"> + <h4 id="XML_ParserReset"> + XML_ParserReset + </h4> + + <pre class="fcndec"> XML_Bool XMLCALL XML_ParserReset(XML_Parser p, const XML_Char *encoding); </pre> -<div class="fcndef"> -Clean up the memory structures maintained by the parser so that it may -be used again. After this has been called, <code>parser</code> is -ready to start parsing a new document. All handlers are cleared from -the parser, except for the unknownEncodingHandler. The parser's external -state is re-initialized except for the values of ns and ns_triplets. -This function may not be used on a parser created using <code><a href= -"#XML_ExternalEntityParserCreate" >XML_ExternalEntityParserCreate</a -></code>; it will return <code>XML_FALSE</code> in that case. Returns -<code>XML_TRUE</code> on success. Your application is responsible for -dealing with any memory associated with <a href="#userdata">user data</a>. -</div> + <div class="fcndef"> + Clean up the memory structures maintained by the parser so that it may be used + again. 
After this has been called, <code>parser</code> is ready to start parsing + a new document. All handlers are cleared from the parser, except for the + unknownEncodingHandler. The parser's external state is re-initialized except for + the values of ns and ns_triplets. This function may not be used on a parser + created using <code><a href= + "#XML_ExternalEntityParserCreate">XML_ExternalEntityParserCreate</a></code>; it + will return <code>XML_FALSE</code> in that case. Returns <code>XML_TRUE</code> on + success. Your application is responsible for dealing with any memory associated + with <a href="#userdata">user data</a>. + </div> + + <h3> + <a id="parsing" name="parsing">Parsing</a> + </h3> -<h3><a name="parsing">Parsing</a></h3> + <p> + To state the obvious: the three parsing functions <code><a href= + "#XML_Parse">XML_Parse</a></code>, <code><a href= + "#XML_ParseBuffer">XML_ParseBuffer</a></code> and <code><a href= + "#XML_GetBuffer">XML_GetBuffer</a></code> must not be called from within a + handler unless they operate on a separate parser instance, that is, one that did + not call the handler. For example, it is OK to call the parsing functions from + within an <code>XML_ExternalEntityRefHandler</code>, if they apply to the parser + created by <code><a href= + "#XML_ExternalEntityParserCreate">XML_ExternalEntityParserCreate</a></code>. + </p> -<p>To state the obvious: the three parsing functions <code><a href= -"#XML_Parse" >XML_Parse</a></code>, <code><a href= "#XML_ParseBuffer"> -XML_ParseBuffer</a></code> and <code><a href= "#XML_GetBuffer"> -XML_GetBuffer</a></code> must not be called from within a handler -unless they operate on a separate parser instance, that is, one that -did not call the handler. 
For example, it is OK to call the parsing -functions from within an <code>XML_ExternalEntityRefHandler</code>, -if they apply to the parser created by -<code><a href= "#XML_ExternalEntityParserCreate" ->XML_ExternalEntityParserCreate</a></code>.</p> + <p> + Note: The <code>len</code> argument passed to these functions should be + considerably less than the maximum value for an integer, as it could create an + integer overflow situation if the added lengths of a buffer and the unprocessed + portion of the previous buffer exceed the maximum integer value. Input data at + the end of a buffer will remain unprocessed if it is part of an XML token for + which the end is not part of that buffer. + </p> -<p>Note: The <code>len</code> argument passed to these functions -should be considerably less than the maximum value for an integer, -as it could create an integer overflow situation if the added -lengths of a buffer and the unprocessed portion of the previous buffer -exceed the maximum integer value. Input data at the end of a buffer -will remain unprocessed if it is part of an XML token for which the -end is not part of that buffer.</p> + <p> + <a id="isFinal" name="isFinal"></a>The application <em>must</em> make a + concluding <code><a href="#XML_Parse">XML_Parse</a></code> or <code><a href= + "#XML_ParseBuffer">XML_ParseBuffer</a></code> call with <code>isFinal</code> set + to <code>XML_TRUE</code>. 
+ </p> -<p><a name="isFinal"></a>The application <em>must</em> make a concluding -<code><a href="#XML_Parse">XML_Parse</a></code> or -<code><a href="#XML_ParseBuffer">XML_ParseBuffer</a></code> call -with <code>isFinal</code> set to <code>XML_TRUE</code>.</p> + <h4 id="XML_Parse"> + XML_Parse + </h4> -<h4 id="XML_Parse">XML_Parse</h4> -<pre class="fcndec"> + <pre class="fcndec"> enum XML_Status XMLCALL XML_Parse(XML_Parser p, const char *s, int len, int isFinal); </pre> -<pre class="signature"> + + <pre class="signature"> enum XML_Status { XML_STATUS_ERROR = 0, XML_STATUS_OK = 1 }; </pre> -<div class="fcndef"> -<p> -Parse some more of the document. The string <code>s</code> is a buffer -containing part (or perhaps all) of the document. The number of bytes of s -that are part of the document is indicated by <code>len</code>. This means -that <code>s</code> doesn't have to be null-terminated. It also means that -if <code>len</code> is larger than the number of bytes in the block of -memory that <code>s</code> points at, then a memory fault is likely. -Negative values for <code>len</code> are rejected since Expat 2.2.1. -The -<code>isFinal</code> parameter informs the parser that this is the last -piece of the document. Frequently, the last piece is empty (i.e. -<code>len</code> is zero.) -</p> + <div class="fcndef"> + <p> + Parse some more of the document. The string <code>s</code> is a buffer + containing part (or perhaps all) of the document. The number of bytes of s that + are part of the document is indicated by <code>len</code>. This means that + <code>s</code> doesn't have to be null-terminated. It also means that if + <code>len</code> is larger than the number of bytes in the block of memory that + <code>s</code> points at, then a memory fault is likely. Negative values for + <code>len</code> are rejected since Expat 2.2.1. The <code>isFinal</code> + parameter informs the parser that this is the last piece of the document. 
+ Frequently, the last piece is empty (i.e. <code>len</code> is zero.) + </p> + + <p> + If a parse error occurred, it returns <code>XML_STATUS_ERROR</code>. Otherwise + it returns <code>XML_STATUS_OK</code> value. Note that regardless of the return + value, there is no guarantee that all provided input has been parsed; only + after <a href="#isFinal">the concluding call</a> will all handler callbacks and + parsing errors have happened. + </p> -<p> -If a parse error occurred, it returns <code>XML_STATUS_ERROR</code>. -Otherwise it returns <code>XML_STATUS_OK</code> value. -Note that regardless of the return value, there is no guarantee that all -provided input has been parsed; only after <a href="#isFinal">the -concluding call</a> will all handler callbacks and parsing errors have -happened. -</p> + <p> + Simplified, <code>XML_Parse</code> can be considered a convenience wrapper that + is pairing calls to <code><a href="#XML_GetBuffer">XML_GetBuffer</a></code> and + <code><a href="#XML_ParseBuffer">XML_ParseBuffer</a></code> (when Expat is + built with macro <code>XML_CONTEXT_BYTES</code> defined to a positive value, + which is both common and default). <code>XML_Parse</code> is then functionally + equivalent to calling <code><a href="#XML_GetBuffer">XML_GetBuffer</a></code>, + <code>memcpy</code>, and <code><a href= + "#XML_ParseBuffer">XML_ParseBuffer</a></code>. + </p> -<p> -Simplified, <code>XML_Parse</code> can be considered a convenience wrapper -that is pairing calls -to <code><a href="#XML_GetBuffer">XML_GetBuffer</a></code> -and <code><a href="#XML_ParseBuffer">XML_ParseBuffer</a></code> -(when Expat is built with macro <code>XML_CONTEXT_BYTES</code> -defined to a positive value, which is both common and default). -<code>XML_Parse</code> is then functionally equivalent to calling -<code><a href="#XML_GetBuffer">XML_GetBuffer</a></code>, -<code>memcpy</code>, and -<code><a href="#XML_ParseBuffer">XML_ParseBuffer</a></code>. 
-</p> + <p> + To avoid double copying of the input, direct use of functions <code><a href= + "#XML_GetBuffer">XML_GetBuffer</a></code> and <code><a href= + "#XML_ParseBuffer">XML_ParseBuffer</a></code> is advised for most production + use, e.g. if you're using <code>read</code> or similar functionality to fill + your buffers, fill directly into the buffer from <code><a href= + "#XML_GetBuffer">XML_GetBuffer</a></code>, then parse with <code><a href= + "#XML_ParseBuffer">XML_ParseBuffer</a></code>. + </p> + </div> -<p> -To avoid double copying of the input, direct use of functions -<code><a href="#XML_GetBuffer">XML_GetBuffer</a></code> and -<code><a href="#XML_ParseBuffer">XML_ParseBuffer</a></code> is advised -for most production use, e.g. -if you're using <code>read</code> or similar functionality to fill your -buffers, fill directly into the buffer from -<code><a href="#XML_GetBuffer">XML_GetBuffer</a></code>, -then parse with <code><a href="#XML_ParseBuffer">XML_ParseBuffer</a></code>. -</p> -</div> + <h4 id="XML_ParseBuffer"> + XML_ParseBuffer + </h4> -<h4 id="XML_ParseBuffer">XML_ParseBuffer</h4> -<pre class="fcndec"> + <pre class="fcndec"> enum XML_Status XMLCALL XML_ParseBuffer(XML_Parser p, int len, int isFinal); </pre> -<div class="fcndef"> -<p> -This is just like <code><a href= "#XML_Parse" >XML_Parse</a></code>, -except in this case Expat provides the buffer. By obtaining the -buffer from Expat with the <code><a href= "#XML_GetBuffer" ->XML_GetBuffer</a></code> function, the application can avoid double -copying of the input. -</p> + <div class="fcndef"> + <p> + This is just like <code><a href="#XML_Parse">XML_Parse</a></code>, except in + this case Expat provides the buffer. By obtaining the buffer from Expat with + the <code><a href="#XML_GetBuffer">XML_GetBuffer</a></code> function, the + application can avoid double copying of the input. + </p> -<p> -Negative values for <code>len</code> are rejected since Expat 2.6.3. 
-</p> -</div> + <p> + Negative values for <code>len</code> are rejected since Expat 2.6.3. + </p> + </div> -<h4 id="XML_GetBuffer">XML_GetBuffer</h4> -<pre class="fcndec"> + <h4 id="XML_GetBuffer"> + XML_GetBuffer + </h4> + + <pre class="fcndec"> void * XMLCALL XML_GetBuffer(XML_Parser p, int len); </pre> -<div class="fcndef"> -Obtain a buffer of size <code>len</code> to read a piece of the document -into. A <code>NULL</code> value is returned if Expat can't allocate enough memory for -this buffer. A <code>NULL</code> value may also be returned if <code>len</code> is zero. -This has to be called prior to every call to -<code><a href= "#XML_ParseBuffer" >XML_ParseBuffer</a></code>. A -typical use would look like this: + <div class="fcndef"> + Obtain a buffer of size <code>len</code> to read a piece of the document into. A + <code>NULL</code> value is returned if Expat can't allocate enough memory for + this buffer. A <code>NULL</code> value may also be returned if <code>len</code> + is zero. This has to be called prior to every call to <code><a href= + "#XML_ParseBuffer">XML_ParseBuffer</a></code>. A typical use would look like + this: -<pre class="eg"> + <pre class="eg"> for (;;) { int bytes_read; void *buff = XML_GetBuffer(p, BUFF_SIZE); @@ -1235,115 +1744,168 @@ for (;;) { break; } </pre> -</div> + </div> + + <h4 id="XML_StopParser"> + XML_StopParser + </h4> -<h4 id="XML_StopParser">XML_StopParser</h4> -<pre class="fcndec"> + <pre class="fcndec"> enum XML_Status XMLCALL XML_StopParser(XML_Parser p, XML_Bool resumable); </pre> -<div class="fcndef"> + <div class="fcndef"> + <p> + Stops parsing, causing <code><a href="#XML_Parse">XML_Parse</a></code> or + <code><a href="#XML_ParseBuffer">XML_ParseBuffer</a></code> to return. Must be + called from within a call-back handler, except when aborting (when + <code>resumable</code> is <code>XML_FALSE</code>) an already suspended parser. 
+ Some call-backs may still follow because they would otherwise get lost, + including + </p> + + <ul> + <li>the end element handler for empty elements when stopped in the start + element handler, + </li> + + <li>the end namespace declaration handler when stopped in the end element + handler, + </li> + + <li>the character data handler when stopped in the character data handler while + making multiple call-backs on a contiguous chunk of characters, + </li> + </ul> + + <p> + and possibly others. + </p> + + <p> + This can be called from most handlers, including DTD related call-backs, except + when parsing an external parameter entity and <code>resumable</code> is + <code>XML_TRUE</code>. Returns <code>XML_STATUS_OK</code> when successful, + <code>XML_STATUS_ERROR</code> otherwise. The possible error codes are: + </p> + + <dl> + <dt> + <code>XML_ERROR_NOT_STARTED</code> + </dt> + + <dd> + when stopping or suspending a parser before it has started, added in Expat + 2.6.4. + </dd> -<p>Stops parsing, causing <code><a href= "#XML_Parse" ->XML_Parse</a></code> or <code><a href= "#XML_ParseBuffer" ->XML_ParseBuffer</a></code> to return. Must be called from within a -call-back handler, except when aborting (when <code>resumable</code> -is <code>XML_FALSE</code>) an already suspended parser. 
Some -call-backs may still follow because they would otherwise get -lost, including</p> -<ul> - <li> the end element handler for empty elements when stopped in the - start element handler,</li> - <li> the end namespace declaration handler when stopped in the end - element handler,</li> - <li> the character data handler when stopped in the character data handler - while making multiple call-backs on a contiguous chunk of characters,</li> -</ul> -<p>and possibly others.</p> + <dt> + <code>XML_ERROR_SUSPENDED</code> + </dt> -<p>This can be called from most handlers, including DTD related -call-backs, except when parsing an external parameter entity and -<code>resumable</code> is <code>XML_TRUE</code>. Returns -<code>XML_STATUS_OK</code> when successful, -<code>XML_STATUS_ERROR</code> otherwise. The possible error codes -are:</p> -<dl> - <dt><code>XML_ERROR_NOT_STARTED</code></dt> - <dd> - when stopping or suspending a parser before it has started, - added in Expat 2.6.4. - </dd> - <dt><code>XML_ERROR_SUSPENDED</code></dt> - <dd>when suspending an already suspended parser.</dd> - <dt><code>XML_ERROR_FINISHED</code></dt> - <dd>when the parser has already finished.</dd> - <dt><code>XML_ERROR_SUSPEND_PE</code></dt> - <dd>when suspending while parsing an external PE.</dd> -</dl> + <dd> + when suspending an already suspended parser. + </dd> -<p>Since the stop/resume feature requires application support in the -outer parsing loop, it is an error to call this function for a parser -not being handled appropriately; see <a href= "#stop-resume" ->Temporarily Stopping Parsing</a> for more information.</p> + <dt> + <code>XML_ERROR_FINISHED</code> + </dt> -<p>When <code>resumable</code> is <code>XML_TRUE</code> then parsing -is <em>suspended</em>, that is, <code><a href= "#XML_Parse" ->XML_Parse</a></code> and <code><a href= "#XML_ParseBuffer" ->XML_ParseBuffer</a></code> return <code>XML_STATUS_SUSPENDED</code>. 
-Otherwise, parsing is <em>aborted</em>, that is, <code><a href= -"#XML_Parse" >XML_Parse</a></code> and <code><a href= -"#XML_ParseBuffer" >XML_ParseBuffer</a></code> return -<code>XML_STATUS_ERROR</code> with error code -<code>XML_ERROR_ABORTED</code>.</p> + <dd> + when the parser has already finished. + </dd> -<p><strong>Note:</strong> -This will be applied to the current parser instance only, that is, if -there is a parent parser then it will continue parsing when the -external entity reference handler returns. It is up to the -implementation of that handler to call <code><a href= -"#XML_StopParser" >XML_StopParser</a></code> on the parent parser -(recursively), if one wants to stop parsing altogether.</p> + <dt> + <code>XML_ERROR_SUSPEND_PE</code> + </dt> -<p>When suspended, parsing can be resumed by calling <code><a href= -"#XML_ResumeParser" >XML_ResumeParser</a></code>.</p> + <dd> + when suspending while parsing an external PE. + </dd> + </dl> -<p>New in Expat 1.95.8.</p> -</div> + <p> + Since the stop/resume feature requires application support in the outer parsing + loop, it is an error to call this function for a parser not being handled + appropriately; see <a href="#stop-resume">Temporarily Stopping Parsing</a> for + more information. + </p> -<h4 id="XML_ResumeParser">XML_ResumeParser</h4> -<pre class="fcndec"> + <p> + When <code>resumable</code> is <code>XML_TRUE</code> then parsing is + <em>suspended</em>, that is, <code><a href="#XML_Parse">XML_Parse</a></code> + and <code><a href="#XML_ParseBuffer">XML_ParseBuffer</a></code> return + <code>XML_STATUS_SUSPENDED</code>. Otherwise, parsing is <em>aborted</em>, that + is, <code><a href="#XML_Parse">XML_Parse</a></code> and <code><a href= + "#XML_ParseBuffer">XML_ParseBuffer</a></code> return + <code>XML_STATUS_ERROR</code> with error code <code>XML_ERROR_ABORTED</code>. 
+ </p> + + <p> + <strong>Note:</strong> This will be applied to the current parser instance + only, that is, if there is a parent parser then it will continue parsing when + the external entity reference handler returns. It is up to the implementation + of that handler to call <code><a href= + "#XML_StopParser">XML_StopParser</a></code> on the parent parser (recursively), + if one wants to stop parsing altogether. + </p> + + <p> + When suspended, parsing can be resumed by calling <code><a href= + "#XML_ResumeParser">XML_ResumeParser</a></code>. + </p> + + <p> + New in Expat 1.95.8. + </p> + </div> + + <h4 id="XML_ResumeParser"> + XML_ResumeParser + </h4> + + <pre class="fcndec"> enum XML_Status XMLCALL XML_ResumeParser(XML_Parser p); </pre> -<div class="fcndef"> -<p>Resumes parsing after it has been suspended with <code><a href= -"#XML_StopParser" >XML_StopParser</a></code>. Must not be called from -within a handler call-back. Returns same status codes as <code><a -href= "#XML_Parse">XML_Parse</a></code> or <code><a href= -"#XML_ParseBuffer" >XML_ParseBuffer</a></code>. An additional error -code, <code>XML_ERROR_NOT_SUSPENDED</code>, will be returned if the -parser was not currently suspended.</p> + <div class="fcndef"> + <p> + Resumes parsing after it has been suspended with <code><a href= + "#XML_StopParser">XML_StopParser</a></code>. Must not be called from within a + handler call-back. Returns same status codes as <code><a href= + "#XML_Parse">XML_Parse</a></code> or <code><a href= + "#XML_ParseBuffer">XML_ParseBuffer</a></code>. An additional error code, + <code>XML_ERROR_NOT_SUSPENDED</code>, will be returned if the parser was not + currently suspended. + </p> + + <p> + <strong>Note:</strong> This must be called on the most deeply nested child + parser instance first, and on its parent parser only after the child parser has + finished, to be applied recursively until the document entity's parser is + restarted. 
That is, the parent parser will not resume by itself and it is up to + the application to call <code><a href= + "#XML_ResumeParser">XML_ResumeParser</a></code> on it at the appropriate + moment. + </p> -<p><strong>Note:</strong> -This must be called on the most deeply nested child parser instance -first, and on its parent parser only after the child parser has -finished, to be applied recursively until the document entity's parser -is restarted. That is, the parent parser will not resume by itself -and it is up to the application to call <code><a href= -"#XML_ResumeParser" >XML_ResumeParser</a></code> on it at the -appropriate moment.</p> + <p> + New in Expat 1.95.8. + </p> + </div> -<p>New in Expat 1.95.8.</p> -</div> + <h4 id="XML_GetParsingStatus"> + XML_GetParsingStatus + </h4> -<h4 id="XML_GetParsingStatus">XML_GetParsingStatus</h4> -<pre class="fcndec"> + <pre class="fcndec"> void XMLCALL XML_GetParsingStatus(XML_Parser p, XML_ParsingStatus *status); </pre> -<pre class="signature"> + + <pre class="signature"> enum XML_Parsing { XML_INITIALIZED, XML_PARSING, @@ -1356,244 +1918,322 @@ typedef struct { XML_Bool finalBuffer; } XML_ParsingStatus; </pre> -<div class="fcndef"> -<p>Returns status of parser with respect to being initialized, -parsing, finished, or suspended, and whether the final buffer is being -processed. The <code>status</code> parameter <em>must not</em> be -<code>NULL</code>.</p> + <div class="fcndef"> + <p> + Returns status of parser with respect to being initialized, parsing, finished, + or suspended, and whether the final buffer is being processed. The + <code>status</code> parameter <em>must not</em> be <code>NULL</code>. + </p> -<p>New in Expat 1.95.8.</p> -</div> + <p> + New in Expat 1.95.8. 
+ </p> + </div> + <h3> + <a id="setting" name="setting">Handler Setting</a> + </h3> -<h3><a name="setting">Handler Setting</a></h3> + <p> + Although handlers are typically set prior to parsing and left alone, an + application may choose to set or change the handler for a parsing event while the + parse is in progress. For instance, your application may choose to ignore all + text not descended from a <code>para</code> element. One way it could do this is + to set the character handler when a para start tag is seen, and unset it for the + corresponding end tag. + </p> -<p>Although handlers are typically set prior to parsing and left alone, an -application may choose to set or change the handler for a parsing event -while the parse is in progress. For instance, your application may choose -to ignore all text not descended from a <code>para</code> element. One -way it could do this is to set the character handler when a para start tag -is seen, and unset it for the corresponding end tag.</p> + <p> + A handler may be <em>unset</em> by providing a <code>NULL</code> pointer to the + appropriate handler setter. None of the handler setting functions have a return + value. + </p> -<p>A handler may be <em>unset</em> by providing a <code>NULL</code> pointer to the -appropriate handler setter. None of the handler setting functions have -a return value.</p> + <p> + Your handlers will be receiving strings in arrays of type <code>XML_Char</code>. + This type is conditionally defined in expat.h as either <code>char</code>, + <code>wchar_t</code> or <code>unsigned short</code>. The former implies UTF-8 + encoding, the latter two imply UTF-16 encoding. Note that you'll receive them in + this form independent of the original encoding of the document. + </p> -<p>Your handlers will be receiving strings in arrays of type -<code>XML_Char</code>. This type is conditionally defined in expat.h as -either <code>char</code>, <code>wchar_t</code> or <code>unsigned short</code>. 
-The former implies UTF-8 encoding, the latter two imply UTF-16 encoding. -Note that you'll receive them in this form independent of the original -encoding of the document.</p> + <div class="handler"> + <h4 id="XML_SetStartElementHandler"> + XML_SetStartElementHandler + </h4> -<div class="handler"> -<h4 id="XML_SetStartElementHandler">XML_SetStartElementHandler</h4> -<pre class="setter"> + <pre class="setter"> void XMLCALL XML_SetStartElementHandler(XML_Parser p, XML_StartElementHandler start); </pre> -<pre class="signature"> + + <pre class="signature"> typedef void (XMLCALL *XML_StartElementHandler)(void *userData, const XML_Char *name, const XML_Char **atts); </pre> -<p>Set handler for start (and empty) tags. Attributes are passed to the start -handler as a pointer to a vector of char pointers. Each attribute seen in -a start (or empty) tag occupies 2 consecutive places in this vector: the -attribute name followed by the attribute value. These pairs are terminated -by a <code>NULL</code> pointer.</p> -<p>Note that an empty tag generates a call to both start and end handlers -(in that order).</p> -</div> + <p> + Set handler for start (and empty) tags. Attributes are passed to the start + handler as a pointer to a vector of char pointers. Each attribute seen in a + start (or empty) tag occupies 2 consecutive places in this vector: the + attribute name followed by the attribute value. These pairs are terminated by a + <code>NULL</code> pointer. + </p> -<div class="handler"> -<h4 id="XML_SetEndElementHandler">XML_SetEndElementHandler</h4> -<pre class="setter"> + <p> + Note that an empty tag generates a call to both start and end handlers (in that + order). 
+ </p> + </div> + + <div class="handler"> + <h4 id="XML_SetEndElementHandler"> + XML_SetEndElementHandler + </h4> + + <pre class="setter"> void XMLCALL XML_SetEndElementHandler(XML_Parser p, XML_EndElementHandler); </pre> -<pre class="signature"> + + <pre class="signature"> typedef void (XMLCALL *XML_EndElementHandler)(void *userData, const XML_Char *name); </pre> -<p>Set handler for end (and empty) tags. As noted above, an empty tag -generates a call to both start and end handlers.</p> -</div> + <p> + Set handler for end (and empty) tags. As noted above, an empty tag generates a + call to both start and end handlers. + </p> + </div> -<div class="handler"> -<h4 id="XML_SetElementHandler">XML_SetElementHandler</h4> -<pre class="setter"> + <div class="handler"> + <h4 id="XML_SetElementHandler"> + XML_SetElementHandler + </h4> + + <pre class="setter"> void XMLCALL XML_SetElementHandler(XML_Parser p, XML_StartElementHandler start, XML_EndElementHandler end); </pre> -<p>Set handlers for start and end tags with one call.</p> -</div> + <p> + Set handlers for start and end tags with one call. + </p> + </div> + + <div class="handler"> + <h4 id="XML_SetCharacterDataHandler"> + XML_SetCharacterDataHandler + </h4> -<div class="handler"> -<h4 id="XML_SetCharacterDataHandler">XML_SetCharacterDataHandler</h4> -<pre class="setter"> + <pre class="setter"> void XMLCALL XML_SetCharacterDataHandler(XML_Parser p, XML_CharacterDataHandler charhndl) </pre> -<pre class="signature"> + + <pre class="signature"> typedef void (XMLCALL *XML_CharacterDataHandler)(void *userData, const XML_Char *s, int len); </pre> -<p>Set a text handler. The string your handler receives -is <em>NOT null-terminated</em>. You have to use the length argument -to deal with the end of the string. A single block of contiguous text -free of markup may still result in a sequence of calls to this handler. -In other words, if you're searching for a pattern in the text, it may -be split across calls to this handler. 
Note: Setting this handler to <code>NULL</code> -may <em>NOT immediately</em> terminate call-backs if the parser is currently -processing such a single block of contiguous markup-free text, as the parser -will continue calling back until the end of the block is reached.</p> -</div> + <p> + Set a text handler. The string your handler receives is <em>NOT + null-terminated</em>. You have to use the length argument to deal with the end + of the string. A single block of contiguous text free of markup may still + result in a sequence of calls to this handler. In other words, if you're + searching for a pattern in the text, it may be split across calls to this + handler. Note: Setting this handler to <code>NULL</code> may <em>NOT + immediately</em> terminate call-backs if the parser is currently processing + such a single block of contiguous markup-free text, as the parser will continue + calling back until the end of the block is reached. + </p> + </div> + + <div class="handler"> + <h4 id="XML_SetProcessingInstructionHandler"> + XML_SetProcessingInstructionHandler + </h4> -<div class="handler"> -<h4 id="XML_SetProcessingInstructionHandler">XML_SetProcessingInstructionHandler</h4> -<pre class="setter"> + <pre class="setter"> void XMLCALL XML_SetProcessingInstructionHandler(XML_Parser p, XML_ProcessingInstructionHandler proc) </pre> -<pre class="signature"> + + <pre class="signature"> typedef void (XMLCALL *XML_ProcessingInstructionHandler)(void *userData, const XML_Char *target, const XML_Char *data); </pre> -<p>Set a handler for processing instructions. The target is the first word -in the processing instruction. The data is the rest of the characters in -it after skipping all whitespace after the initial word.</p> -</div> + <p> + Set a handler for processing instructions. The target is the first word in the + processing instruction. The data is the rest of the characters in it after + skipping all whitespace after the initial word. 
+ </p> + </div> + + <div class="handler"> + <h4 id="XML_SetCommentHandler"> + XML_SetCommentHandler + </h4> -<div class="handler"> -<h4 id="XML_SetCommentHandler">XML_SetCommentHandler</h4> -<pre class="setter"> + <pre class="setter"> void XMLCALL XML_SetCommentHandler(XML_Parser p, XML_CommentHandler cmnt) </pre> -<pre class="signature"> + + <pre class="signature"> typedef void (XMLCALL *XML_CommentHandler)(void *userData, const XML_Char *data); </pre> -<p>Set a handler for comments. The data is all text inside the comment -delimiters.</p> -</div> + <p> + Set a handler for comments. The data is all text inside the comment delimiters. + </p> + </div> + + <div class="handler"> + <h4 id="XML_SetStartCdataSectionHandler"> + XML_SetStartCdataSectionHandler + </h4> -<div class="handler"> -<h4 id="XML_SetStartCdataSectionHandler">XML_SetStartCdataSectionHandler</h4> -<pre class="setter"> + <pre class="setter"> void XMLCALL XML_SetStartCdataSectionHandler(XML_Parser p, XML_StartCdataSectionHandler start); </pre> -<pre class="signature"> + + <pre class="signature"> typedef void (XMLCALL *XML_StartCdataSectionHandler)(void *userData); </pre> -<p>Set a handler that gets called at the beginning of a CDATA section.</p> -</div> + <p> + Set a handler that gets called at the beginning of a CDATA section. + </p> + </div> + + <div class="handler"> + <h4 id="XML_SetEndCdataSectionHandler"> + XML_SetEndCdataSectionHandler + </h4> -<div class="handler"> -<h4 id="XML_SetEndCdataSectionHandler">XML_SetEndCdataSectionHandler</h4> -<pre class="setter"> + <pre class="setter"> void XMLCALL XML_SetEndCdataSectionHandler(XML_Parser p, XML_EndCdataSectionHandler end); </pre> -<pre class="signature"> + + <pre class="signature"> typedef void (XMLCALL *XML_EndCdataSectionHandler)(void *userData); </pre> -<p>Set a handler that gets called at the end of a CDATA section.</p> -</div> + <p> + Set a handler that gets called at the end of a CDATA section. 
+ </p> + </div> + + <div class="handler"> + <h4 id="XML_SetCdataSectionHandler"> + XML_SetCdataSectionHandler + </h4> -<div class="handler"> -<h4 id="XML_SetCdataSectionHandler">XML_SetCdataSectionHandler</h4> -<pre class="setter"> + <pre class="setter"> void XMLCALL XML_SetCdataSectionHandler(XML_Parser p, XML_StartCdataSectionHandler start, XML_EndCdataSectionHandler end) </pre> -<p>Sets both CDATA section handlers with one call.</p> -</div> + <p> + Sets both CDATA section handlers with one call. + </p> + </div> -<div class="handler"> -<h4 id="XML_SetDefaultHandler">XML_SetDefaultHandler</h4> -<pre class="setter"> + <div class="handler"> + <h4 id="XML_SetDefaultHandler"> + XML_SetDefaultHandler + </h4> + + <pre class="setter"> void XMLCALL XML_SetDefaultHandler(XML_Parser p, XML_DefaultHandler hndl) </pre> -<pre class="signature"> + + <pre class="signature"> typedef void (XMLCALL *XML_DefaultHandler)(void *userData, const XML_Char *s, int len); </pre> + <p> + Sets a handler for any characters in the document which wouldn't otherwise be + handled. This includes both data for which no handlers can be set (like some + kinds of DTD declarations) and data which could be reported but which currently + has no handler set. The characters are passed exactly as they were present in + the XML document except that they will be encoded in UTF-8 or UTF-16. Line + boundaries are not normalized. Note that a byte order mark character is not + passed to the default handler. There are no guarantees about how characters are + divided between calls to the default handler: for example, a comment might be + split between multiple calls. Setting the handler with this call has the side + effect of turning off expansion of references to internally defined general + entities. Instead these references are passed to the default handler. + </p> -<p>Sets a handler for any characters in the document which wouldn't -otherwise be handled. 
This includes both data for which no handlers -can be set (like some kinds of DTD declarations) and data which could -be reported but which currently has no handler set. The characters -are passed exactly as they were present in the XML document except -that they will be encoded in UTF-8 or UTF-16. Line boundaries are not -normalized. Note that a byte order mark character is not passed to the -default handler. There are no guarantees about how characters are -divided between calls to the default handler: for example, a comment -might be split between multiple calls. Setting the handler with -this call has the side effect of turning off expansion of references -to internally defined general entities. Instead these references are -passed to the default handler.</p> + <p> + See also <code><a href="#XML_DefaultCurrent">XML_DefaultCurrent</a></code>. + </p> + </div> -<p>See also <code><a -href="#XML_DefaultCurrent">XML_DefaultCurrent</a></code>.</p> -</div> + <div class="handler"> + <h4 id="XML_SetDefaultHandlerExpand"> + XML_SetDefaultHandlerExpand + </h4> -<div class="handler"> -<h4 id="XML_SetDefaultHandlerExpand">XML_SetDefaultHandlerExpand</h4> -<pre class="setter"> + <pre class="setter"> void XMLCALL XML_SetDefaultHandlerExpand(XML_Parser p, XML_DefaultHandler hndl) </pre> -<pre class="signature"> + + <pre class="signature"> typedef void (XMLCALL *XML_DefaultHandler)(void *userData, const XML_Char *s, int len); </pre> -<p>This sets a default handler, but doesn't inhibit the expansion of -internal entity references. The entity reference will not be passed -to the default handler.</p> + <p> + This sets a default handler, but doesn't inhibit the expansion of internal + entity references. The entity reference will not be passed to the default + handler. + </p> + + <p> + See also <code><a href="#XML_DefaultCurrent">XML_DefaultCurrent</a></code>. 
+ </p> + </div> -<p>See also <code><a -href="#XML_DefaultCurrent">XML_DefaultCurrent</a></code>.</p> -</div> + <div class="handler"> + <h4 id="XML_SetExternalEntityRefHandler"> + XML_SetExternalEntityRefHandler + </h4> -<div class="handler"> -<h4 id="XML_SetExternalEntityRefHandler">XML_SetExternalEntityRefHandler</h4> -<pre class="setter"> + <pre class="setter"> void XMLCALL XML_SetExternalEntityRefHandler(XML_Parser p, XML_ExternalEntityRefHandler hndl) </pre> -<pre class="signature"> + + <pre class="signature"> typedef int (XMLCALL *XML_ExternalEntityRefHandler)(XML_Parser p, const XML_Char *context, @@ -1601,109 +2241,151 @@ typedef int const XML_Char *systemId, const XML_Char *publicId); </pre> -<p>Set an external entity reference handler. This handler is also -called for processing an external DTD subset if parameter entity parsing -is in effect. (See <a href="#XML_SetParamEntityParsing"> -<code>XML_SetParamEntityParsing</code></a>.)</p> + <p> + Set an external entity reference handler. This handler is also called for + processing an external DTD subset if parameter entity parsing is in effect. + (See <a href= + "#XML_SetParamEntityParsing"><code>XML_SetParamEntityParsing</code></a>.) + </p> + + <p> + <strong>Warning:</strong> Using an external entity reference handler can lead + to <a href="https://libexpat.github.io/doc/xml-security/#external-entities">XXE + vulnerabilities</a>. It should only be used in applications that do not parse + untrusted XML input. + </p> + + <p> + The <code>context</code> parameter specifies the parsing context in the format + expected by the <code>context</code> argument to <code><a href= + "#XML_ExternalEntityParserCreate">XML_ExternalEntityParserCreate</a></code>. + <code>context</code> is valid only until the handler returns, so if the referenced + entity is to be parsed later, it must be copied.
<code>context</code> is + <code>NULL</code> only when the entity is a parameter entity, which is how one + can differentiate between general and parameter entities. + </p> -<p>The <code>context</code> parameter specifies the parsing context in -the format expected by the <code>context</code> argument to <code><a -href="#XML_ExternalEntityParserCreate" ->XML_ExternalEntityParserCreate</a></code>. <code>code</code> is -valid only until the handler returns, so if the referenced entity is -to be parsed later, it must be copied. <code>context</code> is <code>NULL</code> -only when the entity is a parameter entity, which is how one can -differentiate between general and parameter entities.</p> + <p> + The <code>base</code> parameter is the base to use for relative system + identifiers. It is set by <code><a href="#XML_SetBase">XML_SetBase</a></code> + and may be <code>NULL</code>. The <code>publicId</code> parameter is the public + id given in the entity declaration and may be <code>NULL</code>. + <code>systemId</code> is the system identifier specified in the entity + declaration and is never <code>NULL</code>. + </p> -<p>The <code>base</code> parameter is the base to use for relative -system identifiers. It is set by <code><a -href="#XML_SetBase">XML_SetBase</a></code> and may be <code>NULL</code>. The -<code>publicId</code> parameter is the public id given in the entity -declaration and may be <code>NULL</code>. <code>systemId</code> is the system -identifier specified in the entity declaration and is never <code>NULL</code>.</p> + <p> + There are a couple of ways in which this handler differs from others. First, + this handler returns a status indicator (an integer). + <code>XML_STATUS_OK</code> should be returned for successful handling of the + external entity reference. Returning <code>XML_STATUS_ERROR</code> indicates + failure, and causes the calling parser to return an + <code>XML_ERROR_EXTERNAL_ENTITY_HANDLING</code> error. 
+ </p> -<p>There are a couple of ways in which this handler differs from -others. First, this handler returns a status indicator (an -integer). <code>XML_STATUS_OK</code> should be returned for successful -handling of the external entity reference. Returning -<code>XML_STATUS_ERROR</code> indicates failure, and causes the -calling parser to return an -<code>XML_ERROR_EXTERNAL_ENTITY_HANDLING</code> error.</p> + <p> + Second, instead of having the user data as its first argument, it receives the + parser that encountered the entity reference. This, along with the context + parameter, may be used as arguments to a call to <code><a href= + "#XML_ExternalEntityParserCreate">XML_ExternalEntityParserCreate</a></code>. + Using the returned parser, the body of the external entity can be recursively + parsed. + </p> -<p>Second, instead of having the user data as its first argument, it -receives the parser that encountered the entity reference. This, along -with the context parameter, may be used as arguments to a call to -<code><a href= "#XML_ExternalEntityParserCreate" ->XML_ExternalEntityParserCreate</a></code>. Using the returned -parser, the body of the external entity can be recursively parsed.</p> + <p> + Since this handler may be called recursively, it should not be saving + information into global or static variables. + </p> + </div> -<p>Since this handler may be called recursively, it should not be saving -information into global or static variables.</p> -</div> + <h4 id="XML_SetExternalEntityRefHandlerArg"> + XML_SetExternalEntityRefHandlerArg + </h4> -<h4 id="XML_SetExternalEntityRefHandlerArg">XML_SetExternalEntityRefHandlerArg</h4> -<pre class="fcndec"> + <pre class="fcndec"> void XMLCALL XML_SetExternalEntityRefHandlerArg(XML_Parser p, void *arg) </pre> -<div class="fcndef"> -<p>Set the argument passed to the ExternalEntityRefHandler. 
If -<code>arg</code> is not <code>NULL</code>, it is the new value passed to the -handler set using <code><a href="#XML_SetExternalEntityRefHandler" ->XML_SetExternalEntityRefHandler</a></code>; if <code>arg</code> is -<code>NULL</code>, the argument passed to the handler function will be the parser -object itself.</p> + <div class="fcndef"> + <p> + Set the argument passed to the ExternalEntityRefHandler. If <code>arg</code> is + not <code>NULL</code>, it is the new value passed to the handler set using + <code><a href= + "#XML_SetExternalEntityRefHandler">XML_SetExternalEntityRefHandler</a></code>; + if <code>arg</code> is <code>NULL</code>, the argument passed to the handler + function will be the parser object itself. + </p> -<p><strong>Note:</strong> -The type of <code>arg</code> and the type of the first argument to the -ExternalEntityRefHandler do not match. This function takes a -<code>void *</code> to be passed to the handler, while the handler -accepts an <code>XML_Parser</code>. This is a historical accident, -but will not be corrected before Expat 2.0 (at the earliest) to avoid -causing compiler warnings for code that's known to work with this -API. It is the responsibility of the application code to know the -actual type of the argument passed to the handler and to manage it -properly.</p> -</div> + <p> + <strong>Note:</strong> The type of <code>arg</code> and the type of the first + argument to the ExternalEntityRefHandler do not match. This function takes a + <code>void *</code> to be passed to the handler, while the handler accepts an + <code>XML_Parser</code>. This is a historical accident, but will not be + corrected before Expat 2.0 (at the earliest) to avoid causing compiler warnings + for code that's known to work with this API. It is the responsibility of the + application code to know the actual type of the argument passed to the handler + and to manage it properly. 
+ </p> + </div> -<div class="handler"> -<h4 id="XML_SetSkippedEntityHandler">XML_SetSkippedEntityHandler</h4> -<pre class="setter"> + <div class="handler"> + <h4 id="XML_SetSkippedEntityHandler"> + XML_SetSkippedEntityHandler + </h4> + + <pre class="setter"> void XMLCALL XML_SetSkippedEntityHandler(XML_Parser p, XML_SkippedEntityHandler handler) </pre> -<pre class="signature"> + + <pre class="signature"> typedef void (XMLCALL *XML_SkippedEntityHandler)(void *userData, const XML_Char *entityName, int is_parameter_entity); </pre> -<p>Set a skipped entity handler. This is called in two situations:</p> -<ol> - <li>An entity reference is encountered for which no declaration - has been read <em>and</em> this is not an error.</li> - <li>An internal entity reference is read, but not expanded, because - <a href="#XML_SetDefaultHandler"><code>XML_SetDefaultHandler</code></a> - has been called.</li> -</ol> -<p>The <code>is_parameter_entity</code> argument will be non-zero for -a parameter entity and zero for a general entity.</p> <p>Note: Skipped -parameter entities in declarations and skipped general entities in -attribute values cannot be reported, because the event would be out of -sync with the reporting of the declarations or attribute values</p> -</div> + <p> + Set a skipped entity handler. This is called in two situations: + </p> + + <ol> + <li>An entity reference is encountered for which no declaration has been read + <em>and</em> this is not an error. + </li> + + <li>An internal entity reference is read, but not expanded, because <a href= + "#XML_SetDefaultHandler"><code>XML_SetDefaultHandler</code></a> has been + called. + </li> + </ol> -<div class="handler"> -<h4 id="XML_SetUnknownEncodingHandler">XML_SetUnknownEncodingHandler</h4> -<pre class="setter"> + <p> + The <code>is_parameter_entity</code> argument will be non-zero for a parameter + entity and zero for a general entity. 
+ </p> + + <p> + Note: Skipped parameter entities in declarations and skipped general entities + in attribute values cannot be reported, because the event would be out of sync + with the reporting of the declarations or attribute values. + </p> + </div> + + <div class="handler"> + <h4 id="XML_SetUnknownEncodingHandler"> + XML_SetUnknownEncodingHandler + </h4> + + <pre class="setter"> void XMLCALL XML_SetUnknownEncodingHandler(XML_Parser p, XML_UnknownEncodingHandler enchandler, - void *encodingHandlerData) + void *encodingHandlerData) </pre> -<pre class="signature"> + + <pre class="signature"> typedef int (XMLCALL *XML_UnknownEncodingHandler)(void *encodingHandlerData, const XML_Char *name, @@ -1716,115 +2398,147 @@ typedef struct { void (XMLCALL *release)(void *data); } XML_Encoding; </pre> -<p>Set a handler to deal with encodings other than the <a -href="#builtin_encodings">built in set</a>. This should be done before -<code><a href= "#XML_Parse" >XML_Parse</a></code> or <code><a href= -"#XML_ParseBuffer" >XML_ParseBuffer</a></code> have been called on the -given parser.</p> <p>If the handler knows how to deal with an encoding -with the given name, it should fill in the <code>info</code> data -structure and return <code>XML_STATUS_OK</code>. Otherwise it -should return <code>XML_STATUS_ERROR</code>. The handler will be called -at most once per parsed (external) entity. The optional application -data pointer <code>encodingHandlerData</code> will be passed back to -the handler.</p> + <p> + Set a handler to deal with encodings other than the <a href= + "#builtin_encodings">built in set</a>. This should be done before + <code><a href="#XML_Parse">XML_Parse</a></code> or <code><a href= + "#XML_ParseBuffer">XML_ParseBuffer</a></code> have been called on the given + parser. + </p> -<p>The map array contains information for every possible leading -byte in a byte sequence.
If the corresponding value is >= 0, then it's -a single byte sequence and the byte encodes that Unicode value. If the -value is -1, then that byte is invalid as the initial byte in a sequence. -If the value is -n, where n is an integer > 1, then n is the number of -bytes in the sequence and the actual conversion is accomplished by a -call to the function pointed at by convert. This function may return -1 -if the sequence itself is invalid. The convert pointer may be <code>NULL</code> if -there are only single byte codes. The data parameter passed to the convert -function is the data pointer from <code>XML_Encoding</code>. The -string s is <em>NOT</em> null-terminated and points at the sequence of -bytes to be converted.</p> + <p> + If the handler knows how to deal with an encoding with the given name, it + should fill in the <code>info</code> data structure and return + <code>XML_STATUS_OK</code>. Otherwise it should return + <code>XML_STATUS_ERROR</code>. The handler will be called at most once per + parsed (external) entity. The optional application data pointer + <code>encodingHandlerData</code> will be passed back to the handler. + </p> -<p>The function pointed at by <code>release</code> is called by the -parser when it is finished with the encoding. It may be <code>NULL</code>.</p> -</div> + <p> + The map array contains information for every possible leading byte in a byte + sequence. If the corresponding value is >= 0, then it's a single byte + sequence and the byte encodes that Unicode value. If the value is -1, then that + byte is invalid as the initial byte in a sequence. If the value is -n, where n + is an integer > 1, then n is the number of bytes in the sequence and the + actual conversion is accomplished by a call to the function pointed at by + convert. This function may return -1 if the sequence itself is invalid. The + convert pointer may be <code>NULL</code> if there are only single byte codes. 
+ The data parameter passed to the convert function is the data pointer from + <code>XML_Encoding</code>. The string s is <em>NOT</em> null-terminated and + points at the sequence of bytes to be converted. + </p> -<div class="handler"> -<h4 id="XML_SetStartNamespaceDeclHandler">XML_SetStartNamespaceDeclHandler</h4> -<pre class="setter"> + <p> + The function pointed at by <code>release</code> is called by the parser when it + is finished with the encoding. It may be <code>NULL</code>. + </p> + </div> + + <div class="handler"> + <h4 id="XML_SetStartNamespaceDeclHandler"> + XML_SetStartNamespaceDeclHandler + </h4> + + <pre class="setter"> void XMLCALL XML_SetStartNamespaceDeclHandler(XML_Parser p, - XML_StartNamespaceDeclHandler start); + XML_StartNamespaceDeclHandler start); </pre> -<pre class="signature"> + + <pre class="signature"> typedef void (XMLCALL *XML_StartNamespaceDeclHandler)(void *userData, const XML_Char *prefix, const XML_Char *uri); </pre> -<p>Set a handler to be called when a namespace is declared. Namespace -declarations occur inside start tags. But the namespace declaration start -handler is called before the start tag handler for each namespace declared -in that start tag.</p> -</div> + <p> + Set a handler to be called when a namespace is declared. Namespace declarations + occur inside start tags. But the namespace declaration start handler is called + before the start tag handler for each namespace declared in that start tag. 
+ </p> + </div> + + <div class="handler"> + <h4 id="XML_SetEndNamespaceDeclHandler"> + XML_SetEndNamespaceDeclHandler + </h4> -<div class="handler"> -<h4 id="XML_SetEndNamespaceDeclHandler">XML_SetEndNamespaceDeclHandler</h4> -<pre class="setter"> + <pre class="setter"> void XMLCALL XML_SetEndNamespaceDeclHandler(XML_Parser p, - XML_EndNamespaceDeclHandler end); + XML_EndNamespaceDeclHandler end); </pre> -<pre class="signature"> + + <pre class="signature"> typedef void (XMLCALL *XML_EndNamespaceDeclHandler)(void *userData, const XML_Char *prefix); </pre> -<p>Set a handler to be called when leaving the scope of a namespace -declaration. This will be called, for each namespace declaration, -after the handler for the end tag of the element in which the -namespace was declared.</p> -</div> + <p> + Set a handler to be called when leaving the scope of a namespace declaration. + This will be called, for each namespace declaration, after the handler for the + end tag of the element in which the namespace was declared. + </p> + </div> + + <div class="handler"> + <h4 id="XML_SetNamespaceDeclHandler"> + XML_SetNamespaceDeclHandler + </h4> -<div class="handler"> -<h4 id="XML_SetNamespaceDeclHandler">XML_SetNamespaceDeclHandler</h4> -<pre class="setter"> + <pre class="setter"> void XMLCALL XML_SetNamespaceDeclHandler(XML_Parser p, XML_StartNamespaceDeclHandler start, XML_EndNamespaceDeclHandler end) </pre> -<p>Sets both namespace declaration handlers with a single call.</p> -</div> + <p> + Sets both namespace declaration handlers with a single call. 
+ </p> + </div> + + <div class="handler"> + <h4 id="XML_SetXmlDeclHandler"> + XML_SetXmlDeclHandler + </h4> -<div class="handler"> -<h4 id="XML_SetXmlDeclHandler">XML_SetXmlDeclHandler</h4> -<pre class="setter"> + <pre class="setter"> void XMLCALL XML_SetXmlDeclHandler(XML_Parser p, - XML_XmlDeclHandler xmldecl); + XML_XmlDeclHandler xmldecl); </pre> -<pre class="signature"> + + <pre class="signature"> typedef void (XMLCALL *XML_XmlDeclHandler)(void *userData, const XML_Char *version, const XML_Char *encoding, int standalone); </pre> -<p>Sets a handler that is called for XML declarations and also for -text declarations discovered in external entities. The way to -distinguish is that the <code>version</code> parameter will be <code>NULL</code> -for text declarations. The <code>encoding</code> parameter may be <code>NULL</code> -for an XML declaration. The <code>standalone</code> argument will -contain -1, 0, or 1 indicating respectively that there was no -standalone parameter in the declaration, that it was given as no, or -that it was given as yes.</p> -</div> + <p> + Sets a handler that is called for XML declarations and also for text + declarations discovered in external entities. The way to distinguish is that + the <code>version</code> parameter will be <code>NULL</code> for text + declarations. The <code>encoding</code> parameter may be <code>NULL</code> for + an XML declaration. The <code>standalone</code> argument will contain -1, 0, or + 1 indicating respectively that there was no standalone parameter in the + declaration, that it was given as no, or that it was given as yes. 
+ </p> + </div> + + <div class="handler"> + <h4 id="XML_SetStartDoctypeDeclHandler"> + XML_SetStartDoctypeDeclHandler + </h4> -<div class="handler"> -<h4 id="XML_SetStartDoctypeDeclHandler">XML_SetStartDoctypeDeclHandler</h4> -<pre class="setter"> + <pre class="setter"> void XMLCALL XML_SetStartDoctypeDeclHandler(XML_Parser p, - XML_StartDoctypeDeclHandler start); + XML_StartDoctypeDeclHandler start); </pre> -<pre class="signature"> + + <pre class="signature"> typedef void (XMLCALL *XML_StartDoctypeDeclHandler)(void *userData, const XML_Char *doctypeName, @@ -1832,52 +2546,71 @@ typedef void const XML_Char *pubid, int has_internal_subset); </pre> -<p>Set a handler that is called at the start of a DOCTYPE declaration, -before any external or internal subset is parsed. Both <code>sysid</code> -and <code>pubid</code> may be <code>NULL</code>. The <code>has_internal_subset</code> -will be non-zero if the DOCTYPE declaration has an internal subset.</p> -</div> + <p> + Set a handler that is called at the start of a DOCTYPE declaration, before any + external or internal subset is parsed. Both <code>sysid</code> and + <code>pubid</code> may be <code>NULL</code>. The + <code>has_internal_subset</code> will be non-zero if the DOCTYPE declaration + has an internal subset. 
+ </p> + </div> + + <div class="handler"> + <h4 id="XML_SetEndDoctypeDeclHandler"> + XML_SetEndDoctypeDeclHandler + </h4> -<div class="handler"> -<h4 id="XML_SetEndDoctypeDeclHandler">XML_SetEndDoctypeDeclHandler</h4> -<pre class="setter"> + <pre class="setter"> void XMLCALL XML_SetEndDoctypeDeclHandler(XML_Parser p, - XML_EndDoctypeDeclHandler end); + XML_EndDoctypeDeclHandler end); </pre> -<pre class="signature"> + + <pre class="signature"> typedef void (XMLCALL *XML_EndDoctypeDeclHandler)(void *userData); </pre> -<p>Set a handler that is called at the end of a DOCTYPE declaration, -after parsing any external subset.</p> -</div> + <p> + Set a handler that is called at the end of a DOCTYPE declaration, after parsing + any external subset. + </p> + </div> + + <div class="handler"> + <h4 id="XML_SetDoctypeDeclHandler"> + XML_SetDoctypeDeclHandler + </h4> -<div class="handler"> -<h4 id="XML_SetDoctypeDeclHandler">XML_SetDoctypeDeclHandler</h4> -<pre class="setter"> + <pre class="setter"> void XMLCALL XML_SetDoctypeDeclHandler(XML_Parser p, - XML_StartDoctypeDeclHandler start, - XML_EndDoctypeDeclHandler end); + XML_StartDoctypeDeclHandler start, + XML_EndDoctypeDeclHandler end); </pre> -<p>Set both doctype handlers with one call.</p> -</div> + <p> + Set both doctype handlers with one call. 
+ </p> + </div> -<div class="handler"> -<h4 id="XML_SetElementDeclHandler">XML_SetElementDeclHandler</h4> -<pre class="setter"> + <div class="handler"> + <h4 id="XML_SetElementDeclHandler"> + XML_SetElementDeclHandler + </h4> + + <pre class="setter"> void XMLCALL XML_SetElementDeclHandler(XML_Parser p, - XML_ElementDeclHandler eldecl); + XML_ElementDeclHandler eldecl); </pre> -<pre class="signature"> + + <pre class="signature"> typedef void (XMLCALL *XML_ElementDeclHandler)(void *userData, const XML_Char *name, XML_Content *model); </pre> -<pre class="signature"> + + <pre class="signature"> enum XML_Content_Type { XML_CTYPE_EMPTY = 1, XML_CTYPE_ANY, @@ -1897,55 +2630,65 @@ enum XML_Content_Quant { typedef struct XML_cp XML_Content; struct XML_cp { - enum XML_Content_Type type; - enum XML_Content_Quant quant; - const XML_Char * name; - unsigned int numchildren; - XML_Content * children; + enum XML_Content_Type type; + enum XML_Content_Quant quant; + const XML_Char * name; + unsigned int numchildren; + XML_Content * children; }; </pre> -<p>Sets a handler for element declarations in a DTD. The handler gets -called with the name of the element in the declaration and a pointer -to a structure that contains the element model. It's the user code's -responsibility to free model when finished with via a call to <code> -<a href="#XML_FreeContentModel">XML_FreeContentModel</a></code>. -There is no need to free the model from the handler, it can be kept -around and freed at a later stage.</p> + <p> + Sets a handler for element declarations in a DTD. The handler gets called with + the name of the element in the declaration and a pointer to a structure that + contains the element model. It's the user code's responsibility to free model + when finished with via a call to <code><a href= + "#XML_FreeContentModel">XML_FreeContentModel</a></code>. There is no need to + free the model from the handler, it can be kept around and freed at a later + stage. 
+ </p> + + <p> + The <code>model</code> argument is the root of a tree of + <code>XML_Content</code> nodes. If <code>type</code> equals + <code>XML_CTYPE_EMPTY</code> or <code>XML_CTYPE_ANY</code>, then + <code>quant</code> will be <code>XML_CQUANT_NONE</code>, and the other fields + will be zero or <code>NULL</code>. If <code>type</code> is + <code>XML_CTYPE_MIXED</code>, then <code>quant</code> will be + <code>XML_CQUANT_NONE</code> or <code>XML_CQUANT_REP</code> and + <code>numchildren</code> will contain the number of elements that are allowed + to be mixed in and <code>children</code> points to an array of + <code>XML_Content</code> structures that will all have type XML_CTYPE_NAME with + no quantification. Only the root node can be type <code>XML_CTYPE_EMPTY</code>, + <code>XML_CTYPE_ANY</code>, or <code>XML_CTYPE_MIXED</code>. + </p> -<p>The <code>model</code> argument is the root of a tree of -<code>XML_Content</code> nodes. If <code>type</code> equals -<code>XML_CTYPE_EMPTY</code> or <code>XML_CTYPE_ANY</code>, then -<code>quant</code> will be <code>XML_CQUANT_NONE</code>, and the other -fields will be zero or <code>NULL</code>. If <code>type</code> is -<code>XML_CTYPE_MIXED</code>, then <code>quant</code> will be -<code>XML_CQUANT_NONE</code> or <code>XML_CQUANT_REP</code> and -<code>numchildren</code> will contain the number of elements that are -allowed to be mixed in and <code>children</code> points to an array of -<code>XML_Content</code> structures that will all have type -XML_CTYPE_NAME with no quantification. Only the root node can be type -<code>XML_CTYPE_EMPTY</code>, <code>XML_CTYPE_ANY</code>, or -<code>XML_CTYPE_MIXED</code>.</p> + <p> + For type <code>XML_CTYPE_NAME</code>, the <code>name</code> field points to the + name and the <code>numchildren</code> and <code>children</code> fields will be + zero and <code>NULL</code>. The <code>quant</code> field will indicate any + quantifiers placed on the name. 
+ </p> -<p>For type <code>XML_CTYPE_NAME</code>, the <code>name</code> field -points to the name and the <code>numchildren</code> and -<code>children</code> fields will be zero and <code>NULL</code>. The -<code>quant</code> field will indicate any quantifiers placed on the -name.</p> + <p> + Types <code>XML_CTYPE_CHOICE</code> and <code>XML_CTYPE_SEQ</code> indicate a + choice or sequence respectively. The <code>numchildren</code> field indicates + how many nodes in the choice or sequence and <code>children</code> points to + the nodes. + </p> + </div> -<p>Types <code>XML_CTYPE_CHOICE</code> and <code>XML_CTYPE_SEQ</code> -indicate a choice or sequence respectively. The -<code>numchildren</code> field indicates how many nodes in the choice -or sequence and <code>children</code> points to the nodes.</p> -</div> + <div class="handler"> + <h4 id="XML_SetAttlistDeclHandler"> + XML_SetAttlistDeclHandler + </h4> -<div class="handler"> -<h4 id="XML_SetAttlistDeclHandler">XML_SetAttlistDeclHandler</h4> -<pre class="setter"> + <pre class="setter"> void XMLCALL XML_SetAttlistDeclHandler(XML_Parser p, XML_AttlistDeclHandler attdecl); </pre> -<pre class="signature"> + + <pre class="signature"> typedef void (XMLCALL *XML_AttlistDeclHandler)(void *userData, const XML_Char *elname, @@ -1954,563 +2697,693 @@ typedef void const XML_Char *dflt, int isrequired); </pre> -<p>Set a handler for attlist declarations in the DTD. This handler is -called for <em>each</em> attribute. So a single attlist declaration -with multiple attributes declared will generate multiple calls to this -handler. The <code>elname</code> parameter returns the name of the -element for which the attribute is being declared. The attribute name -is in the <code>attname</code> parameter. The attribute type is in the -<code>att_type</code> parameter. It is the string representing the -type in the declaration with whitespace removed.</p> + <p> + Set a handler for attlist declarations in the DTD. 
This handler is called for + <em>each</em> attribute. So a single attlist declaration with multiple + attributes declared will generate multiple calls to this handler. The + <code>elname</code> parameter returns the name of the element for which the + attribute is being declared. The attribute name is in the <code>attname</code> + parameter. The attribute type is in the <code>att_type</code> parameter. It is + the string representing the type in the declaration with whitespace removed. + </p> + + <p> + The <code>dflt</code> parameter holds the default value. It will be + <code>NULL</code> in the case of "#IMPLIED" or "#REQUIRED" attributes. You can + distinguish these two cases by checking the <code>isrequired</code> parameter, + which will be true in the case of "#REQUIRED" attributes. Attributes which are + "#FIXED" will also have a true <code>isrequired</code>, but they will have + the non-<code>NULL</code> fixed value in the <code>dflt</code> parameter. + </p> + </div> -<p>The <code>dflt</code> parameter holds the default value. It will be -<code>NULL</code> in the case of "#IMPLIED" or "#REQUIRED" attributes. You can -distinguish these two cases by checking the <code>isrequired</code> -parameter, which will be true in the case of "#REQUIRED" attributes. 
-Attributes which are "#FIXED" will have also have a true -<code>isrequired</code>, but they will have the non-<code>NULL</code> fixed value -in the <code>dflt</code> parameter.</p> -</div> + <div class="handler"> + <h4 id="XML_SetEntityDeclHandler"> + XML_SetEntityDeclHandler + </h4> -<div class="handler"> -<h4 id="XML_SetEntityDeclHandler">XML_SetEntityDeclHandler</h4> -<pre class="setter"> + <pre class="setter"> void XMLCALL XML_SetEntityDeclHandler(XML_Parser p, - XML_EntityDeclHandler handler); + XML_EntityDeclHandler handler); </pre> -<pre class="signature"> + + <pre class="signature"> typedef void (XMLCALL *XML_EntityDeclHandler)(void *userData, const XML_Char *entityName, int is_parameter_entity, const XML_Char *value, - int value_length, + int value_length, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId, const XML_Char *notationName); </pre> -<p>Sets a handler that will be called for all entity declarations. -The <code>is_parameter_entity</code> argument will be non-zero in the -case of parameter entities and zero otherwise.</p> + <p> + Sets a handler that will be called for all entity declarations. The + <code>is_parameter_entity</code> argument will be non-zero in the case of + parameter entities and zero otherwise. + </p> + + <p> + For internal entities (<code><!ENTITY foo "bar"></code>), + <code>value</code> will be non-<code>NULL</code> and <code>systemId</code>, + <code>publicId</code>, and <code>notationName</code> will all be + <code>NULL</code>. The value string is <em>not</em> null-terminated; the length + is provided in the <code>value_length</code> parameter. Do not use + <code>value_length</code> to test for internal entities, since it is legal to + have zero-length values. Instead check for whether or not <code>value</code> is + <code>NULL</code>. 
+ </p> -<p>For internal entities (<code><!ENTITY foo "bar"></code>), -<code>value</code> will be non-<code>NULL</code> and <code>systemId</code>, -<code>publicId</code>, and <code>notationName</code> will all be <code>NULL</code>. -The value string is <em>not</em> null-terminated; the length is -provided in the <code>value_length</code> parameter. Do not use -<code>value_length</code> to test for internal entities, since it is -legal to have zero-length values. Instead check for whether or not -<code>value</code> is <code>NULL</code>.</p> <p>The <code>notationName</code> -argument will have a non-<code>NULL</code> value only for unparsed entity -declarations.</p> -</div> + <p> + The <code>notationName</code> argument will have a non-<code>NULL</code> value + only for unparsed entity declarations. + </p> + </div> -<div class="handler"> -<h4 id="XML_SetUnparsedEntityDeclHandler">XML_SetUnparsedEntityDeclHandler</h4> -<pre class="setter"> + <div class="handler"> + <h4 id="XML_SetUnparsedEntityDeclHandler"> + XML_SetUnparsedEntityDeclHandler + </h4> + + <pre class="setter"> void XMLCALL XML_SetUnparsedEntityDeclHandler(XML_Parser p, XML_UnparsedEntityDeclHandler h) </pre> -<pre class="signature"> + + <pre class="signature"> typedef void (XMLCALL *XML_UnparsedEntityDeclHandler)(void *userData, - const XML_Char *entityName, + const XML_Char *entityName, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId, const XML_Char *notationName); </pre> -<p>Set a handler that receives declarations of unparsed entities. These -are entity declarations that have a notation (NDATA) field:</p> + <p> + Set a handler that receives declarations of unparsed entities. These are entity + declarations that have a notation (NDATA) field: + </p> -<div id="eg"><pre> + <div id="eg"> + <pre> <!ENTITY logo SYSTEM "images/logo.gif" NDATA gif> -</pre></div> -<p>This handler is obsolete and is provided for backwards -compatibility. 
Use instead <a href= "#XML_SetEntityDeclHandler" ->XML_SetEntityDeclHandler</a>.</p> -</div> +</pre> + </div> + + <p> + This handler is obsolete and is provided for backwards compatibility. Use + instead <a href="#XML_SetEntityDeclHandler">XML_SetEntityDeclHandler</a>. + </p> + </div> + + <div class="handler"> + <h4 id="XML_SetNotationDeclHandler"> + XML_SetNotationDeclHandler + </h4> -<div class="handler"> -<h4 id="XML_SetNotationDeclHandler">XML_SetNotationDeclHandler</h4> -<pre class="setter"> + <pre class="setter"> void XMLCALL XML_SetNotationDeclHandler(XML_Parser p, XML_NotationDeclHandler h) </pre> -<pre class="signature"> + + <pre class="signature"> typedef void -(XMLCALL *XML_NotationDeclHandler)(void *userData, +(XMLCALL *XML_NotationDeclHandler)(void *userData, const XML_Char *notationName, const XML_Char *base, const XML_Char *systemId, const XML_Char *publicId); </pre> -<p>Set a handler that receives notation declarations.</p> -</div> + <p> + Set a handler that receives notation declarations. + </p> + </div> + + <div class="handler"> + <h4 id="XML_SetNotStandaloneHandler"> + XML_SetNotStandaloneHandler + </h4> -<div class="handler"> -<h4 id="XML_SetNotStandaloneHandler">XML_SetNotStandaloneHandler</h4> -<pre class="setter"> + <pre class="setter"> void XMLCALL XML_SetNotStandaloneHandler(XML_Parser p, XML_NotStandaloneHandler h) </pre> -<pre class="signature"> -typedef int + + <pre class="signature"> +typedef int (XMLCALL *XML_NotStandaloneHandler)(void *userData); </pre> -<p>Set a handler that is called if the document is not "standalone". -This happens when there is an external subset or a reference to a -parameter entity, but does not have standalone set to "yes" in an XML -declaration. If this handler returns <code>XML_STATUS_ERROR</code>, -then the parser will throw an <code>XML_ERROR_NOT_STANDALONE</code> -error.</p> -</div> + <p> + Set a handler that is called if the document is not "standalone". 
This happens + when there is an external subset or a reference to a parameter entity, but does + not have standalone set to "yes" in an XML declaration. If this handler returns + <code>XML_STATUS_ERROR</code>, then the parser will throw an + <code>XML_ERROR_NOT_STANDALONE</code> error. + </p> + </div> -<h3><a name="position">Parse position and error reporting functions</a></h3> + <h3> + <a id="position" name="position">Parse position and error reporting functions</a> + </h3> -<p>These are the functions you'll want to call when the parse -functions return <code>XML_STATUS_ERROR</code> (a parse error has -occurred), although the position reporting functions are useful outside -of errors. The position reported is the byte position (in the original -document or entity encoding) of the first of the sequence of -characters that generated the current event (or the error that caused -the parse functions to return <code>XML_STATUS_ERROR</code>.) The -exceptions are callbacks triggered by declarations in the document -prologue, in which case they exact position reported is somewhere in the -relevant markup, but not necessarily as meaningful as for other -events.</p> + <p> + These are the functions you'll want to call when the parse functions return + <code>XML_STATUS_ERROR</code> (a parse error has occurred), although the position + reporting functions are useful outside of errors. The position reported is the + byte position (in the original document or entity encoding) of the first of the + sequence of characters that generated the current event (or the error that caused + the parse functions to return <code>XML_STATUS_ERROR</code>.) The exceptions are + callbacks triggered by declarations in the document prologue, in which case the + exact position reported is somewhere in the relevant markup, but not necessarily + as meaningful as for other events. + </p> -<p>The position reporting functions are accurate only outside of the -DTD. 
In other words, they usually return bogus information when -called from within a DTD declaration handler.</p> + <p> + The position reporting functions are accurate only outside of the DTD. In other + words, they usually return bogus information when called from within a DTD + declaration handler. + </p> -<h4 id="XML_GetErrorCode">XML_GetErrorCode</h4> -<pre class="fcndec"> + <h4 id="XML_GetErrorCode"> + XML_GetErrorCode + </h4> + + <pre class="fcndec"> enum XML_Error XMLCALL XML_GetErrorCode(XML_Parser p); </pre> -<div class="fcndef"> -Return what type of error has occurred. -</div> + <div class="fcndef"> + Return what type of error has occurred. + </div> + + <h4 id="XML_ErrorString"> + XML_ErrorString + </h4> -<h4 id="XML_ErrorString">XML_ErrorString</h4> -<pre class="fcndec"> + <pre class="fcndec"> const XML_LChar * XMLCALL XML_ErrorString(enum XML_Error code); </pre> -<div class="fcndef"> -Return a string describing the error corresponding to code. -The code should be one of the enums that can be returned from -<code><a href= "#XML_GetErrorCode" >XML_GetErrorCode</a></code>. -</div> + <div class="fcndef"> + Return a string describing the error corresponding to code. The code should be + one of the enums that can be returned from <code><a href= + "#XML_GetErrorCode">XML_GetErrorCode</a></code>. + </div> -<h4 id="XML_GetCurrentByteIndex">XML_GetCurrentByteIndex</h4> -<pre class="fcndec"> + <h4 id="XML_GetCurrentByteIndex"> + XML_GetCurrentByteIndex + </h4> + + <pre class="fcndec"> XML_Index XMLCALL XML_GetCurrentByteIndex(XML_Parser p); </pre> -<div class="fcndef"> -Return the byte offset of the position. This always corresponds to -the values returned by <code><a href= "#XML_GetCurrentLineNumber" ->XML_GetCurrentLineNumber</a></code> and <code><a href= -"#XML_GetCurrentColumnNumber" >XML_GetCurrentColumnNumber</a></code>. -</div> + <div class="fcndef"> + Return the byte offset of the position. 
This always corresponds to the values + returned by <code><a href= + "#XML_GetCurrentLineNumber">XML_GetCurrentLineNumber</a></code> and + <code><a href="#XML_GetCurrentColumnNumber">XML_GetCurrentColumnNumber</a></code>. + </div> + + <h4 id="XML_GetCurrentLineNumber"> + XML_GetCurrentLineNumber + </h4> -<h4 id="XML_GetCurrentLineNumber">XML_GetCurrentLineNumber</h4> -<pre class="fcndec"> + <pre class="fcndec"> XML_Size XMLCALL XML_GetCurrentLineNumber(XML_Parser p); </pre> -<div class="fcndef"> -Return the line number of the position. The first line is reported as -<code>1</code>. -</div> + <div class="fcndef"> + Return the line number of the position. The first line is reported as + <code>1</code>. + </div> -<h4 id="XML_GetCurrentColumnNumber">XML_GetCurrentColumnNumber</h4> -<pre class="fcndec"> + <h4 id="XML_GetCurrentColumnNumber"> + XML_GetCurrentColumnNumber + </h4> + + <pre class="fcndec"> XML_Size XMLCALL XML_GetCurrentColumnNumber(XML_Parser p); </pre> -<div class="fcndef"> -Return the <em>offset</em>, from the beginning of the current line, of -the position. The first column is reported as <code>0</code>. -</div> + <div class="fcndef"> + Return the <em>offset</em>, from the beginning of the current line, of the + position. The first column is reported as <code>0</code>. + </div> + + <h4 id="XML_GetCurrentByteCount"> + XML_GetCurrentByteCount + </h4> -<h4 id="XML_GetCurrentByteCount">XML_GetCurrentByteCount</h4> -<pre class="fcndec"> + <pre class="fcndec"> int XMLCALL XML_GetCurrentByteCount(XML_Parser p); </pre> -<div class="fcndef"> -Return the number of bytes in the current event. Returns -<code>0</code> if the event is inside a reference to an internal -entity and for the end-tag event for empty element tags (the later can -be used to distinguish empty-element tags from empty elements using -separate start and end tags). -</div> + <div class="fcndef"> + Return the number of bytes in the current event. 
Returns <code>0</code> if the + event is inside a reference to an internal entity and for the end-tag event for + empty element tags (the latter can be used to distinguish empty-element tags from + empty elements using separate start and end tags). + </div> -<h4 id="XML_GetInputContext">XML_GetInputContext</h4> -<pre class="fcndec"> + <h4 id="XML_GetInputContext"> + XML_GetInputContext + </h4> + + <pre class="fcndec"> const char * XMLCALL XML_GetInputContext(XML_Parser p, int *offset, int *size); </pre> -<div class="fcndef"> + <div class="fcndef"> + <p> + Returns the parser's input buffer, sets the integer pointed at by + <code>offset</code> to the offset within this buffer of the current parse + position, and sets the integer pointed at by <code>size</code> to the size of + the returned buffer. + </p> -<p>Returns the parser's input buffer, sets the integer pointed at by -<code>offset</code> to the offset within this buffer of the current -parse position, and set the integer pointed at by <code>size</code> to -the size of the returned buffer.</p> + <p> + This should only be called from within a handler during an active parse and the + returned buffer should only be referred to from within the handler that made + the call. This input buffer contains the untranslated bytes of the input. + </p> -<p>This should only be called from within a handler during an active -parse and the returned buffer should only be referred to from within -the handler that made the call. This input buffer contains the -untranslated bytes of the input.</p> + <p> + Only a limited amount of context is kept, so if the event triggering a call + spans over a very large amount of input, the actual parse position may be + before the beginning of the buffer. 
+ </p> -<p>Only a limited amount of context is kept, so if the event -triggering a call spans over a very large amount of input, the actual -parse position may be before the beginning of the buffer.</p> + <p> + If <code>XML_CONTEXT_BYTES</code> is zero, this will always return + <code>NULL</code>. + </p> + </div> -<p>If <code>XML_CONTEXT_BYTES</code> is zero, this will always -return <code>NULL</code>.</p> -</div> + <h3> + <a id="attack-protection" name="attack-protection">Attack Protection</a><a id= + "billion-laughs" name="billion-laughs"></a> + </h3> -<h3><a name="attack-protection">Attack Protection</a><a name="billion-laughs"></a></h3> + <h4 id="XML_SetBillionLaughsAttackProtectionMaximumAmplification"> + XML_SetBillionLaughsAttackProtectionMaximumAmplification + </h4> -<h4 id="XML_SetBillionLaughsAttackProtectionMaximumAmplification">XML_SetBillionLaughsAttackProtectionMaximumAmplification</h4> -<pre class="fcndec"> + <pre class="fcndec"> /* Added in Expat 2.4.0. */ XML_Bool XMLCALL XML_SetBillionLaughsAttackProtectionMaximumAmplification(XML_Parser p, float maximumAmplificationFactor); </pre> -<div class="fcndef"> - <p> - Sets the maximum tolerated amplification factor - for protection against - <a href="https://en.wikipedia.org/wiki/Billion_laughs_attack">billion laughs attacks</a> - (default: <code>100.0</code>) - of parser <code>p</code> to <code>maximumAmplificationFactor</code>, and - returns <code>XML_TRUE</code> upon success and <code>XML_FALSE</code> upon error. - </p> + <div class="fcndef"> + <p> + Sets the maximum tolerated amplification factor for protection against <a href= + "https://en.wikipedia.org/wiki/Billion_laughs_attack">billion laughs + attacks</a> (default: <code>100.0</code>) of parser <code>p</code> to + <code>maximumAmplificationFactor</code>, and returns <code>XML_TRUE</code> upon + success and <code>XML_FALSE</code> upon error. 
+ </p> + + <p> + Once the <a href= + "#XML_SetBillionLaughsAttackProtectionActivationThreshold">threshold for + activation</a> is reached, the amplification factor is calculated as .. + </p> - <p> - Once the <a href="#XML_SetBillionLaughsAttackProtectionActivationThreshold">threshold for activation</a> is reached, - the amplification factor is calculated as .. - </p> - <pre>amplification := (direct + indirect) / direct</pre> - <p> - .. while parsing, whereas - <code>direct</code> is the number of bytes read from the primary document in parsing and - <code>indirect</code> is the number of bytes added by expanding entities and reading of external DTD files, combined. - </p> + <pre>amplification := (direct + indirect) / direct</pre> + <p> + .. while parsing, whereas <code>direct</code> is the number of bytes read from + the primary document in parsing and <code>indirect</code> is the number of + bytes added by expanding entities and reading of external DTD files, combined. + </p> - <p>For a call to <code>XML_SetBillionLaughsAttackProtectionMaximumAmplification</code> to succeed:</p> - <ul> - <li>parser <code>p</code> must be a non-<code>NULL</code> root parser (without any parent parsers) and</li> - <li><code>maximumAmplificationFactor</code> must be non-<code>NaN</code> and greater than or equal to <code>1.0</code>.</li> - </ul> + <p> + For a call to + <code>XML_SetBillionLaughsAttackProtectionMaximumAmplification</code> to + succeed: + </p> - <p> - <strong>Note:</strong> - If you ever need to increase this value for non-attack payload, - please <a href="https://github.com/libexpat/libexpat/issues">file a bug report</a>. - </p> + <ul> + <li>parser <code>p</code> must be a non-<code>NULL</code> root parser (without + any parent parsers) and + </li> - <p> - <strong>Note:</strong> - Peak amplifications - of factor 15,000 for the entire payload and - of factor 30,000 in the middle of parsing - have been observed with small benign files in practice. 
+ <li> + <code>maximumAmplificationFactor</code> must be non-<code>NaN</code> and + greater than or equal to <code>1.0</code>. + </li> + </ul> - So if you do reduce the maximum allowed amplification, - please make sure that the activation threshold is still big enough - to not end up with undesired false positives (i.e. benign files being rejected). - </p> -</div> + <p> + <strong>Note:</strong> If you ever need to increase this value for non-attack + payload, please <a href="https://github.com/libexpat/libexpat/issues">file a + bug report</a>. + </p> -<h4 id="XML_SetBillionLaughsAttackProtectionActivationThreshold">XML_SetBillionLaughsAttackProtectionActivationThreshold</h4> -<pre class="fcndec"> + <p> + <strong>Note:</strong> Peak amplifications of factor 15,000 for the entire + payload and of factor 30,000 in the middle of parsing have been observed with + small benign files in practice. So if you do reduce the maximum allowed + amplification, please make sure that the activation threshold is still big + enough to not end up with undesired false positives (i.e. benign files being + rejected). + </p> + </div> + + <h4 id="XML_SetBillionLaughsAttackProtectionActivationThreshold"> + XML_SetBillionLaughsAttackProtectionActivationThreshold + </h4> + + <pre class="fcndec"> /* Added in Expat 2.4.0. */ XML_Bool XMLCALL XML_SetBillionLaughsAttackProtectionActivationThreshold(XML_Parser p, unsigned long long activationThresholdBytes); </pre> -<div class="fcndef"> - <p> - Sets number of output bytes (including amplification from entity expansion and reading DTD files) - needed to activate protection against - <a href="https://en.wikipedia.org/wiki/Billion_laughs_attack">billion laughs attacks</a> - (default: <code>8 MiB</code>) - of parser <code>p</code> to <code>activationThresholdBytes</code>, and - returns <code>XML_TRUE</code> upon success and <code>XML_FALSE</code> upon error. 
- </p> + <div class="fcndef"> + <p> + Sets number of output bytes (including amplification from entity expansion and + reading DTD files) needed to activate protection against <a href= + "https://en.wikipedia.org/wiki/Billion_laughs_attack">billion laughs + attacks</a> (default: <code>8 MiB</code>) of parser <code>p</code> to + <code>activationThresholdBytes</code>, and returns <code>XML_TRUE</code> upon + success and <code>XML_FALSE</code> upon error. + </p> + + <p> + For a call to + <code>XML_SetBillionLaughsAttackProtectionActivationThreshold</code> to + succeed: + </p> - <p>For a call to <code>XML_SetBillionLaughsAttackProtectionActivationThreshold</code> to succeed:</p> - <ul> - <li>parser <code>p</code> must be a non-<code>NULL</code> root parser (without any parent parsers).</li> - </ul> + <ul> + <li>parser <code>p</code> must be a non-<code>NULL</code> root parser (without + any parent parsers). + </li> + </ul> - <p> - <strong>Note:</strong> - If you ever need to increase this value for non-attack payload, - please <a href="https://github.com/libexpat/libexpat/issues">file a bug report</a>. - </p> + <p> + <strong>Note:</strong> If you ever need to increase this value for non-attack + payload, please <a href="https://github.com/libexpat/libexpat/issues">file a + bug report</a>. + </p> - <p> - <strong>Note:</strong> - Activation thresholds below 4 MiB are known to break support for - <a href="https://en.wikipedia.org/wiki/Darwin_Information_Typing_Architecture">DITA</a> 1.3 payload - and are hence not recommended. - </p> -</div> + <p> + <strong>Note:</strong> Activation thresholds below 4 MiB are known to break + support for <a href= + "https://en.wikipedia.org/wiki/Darwin_Information_Typing_Architecture">DITA</a> + 1.3 payload and are hence not recommended. 
+ </p> + </div> -<h4 id="XML_SetAllocTrackerMaximumAmplification">XML_SetAllocTrackerMaximumAmplification</h4> -<pre class="fcndec"> + <h4 id="XML_SetAllocTrackerMaximumAmplification"> + XML_SetAllocTrackerMaximumAmplification + </h4> + + <pre class="fcndec"> /* Added in Expat 2.7.2. */ XML_Bool XML_SetAllocTrackerMaximumAmplification(XML_Parser p, float maximumAmplificationFactor); </pre> -<div class="fcndef"> - <p> - Sets the maximum tolerated amplification factor - between direct input and bytes of dynamic memory allocated - (default: <code>100.0</code>) - of parser <code>p</code> to <code>maximumAmplificationFactor</code>, and - returns <code>XML_TRUE</code> upon success and <code>XML_FALSE</code> upon error. - </p> + <div class="fcndef"> + <p> + Sets the maximum tolerated amplification factor between direct input and bytes + of dynamic memory allocated (default: <code>100.0</code>) of parser + <code>p</code> to <code>maximumAmplificationFactor</code>, and returns + <code>XML_TRUE</code> upon success and <code>XML_FALSE</code> upon error. 
+ </p> + + <p> + <strong>Note:</strong> There are three types of allocations that intentionally + bypass tracking and limiting: + </p> + + <ul> + <li>application calls to functions <code><a href= + "#XML_MemMalloc">XML_MemMalloc</a></code> and <code><a href="#XML_MemRealloc"> + XML_MemRealloc</a></code> — <em>healthy</em> use of these two functions + continues to be a responsibility of the application using Expat —, + </li> + + <li>the main character buffer used by functions <code><a href="#XML_GetBuffer"> + XML_GetBuffer</a></code> and <code><a href= + "#XML_ParseBuffer">XML_ParseBuffer</a></code> (and thus also by plain + <code><a href="#XML_Parse">XML_Parse</a></code>), and + </li> + + <li>the <a href="#XML_SetElementDeclHandler">content model memory</a> (that is + passed to the <a href="#XML_SetElementDeclHandler">element declaration + handler</a> and freed by a call to <code><a href= + "#XML_FreeContentModel">XML_FreeContentModel</a></code>). + </li> + </ul> + + <p> + Once the <a href="#XML_SetAllocTrackerActivationThreshold">threshold for + activation</a> is reached, the amplification factor is calculated as .. 
+ </p> - <p> - <strong>Note:</strong> - There are three types of allocations that intentionally bypass tracking and limiting: - </p> - <ul> - <li> - application calls to functions - <code><a href="#XML_MemMalloc">XML_MemMalloc</a></code> - and - <code><a href="#XML_MemRealloc">XML_MemRealloc</a></code> - — - <em>healthy</em> use of these two functions continues to be a responsibility - of the application using Expat - —, - </li> - <li> - the main character buffer used by functions - <code><a href="#XML_GetBuffer">XML_GetBuffer</a></code> - and - <code><a href="#XML_ParseBuffer">XML_ParseBuffer</a></code> - (and thus also by plain - <code><a href="#XML_Parse">XML_Parse</a></code>), and - </li> - <li> - the <a href="#XML_SetElementDeclHandler">content model memory</a> - (that is passed to the - <a href="#XML_SetElementDeclHandler">element declaration handler</a> - and freed by a call to - <code><a href="#XML_FreeContentModel">XML_FreeContentModel</a></code>). - </li> - </ul> + <pre>amplification := allocated / direct</pre> + <p> + .. while parsing, whereas <code>direct</code> is the number of bytes read from + the primary document in parsing and <code>allocated</code> is the number of + bytes of dynamic memory allocated in the parser hierarchy. + </p> - <p> - Once the <a href="#XML_SetAllocTrackerActivationThreshold">threshold for activation</a> is reached, - the amplification factor is calculated as .. - </p> - <pre>amplification := allocated / direct</pre> - <p> - .. while parsing, whereas - <code>direct</code> is the number of bytes read from the primary document in parsing and - <code>allocated</code> is the number of bytes of dynamic memory allocated in the parser hierarchy. 
+ </p> + <p> + For a call to <code>XML_SetAllocTrackerMaximumAmplification</code> to succeed: + </p> - <p>For a call to <code>XML_SetAllocTrackerMaximumAmplification</code> to succeed:</p> - <ul> - <li>parser <code>p</code> must be a non-<code>NULL</code> root parser (without any parent parsers) and</li> - <li><code>maximumAmplificationFactor</code> must be non-<code>NaN</code> and greater than or equal to <code>1.0</code>.</li> - </ul> + <ul> + <li>parser <code>p</code> must be a non-<code>NULL</code> root parser (without + any parent parsers) and + </li> - <p> - <strong>Note:</strong> - If you ever need to increase this value for non-attack payload, - please <a href="https://github.com/libexpat/libexpat/issues">file a bug report</a>. - </p> + <li> + <code>maximumAmplificationFactor</code> must be non-<code>NaN</code> and + greater than or equal to <code>1.0</code>. + </li> + </ul> - <p> - <strong>Note:</strong> - Amplifications factors greater than <code>100.0</code> can been observed near the start of parsing - even with benign files in practice. + <p> + <strong>Note:</strong> If you ever need to increase this value for non-attack + payload, please <a href="https://github.com/libexpat/libexpat/issues">file a + bug report</a>. + </p> - So if you do reduce the maximum allowed amplification, - please make sure that the activation threshold is still big enough - to not end up with undesired false positives (i.e. benign files being rejected). - </p> -</div> + <p> + <strong>Note:</strong> Amplification factors greater than <code>100.0</code> + can be observed near the start of parsing even with benign files in practice. + So if you do reduce the maximum allowed amplification, please make sure that + the activation threshold is still big enough to not end up with undesired false + positives (i.e. benign files being rejected). 
+ </p> + </div> -<h4 id="XML_SetAllocTrackerActivationThreshold">XML_SetAllocTrackerActivationThreshold</h4> -<pre class="fcndec"> + <h4 id="XML_SetAllocTrackerActivationThreshold"> + XML_SetAllocTrackerActivationThreshold + </h4> + + <pre class="fcndec"> /* Added in Expat 2.7.2. */ XML_Bool XML_SetAllocTrackerActivationThreshold(XML_Parser p, unsigned long long activationThresholdBytes); </pre> -<div class="fcndef"> - <p> - Sets number of allocated bytes of dynamic memory - needed to activate protection against disproportionate use of RAM - (default: <code>64 MiB</code>) - of parser <code>p</code> to <code>activationThresholdBytes</code>, and - returns <code>XML_TRUE</code> upon success and <code>XML_FALSE</code> upon error. - </p> + <div class="fcndef"> + <p> + Sets number of allocated bytes of dynamic memory needed to activate protection + against disproportionate use of RAM (default: <code>64 MiB</code>) of parser + <code>p</code> to <code>activationThresholdBytes</code>, and returns + <code>XML_TRUE</code> upon success and <code>XML_FALSE</code> upon error. + </p> + + <p> + <strong>Note:</strong> For types of allocations that intentionally bypass + tracking and limiting, please see <code><a href= + "#XML_SetAllocTrackerMaximumAmplification">XML_SetAllocTrackerMaximumAmplification</a></code> + above. + </p> - <p> - <strong>Note:</strong> - For types of allocations that intentionally bypass tracking and limiting, please see - <code><a href="#XML_SetAllocTrackerMaximumAmplification">XML_SetAllocTrackerMaximumAmplification</a></code> - above. - </p> + <p> + For a call to <code>XML_SetAllocTrackerActivationThreshold</code> to succeed: + </p> - <p>For a call to <code>XML_SetAllocTrackerActivationThreshold</code> to succeed:</p> - <ul> - <li>parser <code>p</code> must be a non-<code>NULL</code> root parser (without any parent parsers).</li> - </ul> + <ul> + <li>parser <code>p</code> must be a non-<code>NULL</code> root parser (without + any parent parsers). 
+ </li> + </ul> - <p> - <strong>Note:</strong> - If you ever need to increase this value for non-attack payload, - please <a href="https://github.com/libexpat/libexpat/issues">file a bug report</a>. - </p> -</div> + <p> + <strong>Note:</strong> If you ever need to increase this value for non-attack + payload, please <a href="https://github.com/libexpat/libexpat/issues">file a + bug report</a>. + </p> + </div> -<h4 id="XML_SetReparseDeferralEnabled">XML_SetReparseDeferralEnabled</h4> -<pre class="fcndec"> + <h4 id="XML_SetReparseDeferralEnabled"> + XML_SetReparseDeferralEnabled + </h4> + + <pre class="fcndec"> /* Added in Expat 2.6.0. */ XML_Bool XMLCALL XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled); </pre> -<div class="fcndef"> - <p> - Large tokens may require many parse calls before enough data is available for Expat to parse it in full. - If Expat retried parsing the token on every parse call, parsing could take quadratic time. - To avoid this, Expat only retries once a significant amount of new data is available. - This function allows disabling this behavior. - </p> - <p> - The <code>enabled</code> argument should be <code>XML_TRUE</code> or <code>XML_FALSE</code>. - </p> - <p> - Returns <code>XML_TRUE</code> on success, and <code>XML_FALSE</code> on error. - </p> -</div> + <div class="fcndef"> + <p> + Large tokens may require many parse calls before enough data is available for + Expat to parse it in full. If Expat retried parsing the token on every parse + call, parsing could take quadratic time. To avoid this, Expat only retries once + a significant amount of new data is available. This function allows disabling + this behavior. + </p> + + <p> + The <code>enabled</code> argument should be <code>XML_TRUE</code> or + <code>XML_FALSE</code>. + </p> -<h3><a name="miscellaneous">Miscellaneous functions</a></h3> + <p> + Returns <code>XML_TRUE</code> on success, and <code>XML_FALSE</code> on error. 
+ </p> + </div> -<p>The functions in this section either obtain state information from -the parser or can be used to dynamically set parser options.</p> + <h3> + <a id="miscellaneous" name="miscellaneous">Miscellaneous functions</a> + </h3> -<h4 id="XML_SetUserData">XML_SetUserData</h4> -<pre class="fcndec"> + <p> + The functions in this section either obtain state information from the parser or + can be used to dynamically set parser options. + </p> + + <h4 id="XML_SetUserData"> + XML_SetUserData + </h4> + + <pre class="fcndec"> void XMLCALL XML_SetUserData(XML_Parser p, void *userData); </pre> -<div class="fcndef"> -This sets the user data pointer that gets passed to handlers. It -overwrites any previous value for this pointer. Note that the -application is responsible for freeing the memory associated with -<code>userData</code> when it is finished with the parser. So if you -call this when there's already a pointer there, and you haven't freed -the memory associated with it, then you've probably just leaked -memory. -</div> + <div class="fcndef"> + This sets the user data pointer that gets passed to handlers. It overwrites any + previous value for this pointer. Note that the application is responsible for + freeing the memory associated with <code>userData</code> when it is finished with + the parser. So if you call this when there's already a pointer there, and you + haven't freed the memory associated with it, then you've probably just leaked + memory. + </div> -<h4 id="XML_GetUserData">XML_GetUserData</h4> -<pre class="fcndec"> + <h4 id="XML_GetUserData"> + XML_GetUserData + </h4> + + <pre class="fcndec"> void * XMLCALL XML_GetUserData(XML_Parser p); </pre> -<div class="fcndef"> -This returns the user data pointer that gets passed to handlers. -It is actually implemented as a macro. -</div> + <div class="fcndef"> + This returns the user data pointer that gets passed to handlers. It is actually + implemented as a macro. 
+ </div> + + <h4 id="XML_UseParserAsHandlerArg"> + XML_UseParserAsHandlerArg + </h4> -<h4 id="XML_UseParserAsHandlerArg">XML_UseParserAsHandlerArg</h4> -<pre class="fcndec"> + <pre class="fcndec"> void XMLCALL XML_UseParserAsHandlerArg(XML_Parser p); </pre> -<div class="fcndef"> -After this is called, handlers receive the parser in their -<code>userData</code> arguments. The user data can still be obtained -using the <code><a href= "#XML_GetUserData" ->XML_GetUserData</a></code> function. -</div> + <div class="fcndef"> + After this is called, handlers receive the parser in their <code>userData</code> + arguments. The user data can still be obtained using the <code><a href= + "#XML_GetUserData">XML_GetUserData</a></code> function. + </div> -<h4 id="XML_SetBase">XML_SetBase</h4> -<pre class="fcndec"> + <h4 id="XML_SetBase"> + XML_SetBase + </h4> + + <pre class="fcndec"> enum XML_Status XMLCALL XML_SetBase(XML_Parser p, const XML_Char *base); </pre> -<div class="fcndef"> -Set the base to be used for resolving relative URIs in system -identifiers. The return value is <code>XML_STATUS_ERROR</code> if -there's no memory to store base, otherwise it's -<code>XML_STATUS_OK</code>. -</div> + <div class="fcndef"> + Set the base to be used for resolving relative URIs in system identifiers. The + return value is <code>XML_STATUS_ERROR</code> if there's no memory to store base, + otherwise it's <code>XML_STATUS_OK</code>. + </div> + + <h4 id="XML_GetBase"> + XML_GetBase + </h4> -<h4 id="XML_GetBase">XML_GetBase</h4> -<pre class="fcndec"> + <pre class="fcndec"> const XML_Char * XMLCALL XML_GetBase(XML_Parser p); </pre> -<div class="fcndef"> -Return the base for resolving relative URIs. -</div> + <div class="fcndef"> + Return the base for resolving relative URIs. 
+ </div> + + <h4 id="XML_GetSpecifiedAttributeCount"> + XML_GetSpecifiedAttributeCount + </h4> -<h4 id="XML_GetSpecifiedAttributeCount">XML_GetSpecifiedAttributeCount</h4> -<pre class="fcndec"> + <pre class="fcndec"> int XMLCALL XML_GetSpecifiedAttributeCount(XML_Parser p); </pre> -<div class="fcndef"> -When attributes are reported to the start handler in the atts vector, -attributes that were explicitly set in the element occur before any -attributes that receive their value from default information in an -ATTLIST declaration. This function returns the number of attributes -that were explicitly set times two, thus giving the offset in the -<code>atts</code> array passed to the start tag handler of the first -attribute set due to defaults. It supplies information for the last -call to a start handler. If called inside a start handler, then that -means the current call. -</div> + <div class="fcndef"> + When attributes are reported to the start handler in the atts vector, attributes + that were explicitly set in the element occur before any attributes that receive + their value from default information in an ATTLIST declaration. This function + returns the number of attributes that were explicitly set times two, thus giving + the offset in the <code>atts</code> array passed to the start tag handler of the + first attribute set due to defaults. It supplies information for the last call to + a start handler. If called inside a start handler, then that means the current + call. + </div> -<h4 id="XML_GetIdAttributeIndex">XML_GetIdAttributeIndex</h4> -<pre class="fcndec"> + <h4 id="XML_GetIdAttributeIndex"> + XML_GetIdAttributeIndex + </h4> + + <pre class="fcndec"> int XMLCALL XML_GetIdAttributeIndex(XML_Parser p); </pre> -<div class="fcndef"> -Returns the index of the ID attribute passed in the atts array in the -last call to <code><a href= "#XML_StartElementHandler" ->XML_StartElementHandler</a></code>, or -1 if there is no ID -attribute. 
If called inside a start handler, then that means the -current call. -</div> + <div class="fcndef"> + Returns the index of the ID attribute passed in the atts array in the last call + to <code><a href="#XML_StartElementHandler">XML_StartElementHandler</a></code>, + or -1 if there is no ID attribute. If called inside a start handler, then that + means the current call. + </div> + + <h4 id="XML_GetAttributeInfo"> + XML_GetAttributeInfo + </h4> -<h4 id="XML_GetAttributeInfo">XML_GetAttributeInfo</h4> -<pre class="fcndec"> + <pre class="fcndec"> const XML_AttrInfo * XMLCALL XML_GetAttributeInfo(XML_Parser parser); </pre> -<pre class="signature"> + + <pre class="signature"> typedef struct { XML_Index nameStart; /* Offset to beginning of the attribute name. */ XML_Index nameEnd; /* Offset after the attribute name's last byte. */ @@ -2518,188 +3391,240 @@ typedef struct { XML_Index valueEnd; /* Offset after the attribute value's last byte. */ } XML_AttrInfo; </pre> -<div class="fcndef"> -Returns an array of <code>XML_AttrInfo</code> structures for the -attribute/value pairs passed in the last call to the -<code>XML_StartElementHandler</code> that were specified -in the start-tag rather than defaulted. Each attribute/value pair counts -as 1; thus the number of entries in the array is -<code>XML_GetSpecifiedAttributeCount(parser) / 2</code>. -</div> + <div class="fcndef"> + Returns an array of <code>XML_AttrInfo</code> structures for the attribute/value + pairs passed in the last call to the <code>XML_StartElementHandler</code> that + were specified in the start-tag rather than defaulted. Each attribute/value pair + counts as 1; thus the number of entries in the array is + <code>XML_GetSpecifiedAttributeCount(parser) / 2</code>. 
+ </div> + + <h4 id="XML_SetEncoding"> + XML_SetEncoding + </h4> -<h4 id="XML_SetEncoding">XML_SetEncoding</h4> -<pre class="fcndec"> + <pre class="fcndec"> enum XML_Status XMLCALL XML_SetEncoding(XML_Parser p, const XML_Char *encoding); </pre> -<div class="fcndef"> -Set the encoding to be used by the parser. It is equivalent to -passing a non-<code>NULL</code> encoding argument to the parser creation functions. -It must not be called after <code><a href= "#XML_Parse" ->XML_Parse</a></code> or <code><a href= "#XML_ParseBuffer" ->XML_ParseBuffer</a></code> have been called on the given parser. -Returns <code>XML_STATUS_OK</code> on success or -<code>XML_STATUS_ERROR</code> on error. -</div> + <div class="fcndef"> + Set the encoding to be used by the parser. It is equivalent to passing a + non-<code>NULL</code> encoding argument to the parser creation functions. It must + not be called after <code><a href="#XML_Parse">XML_Parse</a></code> or + <code><a href="#XML_ParseBuffer">XML_ParseBuffer</a></code> have been called on + the given parser. Returns <code>XML_STATUS_OK</code> on success or + <code>XML_STATUS_ERROR</code> on error. + </div> -<h4 id="XML_SetParamEntityParsing">XML_SetParamEntityParsing</h4> -<pre class="fcndec"> + <h4 id="XML_SetParamEntityParsing"> + XML_SetParamEntityParsing + </h4> + + <pre class="fcndec"> int XMLCALL XML_SetParamEntityParsing(XML_Parser p, enum XML_ParamEntityParsing code); </pre> -<div class="fcndef"> -This enables parsing of parameter entities, including the external -parameter entity that is the external DTD subset, according to -<code>code</code>. 
-The choices for <code>code</code> are: -<ul> -<li><code>XML_PARAM_ENTITY_PARSING_NEVER</code></li> -<li><code>XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE</code></li> -<li><code>XML_PARAM_ENTITY_PARSING_ALWAYS</code></li> -</ul> -<b>Note:</b> If <code>XML_SetParamEntityParsing</code> is called after -<code>XML_Parse</code> or <code>XML_ParseBuffer</code>, then it has -no effect and will always return 0. -</div> + <div class="fcndef"> + This enables parsing of parameter entities, including the external parameter + entity that is the external DTD subset, according to <code>code</code>. The + choices for <code>code</code> are: + <ul> + <li> + <code>XML_PARAM_ENTITY_PARSING_NEVER</code> + </li> + + <li> + <code>XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE</code> + </li> -<h4 id="XML_SetHashSalt">XML_SetHashSalt</h4> -<pre class="fcndec"> + <li> + <code>XML_PARAM_ENTITY_PARSING_ALWAYS</code> + </li> + </ul> + <b>Note:</b> If <code>XML_SetParamEntityParsing</code> is called after + <code>XML_Parse</code> or <code>XML_ParseBuffer</code>, then it has no effect and + will always return 0. + </div> + + <h4 id="XML_SetHashSalt"> + XML_SetHashSalt + </h4> + + <pre class="fcndec"> int XMLCALL XML_SetHashSalt(XML_Parser p, unsigned long hash_salt); </pre> -<div class="fcndef"> -Sets the hash salt to use for internal hash calculations. -Helps in preventing DoS attacks based on predicting hash -function behavior. In order to have an effect this must be called -before parsing has started. Returns 1 if successful, 0 when called -after <code>XML_Parse</code> or <code>XML_ParseBuffer</code>. -<p><b>Note:</b> This call is optional, as the parser will auto-generate -a new random salt value if no value has been set at the start of parsing.</p> -<p><b>Note:</b> One should not call <code>XML_SetHashSalt</code> with a -hash salt value of 0, as this value is used as sentinel value to indicate -that <code>XML_SetHashSalt</code> has <b>not</b> been called. 
Consequently -such a call will have no effect, even if it returns 1.</p> -</div> + <div class="fcndef"> + Sets the hash salt to use for internal hash calculations. Helps in preventing DoS + attacks based on predicting hash function behavior. In order to have an effect + this must be called before parsing has started. Returns 1 if successful, 0 when + called after <code>XML_Parse</code> or <code>XML_ParseBuffer</code>. + <p> + <b>Note:</b> This call is optional, as the parser will auto-generate a new + random salt value if no value has been set at the start of parsing. + </p> + + <p> + <b>Note:</b> One should not call <code>XML_SetHashSalt</code> with a hash salt + value of 0, as this value is used as sentinel value to indicate that + <code>XML_SetHashSalt</code> has <b>not</b> been called. Consequently such a + call will have no effect, even if it returns 1. + </p> + </div> + + <h4 id="XML_UseForeignDTD"> + XML_UseForeignDTD + </h4> -<h4 id="XML_UseForeignDTD">XML_UseForeignDTD</h4> -<pre class="fcndec"> + <pre class="fcndec"> enum XML_Error XMLCALL XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD); </pre> -<div class="fcndef"> -<p>This function allows an application to provide an external subset -for the document type declaration for documents which do not specify -an external subset of their own. For documents which specify an -external subset in their DOCTYPE declaration, the application-provided -subset will be ignored. If the document does not contain a DOCTYPE -declaration at all and <code>useDTD</code> is true, the -application-provided subset will be parsed, but the -<code>startDoctypeDeclHandler</code> and -<code>endDoctypeDeclHandler</code> functions, if set, will not be -called. 
The setting of parameter entity parsing, controlled using -<code><a href= "#XML_SetParamEntityParsing" ->XML_SetParamEntityParsing</a></code>, will be honored.</p> + <div class="fcndef"> + <p> + This function allows an application to provide an external subset for the + document type declaration for documents which do not specify an external subset + of their own. For documents which specify an external subset in their DOCTYPE + declaration, the application-provided subset will be ignored. If the document + does not contain a DOCTYPE declaration at all and <code>useDTD</code> is true, + the application-provided subset will be parsed, but the + <code>startDoctypeDeclHandler</code> and <code>endDoctypeDeclHandler</code> + functions, if set, will not be called. The setting of parameter entity parsing, + controlled using <code><a href= + "#XML_SetParamEntityParsing">XML_SetParamEntityParsing</a></code>, will be + honored. + </p> -<p>The application-provided external subset is read by calling the -external entity reference handler set via <code><a href= -"#XML_SetExternalEntityRefHandler" ->XML_SetExternalEntityRefHandler</a></code> with both -<code>publicId</code> and <code>systemId</code> set to <code>NULL</code>.</p> + <p> + The application-provided external subset is read by calling the external entity + reference handler set via <code><a href= + "#XML_SetExternalEntityRefHandler">XML_SetExternalEntityRefHandler</a></code> + with both <code>publicId</code> and <code>systemId</code> set to + <code>NULL</code>. + </p> -<p>If this function is called after parsing has begun, it returns -<code>XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING</code> and ignores -<code>useDTD</code>. If called when Expat has been compiled without -DTD support, it returns -<code>XML_ERROR_FEATURE_REQUIRES_XML_DTD</code>. 
Otherwise, it -returns <code>XML_ERROR_NONE</code>.</p> + <p> + If this function is called after parsing has begun, it returns + <code>XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING</code> and ignores + <code>useDTD</code>. If called when Expat has been compiled without DTD + support, it returns <code>XML_ERROR_FEATURE_REQUIRES_XML_DTD</code>. Otherwise, + it returns <code>XML_ERROR_NONE</code>. + </p> -<p><b>Note:</b> For the purpose of checking WFC: Entity Declared, passing -<code>useDTD == XML_TRUE</code> will make the parser behave as if -the document had a DTD with an external subset. This holds true even if -the external entity reference handler returns without action.</p> -</div> + <p> + <b>Note:</b> For the purpose of checking WFC: Entity Declared, passing + <code>useDTD == XML_TRUE</code> will make the parser behave as if the document + had a DTD with an external subset. This holds true even if the external entity + reference handler returns without action. + </p> + </div> -<h4 id="XML_SetReturnNSTriplet">XML_SetReturnNSTriplet</h4> -<pre class="fcndec"> + <h4 id="XML_SetReturnNSTriplet"> + XML_SetReturnNSTriplet + </h4> + + <pre class="fcndec"> void XMLCALL XML_SetReturnNSTriplet(XML_Parser parser, int do_nst); </pre> -<div class="fcndef"> -<p> -This function only has an effect when using a parser created with -<code><a href= "#XML_ParserCreateNS" >XML_ParserCreateNS</a></code>, -i.e. when namespace processing is in effect. The <code>do_nst</code> -sets whether or not prefixes are returned with names qualified with a -namespace prefix. If this function is called with <code>do_nst</code> -non-zero, then afterwards namespace qualified names (that is qualified -with a prefix as opposed to belonging to a default namespace) are -returned as a triplet with the three parts separated by the namespace -separator specified when the parser was created. 
The order of -returned parts is URI, local name, and prefix.</p> <p>If -<code>do_nst</code> is zero, then namespaces are reported in the -default manner, URI then local_name separated by the namespace -separator.</p> -</div> + <div class="fcndef"> + <p> + This function only has an effect when using a parser created with + <code><a href="#XML_ParserCreateNS">XML_ParserCreateNS</a></code>, i.e. when + namespace processing is in effect. The <code>do_nst</code> sets whether or not + prefixes are returned with names qualified with a namespace prefix. If this + function is called with <code>do_nst</code> non-zero, then afterwards namespace + qualified names (that is qualified with a prefix as opposed to belonging to a + default namespace) are returned as a triplet with the three parts separated by + the namespace separator specified when the parser was created. The order of + returned parts is URI, local name, and prefix. + </p> + + <p> + If <code>do_nst</code> is zero, then namespaces are reported in the default + manner, URI then local_name separated by the namespace separator. + </p> + </div> -<h4 id="XML_DefaultCurrent">XML_DefaultCurrent</h4> -<pre class="fcndec"> + <h4 id="XML_DefaultCurrent"> + XML_DefaultCurrent + </h4> + + <pre class="fcndec"> void XMLCALL XML_DefaultCurrent(XML_Parser parser); </pre> -<div class="fcndef"> -This can be called within a handler for a start element, end element, -processing instruction or character data. It causes the corresponding -markup to be passed to the default handler set by <code><a -href="#XML_SetDefaultHandler" >XML_SetDefaultHandler</a></code> or -<code><a href="#XML_SetDefaultHandlerExpand" ->XML_SetDefaultHandlerExpand</a></code>. It does nothing if there is -not a default handler. -</div> + <div class="fcndef"> + This can be called within a handler for a start element, end element, processing + instruction or character data. 
It causes the corresponding markup to be passed to + the default handler set by <code><a href= + "#XML_SetDefaultHandler">XML_SetDefaultHandler</a></code> or <code><a href= + "#XML_SetDefaultHandlerExpand">XML_SetDefaultHandlerExpand</a></code>. It does + nothing if there is not a default handler. + </div> + + <h4 id="XML_ExpatVersion"> + XML_ExpatVersion + </h4> -<h4 id="XML_ExpatVersion">XML_ExpatVersion</h4> -<pre class="fcndec"> + <pre class="fcndec"> XML_LChar * XMLCALL XML_ExpatVersion(); </pre> -<div class="fcndef"> -Return the library version as a string (e.g. <code>"expat_1.95.1"</code>). -</div> + <div class="fcndef"> + Return the library version as a string (e.g. <code>"expat_1.95.1"</code>). + </div> -<h4 id="XML_ExpatVersionInfo">XML_ExpatVersionInfo</h4> -<pre class="fcndec"> + <h4 id="XML_ExpatVersionInfo"> + XML_ExpatVersionInfo + </h4> + + <pre class="fcndec"> struct XML_Expat_Version XMLCALL XML_ExpatVersionInfo(); </pre> -<pre class="signature"> + + <pre class="signature"> typedef struct { int major; int minor; int micro; } XML_Expat_Version; </pre> -<div class="fcndef"> -Return the library version information as a structure. -Some macros are also defined that support compile-time tests of the -library version: -<ul> -<li><code>XML_MAJOR_VERSION</code></li> -<li><code>XML_MINOR_VERSION</code></li> -<li><code>XML_MICRO_VERSION</code></li> -</ul> -Testing these constants is currently the best way to determine if -particular parts of the Expat API are available. -</div> + <div class="fcndef"> + Return the library version information as a structure. Some macros are also + defined that support compile-time tests of the library version: + <ul> + <li> + <code>XML_MAJOR_VERSION</code> + </li> + + <li> + <code>XML_MINOR_VERSION</code> + </li> + + <li> + <code>XML_MICRO_VERSION</code> + </li> + </ul> + Testing these constants is currently the best way to determine if particular + parts of the Expat API are available. 
+ </div> -<h4 id="XML_GetFeatureList">XML_GetFeatureList</h4> -<pre class="fcndec"> + <h4 id="XML_GetFeatureList"> + XML_GetFeatureList + </h4> + + <pre class="fcndec"> const XML_Feature * XMLCALL XML_GetFeatureList(); </pre> -<pre class="signature"> + + <pre class="signature"> enum XML_FeatureEnum { XML_FEATURE_END = 0, XML_FEATURE_UNICODE, @@ -2719,114 +3644,140 @@ typedef struct { long int value; } XML_Feature; </pre> -<div class="fcndef"> -<p>Returns a list of "feature" records, providing details on how -Expat was configured at compile time. Most applications should not -need to worry about this, but this information is otherwise not -available from Expat. This function allows code that does need to -check these features to do so at runtime.</p> + <div class="fcndef"> + <p> + Returns a list of "feature" records, providing details on how Expat was + configured at compile time. Most applications should not need to worry about + this, but this information is otherwise not available from Expat. This function + allows code that does need to check these features to do so at runtime. + </p> + + <p> + The return value is an array of <code>XML_Feature</code>, terminated by a + record with a <code>feature</code> of <code>XML_FEATURE_END</code> and + <code>name</code> of <code>NULL</code>, identifying the feature-test macros + Expat was compiled with. Since an application that requires this kind of + information needs to determine the type of character the <code>name</code> + points to, records for the <code>XML_FEATURE_SIZEOF_XML_CHAR</code> and + <code>XML_FEATURE_SIZEOF_XML_LCHAR</code> will be located at the beginning of + the list, followed by <code>XML_FEATURE_UNICODE</code> and + <code>XML_FEATURE_UNICODE_WCHAR_T</code>, if they are present at all. + </p> + + <p> + Some features have an associated value. If there isn't an associated value, the + <code>value</code> field is set to 0. 
At this time, the following features have + been defined to have values: + </p> -<p>The return value is an array of <code>XML_Feature</code>, -terminated by a record with a <code>feature</code> of -<code>XML_FEATURE_END</code> and <code>name</code> of <code>NULL</code>, -identifying the feature-test macros Expat was compiled with. Since an -application that requires this kind of information needs to determine -the type of character the <code>name</code> points to, records for the -<code>XML_FEATURE_SIZEOF_XML_CHAR</code> and -<code>XML_FEATURE_SIZEOF_XML_LCHAR</code> will be located at the -beginning of the list, followed by <code>XML_FEATURE_UNICODE</code> -and <code>XML_FEATURE_UNICODE_WCHAR_T</code>, if they are present at -all.</p> + <dl> + <dt> + <code>XML_FEATURE_SIZEOF_XML_CHAR</code> + </dt> -<p>Some features have an associated value. If there isn't an -associated value, the <code>value</code> field is set to 0. At this -time, the following features have been defined to have values:</p> + <dd> + The number of bytes occupied by one <code>XML_Char</code> character. + </dd> -<dl> - <dt><code>XML_FEATURE_SIZEOF_XML_CHAR</code></dt> - <dd>The number of bytes occupied by one <code>XML_Char</code> - character.</dd> - <dt><code>XML_FEATURE_SIZEOF_XML_LCHAR</code></dt> - <dd>The number of bytes occupied by one <code>XML_LChar</code> - character.</dd> - <dt><code>XML_FEATURE_CONTEXT_BYTES</code></dt> - <dd>The maximum number of characters of context which can be - reported by <code><a href= "#XML_GetInputContext" - >XML_GetInputContext</a></code>.</dd> -</dl> -</div> + <dt> + <code>XML_FEATURE_SIZEOF_XML_LCHAR</code> + </dt> -<h4 id="XML_FreeContentModel">XML_FreeContentModel</h4> -<pre class="fcndec"> + <dd> + The number of bytes occupied by one <code>XML_LChar</code> character. 
+ </dd> + + <dt> + <code>XML_FEATURE_CONTEXT_BYTES</code> + </dt> + + <dd> + The maximum number of characters of context which can be reported by + <code><a href="#XML_GetInputContext">XML_GetInputContext</a></code>. + </dd> + </dl> + </div> + + <h4 id="XML_FreeContentModel"> + XML_FreeContentModel + </h4> + + <pre class="fcndec"> void XMLCALL XML_FreeContentModel(XML_Parser parser, XML_Content *model); </pre> -<div class="fcndef"> -Function to deallocate the <code>model</code> argument passed to the -<code>XML_ElementDeclHandler</code> callback set using <code><a -href="#XML_SetElementDeclHandler" >XML_ElementDeclHandler</a></code>. -This function should not be used for any other purpose. -</div> + <div class="fcndef"> + Function to deallocate the <code>model</code> argument passed to the + <code>XML_ElementDeclHandler</code> callback set using <code><a href= + "#XML_SetElementDeclHandler">XML_SetElementDeclHandler</a></code>. This function + should not be used for any other purpose. + </div> -<p>The following functions allow external code to share the memory -allocator an <code>XML_Parser</code> has been configured to use. This -is especially useful for third-party libraries that interact with a -parser object created by application code, or heavily layered -applications. This can be essential when using dynamically loaded -libraries which use different C standard libraries (this can happen on -Windows, at least).</p> + <p> + The following functions allow external code to share the memory allocator an + <code>XML_Parser</code> has been configured to use. This is especially useful for + third-party libraries that interact with a parser object created by application + code, or heavily layered applications. This can be essential when using + dynamically loaded libraries which use different C standard libraries (this can + happen on Windows, at least). 
+ </p> -<h4 id="XML_MemMalloc">XML_MemMalloc</h4> -<pre class="fcndec"> + <h4 id="XML_MemMalloc"> + XML_MemMalloc + </h4> + + <pre class="fcndec"> void * XMLCALL XML_MemMalloc(XML_Parser parser, size_t size); </pre> -<div class="fcndef"> -Allocate <code>size</code> bytes of memory using the allocator the -<code>parser</code> object has been configured to use. Returns a -pointer to the memory or <code>NULL</code> on failure. Memory allocated in this -way must be freed using <code><a href="#XML_MemFree" ->XML_MemFree</a></code>. -</div> + <div class="fcndef"> + Allocate <code>size</code> bytes of memory using the allocator the + <code>parser</code> object has been configured to use. Returns a pointer to the + memory or <code>NULL</code> on failure. Memory allocated in this way must be + freed using <code><a href="#XML_MemFree">XML_MemFree</a></code>. + </div> + + <h4 id="XML_MemRealloc"> + XML_MemRealloc + </h4> -<h4 id="XML_MemRealloc">XML_MemRealloc</h4> -<pre class="fcndec"> + <pre class="fcndec"> void * XMLCALL XML_MemRealloc(XML_Parser parser, void *ptr, size_t size); </pre> -<div class="fcndef"> -Allocate <code>size</code> bytes of memory using the allocator the -<code>parser</code> object has been configured to use. -<code>ptr</code> must point to a block of memory allocated by <code><a -href="#XML_MemMalloc" >XML_MemMalloc</a></code> or -<code>XML_MemRealloc</code>, or be <code>NULL</code>. This function tries to -expand the block pointed to by <code>ptr</code> if possible. Returns -a pointer to the memory or <code>NULL</code> on failure. On success, the original -block has either been expanded or freed. On failure, the original -block has not been freed; the caller is responsible for freeing the -original block. Memory allocated in this way must be freed using -<code><a href="#XML_MemFree" ->XML_MemFree</a></code>. 
-</div> + <div class="fcndef"> + Allocate <code>size</code> bytes of memory using the allocator the + <code>parser</code> object has been configured to use. <code>ptr</code> must + point to a block of memory allocated by <code><a href= + "#XML_MemMalloc">XML_MemMalloc</a></code> or <code>XML_MemRealloc</code>, or be + <code>NULL</code>. This function tries to expand the block pointed to by + <code>ptr</code> if possible. Returns a pointer to the memory or + <code>NULL</code> on failure. On success, the original block has either been + expanded or freed. On failure, the original block has not been freed; the caller + is responsible for freeing the original block. Memory allocated in this way must + be freed using <code><a href="#XML_MemFree">XML_MemFree</a></code>. + </div> -<h4 id="XML_MemFree">XML_MemFree</h4> -<pre class="fcndec"> + <h4 id="XML_MemFree"> + XML_MemFree + </h4> + + <pre class="fcndec"> void XMLCALL XML_MemFree(XML_Parser parser, void *ptr); </pre> -<div class="fcndef"> -Free a block of memory pointed to by <code>ptr</code>. The block must -have been allocated by <code><a href="#XML_MemMalloc" ->XML_MemMalloc</a></code> or <code>XML_MemRealloc</code>, or be <code>NULL</code>. -</div> - -<hr /> + <div class="fcndef"> + Free a block of memory pointed to by <code>ptr</code>. The block must have been + allocated by <code><a href="#XML_MemMalloc">XML_MemMalloc</a></code> or + <code>XML_MemRealloc</code>, or be <code>NULL</code>. + </div> - <div class="footer"> - Found a bug in the documentation? - <a href="https://github.com/libexpat/libexpat/issues">Please file a bug report.</a> - </div> + <hr /> -</div> -</body> + <div class="footer"> + Found a bug in the documentation? <a href= + "https://github.com/libexpat/libexpat/issues">Please file a bug report.</a> + </div> + </div> + </body> </html> diff --git a/doc/xmlwf.1 b/doc/xmlwf.1 index aa2e9c218007..75318fccc856 100644 --- a/doc/xmlwf.1 +++ b/doc/xmlwf.1 @@ -5,7 +5,7 @@ \\$2 \(la\\$1\(ra\\$3 .. 
.if \n(.g .mso www.tmac -.TH XMLWF 1 "September 24, 2025" "" "" +.TH XMLWF 1 "March 17, 2026" "" "" .SH NAME xmlwf \- Determines if an XML document is well-formed .SH SYNOPSIS @@ -97,7 +97,7 @@ The amplification factor is calculated as .. .nf - amplification := (direct + indirect) / direct + amplification := (direct + indirect) / direct .fi @@ -105,7 +105,7 @@ The amplification factor is calculated as .. .nf - amplification := allocated / direct + amplification := allocated / direct .fi @@ -235,7 +235,7 @@ the operating system reporting memory in a strange way; there is not a leak in \fBxmlwf\fR. .TP \*(T<\fB\-s\fR\*(T> -Prints an error if the document is not standalone. +Prints an error if the document is not standalone. A document is standalone if it has no external subset and no references to parameter entities. .TP @@ -261,6 +261,7 @@ page. See also \*(T<\fB\-e\fR\*(T>. .TP \*(T<\fB\-x\fR\*(T> Turns on parsing external entities. +(CAREFUL! This makes xmlwf vulnerable to external entity attacks (XXE).) Non-validating parsers are not required to resolve external entities, or even expand entities at all. @@ -275,6 +276,7 @@ This is an example of an internal entity: .nf <!ENTITY vers '1.0.2'> + .fi And here are some examples of external entities: @@ -283,6 +285,7 @@ And here are some examples of external entities: <!ENTITY header SYSTEM "header\-&vers;.xml"> (parsed) <!ENTITY logo SYSTEM "logo.png" PNG> (unparsed) + .fi .TP \*(T<\fB\-\-\fR\*(T> @@ -293,6 +296,7 @@ starts with a hyphen. For example: .nf xmlwf \-\- \-myfile.xml + .fi will run \fBxmlwf\fR on the file @@ -307,7 +311,7 @@ input file cannot be opened, \fBxmlwf\fR prints a single line describing the problem to standard output. .PP If the \*(T<\fB\-k\fR\*(T> option is not provided, \fBxmlwf\fR -halts upon encountering a well-formedness or output-file error. +halts upon encountering a well-formedness or output-file error. 
If \*(T<\fB\-k\fR\*(T> is provided, \fBxmlwf\fR continues processing the remaining input files, describing problems found with any of them. .SH "EXIT STATUS" @@ -344,6 +348,7 @@ me, I'd like to add this information to this manpage. The Expat home page: https://libexpat.github.io/ The W3 XML 1.0 specification (fourth edition): https://www.w3.org/TR/2006/REC\-xml\-20060816/ Billion laughs attack: https://en.wikipedia.org/wiki/Billion_laughs_attack + .fi .SH AUTHOR This manual page was originally written by Scott Bronson <\*(T<bronson@rinspin.com\*(T>> diff --git a/doc/xmlwf.xml b/doc/xmlwf.xml index 01316bb16627..c4fe92d44fb4 100644 --- a/doc/xmlwf.xml +++ b/doc/xmlwf.xml @@ -9,7 +9,7 @@ Copyright (c) 2001 Scott Bronson <bronson@rinspin.com> Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> Copyright (c) 2009 Karl Waclawek <karl@waclawek.net> - Copyright (c) 2016-2025 Sebastian Pipping <sebastian@pipping.org> + Copyright (c) 2016-2026 Sebastian Pipping <sebastian@pipping.org> Copyright (c) 2016 Ardo van Rangelrooij <ardo@debian.org> Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> Copyright (c) 2020 Joe Orton <jorton@redhat.com> @@ -21,7 +21,7 @@ "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" [ <!ENTITY dhfirstname "<firstname>Scott</firstname>"> <!ENTITY dhsurname "<surname>Bronson</surname>"> - <!ENTITY dhdate "<date>September 24, 2025</date>"> + <!ENTITY dhdate "<date>March 17, 2026</date>"> <!-- Please adjust this^^ date whenever cutting a new release. 
--> <!ENTITY dhsection "<manvolnum>1</manvolnum>"> <!ENTITY dhemail "<email>bronson@rinspin.com</email>"> @@ -29,8 +29,8 @@ <!ENTITY dhucpackage "<refentrytitle>XMLWF</refentrytitle>"> <!ENTITY dhpackage "xmlwf"> - <!ENTITY debian "<productname>Debian GNU/Linux</productname>"> <!ENTITY gnu "<acronym>GNU</acronym>"> + <!ENTITY debian "<productname>Debian &gnu;/Linux</productname>"> ]> <refentry> @@ -84,73 +84,77 @@ <title>DESCRIPTION</title> <para> - <command>&dhpackage;</command> uses the Expat library to - determine if an XML document is well-formed. It is - non-validating. - </para> - - <para> - If you do not specify any files on the command-line, and you - have a recent version of <command>&dhpackage;</command>, the - input file will be read from standard input. - </para> + <command>&dhpackage;</command> uses the Expat library to + determine if an XML document is well-formed. It is + non-validating. + </para> + <para> + If you do not specify any files on the command-line, and you + have a recent version of <command>&dhpackage;</command>, the + input file will be read from standard input. + </para> </refsect1> <refsect1> <title>WELL-FORMED DOCUMENTS</title> - - <para> - A well-formed document must adhere to the - following rules: - </para> - - <itemizedlist> - <listitem><para> - The file begins with an XML declaration. For instance, - <literal><?xml version="1.0" standalone="yes"?></literal>. - <emphasis>NOTE</emphasis>: - <command>&dhpackage;</command> does not currently - check for a valid XML declaration. - </para></listitem> - <listitem><para> - Every start tag is either empty (<tag/>) - or has a corresponding end tag. - </para></listitem> - <listitem><para> - There is exactly one root element. This element must contain - all other elements in the document. Only comments, white - space, and processing instructions may come after the close - of the root element. - </para></listitem> - <listitem><para> - All elements nest properly. 
- </para></listitem> - <listitem><para> - All attribute values are enclosed in quotes (either single - or double). - </para></listitem> + <para> + A well-formed document must adhere to the + following rules: + </para> + <itemizedlist> + <listitem> + <para> + The file begins with an XML declaration. For instance, + <literal><?xml version="1.0" standalone="yes"?></literal>. + <emphasis>NOTE</emphasis>: + <command>&dhpackage;</command> does not currently + check for a valid XML declaration. + </para> + </listitem> + <listitem> + <para> + Every start tag is either empty (<tag/>) + or has a corresponding end tag. + </para> + </listitem> + <listitem> + <para> + There is exactly one root element. This element must contain + all other elements in the document. Only comments, white + space, and processing instructions may come after the close + of the root element. + </para> + </listitem> + <listitem> + <para> + All elements nest properly. + </para> + </listitem> + <listitem> + <para> + All attribute values are enclosed in quotes (either single + or double). + </para> + </listitem> </itemizedlist> - - <para> - If the document has a DTD, and it strictly complies with that - DTD, then the document is also considered <emphasis>valid</emphasis>. - <command>&dhpackage;</command> is a non-validating parser -- - it does not check the DTD. However, it does support - external entities (see the <option>-x</option> option). - </para> + <para> + If the document has a DTD, and it strictly complies with that + DTD, then the document is also considered <emphasis>valid</emphasis>. + <command>&dhpackage;</command> is a non-validating parser -- + it does not check the DTD. However, it does support + external entities (see the <option>-x</option> option). 
+ </para> </refsect1> <refsect1> <title>OPTIONS</title> - -<para> -When an option includes an argument, you may specify the argument either -separately ("<option>-d</option> <replaceable>output</replaceable>") or concatenated with the -option ("<option>-d</option><replaceable>output</replaceable>"). <command>&dhpackage;</command> -supports both. -</para> - + <para> + When an option includes an argument, you may specify the argument either + separately ("<option>-d</option> <replaceable>output</replaceable>") or concatenated with the + option ("<option>-d</option><replaceable>output</replaceable>"). <command>&dhpackage;</command> + supports both. + </para> <variablelist> <varlistentry> @@ -166,13 +170,13 @@ supports both. The amplification factor is calculated as .. </para> <literallayout> - amplification := (direct + indirect) / direct + amplification := (direct + indirect) / direct </literallayout> <para> .. with regard to use of entities and .. </para> <literallayout> - amplification := allocated / direct + amplification := allocated / direct </literallayout> <para> .. with regard to dynamic memory while parsing. @@ -214,60 +218,60 @@ supports both. <varlistentry> <term><option>-c</option></term> <listitem> - <para> - If the input file is well-formed and <command>&dhpackage;</command> - doesn't encounter any errors, the input file is simply copied to - the output directory unchanged. - This implies no namespaces (turns off <option>-n</option>) and - requires <option>-d</option> to specify an output directory. - </para> + <para> + If the input file is well-formed and <command>&dhpackage;</command> + doesn't encounter any errors, the input file is simply copied to + the output directory unchanged. + This implies no namespaces (turns off <option>-n</option>) and + requires <option>-d</option> to specify an output directory. 
+ </para> </listitem> </varlistentry> <varlistentry> <term><option>-d</option> <replaceable>output-dir</replaceable></term> <listitem> - <para> - Specifies a directory to contain transformed - representations of the input files. - By default, <option>-d</option> outputs a canonical representation - (described below). - You can select different output formats using <option>-c</option>, - <option>-m</option> and <option>-N</option>. - </para> - <para> - The output filenames will - be exactly the same as the input filenames or "STDIN" if the input is - coming from standard input. Therefore, you must be careful that the - output file does not go into the same directory as the input - file. Otherwise, <command>&dhpackage;</command> will delete the - input file before it generates the output file (just like running - <literal>cat < file > file</literal> in most shells). - </para> - <para> - Two structurally equivalent XML documents have a byte-for-byte - identical canonical XML representation. - Note that ignorable white space is considered significant and - is treated equivalently to data. - More on canonical XML can be found at - http://www.jclark.com/xml/canonxml.html . - </para> + <para> + Specifies a directory to contain transformed + representations of the input files. + By default, <option>-d</option> outputs a canonical representation + (described below). + You can select different output formats using <option>-c</option>, + <option>-m</option> and <option>-N</option>. + </para> + <para> + The output filenames will + be exactly the same as the input filenames or "STDIN" if the input is + coming from standard input. Therefore, you must be careful that the + output file does not go into the same directory as the input + file. Otherwise, <command>&dhpackage;</command> will delete the + input file before it generates the output file (just like running + <literal>cat < file > file</literal> in most shells). 
+ </para> + <para> + Two structurally equivalent XML documents have a byte-for-byte + identical canonical XML representation. + Note that ignorable white space is considered significant and + is treated equivalently to data. + More on canonical XML can be found at + http://www.jclark.com/xml/canonxml.html . + </para> </listitem> </varlistentry> <varlistentry> <term><option>-e</option> <replaceable>encoding</replaceable></term> <listitem> - <para> - Specifies the character encoding for the document, overriding - any document encoding declaration. <command>&dhpackage;</command> - supports four built-in encodings: - <literal>US-ASCII</literal>, - <literal>UTF-8</literal>, - <literal>UTF-16</literal>, and - <literal>ISO-8859-1</literal>. - Also see the <option>-w</option> option. - </para> + <para> + Specifies the character encoding for the document, overriding + any document encoding declaration. <command>&dhpackage;</command> + supports four built-in encodings: + <literal>US-ASCII</literal>, + <literal>UTF-8</literal>, + <literal>UTF-16</literal>, and + <literal>ISO-8859-1</literal>. + Also see the <option>-w</option> option. + </para> </listitem> </varlistentry> @@ -312,21 +316,21 @@ supports both. <varlistentry> <term><option>-m</option></term> <listitem> - <para> - Outputs some strange sort of XML file that completely - describes the input file, including character positions. - Requires <option>-d</option> to specify an output file. - </para> + <para> + Outputs some strange sort of XML file that completely + describes the input file, including character positions. + Requires <option>-d</option> to specify an output file. + </para> </listitem> </varlistentry> <varlistentry> <term><option>-n</option></term> <listitem> - <para> - Turns on namespace processing. (describe namespaces) - <option>-c</option> disables namespaces. - </para> + <para> + Turns on namespace processing. (describe namespaces) + <option>-c</option> disables namespaces. 
+ </para> </listitem> </varlistentry> @@ -334,9 +338,9 @@ supports both. <term><option>-N</option></term> <listitem> <para> - Adds a doctype and notation declarations to canonical XML output. - This matches the example output used by the formal XML test cases. - Requires <option>-d</option> to specify an output file. + Adds a doctype and notation declarations to canonical XML output. + This matches the example output used by the formal XML test cases. + Requires <option>-d</option> to specify an output file. </para> </listitem> </varlistentry> @@ -344,15 +348,15 @@ supports both. <varlistentry> <term><option>-p</option></term> <listitem> - <para> - Tells <command>&dhpackage;</command> to process external DTDs and parameter - entities. - </para> - <para> - Normally <command>&dhpackage;</command> never parses parameter - entities. <option>-p</option> tells it to always parse them. - <option>-p</option> implies <option>-x</option>. - </para> + <para> + Tells <command>&dhpackage;</command> to process external DTDs and parameter + entities. + </para> + <para> + Normally <command>&dhpackage;</command> never parses parameter + entities. <option>-p</option> tells it to always parse them. + <option>-p</option> implies <option>-x</option>. + </para> </listitem> </varlistentry> @@ -369,47 +373,47 @@ supports both. <varlistentry> <term><option>-r</option></term> <listitem> - <para> - Normally <command>&dhpackage;</command> memory-maps the XML file - before parsing; this can result in faster parsing on many - platforms. - <option>-r</option> turns off memory-mapping and uses normal file - IO calls instead. - Of course, memory-mapping is automatically turned off - when reading from standard input. 
- </para> - <para> - Use of memory-mapping can cause some platforms to report - substantially higher memory usage for - <command>&dhpackage;</command>, but this appears to be a matter of - the operating system reporting memory in a strange way; there is - not a leak in <command>&dhpackage;</command>. - </para> + <para> + Normally <command>&dhpackage;</command> memory-maps the XML file + before parsing; this can result in faster parsing on many + platforms. + <option>-r</option> turns off memory-mapping and uses normal file + IO calls instead. + Of course, memory-mapping is automatically turned off + when reading from standard input. + </para> + <para> + Use of memory-mapping can cause some platforms to report + substantially higher memory usage for + <command>&dhpackage;</command>, but this appears to be a matter of + the operating system reporting memory in a strange way; there is + not a leak in <command>&dhpackage;</command>. + </para> </listitem> </varlistentry> <varlistentry> <term><option>-s</option></term> <listitem> - <para> - Prints an error if the document is not standalone. - A document is standalone if it has no external subset and no - references to parameter entities. - </para> + <para> + Prints an error if the document is not standalone. + A document is standalone if it has no external subset and no + references to parameter entities. + </para> </listitem> </varlistentry> <varlistentry> <term><option>-t</option></term> <listitem> - <para> - Turns on timings. This tells Expat to parse the entire file, - but not perform any processing. - This gives a fairly accurate idea of the raw speed of Expat itself - without client overhead. - <option>-t</option> turns off most of the output options - (<option>-d</option>, <option>-m</option>, <option>-c</option>, ...). - </para> + <para> + Turns on timings. This tells Expat to parse the entire file, + but not perform any processing. 
+ This gives a fairly accurate idea of the raw speed of Expat itself + without client overhead. + <option>-t</option> turns off most of the output options + (<option>-d</option>, <option>-m</option>, <option>-c</option>, ...). + </para> </listitem> </varlistentry> @@ -417,104 +421,102 @@ supports both. <term><option>-v</option></term> <term><option>--version</option></term> <listitem> - <para> - Prints the version of the Expat library being used, including some - information on the compile-time configuration of the library, and - then exits. - </para> + <para> + Prints the version of the Expat library being used, including some + information on the compile-time configuration of the library, and + then exits. + </para> </listitem> </varlistentry> <varlistentry> <term><option>-w</option></term> <listitem> - <para> - Enables support for Windows code pages. - Normally, <command>&dhpackage;</command> will throw an error if it - runs across an encoding that it is not equipped to handle itself. With - <option>-w</option>, <command>&dhpackage;</command> will try to use a Windows code - page. See also <option>-e</option>. - </para> + <para> + Enables support for Windows code pages. + Normally, <command>&dhpackage;</command> will throw an error if it + runs across an encoding that it is not equipped to handle itself. With + <option>-w</option>, <command>&dhpackage;</command> will try to use a Windows code + page. See also <option>-e</option>. + </para> </listitem> </varlistentry> <varlistentry> <term><option>-x</option></term> <listitem> - <para> - Turns on parsing external entities. - </para> -<para> - Non-validating parsers are not required to resolve external - entities, or even expand entities at all. - Expat always expands internal entities (?), - but external entity parsing must be enabled explicitly. - </para> - <para> - External entities are simply entities that obtain their - data from outside the XML file currently being parsed. 
- </para> - <para> - This is an example of an internal entity: -<literallayout> + <para> + Turns on parsing external entities. + (CAREFUL! This makes xmlwf vulnerable to external entity attacks (XXE).) + </para> + <para> + Non-validating parsers are not required to resolve external + entities, or even expand entities at all. + Expat always expands internal entities (?), + but external entity parsing must be enabled explicitly. + </para> + <para> + External entities are simply entities that obtain their + data from outside the XML file currently being parsed. + </para> + <para> + This is an example of an internal entity: + <literallayout> <!ENTITY vers '1.0.2'> -</literallayout> - </para> - <para> - And here are some examples of external entities: + </literallayout> + </para> + <para> + And here are some examples of external entities: -<literallayout> + <literallayout> <!ENTITY header SYSTEM "header-&vers;.xml"> (parsed) <!ENTITY logo SYSTEM "logo.png" PNG> (unparsed) -</literallayout> - - </para> + </literallayout> + </para> </listitem> </varlistentry> <varlistentry> <term><option>--</option></term> <listitem> - <para> - (Two hyphens.) - Terminates the list of options. This is only needed if a filename - starts with a hyphen. For example: - </para> -<literallayout> + <para> + (Two hyphens.) + Terminates the list of options. This is only needed if a filename + starts with a hyphen. For example: + </para> + <literallayout> &dhpackage; -- -myfile.xml -</literallayout> - <para> - will run <command>&dhpackage;</command> on the file - <filename>-myfile.xml</filename>. - </para> + </literallayout> + <para> + will run <command>&dhpackage;</command> on the file + <filename>-myfile.xml</filename>. + </para> </listitem> </varlistentry> </variablelist> - - <para> - Older versions of <command>&dhpackage;</command> do not support - reading from standard input. - </para> + <para> + Older versions of <command>&dhpackage;</command> do not support + reading from standard input. 
+ </para> </refsect1> <refsect1> - <title>OUTPUT</title> - <para> - <command>&dhpackage;</command> outputs nothing for files which are problem-free. - If any input file is not well-formed, or if the output for any - input file cannot be opened, <command>&dhpackage;</command> prints a single - line describing the problem to standard output. + <title>OUTPUT</title> + <para><command>&dhpackage;</command> outputs nothing for files which are problem-free. + If any input file is not well-formed, or if the output for any + input file cannot be opened, <command>&dhpackage;</command> prints a single + line describing the problem to standard output. </para> <para> - If the <option>-k</option> option is not provided, <command>&dhpackage;</command> - halts upon encountering a well-formedness or output-file error. - If <option>-k</option> is provided, <command>&dhpackage;</command> continues - processing the remaining input files, describing problems found with any of them. + If the <option>-k</option> option is not provided, <command>&dhpackage;</command> + halts upon encountering a well-formedness or output-file error. + If <option>-k</option> is provided, <command>&dhpackage;</command> continues + processing the remaining input files, describing problems found with any of them. </para> </refsect1> <refsect1> - <title>EXIT STATUS</title> + <title>EXIT STATUS</title> <para>For options <option>-v</option>|<option>--version</option> or <option>-h</option>|<option>--help</option>, <command>&dhpackage;</command> always exits with status code 0. For other cases, the following exit status codes are returned: <variablelist> <varlistentry> @@ -543,39 +545,37 @@ supports both. </listitem> </varlistentry> </variablelist> - </para> + </para> </refsect1> <refsect1> <title>BUGS</title> - <para> - The errors should go to standard error, not standard output. 
- </para> - <para> - There should be a way to get <option>-d</option> to send its - output to standard output rather than forcing the user to send - it to a file. - </para> - <para> - I have no idea why anyone would want to use the - <option>-d</option>, <option>-c</option>, and - <option>-m</option> options. If someone could explain it to - me, I'd like to add this information to this manpage. - </para> + <para> + The errors should go to standard error, not standard output. + </para> + <para> + There should be a way to get <option>-d</option> to send its + output to standard output rather than forcing the user to send + it to a file. + </para> + <para> + I have no idea why anyone would want to use the + <option>-d</option>, <option>-c</option>, and + <option>-m</option> options. If someone could explain it to + me, I'd like to add this information to this manpage. + </para> </refsect1> <refsect1> <title>SEE ALSO</title> - <para> - -<literallayout> + <para> + <literallayout> The Expat home page: https://libexpat.github.io/ The W3 XML 1.0 specification (fourth edition): https://www.w3.org/TR/2006/REC-xml-20060816/ Billion laughs attack: https://en.wikipedia.org/wiki/Billion_laughs_attack -</literallayout> - - </para> + </literallayout> + </para> </refsect1> <refsect1> @@ -585,8 +585,8 @@ Billion laughs attack: https://en.wikipedia.org/wiki/Bi in December 2001 for the &debian; system (but may be used by others). Permission is granted to copy, distribute and/or modify this document under - the terms of the <acronym>GNU</acronym> Free Documentation + the terms of the &gnu; Free Documentation License, Version 1.1. 
- </para> + </para> </refsect1> </refentry> diff --git a/examples/Makefile.in b/examples/Makefile.in index 0e55052ce6e4..56a6f69c07b7 100644 --- a/examples/Makefile.in +++ b/examples/Makefile.in @@ -321,6 +321,9 @@ SO_MINOR = @SO_MINOR@ SO_PATCH = @SO_PATCH@ STRIP = @STRIP@ VERSION = @VERSION@ +VSCRIPT_LDFLAGS = @VSCRIPT_LDFLAGS@ +_EXPAT_COMMENT_ATTR_INFO = @_EXPAT_COMMENT_ATTR_INFO@ +_EXPAT_COMMENT_DTD_OR_GE = @_EXPAT_COMMENT_DTD_OR_GE@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ diff --git a/expat_config.h.in b/expat_config.h.in index 543db8252448..7541bf6005e6 100644 --- a/expat_config.h.in +++ b/expat_config.h.in @@ -33,9 +33,6 @@ /* Define to 1 if you have the <inttypes.h> header file. */ #undef HAVE_INTTYPES_H -/* Define to 1 if you have the 'bsd' library (-lbsd). */ -#undef HAVE_LIBBSD - /* Define to 1 if you have a working 'mmap' system call. */ #undef HAVE_MMAP diff --git a/fix-xmltest-log.sh b/fix-xmltest-log.sh index 4739acab6b02..4deafe53a7a8 100755 --- a/fix-xmltest-log.sh +++ b/fix-xmltest-log.sh @@ -6,7 +6,7 @@ # \___/_/\_\ .__/ \__,_|\__| # |_| XML parser # -# Copyright (c) 2019-2022 Sebastian Pipping <sebastian@pipping.org> +# Copyright (c) 2019-2026 Sebastian Pipping <sebastian@pipping.org> # Copyright (c) 2024 Dag-Erling Smørgrav <des@des.dev> # Licensed under the MIT license: # @@ -31,9 +31,10 @@ set -e +sed="$(type -P gsed sed false | head -n 1)" # e.g. 
for Solaris filename="${1:-tests/xmltest.log}" -sed -i.bak \ +exec "${sed}" -i.bak \ -e '# convert DOS line endings to Unix without resorting to dos2unix' \ -e $'s/\r//' \ \ diff --git a/lib/Makefile.am b/lib/Makefile.am index 1958f322f319..493077231c57 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -6,9 +6,10 @@ # \___/_/\_\ .__/ \__,_|\__| # |_| XML parser # -# Copyright (c) 2017-2024 Sebastian Pipping <sebastian@pipping.org> +# Copyright (c) 2017-2026 Sebastian Pipping <sebastian@pipping.org> # Copyright (c) 2017 Tomasz Kłoczko <kloczek@fedoraproject.org> # Copyright (c) 2019 David Loffredo <loffredo@steptools.com> +# Copyright (c) 2026 Gordon Messmer <gordon.messmer@gmail.com> # Licensed under the MIT license: # # Permission is hereby granted, free of charge, to any person obtaining @@ -45,6 +46,9 @@ libexpat_la_LDFLAGS = \ @LIBM@ \ -no-undefined \ -version-info @LIBCURRENT@:@LIBREVISION@:@LIBAGE@ +if HAVE_VSCRIPT +libexpat_la_LDFLAGS += $(VSCRIPT_LDFLAGS),@builddir@/libexpat.map +endif libexpat_la_SOURCES = \ xmlparse.c \ diff --git a/lib/Makefile.in b/lib/Makefile.in index d85f80dbdbba..d8e4fd59e117 100644 --- a/lib/Makefile.in +++ b/lib/Makefile.in @@ -22,9 +22,10 @@ # \___/_/\_\ .__/ \__,_|\__| # |_| XML parser # -# Copyright (c) 2017-2024 Sebastian Pipping <sebastian@pipping.org> +# Copyright (c) 2017-2026 Sebastian Pipping <sebastian@pipping.org> # Copyright (c) 2017 Tomasz Kłoczko <kloczek@fedoraproject.org> # Copyright (c) 2019 David Loffredo <loffredo@steptools.com> +# Copyright (c) 2026 Gordon Messmer <gordon.messmer@gmail.com> # Licensed under the MIT license: # # Permission is hereby granted, free of charge, to any person obtaining @@ -124,6 +125,7 @@ PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ +@HAVE_VSCRIPT_TRUE@am__append_1 = $(VSCRIPT_LDFLAGS),@builddir@/libexpat.map subdir = lib ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \ @@ -146,7 +148,7 @@ DIST_COMMON 
= $(srcdir)/Makefile.am $(include_HEADERS) \ $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/expat_config.h -CONFIG_CLEAN_FILES = +CONFIG_CLEAN_FILES = libexpat.map CONFIG_CLEAN_VPATH_FILES = am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ @@ -259,7 +261,7 @@ am__define_uniq_tagged_files = \ unique=`for i in $$list; do \ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ done | $(am__uniquify_input)` -am__DIST_COMMON = $(srcdir)/Makefile.in \ +am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/libexpat.map.in \ $(top_srcdir)/conftools/depcomp DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ACLOCAL = @ACLOCAL@ @@ -358,6 +360,9 @@ SO_MINOR = @SO_MINOR@ SO_PATCH = @SO_PATCH@ STRIP = @STRIP@ VERSION = @VERSION@ +VSCRIPT_LDFLAGS = @VSCRIPT_LDFLAGS@ +_EXPAT_COMMENT_ATTR_INFO = @_EXPAT_COMMENT_ATTR_INFO@ +_EXPAT_COMMENT_DTD_OR_GE = @_EXPAT_COMMENT_DTD_OR_GE@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ @@ -421,12 +426,8 @@ include_HEADERS = \ lib_LTLIBRARIES = libexpat.la @WITH_TESTS_TRUE@noinst_LTLIBRARIES = libtestpat.la -libexpat_la_LDFLAGS = \ - @AM_LDFLAGS@ \ - @LIBM@ \ - -no-undefined \ - -version-info @LIBCURRENT@:@LIBREVISION@:@LIBAGE@ - +libexpat_la_LDFLAGS = @AM_LDFLAGS@ @LIBM@ -no-undefined -version-info \ + @LIBCURRENT@:@LIBREVISION@:@LIBAGE@ $(am__append_1) libexpat_la_SOURCES = \ xmlparse.c \ xmltok.c \ @@ -490,6 +491,8 @@ $(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) $(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): +libexpat.map: $(top_builddir)/config.status $(srcdir)/libexpat.map.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ install-libLTLIBRARIES: $(lib_LTLIBRARIES) @$(NORMAL_INSTALL) diff --git a/lib/expat.h b/lib/expat.h index 
290dfeb0f6dd..18dbaebde293 100644 --- a/lib/expat.h +++ b/lib/expat.h @@ -11,7 +11,7 @@ Copyright (c) 2000-2005 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net> Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net> - Copyright (c) 2016-2025 Sebastian Pipping <sebastian@pipping.org> + Copyright (c) 2016-2026 Sebastian Pipping <sebastian@pipping.org> Copyright (c) 2016 Cristian Rodríguez <crrodriguez@opensuse.org> Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de> Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> @@ -1082,7 +1082,7 @@ XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled); */ # define XML_MAJOR_VERSION 2 # define XML_MINOR_VERSION 7 -# define XML_MICRO_VERSION 3 +# define XML_MICRO_VERSION 5 # ifdef __cplusplus } diff --git a/lib/expat_external.h b/lib/expat_external.h index 96f955eefb6b..d9ddeb612f6d 100644 --- a/lib/expat_external.h +++ b/lib/expat_external.h @@ -12,7 +12,7 @@ Copyright (c) 2001-2002 Greg Stein <gstein@users.sourceforge.net> Copyright (c) 2002-2006 Karl Waclawek <karl@waclawek.net> Copyright (c) 2016 Cristian Rodríguez <crrodriguez@opensuse.org> - Copyright (c) 2016-2019 Sebastian Pipping <sebastian@pipping.org> + Copyright (c) 2016-2026 Sebastian Pipping <sebastian@pipping.org> Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> Copyright (c) 2018 Yury Gribov <tetra2005@gmail.com> Licensed under the MIT license: @@ -88,8 +88,7 @@ # ifndef XML_BUILDING_EXPAT /* using Expat from an application */ -# if defined(_MSC_EXTENSIONS) && ! defined(__BEOS__) \ - && ! defined(__CYGWIN__) +# if defined(_MSC_VER) && ! defined(__BEOS__) && ! 
defined(__CYGWIN__) # define XMLIMPORT __declspec(dllimport) # endif diff --git a/lib/internal.h b/lib/internal.h index 8f5edf48ef7c..61266ebb7723 100644 --- a/lib/internal.h +++ b/lib/internal.h @@ -128,7 +128,7 @@ # elif ULONG_MAX == 18446744073709551615u // 2^64-1 # define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "ld" # define EXPAT_FMT_SIZE_T(midpart) "%" midpart "lu" -# elif defined(EMSCRIPTEN) // 32bit mode Emscripten +# elif defined(__wasm32__) // 32bit mode Emscripten or WASI SDK # define EXPAT_FMT_PTRDIFF_T(midpart) "%" midpart "ld" # define EXPAT_FMT_SIZE_T(midpart) "%" midpart "zu" # else diff --git a/lib/libexpat.map.in b/lib/libexpat.map.in new file mode 100644 index 000000000000..52e59ed3d931 --- /dev/null +++ b/lib/libexpat.map.in @@ -0,0 +1,119 @@ +LIBEXPAT_1.0.0 { + global: + XML_DefaultCurrent; + XML_ErrorString; + XML_ExternalEntityParserCreate; + XML_GetBase; + XML_GetBuffer; + XML_GetCurrentByteIndex; + XML_GetCurrentColumnNumber; + XML_GetCurrentLineNumber; + XML_GetErrorCode; + XML_Parse; + XML_ParseBuffer; + XML_ParserCreate; + XML_ParserFree; + XML_SetBase; + XML_SetCharacterDataHandler; + XML_SetDefaultHandler; + XML_SetElementHandler; + XML_SetExternalEntityRefHandler; + XML_SetNotationDeclHandler; + XML_SetProcessingInstructionHandler; + XML_SetUnknownEncodingHandler; + XML_SetUnparsedEntityDeclHandler; + XML_SetUserData; + XML_UseParserAsHandlerArg; +}; + +LIBEXPAT_1.1.0 { + global: + XML_GetCurrentByteCount; + XML_GetSpecifiedAttributeCount; + XML_ParserCreateNS; + XML_SetCdataSectionHandler; + XML_SetCommentHandler; + XML_SetDefaultHandlerExpand; + XML_SetEncoding; + XML_SetExternalEntityRefHandlerArg; + XML_SetNamespaceDeclHandler; + XML_SetNotStandaloneHandler; +} LIBEXPAT_1.0.0; + +LIBEXPAT_1.95.0 { + global: + XML_ExpatVersion; + XML_GetIdAttributeIndex; + XML_GetInputContext; + XML_ParserCreate_MM; + XML_SetAttlistDeclHandler; + XML_SetDoctypeDeclHandler; + XML_SetElementDeclHandler; + XML_SetEndCdataSectionHandler; + 
XML_SetEndDoctypeDeclHandler; + XML_SetEndElementHandler; + XML_SetEndNamespaceDeclHandler; + XML_SetEntityDeclHandler; + XML_SetParamEntityParsing; + XML_SetReturnNSTriplet; + XML_SetStartCdataSectionHandler; + XML_SetStartDoctypeDeclHandler; + XML_SetStartElementHandler; + XML_SetStartNamespaceDeclHandler; + XML_SetXmlDeclHandler; +} LIBEXPAT_1.1.0; + +LIBEXPAT_1.95.3 { + global: + XML_ExpatVersionInfo; + XML_ParserReset; +} LIBEXPAT_1.95.0; + +LIBEXPAT_1.95.4 { + global: + XML_SetSkippedEntityHandler; +} LIBEXPAT_1.95.3; + +LIBEXPAT_1.95.5 { + global: + XML_GetFeatureList; + XML_UseForeignDTD; +} LIBEXPAT_1.95.4; + +LIBEXPAT_1.95.6 { + global: + XML_FreeContentModel; + XML_MemFree; + XML_MemMalloc; + XML_MemRealloc; +} LIBEXPAT_1.95.5; + +LIBEXPAT_1.95.8 { + global: + XML_GetParsingStatus; + XML_ResumeParser; + XML_StopParser; +} LIBEXPAT_1.95.6; + +LIBEXPAT_2.1.0 { + global: +@_EXPAT_COMMENT_ATTR_INFO@ XML_GetAttributeInfo; + XML_SetHashSalt; +} LIBEXPAT_1.95.8; + +LIBEXPAT_2.4.0 { + global: +@_EXPAT_COMMENT_DTD_OR_GE@ XML_SetBillionLaughsAttackProtectionActivationThreshold; +@_EXPAT_COMMENT_DTD_OR_GE@ XML_SetBillionLaughsAttackProtectionMaximumAmplification; +} LIBEXPAT_2.1.0; + +LIBEXPAT_2.6.0 { + global: + XML_SetReparseDeferralEnabled; +} LIBEXPAT_2.4.0; + +LIBEXPAT_2.7.2 { + global: +@_EXPAT_COMMENT_DTD_OR_GE@ XML_SetAllocTrackerActivationThreshold; +@_EXPAT_COMMENT_DTD_OR_GE@ XML_SetAllocTrackerMaximumAmplification; +} LIBEXPAT_2.6.0; diff --git a/lib/xmlparse.c b/lib/xmlparse.c index a187a3a18f19..0248b6651ffb 100644 --- a/lib/xmlparse.c +++ b/lib/xmlparse.c @@ -1,4 +1,4 @@ -/* 28bcd8b1ba7eb595d82822908257fd9c3589b4243e3c922d0369f35bfcd7b506 (2.7.3+) +/* 93c1caa66e2b0310459482516af05505b57c5cb7b96df777105308fc585c85d1 (2.7.5+) __ __ _ ___\ \/ /_ __ __ _| |_ / _ \\ /| '_ \ / _` | __| @@ -13,7 +13,7 @@ Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net> Copyright (c) 2005-2009 Steven Solie <steven@solie.ca> Copyright (c) 2016 Eric Rahm 
<erahm@mozilla.com> - Copyright (c) 2016-2025 Sebastian Pipping <sebastian@pipping.org> + Copyright (c) 2016-2026 Sebastian Pipping <sebastian@pipping.org> Copyright (c) 2016 Gaurav <g.gupta@samsung.com> Copyright (c) 2016 Thomas Beutlich <tc@tbeu.de> Copyright (c) 2016 Gustavo Grieco <gustavo.grieco@imag.fr> @@ -42,6 +42,9 @@ Copyright (c) 2024-2025 Berkay Eren Ürün <berkay.ueruen@siemens.com> Copyright (c) 2024 Hanno Böck <hanno@gentoo.org> Copyright (c) 2025 Matthew Fernandez <matthew.fernandez@gmail.com> + Copyright (c) 2025 Atrem Borovik <polzovatellllk@gmail.com> + Copyright (c) 2025 Alfonso Gregory <gfunni234@gmail.com> + Copyright (c) 2026 Rosen Penev <rosenp@gmail.com> Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -101,7 +104,7 @@ #include <limits.h> /* INT_MAX, UINT_MAX */ #include <stdio.h> /* fprintf */ #include <stdlib.h> /* getenv, rand_s */ -#include <stdint.h> /* uintptr_t */ +#include <stdint.h> /* SIZE_MAX, uintptr_t */ #include <math.h> /* isnan */ #ifdef _WIN32 @@ -134,11 +137,6 @@ # endif /* defined(GRND_NONBLOCK) */ #endif /* defined(HAVE_GETRANDOM) || defined(HAVE_SYSCALL_GETRANDOM) */ -#if defined(HAVE_LIBBSD) \ - && (defined(HAVE_ARC4RANDOM_BUF) || defined(HAVE_ARC4RANDOM)) -# include <bsd/stdlib.h> -#endif - #if defined(_WIN32) && ! defined(LOAD_LIBRARY_SEARCH_SYSTEM32) # define LOAD_LIBRARY_SEARCH_SYSTEM32 0x00000800 #endif @@ -155,8 +153,6 @@ * Linux >=3.17 + glibc (including <2.25) (syscall SYS_getrandom): HAVE_SYSCALL_GETRANDOM, \ * BSD / macOS >=10.7 / glibc >=2.36 (arc4random_buf): HAVE_ARC4RANDOM_BUF, \ * BSD / macOS (including <10.7) / glibc >=2.36 (arc4random): HAVE_ARC4RANDOM, \ - * libbsd (arc4random_buf): HAVE_ARC4RANDOM_BUF + HAVE_LIBBSD, \ - * libbsd (arc4random): HAVE_ARC4RANDOM + HAVE_LIBBSD, \ * Linux (including <3.17) / BSD / macOS (including <10.7) / Solaris >=8 (/dev/urandom): XML_DEV_URANDOM, \ * Windows >=Vista (rand_s): _WIN32. 
\ \ @@ -311,8 +307,11 @@ typedef struct tag { const char *rawName; /* tagName in the original encoding */ int rawNameLength; TAG_NAME name; /* tagName in the API encoding */ - char *buf; /* buffer for name components */ - char *bufEnd; /* end of the buffer */ + union { + char *raw; /* for byte-level access (rawName storage) */ + XML_Char *str; /* for character-level access (converted name) */ + } buf; /* buffer for name components */ + char *bufEnd; /* end of the buffer */ BINDING *bindings; } TAG; @@ -349,7 +348,7 @@ typedef struct { typedef struct block { struct block *next; int size; - XML_Char s[1]; + XML_Char s[]; } BLOCK; typedef struct { @@ -591,6 +590,8 @@ static XML_Char *poolStoreString(STRING_POOL *pool, const ENCODING *enc, static XML_Bool FASTCALL poolGrow(STRING_POOL *pool); static const XML_Char *FASTCALL poolCopyString(STRING_POOL *pool, const XML_Char *s); +static const XML_Char *FASTCALL poolCopyStringNoFinish(STRING_POOL *pool, + const XML_Char *s); static const XML_Char *poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n); static const XML_Char *FASTCALL poolAppendString(STRING_POOL *pool, @@ -1230,8 +1231,11 @@ generate_hash_secret_salt(XML_Parser parser) { # endif /* ! defined(_WIN32) && defined(XML_DEV_URANDOM) */ /* .. and self-made low quality for backup: */ + entropy = gather_time_entropy(); +# if ! 
defined(__wasi__) /* Process ID is 0 bits entropy if attacker has local access */ - entropy = gather_time_entropy() ^ getpid(); + entropy ^= getpid(); +# endif /* Factors are 2^31-1 and 2^61-1 (Mersenne primes M31 and M61) */ if (sizeof(unsigned long) == 4) { @@ -1754,6 +1758,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, XML_ExternalEntityRefHandler oldExternalEntityRefHandler; XML_SkippedEntityHandler oldSkippedEntityHandler; XML_UnknownEncodingHandler oldUnknownEncodingHandler; + void *oldUnknownEncodingHandlerData; XML_ElementDeclHandler oldElementDeclHandler; XML_AttlistDeclHandler oldAttlistDeclHandler; XML_EntityDeclHandler oldEntityDeclHandler; @@ -1799,6 +1804,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, oldExternalEntityRefHandler = parser->m_externalEntityRefHandler; oldSkippedEntityHandler = parser->m_skippedEntityHandler; oldUnknownEncodingHandler = parser->m_unknownEncodingHandler; + oldUnknownEncodingHandlerData = parser->m_unknownEncodingHandlerData; oldElementDeclHandler = parser->m_elementDeclHandler; oldAttlistDeclHandler = parser->m_attlistDeclHandler; oldEntityDeclHandler = parser->m_entityDeclHandler; @@ -1859,6 +1865,7 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, parser->m_externalEntityRefHandler = oldExternalEntityRefHandler; parser->m_skippedEntityHandler = oldSkippedEntityHandler; parser->m_unknownEncodingHandler = oldUnknownEncodingHandler; + parser->m_unknownEncodingHandlerData = oldUnknownEncodingHandlerData; parser->m_elementDeclHandler = oldElementDeclHandler; parser->m_attlistDeclHandler = oldAttlistDeclHandler; parser->m_entityDeclHandler = oldEntityDeclHandler; @@ -1934,7 +1941,7 @@ XML_ParserFree(XML_Parser parser) { } p = tagList; tagList = tagList->parent; - FREE(parser, p->buf); + FREE(parser, p->buf.raw); destroyBindings(p->bindings, parser); FREE(parser, p); } @@ -2599,7 +2606,7 @@ XML_GetBuffer(XML_Parser parser, 
int len) { // NOTE: We are avoiding MALLOC(..) here to leave limiting // the input size to the application using Expat. newBuf = parser->m_mem.malloc_fcn(bufferSize); - if (newBuf == 0) { + if (newBuf == NULL) { parser->m_errorCode = XML_ERROR_NO_MEMORY; return NULL; } @@ -3126,7 +3133,7 @@ storeRawNames(XML_Parser parser) { size_t bufSize; size_t nameLen = sizeof(XML_Char) * (tag->name.strLen + 1); size_t rawNameLen; - char *rawNameBuf = tag->buf + nameLen; + char *rawNameBuf = tag->buf.raw + nameLen; /* Stop if already stored. Since m_tagStack is a stack, we can stop at the first entry that has already been copied; everything below it in the stack is already been accounted for in a @@ -3142,22 +3149,22 @@ storeRawNames(XML_Parser parser) { if (rawNameLen > (size_t)INT_MAX - nameLen) return XML_FALSE; bufSize = nameLen + rawNameLen; - if (bufSize > (size_t)(tag->bufEnd - tag->buf)) { - char *temp = REALLOC(parser, tag->buf, bufSize); + if (bufSize > (size_t)(tag->bufEnd - tag->buf.raw)) { + char *temp = REALLOC(parser, tag->buf.raw, bufSize); if (temp == NULL) return XML_FALSE; - /* if tag->name.str points to tag->buf (only when namespace + /* if tag->name.str points to tag->buf.str (only when namespace processing is off) then we have to update it */ - if (tag->name.str == (XML_Char *)tag->buf) + if (tag->name.str == tag->buf.str) tag->name.str = (XML_Char *)temp; /* if tag->name.localPart is set (when namespace processing is on) then update it as well, since it will always point into tag->buf */ if (tag->name.localPart) tag->name.localPart - = (XML_Char *)temp + (tag->name.localPart - (XML_Char *)tag->buf); - tag->buf = temp; + = (XML_Char *)temp + (tag->name.localPart - tag->buf.str); + tag->buf.raw = temp; tag->bufEnd = temp + bufSize; rawNameBuf = temp + nameLen; } @@ -3472,12 +3479,12 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, tag = MALLOC(parser, sizeof(TAG)); if (! 
tag) return XML_ERROR_NO_MEMORY; - tag->buf = MALLOC(parser, INIT_TAG_BUF_SIZE); - if (! tag->buf) { + tag->buf.raw = MALLOC(parser, INIT_TAG_BUF_SIZE); + if (! tag->buf.raw) { FREE(parser, tag); return XML_ERROR_NO_MEMORY; } - tag->bufEnd = tag->buf + INIT_TAG_BUF_SIZE; + tag->bufEnd = tag->buf.raw + INIT_TAG_BUF_SIZE; } tag->bindings = NULL; tag->parent = parser->m_tagStack; @@ -3490,31 +3497,32 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, { const char *rawNameEnd = tag->rawName + tag->rawNameLength; const char *fromPtr = tag->rawName; - toPtr = (XML_Char *)tag->buf; + toPtr = tag->buf.str; for (;;) { - int bufSize; int convLen; const enum XML_Convert_Result convert_res = XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1); - convLen = (int)(toPtr - (XML_Char *)tag->buf); + convLen = (int)(toPtr - tag->buf.str); if ((fromPtr >= rawNameEnd) || (convert_res == XML_CONVERT_INPUT_INCOMPLETE)) { tag->name.strLen = convLen; break; } - bufSize = (int)(tag->bufEnd - tag->buf) << 1; + if (SIZE_MAX / 2 < (size_t)(tag->bufEnd - tag->buf.raw)) + return XML_ERROR_NO_MEMORY; + const size_t bufSize = (size_t)(tag->bufEnd - tag->buf.raw) * 2; { - char *temp = REALLOC(parser, tag->buf, bufSize); + char *temp = REALLOC(parser, tag->buf.raw, bufSize); if (temp == NULL) return XML_ERROR_NO_MEMORY; - tag->buf = temp; + tag->buf.raw = temp; tag->bufEnd = temp + bufSize; toPtr = (XML_Char *)temp + convLen; } } } - tag->name.str = (XML_Char *)tag->buf; + tag->name.str = tag->buf.str; *toPtr = XML_T('\0'); result = storeAtts(parser, enc, s, &(tag->name), &(tag->bindings), account); @@ -3878,7 +3886,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. 
*/ #if UINT_MAX >= SIZE_MAX - if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(ATTRIBUTE)) { + if ((unsigned)parser->m_attsSize > SIZE_MAX / sizeof(ATTRIBUTE)) { parser->m_attsSize = oldAttsSize; return XML_ERROR_NO_MEMORY; } @@ -3897,7 +3905,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ # if UINT_MAX >= SIZE_MAX - if ((unsigned)parser->m_attsSize > (size_t)(-1) / sizeof(XML_AttrInfo)) { + if ((unsigned)parser->m_attsSize > SIZE_MAX / sizeof(XML_AttrInfo)) { parser->m_attsSize = oldAttsSize; return XML_ERROR_NO_MEMORY; } @@ -4073,7 +4081,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX - if (nsAttsSize > (size_t)(-1) / sizeof(NS_ATT)) { + if (nsAttsSize > SIZE_MAX / sizeof(NS_ATT)) { /* Restore actual size of memory in m_nsAtts */ parser->m_nsAttsPower = oldNsAttsPower; return XML_ERROR_NO_MEMORY; @@ -4256,7 +4264,7 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX - if ((unsigned)(n + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) { + if ((unsigned)(n + EXPAND_SPARE) > SIZE_MAX / sizeof(XML_Char)) { return XML_ERROR_NO_MEMORY; } #endif @@ -4502,7 +4510,7 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. 
*/ #if UINT_MAX >= SIZE_MAX - if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) { + if ((unsigned)(len + EXPAND_SPARE) > SIZE_MAX / sizeof(XML_Char)) { return XML_ERROR_NO_MEMORY; } #endif @@ -4529,7 +4537,7 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX - if ((unsigned)(len + EXPAND_SPARE) > (size_t)(-1) / sizeof(XML_Char)) { + if ((unsigned)(len + EXPAND_SPARE) > SIZE_MAX / sizeof(XML_Char)) { return XML_ERROR_NO_MEMORY; } #endif @@ -5080,7 +5088,7 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end, } /* If we get this token, we have the start of what might be a normal tag, but not a declaration (i.e. it doesn't begin with - "<!"). In a DTD context, that isn't legal. + "<!" or "<?"). In a DTD context, that isn't legal. */ else if (tok == XML_TOK_INSTANCE_START) { *nextPtr = next; @@ -5169,6 +5177,15 @@ entityValueProcessor(XML_Parser parser, const char *s, const char *end, /* found end of entity value - can store it now */ return storeEntityValue(parser, enc, s, end, XML_ACCOUNT_DIRECT, NULL); } + /* If we get this token, we have the start of what might be a + normal tag, but not a declaration (i.e. it doesn't begin with + "<!" or "<?"). In a DTD context, that isn't legal. + */ + else if (tok == XML_TOK_INSTANCE_START) { + *nextPtr = next; + return XML_ERROR_SYNTAX; + } + start = next; } } @@ -5920,15 +5937,18 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. 
*/ #if UINT_MAX >= SIZE_MAX - if (parser->m_groupSize > (size_t)(-1) / sizeof(int)) { + if (parser->m_groupSize > SIZE_MAX / sizeof(int)) { + parser->m_groupSize /= 2; return XML_ERROR_NO_MEMORY; } #endif int *const new_scaff_index = REALLOC( parser, dtd->scaffIndex, parser->m_groupSize * sizeof(int)); - if (new_scaff_index == NULL) + if (new_scaff_index == NULL) { + parser->m_groupSize /= 2; return XML_ERROR_NO_MEMORY; + } dtd->scaffIndex = new_scaff_index; } } else { @@ -6780,7 +6800,14 @@ storeEntityValue(XML_Parser parser, const ENCODING *enc, return XML_ERROR_NO_MEMORY; } - const char *next; + const char *next = entityTextPtr; + + /* Nothing to tokenize. */ + if (entityTextPtr >= entityTextEnd) { + result = XML_ERROR_NONE; + goto endEntityValue; + } + for (;;) { next = entityTextPtr; /* XmlEntityValueTok doesn't always set the last arg */ @@ -7190,7 +7217,7 @@ defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata, * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX - if ((unsigned)count > (size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE)) { + if ((unsigned)count > SIZE_MAX / sizeof(DEFAULT_ATTRIBUTE)) { return 0; } #endif @@ -7430,16 +7457,24 @@ setContext(XML_Parser parser, const XML_Char *context) { else { if (! poolAppendChar(&parser->m_tempPool, XML_T('\0'))) return XML_FALSE; - prefix - = (PREFIX *)lookup(parser, &dtd->prefixes, - poolStart(&parser->m_tempPool), sizeof(PREFIX)); - if (! prefix) + const XML_Char *const prefixName = poolCopyStringNoFinish( + &dtd->pool, poolStart(&parser->m_tempPool)); + if (! prefixName) { return XML_FALSE; - if (prefix->name == poolStart(&parser->m_tempPool)) { - prefix->name = poolCopyString(&dtd->pool, prefix->name); - if (! 
prefix->name) - return XML_FALSE; } + + prefix = (PREFIX *)lookup(parser, &dtd->prefixes, prefixName, + sizeof(PREFIX)); + + const bool prefixNameUsed = prefix && prefix->name == prefixName; + if (prefixNameUsed) + poolFinish(&dtd->pool); + else + poolDiscard(&dtd->pool); + + if (! prefix) + return XML_FALSE; + poolDiscard(&parser->m_tempPool); } for (context = s + 1; *context != CONTEXT_SEP && *context != XML_T('\0'); @@ -7666,8 +7701,7 @@ dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, * from -Wtype-limits on platforms where * sizeof(int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX - if ((size_t)oldE->nDefaultAtts - > ((size_t)(-1) / sizeof(DEFAULT_ATTRIBUTE))) { + if ((size_t)oldE->nDefaultAtts > SIZE_MAX / sizeof(DEFAULT_ATTRIBUTE)) { return 0; } #endif @@ -7869,7 +7903,7 @@ lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) { unsigned long newMask = (unsigned long)newSize - 1; /* Detect and prevent integer overflow */ - if (newSize > (size_t)(-1) / sizeof(NAMED *)) { + if (newSize > SIZE_MAX / sizeof(NAMED *)) { return NULL; } @@ -8028,6 +8062,23 @@ poolCopyString(STRING_POOL *pool, const XML_Char *s) { return s; } +// A version of `poolCopyString` that does not call `poolFinish` +// and reverts any partial advancement upon failure. +static const XML_Char *FASTCALL +poolCopyStringNoFinish(STRING_POOL *pool, const XML_Char *s) { + const XML_Char *const original = s; + do { + if (! poolAppendChar(pool, *s)) { + // Revert any previously successful advancement + const ptrdiff_t advancedBy = s - original; + if (advancedBy > 0) + pool->ptr -= advancedBy; + return NULL; + } + } while (*s++); + return pool->start; +} + static const XML_Char * poolCopyStringN(STRING_POOL *pool, const XML_Char *s, int n) { if (! pool->ptr && ! 
poolGrow(pool)) { @@ -8105,7 +8156,7 @@ poolBytesToAllocateFor(int blockSize) { static XML_Bool FASTCALL poolGrow(STRING_POOL *pool) { if (pool->freeBlocks) { - if (pool->start == 0) { + if (pool->start == NULL) { pool->blocks = pool->freeBlocks; pool->freeBlocks = pool->freeBlocks->next; pool->blocks->next = NULL; @@ -8217,7 +8268,7 @@ nextScaffoldPart(XML_Parser parser) { * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX - if (parser->m_groupSize > ((size_t)(-1) / sizeof(int))) { + if (parser->m_groupSize > SIZE_MAX / sizeof(int)) { return -1; } #endif @@ -8244,7 +8295,7 @@ nextScaffoldPart(XML_Parser parser) { * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX - if (dtd->scaffSize > (size_t)(-1) / 2u / sizeof(CONTENT_SCAFFOLD)) { + if (dtd->scaffSize > SIZE_MAX / 2u / sizeof(CONTENT_SCAFFOLD)) { return -1; } #endif @@ -8294,15 +8345,15 @@ build_model(XML_Parser parser) { * from -Wtype-limits on platforms where * sizeof(unsigned int) < sizeof(size_t), e.g. on x86_64. */ #if UINT_MAX >= SIZE_MAX - if (dtd->scaffCount > (size_t)(-1) / sizeof(XML_Content)) { + if (dtd->scaffCount > SIZE_MAX / sizeof(XML_Content)) { return NULL; } - if (dtd->contentStringLen > (size_t)(-1) / sizeof(XML_Char)) { + if (dtd->contentStringLen > SIZE_MAX / sizeof(XML_Char)) { return NULL; } #endif if (dtd->scaffCount * sizeof(XML_Content) - > (size_t)(-1) - dtd->contentStringLen * sizeof(XML_Char)) { + > SIZE_MAX - dtd->contentStringLen * sizeof(XML_Char)) { return NULL; } diff --git a/lib/xmlrole.c b/lib/xmlrole.c index 2c48bf408679..b1dfb456e5df 100644 --- a/lib/xmlrole.c +++ b/lib/xmlrole.c @@ -12,10 +12,11 @@ Copyright (c) 2002-2006 Karl Waclawek <karl@waclawek.net> Copyright (c) 2002-2003 Fred L. Drake, Jr. 
<fdrake@users.sourceforge.net> Copyright (c) 2005-2009 Steven Solie <steven@solie.ca> - Copyright (c) 2016-2023 Sebastian Pipping <sebastian@pipping.org> + Copyright (c) 2016-2026 Sebastian Pipping <sebastian@pipping.org> Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> Copyright (c) 2019 David Loffredo <loffredo@steptools.com> Copyright (c) 2021 Donghee Na <donghee.na@python.org> + Copyright (c) 2025 Alfonso Gregory <gfunni234@gmail.com> Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -46,7 +47,6 @@ # include "winconfig.h" #endif -#include "expat_external.h" #include "internal.h" #include "xmlrole.h" #include "ascii.h" diff --git a/lib/xmltok.c b/lib/xmltok.c index 95d5e84b67f1..f6e5f742c928 100644 --- a/lib/xmltok.c +++ b/lib/xmltok.c @@ -12,7 +12,7 @@ Copyright (c) 2002 Greg Stein <gstein@users.sourceforge.net> Copyright (c) 2002-2016 Karl Waclawek <karl@waclawek.net> Copyright (c) 2005-2009 Steven Solie <steven@solie.ca> - Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org> + Copyright (c) 2016-2026 Sebastian Pipping <sebastian@pipping.org> Copyright (c) 2016 Pascal Cuoq <cuoq@trust-in-soft.com> Copyright (c) 2016 Don Lewis <truckman@apache.org> Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> @@ -24,6 +24,7 @@ Copyright (c) 2022 Martin Ettl <ettl.martin78@googlemail.com> Copyright (c) 2022 Sean McBride <sean@rogue-research.com> Copyright (c) 2023 Hanno Böck <hanno@gentoo.org> + Copyright (c) 2025 Alfonso Gregory <gfunni234@gmail.com> Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -56,7 +57,6 @@ # include "winconfig.h" #endif -#include "expat_external.h" #include "internal.h" #include "xmltok.h" #include "nametab.h" diff --git a/lib/xmltok_ns.c b/lib/xmltok_ns.c index fbdd3e3c7b79..1cd60de1e4fe 100644 --- a/lib/xmltok_ns.c +++ b/lib/xmltok_ns.c @@ -11,7 +11,8 @@ Copyright (c) 2002 Greg Stein 
<gstein@users.sourceforge.net> Copyright (c) 2002 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> Copyright (c) 2002-2006 Karl Waclawek <karl@waclawek.net> - Copyright (c) 2017-2021 Sebastian Pipping <sebastian@pipping.org> + Copyright (c) 2017-2026 Sebastian Pipping <sebastian@pipping.org> + Copyright (c) 2025 Alfonso Gregory <gfunni234@gmail.com> Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -98,13 +99,13 @@ NS(findEncoding)(const ENCODING *enc, const char *ptr, const char *end) { int i; XmlUtf8Convert(enc, &ptr, end, &p, p + ENCODING_MAX - 1); if (ptr != end) - return 0; + return NULL; *p = 0; if (streqci(buf, KW_UTF_16) && enc->minBytesPerChar == 2) return enc; i = getEncodingIndex(buf); if (i == UNKNOWN_ENC) - return 0; + return NULL; return NS(encodings)[i]; } diff --git a/tests/Makefile.in b/tests/Makefile.in index 830560e2daba..9ffb46a09c75 100644 --- a/tests/Makefile.in +++ b/tests/Makefile.in @@ -616,6 +616,9 @@ SO_MINOR = @SO_MINOR@ SO_PATCH = @SO_PATCH@ STRIP = @STRIP@ VERSION = @VERSION@ +VSCRIPT_LDFLAGS = @VSCRIPT_LDFLAGS@ +_EXPAT_COMMENT_ATTR_INFO = @_EXPAT_COMMENT_ATTR_INFO@ +_EXPAT_COMMENT_DTD_OR_GE = @_EXPAT_COMMENT_DTD_OR_GE@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ diff --git a/tests/basic_tests.c b/tests/basic_tests.c index 0231e0949ee9..02d1d5fd3c10 100644 --- a/tests/basic_tests.c +++ b/tests/basic_tests.c @@ -10,7 +10,7 @@ Copyright (c) 2003 Greg Stein <gstein@users.sourceforge.net> Copyright (c) 2005-2007 Steven Solie <steven@solie.ca> Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net> - Copyright (c) 2016-2025 Sebastian Pipping <sebastian@pipping.org> + Copyright (c) 2016-2026 Sebastian Pipping <sebastian@pipping.org> Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk> Copyright (c) 2017 Joe Orton <jorton@redhat.com> Copyright (c) 2017 José Gutiérrez de la Concha <jose@zeroc.com> @@ -3112,12 +3112,16 @@ 
START_TEST(test_buffer_can_grow_to_max) { #if defined(__MINGW32__) && ! defined(__MINGW64__) // workaround for mingw/wine32 on GitHub CI not being able to reach 1GiB // Can we make a big allocation? - void *big = malloc(maxbuf); - if (! big) { + for (int i = 1; i <= 2; i++) { + void *const big = malloc(maxbuf); + if (big != NULL) { + free(big); + break; + } // The big allocation failed. Let's be a little lenient. maxbuf = maxbuf / 2; + fprintf(stderr, "Reducing maxbuf to %d...\n", maxbuf); } - free(big); #endif for (int i = 0; i < num_prefixes; ++i) { @@ -4570,6 +4574,46 @@ START_TEST(test_unknown_encoding_invalid_attr_value) { } END_TEST +START_TEST(test_unknown_encoding_user_data_primary) { + // This test is based on ideas contributed by Artiphishell Inc. + const char *const text = "<?xml version='1.0' encoding='x-unk'?>\n" + "<root />\n"; + XML_Parser parser = XML_ParserCreate(NULL); + XML_SetUnknownEncodingHandler(parser, + user_data_checking_unknown_encoding_handler, + (void *)(intptr_t)0xC0FFEE); + + assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) + == XML_STATUS_OK); + + XML_ParserFree(parser); +} +END_TEST + +START_TEST(test_unknown_encoding_user_data_secondary) { + // This test is based on ideas contributed by Artiphishell Inc. 
+ const char *const text_main = "<!DOCTYPE r [\n" + " <!ENTITY ext SYSTEM 'ext.ent'>\n" + "]>\n" + "<r>&ext;</r>\n"; + const char *const text_external = "<?xml version='1.0' encoding='x-unk'?>\n" + "<e>data</e>"; + ExtTest2 test_data = {text_external, (int)strlen(text_external), NULL, NULL}; + XML_Parser parser = XML_ParserCreate(NULL); + XML_SetExternalEntityRefHandler(parser, external_entity_loader2); + XML_SetUnknownEncodingHandler(parser, + user_data_checking_unknown_encoding_handler, + (void *)(intptr_t)0xC0FFEE); + XML_SetUserData(parser, &test_data); + + assert_true(_XML_Parse_SINGLE_BYTES(parser, text_main, (int)strlen(text_main), + XML_TRUE) + == XML_STATUS_OK); + + XML_ParserFree(parser); +} +END_TEST + /* Test an external entity parser set to use latin-1 detects UTF-16 * BOMs correctly. */ @@ -6001,6 +6045,7 @@ START_TEST(test_bypass_heuristic_when_close_to_bufsize) { const int document_length = 65536; char *const document = (char *)malloc(document_length); + assert_true(document != NULL); const XML_Memory_Handling_Suite memfuncs = { counting_malloc, @@ -6213,6 +6258,24 @@ START_TEST(test_varying_buffer_fills) { } END_TEST +START_TEST(test_empty_ext_param_entity_in_value) { + const char *text = "<!DOCTYPE r SYSTEM \"ext.dtd\"><r/>"; + ExtOption options[] = { + {XCS("ext.dtd"), "<!ENTITY % pe SYSTEM \"empty\">" + "<!ENTITY ge \"%pe;\">"}, + {XCS("empty"), ""}, + {NULL, NULL}, + }; + + XML_SetParamEntityParsing(g_parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(g_parser, external_entity_optioner); + XML_SetUserData(g_parser, options); + if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) + == XML_STATUS_ERROR) + xml_failure(g_parser); +} +END_TEST + void make_basic_test_case(Suite *s) { TCase *tc_basic = tcase_create("basic tests"); @@ -6416,6 +6479,8 @@ make_basic_test_case(Suite *s) { tcase_add_test(tc_basic, test_unknown_encoding_invalid_surrogate); tcase_add_test(tc_basic, 
test_unknown_encoding_invalid_high); tcase_add_test(tc_basic, test_unknown_encoding_invalid_attr_value); + tcase_add_test(tc_basic, test_unknown_encoding_user_data_primary); + tcase_add_test(tc_basic, test_unknown_encoding_user_data_secondary); tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom); tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16be_bom); tcase_add_test__if_xml_ge(tc_basic, test_ext_entity_latin1_utf16le_bom2); @@ -6458,6 +6523,7 @@ make_basic_test_case(Suite *s) { tcase_add_test(tc_basic, test_empty_element_abort); tcase_add_test__ifdef_xml_dtd(tc_basic, test_pool_integrity_with_unfinished_attr); + tcase_add_test__ifdef_xml_dtd(tc_basic, test_empty_ext_param_entity_in_value); tcase_add_test__if_xml_ge(tc_basic, test_entity_ref_no_elements); tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_entity); tcase_add_test__if_xml_ge(tc_basic, test_deep_nested_attribute_entity); diff --git a/tests/benchmark/Makefile.in b/tests/benchmark/Makefile.in index e72e901a39af..89a29cef053b 100644 --- a/tests/benchmark/Makefile.in +++ b/tests/benchmark/Makefile.in @@ -311,6 +311,9 @@ SO_MINOR = @SO_MINOR@ SO_PATCH = @SO_PATCH@ STRIP = @STRIP@ VERSION = @VERSION@ +VSCRIPT_LDFLAGS = @VSCRIPT_LDFLAGS@ +_EXPAT_COMMENT_ATTR_INFO = @_EXPAT_COMMENT_ATTR_INFO@ +_EXPAT_COMMENT_DTD_OR_GE = @_EXPAT_COMMENT_DTD_OR_GE@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ diff --git a/tests/handlers.c b/tests/handlers.c index 5bca2b1f551e..e456df21c77c 100644 --- a/tests/handlers.c +++ b/tests/handlers.c @@ -10,7 +10,7 @@ Copyright (c) 2003 Greg Stein <gstein@users.sourceforge.net> Copyright (c) 2005-2007 Steven Solie <steven@solie.ca> Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net> - Copyright (c) 2016-2025 Sebastian Pipping <sebastian@pipping.org> + Copyright (c) 2016-2026 Sebastian Pipping <sebastian@pipping.org> Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk> Copyright (c) 
2017 Joe Orton <jorton@redhat.com> Copyright (c) 2017 José Gutiérrez de la Concha <jose@zeroc.com> @@ -45,6 +45,7 @@ # undef NDEBUG /* because test suite relies on assert(...) at the moment */ #endif +#include <stdint.h> #include <stdio.h> #include <string.h> #include <assert.h> @@ -407,6 +408,15 @@ long_encoding_handler(void *userData, const XML_Char *encoding, return XML_STATUS_OK; } +int XMLCALL +user_data_checking_unknown_encoding_handler(void *userData, + const XML_Char *encoding, + XML_Encoding *info) { + const intptr_t number = (intptr_t)userData; + assert_true(number == 0xC0FFEE); + return long_encoding_handler(userData, encoding, info); +} + /* External Entity Handlers */ int XMLCALL diff --git a/tests/handlers.h b/tests/handlers.h index fa6267fbbd08..fcde27ae4940 100644 --- a/tests/handlers.h +++ b/tests/handlers.h @@ -10,7 +10,7 @@ Copyright (c) 2003 Greg Stein <gstein@users.sourceforge.net> Copyright (c) 2005-2007 Steven Solie <steven@solie.ca> Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net> - Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org> + Copyright (c) 2016-2026 Sebastian Pipping <sebastian@pipping.org> Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk> Copyright (c) 2017 Joe Orton <jorton@redhat.com> Copyright (c) 2017 José Gutiérrez de la Concha <jose@zeroc.com> @@ -159,6 +159,9 @@ extern int XMLCALL long_encoding_handler(void *userData, const XML_Char *encoding, XML_Encoding *info); +extern int XMLCALL user_data_checking_unknown_encoding_handler( + void *userData, const XML_Char *encoding, XML_Encoding *info); + /* External Entity Handlers */ typedef struct ExtOption { diff --git a/tests/misc_tests.c b/tests/misc_tests.c index 2a8054546a12..1c508bd10466 100644 --- a/tests/misc_tests.c +++ b/tests/misc_tests.c @@ -10,7 +10,7 @@ Copyright (c) 2003 Greg Stein <gstein@users.sourceforge.net> Copyright (c) 2005-2007 Steven Solie <steven@solie.ca> Copyright (c) 2005-2012 Karl Waclawek <karl@waclawek.net> - 
Copyright (c) 2016-2025 Sebastian Pipping <sebastian@pipping.org> + Copyright (c) 2016-2026 Sebastian Pipping <sebastian@pipping.org> Copyright (c) 2017-2022 Rhodri James <rhodri@wildebeest.org.uk> Copyright (c) 2017 Joe Orton <jorton@redhat.com> Copyright (c) 2017 José Gutiérrez de la Concha <jose@zeroc.com> @@ -19,6 +19,7 @@ Copyright (c) 2020 Tim Gates <tim.gates@iress.com> Copyright (c) 2021 Donghee Na <donghee.na@python.org> Copyright (c) 2023 Sony Corporation / Snild Dolkow <snild@sony.com> + Copyright (c) 2025 Berkay Eren Ürün <berkay.ueruen@siemens.com> Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -211,7 +212,7 @@ START_TEST(test_misc_version) { if (! versions_equal(&read_version, &parsed_version)) fail("Version mismatch"); - if (xcstrcmp(version_text, XCS("expat_2.7.3")) + if (xcstrcmp(version_text, XCS("expat_2.7.5")) != 0) /* needs bump on releases */ fail("XML_*_VERSION in expat.h out of sync?\n"); } @@ -771,6 +772,35 @@ START_TEST(test_misc_async_entity_rejected) { } END_TEST +START_TEST(test_misc_no_infinite_loop_issue_1161) { + XML_Parser parser = XML_ParserCreate(NULL); + + const char *text = "<!DOCTYPE d SYSTEM 'secondary.txt'>"; + + struct ExtOption options[] = { + {XCS("secondary.txt"), + "<!ENTITY % p SYSTEM 'tertiary.txt'><!ENTITY g '%p;'>"}, + {XCS("tertiary.txt"), "<?xml version='1.0'?><a"}, + {NULL, NULL}, + }; + + XML_SetUserData(parser, options); + XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS); + XML_SetExternalEntityRefHandler(parser, external_entity_optioner); + + assert_true(_XML_Parse_SINGLE_BYTES(parser, text, (int)strlen(text), XML_TRUE) + == XML_STATUS_ERROR); + +#if defined(XML_DTD) + assert_true(XML_GetErrorCode(parser) == XML_ERROR_EXTERNAL_ENTITY_HANDLING); +#else + assert_true(XML_GetErrorCode(parser) == XML_ERROR_NO_ELEMENTS); +#endif + + XML_ParserFree(parser); +} +END_TEST + void make_miscellaneous_test_case(Suite *s) { TCase *tc_misc = 
tcase_create("miscellaneous tests"); @@ -801,4 +831,5 @@ make_miscellaneous_test_case(Suite *s) { tcase_add_test(tc_misc, test_misc_expected_event_ptr_issue_980); tcase_add_test(tc_misc, test_misc_sync_entity_tolerated); tcase_add_test(tc_misc, test_misc_async_entity_rejected); + tcase_add_test(tc_misc, test_misc_no_infinite_loop_issue_1161); } diff --git a/tests/nsalloc_tests.c b/tests/nsalloc_tests.c index 60fa87f83461..9e26d4ee1418 100644 --- a/tests/nsalloc_tests.c +++ b/tests/nsalloc_tests.c @@ -1505,6 +1505,32 @@ START_TEST(test_nsalloc_prefixed_element) { } END_TEST +/* Verify that retry after OOM in setContext() does not crash. + */ +START_TEST(test_nsalloc_setContext_zombie) { + const char *text = "<doc>Hello</doc>"; + unsigned int i; + const unsigned int max_alloc_count = 30; + + for (i = 0; i < max_alloc_count; i++) { + g_allocation_count = (int)i; + if (XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE) + != XML_STATUS_ERROR) + break; + /* Retry on the same parser — must not crash */ + g_allocation_count = ALLOC_ALWAYS_SUCCEED; + XML_Parse(g_parser, text, (int)strlen(text), XML_TRUE); + + nsalloc_teardown(); + nsalloc_setup(); + } + if (i == 0) + fail("Parsing worked despite failing allocations"); + else if (i == max_alloc_count) + fail("Parsing failed even at maximum allocation count"); +} +END_TEST + void make_nsalloc_test_case(Suite *s) { TCase *tc_nsalloc = tcase_create("namespace allocation tests"); @@ -1539,4 +1565,5 @@ make_nsalloc_test_case(Suite *s) { tcase_add_test__if_xml_ge(tc_nsalloc, test_nsalloc_long_default_in_ext); tcase_add_test(tc_nsalloc, test_nsalloc_long_systemid_in_ext); tcase_add_test(tc_nsalloc, test_nsalloc_prefixed_element); + tcase_add_test(tc_nsalloc, test_nsalloc_setContext_zombie); } diff --git a/xmlwf/Makefile.in b/xmlwf/Makefile.in index 07f2423aea10..d6d4033b0090 100644 --- a/xmlwf/Makefile.in +++ b/xmlwf/Makefile.in @@ -319,6 +319,9 @@ SO_MINOR = @SO_MINOR@ SO_PATCH = @SO_PATCH@ STRIP = @STRIP@ VERSION = @VERSION@ 
+VSCRIPT_LDFLAGS = @VSCRIPT_LDFLAGS@ +_EXPAT_COMMENT_ATTR_INFO = @_EXPAT_COMMENT_ATTR_INFO@ +_EXPAT_COMMENT_DTD_OR_GE = @_EXPAT_COMMENT_DTD_OR_GE@ abs_builddir = @abs_builddir@ abs_srcdir = @abs_srcdir@ abs_top_builddir = @abs_top_builddir@ diff --git a/xmlwf/xmlfile.c b/xmlwf/xmlfile.c index ce0b61217ed7..c4eb839f6b3b 100644 --- a/xmlwf/xmlfile.c +++ b/xmlwf/xmlfile.c @@ -11,11 +11,12 @@ Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> Copyright (c) 2004-2006 Karl Waclawek <karl@waclawek.net> Copyright (c) 2005-2007 Steven Solie <steven@solie.ca> - Copyright (c) 2016-2025 Sebastian Pipping <sebastian@pipping.org> + Copyright (c) 2016-2026 Sebastian Pipping <sebastian@pipping.org> Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> Copyright (c) 2019 David Loffredo <loffredo@steptools.com> Copyright (c) 2021 Donghee Na <donghee.na@python.org> Copyright (c) 2024 Hanno Böck <hanno@gentoo.org> + Copyright (c) 2025 Alfonso Gregory <gfunni234@gmail.com> Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -225,7 +226,6 @@ processStream(const XML_Char *filename, XML_Parser parser) { if (filename != NULL) close(fd); break; - ; } } return 1; diff --git a/xmlwf/xmlwf.c b/xmlwf/xmlwf.c index 534f32170590..2d0c4f8efd19 100644 --- a/xmlwf/xmlwf.c +++ b/xmlwf/xmlwf.c @@ -11,7 +11,7 @@ Copyright (c) 2001-2003 Fred L. Drake, Jr. 
<fdrake@users.sourceforge.net> Copyright (c) 2004-2009 Karl Waclawek <karl@waclawek.net> Copyright (c) 2005-2007 Steven Solie <steven@solie.ca> - Copyright (c) 2016-2025 Sebastian Pipping <sebastian@pipping.org> + Copyright (c) 2016-2026 Sebastian Pipping <sebastian@pipping.org> Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> Copyright (c) 2019 David Loffredo <loffredo@steptools.com> Copyright (c) 2020 Joe Orton <jorton@redhat.com> @@ -19,6 +19,7 @@ Copyright (c) 2021 Tim Bray <tbray@textuality.com> Copyright (c) 2022 Martin Ettl <ettl.martin78@googlemail.com> Copyright (c) 2022 Sean McBride <sean@rogue-research.com> + Copyright (c) 2025 Alfonso Gregory <gfunni234@gmail.com> Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -390,16 +391,13 @@ endDoctypeDecl(void *userData) { notationCount++; if (notationCount == 0) { /* Nothing to report */ - free((void *)data->currentDoctypeName); - data->currentDoctypeName = NULL; - return; + goto cleanUp; } notations = malloc(notationCount * sizeof(NotationList *)); if (notations == NULL) { fprintf(stderr, "Unable to sort notations"); - freeNotations(data); - return; + goto cleanUp; } for (p = data->notationListHead, i = 0; i < notationCount; p = p->next, i++) { @@ -439,6 +437,8 @@ endDoctypeDecl(void *userData) { fputts(T("]>\n"), data->fp); free(notations); + +cleanUp: freeNotations(data); free((void *)data->currentDoctypeName); data->currentDoctypeName = NULL; @@ -900,6 +900,7 @@ usage(const XML_Char *prog, int rc) { T(" -n enable [n]amespace processing\n") T(" -p enable processing of external DTDs and [p]arameter entities\n") T(" -x enable processing of e[x]ternal entities\n") + T(" (CAREFUL! 
This makes xmlwf vulnerable to external entity attacks (XXE).)\n") T(" -e ENCODING override any in-document [e]ncoding declaration\n") T(" -w enable support for [W]indows code pages\n") T(" -r disable memory-mapping and use [r]ead calls instead\n") diff --git a/xmlwf/xmlwf_helpgen.py b/xmlwf/xmlwf_helpgen.py index 71f7baa43396..be41d59f0a8c 100755 --- a/xmlwf/xmlwf_helpgen.py +++ b/xmlwf/xmlwf_helpgen.py @@ -6,7 +6,7 @@ # \___/_/\_\ .__/ \__,_|\__| # |_| XML parser # -# Copyright (c) 2019-2025 Sebastian Pipping <sebastian@pipping.org> +# Copyright (c) 2019-2026 Sebastian Pipping <sebastian@pipping.org> # Copyright (c) 2021 Tim Bray <tbray@textuality.com> # Licensed under the MIT license: # @@ -30,28 +30,31 @@ # USE OR OTHER DEALINGS IN THE SOFTWARE. import argparse +from textwrap import dedent -epilog = """ -environment variables: - EXPAT_ACCOUNTING_DEBUG=(0|1|2|3) - Control verbosity of accounting debugging (default: 0) - EXPAT_ENTITY_DEBUG=(0|1) - Control verbosity of entity debugging (default: 0) - EXPAT_ENTROPY_DEBUG=(0|1) - Control verbosity of entropy debugging (default: 0) - EXPAT_MALLOC_DEBUG=(0|1|2) - Control verbosity of allocation tracker (default: 0) +epilog = dedent( + """ + environment variables: + EXPAT_ACCOUNTING_DEBUG=(0|1|2|3) + Control verbosity of accounting debugging (default: 0) + EXPAT_ENTITY_DEBUG=(0|1) + Control verbosity of entity debugging (default: 0) + EXPAT_ENTROPY_DEBUG=(0|1) + Control verbosity of entropy debugging (default: 0) + EXPAT_MALLOC_DEBUG=(0|1|2) + Control verbosity of allocation tracker (default: 0) -exit status: - 0 the input files are well-formed and the output (if requested) was written successfully - 1 could not allocate data structures, signals a serious problem with execution environment - 2 one or more input files were not well-formed - 3 could not create an output file - 4 command-line argument error + exit status: + 0 the input files are well-formed and the output (if requested) was written successfully + 1 could 
not allocate data structures, signals a serious problem with execution environment + 2 one or more input files were not well-formed + 3 could not create an output file + 4 command-line argument error -xmlwf of libexpat is software libre, licensed under the MIT license. -Please report bugs at https://github.com/libexpat/libexpat/issues -- thank you! -""" + xmlwf of libexpat is software libre, licensed under the MIT license. + Please report bugs at https://github.com/libexpat/libexpat/issues -- thank you! + """ +) usage = """ %(prog)s [OPTIONS] [FILE ...] @@ -59,50 +62,121 @@ usage = """ %(prog)s -v|--version """ -parser = argparse.ArgumentParser(prog='xmlwf', add_help=False, - usage=usage, - description='xmlwf - Determines if an XML document is well-formed', - formatter_class=argparse.RawTextHelpFormatter, - epilog=epilog) +parser = argparse.ArgumentParser( + prog="xmlwf", + add_help=False, + usage=usage, + description="xmlwf - Determines if an XML document is well-formed", + formatter_class=argparse.RawTextHelpFormatter, + epilog=epilog, +) -input_related = parser.add_argument_group('input control arguments') -input_related.add_argument('-s', action='store_true', help='print an error if the document is not [s]tandalone') -input_related.add_argument('-n', action='store_true', help='enable [n]amespace processing') -input_related.add_argument('-p', action='store_true', help='enable processing of external DTDs and [p]arameter entities') -input_related.add_argument('-x', action='store_true', help='enable processing of e[x]ternal entities') -input_related.add_argument('-e', action='store', metavar='ENCODING', help='override any in-document [e]ncoding declaration') -input_related.add_argument('-w', action='store_true', help='enable support for [W]indows code pages') -input_related.add_argument('-r', action='store_true', help='disable memory-mapping and use [r]ead calls instead') -input_related.add_argument('-g', metavar='BYTES', help='buffer size to request per call pair 
to XML_[G]etBuffer and read (default: 8 KiB)') -input_related.add_argument('-k', action='store_true', help='when processing multiple files, [k]eep processing after first file with error') +input_related = parser.add_argument_group("input control arguments") +input_related.add_argument( + "-s", action="store_true", help="print an error if the document is not [s]tandalone" +) +input_related.add_argument( + "-n", action="store_true", help="enable [n]amespace processing" +) +input_related.add_argument( + "-p", + action="store_true", + help="enable processing of external DTDs and [p]arameter entities", +) +input_related.add_argument( + "-x", + action="store_true", + help=( + "enable processing of e[x]ternal entities" + "\n" + "(CAREFUL! This makes xmlwf vulnerable to external entity attacks (XXE).)" + ), +) +input_related.add_argument( + "-e", + action="store", + metavar="ENCODING", + help="override any in-document [e]ncoding declaration", +) +input_related.add_argument( + "-w", action="store_true", help="enable support for [W]indows code pages" +) +input_related.add_argument( + "-r", + action="store_true", + help="disable memory-mapping and use [r]ead calls instead", +) +input_related.add_argument( + "-g", + metavar="BYTES", + help="buffer size to request per call pair to XML_[G]etBuffer and read (default: 8 KiB)", +) +input_related.add_argument( + "-k", + action="store_true", + help="when processing multiple files, [k]eep processing after first file with error", +) -output_related = parser.add_argument_group('output control arguments') -output_related.add_argument('-d', action='store', metavar='DIRECTORY', help='output [d]estination directory') +output_related = parser.add_argument_group("output control arguments") +output_related.add_argument( + "-d", action="store", metavar="DIRECTORY", help="output [d]estination directory" +) output_mode = output_related.add_mutually_exclusive_group() -output_mode.add_argument('-c', action='store_true', help='write a [c]opy of 
input XML, not canonical XML') -output_mode.add_argument('-m', action='store_true', help='write [m]eta XML, not canonical XML') -output_mode.add_argument('-t', action='store_true', help='write no XML output for [t]iming of plain parsing') -output_related.add_argument('-N', action='store_true', help='enable adding doctype and [n]otation declarations') +output_mode.add_argument( + "-c", action="store_true", help="write a [c]opy of input XML, not canonical XML" +) +output_mode.add_argument( + "-m", action="store_true", help="write [m]eta XML, not canonical XML" +) +output_mode.add_argument( + "-t", action="store_true", help="write no XML output for [t]iming of plain parsing" +) +output_related.add_argument( + "-N", action="store_true", help="enable adding doctype and [n]otation declarations" +) -billion_laughs = parser.add_argument_group('amplification attack protection (e.g. billion laughs)', - description='NOTE: ' - 'If you ever need to increase these values ' - 'for non-attack payload, please file a bug report.') -billion_laughs.add_argument('-a', metavar='FACTOR', - help='set maximum tolerated [a]mplification factor (default: 100.0)') -billion_laughs.add_argument('-b', metavar='BYTES', help='set number of output [b]ytes needed to activate (default: 8 MiB/64 MiB)') +billion_laughs = parser.add_argument_group( + "amplification attack protection (e.g. billion laughs)", + description=( + "NOTE: " + "If you ever need to increase these values " + "for non-attack payload, please file a bug report." 
+ ), +) +billion_laughs.add_argument( + "-a", + metavar="FACTOR", + help="set maximum tolerated [a]mplification factor (default: 100.0)", +) +billion_laughs.add_argument( + "-b", + metavar="BYTES", + help="set number of output [b]ytes needed to activate (default: 8 MiB/64 MiB)", +) -reparse_deferral = parser.add_argument_group('reparse deferral') -reparse_deferral.add_argument('-q', action='store_true', - help='disable reparse deferral, and allow [q]uadratic parse runtime with large tokens') +reparse_deferral = parser.add_argument_group("reparse deferral") +reparse_deferral.add_argument( + "-q", + action="store_true", + help="disable reparse deferral, and allow [q]uadratic parse runtime with large tokens", +) -parser.add_argument('files', metavar='FILE', nargs='*', help='file to process (default: STDIN)') +parser.add_argument( + "files", metavar="FILE", nargs="*", help="file to process (default: STDIN)" +) -info = parser.add_argument_group('info arguments') +info = parser.add_argument_group("info arguments") info = info.add_mutually_exclusive_group() -info.add_argument('-h', '--help', action='store_true', help='show this [h]elp message and exit') -info.add_argument('-v', '--version', action='store_true', help='show program\'s [v]ersion number and exit') +info.add_argument( + "-h", "--help", action="store_true", help="show this [h]elp message and exit" +) +info.add_argument( + "-v", + "--version", + action="store_true", + help="show program's [v]ersion number and exit", +) -if __name__ == '__main__': +if __name__ == "__main__": parser.print_help() |
