10 files changed, 7669 insertions, 0 deletions
diff --git a/contrib/perl5/pod/Makefile.SH b/contrib/perl5/pod/Makefile.SH
new file mode 100644
index 0000000000000..b8c8c8f24c730
--- /dev/null
+++ b/contrib/perl5/pod/Makefile.SH
@@ -0,0 +1,167 @@
+case $CONFIG in
+'')	# only when CONFIG is empty or unset
+	if test -f config.sh; then TOP=.;
+	elif test -f ../config.sh; then TOP=..;
+	elif test -f ../../config.sh; then TOP=../..;
+	elif test -f ../../../config.sh; then TOP=../../..;
+	elif test -f ../../../../config.sh; then TOP=../../../..;
+	else	# not in the current directory nor up to four levels above it
+		echo "Can't find config.sh."; exit 1
+	fi
+	. $TOP/config.sh	# expected to define what is used below (e.g. $spitshell, never set in this script)
+	;;
+esac
+: This forces SH files to create target in same directory as SH file.
+: This is so that make depend always knows where to find SH derivatives.
+case "$0" in
+*/*) cd `expr X$0 : 'X\(.*\)/'` ;;	# invoked via a path: cd to its directory part
+esac
+
+if test -d pod; then	# a pod subdirectory exists here: work inside it
+  cd pod || exit 1
+fi
+POD=`echo *.pod`	# every *.pod in the current directory
+MAN=`echo $POD|sed 's/\.pod/\.man/g'`	# same names with .man
+HTML=`echo $POD|sed 's/perltoc.pod//'|sed 's/\.pod/\.html/g'`	# same names with .html, perltoc dropped
+TEX=`echo $POD|sed 's/\.pod/\.tex/g'`	# same names with .tex
+
+echo "Extracting pod/Makefile (with variable substitutions)"
+: This section of the file will have variable substitutions done on it.
+: Move anything that needs config subs from !NO!SUBS! section to !GROK!THIS!.
+: Protect any dollar signs and backticks that you do not want interpreted
+: by putting a backslash in front.  You may delete these comments.
+
+$spitshell >Makefile <<!GROK!THIS!
+# pod/Makefile
+# This file is derived from pod/Makefile.SH.  Any changes made here will
+# be lost the next time you run Configure.
+
+POD = $POD
+
+MAN = $MAN
+
+# no perltoc.html
+HTML = $HTML
+
+TEX  = $TEX  
+
+!GROK!THIS!
+
+## In the following dollars and backticks do not need the extra backslash.
+$spitshell >>Makefile <<'!NO!SUBS!'
+
+CONVERTERS = pod2html pod2latex pod2man pod2text checkpods \
+		pod2usage podchecker podselect
+# Change HTMLROOT to fix cross-references in HTML (note kept off the value line so no whitespace trails the value).
+HTMLROOT = /
+POD2HTML = pod2html \
+	    --htmlroot=$(HTMLROOT) \
+	    --podroot=.. --podpath=pod:lib:ext:vms \
+	    --libpods=perlfunc:perlguts:perlvar:perlrun:perlop
+# PERL runs the converters and .PL extractions; REALPERL is used only by the compile target.
+PERL = ../miniperl
+PERLILIB = $(PERL) -I../lib
+REALPERL = ../perl
+
+all: $(CONVERTERS) man	# default goal: converter scripts plus the man pages
+
+converters: $(CONVERTERS)
+
+regen_pods: perlmodlib.pod toc	# rebuild perlmodlib.pod and run buildtoc
+
+buildtoc:	buildtoc.PL perl.pod ../MANIFEST
+	$(PERLILIB) buildtoc.PL
+
+perltoc.pod:	buildtoc	# no recipe here; the toc target is what runs buildtoc
+
+man:	pod2man $(MAN)
+
+html:	pod2html $(HTML)
+
+tex:	pod2latex $(TEX)
+
+toc:	buildtoc
+	$(PERLILIB) buildtoc
+
+# Suffix rules must not list prerequisites (POSIX; GNU make ignores or warns
+# about them), so the converter dependencies are ordinary rules instead.
+.SUFFIXES: .pm .pod .man .html .tex
+
+$(MAN):	pod2man
+$(HTML):	pod2html
+$(TEX):	pod2latex
+
+.pm.man:
+	$(PERL) -I../lib pod2man $*.pm >$*.man
+
+.pod.man:
+	$(PERL) -I../lib pod2man $*.pod >$*.man
+
+.pm.html:
+	$(PERL) -I../lib $(POD2HTML) --infile=$*.pm --outfile=$*.html
+
+.pod.html:
+	$(PERL) -I../lib $(POD2HTML) --infile=$*.pod --outfile=$*.html
+
+.pm.tex:
+	$(PERL) -I../lib pod2latex $*.pm
+
+.pod.tex:
+	$(PERL) -I../lib pod2latex $*.pod
+
+clean:	# remove generated documents and scratch files
+	rm -f $(MAN)
+	rm -f $(HTML)
+	rm -f $(TEX)
+	rm -f pod2html-*cache
+	rm -f *.aux *.log *.exe
+
+realclean:	clean	# additionally remove the generated converter scripts
+	rm -f $(CONVERTERS)
+
+distclean:	realclean
+
+veryclean:	distclean	# additionally remove *~ and *.orig leftovers
+	-rm -f *~ *.orig
+
+check:	checkpods	# run the generated checkpods script over every pod
+	@echo "checking..."; \
+	$(PERL) -I../lib checkpods $(POD)
+
+# Dependencies: each script is produced by running its .PL source under $(PERLILIB).
+pod2latex:	pod2latex.PL ../lib/Config.pm
+	$(PERLILIB) pod2latex.PL
+
+pod2html:	pod2html.PL ../lib/Config.pm
+	$(PERLILIB) pod2html.PL
+
+pod2man:	pod2man.PL ../lib/Config.pm
+	$(PERLILIB) pod2man.PL
+
+pod2text:	pod2text.PL ../lib/Config.pm
+	$(PERLILIB) pod2text.PL
+
+checkpods:	checkpods.PL ../lib/Config.pm
+	$(PERLILIB) checkpods.PL
+
+pod2usage:	pod2usage.PL ../lib/Config.pm
+	$(PERLILIB) pod2usage.PL
+
+podchecker:	podchecker.PL ../lib/Config.pm
+	$(PERLILIB) podchecker.PL
+
+podselect:	podselect.PL ../lib/Config.pm
+	$(PERLILIB) podselect.PL
+
+perlmodlib.pod:	$(PERL) perlmodlib.PL ../mv-if-diff
+	rm -f perlmodlib.tmp
+	$(PERLILIB) perlmodlib.PL
+	sh ../mv-if-diff perlmodlib.tmp perlmodlib.pod
+
+compile: all	# run perlcc on four of the scripts, producing .exe files and logging to ../compilelog
+	$(REALPERL) -I../lib ../utils/perlcc -o pod2latex.exe pod2latex -log ../compilelog
+	$(REALPERL) -I../lib ../utils/perlcc -o pod2man.exe pod2man -log ../compilelog
+	$(REALPERL) -I../lib ../utils/perlcc -o pod2text.exe pod2text -log ../compilelog
+	$(REALPERL) -I../lib ../utils/perlcc -o checkpods.exe checkpods -log ../compilelog
+
+!NO!SUBS!
diff --git a/contrib/perl5/pod/buildtoc.PL b/contrib/perl5/pod/buildtoc.PL
new file mode 100755
index 0000000000000..7c5a45018e8e5
--- /dev/null
+++ b/contrib/perl5/pod/buildtoc.PL
@@ -0,0 +1,492 @@
+#!/usr/local/bin/perl
+
+use Config;
+use File::Basename qw(&basename &dirname);
+use Cwd;
+
+# List explicitly here the variables you want Configure to
+# generate.  Metaconfig only looks for shell variables, so you
+# have to mention them as if they were shell variables, not
+# %Config entries.  Thus you write
+#  $startperl
+# to ensure Configure will look for $Config{startperl}.
+
+# This forces PL files to create target in same directory as PL file.
+# This is so that make depend always knows where to find PL derivatives.
+$origdir = cwd;	# remembered for the chdir at the end of this script
+chdir(dirname($0));
+($file = basename($0)) =~ s/\.PL$//;	# output name: this script's name minus .PL
+$file =~ s/\.pl$// if ($^O eq 'os2' or $^O eq 'dos');  # "case-forgiving"
+$file =~ s/\.pl$/.com/ if ($^O eq 'VMS');              # "case-forgiving"
+
+open OUT,">$file" or die "Can't create $file: $!";
+
+print "Extracting $file (with variable substitutions)\n";
+
+# In this section, perl variables will be expanded during extraction.
+# You can use $Config{...} to use Configure variables.
+
+print OUT <<"!GROK!THIS!";
+$Config{'startperl'}
+    eval 'exec perl -S \$0 "\$@"'
+        if 0;
+!GROK!THIS!
+
+# In the following, perl variables are not expanded during extraction.
+
+print OUT <<'!NO!SUBS!';
+
+#
+# buildtoc
+#
+# !!!!!!!   DO NOT EDIT THIS FILE   !!!!!!!
+# This file is autogenerated by buildtoc.PL.
+# Edit that file and run it to effect changes.
+#
+# Builds perltoc.pod and sanity checks the list of pods against all
+# of the MANIFEST, perl.pod, and ourselves.
+#
+
+use File::Find;
+use Cwd;
+use Text::Wrap;
+
+sub output ($);
+
+if (-d "pod") {
+  die "$0: failed to chdir('pod'): $!\n" unless chdir("pod");
+}
+# List the pods only after any chdir above, so the glob sees the pod directory.
+@PODS = glob("*.pod");
+
+@pods = qw(
+    perl
+    perlfaq
+    perltoc
+    perlbook
+
+    perlsyn
+    perldata
+    perlop
+    perlsub
+    perlfunc
+    perlreftut
+    perldsc
+    perlrequick
+    perlpod
+    perlstyle
+    perltrap
+
+    perlrun
+    perldiag
+    perllexwarn
+    perldebtut
+    perldebug
+
+    perlvar
+    perllol
+    perlopentut
+    perlretut
+
+    perlre
+    perlref
+
+    perlform
+
+    perlboot
+    perltoot
+    perltootc
+    perlobj
+    perlbot
+    perltie
+
+    perlipc
+    perlfork
+    perlnumber
+    perlthrtut
+
+    perlport
+    perllocale
+    perlunicode
+    perlebcdic
+
+    perlsec
+
+    perlmod
+    perlmodlib
+    perlmodinstall
+    perlnewmod
+
+    perlfaq1            
+    perlfaq2            
+    perlfaq3            
+    perlfaq4            
+    perlfaq5            
+    perlfaq6            
+    perlfaq7            
+    perlfaq8            
+    perlfaq9            
+
+    perlcompile        
+
+    perlembed          
+    perldebguts         
+    perlxstut           
+    perlxs              
+    perlclib            
+    perlguts            
+    perlcall            
+    perlutil            
+    perlfilter          
+    perldbmfilter       
+    perlapi             
+    perlintern          
+    perlapio            
+    perltodo            
+    perlhack            
+
+    perlhist           
+    perldelta           
+    perl5005delta       
+    perl5004delta       
+
+    perlaix
+    perlamiga          
+    perlbs2000
+    perlcygwin          
+    perldos             
+    perlepoc             
+    perlhpux            
+    perlmachten         
+    perlmacos
+    perlmpeix         
+    perlos2             
+    perlos390           
+    perlsolaris
+    perlvmesa             
+    perlvms             
+    perlvos             
+    perlwin32           
+	  );
+
+@ARCHPODS = qw(
+    perlaix	
+    perlamiga          
+    perlbs2000
+    perlcygwin          
+    perldos             
+    perlepoc             
+    perlhpux            
+    perlmachten         
+    perlmacos
+    perlmpeix
+    perlos2             
+    perlos390           
+    perlsolaris
+    perlvmesa             
+    perlvms             
+    perlvos             
+    perlwin32           
+          );
+# Suffix each name with ".pod"; the same-named hashes allow exists() checks.
+$_ .= '.pod' foreach @ARCHPODS;
+$_ .= '.pod' foreach @pods;
+@ARCHPODS{@ARCHPODS} = ();
+@pods{@pods} = ();
+@PODS{@PODS} = ();
+
+open(MANI, "../MANIFEST") || die "$0: opening ../MANIFEST failed: $!";
+while (<MANI>) {
+  if (m!^pod/([^.]+\.pod)\s+!i) {	# pod/<name>.pod entries whose name part contains no other dot
+     push @MANIPODS, $1;
+  }
+}
+close(MANI);
+@MANIPODS{@MANIPODS} = ();	# same-named hash for exists() lookups
+
+open(PERLPOD, "perl.pod") || die "$0: opening perl.pod failed: $!\n";
+while (<PERLPOD>) {
+  if (/^For ease of access, /../^\(If you're intending /) {	# range operator: from the first marker line through the second
+	if (/^\s+(perl\S*)\s+\w/) {	# indented "perlxxx <text>" entry
+		push @PERLPODS, "$1.pod";
+	}
+  }
+}
+close(PERLPOD);
+die "$0: could not find the pod listing of perl.pod\n"
+  unless @PERLPODS;
+@PERLPODS{@PERLPODS} = ();
+
+# Cross-check every pod found on disk against our own list, the MANIFEST
+# and perl.pod, then check each of those lists against the disk.
+# Mismatches only produce warnings; the run continues.
+
+foreach my $i (sort keys %PODS) {
+  warn "$0: $i exists but is unknown by buildtoc\n"
+	unless exists $pods{$i};
+  warn "$0: $i exists but is unknown by ../MANIFEST\n"
+	if !exists $MANIPODS{$i} && !exists $ARCHPODS{$i};	# pods listed in @ARCHPODS are exempt
+  warn "$0: $i exists but is unknown by perl.pod\n"
+	unless exists $PERLPODS{$i};
+}
+foreach my $i (sort keys %pods) {
+  warn "$0: $i is known by buildtoc but does not exist\n"
+	unless exists $PODS{$i};
+}
+foreach my $i (sort keys %MANIPODS) {
+  warn "$0: $i is known by ../MANIFEST but does not exist\n"
+	unless exists $PODS{$i};
+}
+foreach my $i (sort keys %PERLPODS) {
+  warn "$0: $i is known by perl.pod but does not exist\n"
+	unless exists $PODS{$i};
+}
+
+# We are ready to rock.
+open(OUT, ">perltoc.pod") || die "$0: creating perltoc.pod failed: $!";
+
+$/ = '';
+@ARGV = @pods;
+
+($_= <<EOPOD2B) =~ s/^\t//gm && output($_);
+
+	=head1 NAME
+
+	perltoc - perl documentation table of contents
+
+	=head1 DESCRIPTION
+
+	This page provides a brief table of contents for the rest of the Perl
+	documentation set.  It is meant to be scanned quickly or grepped
+	through to locate the proper section you're looking for.
+
+	=head1 BASIC DOCUMENTATION
+
+EOPOD2B
+#' make emacs happy
+
+podset(@pods);
+
+find \&getpods => qw(../lib ../ext);	# getpods collects the usable files in @modpods
+
+sub getpods {	# File::Find callback: remember files that contain a =head1 NAME line
+    if (/\.p(od|m)$/) {
+	# Skip .pm files that have corresponding .pod files, and Functions.pm.
+	return if /(.*)\.pm$/ && -f "$1.pod";
+	my $file = $File::Find::name;
+	return if $file eq '../lib/Pod/Functions.pm'; # Used only by pod itself
+
+	die "tut $file" if $file =~ /TUT/;	# name the offending path
+	unless (open (F, "< $_\0")) {	# trailing NUL: two-argument open must not trim the file name
+	    warn "bogus <$file>: $!";
+	    system "ls", "-l", $file;
+	}
+	else {
+	    my $line;
+	    while ($line = <F>) {
+		if ($line =~ /^=head1\s+NAME\b/) {
+		    push @modpods, $file;
+		    #warn "GOOD $file\n";
+		    return;
+		}
+	    }
+	    warn "$0: $file: cannot find =head1 NAME\n";
+	}
+    }
+}
+
+die "no pods" unless @modpods;
+
+for (@modpods) {
+    #($name) = /(\w+)\.p(m|od)$/;
+    $name = path2modname($_);
+    if ($name =~ /^[a-z]/) {	# names starting in lowercase are filed as pragmata
+	push @pragmata, $_;
+    } else {
+	if ($done{$name}++) {	# keep only the first file seen for a module name
+	    # warn "already did $_\n";
+	    next;
+	}
+	push @modules, $_;
+	push @modname, $name;	# kept parallel to @modules
+    }
+}
+
+($_= <<EOPOD2B) =~ s/^\t//gm && output($_);
+
+
+
+	=head1 PRAGMA DOCUMENTATION
+
+EOPOD2B
+
+podset(sort @pragmata);
+
+($_= <<EOPOD2B) =~ s/^\t//gm && output($_);
+
+
+
+	=head1 MODULE DOCUMENTATION
+
+EOPOD2B
+
+podset( @modules[ sort { $modname[$a] cmp $modname[$b] } 0 .. $#modules ] );
+
+($_= <<EOPOD2B) =~ s/^\t//gm;
+
+
+	=head1 AUXILIARY DOCUMENTATION
+
+	Here should be listed all the extra programs' documentation, but they
+	don't all have manual pages yet:
+
+	=over 4
+
+	=item a2p
+
+	=item s2p
+
+	=item find2perl
+
+	=item h2ph
+
+	=item c2ph
+
+	=item h2xs
+
+	=item xsubpp
+
+	=item pod2man
+
+	=item wrapsuid
+
+	=back
+
+	=head1 AUTHOR
+
+	Larry Wall <F<larry\@wall.org>>, with the help of oodles
+	of other folks.
+
+
+EOPOD2B
+output $_;
+output "\n";                    # flush $LINE
+exit;
+
+sub podset {	# emit table-of-contents entries for the pod files given as arguments
+    local @ARGV = @_;	# lets the <> loop below read those files
+
+    while(<>) {
+	if (s/^=head1 (NAME)\s*/=head2 /) {	# a NAME section opens a new document entry
+	    $pod = path2modname($ARGV);
+	    unhead1();
+	    output "\n \n\n=head2 ";
+	    $_ = <>;	# the record following the NAME heading
+	    if ( /^\s*$pod\b/ ) {
+		s/$pod\.pm/$pod/;       # '.pm' in NAME !?
+		output $_;
+	    } else {
+		s/^/$pod, /;	# prefix the name when the text does not start with it
+		output $_;
+	    }
+	    next;
+	}
+	if (s/^=head1 (.*)/=item $1/) {	# other =head1 headings become first-level items
+	    unhead2();
+	    output "=over 4\n\n" unless $inhead1;
+	    $inhead1 = 1;
+	    output $_; nl(); next;
+	}
+	if (s/^=head2 (.*)/=item $1/) {	# =head2 headings become second-level items
+	    unitem();
+	    output "=over 4\n\n" unless $inhead2;
+	    $inhead2 = 1;
+	    output $_; nl(); next;
+	}
+	if (s/^=item ([^=].*)/$1/) {	# =item entries are joined into one comma-separated run
+	    next if $pod eq 'perldiag';
+	    s/^\s*\*\s*$// && next;	# bare bullet: nothing to list
+	    s/^\s*\*\s*//;
+	    s/\n/ /g;
+	    s/\s+$//;
+	    next if /^[\d.]+$/;	# item made only of digits and dots
+	    next if $pod eq 'perlmodlib' && /^ftp:/;
+	    ##print "=over 4\n\n" unless $initem;
+	    output ", " if $initem;
+	    $initem = 1;
+	    s/\.$//;
+	    s/^-X\b/-I<X>/;
+	    output $_; next;
+	}
+	if (s/^=cut\s*\n//) {
+	    unhead1();
+	    next;
+	}
+    }
+}
+
+sub path2modname {
+    my ($modname) = @_;
+    $modname =~ s/\.p(?:m|od)$//;         # drop the .pm / .pod extension
+    $modname =~ s{.*?/(?:lib|ext)/}{};    # drop the path up to the first lib/ or ext/
+    $modname =~ s{/}{::}g;                # directory separators become package separators
+    $modname =~ s/(\w+)::\1/$1/;          # collapse the first X::X (e.g. IO::IO becomes IO)
+    return $modname;
+}
+
+# Leave the first nesting level: close anything deeper first, then end
+# the =over list if one is open.
+sub unhead1 {
+    unhead2();
+    output "\n\n=back\n\n" if $inhead1;
+    $inhead1 = 0;
+}
+
+# Leave the second nesting level: finish any item run, then end the
+# =over list if one is open.
+sub unhead2 {
+    unitem();
+    output "\n\n=back\n\n" if $inhead2;
+    $inhead2 = 0;
+}
+
+# Finish a run of comma-separated items, if one is in progress,
+# by emitting a blank line after it.
+sub unitem {
+    output "\n\n" if $initem;
+    $initem = 0;
+}
+
+# Emit a single newline through output().
+sub nl {
+    output "\n";
+}
+
+my $NEWLINE;	# count of consecutive newlines already written
+my $LINE;	# text gathered since the last newline, not yet written
+
+# Buffer text until a newline arrives, write it wrapped, and cap newline runs at two.
+sub output ($) {
+    foreach my $piece (split /(\n)/, shift) {
+        if ($piece ne "\n") {
+            next unless $piece =~ /\S/;     # ignore empty or whitespace-only pieces
+            $LINE .= $piece;
+            $NEWLINE = 0;
+            next;
+        }
+        if ($LINE) {
+            print OUT wrap('', '', $LINE);
+            $LINE = '';
+        }
+        next if $NEWLINE >= 2;
+        print OUT $piece;
+        $NEWLINE++;
+    }
+}
+
+!NO!SUBS!
+
+close OUT or die "Can't close $file: $!";
+chmod 0755, $file or die "Can't reset permissions for $file: $!\n";	# make the generated script executable
+exec("$Config{'eunicefix'} $file") if $Config{'eunicefix'} ne ':';	# a successful exec never returns
+chdir $origdir;
diff --git a/contrib/perl5/pod/perlclib.pod b/contrib/perl5/pod/perlclib.pod
new file mode 100644
index 0000000000000..a0f4a80eecd70
--- /dev/null
+++ b/contrib/perl5/pod/perlclib.pod
@@ -0,0 +1,197 @@
+=head1 NAME
+
+perlclib - Internal replacements for standard C library functions
+
+=head1 DESCRIPTION
+
+One thing Perl porters should note is that F<perl> doesn't tend to use that
+much of the C standard library internally; you'll see very little use of, 
+for example, the F<ctype.h> functions in there. This is because Perl
+tends to reimplement or abstract standard library functions, so that we
+know exactly how they're going to operate.
+
+This is a reference card for people who are familiar with the C library
+and who want to do things the Perl way; to tell them which functions
+they ought to use instead of the more normal C functions. 
+
+=head2 Conventions
+
+In the following tables:
+
+=over 3
+
+=item C<t>
+
+is a type.
+
+=item C<p>
+
+is a pointer.
+
+=item C<n>
+
+is a number.
+
+=item C<s>
+
+is a string.
+
+=back
+
+C<sv>, C<av>, C<hv>, etc. represent variables of their respective types.
+
+=head2 File Operations
+
+Instead of the F<stdio.h> functions, you should use the Perl abstraction
+layer. Instead of C<FILE*> types, you need to be handling C<PerlIO*>
+types.  Don't forget that with the new PerlIO layered I/O abstraction 
+C<FILE*> types may not even be available. See also the C<perlapio>
+documentation for more information about the following functions:
+
+    Instead Of:                 Use:
+
+    stdin                       PerlIO_stdin()
+    stdout                      PerlIO_stdout()
+    stderr                      PerlIO_stderr()
+
+    fopen(fn, mode)             PerlIO_open(fn, mode)
+    freopen(fn, mode, stream)   PerlIO_reopen(fn, mode, perlio) (Deprecated)
+    fflush(stream)              PerlIO_flush(perlio)
+    fclose(stream)              PerlIO_close(perlio)
+
+=head2 File Input and Output
+
+    Instead Of:                 Use:
+
+    fprintf(stream, fmt, ...)   PerlIO_printf(perlio, fmt, ...)
+
+    [f]getc(stream)             PerlIO_getc(perlio)
+    [f]putc(stream, n)          PerlIO_putc(perlio, n)
+    ungetc(n, stream)           PerlIO_ungetc(perlio, n)
+
+Note that the PerlIO equivalents of C<fread> and C<fwrite> are slightly
+different from their C library counterparts:
+
+    fread(p, size, n, stream)   PerlIO_read(perlio, buf, numbytes)
+    fwrite(p, size, n, stream)  PerlIO_write(perlio, buf, numbytes)
+
+    fputs(s, stream)            PerlIO_puts(perlio, s)
+
+There is no equivalent to C<fgets>; one should use C<sv_gets> instead:
+
+    fgets(s, n, stream)         sv_gets(sv, perlio, append)
+
+=head2 File Positioning
+
+    Instead Of:                 Use:
+
+    feof(stream)                PerlIO_eof(perlio)
+    fseek(stream, n, whence)    PerlIO_seek(perlio, n, whence)
+    rewind(stream)              PerlIO_rewind(perlio)
+
+    fgetpos(stream, p)          PerlIO_getpos(perlio, sv)
+    fsetpos(stream, p)          PerlIO_setpos(perlio, sv)
+
+    ferror(stream)              PerlIO_error(perlio)
+    clearerr(stream)            PerlIO_clearerr(perlio)
+
+=head2 Memory Management and String Handling
+
+    Instead Of:                 Use:
+
+    t* p = malloc(n)            New(id, p, n, t)
+    t* p = calloc(n, s)         Newz(id, p, n, t)
+    p = realloc(p, n)           Renew(p, n, t)
+    memcpy(dst, src, n)         Copy(src, dst, n, t)
+    memmove(dst, src, n)        Move(src, dst, n, t)
+    memcpy/*(struct foo *)      StructCopy(src, dst, t)
+    free(p)                     Safefree(p)
+
+    strdup(p)                   savepv(p)
+    strndup(p, n)               savepvn(p, n) (Hey, strndup doesn't exist!)
+
+    strstr(big, little)         instr(big, little)
+    strcmp(s1, s2)              strLE(s1, s2) / strEQ(s1, s2) / strGT(s1,s2)
+    strncmp(s1, s2, n)          strnNE(s1, s2, n) / strnEQ(s1, s2, n)
+
+Notice the different order of arguments to C<Copy> and C<Move> than used
+in C<memcpy> and C<memmove>.
+
+Most of the time, though, you'll want to be dealing with SVs internally
+instead of raw C<char *> strings:
+
+    strlen(s)                   sv_len(sv)
+    strcpy(dt, src)             sv_setpv(sv, s)
+    strncpy(dt, src, n)         sv_setpvn(sv, s, n)
+    strcat(dt, src)             sv_catpv(sv, s)
+    strncat(dt, src, n)         sv_catpvn(sv, s, n)
+    sprintf(s, fmt, ...)        sv_setpvf(sv, fmt, ...)
+
+Note also the existence of C<sv_catpvf> and C<sv_vcatpvfn>, combining
+concatenation with formatting.
+
+=head2 Character Class Tests
+
+There are two types of character class tests that Perl implements: one
+type deals in C<char>s and is thus B<not> Unicode aware (and hence
+deprecated unless you B<know> you should use it) and the other type
+deals in C<UV>s and knows about Unicode properties. In the following
+table, C<c> is a C<char>, and C<u> is a Unicode codepoint.
+
+    Instead Of:                 Use:            But better use:
+
+    isalnum(c)                  isALNUM(c)      isALNUM_uni(u)
+    isalpha(c)                  isALPHA(c)      isALPHA_uni(u)
+    iscntrl(c)                  isCNTRL(c)      isCNTRL_uni(u)
+    isdigit(c)                  isDIGIT(c)      isDIGIT_uni(u)
+    isgraph(c)                  isGRAPH(c)      isGRAPH_uni(u)
+    islower(c)                  isLOWER(c)      isLOWER_uni(u)
+    isprint(c)                  isPRINT(c)      isPRINT_uni(u)
+    ispunct(c)                  isPUNCT(c)      isPUNCT_uni(u)
+    isspace(c)                  isSPACE(c)      isSPACE_uni(u)
+    isupper(c)                  isUPPER(c)      isUPPER_uni(u)
+    isxdigit(c)                 isXDIGIT(c)     isXDIGIT_uni(u)
+
+    tolower(c)                  toLOWER(c)      toLOWER_uni(u)
+    toupper(c)                  toUPPER(c)      toUPPER_uni(u)
+
+=head2 F<stdlib.h> functions
+
+    Instead Of:                 Use: 
+
+    atof(s)                     Atof(s)
+    atol(s)                     Atol(s)
+    strtod(s, *p)               Nothing.  Just don't use it.
+    strtol(s, *p, n)            Strtol(s, *p, n)
+    strtoul(s, *p, n)           Strtoul(s, *p, n)
+
+Notice also the C<scan_bin>, C<scan_hex>, and C<scan_oct> functions in
+F<util.c> for converting strings representing numbers in the respective
+bases into C<NV>s.
+
+In theory C<Strtol> and C<Strtoul> may not be defined if the machine perl is
+built on doesn't actually have strtol and strtoul. But as those 2
+functions are part of the 1989 ANSI C spec we suspect you'll find them
+everywhere by now.
+
+    int rand()                  double Drand01()
+    srand(n)                    { seedDrand01((Rand_seed_t)n); 
+                                  PL_srand_called = TRUE; }
+
+    exit(n)                     my_exit(n)
+    system(s)                   Don't. Look at pp_system or use my_popen
+
+    getenv(s)                   PerlEnv_getenv(s)
+    setenv(s, val)              my_putenv(s, val)
+
+=head2 Miscellaneous functions
+
+You should not even B<want> to use F<setjmp.h> functions, but if you
+think you do, use the C<JMPENV> stack in F<scope.h> instead.
+
+For C<signal>/C<sigaction>, use C<rsignal(signo, handler)>.
+
+=head1 SEE ALSO
+
+C<perlapi>, C<perlapio>, C<perlguts>
+
diff --git a/contrib/perl5/pod/perldebtut.pod b/contrib/perl5/pod/perldebtut.pod
new file mode 100644
index 0000000000000..e11102e5676ef
--- /dev/null
+++ b/contrib/perl5/pod/perldebtut.pod
@@ -0,0 +1,721 @@
+=head1 NAME
+
+perldebtut - Perl debugging tutorial
+
+=head1 DESCRIPTION
+
+A (very) lightweight introduction to the use of the perl debugger, and a
+pointer to existing, deeper sources of information on the subject of debugging
+perl programs.  
+
+There's an extraordinary number of people out there who don't appear to know
+anything about using the perl debugger, though they use the language every
+day.  
+This is for them.  
+
+
+=head1 use strict
+
+First of all, there's a few things you can do to make your life a lot more
+straightforward when it comes to debugging perl programs, without using the
+debugger at all.  To demonstrate, here's a simple script with a problem:
+
+	#!/usr/bin/perl
+
+	$var1 = 'Hello World'; # always wanted to do that :-)
+	$var2 = "$varl\n";
+
+	print $var2; 
+	exit;
+
+While this compiles and runs happily, it probably won't do what's expected,
+namely it doesn't print "Hello World\n" at all;  It will on the other hand do
+exactly what it was told to do, computers being a bit that way inclined.  That
+is, it will print out a newline character, and you'll get what looks like a
+blank line.  It looks like there's 2 variables when (because of the typo)
+there's really 3:
+
+	$var1 = 'Hello World'
+	$varl = undef
+	$var2 = "\n"
+
+To catch this kind of problem, we can force each variable to be declared
+before use by pulling in the strict module, by putting 'use strict;' after the
+first line of the script.
+
+Now when you run it, perl complains about the 3 undeclared variables and we
+get four error messages because one variable is referenced twice:
+
+ Global symbol "$var1" requires explicit package name at ./t1 line 4.
+ Global symbol "$var2" requires explicit package name at ./t1 line 5.
+ Global symbol "$varl" requires explicit package name at ./t1 line 5.
+ Global symbol "$var2" requires explicit package name at ./t1 line 7.
+ Execution of ./hello aborted due to compilation errors.     
+
+Luvverly! and to fix this we declare all variables explicitly and now our
+script looks like this:	
+
+	#!/usr/bin/perl
+	use strict;
+
+	my $var1 = 'Hello World';
+	my $varl = '';
+	my $var2 = "$varl\n";
+
+	print $var2; 
+	exit;
+
+We then do (always a good idea) a syntax check before we try to run it again:
+
+	> perl -c hello
+	hello syntax OK 
+
+And now when we run it, we get "\n" still, but at least we know why.  Just
+getting this script to compile has exposed the '$varl' (with the letter 'l')
+variable, and simply changing $varl to $var1 solves the problem.
+
+
+=head1 Looking at data and -w and w
+
+Ok, but how about when you want to really see your data, what's in that
+dynamic variable, just before using it?
+
+	#!/usr/bin/perl 
+	use strict;
+
+	my $key = 'welcome';
+	my %data = (
+		'this' => qw(that), 
+		'tom' => qw(and jerry),
+		'welcome' => q(Hello World),
+		'zip' => q(welcome),
+	);
+	my @data = keys %data;
+
+	print "$data{$key}\n";
+	exit;                               
+
+Looks OK, after it's been through the syntax check (perl -c scriptname), we
+run it and all we get is a blank line again!  Hmmmm.
+
+One common debugging approach here, would be to liberally sprinkle a few print
+statements, to add a check just before we print out our data, and another just
+after:
+
+	print "All OK\n" if grep($key, keys %data);
+	print "$data{$key}\n";
+	print "done: '$data{$key}'\n";
+
+And try again:
+
+	> perl data
+	All OK     
+
+	done: ''
+
+After much staring at the same piece of code and not seeing the wood for the
+trees for some time, we get a cup of coffee and try another approach.  That
+is, we bring in the cavalry by giving perl the 'B<-d>' switch on the command
+line:
+
+	> perl -d data 
+	Default die handler restored.
+
+	Loading DB routines from perl5db.pl version 1.07
+	Editor support available.
+
+	Enter h or `h h' for help, or `man perldebug' for more help.
+
+	main::(./data:4):     my $key = 'welcome';   
+
+Now, what we've done here is to launch the built-in perl debugger on our
+script.  It's stopped at the first line of executable code and is waiting for
+input.
+
+Before we go any further, you'll want to know how to quit the debugger: use
+just the letter 'B<q>', not the words 'quit' or 'exit':
+
+	DB<1> q
+	>
+
+That's it, you're back on home turf again.
+
+
+=head1 help
+
+Fire the debugger up again on your script and we'll look at the help menu. 
+There's a couple of ways of calling help: a simple 'B<h>' will get you a long
+scrolled list of help, 'B<|h>' (pipe-h) will pipe the help through your pager
+('more' or 'less' probably), and finally, 'B<h h>' (h-space-h) will give you a
+helpful mini-screen snapshot:
+
+ DB<1> h h
+ List/search source lines:               Control script execution:
+ l [ln|sub]  List source code            T           Stack trace
+ - or .      List previous/current line  s [expr]    Single step [in expr]
+ w [line]    List around line            n [expr]    Next, steps over subs
+ f filename  View source in file         <CR/Enter>  Repeat last n or s
+ /pattern/ ?patt?   Search forw/backw    r           Return from subroutine
+ v           Show versions of modules    c [ln|sub]  Continue until position
+ Debugger controls:                        L           List
+                                                       break/watch/actions
+ O [...]     Set debugger options        t [expr]    Toggle trace [trace expr]
+ <[<]|{[{]|>[>] [cmd] Do pre/post-prompt b [ln|event|sub] [cnd] Set breakpoint
+ ! [N|pat]   Redo a previous command     d [ln] or D Delete a/all breakpoints
+ H [-num]    Display last num commands   a [ln] cmd  Do cmd before line
+ = [a val]   Define/list an alias        W expr      Add a watch expression
+ h [db_cmd]  Get help on command         A or W      Delete all actions/watch
+ |[|]db_cmd  Send output to pager        ![!] syscmd Run cmd in a subprocess
+ q or ^D     Quit                        R           Attempt a restart
+ Data Examination:       expr     Execute perl code, also see: s,n,t expr
+ x|m expr      Evals expr in list context, dumps the result or lists methods.
+ p expr        Print expression (uses script's current package).
+ S [[!]pat]    List subroutine names [not] matching pattern
+ V [Pk [Vars]] List Variables in Package.  Vars can be ~pattern or !pattern.
+ X [Vars]      Same as "V current_package [Vars]".
+ For more help, type h cmd_letter, or run man perldebug for all docs.       
+
+More confusing options than you can shake a big stick at!  It's not as bad as
+it looks and it's very useful to know more about all of it, and fun too!
+
+There's a couple of useful ones to know about straight away.  You wouldn't
+think we're using any libraries at all at the moment, but 'B<v>' will show
+which modules are currently loaded, by the debugger as well as your script.
+'B<V>' and 'B<X>' show variables in the program by package scope and can be
+constrained by pattern.  'B<m>' shows methods and 'B<S>' shows all subroutines
+(by pattern):
+
+	DB<2>S str 
+	dumpvar::stringify
+	strict::bits
+	strict::import
+	strict::unimport  
+
+Using 'X' and cousins requires you not to use the type identifiers ($@%), just
+the 'name':
+
+	DB<3>X ~err
+	FileHandle(stderr) => fileno(2)    
+
+Remember we're in our tiny program with a problem, we should have a look at
+where we are, and what our data looks like. First of all let's have a window
+on our present position (the first line of code in this case), via the letter
+'B<w>':
+
+	DB<4> w
+	1       #!/usr/bin/perl
+	2:      use strict;
+	3
+	4==>    my $key = 'welcome';
+	5:      my %data = (
+	6               'this' => qw(that),
+	7               'tom' => qw(and jerry),
+	8               'welcome' => q(Hello World),
+	9               'zip' => q(welcome),
+	10      );                                 
+
+At line number 4 is a helpful pointer, that tells you where you are now.  To
+see more code, type 'w' again:
+
+	DB<4> w
+	8               'welcome' => q(Hello World),
+	9               'zip' => q(welcome),
+	10      );
+	11:     my @data = keys %data;
+	12:     print "All OK\n" if grep($key, keys %data);
+	13:     print "$data{$key}\n";
+	14:     print "done: '$data{$key}'\n";
+	15:     exit;      
+
+And if you wanted to list line 5 again, type 'l 5', (note the space):
+
+	DB<4> l 5
+	5:      my %data = (
+
+In this case, there's not much to see, but of course normally there's pages of
+stuff to wade through, and 'l' can be very useful.  To reset your view to the
+line we're about to execute, type a lone period '.':
+
+	DB<5> .
+	main::(./data_a:4):     my $key = 'welcome';  
+
+The line shown is the one that is about to be executed B<next>, it hasn't
+happened yet.  So while we can print a variable with the letter 'B<p>', at
+this point all we'd get is an empty (undefined) value back.  What we need to
+do is to step through the next executable statement with an 'B<s>':
+
+	DB<6> s
+	main::(./data_a:5):     my %data = (
+	main::(./data_a:6):             'this' => qw(that),
+	main::(./data_a:7):             'tom' => qw(and jerry),
+	main::(./data_a:8):             'welcome' => q(Hello World),
+	main::(./data_a:9):             'zip' => q(welcome),
+	main::(./data_a:10):    );   
+
+Now we can have a look at that first ($key) variable:
+
+	DB<7> p $key 
+	welcome 
+
+Line 13 is where the action is, so let's continue down to there via the letter
+'B<c>', which by the way, inserts a 'one-time-only' breakpoint at the given
+line or sub routine:
+
+	DB<8> c 13
+	All OK
+	main::(./data_a:13):    print "$data{$key}\n";
+
+We've gone past our check (where 'All OK' was printed) and have stopped just
+before the meat of our task.  We could try to print out a couple of variables
+to see what is happening:
+
+	DB<9> p $data{$key}
+
+Not much in there, let's have a look at our hash:
+
+	DB<10> p %data
+	Hello Worldziptomandwelcomejerrywelcomethisthat 
+
+	DB<11> p keys %data
+	Hello Worldtomwelcomejerrythis  
+
+Well, this isn't very easy to read, and using the helpful manual (B<h h>), the
+'B<x>' command looks promising:
+
+	DB<12> x %data
+	0  'Hello World'
+	1  'zip'
+	2  'tom'
+	3  'and'
+	4  'welcome'
+	5  undef
+	6  'jerry'
+	7  'welcome'
+	8  'this'
+	9  'that'     
+
+That's not much help, a couple of welcomes in there, but no indication of
+which are keys, and which are values, it's just a listed array dump and, in
+this case, not particularly helpful.  The trick here, is to use a B<reference>
+to the data structure:
+
+	DB<13> x \%data
+	0  HASH(0x8194bc4)
+	   'Hello World' => 'zip'
+	   'jerry' => 'welcome'
+	   'this' => 'that'
+	   'tom' => 'and'
+	   'welcome' => undef  
+
+The reference is truly dumped and we can finally see what we're dealing with. 
+Our quoting was perfectly valid but wrong for our purposes, with 'and jerry'
+being treated as 2 separate words rather than a phrase, thus throwing the
+evenly paired hash structure out of alignment.
+
+The 'B<-w>' switch would have told us about this, had we used it at the start,
+and saved us a lot of trouble: 
+
+	> perl -w data
+	Odd number of elements in hash assignment at ./data line 5.    
+
+We fix our quoting: 'tom' => q(and jerry), and run it again, this time we get
+our expected output:
+
+	> perl -w data
+	Hello World
+
+
+While we're here, take a closer look at the 'B<x>' command, it's really useful
+and will merrily dump out nested references, complete objects, partial objects
+- just about whatever you throw at it:
+
+Let's make a quick object and x-plode it, first we'll start the debugger:
+it wants some form of input from STDIN, so we give it something noncommittal,
+a zero:
+
+	> perl -de 0
+	Default die handler restored.
+
+	Loading DB routines from perl5db.pl version 1.07
+	Editor support available.
+
+	Enter h or `h h' for help, or `man perldebug' for more help.
+
+	main::(-e:1):   0                       
+
+Now build an on-the-fly object over a couple of lines (note the backslash):
+
+	DB<1> $obj = bless({'unique_id'=>'123', 'attr'=> \
+	cont: 	{'col' => 'black', 'things' => [qw(this that etc)]}}, 'MY_class')
+
+And let's have a look at it:
+
+  	DB<2> x $obj
+	0  MY_class=HASH(0x828ad98)
+   		'attr' => HASH(0x828ad68)
+      	'col' => 'black'
+      	'things' => ARRAY(0x828abb8)
+         	0  'this'
+         	1  'that'
+         	2  'etc'
+   		'unique_id' => 123       
+  	DB<3>
+
+Useful, huh?  You can eval nearly anything in there, and experiment with bits
+of code or regexes until the cows come home:
+
+	DB<3> @data = qw(this that the other atheism leather theory scythe)
+
+	DB<4> p 'saw -> '.($cnt += map { print "\t:\t$_\n" } grep(/the/, sort @data))
+	atheism
+	leather
+	other
+	scythe
+	the
+	theory  
+	saw -> 6
+
+If you want to see the command History, type an 'B<H>':
+
+	DB<5> H
+	4: p 'saw -> '.($cnt += map { print "\t:\t$_\n" } grep(/the/, sort @data))
+	3: @data = qw(this that the other atheism leather theory scythe)
+	2: x $obj
+	1: $obj = bless({'unique_id'=>'123', 'attr'=>
+	{'col' => 'black', 'things' => [qw(this that etc)]}}, 'MY_class')
+	DB<5>
+
+And if you want to repeat any previous command, use the exclamation: 'B<!>':
+
+	DB<5> !4
+	p 'saw -> '.($cnt += map { print "$_\n" } grep(/the/, sort @data))
+	atheism
+	leather
+	other
+	scythe
+	the
+	theory  
+	saw -> 12
+
+For more on references see L<perlref> and L<perlreftut>.
+
+
+=head1 Stepping through code
+
+Here's a simple program which converts between Celsius and Fahrenheit, it too
+has a problem:
+
+	#!/usr/bin/perl -w
+	use strict;
+
+	my $arg = $ARGV[0] || '-c20';
+
+	if ($arg =~ /^\-(c|f)((\-|\+)*\d+(\.\d+)*)$/) {
+		my ($deg, $num) = ($1, $2);
+		my ($in, $out) = ($num, $num);
+		if ($deg eq 'c') {
+			$deg = 'f';
+			$out = &c2f($num);
+		} else {
+			$deg = 'c';
+			$out = &f2c($num);
+		}
+		$out = sprintf('%0.2f', $out);
+		$out =~ s/^((\-|\+)*\d+)\.0+$/$1/;
+		print "$out $deg\n";
+	} else {
+		print "Usage: $0 -[c|f] num\n";
+	}
+	exit;
+
+	sub f2c {
+		my $f = shift;
+		my $c = 5 * $f - 32 / 9;
+		return $c;
+	}
+
+	sub c2f {
+		my $c = shift;
+		my $f = 9 * $c / 5 + 32;
+		return $f;
+	}
+
+
+For some reason, the Fahrenheit to Celsius conversion fails to return the
+expected output.  This is what it does:
+
+	> temp -c0.72
+	33.30 f
+
+	> temp -f33.3
+	162.94 c
+
+Not very consistent!  We'll set a breakpoint in the code manually and run it
+under the debugger to see what's going on.  A breakpoint is a flag, to which
+the debugger will run without interruption; when it reaches the breakpoint, it
+will stop execution and offer a prompt for further interaction.  In normal
+use, these debugger commands are completely ignored, and they are safe - if a
+little messy - to leave in production code.
+
+	my ($in, $out) = ($num, $num);
+	$DB::single=2; # insert at line 9!
+	if ($deg eq 'c') 
+		...
+
+	> perl -d temp -f33.3
+	Default die handler restored.
+
+	Loading DB routines from perl5db.pl version 1.07
+	Editor support available.
+
+	Enter h or `h h' for help, or `man perldebug' for more help.
+
+	main::(temp:4): my $arg = $ARGV[0] || '-c20';     
+
+We'll simply continue down to our pre-set breakpoint with a 'B<c>':
+
+  	DB<1> c
+	main::(temp:10):                if ($deg eq 'c') {   
+
+Followed by a window command to see where we are:
+
+	DB<1> w
+	7:              my ($deg, $num) = ($1, $2);
+	8:              my ($in, $out) = ($num, $num);
+	9:              $DB::single=2;
+	10==>           if ($deg eq 'c') {
+	11:                     $deg = 'f';
+	12:                     $out = &c2f($num);
+	13              } else {
+	14:                     $deg = 'c';
+	15:                     $out = &f2c($num);
+	16              }                             
+
+And a print to show what values we're currently using:
+
+	DB<1> p $deg, $num
+	f33.3
+
+We can put another breakpoint on any line beginning with a colon, we'll use
+line 17 as that's just as we come out of the subroutine, and we'd like to
+pause there later on:
+
+	DB<2> b 17
+
+There's no feedback from this, but you can see what breakpoints are set by
+using the list 'L' command:
+
+	DB<3> L
+	temp:
+ 		17:            print "$out $deg\n";
+   		break if (1)     
+
+Note that to delete a breakpoint you use 'd' or 'D'.
+
+Now we'll continue down into our subroutine, this time rather than by line
+number, we'll use the subroutine name, followed by the now familiar 'w':
+
+	DB<3> c f2c
+	main::f2c(temp:27):             my $f = shift;  
+
+	DB<4> w
+	24:     exit;
+	25
+	26      sub f2c {
+	27==>           my $f = shift;
+	28:             my $c = 5 * $f - 32 / 9; 
+	29:             return $c;
+	30      }
+	31
+	32      sub c2f {
+	33:             my $c = shift;   
+
+
+Note that if there was a subroutine call between us and line 29, and we wanted
+to B<single-step> through it, we could use the 'B<s>' command, and to step
+over it we would use 'B<n>' which would execute the sub, but not descend into
+it for inspection.  In this case though, we simply continue down to line 29:
+
+	DB<4> c 29  
+	main::f2c(temp:29):             return $c;
+
+And have a look at the return value:
+
+	DB<5> p $c
+	162.944444444444
+
+This is not the right answer at all, but the sum looks correct.  I wonder if
+it's anything to do with operator precedence?  We'll try a couple of other
+possibilities with our sum:
+
+	DB<6> p (5 * $f - 32 / 9)
+	162.944444444444
+
+	DB<7> p 5 * $f - (32 / 9) 
+	162.944444444444
+
+	DB<8> p (5 * $f) - 32 / 9
+	162.944444444444
+
+	DB<9> p 5 * ($f - 32) / 9
+	0.722222222222221
+
+:-) that's more like it!  Ok, now we can set our return variable and we'll
+return out of the sub with an 'r':
+
+	DB<10> $c = 5 * ($f - 32) / 9
+
+	DB<11> r
+	scalar context return from main::f2c: 0.722222222222221
+
+Looks good, let's just continue off the end of the script:
+
+	DB<12> c
+	0.72 c 
+	Debugged program terminated.  Use q to quit or R to restart,
+  	use O inhibit_exit to avoid stopping after program termination,
+  	h q, h R or h O to get additional info.   
+
+A quick fix to the offending line (insert the missing parentheses) in the
+actual program and we're finished.
+
+
+=head1 Placeholder for a, W, t, T
+
+Actions, watch variables, stack traces etc.: on the TODO list.
+
+	a 
+
+	W 
+
+	t 
+
+	T
+
+
+=head1 REGULAR EXPRESSIONS
+
+Ever wanted to know what a regex looked like?  You'll need perl compiled with
+the DEBUGGING flag for this one:
+
+	> perl -Dr -e '/^pe(a)*rl$/i'
+	Compiling REx `^pe(a)*rl$'
+	size 17 first at 2
+	rarest char
+	 at 0
+	   1: BOL(2)
+	   2: EXACTF <pe>(4)
+	   4: CURLYN[1] {0,32767}(14)
+	   6:   NOTHING(8)
+	   8:   EXACTF <a>(0)
+	  12:   WHILEM(0)
+	  13: NOTHING(14)
+	  14: EXACTF <rl>(16)
+	  16: EOL(17)
+	  17: END(0)
+	floating `'$ at 4..2147483647 (checking floating) stclass `EXACTF <pe>'
+	anchored(BOL) minlen 4
+	Omitting $` $& $' support.
+
+	EXECUTING...
+
+	Freeing REx: `^pe(a)*rl$'  
+
+Did you really want to know? :-)
+For more gory details on getting regular expressions to work, have a look at
+L<perlre>, L<perlretut>, and to decode the mysterious labels (BOL and CURLYN,
+etc. above), see L<perldebguts>.
+
+
+=head1 OUTPUT TIPS
+
+To get all the output from your error log, and not miss any messages via
+helpful operating system buffering, insert a line like this, at the start of
+your script:
+
+	$|=1;	
+
+To watch the tail of a dynamically growing logfile, (from the command line):
+
+	tail -f $error_log
+
+Wrapping all die calls in a handler routine can be useful to see how, and from
+where, they're being called, L<perlvar> has more information:
+
+	BEGIN { $SIG{__DIE__} = sub { require Carp; Carp::confess(@_) } }
+
+Various useful techniques for the redirection of STDOUT and STDERR filehandles
+are explained in L<perlopentut> and L<perlfaq8>.
+
+
+=head1 CGI
+
+Just a quick hint here for all those CGI programmers who can't figure out how
+on earth to get past that 'waiting for input' prompt, when running their CGI
+script from the command-line, try something like this:
+
+	> perl -d my_cgi.pl -nodebug 
+
+Of course L<CGI> and L<perlfaq9> will tell you more.
+
+
+=head1 GUIs
+
+The command line interface is tightly integrated with an B<emacs> extension
+and there's a B<vi> interface too.  
+
+You don't have to do this all on the command line, though, there are a few GUI
+options out there.  The nice thing about these is you can wave a mouse over a
+variable and a dump of its data will appear in an appropriate window, or in a
+popup balloon, no more tiresome typing of 'x $varname' :-)
+
+In particular have a hunt around for the following:
+
+B<ptkdb> perlTK based wrapper for the built-in debugger
+
+B<ddd> data display debugger
+
+B<PerlDevKit> and B<PerlBuilder> are NT specific
+
+NB. (more info on these and others would be appreciated).
+
+
+=head1 SUMMARY
+
+We've seen how to encourage good coding practices with B<use strict> and
+B<-w>.  You can run the perl debugger with B<perl -d scriptname> and inspect your
+data from within it with the B<p> and B<x> commands.  You can
+walk through your code, set breakpoints with B<b> and step through that code
+with B<s> or B<n>, continue with B<c> and return from a sub with B<r>.  Fairly
+intuitive stuff when you get down to it.  
+
+There is of course lots more to find out about, this has just scratched the
+surface.  The best way to learn more is to use perldoc to find out more about
+the language, to read the on-line help (L<perldebug> is probably the next
+place to go), and of course, experiment.  
+
+
+=head1 SEE ALSO
+
+L<perldebug>, 
+L<perldebguts>, 
+L<perldiag>,
+L<dprofpp>,
+L<perlrun>
+
+
+=head1 AUTHOR
+
+Richard Foley <richard@rfi.net> Copyright (c) 2000
+
+
+=head1 CONTRIBUTORS
+
+Various people have made helpful suggestions and contributions, in particular:
+
+Ronald J Kimball <rjk@linguist.dartmouth.edu>
+
+Hugo van der Sanden <hv@crypt0.demon.co.uk>
+
+Peter Scott <Peter@PSDT.com>
+
diff --git a/contrib/perl5/pod/perlebcdic.pod b/contrib/perl5/pod/perlebcdic.pod
new file mode 100644
index 0000000000000..12ea2f3ef4b16
--- /dev/null
+++ b/contrib/perl5/pod/perlebcdic.pod
@@ -0,0 +1,1235 @@
+=head1 NAME
+
+perlebcdic - Considerations for running Perl on EBCDIC platforms
+
+=head1 DESCRIPTION
+
+An exploration of some of the issues facing Perl programmers
+on EBCDIC based computers.  We do not cover localization, 
+internationalization, or multi byte character set issues (yet).
+
+Portions that are still incomplete are marked with XXX.
+
+=head1 COMMON CHARACTER CODE SETS
+
+=head2 ASCII
+
+The American Standard Code for Information Interchange is a set of
+integers running from 0 to 127 (decimal) that imply character 
+interpretation by the display and other system(s) of computers.  
+The range 0..127 can be covered by setting the bits in a 7-bit binary 
+number, hence the set is sometimes referred to as "7-bit ASCII".  
+ASCII was described by the American National Standards Institute 
+document ANSI X3.4-1986.  It was also described by ISO 646:1991 
+(with localization for currency symbols).  The full ASCII set is 
+given in the table below as the first 128 elements.  Languages that 
+can be written adequately with the characters in ASCII include 
+English, Hawaiian, Indonesian, Swahili and some Native American 
+languages.
+
+There are many character sets that extend the range of integers
+from 0..2**7-1 up to 2**8-1, or 8 bit bytes (octets if you prefer).
+One common one is the ISO 8859-1 character set.
+
+=head2 ISO 8859
+
+The ISO 8859-$n are a collection of character code sets from the 
+International Organization for Standardization (ISO) each of which 
+adds characters to the ASCII set that are typically found in European 
+languages many of which are based on the Roman, or Latin, alphabet.
+
+=head2 Latin 1 (ISO 8859-1)
+
+A particular 8-bit extension to ASCII that includes grave and acute 
+accented Latin characters.  Languages that can employ ISO 8859-1 
+include all the languages covered by ASCII as well as Afrikaans, 
+Albanian, Basque, Catalan, Danish, Faroese, Finnish, Norwegian, 
+Portuguese, Spanish, and Swedish.  Dutch is covered albeit without 
+the ij ligature.  French is covered too but without the oe ligature. 
+German can use ISO 8859-1 but must do so without German-style
+quotation marks.  This set is based on Western European extensions 
+to ASCII and is commonly encountered in world wide web work.
+In IBM character code set identification terminology ISO 8859-1 is
+also known as CCSID 819 (or sometimes 0819 or even 00819).
+
+=head2 EBCDIC
+
+The Extended Binary Coded Decimal Interchange Code  refers to a 
+large collection of slightly different single and multi byte 
+coded character sets that are different from ASCII or ISO 8859-1 
+and typically run on host computers.  The EBCDIC encodings derive 
+from 8 bit byte extensions of Hollerith punched card encodings.
+The layout on the cards was such that high bits were set for the
+upper and lower case alphabet characters [a-z] and [A-Z], but there
+were gaps within each Latin alphabet range.
+
+Some IBM EBCDIC character sets may be known by character code set 
+identification numbers (CCSID numbers) or code page numbers.  Leading
+zero digits in CCSID numbers within this document are insignificant.
+E.g. CCSID 0037 may be referred to as 37 in places.
+
+=head2 13 variant characters
+
+Among IBM EBCDIC character code sets there are 13 characters that
+are often mapped to different integer values.  Those characters
+are known as the 13 "variant" characters and are:
+
+    \ [ ] { } ^ ~ ! # | $ @ ` 
+
+=head2 0037
+
+Character code set ID 0037 is a mapping of the ASCII plus Latin-1 
+characters (i.e. ISO 8859-1) to an EBCDIC set.  0037 is used 
+in North American English locales on the OS/400 operating system 
+that runs on AS/400 computers.  CCSID 37 differs from ISO 8859-1 
+in 237 places, in other words they agree on only 19 code point values.
+
+=head2 1047
+
+Character code set ID 1047 is also a mapping of the ASCII plus 
+Latin-1 characters (i.e. ISO 8859-1) to an EBCDIC set.  1047 is 
+used under Unix System Services for OS/390, and OpenEdition for VM/ESA. 
+CCSID 1047 differs from CCSID 0037 in eight places.
+
+=head2 POSIX-BC
+
+The EBCDIC code page in use on Siemens' BS2000 system is distinct from
+1047 and 0037.  It is identified below as the POSIX-BC set.
+
+=head1 SINGLE OCTET TABLES
+
+The following tables list the ASCII and Latin 1 ordered sets including
+the subsets: C0 controls (00..1f), ASCII graphics (20..7e), delete (7f),
+C1 controls (80..9f), and Latin-1 (a.k.a. ISO 8859-1) (a0..ff), with these
+ranges given in hexadecimal.  In the 
+table non-printing control character names as well as the Latin 1 
+extensions to ASCII have been labelled with character names roughly 
+corresponding to I<The Unicode Standard, Version 2.0> albeit with 
+substitutions such as s/LATIN// and s/VULGAR// in all cases, 
+s/CAPITAL LETTER// in some cases, and s/SMALL LETTER ([A-Z])/\l$1/ 
+in some other cases (the C<charnames> pragma names unfortunately do 
+not list explicit names for the C0 or C1 control characters).  The 
+"names" of the C1 control set (128..159 in ISO 8859-1) listed here are 
+somewhat arbitrary.  The differences between the 0037 and 1047 sets are 
+flagged with ***.  The differences between the 1047 and POSIX-BC sets 
+are flagged with ###.  All ord() numbers listed are decimal.  If you 
+would rather see this table listing octal values then run the table 
+(that is, the pod version of this document since this recipe may not 
+work with a pod2_other_format translation) through:
+
+=over 4
+
+=item recipe 0
+
+=back
+
+    perl -ne 'if(/(.{33})(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/)' \
+     -e '{printf("%s%-9o%-9o%-9o%-9o\n",$1,$2,$3,$4,$5)}' perlebcdic.pod
+
+If you would rather see this table listing hexadecimal values then
+run the table through:
+
+=over 4
+
+=item recipe 1
+
+=back
+
+    perl -ne 'if(/(.{33})(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/)' \
+     -e '{printf("%s%-9X%-9X%-9X%-9X\n",$1,$2,$3,$4,$5)}' perlebcdic.pod
+
+
+                                 8859-1
+    chr                          0819     0037     1047     POSIX-BC
+    ----------------------------------------------------------------
+    <NULL>                       0        0        0        0 
+    <START OF HEADING>           1        1        1        1
+    <START OF TEXT>              2        2        2        2
+    <END OF TEXT>                3        3        3        3
+    <END OF TRANSMISSION>        4        55       55       55
+    <ENQUIRY>                    5        45       45       45
+    <ACKNOWLEDGE>                6        46       46       46
+    <BELL>                       7        47       47       47
+    <BACKSPACE>                  8        22       22       22
+    <HORIZONTAL TABULATION>      9        5        5        5
+    <LINE FEED>                  10       37       21       21  ***
+    <VERTICAL TABULATION>        11       11       11       11
+    <FORM FEED>                  12       12       12       12
+    <CARRIAGE RETURN>            13       13       13       13
+    <SHIFT OUT>                  14       14       14       14
+    <SHIFT IN>                   15       15       15       15
+    <DATA LINK ESCAPE>           16       16       16       16
+    <DEVICE CONTROL ONE>         17       17       17       17
+    <DEVICE CONTROL TWO>         18       18       18       18
+    <DEVICE CONTROL THREE>       19       19       19       19
+    <DEVICE CONTROL FOUR>        20       60       60       60
+    <NEGATIVE ACKNOWLEDGE>       21       61       61       61
+    <SYNCHRONOUS IDLE>           22       50       50       50
+    <END OF TRANSMISSION BLOCK>  23       38       38       38
+    <CANCEL>                     24       24       24       24
+    <END OF MEDIUM>              25       25       25       25
+    <SUBSTITUTE>                 26       63       63       63
+    <ESCAPE>                     27       39       39       39
+    <FILE SEPARATOR>             28       28       28       28
+    <GROUP SEPARATOR>            29       29       29       29
+    <RECORD SEPARATOR>           30       30       30       30
+    <UNIT SEPARATOR>             31       31       31       31
+    <SPACE>                      32       64       64       64
+    !                            33       90       90       90
+    "                            34       127      127      127
+    #                            35       123      123      123
+    $                            36       91       91       91
+    %                            37       108      108      108
+    &                            38       80       80       80
+    '                            39       125      125      125
+    (                            40       77       77       77
+    )                            41       93       93       93
+    *                            42       92       92       92
+    +                            43       78       78       78
+    ,                            44       107      107      107
+    -                            45       96       96       96
+    .                            46       75       75       75
+    /                            47       97       97       97
+    0                            48       240      240      240
+    1                            49       241      241      241
+    2                            50       242      242      242
+    3                            51       243      243      243
+    4                            52       244      244      244
+    5                            53       245      245      245
+    6                            54       246      246      246
+    7                            55       247      247      247
+    8                            56       248      248      248
+    9                            57       249      249      249
+    :                            58       122      122      122
+    ;                            59       94       94       94
+    <                            60       76       76       76
+    =                            61       126      126      126
+    >                            62       110      110      110
+    ?                            63       111      111      111
+    @                            64       124      124      124
+    A                            65       193      193      193
+    B                            66       194      194      194
+    C                            67       195      195      195
+    D                            68       196      196      196
+    E                            69       197      197      197
+    F                            70       198      198      198
+    G                            71       199      199      199
+    H                            72       200      200      200
+    I                            73       201      201      201
+    J                            74       209      209      209
+    K                            75       210      210      210
+    L                            76       211      211      211
+    M                            77       212      212      212
+    N                            78       213      213      213
+    O                            79       214      214      214
+    P                            80       215      215      215
+    Q                            81       216      216      216
+    R                            82       217      217      217
+    S                            83       226      226      226
+    T                            84       227      227      227
+    U                            85       228      228      228
+    V                            86       229      229      229
+    W                            87       230      230      230
+    X                            88       231      231      231
+    Y                            89       232      232      232
+    Z                            90       233      233      233
+    [                            91       186      173      187 *** ###
+    \                            92       224      224      188 ### 
+    ]                            93       187      189      189 ***
+    ^                            94       176      95       106 *** ###
+    _                            95       109      109      109
+    `                            96       121      121      74  ###
+    a                            97       129      129      129
+    b                            98       130      130      130
+    c                            99       131      131      131
+    d                            100      132      132      132
+    e                            101      133      133      133
+    f                            102      134      134      134
+    g                            103      135      135      135
+    h                            104      136      136      136
+    i                            105      137      137      137
+    j                            106      145      145      145
+    k                            107      146      146      146
+    l                            108      147      147      147
+    m                            109      148      148      148
+    n                            110      149      149      149
+    o                            111      150      150      150
+    p                            112      151      151      151
+    q                            113      152      152      152
+    r                            114      153      153      153
+    s                            115      162      162      162
+    t                            116      163      163      163
+    u                            117      164      164      164
+    v                            118      165      165      165
+    w                            119      166      166      166
+    x                            120      167      167      167
+    y                            121      168      168      168
+    z                            122      169      169      169
+    {                            123      192      192      251 ###
+    |                            124      79       79       79
+    }                            125      208      208      253 ###
+    ~                            126      161      161      255 ###
+    <DELETE>                     127      7        7        7
+    <C1 0>                       128      32       32       32
+    <C1 1>                       129      33       33       33
+    <C1 2>                       130      34       34       34
+    <C1 3>                       131      35       35       35
+    <C1 4>                       132      36       36       36
+    <C1 5>                       133      21       37       37  ***
+    <C1 6>                       134      6        6        6
+    <C1 7>                       135      23       23       23
+    <C1 8>                       136      40       40       40
+    <C1 9>                       137      41       41       41
+    <C1 10>                      138      42       42       42
+    <C1 11>                      139      43       43       43
+    <C1 12>                      140      44       44       44
+    <C1 13>                      141      9        9        9
+    <C1 14>                      142      10       10       10
+    <C1 15>                      143      27       27       27
+    <C1 16>                      144      48       48       48
+    <C1 17>                      145      49       49       49
+    <C1 18>                      146      26       26       26
+    <C1 19>                      147      51       51       51
+    <C1 20>                      148      52       52       52
+    <C1 21>                      149      53       53       53
+    <C1 22>                      150      54       54       54
+    <C1 23>                      151      8        8        8
+    <C1 24>                      152      56       56       56
+    <C1 25>                      153      57       57       57
+    <C1 26>                      154      58       58       58
+    <C1 27>                      155      59       59       59
+    <C1 28>                      156      4        4        4
+    <C1 29>                      157      20       20       20
+    <C1 30>                      158      62       62       62
+    <C1 31>                      159      255      255      95  ###
+    <NON-BREAKING SPACE>         160      65       65       65
+    <INVERTED EXCLAMATION MARK>  161      170      170      170
+    <CENT SIGN>                  162      74       74       176 ###
+    <POUND SIGN>                 163      177      177      177
+    <CURRENCY SIGN>              164      159      159      159
+    <YEN SIGN>                   165      178      178      178
+    <BROKEN BAR>                 166      106      106      208 ###
+    <SECTION SIGN>               167      181      181      181
+    <DIAERESIS>                  168      189      187      121 *** ###
+    <COPYRIGHT SIGN>             169      180      180      180
+    <FEMININE ORDINAL INDICATOR> 170      154      154      154
+    <LEFT POINTING GUILLEMET>    171      138      138      138
+    <NOT SIGN>                   172      95       176      186 *** ###       
+    <SOFT HYPHEN>                173      202      202      202
+    <REGISTERED TRADE MARK SIGN> 174      175      175      175
+    <MACRON>                     175      188      188      161 ###
+    <DEGREE SIGN>                176      144      144      144
+    <PLUS-OR-MINUS SIGN>         177      143      143      143
+    <SUPERSCRIPT TWO>            178      234      234      234
+    <SUPERSCRIPT THREE>          179      250      250      250
+    <ACUTE ACCENT>               180      190      190      190
+    <MICRO SIGN>                 181      160      160      160
+    <PARAGRAPH SIGN>             182      182      182      182
+    <MIDDLE DOT>                 183      179      179      179
+    <CEDILLA>                    184      157      157      157
+    <SUPERSCRIPT ONE>            185      218      218      218
+    <MASC. ORDINAL INDICATOR>    186      155      155      155
+    <RIGHT POINTING GUILLEMET>   187      139      139      139
+    <FRACTION ONE QUARTER>       188      183      183      183
+    <FRACTION ONE HALF>          189      184      184      184
+    <FRACTION THREE QUARTERS>    190      185      185      185
+    <INVERTED QUESTION MARK>     191      171      171      171
+    <A WITH GRAVE>               192      100      100      100
+    <A WITH ACUTE>               193      101      101      101
+    <A WITH CIRCUMFLEX>          194      98       98       98
+    <A WITH TILDE>               195      102      102      102
+    <A WITH DIAERESIS>           196      99       99       99
+    <A WITH RING ABOVE>          197      103      103      103
+    <CAPITAL LIGATURE AE>        198      158      158      158
+    <C WITH CEDILLA>             199      104      104      104
+    <E WITH GRAVE>               200      116      116      116
+    <E WITH ACUTE>               201      113      113      113
+    <E WITH CIRCUMFLEX>          202      114      114      114
+    <E WITH DIAERESIS>           203      115      115      115
+    <I WITH GRAVE>               204      120      120      120
+    <I WITH ACUTE>               205      117      117      117
+    <I WITH CIRCUMFLEX>          206      118      118      118
+    <I WITH DIAERESIS>           207      119      119      119
+    <CAPITAL LETTER ETH>         208      172      172      172
+    <N WITH TILDE>               209      105      105      105
+    <O WITH GRAVE>               210      237      237      237
+    <O WITH ACUTE>               211      238      238      238
+    <O WITH CIRCUMFLEX>          212      235      235      235
+    <O WITH TILDE>               213      239      239      239
+    <O WITH DIAERESIS>           214      236      236      236
+    <MULTIPLICATION SIGN>        215      191      191      191
+    <O WITH STROKE>              216      128      128      128
+    <U WITH GRAVE>               217      253      253      224 ###
+    <U WITH ACUTE>               218      254      254      254
+    <U WITH CIRCUMFLEX>          219      251      251      221 ###
+    <U WITH DIAERESIS>           220      252      252      252
+    <Y WITH ACUTE>               221      173      186      173 *** ###
+    <CAPITAL LETTER THORN>       222      174      174      174
+    <SMALL LETTER SHARP S>       223      89       89       89
+    <a WITH GRAVE>               224      68       68       68
+    <a WITH ACUTE>               225      69       69       69
+    <a WITH CIRCUMFLEX>          226      66       66       66
+    <a WITH TILDE>               227      70       70       70
+    <a WITH DIAERESIS>           228      67       67       67
+    <a WITH RING ABOVE>          229      71       71       71
+    <SMALL LIGATURE ae>          230      156      156      156
+    <c WITH CEDILLA>             231      72       72       72
+    <e WITH GRAVE>               232      84       84       84
+    <e WITH ACUTE>               233      81       81       81
+    <e WITH CIRCUMFLEX>          234      82       82       82
+    <e WITH DIAERESIS>           235      83       83       83
+    <i WITH GRAVE>               236      88       88       88
+    <i WITH ACUTE>               237      85       85       85
+    <i WITH CIRCUMFLEX>          238      86       86       86
+    <i WITH DIAERESIS>           239      87       87       87
+    <SMALL LETTER eth>           240      140      140      140
+    <n WITH TILDE>               241      73       73       73
+    <o WITH GRAVE>               242      205      205      205
+    <o WITH ACUTE>               243      206      206      206
+    <o WITH CIRCUMFLEX>          244      203      203      203
+    <o WITH TILDE>               245      207      207      207
+    <o WITH DIAERESIS>           246      204      204      204
+    <DIVISION SIGN>              247      225      225      225
+    <o WITH STROKE>              248      112      112      112
+    <u WITH GRAVE>               249      221      221      192 ###
+    <u WITH ACUTE>               250      222      222      222
+    <u WITH CIRCUMFLEX>          251      219      219      219
+    <u WITH DIAERESIS>           252      220      220      220
+    <y WITH ACUTE>               253      141      141      141
+    <SMALL LETTER thorn>         254      142      142      142
+    <y WITH DIAERESIS>           255      223      223      223
+
+If you would rather see the above table in CCSID 0037 order rather than
+ASCII + Latin-1 order then run the table through:
+
+=over 4
+
+=item recipe 2
+
+=back
+
+    perl -ne 'if(/.{33}\d{1,3}\s{6,8}\d{1,3}\s{6,8}\d{1,3}\s{6,8}\d{1,3}/)'\
+     -e '{push(@l,$_)}' \
+     -e 'END{print map{$_->[0]}' \
+     -e '          sort{$a->[1] <=> $b->[1]}' \
+     -e '          map{[$_,substr($_,42,3)]}@l;}' perlebcdic.pod
+
+If you would rather see it in CCSID 1047 order then change the digit
+42 in the last line to 51, like this:
+
+=over 4
+
+=item recipe 3
+
+=back
+
+    perl -ne 'if(/.{33}\d{1,3}\s{6,8}\d{1,3}\s{6,8}\d{1,3}\s{6,8}\d{1,3}/)'\
+     -e '{push(@l,$_)}' \
+     -e 'END{print map{$_->[0]}' \
+     -e '          sort{$a->[1] <=> $b->[1]}' \
+     -e '          map{[$_,substr($_,51,3)]}@l;}' perlebcdic.pod
+
+If you would rather see it in POSIX-BC order then change the digit
+51 in the last line to 60, like this:
+
+=over 4
+
+=item recipe 4
+
+=back
+
+    perl -ne 'if(/.{33}\d{1,3}\s{6,8}\d{1,3}\s{6,8}\d{1,3}\s{6,8}\d{1,3}/)'\
+     -e '{push(@l,$_)}' \
+     -e 'END{print map{$_->[0]}' \
+     -e '          sort{$a->[1] <=> $b->[1]}' \
+     -e '          map{[$_,substr($_,60,3)]}@l;}' perlebcdic.pod
+
+
+=head1 IDENTIFYING CHARACTER CODE SETS
+
+To determine the character set you are running under from perl one 
+could use the return value of ord() or chr() to test one or more 
+character values.  For example:
+
+    $is_ascii  = "A" eq chr(65);
+    $is_ebcdic = "A" eq chr(193);
+
+Also, "\t" is a C<HORIZONTAL TABULATION> character so that:
+
+    $is_ascii  = ord("\t") == 9;
+    $is_ebcdic = ord("\t") == 5;
+
+To distinguish EBCDIC code pages try looking at one or more of
+the characters that differ between them.  For example:
+
+    $is_ebcdic_37   = "\n" eq chr(37);
+    $is_ebcdic_1047 = "\n" eq chr(21);
+
+Or better still choose a character that is uniquely encoded in any
+of the code sets, e.g.:
+
+    $is_ascii           = ord('[') == 91;
+    $is_ebcdic_37       = ord('[') == 186;
+    $is_ebcdic_1047     = ord('[') == 173;
+    $is_ebcdic_POSIX_BC = ord('[') == 187;
+
+However, it would be unwise to write tests such as:
+
+    $is_ascii = "\r" ne chr(13);  #  WRONG
+    $is_ascii = "\n" ne chr(10);  #  ILL ADVISED
+
+Obviously the first of these will fail to distinguish most ASCII machines
+from either a CCSID 0037, a 1047, or a POSIX-BC EBCDIC machine since "\r" eq 
+chr(13) under all of those coded character sets.  But note too that 
+because "\n" is chr(13) and "\r" is chr(10) on the Macintosh (which is an 
+ASCII machine) the second C<$is_ascii> test will lead to trouble there.
+
+To determine whether or not perl was built under an EBCDIC 
+code page you can use the Config module like so:
+
+    use Config;
+    $is_ebcdic = $Config{'ebcdic'} eq 'define';
+
+=head1 CONVERSIONS
+
+=head2 tr///
+
+In order to convert a string of characters from one character set to 
+another a simple list of numbers, such as in the right columns in the
+above table, along with perl's tr/// operator is all that is needed.  
+The data in the table are in ASCII order hence the EBCDIC columns 
+provide easy to use ASCII to EBCDIC operations that are also easily 
+reversed.
+
+For example, to convert ASCII to code page 037 take the output of the second 
+column from the output of recipe 0 (modified to add \\ characters) and use 
+it in tr/// like so:
+
+    $cp_037 = 
+    '\000\001\002\003\234\011\206\177\227\215\216\013\014\015\016\017' .
+    '\020\021\022\023\235\205\010\207\030\031\222\217\034\035\036\037' .
+    '\200\201\202\203\204\012\027\033\210\211\212\213\214\005\006\007' .
+    '\220\221\026\223\224\225\226\004\230\231\232\233\024\025\236\032' .
+    '\040\240\342\344\340\341\343\345\347\361\242\056\074\050\053\174' .
+    '\046\351\352\353\350\355\356\357\354\337\041\044\052\051\073\254' .
+    '\055\057\302\304\300\301\303\305\307\321\246\054\045\137\076\077' .
+    '\370\311\312\313\310\315\316\317\314\140\072\043\100\047\075\042' .
+    '\330\141\142\143\144\145\146\147\150\151\253\273\360\375\376\261' .
+    '\260\152\153\154\155\156\157\160\161\162\252\272\346\270\306\244' .
+    '\265\176\163\164\165\166\167\170\171\172\241\277\320\335\336\256' .
+    '\136\243\245\267\251\247\266\274\275\276\133\135\257\250\264\327' .
+    '\173\101\102\103\104\105\106\107\110\111\255\364\366\362\363\365' .
+    '\175\112\113\114\115\116\117\120\121\122\271\373\374\371\372\377' .
+    '\134\367\123\124\125\126\127\130\131\132\262\324\326\322\323\325' .
+    '\060\061\062\063\064\065\066\067\070\071\263\333\334\331\332\237' ;
+
+    my $ebcdic_string = $ascii_string;
+    eval '$ebcdic_string =~ tr/\000-\377/' . $cp_037 . '/';
+
+To convert from EBCDIC 037 to ASCII just reverse the order of the tr/// 
+arguments like so:
+
+    my $ascii_string = $ebcdic_string;
+    eval '$ascii_string =~ tr/' . $cp_037 . '/\000-\377/';
+
+Similarly one could take the output of the third column from recipe 0 to
+obtain a C<$cp_1047> table.  The fourth column of the output from recipe
+0 could provide a C<$cp_posix_bc> table suitable for transcoding as well.
+
+=head2 iconv
+
+XPG operability often implies the presence of an I<iconv> utility
+available from the shell or from the C library.  Consult your system's
+documentation for information on iconv.
+
+On OS/390 see the iconv(1) man page.  One way to invoke the iconv 
+shell utility from within perl would be to:
+
+    # OS/390 example
+    $ascii_data = `echo '$ebcdic_data'| iconv -f IBM-1047 -t ISO8859-1`
+
+or the inverse map:
+
+    # OS/390 example
+    $ebcdic_data = `echo '$ascii_data'| iconv -f ISO8859-1 -t IBM-1047`
+
+For other perl based conversion options see the Convert::* modules on CPAN.
+
+=head2 C RTL
+
+The OS/390 C run time library provides _atoe() and _etoa() functions.
+
+=head1 OPERATOR DIFFERENCES
+
+The C<..> range operator treats certain character ranges with 
+care on EBCDIC machines.  For example the following array
+will have twenty six elements on either an EBCDIC machine
+or an ASCII machine:
+
+    @alphabet = ('A'..'Z');   #  $#alphabet == 25
+
+The bitwise operators such as & ^ | may return different results
+when operating on string or character data in a perl program running 
+on an EBCDIC machine than when run on an ASCII machine.  Here is
+an example adapted from the one in L<perlop>:
+
+    # EBCDIC-based examples
+    print "j p \n" ^ " a h";                      # prints "JAPH\n"
+    print "JA" | "  ph\n";                        # prints "japh\n" 
+    print "JAPH\nJunk" & "\277\277\277\277\277";  # prints "japh\n";
+    print 'p N$' ^ " E<H\n";                      # prints "Perl\n";
+
+An interesting property of the 32 C0 control characters
+in the ASCII table is that they can "literally" be constructed
+as control characters in perl, e.g. C<(chr(0) eq "\c@")> 
+C<(chr(1) eq "\cA")>, and so on.  Perl on EBCDIC machines has been 
+ported to take "\c@" to chr(0) and "\cA" to chr(1) as well, but the
+thirty three characters that result depend on which code page you are
+using.  The table below uses the character names from the previous table 
+but with substitutions such as s/START OF/S.O./; s/END OF /E.O./; 
+s/TRANSMISSION/TRANS./; s/TABULATION/TAB./; s/VERTICAL/VERT./; 
+s/HORIZONTAL/HORIZ./; s/DEVICE CONTROL/D.C./; s/SEPARATOR/SEP./; 
+s/NEGATIVE ACKNOWLEDGE/NEG. ACK./;.  The POSIX-BC and 1047 sets are
+identical throughout this range and differ from the 0037 set at only 
+one spot (21 decimal).  Note that the C<LINE FEED> character
+may be generated by "\cJ" on ASCII machines but by "\cU" on 1047 or POSIX-BC 
+machines and cannot be generated as a C<"\c.letter."> control character on 
+0037 machines.  Note also that "\c\\" maps to two characters
+not one.
+
+    chr   ord  8859-1               0037                1047 && POSIX-BC     
+    ------------------------------------------------------------------------
+    "\c?" 127  <DELETE>             "                   "              ***><
+    "\c@"   0  <NULL>               <NULL>              <NULL>         ***><
+    "\cA"   1  <S.O. HEADING>       <S.O. HEADING>      <S.O. HEADING> 
+    "\cB"   2  <S.O. TEXT>          <S.O. TEXT>         <S.O. TEXT>
+    "\cC"   3  <E.O. TEXT>          <E.O. TEXT>         <E.O. TEXT>
+    "\cD"   4  <E.O. TRANS.>        <C1 28>             <C1 28> 
+    "\cE"   5  <ENQUIRY>            <HORIZ. TAB.>       <HORIZ. TAB.>    
+    "\cF"   6  <ACKNOWLEDGE>        <C1 6>              <C1 6>   
+    "\cG"   7  <BELL>               <DELETE>            <DELETE>   
+    "\cH"   8  <BACKSPACE>          <C1 23>             <C1 23>
+    "\cI"   9  <HORIZ. TAB.>        <C1 13>             <C1 13>
+    "\cJ"  10  <LINE FEED>          <C1 14>             <C1 14>
+    "\cK"  11  <VERT. TAB.>         <VERT. TAB.>        <VERT. TAB.>
+    "\cL"  12  <FORM FEED>          <FORM FEED>         <FORM FEED>    
+    "\cM"  13  <CARRIAGE RETURN>    <CARRIAGE RETURN>   <CARRIAGE RETURN> 
+    "\cN"  14  <SHIFT OUT>          <SHIFT OUT>         <SHIFT OUT>
+    "\cO"  15  <SHIFT IN>           <SHIFT IN>          <SHIFT IN>
+    "\cP"  16  <DATA LINK ESCAPE>   <DATA LINK ESCAPE>  <DATA LINK ESCAPE> 
+    "\cQ"  17  <D.C. ONE>           <D.C. ONE>          <D.C. ONE>
+    "\cR"  18  <D.C. TWO>           <D.C. TWO>          <D.C. TWO>
+    "\cS"  19  <D.C. THREE>         <D.C. THREE>        <D.C. THREE> 
+    "\cT"  20  <D.C. FOUR>          <C1 29>             <C1 29> 
+    "\cU"  21  <NEG. ACK.>          <C1 5>              <LINE FEED>    ***
+    "\cV"  22  <SYNCHRONOUS IDLE>   <BACKSPACE>         <BACKSPACE>
+    "\cW"  23  <E.O. TRANS. BLOCK>  <C1 7>              <C1 7>
+    "\cX"  24  <CANCEL>             <CANCEL>            <CANCEL>
+    "\cY"  25  <E.O. MEDIUM>        <E.O. MEDIUM>       <E.O. MEDIUM>
+    "\cZ"  26  <SUBSTITUTE>         <C1 18>             <C1 18>
+    "\c["  27  <ESCAPE>             <C1 15>             <C1 15>
+    "\c\\" 28  <FILE SEP.>\         <FILE SEP.>\        <FILE SEP.>\
+    "\c]"  29  <GROUP SEP.>         <GROUP SEP.>        <GROUP SEP.>
+    "\c^"  30  <RECORD SEP.>        <RECORD SEP.>       <RECORD SEP.>  ***><
+    "\c_"  31  <UNIT SEP.>          <UNIT SEP.>         <UNIT SEP.>    ***><
+
+
+=head1 FUNCTION DIFFERENCES
+
+=over 8
+
+=item chr()
+
+chr() must be given an EBCDIC code number argument to yield a desired 
+character return value on an EBCDIC machine.  For example:
+
+    $CAPITAL_LETTER_A = chr(193);
+
+=item ord()
+
+ord() will return EBCDIC code number values on an EBCDIC machine.
+For example:
+
+    $the_number_193 = ord("A");
+
+=item pack()
+
+The c and C templates for pack() are dependent upon character set 
+encoding.  Examples of usage on EBCDIC include:
+
+    $foo = pack("CCCC",193,194,195,196);
+    # $foo eq "ABCD"
+    $foo = pack("C4",193,194,195,196);
+    # same thing
+
+    $foo = pack("ccxxcc",193,194,195,196);
+    # $foo eq "AB\0\0CD"
+
+=item print()
+
+One must be careful with scalars and strings that are passed to
+print that contain ASCII encodings.  One common place
+for this to occur is in the output of the MIME type header for
+CGI script writing.  For example, many perl programming guides 
+recommend something similar to:
+
+    print "Content-type:\ttext/html\015\012\015\012"; 
+    # this may be wrong on EBCDIC
+
+Under the IBM OS/390 USS Web Server for example you should instead
+write that as:
+
+    print "Content-type:\ttext/html\r\n\r\n"; # OK for DGW et alia
+
+That is because the translation from EBCDIC to ASCII is done
+by the web server in this case (such code will not be appropriate for
+the Macintosh however).  Consult your web server's documentation for 
+further details.
+
+=item printf()
+
+The formats that can convert characters to numbers and vice versa
+will be different from their ASCII counterparts when executed
+on an EBCDIC machine.  Examples include:
+
+    printf("%c%c%c",193,194,195);  # prints ABC
+
+=item sort()
+
+EBCDIC sort results may differ from ASCII sort results especially for 
+mixed case strings.  This is discussed in more detail below.
+
+=item sprintf()
+
+See the discussion of printf() above.  An example of the use
+of sprintf would be:
+
+    $CAPITAL_LETTER_A = sprintf("%c",193);
+
+=item unpack()
+
+See the discussion of pack() above.
+
+=back
+
+=head1 REGULAR EXPRESSION DIFFERENCES
+
+As of perl 5.005_03 letter range regular expressions such as 
+[A-Z] and [a-z] have been specially coded to not pick up gap 
+characters.  For example, characters such as E<ocirc> C<o WITH CIRCUMFLEX> 
+that lie between I and J would not be matched by the 
+regular expression range C</[H-K]/>.  
+
+If you do want to match the alphabet gap characters in a single octet 
+regular expression try matching the hex or octal code such 
+as C</\313/> on EBCDIC or C</\364/> on ASCII machines to 
+have your regular expression match C<o WITH CIRCUMFLEX>.
+
+Another construct to be wary of is the inappropriate use of hex or
+octal constants in regular expressions.  Consider the following
+set of subs:
+
+    sub is_c0 {
+        my $char = substr(shift,0,1);
+        $char =~ /[\000-\037]/;
+    }
+
+    sub is_print_ascii {
+        my $char = substr(shift,0,1);
+        $char =~ /[\040-\176]/;
+    }
+
+    sub is_delete {
+        my $char = substr(shift,0,1);
+        $char eq "\177";
+    }
+
+    sub is_c1 {
+        my $char = substr(shift,0,1);
+        $char =~ /[\200-\237]/;
+    }
+
+    sub is_latin_1 {
+        my $char = substr(shift,0,1);
+        $char =~ /[\240-\377]/;
+    }
+
+The above would be adequate if the concern was only with numeric code points.
+However, the concern may be with characters rather than code points 
+and on an EBCDIC machine it may be desirable for constructs such as 
+C<if (is_print_ascii("A")) {print "A is a printable character\n";}> to print
+out the expected message.  One way to represent the above collection
+of character classification subs that is capable of working across the
+four coded character sets discussed in this document is as follows:
+
+    sub Is_c0 {
+        my $char = substr(shift,0,1);
+        if (ord('^')==94)  { # ascii
+            return $char =~ /[\000-\037]/;
+        } 
+        if (ord('^')==176) { # 37
+            return $char =~ /[\000-\003\067\055-\057\026\005\045\013-\023\074\075\062\046\030\031\077\047\034-\037]/;
+        }
+        if (ord('^')==95 || ord('^')==106) { # 1047 || posix-bc
+            return $char =~ /[\000-\003\067\055-\057\026\005\025\013-\023\074\075\062\046\030\031\077\047\034-\037]/;
+        }
+    }
+
+    sub Is_print_ascii {
+        my $char = substr(shift,0,1);
+        $char =~ /[ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~]/;
+    }
+
+    sub Is_delete {
+        my $char = substr(shift,0,1);
+        if (ord('^')==94)  { # ascii
+            return $char eq "\177";
+        }
+        else  {              # ebcdic
+            return $char eq "\007";
+        }
+    }
+
+    sub Is_c1 {
+        my $char = substr(shift,0,1);
+        if (ord('^')==94)  { # ascii
+            return $char =~ /[\200-\237]/;
+        }
+        if (ord('^')==176) { # 37
+            return $char =~ /[\040-\044\025\006\027\050-\054\011\012\033\060\061\032\063-\066\010\070-\073\040\024\076\377]/;
+        }
+        if (ord('^')==95)  { # 1047
+            return $char =~ /[\040-\045\006\027\050-\054\011\012\033\060\061\032\063-\066\010\070-\073\040\024\076\377]/;
+        }
+        if (ord('^')==106) { # posix-bc
+            return $char =~ 
+              /[\040-\045\006\027\050-\054\011\012\033\060\061\032\063-\066\010\070-\073\040\024\076\137]/;
+        }
+    }
+
+    sub Is_latin_1 {
+        my $char = substr(shift,0,1);
+        if (ord('^')==94)  { # ascii
+            return $char =~ /[\240-\377]/;
+        }
+        if (ord('^')==176) { # 37
+            return $char =~ 
+              /[\101\252\112\261\237\262\152\265\275\264\232\212\137\312\257\274\220\217\352\372\276\240\266\263\235\332\233\213\267\270\271\253\144\145\142\146\143\147\236\150\164\161-\163\170\165-\167\254\151\355\356\353\357\354\277\200\375\376\373\374\255\256\131\104\105\102\106\103\107\234\110\124\121-\123\130\125-\127\214\111\315\316\313\317\314\341\160\335\336\333\334\215\216\337]/;
+        }
+        if (ord('^')==95)  { # 1047
+            return $char =~
+              /[\101\252\112\261\237\262\152\265\273\264\232\212\260\312\257\274\220\217\352\372\276\240\266\263\235\332\233\213\267\270\271\253\144\145\142\146\143\147\236\150\164\161-\163\170\165-\167\254\151\355\356\353\357\354\277\200\375\376\373\374\272\256\131\104\105\102\106\103\107\234\110\124\121-\123\130\125-\127\214\111\315\316\313\317\314\341\160\335\336\333\334\215\216\337]/; 
+        }
+        if (ord('^')==106) { # posix-bc
+            return $char =~ 
+              /[\101\252\260\261\237\262\320\265\171\264\232\212\272\312\257\241\220\217\352\372\276\240\266\263\235\332\233\213\267\270\271\253\144\145\142\146\143\147\236\150\164\161-\163\170\165-\167\254\151\355\356\353\357\354\277\200\340\376\335\374\255\256\131\104\105\102\106\103\107\234\110\124\121-\123\130\125-\127\214\111\315\316\313\317\314\341\160\300\336\333\334\215\216\337]/;
+        }
+    }
+
+Note however that only the C<Is_print_ascii()> sub is really independent 
+of coded character set.  Another way to write C<Is_latin_1()> would be 
+to use the characters in the range explicitly:
+
+    sub Is_latin_1 {
+        my $char = substr(shift,0,1);
+        $char =~ /[������������������������������������������������������������������������������������������������]/;
+    }
+
+Although that form may run into trouble in network transit (due to the 
+presence of 8 bit characters) or on non ISO-Latin character sets.
+
+=head1 SOCKETS
+
+Most socket programming assumes ASCII character encodings in network
+byte order.  Exceptions can include CGI script writing under a
+host web server where the server may take care of translation for you.
+Most host web servers convert EBCDIC data to ISO-8859-1 or Unicode on
+output.
+
+=head1 SORTING
+
+One big difference between ASCII based character sets and EBCDIC ones
+are the relative positions of upper and lower case letters and the
+letters compared to the digits.  If sorted on an ASCII based machine the
+two letter abbreviation for a physician comes before the two letter
+abbreviation for drive, that is:
+
+    @sorted = sort(qw(Dr. dr.));  # @sorted holds ('Dr.','dr.') on ASCII,
+                                  # but ('dr.','Dr.') on EBCDIC
+
+The property of lower case before uppercase letters in EBCDIC is
+even carried to the Latin 1 EBCDIC pages such as 0037 and 1047.
+An example would be that E<Euml> C<E WITH DIAERESIS> (203) comes 
+before E<euml> C<e WITH DIAERESIS> (235) on an ASCII machine, but 
+the latter (83) comes before the former (115) on an EBCDIC machine.  
+(Astute readers will note that the upper case version of E<szlig> 
+C<SMALL LETTER SHARP S> is simply "SS" and that the upper case version of 
+E<yuml> C<y WITH DIAERESIS> is not in the 0..255 range but it is 
+at U+0178 in Unicode, or C<"\x{178}"> in a Unicode enabled Perl).
+
+The sort order will cause differences between results obtained on
+ASCII machines versus EBCDIC machines.  What follows are some suggestions
+on how to deal with these differences.
+
+=head2 Ignore ASCII vs. EBCDIC sort differences.
+
+This is the least computationally expensive strategy.  It may require
+some user education.
+
+=head2 MONO CASE then sort data.
+
+In order to minimize the expense of mono casing mixed text try to
+C<tr///> towards the character set case most employed within the data.
+If the data are primarily UPPERCASE non Latin 1 then apply tr/[a-z]/[A-Z]/
+then sort().  If the data are primarily lowercase non Latin 1 then
+apply tr/[A-Z]/[a-z]/ before sorting.  If the data are primarily UPPERCASE
+and include Latin-1 characters then apply:  
+
+    tr/[a-z]/[A-Z]/; 
+    tr/[������������������������������]/[������������������������������]/;
+    s/�/SS/g; 
+
+then sort().  Do note however that such Latin-1 manipulation does not 
+address the E<yuml> C<y WITH DIAERESIS> character that will remain at 
+code point 255 on ASCII machines, but 223 on most EBCDIC machines 
+where it will sort to a place less than the EBCDIC numerals.  With a 
+Unicode enabled Perl you might try:
+
+    tr/^?/\x{178}/;
+
+The strategy of mono casing data before sorting does not preserve the case 
+of the data and may not be acceptable for that reason.
+
+=head2 Convert, sort data, then re convert.
+
+This is the most expensive proposition that does not employ a network
+connection.
+
+=head2 Perform sorting on one type of machine only.
+
+This strategy can employ a network connection.  As such
+it would be computationally expensive.
+
+=head1 TRANSFORMATION FORMATS
+
+There are a variety of ways of transforming data with an intra character set 
+mapping that serve a variety of purposes.  Sorting was discussed in the 
+previous section and a few of the other more popular mapping techniques are 
+discussed next.
+
+=head2 URL decoding and encoding
+
+Note that some URLs have hexadecimal ASCII code points in them in an
+attempt to overcome character or protocol limitation issues.  For example 
+the tilde character is not on every keyboard hence a URL of the form:
+
+    http://www.pvhp.com/~pvhp/
+
+may also be expressed as either of:
+
+    http://www.pvhp.com/%7Epvhp/
+
+    http://www.pvhp.com/%7epvhp/
+
+where 7E is the hexadecimal ASCII code point for '~'.  Here is an example
+of decoding such a URL under CCSID 1047:
+
+    $url = 'http://www.pvhp.com/%7Epvhp/';
+    # this array assumes code page 1047
+    my @a2e_1047 = (
+          0,  1,  2,  3, 55, 45, 46, 47, 22,  5, 21, 11, 12, 13, 14, 15,
+         16, 17, 18, 19, 60, 61, 50, 38, 24, 25, 63, 39, 28, 29, 30, 31,
+         64, 90,127,123, 91,108, 80,125, 77, 93, 92, 78,107, 96, 75, 97,
+        240,241,242,243,244,245,246,247,248,249,122, 94, 76,126,110,111,
+        124,193,194,195,196,197,198,199,200,201,209,210,211,212,213,214,
+        215,216,217,226,227,228,229,230,231,232,233,173,224,189, 95,109,
+        121,129,130,131,132,133,134,135,136,137,145,146,147,148,149,150,
+        151,152,153,162,163,164,165,166,167,168,169,192, 79,208,161,  7,
+         32, 33, 34, 35, 36, 37,  6, 23, 40, 41, 42, 43, 44,  9, 10, 27,
+         48, 49, 26, 51, 52, 53, 54,  8, 56, 57, 58, 59,  4, 20, 62,255,
+         65,170, 74,177,159,178,106,181,187,180,154,138,176,202,175,188,
+        144,143,234,250,190,160,182,179,157,218,155,139,183,184,185,171,
+        100,101, 98,102, 99,103,158,104,116,113,114,115,120,117,118,119,
+        172,105,237,238,235,239,236,191,128,253,254,251,252,186,174, 89,
+         68, 69, 66, 70, 67, 71,156, 72, 84, 81, 82, 83, 88, 85, 86, 87,
+        140, 73,205,206,203,207,204,225,112,221,222,219,220,141,142,223
+    );
+    $url =~ s/%([0-9a-fA-F]{2})/pack("c",$a2e_1047[hex($1)])/ge;
+
+Conversely, here is a partial solution for the task of encoding such 
+a URL under the 1047 code page:
+
+    $url = 'http://www.pvhp.com/~pvhp/';
+    # this array assumes code page 1047
+    my @e2a_1047 = (
+          0,  1,  2,  3,156,  9,134,127,151,141,142, 11, 12, 13, 14, 15,
+         16, 17, 18, 19,157, 10,  8,135, 24, 25,146,143, 28, 29, 30, 31,
+        128,129,130,131,132,133, 23, 27,136,137,138,139,140,  5,  6,  7,
+        144,145, 22,147,148,149,150,  4,152,153,154,155, 20, 21,158, 26,
+         32,160,226,228,224,225,227,229,231,241,162, 46, 60, 40, 43,124,
+         38,233,234,235,232,237,238,239,236,223, 33, 36, 42, 41, 59, 94,
+         45, 47,194,196,192,193,195,197,199,209,166, 44, 37, 95, 62, 63,
+        248,201,202,203,200,205,206,207,204, 96, 58, 35, 64, 39, 61, 34,
+        216, 97, 98, 99,100,101,102,103,104,105,171,187,240,253,254,177,
+        176,106,107,108,109,110,111,112,113,114,170,186,230,184,198,164,
+        181,126,115,116,117,118,119,120,121,122,161,191,208, 91,222,174,
+        172,163,165,183,169,167,182,188,189,190,221,168,175, 93,180,215,
+        123, 65, 66, 67, 68, 69, 70, 71, 72, 73,173,244,246,242,243,245,
+        125, 74, 75, 76, 77, 78, 79, 80, 81, 82,185,251,252,249,250,255,
+         92,247, 83, 84, 85, 86, 87, 88, 89, 90,178,212,214,210,211,213,
+         48, 49, 50, 51, 52, 53, 54, 55, 56, 57,179,219,220,217,218,159
+    );
+    # The following regular expression does not address the 
+    # mappings for: ('.' => '%2E', '/' => '%2F', ':' => '%3A') 
+    $url =~ s/([\t "#%&\(\),;<=>\?\@\[\\\]^`{|}~])/sprintf("%%%02X",$e2a_1047[ord($1)])/ge;
+
+where a more complete solution would split the URL into components 
+and apply a full s/// substitution only to the appropriate parts.
+
+In the remaining examples a @e2a or @a2e array may be employed
+but the assignment will not be shown explicitly.  For code page 1047
+you could use the @a2e_1047 or @e2a_1047 arrays just shown.
+
+=head2 uu encoding and decoding
+
+The C<u> template to pack() or unpack() will render EBCDIC data in EBCDIC 
+characters equivalent to their ASCII counterparts.  For example, the 
+following will print "Yes indeed\n" on either an ASCII or EBCDIC computer:
+
+    $all_byte_chrs = '';
+    for (0..255) { $all_byte_chrs .= chr($_); }
+    $uuencode_byte_chrs = pack('u', $all_byte_chrs);
+    ($uu = <<'    ENDOFHEREDOC') =~ s/^\s*//gm;
+    M``$"`P0%!@<("0H+#`T.#Q`1$A,4%187&!D:&QP='A\@(2(C)"4F)R@I*BLL
+    M+2XO,#$R,S0U-C<X.3H[/#T^/T!!0D-$149'2$E*2TQ-3D]045)35%565UA9
+    M6EM<75Y?8&%B8V1E9F=H:6IK;&UN;W!Q<G-T=79W>'EZ>WQ]?G^`@8*#A(6&
+    MAXB)BHN,C8Z/D)&2DY25EI>8F9J;G)V>GZ"AHJ.DI::GJ*FJJZRMKJ^PL;*S
+    MM+6VM[BYNKN\O;Z_P,'"P\3%QL?(R<K+S,W.S]#1TM/4U=;7V-G:V]S=WM_@
+    ?X>+CY.7FY^CIZNOL[>[O\/'R\_3U]O?X^?K[_/W^_P``
+    ENDOFHEREDOC
+    if ($uuencode_byte_chrs eq $uu) {
+        print "Yes ";
+    }
+    $uudecode_byte_chrs = unpack('u', $uuencode_byte_chrs);
+    if ($uudecode_byte_chrs eq $all_byte_chrs) {
+        print "indeed\n";
+    }
+
+Here is a very spartan uudecoder that will work on EBCDIC provided
+that the @e2a array is filled in appropriately:
+
+    #!/usr/local/bin/perl
+    @e2a = ( # this must be filled in
+           );
+    $_ = <> until ($mode,$file) = /^begin\s*(\d*)\s*(\S*)/;
+    open(OUT, "> $file") if $file ne "";
+    while(<>) {
+        last if /^end/;
+        next if /[a-z]/;
+        next unless int(((($e2a[ord()] - 32 ) & 077) + 2) / 3) ==
+            int(length() / 4);
+        print OUT unpack("u", $_);
+    }
+    close(OUT);
+    chmod oct($mode), $file;
+
+
+=head2 Quoted-Printable encoding and decoding
+
+On ASCII encoded machines it is possible to strip characters outside of
+the printable set using:
+
+    # This QP encoder works on ASCII only
+    $qp_string =~ s/([=\x00-\x1F\x80-\xFF])/sprintf("=%02X",ord($1))/ge;
+
+Whereas a QP encoder that works on both ASCII and EBCDIC machines 
+would look somewhat like the following (where the EBCDIC branch @e2a 
+array is omitted for brevity):
+
+    if (ord('A') == 65) {    # ASCII
+        $delete = "\x7F";    # ASCII
+        @e2a = (0 .. 255)    # ASCII to ASCII identity map
+    }
+    else {                   # EBCDIC
+        $delete = "\x07";    # EBCDIC
+        @e2a =               # EBCDIC to ASCII map (as shown above)
+    }
+    $qp_string =~
+      s/([^ !"\#\$%&'()*+,\-.\/0-9:;<>?\@A-Z[\\\]^_`a-z{|}~$delete])/sprintf("=%02X",$e2a[ord($1)])/ge;
+
+(although in production code the substitutions might be done
+in the EBCDIC branch with the @e2a array and separately in the 
+ASCII branch without the expense of the identity map).
+
+Such QP strings can be decoded with:
+
+    # This QP decoder is limited to ASCII only
+    $string =~ s/=([0-9A-Fa-f][0-9A-Fa-f])/chr hex $1/ge;
+    $string =~ s/=[\n\r]+$//;
+
+Whereas a QP decoder that works on both ASCII and EBCDIC machines 
+would look somewhat like the following (where the @a2e array is
+omitted for brevity):
+
+    $string =~ s/=([0-9A-Fa-f][0-9A-Fa-f])/chr $a2e[hex $1]/ge;
+    $string =~ s/=[\n\r]+$//;
+
+=head2 Caesarian cyphers
+
+The practice of shifting an alphabet one or more characters for encipherment
+dates back thousands of years and was explicitly detailed by Gaius Julius
+Caesar in his B<Gallic Wars> text.  A single alphabet shift is sometimes 
+referred to as a rotation and the shift amount is given as a number $n after
+the string 'rot' or "rot$n".  Rot0 and rot26 would designate identity maps 
+on the 26 letter English version of the Latin alphabet.  Rot13 has the 
+interesting property that alternate subsequent invocations are identity maps 
+(thus rot13 is its own non-trivial inverse in the group of 26 alphabet 
+rotations).  Hence the following is a rot13 encoder and decoder that will 
+work on ASCII and EBCDIC machines:
+
+    #!/usr/local/bin/perl
+
+    while(<>){
+        tr/n-za-mN-ZA-M/a-zA-Z/;
+        print;
+    }
+
+In one-liner form:
+
+    perl -ne 'tr/n-za-mN-ZA-M/a-zA-Z/;print'
+
+
+=head1 Hashing order and checksums
+
+XXX
+
+=head1 I18N AND L10N
+
+Internationalization(I18N) and localization(L10N) are supported at least 
+in principle even on EBCDIC machines.  The details are system dependent 
+and discussed under the L<perlebcdic/OS ISSUES> section below.
+
+=head1 MULTI OCTET CHARACTER SETS
+
+Multi byte EBCDIC code pages; Unicode, UTF-8, UTF-EBCDIC, XXX.
+
+=head1 OS ISSUES
+
+There may be a few system dependent issues 
+of concern to EBCDIC Perl programmers.
+
+=head2 OS/400 
+
+The PASE environment.
+
+=over 8
+
+=item IFS access
+
+XXX.
+
+=back
+
+=head2 OS/390 
+
+Perl runs under UNIX System Services or USS.
+
+=over 8
+
+=item chcp
+
+B<chcp> is supported as a shell utility for displaying and changing 
+one's code page.  See also L<chcp>.
+
+=item dataset access
+
+For sequential data set access try:
+
+    my @ds_records = `cat //DSNAME`;
+
+or:
+
+    my @ds_records = `cat //'HLQ.DSNAME'`;
+
+See also the OS390::Stdio module on CPAN.
+
+=item OS/390 iconv
+
+B<iconv> is supported as both a shell utility and a C RTL routine.
+See also the iconv(1) and iconv(3) manual pages.
+
+=item locales
+
+On OS/390 see L<locale> for information on locales.  The L10N files
+are in F</usr/nls/locale>.  $Config{d_setlocale} is 'define' on OS/390.
+
+=back
+
+=head2 VM/ESA?
+
+XXX.
+
+=head2 POSIX-BC?
+
+XXX.
+
+=head1 BUGS
+
+This pod document contains literal Latin 1 characters and may encounter 
+translation difficulties.  In particular one popular nroff implementation 
+was known to strip accented characters to their unaccented counterparts 
+while attempting to view this document through the B<pod2man> program 
+(for example, you may see a plain C<y> rather than one with a diaeresis 
+as in E<yuml>).  Another nroff truncated the resultant man page at
+the first occurrence of 8 bit characters.
+
+Not all shells will allow multiple C<-e> string arguments to perl to
+be concatenated together properly as recipes 2, 3, and 4 might seem
+to imply.
+
+Perl does not yet work with any Unicode features on EBCDIC platforms.
+
+=head1 SEE ALSO
+
+L<perllocale>, L<perlfunc>.
+
+=head1 REFERENCES
+
+http://anubis.dkuug.dk/i18n/charmaps
+
+http://www.unicode.org/
+
+http://www.unicode.org/unicode/reports/tr16/
+
+http://www.wps.com/texts/codes/
+B<ASCII: American Standard Code for Information Infiltration> Tom Jennings,
+September 1999.
+
+B<The Unicode Standard Version 2.0> The Unicode Consortium, 
+ISBN 0-201-48345-9, Addison Wesley Developers Press, July 1996. 
+
+B<The Unicode Standard Version 3.0> The Unicode Consortium, Lisa Moore ed., 
+ISBN 0-201-61633-5, Addison Wesley Developers Press, February 2000. 
+
+B<CDRA: IBM - Character Data Representation Architecture - 
+Reference and Registry>, IBM SC09-2190-00, December 1996. 
+
+"Demystifying Character Sets", Andrea Vine, Multilingual Computing 
+& Technology, B<#26 Vol. 10 Issue 4>, August/September 1999;
+ISSN 1523-0309; Multilingual Computing Inc. Sandpoint ID, USA.
+
+B<Codes, Ciphers, and Other Cryptic and Clandestine Communication>
+Fred B. Wrixon, ISBN 1-57912-040-7, Black Dog & Leventhal Publishers,
+1998.
+
+=head1 AUTHOR
+
+Peter Prymmer pvhp@best.com wrote this in 1999 and 2000 
+with CCSID 0819 and 0037 help from Chris Leach and 
+AndrE<eacute> Pirard A.Pirard@ulg.ac.be as well as POSIX-BC 
+help from Thomas Dorner Thomas.Dorner@start.de.
+Thanks also to Vickie Cooper, Philip Newton, William Raffloer, and 
+Joe Smith.  Trademarks, registered trademarks, service marks and 
+registered service marks used in this document are the property of 
+their respective owners.
+
+
diff --git a/contrib/perl5/pod/perlmodlib.PL b/contrib/perl5/pod/perlmodlib.PL
new file mode 100755
index 0000000000000..0cdadb76c79cf
--- /dev/null
+++ b/contrib/perl5/pod/perlmodlib.PL
@@ -0,0 +1,1383 @@
+#!../miniperl
+# Collect one POD "=item" entry per lib/ or ext/ module named in ../MANIFEST.
+open (OUT, ">perlmodlib.tmp") or die $!;
+my (@pragma, @mod);	# finished "=item" entries, filed by filename case (see end of loop)
+open (MANIFEST, "../MANIFEST") or die $!;
+# One MANIFEST line per iteration; only lib/ and ext/ entries are considered.
+while (<MANIFEST>) {
+     my $filename;
+     next unless s|^lib/|| or m|^ext/|;	# note: strips "lib/" from $_ in place
+     ($filename) = /(\S+)/;	# first whitespace-delimited token on the line
+     $filename =~ s|^[^/]+/|| if $filename =~ s|^ext/||;	# ext/: drop "ext/" and the next directory
+     next unless $filename =~ /\.p(m|od)$/;
+     next unless open (MOD, "../lib/$filename");	# cannot open: skipped without a warning
+     # Scan paragraph by paragraph for the one that starts with "=head1 NAME".
+     my ($name, $thing);
+     my $foundit=0;
+     {
+	 local $/="";	# paragraph mode, in effect only inside this bare block
+	 while (<MOD>) {
+	     next unless /^=head1 NAME/;
+	     $foundit++;
+	     last;
+	 }
+     }
+     unless ($foundit) {
+	 warn "$filename missing head1\n";
+	 next;
+     }
+     my $title = <MOD>;	# $/ is restored by now, so this reads a single line
+     chomp($title);
+     close MOD;
+     # Module name as Perl spells it: drop the extension, "/" becomes "::".
+     my $perlname = $filename;
+     $perlname =~ s!\.p(m|od)$!!;
+     $perlname =~ s!/!::!g;
+     # The title line is split at the first " - " or " -- " into name and description.
+     ($name, $thing) = split / --? /, $title, 2;
+
+     unless ($name and $thing) {
+	 warn "$filename missing name\n"  unless $name;
+	 warn "$filename missing thing\n" unless $thing;
+	 next;
+     }
+     # Tidy the description and build the POD entry for this module.
+     $thing =~ s/^perl pragma to //i;
+     $thing = ucfirst($thing);
+     $title = "=item $perlname\n\n$thing\n\n";
+
+     # print "$perlname $thing\n";
+     # Any capital letter in the path files it as a module; otherwise as a pragma.
+     if ($filename=~/[A-Z]/) {
+          push @mod, $title;
+     } else {
+          push @pragma, $title;
+     }
+}
+
+print OUT <<'EOF';
+# Generated by perlmodlib.PL  DO NOT EDIT!
+
+=head1 NAME
+
+perlmodlib - constructing new Perl modules and finding existing ones
+
+=head1 DESCRIPTION
+
+=head1 THE PERL MODULE LIBRARY
+
+Many modules are included in the Perl distribution.  These are described
+below, and all end in F<.pm>.  You may discover compiled library
+files (usually ending in F<.so>) or small pieces of modules to be
+autoloaded (ending in F<.al>); these were automatically generated
+by the installation process.  You may also discover files in the
+library directory that end in either F<.pl> or F<.ph>.  These are
+old libraries supplied so that old programs that use them still
+run.  The F<.pl> files will all eventually be converted into standard
+modules, and the F<.ph> files made by B<h2ph> will probably end up
+as extension modules made by B<h2xs>.  (Some F<.ph> values may
+already be available through the POSIX, Errno, or Fcntl modules.)
+The B<pl2pm> file in the distribution may help in your conversion,
+but it's just a mechanical process and therefore far from bulletproof.
+
+=head2 Pragmatic Modules
+
+They work somewhat like compiler directives (pragmata) in that they
+tend to affect the compilation of your program, and thus will usually
+work well only when used within a C<use>, or C<no>.  Most of these
+are lexically scoped, so an inner BLOCK may countermand them
+by saying:
+
+    no integer;
+    no strict 'refs';
+    no warnings;
+
+which lasts until the end of that BLOCK.
+
+Some pragmas are lexically scoped--typically those that affect the
+C<$^H> hints variable.  Others affect the current package instead,
+like C<use vars> and C<use subs>, which allow you to predeclare
+variables or subroutines within a particular I<file> rather than
+just a block.  Such declarations are effective for the entire file
+for which they were declared.  You cannot rescind them with C<no
+vars> or C<no subs>.
+
+The following pragmas are defined (and have their own documentation).
+
+=over 12
+
+EOF
+
+print OUT $_ for (sort @pragma);	# pragma entries, in default (string) sort order
+# Literal POD between the two lists; the quoted terminator disables interpolation.
+print OUT <<'EOF';
+=back
+
+=head2 Standard Modules
+
+Standard, bundled modules are all expected to behave in a well-defined
+manner with respect to namespace pollution because they use the
+Exporter module.  See their own documentation for details.
+
+=over 12
+
+EOF
+# Module entries follow, in the same default (string) sort order.
+print OUT $_ for (sort @mod);
+
+print OUT <<'EOF';
+=back
+
+To find out I<all> modules installed on your system, including
+those without documentation or outside the standard release,
+just do this:
+
+    % find `perl -e 'print "@INC"'` -name '*.pm' -print
+
+They should all have their own documentation installed and accessible
+via your system man(1) command.  If you do not have a B<find>
+program, you can use the Perl B<find2perl> program instead, which
+generates Perl code as output you can run through perl.  If you
+have a B<man> program but it doesn't find your modules, you'll have
+to fix your manpath.  See L<perl> for details.  If you have no
+system B<man> command, you might try the B<perldoc> program.
+
+=head2 Extension Modules
+
+Extension modules are written in C (or a mix of Perl and C).  They
+are usually dynamically loaded into Perl if and when you need them,
+but may also be linked in statically.  Supported extension modules
+include Socket, Fcntl, and POSIX.
+
+Many popular C extension modules do not come bundled (at least, not
+completely) due to their sizes, volatility, or simply lack of time
+for adequate testing and configuration across the multitude of
+platforms on which Perl was beta-tested.  You are encouraged to
+look for them on CPAN (described below), or using web search engines
+like Alta Vista or Deja News.
+
+=head1 CPAN
+
+CPAN stands for Comprehensive Perl Archive Network; it's a globally
+replicated trove of Perl materials, including documentation, style
+guides, tricks and traps, alternate ports to non-Unix systems and
+occasional binary distributions for these.   Search engines for
+CPAN can be found at http://cpan.perl.com/ and at
+http://theory.uwinnipeg.ca/mod_perl/cpan-search.pl .
+
+Most importantly, CPAN includes around a thousand unbundled modules,
+some of which require a C compiler to build.  Major categories of
+modules are:
+
+=over
+
+=item *
+
+Language Extensions and Documentation Tools
+
+=item *
+
+Development Support
+
+=item *
+
+Operating System Interfaces
+
+=item *
+
+Networking, Device Control (modems) and InterProcess Communication
+
+=item *
+
+Data Types and Data Type Utilities
+
+=item *
+
+Database Interfaces
+
+=item *
+
+User Interfaces
+
+=item *
+
+Interfaces to / Emulations of Other Programming Languages
+
+=item *
+
+File Names, File Systems and File Locking (see also File Handles)
+
+=item *
+
+String Processing, Language Text Processing, Parsing, and Searching
+
+=item *
+
+Option, Argument, Parameter, and Configuration File Processing
+
+=item *
+
+Internationalization and Locale
+
+=item *
+
+Authentication, Security, and Encryption
+
+=item *
+
+World Wide Web, HTML, HTTP, CGI, MIME
+
+=item *
+
+Server and Daemon Utilities
+
+=item *
+
+Archiving and Compression
+
+=item *
+
+Images, Pixmap and Bitmap Manipulation, Drawing, and Graphing
+
+=item *
+
+Mail and Usenet News
+
+=item *
+
+Control Flow Utilities (callbacks and exceptions etc)
+
+=item *
+
+File Handle and Input/Output Stream Utilities
+
+=item *
+
+Miscellaneous Modules
+
+=back
+
+Registered CPAN sites as of this writing include the following.
+You should try to choose one close to you:
+
+=head2 Africa
+
+=over 4
+
+=item *
+
+South Africa
+
+    ftp://ftp.is.co.za/programming/perl/CPAN/
+    ftp://ftp.saix.net/pub/CPAN/
+    ftp://ftpza.co.za/pub/mirrors/cpan/
+    ftp://ftp.sun.ac.za/CPAN/
+
+=back
+
+=head2 Asia
+
+=over 4
+
+=item *
+
+China
+
+    ftp://freesoft.cei.gov.cn/pub/languages/perl/CPAN/
+    http://www2.linuxforum.net/mirror/CPAN/
+    http://cpan.shellhung.org/
+    ftp://ftp.shellhung.org/pub/CPAN
+
+=item *
+
+Hong Kong
+
+    http://CPAN.pacific.net.hk/
+    ftp://ftp.pacific.net.hk/pub/mirror/CPAN/
+
+=item *
+
+Indonesia
+
+    http://piksi.itb.ac.id/CPAN/
+    ftp://mirrors.piksi.itb.ac.id/CPAN/
+    http://CPAN.mweb.co.id/
+    ftp://ftp.mweb.co.id/pub/languages/perl/CPAN/
+
+=item *
+
+Israel
+
+    http://www.iglu.org.il:/pub/CPAN/
+    ftp://ftp.iglu.org.il/pub/CPAN/
+    http://bioinfo.weizmann.ac.il/pub/software/perl/CPAN/
+    ftp://bioinfo.weizmann.ac.il/pub/software/perl/CPAN/
+
+=item *
+
+Japan
+
+    ftp://ftp.u-aizu.ac.jp/pub/lang/perl/CPAN/
+    ftp://ftp.kddlabs.co.jp/CPAN/
+    http://mirror.nucba.ac.jp/mirror/Perl/
+    ftp://mirror.nucba.ac.jp/mirror/Perl/
+    ftp://ftp.meisei-u.ac.jp/pub/CPAN/
+    ftp://ftp.jaist.ac.jp/pub/lang/perl/CPAN/
+    ftp://ftp.dti.ad.jp/pub/lang/CPAN/
+    ftp://ftp.ring.gr.jp/pub/lang/perl/CPAN/
+
+=item *
+
+Saudi Arabia
+
+    ftp://ftp.isu.net.sa/pub/CPAN/
+
+=item *
+
+Singapore
+
+    http://cpan.hjc.edu.sg
+    http://ftp.nus.edu.sg/unix/perl/CPAN/
+    ftp://ftp.nus.edu.sg/pub/unix/perl/CPAN/
+
+=item *
+
+South Korea
+
+    http://CPAN.bora.net/
+    ftp://ftp.bora.net/pub/CPAN/
+    http://ftp.kornet.net/CPAN/
+    ftp://ftp.kornet.net/pub/CPAN/
+    ftp://ftp.nuri.net/pub/CPAN/
+
+=item *
+
+Taiwan
+
+    ftp://coda.nctu.edu.tw/UNIX/perl/CPAN
+    ftp://ftp.ee.ncku.edu.tw/pub/perl/CPAN/
+    ftp://ftp1.sinica.edu.tw/pub1/perl/CPAN/
+
+=item *
+
+Thailand
+
+    http://download.nectec.or.th/CPAN/
+    ftp://ftp.nectec.or.th/pub/languages/CPAN/
+    ftp://ftp.cs.riubon.ac.th/pub/mirrors/CPAN/
+
+=back
+
+=head2 Central America
+
+=over 4
+
+=item *
+
+Costa Rica
+
+    ftp://ftp.linux.co.cr/mirrors/CPAN/
+    http://ftp.ucr.ac.cr/Unix/CPAN/
+    ftp://ftp.ucr.ac.cr/pub/Unix/CPAN/
+
+=back
+
+=head2 Europe
+
+=over 4
+
+=item *
+
+Austria
+
+    ftp://ftp.tuwien.ac.at/pub/languages/perl/CPAN/
+
+=item *
+
+Belgium
+
+    http://ftp.easynet.be/CPAN/
+    ftp://ftp.easynet.be/CPAN/
+    ftp://ftp.kulnet.kuleuven.ac.be/pub/mirror/CPAN/
+
+=item *
+
+Bulgaria
+
+    ftp://ftp.ntrl.net/pub/mirrors/CPAN/
+
+=item *
+
+Croatia
+
+    ftp://ftp.linux.hr/pub/CPAN/
+
+=item *
+
+Czech Republic
+
+    http://www.fi.muni.cz/pub/perl/
+    ftp://ftp.fi.muni.cz/pub/perl/
+    ftp://sunsite.mff.cuni.cz/MIRRORS/ftp.funet.fi/pub/languages/perl/CPAN/
+
+=item *
+
+Denmark
+
+    ftp://sunsite.auc.dk/pub/languages/perl/CPAN/
+    http://www.cpan.dk/CPAN/
+    ftp://www.cpan.dk/ftp.cpan.org/CPAN/
+
+=item *
+
+England
+
+    http://www.mirror.ac.uk/sites/ftp.funet.fi/pub/languages/perl/CPAN
+    ftp://ftp.mirror.ac.uk/sites/ftp.funet.fi/pub/languages/perl/CPAN/
+    ftp://ftp.demon.co.uk/pub/mirrors/perl/CPAN/
+    ftp://ftp.flirble.org/pub/languages/perl/CPAN/
+    ftp://ftp.plig.org/pub/CPAN/
+    ftp://sunsite.doc.ic.ac.uk/packages/CPAN/
+    http://mirror.uklinux.net/CPAN/
+    ftp://mirror.uklinux.net/pub/CPAN/
+    ftp://usit.shef.ac.uk/pub/packages/CPAN/
+
+=item *
+
+Estonia
+
+    ftp://ftp.ut.ee/pub/languages/perl/CPAN/
+
+=item *
+
+Finland
+
+    ftp://ftp.funet.fi/pub/languages/perl/CPAN/
+
+=item *
+
+France
+
+    ftp://cpan.ftp.worldonline.fr/pub/CPAN/
+    ftp://ftp.club-internet.fr/pub/perl/CPAN/
+    ftp://ftp.lip6.fr/pub/perl/CPAN/
+    ftp://ftp.oleane.net/pub/mirrors/CPAN/
+    ftp://ftp.pasteur.fr/pub/computing/CPAN/
+    ftp://cpan.cict.fr/pub/CPAN/
+    ftp://ftp.uvsq.fr/pub/perl/CPAN/
+
+=item *
+
+Germany
+
+    ftp://ftp.rz.ruhr-uni-bochum.de/pub/CPAN/
+    ftp://ftp.freenet.de/pub/ftp.cpan.org/pub/CPAN/
+    ftp://ftp.uni-erlangen.de/pub/source/CPAN/
+    ftp://ftp-stud.fht-esslingen.de/pub/Mirrors/CPAN
+    ftp://ftp.gigabell.net/pub/CPAN/
+    http://ftp.gwdg.de/pub/languages/perl/CPAN/
+    ftp://ftp.gwdg.de/pub/languages/perl/CPAN/
+    ftp://ftp.uni-hamburg.de/pub/soft/lang/perl/CPAN/
+    ftp://ftp.leo.org/pub/comp/general/programming/languages/script/perl/CPAN/
+    ftp://ftp.mpi-sb.mpg.de/pub/perl/CPAN/
+    ftp://ftp.gmd.de/mirrors/CPAN/
+
+=item *
+
+Greece
+
+    ftp://ftp.forthnet.gr/pub/languages/perl/CPAN
+    ftp://ftp.ntua.gr/pub/lang/perl/
+
+=item *
+
+Hungary
+
+    http://cpan.artifact.hu/
+    ftp://cpan.artifact.hu/CPAN/
+    ftp://ftp.kfki.hu/pub/packages/perl/CPAN/
+
+=item *
+
+Iceland
+
+    http://cpan.gm.is/
+    ftp://ftp.gm.is/pub/CPAN/
+
+=item *
+
+Ireland
+
+    http://cpan.indigo.ie/
+    ftp://cpan.indigo.ie/pub/CPAN/
+    http://sunsite.compapp.dcu.ie/pub/perl/
+    ftp://sunsite.compapp.dcu.ie/pub/perl/
+
+=item *
+
+Italy
+
+    http://cpan.nettuno.it/
+    http://gusp.dyndns.org/CPAN/
+    ftp://gusp.dyndns.org/pub/CPAN
+    http://softcity.iol.it/cpan
+    ftp://softcity.iol.it/pub/cpan
+    ftp://ftp.unina.it/pub/Other/CPAN/
+    ftp://ftp.unipi.it/pub/mirror/perl/CPAN/
+    ftp://cis.uniRoma2.it/CPAN/
+    ftp://ftp.edisontel.it/pub/CPAN_Mirror/
+    ftp://ftp.flashnet.it/pub/CPAN/
+
+=item *
+
+Latvia
+
+    http://kvin.lv/pub/CPAN/
+
+=item *
+
+Netherlands
+
+    ftp://download.xs4all.nl/pub/mirror/CPAN/
+    ftp://ftp.nl.uu.net/pub/CPAN/
+    ftp://ftp.nluug.nl/pub/languages/perl/CPAN/
+    ftp://ftp.cpan.nl/pub/CPAN/
+    http://www.cs.uu.nl/mirror/CPAN/
+    ftp://ftp.cs.uu.nl/mirror/CPAN/
+
+=item *
+
+Norway
+
+    ftp://sunsite.uio.no/pub/languages/perl/CPAN/
+    ftp://ftp.uit.no/pub/languages/perl/cpan/
+
+=item *
+
+Poland
+
+    ftp://ftp.pk.edu.pl/pub/lang/perl/CPAN/
+    ftp://ftp.mega.net.pl/pub/mirrors/ftp.perl.com/
+    ftp://ftp.man.torun.pl/pub/doc/CPAN/
+    ftp://sunsite.icm.edu.pl/pub/CPAN/
+
+=item *
+
+Portugal
+
+    ftp://ftp.ua.pt/pub/CPAN/
+    ftp://perl.di.uminho.pt/pub/CPAN/
+    ftp://ftp.ist.utl.pt/pub/CPAN/
+    ftp://ftp.netc.pt/pub/CPAN/
+
+=item *
+
+Romania
+
+    ftp://archive.logicnet.ro/mirrors/ftp.cpan.org/CPAN/
+    ftp://ftp.kappa.ro/pub/mirrors/ftp.perl.org/pub/CPAN/
+    ftp://ftp.dntis.ro/pub/cpan/
+    ftp://ftp.opsynet.com/cpan/
+    ftp://ftp.dnttm.ro/pub/CPAN/
+    ftp://ftp.timisoara.roedu.net/mirrors/CPAN/
+
+=item *
+
+Russia
+
+    ftp://ftp.chg.ru/pub/lang/perl/CPAN/
+    http://cpan.rinet.ru/
+    ftp://cpan.rinet.ru/pub/mirror/CPAN/
+    ftp://ftp.aha.ru/pub/CPAN/
+    ftp://ftp.sai.msu.su/pub/lang/perl/CPAN/
+
+=item *
+
+Slovakia
+
+    ftp://ftp.entry.sk/pub/languages/perl/CPAN/
+
+=item *
+
+Slovenia
+
+    ftp://ftp.arnes.si/software/perl/CPAN/
+
+=item *
+
+Spain
+
+    ftp://ftp.rediris.es/mirror/CPAN/
+    ftp://ftp.etse.urv.es/pub/perl/
+
+=item *
+
+Sweden
+
+    http://ftp.du.se/CPAN/
+    ftp://ftp.du.se/pub/CPAN/
+    ftp://ftp.sunet.se/pub/lang/perl/CPAN/
+
+=item *
+
+Switzerland
+
+    ftp://ftp.danyk.ch/CPAN/
+    ftp://sunsite.cnlab-switch.ch/mirror/CPAN/
+
+=item *
+
+Turkey
+
+    ftp://sunsite.bilkent.edu.tr/pub/languages/CPAN/
+
+=back
+
+=head2 North America
+
+=over 4
+
+=item *
+
+Canada
+
+=over 8
+
+=item *
+
+Alberta
+
+    http://sunsite.ualberta.ca/pub/Mirror/CPAN/
+    ftp://sunsite.ualberta.ca/pub/Mirror/CPAN/
+
+=item *
+
+Manitoba
+
+    http://theoryx5.uwinnipeg.ca/pub/CPAN/
+    ftp://theoryx5.uwinnipeg.ca/pub/CPAN/
+
+=item *
+
+Nova Scotia
+
+    ftp://cpan.chebucto.ns.ca/pub/CPAN/
+
+=item *
+
+Ontario
+
+    ftp://ftp.crc.ca/pub/packages/lang/perl/CPAN/
+
+=back
+
+=item *
+
+Mexico
+
+    http://www.msg.com.mx/CPAN/
+    ftp://ftp.msg.com.mx/pub/CPAN/
+
+=item *
+
+United States
+
+=over 8
+
+=item *
+
+Alabama
+
+    http://mirror.hiwaay.net/CPAN/
+    ftp://mirror.hiwaay.net/CPAN/
+
+=item *
+
+California
+
+    http://www.cpan.org/
+    ftp://ftp.cpan.org/CPAN/
+    ftp://cpan.nas.nasa.gov/pub/perl/CPAN/
+    ftp://ftp.digital.com/pub/plan/perl/CPAN/
+    http://www.kernel.org/pub/mirrors/cpan/
+    ftp://ftp.kernel.org/pub/mirrors/cpan/
+    http://www.perl.com/CPAN/
+    http://download.sourceforge.net/mirrors/CPAN/
+
+=item *
+
+Colorado
+
+    ftp://ftp.cs.colorado.edu/pub/perl/CPAN/
+
+=item *
+
+Florida
+
+    ftp://ftp.cise.ufl.edu/pub/perl/CPAN/
+
+=item *
+
+Georgia
+
+    ftp://ftp.twoguys.org/CPAN/
+
+=item *
+
+Illinois
+
+    http://www.neurogames.com/mirrors/CPAN
+    http://uiarchive.uiuc.edu/mirrors/ftp/ftp.cpan.org/pub/CPAN/
+    ftp://uiarchive.uiuc.edu/mirrors/ftp/ftp.cpan.org/pub/CPAN/
+
+=item *
+
+Indiana
+
+    ftp://ftp.uwsg.indiana.edu/pub/perl/CPAN/
+    http://cpan.nitco.com/
+    ftp://cpan.nitco.com/pub/CPAN/
+    ftp://cpan.in-span.net/
+    http://csociety-ftp.ecn.purdue.edu/pub/CPAN
+    ftp://csociety-ftp.ecn.purdue.edu/pub/CPAN
+
+=item *
+
+Kentucky
+
+    http://cpan.uky.edu/
+    ftp://cpan.uky.edu/pub/CPAN/
+
+=item *
+
+Massachusetts
+
+    ftp://ftp.ccs.neu.edu/net/mirrors/ftp.funet.fi/pub/languages/perl/CPAN/
+    ftp://ftp.iguide.com/pub/mirrors/packages/perl/CPAN/
+
+=item *
+
+New Jersey
+
+    ftp://ftp.cpanel.net/pub/CPAN/
+
+=item *
+
+New York
+
+    ftp://ftp.freesoftware.com/pub/perl/CPAN/
+    http://www.deao.net/mirrors/CPAN/
+    ftp://ftp.deao.net/pub/CPAN/
+    ftp://ftp.stealth.net/pub/mirrors/ftp.cpan.org/pub/CPAN/
+    http://mirror.nyc.anidea.com/CPAN/
+    ftp://mirror.nyc.anidea.com/pub/CPAN/
+    http://www.rge.com/pub/languages/perl/
+    ftp://ftp.rge.com/pub/languages/perl/
+    ftp://mirrors.cloud9.net/pub/mirrors/CPAN/
+
+=item *
+
+North Carolina
+
+    ftp://ftp.duke.edu/pub/perl/
+
+=item *
+
+Ohio
+
+    ftp://ftp.loaded.net/pub/CPAN/
+
+=item *
+
+Oklahoma
+
+    ftp://ftp.ou.edu/mirrors/CPAN/
+
+=item *
+
+Oregon
+
+    ftp://ftp.orst.edu/pub/packages/CPAN/
+
+=item *
+
+Pennsylvania
+
+    http://ftp.epix.net/CPAN/
+    ftp://ftp.epix.net/pub/languages/perl/
+    ftp://carroll.cac.psu.edu/pub/CPAN/
+
+=item *
+
+Tennessee
+
+    ftp://ftp.sunsite.utk.edu/pub/CPAN/
+
+=item *
+
+Texas
+
+    http://ftp.sedl.org/pub/mirrors/CPAN/
+    http://jhcloos.com/pub/mirror/CPAN/
+    ftp://jhcloos.com/pub/mirror/CPAN/
+
+=item *
+
+Utah
+
+    ftp://mirror.xmission.com/CPAN/
+
+=item *
+
+Virginia
+
+    http://mirrors.rcn.net/pub/lang/CPAN/
+    ftp://mirrors.rcn.net/pub/lang/CPAN/
+    ftp://ruff.cs.jmu.edu/pub/CPAN/
+    http://perl.Liquidation.com/CPAN/
+
+=item *
+
+Washington
+
+    http://cpan.llarian.net/
+    ftp://cpan.llarian.net/pub/CPAN/
+    ftp://ftp-mirror.internap.com/pub/CPAN/
+    ftp://ftp.spu.edu/pub/CPAN/
+
+=back
+
+=back
+
+=head2 Oceania
+
+=over 4
+
+=item *
+
+Australia
+
+    http://ftp.planetmirror.com/pub/CPAN/
+    ftp://ftp.planetmirror.com/pub/CPAN/
+    ftp://mirror.aarnet.edu.au/pub/perl/CPAN/
+    ftp://cpan.topend.com.au/pub/CPAN/
+
+=item *
+
+New Zealand
+
+    ftp://ftp.auckland.ac.nz/pub/perl/CPAN/
+
+=back
+
+=head2 South America
+
+=over 4
+
+=item *
+
+Argentina
+
+    ftp://mirrors.bannerlandia.com.ar/mirrors/CPAN/
+
+=item *
+
+Brazil
+
+    ftp://cpan.pop-mg.com.br/pub/CPAN/
+    ftp://ftp.matrix.com.br/pub/perl/
+    ftp://cpan.if.usp.br/pub/mirror/CPAN/
+
+=item *
+
+Chile
+
+    ftp://ftp.psinet.cl/pub/programming/perl/CPAN/
+    ftp://sunsite.dcc.uchile.cl/pub/lang/perl/
+
+=back
+
+For an up-to-date listing of CPAN sites,
+see http://www.cpan.org/SITES or ftp://www.cpan.org/SITES .
+
+=head1 Modules: Creation, Use, and Abuse
+
+(The following section is borrowed directly from Tim Bunce's modules
+file, available at your nearest CPAN site.)
+
+Perl implements a class using a package, but the presence of a
+package doesn't imply the presence of a class.  A package is just a
+namespace.  A class is a package that provides subroutines that can be
+used as methods.  A method is just a subroutine that expects, as its
+first argument, either the name of a package (for "static" methods),
+or a reference to something (for "virtual" methods).
+
+A module is a file that (by convention) provides a class of the same
+name (sans the .pm), plus an import method in that class that can be
+called to fetch exported symbols.  This module may implement some of
+its methods by loading dynamic C or C++ objects, but that should be
+totally transparent to the user of the module.  Likewise, the module
+might set up an AUTOLOAD function to slurp in subroutine definitions on
+demand, but this is also transparent.  Only the F<.pm> file is required to
+exist.  See L<perlsub>, L<perltoot>, and L<AutoLoader> for details about
+the AUTOLOAD mechanism.
+
+=head2 Guidelines for Module Creation
+
+=over 4
+
+=item  *
+
+Do similar modules already exist in some form?
+
+If so, please try to reuse the existing modules either in whole or
+by inheriting useful features into a new class.  If this is not
+practical try to get together with the module authors to work on
+extending or enhancing the functionality of the existing modules.
+A perfect example is the plethora of packages in perl4 for dealing
+with command line options.
+
+If you are writing a module to expand an already existing set of
+modules, please coordinate with the author of the package.  It
+helps if you follow the same naming scheme and module interaction
+scheme as the original author.
+
+=item  *
+
+Try to design the new module to be easy to extend and reuse.
+
+Try to C<use warnings;> (or C<use warnings qw(...);>).
+Remember that you can add C<no warnings qw(...);> to individual blocks
+of code that need fewer warnings.
+
+Use blessed references.  Use the two argument form of bless to bless
+into the class name given as the first parameter of the constructor,
+e.g.,:
+
+ sub new {
+     my $class = shift;
+     return bless {}, $class;
+ }
+
+or even this if you'd like it to be used as either a static
+or a virtual method.
+
+ sub new {
+     my $self  = shift;
+     my $class = ref($self) || $self;
+     return bless {}, $class;
+ }
+
+Pass arrays as references so more parameters can be added later
+(it's also faster).  Convert functions into methods where
+appropriate.  Split large methods into smaller more flexible ones.
+Inherit methods from other modules if appropriate.
+
+Avoid class name tests like: C<die "Invalid" unless ref $ref eq 'FOO'>.
+Generally you can delete the C<eq 'FOO'> part with no harm at all.
+Let the objects look after themselves! Generally, avoid hard-wired
+class names as far as possible.
+
+Avoid C<< $r->Class::func() >> where using C<@ISA=qw(... Class ...)> and
+C<< $r->func() >> would work (see L<perlbot> for more details).
+
+Use autosplit so little used or newly added functions won't be a
+burden to programs that don't use them. Add test functions to
+the module after __END__ either using AutoSplit or by saying:
+
+ eval join('',<main::DATA>) || die $@ unless caller();
+
+Does your module pass the 'empty subclass' test? If you say
+C<@SUBCLASS::ISA = qw(YOURCLASS);> your applications should be able
+to use SUBCLASS in exactly the same way as YOURCLASS.  For example,
+does your application still work if you change:  C<$obj = new YOURCLASS;>
+into: C<$obj = new SUBCLASS;> ?
+
+Avoid keeping any state information in your packages. It makes it
+difficult for multiple other packages to use yours. Keep state
+information in objects.
+
+Always use B<-w>.
+
+Try to C<use strict;> (or C<use strict qw(...);>).
+Remember that you can add C<no strict qw(...);> to individual blocks
+of code that need less strictness.
+
+Always use B<-w>.
+
+Follow the guidelines in the perlstyle(1) manual.
+
+Always use B<-w>.
+
+=item  *
+
+Some simple style guidelines
+
+The perlstyle manual supplied with Perl has many helpful points.
+
+Coding style is a matter of personal taste. Many people evolve their
+style over several years as they learn what helps them write and
+maintain good code.  Here's one set of assorted suggestions that
+seem to be widely used by experienced developers:
+
+Use underscores to separate words.  It is generally easier to read
+$var_names_like_this than $VarNamesLikeThis, especially for
+non-native speakers of English. It's also a simple rule that works
+consistently with VAR_NAMES_LIKE_THIS.
+
+Package/Module names are an exception to this rule. Perl informally
+reserves lowercase module names for 'pragma' modules like integer
+and strict. Other modules normally begin with a capital letter and
+use mixed case with no underscores (need to be short and portable).
+
+You may find it helpful to use letter case to indicate the scope
+or nature of a variable. For example:
+
+ $ALL_CAPS_HERE   constants only (beware clashes with Perl vars)
+ $Some_Caps_Here  package-wide global/static
+ $no_caps_here    function scope my() or local() variables
+
+Function and method names seem to work best as all lowercase.
+e.g., C<< $obj->as_string() >>.
+
+You can use a leading underscore to indicate that a variable or
+function should not be used outside the package that defined it.
+
+=item  *
+
+Select what to export.
+
+Do NOT export method names!
+
+Do NOT export anything else by default without a good reason!
+
+Exports pollute the namespace of the module user.  If you must
+export try to use @EXPORT_OK in preference to @EXPORT and avoid
+short or common names to reduce the risk of name clashes.
+
+Generally anything not exported is still accessible from outside the
+module using the ModuleName::item_name (or C<< $blessed_ref->method >>)
+syntax.  By convention you can use a leading underscore on names to
+indicate informally that they are 'internal' and not for public use.
+
+(It is actually possible to get private functions by saying:
+C<my $subref = sub { ... };  &$subref;>.  But there's no way to call that
+directly as a method, because a method must have a name in the symbol
+table.)
+
+As a general rule, if the module is trying to be object oriented
+then export nothing. If it's just a collection of functions then
+@EXPORT_OK anything but use @EXPORT with caution.
+
+=item  *
+
+Select a name for the module.
+
+This name should be as descriptive, accurate, and complete as
+possible.  Avoid any risk of ambiguity. Always try to use two or
+more whole words.  Generally the name should reflect what is special
+about what the module does rather than how it does it.  Please use
+nested module names to group informally or categorize a module.
+There should be a very good reason for a module not to have a nested name.
+Module names should begin with a capital letter.
+
+Having 57 modules all called Sort will not make life easy for anyone
+(though having 23 called Sort::Quick is only marginally better :-).
+Imagine someone trying to install your module alongside many others.
+If in any doubt ask for suggestions in comp.lang.perl.misc.
+
+If you are developing a suite of related modules/classes it's good
+practice to use nested classes with a common prefix as this will
+avoid namespace clashes. For example: Xyz::Control, Xyz::View,
+Xyz::Model etc. Use the modules in this list as a naming guide.
+
+If adding a new module to a set, follow the original author's
+standards for naming modules and the interface to methods in
+those modules.
+
+If developing modules for private internal or project specific use,
+that will never be released to the public, then you should ensure
+that their names will not clash with any future public module. You
+can do this either by using the reserved Local::* category or by
+using a category name that includes an underscore like Foo_Corp::*.
+
+To be portable each component of a module name should be limited to
+11 characters. If it might be used on MS-DOS then try to ensure each is
+unique in the first 8 characters. Nested modules make this easier.
+
+=item  *
+
+Have you got it right?
+
+How do you know that you've made the right decisions? Have you
+picked an interface design that will cause problems later? Have
+you picked the most appropriate name? Do you have any questions?
+
+The best way to know for sure, and pick up many helpful suggestions,
+is to ask someone who knows. Comp.lang.perl.misc is read by just about
+all the people who develop modules and it's the best place to ask.
+
+All you need to do is post a short summary of the module, its
+purpose and interfaces. A few lines on each of the main methods is
+probably enough. (If you post the whole module it might be ignored
+by busy people - generally the very people you want to read it!)
+
+Don't worry about posting if you can't say when the module will be
+ready - just say so in the message. It might be worth inviting
+others to help you, they may be able to complete it for you!
+
+=item  *
+
+README and other Additional Files.
+
+It's well known that software developers usually fully document the
+software they write. If, however, the world is in urgent need of
+your software and there is not enough time to write the full
+documentation please at least provide a README file containing:
+
+=over 10
+
+=item *
+
+A description of the module/package/extension etc.
+
+=item *
+
+A copyright notice - see below.
+
+=item *
+
+Prerequisites - what else you may need to have.
+
+=item *
+
+How to build it - possible changes to Makefile.PL etc.
+
+=item *
+
+How to install it.
+
+=item *
+
+Recent changes in this release, especially incompatibilities
+
+=item *
+
+Changes / enhancements you plan to make in the future.
+
+=back
+
+If the README file seems to be getting too large you may wish to
+split out some of the sections into separate files: INSTALL,
+Copying, ToDo etc.
+
+=over 4
+
+=item Adding a Copyright Notice.
+
+
+How you choose to license your work is a personal decision.
+The general mechanism is to assert your Copyright and then make
+a declaration of how others may copy/use/modify your work.
+
+Perl, for example, is supplied with two types of licence: The GNU
+GPL and The Artistic Licence (see the files README, Copying, and
+Artistic).  Larry has good reasons for NOT just using the GNU GPL.
+
+My personal recommendation, out of respect for Larry, Perl, and the
+Perl community at large is to state something simply like:
+
+ Copyright (c) 1995 Your Name. All rights reserved.
+ This program is free software; you can redistribute it and/or
+ modify it under the same terms as Perl itself.
+
+This statement should at least appear in the README file. You may
+also wish to include it in a Copying file and your source files.
+Remember to include the other words in addition to the Copyright.
+
+=item  *
+
+Give the module a version/issue/release number.
+
+To be fully compatible with the Exporter and MakeMaker modules you
+should store your module's version number in a non-my package
+variable called $VERSION.  This should be a floating point
+number with at least two digits after the decimal (i.e., hundredths,
+e.g., C<$VERSION = "0.01">).  Don't use a "1.3.2" style version.
+See L<Exporter> for details.
+
+It may be handy to add a function or method to retrieve the number.
+Use the number in announcements and archive file names when
+releasing the module (ModuleName-1.02.tar.Z).
+See perldoc ExtUtils::MakeMaker.pm for details.
+
+=item  *
+
+How to release and distribute a module.
+
+It's a good idea to post an announcement of the availability of your
+module (or the module itself if small) to the comp.lang.perl.announce
+Usenet newsgroup.  This will at least ensure very wide once-off
+distribution.
+
+If possible, register the module with CPAN.  You should
+include details of its location in your announcement.
+
+Some notes about ftp archives: Please use a long descriptive file
+name that includes the version number. Most incoming directories
+will not be readable/listable, i.e., you won't be able to see your
+file after uploading it. Remember to send your email notification
+message as soon as possible after uploading else your file may get
+deleted automatically. Allow time for the file to be processed
+and/or check the file has been processed before announcing its
+location.
+
+FTP Archives for Perl Modules:
+
+Follow the instructions and links on:
+
+   http://www.cpan.org/modules/00modlist.long.html
+   http://www.cpan.org/modules/04pause.html
+
+or upload to one of these sites:
+
+   https://pause.kbx.de/pause/
+   http://pause.perl.org/pause/
+
+and notify <modules@perl.org>.
+
+By using the WWW interface you can ask the Upload Server to mirror
+your modules from your ftp or WWW site into your own directory on
+CPAN!
+
+Please remember to send me an updated entry for the Module list!
+
+=item  *
+
+Take care when changing a released module.
+
+Always strive to remain compatible with previous released versions.
+Otherwise try to add a mechanism to revert to the
+old behavior if people rely on it.  Document incompatible changes.
+
+=back
+
+=back
+
+=head2 Guidelines for Converting Perl 4 Library Scripts into Modules
+
+=over 4
+
+=item  *
+
+There is no requirement to convert anything.
+
+If it ain't broke, don't fix it! Perl 4 library scripts should
+continue to work with no problems. You may need to make some minor
+changes (like escaping non-array @'s in double quoted strings) but
+there is no need to convert a .pl file into a Module for just that.
+
+=item  *
+
+Consider the implications.
+
+All Perl applications that make use of the script will need to
+be changed (slightly) if the script is converted into a module.  Is
+it worth it unless you plan to make other changes at the same time?
+
+=item  *
+
+Make the most of the opportunity.
+
+If you are going to convert the script to a module you can use the
+opportunity to redesign the interface.  The guidelines for module
+creation above include many of the issues you should consider.
+
+=item  *
+
+The pl2pm utility will get you started.
+
+This utility will read *.pl files (given as parameters) and write
+corresponding *.pm files. The pl2pm utility does the following:
+
+=over 10
+
+=item *
+
+Adds the standard Module prologue lines
+
+=item *
+
+Converts package specifiers from ' to ::
+
+=item *
+
+Converts die(...) to croak(...)
+
+=item *
+
+Several other minor changes
+
+=back
+
+Being a mechanical process pl2pm is not bullet proof. The converted
+code will need careful checking, especially any package statements.
+Don't delete the original .pl file till the new .pm one works!
+
+=back
+
+=head2 Guidelines for Reusing Application Code
+
+=over 4
+
+=item  *
+
+Complete applications rarely belong in the Perl Module Library.
+
+=item  *
+
+Many applications contain some Perl code that could be reused.
+
+Help save the world! Share your code in a form that makes it easy
+to reuse.
+
+=item  *
+
+Break-out the reusable code into one or more separate module files.
+
+=item  *
+
+Take the opportunity to reconsider and redesign the interfaces.
+
+=item  *
+
+In some cases the 'application' can then be reduced to a small
+fragment of code built on top of the reusable modules.
+
+In these cases the application could be invoked as:
+
+     % perl -e 'use Module::Name; method(@ARGV)' ...
+or
+     % perl -mModule::Name ...    (in perl5.002 or higher)
+
+=back
+
+=head1 NOTE
+
+Perl does not enforce private and public parts of its modules as you may
+have been used to in other languages like C++, Ada, or Modula-17.  Perl
+doesn't have an infatuation with enforced privacy.  It would prefer
+that you stayed out of its living room because you weren't invited, not
+because it has a shotgun.
+
+The module and its user have a contract, part of which is common law,
+and part of which is "written".  Part of the common law contract is
+that a module doesn't pollute any namespace it wasn't asked to.  The
+written contract for the module (A.K.A. documentation) may make other
+provisions.  But then you know when you C<use RedefineTheWorld> that
+you're redefining the world and willing to take the consequences.
+EOF
+
+close MANIFEST or warn "$0: failed to close MANIFEST (../MANIFEST): $!";	# input handle: a failed close is only reported
+close OUT      or warn "$0: failed to close OUT (perlmodlib.tmp): $!";	# NOTE(review): a failed close of the output is also only warned about -- confirm this is intended
+
diff --git a/contrib/perl5/pod/perlnewmod.pod b/contrib/perl5/pod/perlnewmod.pod
new file mode 100644
index 0000000000000..ace8d85130f65
--- /dev/null
+++ b/contrib/perl5/pod/perlnewmod.pod
@@ -0,0 +1,282 @@
+=head1 NAME
+
+perlnewmod - preparing a new module for distribution
+
+=head1 DESCRIPTION
+
+This document gives you some suggestions about how to go about writing
+Perl modules, preparing them for distribution, and making them available
+via CPAN.
+
+One of the things that makes Perl really powerful is the fact that Perl
+hackers tend to want to share the solutions to problems they've faced,
+so you and I don't have to battle with the same problem again.
+
+The main way they do this is by abstracting the solution into a Perl
+module. If you don't know what one of these is, the rest of this
+document isn't going to be much use to you. You're also missing out on
+an awful lot of useful code; consider having a look at L<perlmod>,
+L<perlmodlib> and L<perlmodinstall> before coming back here.
+
+When you've found that there isn't a module available for what you're
+trying to do, and you've had to write the code yourself, consider
+packaging up the solution into a module and uploading it to CPAN so that
+others can benefit.
+
+=head2 Warning
+
+We're going to primarily concentrate on Perl-only modules here, rather
+than XS modules. XS modules serve a rather different purpose, and
+you should consider different things before distributing them - the
+popularity of the library you are gluing, the portability to other
+operating systems, and so on. However, the notes on preparing the Perl
+side of the module and packaging and distributing it will apply equally
+well to an XS module as a pure-Perl one.
+
+=head2 What should I make into a module?
+
+You should make a module out of any code that you think is going to be
+useful to others. Anything that's likely to fill a hole in the communal
+library and which someone else can slot directly into their program. Any
+part of your code which you can isolate and extract and plug into
+something else is a likely candidate.
+
+Let's take an example. Suppose you're reading in data from a local
+format into a hash-of-hashes in Perl, turning that into a tree, walking
+the tree and then piping each node to an Acme Transmogrifier Server.
+
+Now, quite a few people have the Acme Transmogrifier, and you've had to
+write something to talk the protocol from scratch - you'd almost
+certainly want to make that into a module. The level at which you pitch
+it is up to you: you might want protocol-level modules analogous to
+L<Net::SMTP|Net::SMTP> which then talk to higher level modules analogous
+to L<Mail::Send|Mail::Send>. The choice is yours, but you do want to get
+a module out for that server protocol.
+
+Nobody else on the planet is going to talk your local data format, so we
+can ignore that. But what about the thing in the middle? Building tree
+structures from Perl variables and then traversing them is a nice,
+general problem, and if nobody's already written a module that does
+that, you might want to modularise that code too.
+
+So hopefully you've now got a few ideas about what's good to modularise.
+Let's now see how it's done.
+
+=head2 Step-by-step: Preparing the ground
+
+Before we even start scraping out the code, there are a few things we'll
+want to do in advance.
+
+=over 3
+
+=item Look around
+
+Dig into a bunch of modules to see how they're written. I'd suggest
+starting with L<Text::Tabs|Text::Tabs>, since it's in the standard
+library and is nice and simple, and then looking at something like
+L<Time::Zone|Time::Zone>, L<File::Copy|File::Copy> and then some of the
+C<Mail::*> modules if you're planning on writing object oriented code.
+
+These should give you an overall feel for how modules are laid out and
+written.
+
+=item Check it's new
+
+There are a lot of modules on CPAN, and it's easy to miss one that's
+similar to what you're planning on contributing. Have a good plough
+through the modules list and the F<by-module> directories, and make sure
+you're not the one reinventing the wheel!
+
+=item Discuss the need
+
+You might love it. You might feel that everyone else needs it. But there
+might not actually be any real demand for it out there. If you're unsure
+about the demand your module will have, consider sending out feelers
+on the C<comp.lang.perl.modules> newsgroup, or as a last resort, ask the
+modules list at C<modules@perl.org>. Remember that this is a closed list
+with a very long turn-around time - be prepared to wait a good while for
+a response from them.
+
+=item Choose a name
+
+Perl modules included on CPAN have a naming hierarchy you should try to
+fit in with. See L<perlmodlib> for more details on how this works, and
+browse around CPAN and the modules list to get a feel of it. At the very
+least, remember this: modules should be title capitalised (This::Thing),
+fit in with a category, and explain their purpose succinctly.
+
+=item Check again
+
+While you're doing that, make really sure you haven't missed a module
+similar to the one you're about to write.
+
+When you've got your name sorted out and you're sure that your module is
+wanted and not currently available, it's time to start coding.
+
+=back
+
+=head2 Step-by-step: Making the module
+
+=over 3
+
+=item Start with F<h2xs>
+
+Originally a utility to convert C header files into XS modules,
+L<h2xs|h2xs> has become a useful utility for churning out skeletons for
+Perl-only modules as well. If you don't want to use the
+L<AutoLoader|AutoLoader> which splits up big modules into smaller
+subroutine-sized chunks, you'll say something like this:
+
+    h2xs -AX -n Net::Acme
+
+The C<-A> omits the AutoLoader code, C<-X> omits XS elements, and C<-n>
+specifies the name of the module.
+
+=item Use L<strict|strict> and L<warnings|warnings>
+
+A module's code has to be warning and strict-clean, since you can't
+guarantee the conditions that it'll be used under. Besides, you wouldn't
+want to distribute code that wasn't warning or strict-clean anyway,
+right?
+
+=item Use L<Carp|Carp>
+
+The L<Carp|Carp> module allows you to present your error messages from
+the caller's perspective; this gives you a way to signal a problem with
+the caller and not your module. For instance, if you say this:
+
+    warn "No hostname given";
+
+the user will see something like this:
+
+    No hostname given at /usr/local/lib/perl5/site_perl/5.6.0/Net/Acme.pm
+    line 123.
+
+which looks like your module is doing something wrong. Instead, you want
+to put the blame on the user, and say this:
+
+    No hostname given at bad_code, line 10.
+
+You do this by using L<Carp|Carp> and replacing your C<warn>s with
+C<carp>s. If you need to C<die>, say C<croak> instead. However, keep
+C<warn> and C<die> in place for your sanity checks - where it really is
+your module at fault.
+
+=item Use L<Exporter|Exporter> - wisely!
+
+C<h2xs> provides stubs for L<Exporter|Exporter>, which gives you a
+standard way of exporting symbols and subroutines from your module into
+the caller's namespace. For instance, saying C<use Net::Acme qw(&frob)>
+would import the C<frob> subroutine.
+
+The package variable C<@EXPORT> will determine which symbols will get
+exported when the caller simply says C<use Net::Acme> - you will hardly
+ever want to put anything in there. C<@EXPORT_OK>, on the other hand,
+specifies which symbols you're willing to export. If you do want to
+export a bunch of symbols, use the C<%EXPORT_TAGS> and define a standard
+export set - look at L<Exporter> for more details.
+
+=item Use L<plain old documentation|perlpod>
+
+The work isn't over until the paperwork is done, and you're going to
+need to put in some time writing some documentation for your module.
+C<h2xs> will provide a stub for you to fill in; if you're not sure about
+the format, look at L<perlpod> for an introduction. Provide a good
+synopsis of how your module is used in code, a description, and then
+notes on the syntax and function of the individual subroutines or
+methods. Use Perl comments for developer notes and POD for end-user
+notes.
+
+=item Write tests
+
+You're encouraged to create self-tests for your module to ensure it's
+working as intended on the myriad platforms Perl supports; if you upload
+your module to CPAN, a host of testers will build your module and send
+you the results of the tests. Again, C<h2xs> provides a test framework
+which you can extend - you should do something more than just checking
+your module will compile.
+
+=item Write the README
+
+If you're uploading to CPAN, the automated gremlins will extract the
+README file and place that in your CPAN directory. It'll also appear in
+the main F<by-module> and F<by-category> directories if you make it onto
+the modules list. It's a good idea to put here what the module actually
+does in detail, and the user-visible changes since the last release.
+
+=back
+
+=head2 Step-by-step: Distributing your module
+
+=over 3
+
+=item Get a CPAN user ID
+
+Every developer publishing modules on CPAN needs a CPAN ID. See the
+instructions at C<http://www.cpan.org/modules/04pause.html> (or
+equivalent on your nearest mirror) to find out how to do this.
+
+=item C<perl Makefile.PL; make test; make dist>
+
+Once again, C<h2xs> has done all the work for you. It produces the
+standard C<Makefile.PL> you'll have seen when you downloaded and
+installed modules, and this produces a Makefile with a C<dist> target.
+
+Once you've ensured that your module passes its own tests - always a
+good thing to make sure - you can C<make dist>, and the Makefile will
+hopefully produce you a nice tarball of your module, ready for upload.
+
+=item Upload the tarball
+
+The email you got when you received your CPAN ID will tell you how to
+log in to PAUSE, the Perl Authors Upload SErver. From the menus there,
+you can upload your module to CPAN.
+
+=item Announce to the modules list
+
+Once uploaded, it'll sit unnoticed in your author directory. If you want
+it connected to the rest of the CPAN, you'll need to tell the modules
+list about it. The best way to do this is to email them a line in the
+style of the modules list, like this:
+
+    Net::Acme bdpO  Interface to Acme Frobnicator servers         FOOBAR
+    ^         ^^^^  ^                                             ^
+    |         ||||  Module description                            Your ID
+    |         ||||
+    |         |||\- Interface: (O)OP, (r)eferences, (h)ybrid, (f)unctions
+    |         |||
+    |         ||\-- Language: (p)ure Perl, C(+)+, (h)ybrid, (C), (o)ther
+    |         ||
+    Module    |\--- Support: (d)eveloper, (m)ailing list, (u)senet, (n)one
+    Name      |
+              \---- Maturity: (i)dea, (c)onstructions, (a)lpha, (b)eta,
+                              (R)eleased, (M)ature, (S)tandard
+
+plus a description of the module and why you think it should be
+included. If you hear nothing back, that means your module will
+probably appear on the modules list at the next update. Don't try
+subscribing to C<modules@perl.org>; it's not another mailing list. Just
+have patience.
+
+=item Announce to clpa
+
+If you have a burning desire to tell the world about your release, post
+an announcement to the moderated C<comp.lang.perl.announce> newsgroup.
+
+=item Fix bugs!
+
+Once you start accumulating users, they'll send you bug reports. If
+you're lucky, they'll even send you patches. Welcome to the joys of
+maintaining a software project...
+
+=back
+
+=head1 AUTHOR
+
+Simon Cozens, C<simon@cpan.org>
+
+=head1 SEE ALSO
+
+L<perlmod>, L<perlmodlib>, L<perlmodinstall>, L<h2xs>, L<strict>,
+L<Carp>, L<Exporter>, L<perlpod>, L<Test>, L<ExtUtils::MakeMaker>,
+http://www.cpan.org/
diff --git a/contrib/perl5/pod/perlrequick.pod b/contrib/perl5/pod/perlrequick.pod
new file mode 100644
index 0000000000000..5b72a35187faf
--- /dev/null
+++ b/contrib/perl5/pod/perlrequick.pod
@@ -0,0 +1,503 @@
+=head1 NAME
+
+perlrequick - Perl regular expressions quick start
+
+=head1 DESCRIPTION
+
+This page covers the very basics of understanding, creating and
+using regular expressions ('regexes') in Perl.
+
+
+=head1 The Guide
+
+=head2 Simple word matching
+
+The simplest regex is simply a word, or more generally, a string of
+characters.  A regex consisting of a word matches any string that
+contains that word:
+
+    "Hello World" =~ /World/;  # matches
+
+In this statement, C<World> is a regex and the C<//> enclosing
+C</World/> tells perl to search a string for a match.  The operator
+C<=~> associates the string with the regex match and produces a true
+value if the regex matched, or false if the regex did not match.  In
+our case, C<World> matches the second word in C<"Hello World">, so the
+expression is true.  This idea has several variations.
+
+Expressions like this are useful in conditionals:
+
+    print "It matches\n" if "Hello World" =~ /World/;
+
+The sense of the match can be reversed by using the C<!~> operator:
+
+    print "It doesn't match\n" if "Hello World" !~ /World/;
+
+The literal string in the regex can be replaced by a variable:
+
+    $greeting = "World";
+    print "It matches\n" if "Hello World" =~ /$greeting/;
+
+If you're matching against C<$_>, the C<$_ =~> part can be omitted:
+
+    $_ = "Hello World";
+    print "It matches\n" if /World/;
+
+Finally, the C<//> default delimiters for a match can be changed to
+arbitrary delimiters by putting an C<'m'> out front:
+
+    "Hello World" =~ m!World!;   # matches, delimited by '!'
+    "Hello World" =~ m{World};   # matches, note the matching '{}'
+    "/usr/bin/perl" =~ m"/perl"; # matches after '/usr/bin',
+                                 # '/' becomes an ordinary char
+
+Regexes must match a part of the string I<exactly> in order for the
+statement to be true:
+
+    "Hello World" =~ /world/;  # doesn't match, case sensitive
+    "Hello World" =~ /o W/;    # matches, ' ' is an ordinary char
+    "Hello World" =~ /World /; # doesn't match, no ' ' at end
+
+perl will always match at the earliest possible point in the string:
+
+    "Hello World" =~ /o/;       # matches 'o' in 'Hello'
+    "That hat is red" =~ /hat/; # matches 'hat' in 'That'
+
+Not all characters can be used 'as is' in a match.  Some characters,
+called B<metacharacters>, are reserved for use in regex notation.
+The metacharacters are
+
+    {}[]()^$.|*+?\
+
+A metacharacter can be matched by putting a backslash before it:
+
+    "2+2=4" =~ /2+2/;    # doesn't match, + is a metacharacter
+    "2+2=4" =~ /2\+2/;   # matches, \+ is treated like an ordinary +
+    'C:\WIN32' =~ /C:\\WIN/;                       # matches
+    "/usr/bin/perl" =~ /\/usr\/bin\/perl/;         # matches
+
+In the last regex, the forward slash C<'/'> is also backslashed,
+because it is used to delimit the regex.
+
+Non-printable ASCII characters are represented by B<escape sequences>.
+Common examples are C<\t> for a tab, C<\n> for a newline, and C<\r>
+for a carriage return.  Arbitrary bytes are represented by octal
+escape sequences, e.g., C<\033>, or hexadecimal escape sequences,
+e.g., C<\x1B>:
+
+    "1000\t2000" =~ m(0\t2)        # matches
+    "cat"        =~ /\143\x61\x74/ # matches, but a weird way to spell cat
+
+Regexes are treated mostly as double quoted strings, so variable
+substitution works:
+
+    $foo = 'house';
+    'cathouse' =~ /cat$foo/;   # matches
+    'housecat' =~ /${foo}cat/; # matches
+
+With all of the regexes above, if the regex matched anywhere in the
+string, it was considered a match.  To specify I<where> it should
+match, we would use the B<anchor> metacharacters C<^> and C<$>.  The
+anchor C<^> means match at the beginning of the string and the anchor
+C<$> means match at the end of the string, or before a newline at the
+end of the string.  Some examples:
+
+    "housekeeper" =~ /keeper/;         # matches
+    "housekeeper" =~ /^keeper/;        # doesn't match
+    "housekeeper" =~ /keeper$/;        # matches
+    "housekeeper\n" =~ /keeper$/;      # matches
+    "housekeeper" =~ /^housekeeper$/;  # matches
+
+=head2 Using character classes
+
+A B<character class> allows a set of possible characters, rather than
+just a single character, to match at a particular point in a regex.
+Character classes are denoted by brackets C<[...]>, with the set of
+characters to be possibly matched inside.  Here are some examples:
+
+    /cat/;            # matches 'cat'
+    /[bcr]at/;        # matches 'bat', 'cat', or 'rat'
+    "abc" =~ /[cab]/; # matches 'a'
+
+In the last statement, even though C<'c'> is the first character in
+the class, the earliest point at which the regex can match is C<'a'>.
+
+    /[yY][eE][sS]/; # match 'yes' in a case-insensitive way
+                    # 'yes', 'Yes', 'YES', etc.
+    /yes/i;         # also match 'yes' in a case-insensitive way
+
+The last example shows a match with an C<'i'> B<modifier>, which makes
+the match case-insensitive.
+
+Character classes also have ordinary and special characters, but the
+sets of ordinary and special characters inside a character class are
+different than those outside a character class.  The special
+characters for a character class are C<-]\^$> and are matched using an
+escape:
+
+   /[\]c]def/; # matches ']def' or 'cdef'
+   $x = 'bcr';
+   /[$x]at/;   # matches 'bat', 'cat', or 'rat'
+   /[\$x]at/;  # matches '$at' or 'xat'
+   /[\\$x]at/; # matches '\at', 'bat', 'cat', or 'rat'
+
+The special character C<'-'> acts as a range operator within character
+classes, so that the unwieldy C<[0123456789]> and C<[abc...xyz]>
+become the svelte C<[0-9]> and C<[a-z]>:
+
+    /item[0-9]/;  # matches 'item0' or ... or 'item9'
+    /[0-9a-fA-F]/;  # matches a hexadecimal digit
+
+If C<'-'> is the first or last character in a character class, it is
+treated as an ordinary character.
+
+The special character C<^> in the first position of a character class
+denotes a B<negated character class>, which matches any character but
+those in the brackets.  Both C<[...]> and C<[^...]> must match a
+character, or the match fails.  Then
+
+    /[^a]at/;  # doesn't match 'aat' or 'at', but matches
+               # all other 'bat', 'cat', '0at', '%at', etc.
+    /[^0-9]/;  # matches a non-numeric character
+    /[a^]at/;  # matches 'aat' or '^at'; here '^' is ordinary
+
+Perl has several abbreviations for common character classes:
+
+=over 4
+
+=item *
+
+\d is a digit and represents [0-9]
+
+=item *
+
+\s is a whitespace character and represents [\ \t\r\n\f]
+
+=item *
+
+\w is a word character (alphanumeric or _) and represents [0-9a-zA-Z_]
+
+=item *
+
+\D is a negated \d; it represents any character but a digit [^0-9]
+
+=item *
+
+\S is a negated \s; it represents any non-whitespace character [^\s]
+
+=item *
+
+\W is a negated \w; it represents any non-word character [^\w]
+
+=item *
+
+The period '.' matches any character but "\n"
+
+=back
+
+The C<\d\s\w\D\S\W> abbreviations can be used both inside and outside
+of character classes.  Here are some in use:
+
+    /\d\d:\d\d:\d\d/; # matches a hh:mm:ss time format
+    /[\d\s]/;         # matches any digit or whitespace character
+    /\w\W\w/;         # matches a word char, followed by a
+                      # non-word char, followed by a word char
+    /..rt/;           # matches any two chars, followed by 'rt'
+    /end\./;          # matches 'end.'
+    /end[.]/;         # same thing, matches 'end.'
+
+The S<B<word anchor> > C<\b> matches a boundary between a word
+character and a non-word character C<\w\W> or C<\W\w>:
+
+    $x = "Housecat catenates house and cat";
+    $x =~ /\bcat/;  # matches cat in 'catenates'
+    $x =~ /cat\b/;  # matches cat in 'Housecat'
+    $x =~ /\bcat\b/;  # matches 'cat' at end of string
+
+In the last example, the end of the string is considered a word
+boundary.
+
+=head2 Matching this or that
+
+We can match different character strings with the B<alternation>
+metacharacter C<'|'>.  To match C<dog> or C<cat>, we form the regex
+C<dog|cat>.  As before, perl will try to match the regex at the
+earliest possible point in the string.  At each character position,
+perl will first try to match the first alternative, C<dog>.  If
+C<dog> doesn't match, perl will then try the next alternative, C<cat>.
+If C<cat> doesn't match either, then the match fails and perl moves to
+the next position in the string.  Some examples:
+
+    "cats and dogs" =~ /cat|dog|bird/;  # matches "cat"
+    "cats and dogs" =~ /dog|cat|bird/;  # matches "cat"
+
+Even though C<dog> is the first alternative in the second regex,
+C<cat> is able to match earlier in the string.
+
+    "cats"          =~ /c|ca|cat|cats/; # matches "c"
+    "cats"          =~ /cats|cat|ca|c/; # matches "cats"
+
+At a given character position, the first alternative that allows the
+regex match to succeed will be the one that matches. Here, all the
+alternatives match at the first string position, so the first matches.
+
+=head2 Grouping things and hierarchical matching
+
+The B<grouping> metacharacters C<()> allow a part of a regex to be
+treated as a single unit.  Parts of a regex are grouped by enclosing
+them in parentheses.  The regex C<house(cat|keeper)> means match
+C<house> followed by either C<cat> or C<keeper>.  Some more examples
+are
+
+    /(a|b)b/;    # matches 'ab' or 'bb'
+    /(^a|b)c/;   # matches 'ac' at start of string or 'bc' anywhere
+
+    /house(cat|)/;  # matches either 'housecat' or 'house'
+    /house(cat(s|)|)/;  # matches either 'housecats' or 'housecat' or
+                        # 'house'.  Note groups can be nested.
+
+    "20" =~ /(19|20|)\d\d/;  # matches the null alternative '()\d\d',
+                             # because '20\d\d' can't match
+
+=head2 Extracting matches
+
+The grouping metacharacters C<()> also allow the extraction of the
+parts of a string that matched.  For each grouping, the part that
+matched inside goes into the special variables C<$1>, C<$2>, etc.
+They can be used just as ordinary variables:
+
+    # extract hours, minutes, seconds
+    $time =~ /(\d\d):(\d\d):(\d\d)/;  # match hh:mm:ss format
+    $hours = $1;
+    $minutes = $2;
+    $seconds = $3;
+
+In list context, a match C</regex/> with groupings will return the
+list of matched values C<($1,$2,...)>.  So we could rewrite it as
+
+    ($hours, $minutes, $seconds) = ($time =~ /(\d\d):(\d\d):(\d\d)/);
+
+If the groupings in a regex are nested, C<$1> gets the group with the
+leftmost opening parenthesis, C<$2> the next opening parenthesis,
+etc.  For example, here is a complex regex and the matching variables
+indicated below it:
+
+    /(ab(cd|ef)((gi)|j))/;
+     1  2      34
+
+Associated with the matching variables C<$1>, C<$2>, ... are
+the B<backreferences> C<\1>, C<\2>, ...  Backreferences are
+matching variables that can be used I<inside> a regex:
+
+    /(\w\w\w)\s\1/; # find sequences like 'the the' in string
+
+C<$1>, C<$2>, ... should only be used outside of a regex, and C<\1>,
+C<\2>, ... only inside a regex.
+
+=head2 Matching repetitions
+
+The B<quantifier> metacharacters C<?>, C<*>, C<+>, and C<{}> allow us
+to determine the number of repeats of a portion of a regex we
+consider to be a match.  Quantifiers are put immediately after the
+character, character class, or grouping that we want to specify.  They
+have the following meanings:
+
+=over 4
+
+=item *
+
+C<a?> = match 'a' 1 or 0 times
+
+=item *
+
+C<a*> = match 'a' 0 or more times, i.e., any number of times
+
+=item *
+
+C<a+> = match 'a' 1 or more times, i.e., at least once
+
+=item *
+
+C<a{n,m}> = match at least C<n> times, but not more than C<m>
+times.
+
+=item *
+
+C<a{n,}> = match C<n> or more times
+
+=item *
+
+C<a{n}> = match exactly C<n> times
+
+=back
+
+Here are some examples:
+
+    /[a-z]+\s+\d*/;  # match a lowercase word, at least some space, and
+                     # any number of digits
+    /(\w+)\s+\1/;    # match doubled words of arbitrary length
+    $year =~ /\d{2,4}/;  # make sure year is at least 2 but not more
+                         # than 4 digits
+    $year =~ /\d{4}|\d{2}/;    # better match; throw out 3 digit dates
+
+These quantifiers will try to match as much of the string as possible,
+while still allowing the regex to match.  So we have
+
+    $x = 'the cat in the hat';
+    $x =~ /^(.*)(at)(.*)$/; # matches,
+                            # $1 = 'the cat in the h'
+                            # $2 = 'at'
+                            # $3 = ''   (0 matches)
+
+The first quantifier C<.*> grabs as much of the string as possible
+while still having the regex match. The second quantifier C<.*> has
+no string left to it, so it matches 0 times.
+
+=head2 More matching
+
+There are a few more things you might want to know about matching
+operators.  In the code
+
+    $pattern = 'Seuss';
+    while (<>) {
+        print if /$pattern/;
+    }
+
+perl has to re-evaluate C<$pattern> each time through the loop.  If
+C<$pattern> won't be changing, use the C<//o> modifier, to only
+perform variable substitutions once.  If you don't want any
+substitutions at all, use the special delimiter C<m''>:
+
+    $pattern = 'Seuss';
+    m'$pattern'; # matches '$pattern', not 'Seuss'
+
+The global modifier C<//g> allows the matching operator to match
+within a string as many times as possible.  In scalar context,
+successive matches against a string will have C<//g> jump from match
+to match, keeping track of position in the string as it goes along.
+You can get or set the position with the C<pos()> function.
+For example,
+
+    $x = "cat dog house"; # 3 words
+    while ($x =~ /(\w+)/g) {
+        print "Word is $1, ends at position ", pos $x, "\n";
+    }
+
+prints
+
+    Word is cat, ends at position 3
+    Word is dog, ends at position 7
+    Word is house, ends at position 13
+
+A failed match or changing the target string resets the position.  If
+you don't want the position reset after failure to match, add the
+C<//c> modifier, as in C</regex/gc>.
+
+In list context, C<//g> returns a list of matched groupings, or if
+there are no groupings, a list of matches to the whole regex.  So
+
+    @words = ($x =~ /(\w+)/g);  # matches,
+                                # $words[0] = 'cat'
+                                # $words[1] = 'dog'
+                                # $words[2] = 'house'
+
+=head2 Search and replace
+
+Search and replace is performed using C<s/regex/replacement/modifiers>.
+The C<replacement> is a Perl double quoted string that replaces in the
+string whatever is matched with the C<regex>.  The operator C<=~> is
+also used here to associate a string with C<s///>.  If matching
+against C<$_>, the S<C<$_ =~> > can be dropped.  If there is a match,
+C<s///> returns the number of substitutions made, otherwise it returns
+false.  Here are a few examples:
+
+    $x = "Time to feed the cat!";
+    $x =~ s/cat/hacker/;   # $x contains "Time to feed the hacker!"
+    $y = "'quoted words'";
+    $y =~ s/^'(.*)'$/$1/;  # strip single quotes,
+                           # $y contains "quoted words"
+
+With the C<s///> operator, the matched variables C<$1>, C<$2>, etc.
+are immediately available for use in the replacement expression. With
+the global modifier, C<s///g> will search and replace all occurrences
+of the regex in the string:
+
+    $x = "I batted 4 for 4";
+    $x =~ s/4/four/;   # $x contains "I batted four for 4"
+    $x = "I batted 4 for 4";
+    $x =~ s/4/four/g;  # $x contains "I batted four for four"
+
+The evaluation modifier C<s///e> wraps an C<eval{...}> around the
+replacement string and the evaluated result is substituted for the
+matched substring.  Some examples:
+
+    # reverse all the words in a string
+    $x = "the cat in the hat";
+    $x =~ s/(\w+)/reverse $1/ge;   # $x contains "eht tac ni eht tah"
+
+    # convert percentage to decimal
+    $x = "A 39% hit rate";
+    $x =~ s!(\d+)%!$1/100!e;       # $x contains "A 0.39 hit rate"
+
+The last example shows that C<s///> can use other delimiters, such as
+C<s!!!> and C<s{}{}>, and even C<s{}//>.  If single quotes are used
+C<s'''>, then the regex and replacement are treated as single quoted
+strings.
+
+=head2 The split operator
+
+C<split /regex/, string> splits C<string> into a list of substrings
+and returns that list.  The regex determines the character sequence
+that C<string> is split with respect to.  For example, to split a
+string into words, use
+
+    $x = "Calvin and Hobbes";
+    @word = split /\s+/, $x;  # $word[0] = 'Calvin'
+                              # $word[1] = 'and'
+                              # $word[2] = 'Hobbes'
+
+To extract a comma-delimited list of numbers, use
+
+    $x = "1.618,2.718,   3.142";
+    @const = split /,\s*/, $x;  # $const[0] = '1.618'
+                                # $const[1] = '2.718'
+                                # $const[2] = '3.142'
+
+If the empty regex C<//> is used, the string is split into individual
+characters.  If the regex has groupings, then the list produced contains
+the matched substrings from the groupings as well:
+
+    $x = "/usr/bin";
+    @parts = split m!(/)!, $x;  # $parts[0] = ''
+                                # $parts[1] = '/'
+                                # $parts[2] = 'usr'
+                                # $parts[3] = '/'
+                                # $parts[4] = 'bin'
+
+Since the first character of $x matched the regex, C<split> prepended
+an empty initial element to the list.
+
+=head1 BUGS
+
+None.
+
+=head1 SEE ALSO
+
+This is just a quick start guide.  For a more in-depth tutorial on
+regexes, see L<perlretut> and for the reference page, see L<perlre>.
+
+=head1 AUTHOR AND COPYRIGHT
+
+Copyright (c) 2000 Mark Kvale
+All rights reserved.
+
+This document may be distributed under the same terms as Perl itself.
+
+=head2 Acknowledgments
+
+The author would like to thank Mark-Jason Dominus, Tom Christiansen,
+Ilya Zakharevich, Brad Hughes, and Mike Giroux for all their helpful
+comments.
+
+=cut
+
diff --git a/contrib/perl5/pod/perlretut.pod b/contrib/perl5/pod/perlretut.pod
new file mode 100644
index 0000000000000..fa6479c0c45bc
--- /dev/null
+++ b/contrib/perl5/pod/perlretut.pod
@@ -0,0 +1,2504 @@
+=head1 NAME
+
+perlretut - Perl regular expressions tutorial
+
+=head1 DESCRIPTION
+
+This page provides a basic tutorial on understanding, creating and
+using regular expressions in Perl.  It serves as a complement to the
+reference page on regular expressions L<perlre>.  Regular expressions
+are an integral part of the C<m//>, C<s///>, C<qr//> and C<split>
+operators and so this tutorial also overlaps with
+L<perlop/"Regexp Quote-Like Operators"> and L<perlfunc/split>.
+
+Perl is widely renowned for excellence in text processing, and regular
+expressions are one of the big factors behind this fame.  Perl regular
+expressions display an efficiency and flexibility unknown in most
+other computer languages.  Mastering even the basics of regular
+expressions will allow you to manipulate text with surprising ease.
+
+What is a regular expression?  A regular expression is simply a string
+that describes a pattern.  Patterns are in common use these days;
+examples are the patterns typed into a search engine to find web pages
+and the patterns used to list files in a directory, e.g., C<ls *.txt>
+or C<dir *.*>.  In Perl, the patterns described by regular expressions
+are used to search strings, extract desired parts of strings, and to
+do search and replace operations.
+
+Regular expressions have the undeserved reputation of being abstract
+and difficult to understand.  Regular expressions are constructed using
+simple concepts like conditionals and loops and are no more difficult
+to understand than the corresponding C<if> conditionals and C<while>
+loops in the Perl language itself.  In fact, the main challenge in
+learning regular expressions is just getting used to the terse
+notation used to express these concepts.
+
+This tutorial flattens the learning curve by discussing regular
+expression concepts, along with their notation, one at a time and with
+many examples.  The first part of the tutorial will progress from the
+simplest word searches to the basic regular expression concepts.  If
+you master the first part, you will have all the tools needed to solve
+about 98% of your needs.  The second part of the tutorial is for those
+comfortable with the basics and hungry for more power tools.  It
+discusses the more advanced regular expression operators and
+introduces the latest cutting edge innovations in 5.6.0.
+
+A note: to save time, 'regular expression' is often abbreviated as
+regexp or regex.  Regexp is a more natural abbreviation than regex, but
+is harder to pronounce.  The Perl pod documentation is evenly split on
+regexp vs regex; in Perl, there is more than one way to abbreviate it.
+We'll use regexp in this tutorial.
+
+=head1 Part 1: The basics
+
+=head2 Simple word matching
+
+The simplest regexp is simply a word, or more generally, a string of
+characters.  A regexp consisting of a word matches any string that
+contains that word:
+
+    "Hello World" =~ /World/;  # matches
+
+What is this perl statement all about? C<"Hello World"> is a simple
+double quoted string.  C<World> is the regular expression and the
+C<//> enclosing C</World/> tells perl to search a string for a match.
+The operator C<=~> associates the string with the regexp match and
+produces a true value if the regexp matched, or false if the regexp
+did not match.  In our case, C<World> matches the second word in
+C<"Hello World">, so the expression is true.  Expressions like this
+are useful in conditionals:
+
+    if ("Hello World" =~ /World/) {
+        print "It matches\n";
+    }
+    else {
+        print "It doesn't match\n";
+    }
+
+There are useful variations on this theme.  The sense of the match can
+be reversed by using the C<!~> operator:
+
+    if ("Hello World" !~ /World/) {
+        print "It doesn't match\n";
+    }
+    else {
+        print "It matches\n";
+    }
+
+The literal string in the regexp can be replaced by a variable:
+
+    $greeting = "World";
+    if ("Hello World" =~ /$greeting/) {
+        print "It matches\n";
+    }
+    else {
+        print "It doesn't match\n";
+    }
+
+If you're matching against the special default variable C<$_>, the
+C<$_ =~> part can be omitted:
+
+    $_ = "Hello World";
+    if (/World/) {
+        print "It matches\n";
+    }
+    else {
+        print "It doesn't match\n";
+    }
+
+And finally, the C<//> default delimiters for a match can be changed
+to arbitrary delimiters by putting an C<'m'> out front:
+
+    "Hello World" =~ m!World!;   # matches, delimited by '!'
+    "Hello World" =~ m{World};   # matches, note the matching '{}'
+    "/usr/bin/perl" =~ m"/perl"; # matches after '/usr/bin',
+                                 # '/' becomes an ordinary char
+
+C</World/>, C<m!World!>, and C<m{World}> all represent the
+same thing.  When, e.g., C<""> is used as a delimiter, the forward
+slash C<'/'> becomes an ordinary character and can be used in a regexp
+without trouble.
+
+Let's consider how different regexps would match C<"Hello World">:
+
+    "Hello World" =~ /world/;  # doesn't match
+    "Hello World" =~ /o W/;    # matches
+    "Hello World" =~ /oW/;     # doesn't match
+    "Hello World" =~ /World /; # doesn't match
+
+The first regexp C<world> doesn't match because regexps are
+case-sensitive.  The second regexp matches because the substring
+S<C<'o W'> > occurs in the string S<C<"Hello World"> >.  The space
+character ' ' is treated like any other character in a regexp and is
+needed to match in this case.  The lack of a space character is the
+reason the third regexp C<'oW'> doesn't match.  The fourth regexp
+C<'World '> doesn't match because there is a space at the end of the
+regexp, but not at the end of the string.  The lesson here is that
+regexps must match a part of the string I<exactly> in order for the
+statement to be true.
+
+If a regexp matches in more than one place in the string, perl will
+always match at the earliest possible point in the string:
+
+    "Hello World" =~ /o/;       # matches 'o' in 'Hello'
+    "That hat is red" =~ /hat/; # matches 'hat' in 'That'
+
+With respect to character matching, there are a few more points you
+need to know about.   First of all, not all characters can be used 'as
+is' in a match.  Some characters, called B<metacharacters>, are reserved
+for use in regexp notation.  The metacharacters are
+
+    {}[]()^$.|*+?\
+
+The significance of each of these will be explained
+in the rest of the tutorial, but for now, it is important only to know
+that a metacharacter can be matched by putting a backslash before it:
+
+    "2+2=4" =~ /2+2/;    # doesn't match, + is a metacharacter
+    "2+2=4" =~ /2\+2/;   # matches, \+ is treated like an ordinary +
+    "The interval is [0,1)." =~ /[0,1)./     # is a syntax error!
+    "The interval is [0,1)." =~ /\[0,1\)\./  # matches
+    "/usr/bin/perl" =~ /\/usr\/bin\/perl/;  # matches
+
+In the last regexp, the forward slash C<'/'> is also backslashed,
+because it is used to delimit the regexp.  This can lead to LTS
+(leaning toothpick syndrome), however, and it is often more readable
+to change delimiters.
+
+
+The backslash character C<'\'> is a metacharacter itself and needs to
+be backslashed:
+
+    'C:\WIN32' =~ /C:\\WIN/;   # matches
+
+In addition to the metacharacters, there are some ASCII characters
+which don't have printable character equivalents and are instead
+represented by B<escape sequences>.  Common examples are C<\t> for a
+tab, C<\n> for a newline, C<\r> for a carriage return and C<\a> for a
+bell.  If your string is better thought of as a sequence of arbitrary
+bytes, the octal escape sequence, e.g., C<\033>, or hexadecimal escape
+sequence, e.g., C<\x1B> may be a more natural representation for your
+bytes.  Here are some examples of escapes:
+
+    "1000\t2000" =~ m(0\t2)   # matches
+    "1000\n2000" =~ /0\n20/   # matches
+    "1000\t2000" =~ /\000\t2/ # doesn't match, "0" ne "\000"
+    "cat"        =~ /\143\x61\x74/ # matches, but a weird way to spell cat
+
+If you've been around Perl a while, all this talk of escape sequences
+may seem familiar.  Similar escape sequences are used in double-quoted
+strings and in fact the regexps in Perl are mostly treated as
+double-quoted strings.  This means that variables can be used in
+regexps as well.  Just like double-quoted strings, the values of the
+variables in the regexp will be substituted in before the regexp is
+evaluated for matching purposes.  So we have:
+
+    $foo = 'house';
+    'housecat' =~ /$foo/;      # matches
+    'cathouse' =~ /cat$foo/;   # matches
+    'housecat' =~ /${foo}cat/; # matches
+
+So far, so good.  With the knowledge above you can already perform
+searches with just about any literal string regexp you can dream up.
+Here is a I<very simple> emulation of the Unix grep program:
+
+    % cat > simple_grep
+    #!/usr/bin/perl
+    $regexp = shift;
+    while (<>) {
+        print if /$regexp/;
+    }
+    ^D
+
+    % chmod +x simple_grep
+
+    % simple_grep abba /usr/dict/words
+    Babbage
+    cabbage
+    cabbages
+    sabbath
+    Sabbathize
+    Sabbathizes
+    sabbatical
+    scabbard
+    scabbards
+
+This program is easy to understand.  C<#!/usr/bin/perl> is the standard
+way to invoke a perl program from the shell.
+S<C<$regexp = shift;> > saves the first command line argument as the
+regexp to be used, leaving the rest of the command line arguments to
+be treated as files.  S<C<< while (<>) >> > loops over all the lines in
+all the files.  For each line, S<C<print if /$regexp/;> > prints the
+line if the regexp matches the line.  In this line, both C<print> and
+C</$regexp/> use the default variable C<$_> implicitly.
+
+With all of the regexps above, if the regexp matched anywhere in the
+string, it was considered a match.  Sometimes, however, we'd like to
+specify I<where> in the string the regexp should try to match.  To do
+this, we would use the B<anchor> metacharacters C<^> and C<$>.  The
+anchor C<^> means match at the beginning of the string and the anchor
+C<$> means match at the end of the string, or before a newline at the
+end of the string.  Here is how they are used:
+
+    "housekeeper" =~ /keeper/;    # matches
+    "housekeeper" =~ /^keeper/;   # doesn't match
+    "housekeeper" =~ /keeper$/;   # matches
+    "housekeeper\n" =~ /keeper$/; # matches
+
+The second regexp doesn't match because C<^> constrains C<keeper> to
+match only at the beginning of the string, but C<"housekeeper"> has
+keeper starting in the middle.  The third regexp does match, since the
+C<$> constrains C<keeper> to match only at the end of the string.
+
+When both C<^> and C<$> are used at the same time, the regexp has to
+match both the beginning and the end of the string, i.e., the regexp
+matches the whole string.  Consider
+
+    "keeper" =~ /^keep$/;      # doesn't match
+    "keeper" =~ /^keeper$/;    # matches
+    ""       =~ /^$/;          # ^$ matches an empty string
+
+The first regexp doesn't match because the string has more to it than
+C<keep>.  Since the second regexp is exactly the string, it
+matches.  Using both C<^> and C<$> in a regexp forces the complete
+string to match, so it gives you complete control over which strings
+match and which don't.  Suppose you are looking for a fellow named
+bert, off in a string by himself:
+
+    "dogbert" =~ /bert/;   # matches, but not what you want
+
+    "dilbert" =~ /^bert/;  # doesn't match, but ..
+    "bertram" =~ /^bert/;  # matches, so still not good enough
+
+    "bertram" =~ /^bert$/; # doesn't match, good
+    "dilbert" =~ /^bert$/; # doesn't match, good
+    "bert"    =~ /^bert$/; # matches, perfect
+
+Of course, in the case of a literal string, one could just as easily
+use the string equivalence S<C<$string eq 'bert'> > and it would be
+more efficient.   The  C<^...$> regexp really becomes useful when we
+add in the more powerful regexp tools below.
+
+=head2 Using character classes
+
+Although one can already do quite a lot with the literal string
+regexps above, we've only scratched the surface of regular expression
+technology.  In this and subsequent sections we will introduce regexp
+concepts (and associated metacharacter notations) that will allow a
+regexp to not just represent a single character sequence, but a I<whole
+class> of them.
+
+One such concept is that of a B<character class>.  A character class
+allows a set of possible characters, rather than just a single
+character, to match at a particular point in a regexp.  Character
+classes are denoted by brackets C<[...]>, with the set of characters
+to be possibly matched inside.  Here are some examples:
+
+    /cat/;       # matches 'cat'
+    /[bcr]at/;   # matches 'bat', 'cat', or 'rat'
+    /item[0123456789]/;  # matches 'item0' or ... or 'item9'
+    "abc" =~ /[cab]/;    # matches 'a'
+
+In the last statement, even though C<'c'> is the first character in
+the class, C<'a'> matches because the first character position in the
+string is the earliest point at which the regexp can match.
+
+    /[yY][eE][sS]/;      # match 'yes' in a case-insensitive way
+                         # 'yes', 'Yes', 'YES', etc.
+
+This regexp displays a common task: perform a case-insensitive
+match.  Perl provides a way of avoiding all those brackets by simply
+appending an C<'i'> to the end of the match.  Then C</[yY][eE][sS]/;>
+can be rewritten as C</yes/i;>.  The C<'i'> stands for
+case-insensitive and is an example of a B<modifier> of the matching
+operation.  We will meet other modifiers later in the tutorial.
+
+We saw in the section above that there were ordinary characters, which
+represented themselves, and special characters, which needed a
+backslash C<\> to represent themselves.  The same is true in a
+character class, but the sets of ordinary and special characters
+inside a character class are different than those outside a character
+class.  The special characters for a character class are C<-]\^$>.  C<]>
+is special because it denotes the end of a character class.  C<$> is
+special because it denotes a scalar variable.  C<\> is special because
+it is used in escape sequences, just like above.  Here is how the
+special characters C<]$\> are handled:
+
+   /[\]c]def/; # matches ']def' or 'cdef'
+   $x = 'bcr';
+   /[$x]at/;   # matches 'bat', 'cat', or 'rat'
+   /[\$x]at/;  # matches '$at' or 'xat'
+   /[\\$x]at/; # matches '\at', 'bat', 'cat', or 'rat'
+
+The last two are a little tricky.  In C<[\$x]>, the backslash protects
+the dollar sign, so the character class has two members C<$> and C<x>.
+In C<[\\$x]>, the backslash is protected, so C<$x> is treated as a
+variable and substituted in double quote fashion.
+
+The special character C<'-'> acts as a range operator within character
+classes, so that a contiguous set of characters can be written as a
+range.  With ranges, the unwieldy C<[0123456789]> and C<[abc...xyz]>
+become the svelte C<[0-9]> and C<[a-z]>.  Some examples are
+
+    /item[0-9]/;  # matches 'item0' or ... or 'item9'
+    /[0-9bx-z]aa/;  # matches '0aa', ..., '9aa',
+                    # 'baa', 'xaa', 'yaa', or 'zaa'
+    /[0-9a-fA-F]/;  # matches a hexadecimal digit
+    /[0-9a-zA-Z_]/; # matches a "word" character,
+                    # like those in a perl variable name
+
+If C<'-'> is the first or last character in a character class, it is
+treated as an ordinary character; C<[-ab]>, C<[ab-]> and C<[a\-b]> are
+all equivalent.
+
+The special character C<^> in the first position of a character class
+denotes a B<negated character class>, which matches any character but
+those in the brackets.  Both C<[...]> and C<[^...]> must match a
+character, or the match fails.  Then
+
+    /[^a]at/;  # doesn't match 'aat' or 'at', but matches
+               # all other 'bat', 'cat', '0at', '%at', etc.
+    /[^0-9]/;  # matches a non-numeric character
+    /[a^]at/;  # matches 'aat' or '^at'; here '^' is ordinary
+
+Now, even C<[0-9]> can be a bother to write multiple times, so in the
+interest of saving keystrokes and making regexps more readable, Perl
+has several abbreviations for common character classes:
+
+=over 4
+
+=item *
+
+\d is a digit and represents [0-9]
+
+=item *
+
+\s is a whitespace character and represents [\ \t\r\n\f]
+
+=item *
+
+\w is a word character (alphanumeric or _) and represents [0-9a-zA-Z_]
+
+=item *
+
+\D is a negated \d; it represents any character but a digit [^0-9]
+
+=item *
+
+\S is a negated \s; it represents any non-whitespace character [^\s]
+
+=item *
+
+\W is a negated \w; it represents any non-word character [^\w]
+
+=item *
+
+The period '.' matches any character but "\n"
+
+=back
+
+The C<\d\s\w\D\S\W> abbreviations can be used both inside and outside
+of character classes.  Here are some in use:
+
+    /\d\d:\d\d:\d\d/; # matches a hh:mm:ss time format
+    /[\d\s]/;         # matches any digit or whitespace character
+    /\w\W\w/;         # matches a word char, followed by a
+                      # non-word char, followed by a word char
+    /..rt/;           # matches any two chars, followed by 'rt'
+    /end\./;          # matches 'end.'
+    /end[.]/;         # same thing, matches 'end.'
+
+Because a period is a metacharacter, it needs to be escaped to match
+as an ordinary period. Because, for example, C<\d> and C<\w> are sets
+of characters, it is incorrect to think of C<[^\d\w]> as C<[\D\W]>; in
+fact C<[^\d\w]> is the same as C<[^\w]>, which is the same as
+C<[\W]>. Think DeMorgan's laws.
+
+An anchor useful in basic regexps is the S<B<word anchor> >
+C<\b>.  This matches a boundary between a word character and a non-word
+character C<\w\W> or C<\W\w>:
+
+    $x = "Housecat catenates house and cat";
+    $x =~ /cat/;    # matches cat in 'housecat'
+    $x =~ /\bcat/;  # matches cat in 'catenates'
+    $x =~ /cat\b/;  # matches cat in 'housecat'
+    $x =~ /\bcat\b/;  # matches 'cat' at end of string
+
+Note in the last example, the end of the string is considered a word
+boundary.
+
+You might wonder why C<'.'> matches everything but C<"\n"> - why not
+every character? The reason is that often one is matching against
+lines and would like to ignore the newline characters.  For instance,
+while the string C<"\n"> represents one line, we would like to think
+of it as empty.  Then
+
+    ""   =~ /^$/;    # matches
+    "\n" =~ /^$/;    # matches, "\n" is ignored
+
+    ""   =~ /./;      # doesn't match; it needs a char
+    ""   =~ /^.$/;    # doesn't match; it needs a char
+    "\n" =~ /^.$/;    # doesn't match; it needs a char other than "\n"
+    "a"  =~ /^.$/;    # matches
+    "a\n"  =~ /^.$/;  # matches, ignores the "\n"
+
+This behavior is convenient, because we usually want to ignore
+newlines when we count and match characters in a line.  Sometimes,
+however, we want to keep track of newlines.  We might even want C<^>
+and C<$> to anchor at the beginning and end of lines within the
+string, rather than just the beginning and end of the string.  Perl
+allows us to choose between ignoring and paying attention to newlines
+by using the C<//s> and C<//m> modifiers.  C<//s> and C<//m> stand for
+single line and multi-line and they determine whether a string is to
+be treated as one continuous string, or as a set of lines.  The two
+modifiers affect two aspects of how the regexp is interpreted: 1) how
+the C<'.'> character class is defined, and 2) where the anchors C<^>
+and C<$> are able to match.  Here are the four possible combinations:
+
+=over 4
+
+=item *
+
+no modifiers (//): Default behavior.  C<'.'> matches any character
+except C<"\n">.  C<^> matches only at the beginning of the string and
+C<$> matches only at the end or before a newline at the end.
+
+=item *
+
+s modifier (//s): Treat string as a single long line.  C<'.'> matches
+any character, even C<"\n">.  C<^> matches only at the beginning of
+the string and C<$> matches only at the end or before a newline at the
+end.
+
+=item *
+
+m modifier (//m): Treat string as a set of multiple lines.  C<'.'>
+matches any character except C<"\n">.  C<^> and C<$> are able to match
+at the start or end of I<any> line within the string.
+
+=item *
+
+both s and m modifiers (//sm): Treat string as a single long line, but
+detect multiple lines.  C<'.'> matches any character, even
+C<"\n">.  C<^> and C<$>, however, are able to match at the start or end
+of I<any> line within the string.
+
+=back
+
+Here are examples of C<//s> and C<//m> in action:
+
+    $x = "There once was a girl\nWho programmed in Perl\n";
+
+    $x =~ /^Who/;   # doesn't match, "Who" not at start of string
+    $x =~ /^Who/s;  # doesn't match, "Who" not at start of string
+    $x =~ /^Who/m;  # matches, "Who" at start of second line
+    $x =~ /^Who/sm; # matches, "Who" at start of second line
+
+    $x =~ /girl.Who/;   # doesn't match, "." doesn't match "\n"
+    $x =~ /girl.Who/s;  # matches, "." matches "\n"
+    $x =~ /girl.Who/m;  # doesn't match, "." doesn't match "\n"
+    $x =~ /girl.Who/sm; # matches, "." matches "\n"
+
+Most of the time, the default behavior is what is wanted, but C<//s> and
+C<//m> are occasionally very useful.  If C<//m> is being used, the start
+of the string can still be matched with C<\A> and the end of string
+can still be matched with the anchors C<\Z> (matches both the end and
+the newline before, like C<$>), and C<\z> (matches only the end):
+
+    $x =~ /^Who/m;   # matches, "Who" at start of second line
+    $x =~ /\AWho/m;  # doesn't match, "Who" is not at start of string
+
+    $x =~ /girl$/m;  # matches, "girl" at end of first line
+    $x =~ /girl\Z/m; # doesn't match, "girl" is not at end of string
+
+    $x =~ /Perl\Z/m; # matches, "Perl" is at newline before end
+    $x =~ /Perl\z/m; # doesn't match, "Perl" is not at end of string
+
+We now know how to create choices among classes of characters in a
+regexp.  What about choices among words or character strings? Such
+choices are described in the next section.
+
+=head2 Matching this or that
+
+Sometimes we would like our regexp to be able to match different
+possible words or character strings.  This is accomplished by using
+the B<alternation> metacharacter C<|>.  To match C<dog> or C<cat>, we
+form the regexp C<dog|cat>.  As before, perl will try to match the
+regexp at the earliest possible point in the string.  At each
+character position, perl will first try to match the first
+alternative, C<dog>.  If C<dog> doesn't match, perl will then try the
+next alternative, C<cat>.  If C<cat> doesn't match either, then the
+match fails and perl moves to the next position in the string.  Some
+examples:
+
+    "cats and dogs" =~ /cat|dog|bird/;  # matches "cat"
+    "cats and dogs" =~ /dog|cat|bird/;  # matches "cat"
+
+Even though C<dog> is the first alternative in the second regexp,
+C<cat> is able to match earlier in the string.
+
+    "cats"          =~ /c|ca|cat|cats/; # matches "c"
+    "cats"          =~ /cats|cat|ca|c/; # matches "cats"
+
+Here, all the alternatives match at the first string position, so the
+first alternative is the one that matches.  If some of the
+alternatives are truncations of the others, put the longest ones first
+to give them a chance to match.
+
+    "cab" =~ /a|b|c/ # matches "c"
+                     # /a|b|c/ == /[abc]/
+
+The last example points out that character classes are like
+alternations of characters.  At a given character position, the first
+alternative that allows the regexp match to succeed will be the one
+that matches.
+
+=head2 Grouping things and hierarchical matching
+
+Alternation allows a regexp to choose among alternatives, but by
+itself it is unsatisfying.  The reason is that each alternative is a whole
+regexp, but sometimes we want alternatives for just part of a
+regexp.  For instance, suppose we want to search for housecats or
+housekeepers.  The regexp C<housecat|housekeeper> fits the bill, but is
+inefficient because we had to type C<house> twice.  It would be nice to
+have parts of the regexp be constant, like C<house>, and some
+parts have alternatives, like C<cat|keeper>.
+
+The B<grouping> metacharacters C<()> solve this problem.  Grouping
+allows parts of a regexp to be treated as a single unit.  Parts of a
+regexp are grouped by enclosing them in parentheses.  Thus we could solve
+the C<housecat|housekeeper> by forming the regexp as
+C<house(cat|keeper)>.  The regexp C<house(cat|keeper)> means match
+C<house> followed by either C<cat> or C<keeper>.  Some more examples
+are
+
+    /(a|b)b/;    # matches 'ab' or 'bb'
+    /(ac|b)b/;   # matches 'acb' or 'bb'
+    /(^a|b)c/;   # matches 'ac' at start of string or 'bc' anywhere
+    /(a|[bc])d/; # matches 'ad', 'bd', or 'cd'
+
+    /house(cat|)/;  # matches either 'housecat' or 'house'
+    /house(cat(s|)|)/;  # matches either 'housecats' or 'housecat' or
+                        # 'house'.  Note groups can be nested.
+
+    /(19|20|)\d\d/;  # match years 19xx, 20xx, or the Y2K problem, xx
+    "20" =~ /(19|20|)\d\d/;  # matches the null alternative '()\d\d',
+                             # because '20\d\d' can't match
+
+Alternations behave the same way in groups as out of them: at a given
+string position, the leftmost alternative that allows the regexp to
+match is taken.  So in the last example at the first string position,
+C<"20"> matches the second alternative, but there is nothing left over
+to match the next two digits C<\d\d>.  So perl moves on to the next
+alternative, which is the null alternative and that works, since
+C<"20"> is two digits.
+
+The process of trying one alternative, seeing if it matches, and
+moving on to the next alternative if it doesn't, is called
+B<backtracking>.  The term 'backtracking' comes from the idea that
+matching a regexp is like a walk in the woods.  Successfully matching
+a regexp is like arriving at a destination.  There are many possible
+trailheads, one for each string position, and each one is tried in
+order, left to right.  From each trailhead there may be many paths,
+some of which get you there, and some which are dead ends.  When you
+walk along a trail and hit a dead end, you have to backtrack along the
+trail to an earlier point to try another trail.  If you hit your
+destination, you stop immediately and forget about trying all the
+other trails.  You are persistent, and only if you have tried all the
+trails from all the trailheads and not arrived at your destination, do
+you declare failure.  To be concrete, here is a step-by-step analysis
+of what perl does when it tries to match the regexp
+
+    "abcde" =~ /(abd|abc)(df|d|de)/;
+
+=over 4
+
+=item 0
+
+Start with the first letter in the string 'a'.
+
+=item 1
+
+Try the first alternative in the first group 'abd'.
+
+=item 2
+
+Match 'a' followed by 'b'. So far so good.
+
+=item 3
+
+'d' in the regexp doesn't match 'c' in the string - a dead
+end.  So backtrack two characters and pick the second alternative in
+the first group 'abc'.
+
+=item 4
+
+Match 'a' followed by 'b' followed by 'c'.  We are on a roll
+and have satisfied the first group. Set $1 to 'abc'.
+
+=item 5
+
+Move on to the second group and pick the first alternative
+'df'.
+
+=item 6
+
+Match the 'd'.
+
+=item 7
+
+'f' in the regexp doesn't match 'e' in the string, so a dead
+end.  Backtrack one character and pick the second alternative in the
+second group 'd'.
+
+=item 8
+
+'d' matches. The second grouping is satisfied, so set $2 to
+'d'.
+
+=item 9
+
+We are at the end of the regexp, so we are done! We have
+matched 'abcd' out of the string "abcde".
+
+=back
+
+There are a couple of things to note about this analysis.  First, the
+third alternative in the second group 'de' also allows a match, but we
+stopped before we got to it - at a given character position, leftmost
+wins.  Second, we were able to get a match at the first character
+position of the string 'a'.  If there were no matches at the first
+position, perl would move to the second character position 'b' and
+attempt the match all over again.  Only when all possible paths at all
+possible character positions have been exhausted does perl give
+up and declare S<C<$string =~ /(abd|abc)(df|d|de)/;> > to be false.
+
+Even with all this work, regexp matching happens remarkably fast.  To
+speed things up, during compilation stage, perl compiles the regexp
+into a compact sequence of opcodes that can often fit inside a
+processor cache.  When the code is executed, these opcodes can then run
+at full throttle and search very quickly.
+
+=head2 Extracting matches
+
+The grouping metacharacters C<()> also serve another completely
+different function: they allow the extraction of the parts of a string
+that matched.  This is very useful to find out what matched and for
+text processing in general.  For each grouping, the part that matched
+inside goes into the special variables C<$1>, C<$2>, etc.  They can be
+used just as ordinary variables:
+
+    # extract hours, minutes, seconds
+    $time =~ /(\d\d):(\d\d):(\d\d)/;  # match hh:mm:ss format
+    $hours = $1;
+    $minutes = $2;
+    $seconds = $3;
+
+Now, we know that in scalar context,
+S<C<$time =~ /(\d\d):(\d\d):(\d\d)/> > returns a true or false
+value.  In list context, however, it returns the list of matched values
+C<($1,$2,$3)>.  So we could write the code more compactly as
+
+    # extract hours, minutes, seconds
+    ($hours, $minutes, $seconds) = ($time =~ /(\d\d):(\d\d):(\d\d)/);
+
+If the groupings in a regexp are nested, C<$1> gets the group with the
+leftmost opening parenthesis, C<$2> the next opening parenthesis,
+etc.  For example, here is a complex regexp and the matching variables
+indicated below it:
+
+    /(ab(cd|ef)((gi)|j))/;
+     1  2      34
+
+so that if the regexp matched, e.g., C<$2> would contain 'cd' or 'ef'.
+For convenience, perl sets C<$+> to the highest numbered C<$1>, C<$2>,
+... that got assigned.
+
+Closely associated with the matching variables C<$1>, C<$2>, ... are
+the B<backreferences> C<\1>, C<\2>, ... .  Backreferences are simply
+matching variables that can be used I<inside> a regexp.  This is a
+really nice feature - what matches later in a regexp can depend on
+what matched earlier in the regexp.  Suppose we wanted to look
+for doubled words in text, like 'the the'.  The following regexp finds
+all 3-letter doubles with a space in between:
+
+    /(\w\w\w)\s\1/;
+
+The grouping assigns a value to \1, so that the same 3 letter sequence
+is used for both parts.  Here are some words with repeated parts:
+
+    % simple_grep '^(\w\w\w\w|\w\w\w|\w\w|\w)\1$' /usr/dict/words
+    beriberi
+    booboo
+    coco
+    mama
+    murmur
+    papa
+
+The regexp has a single grouping which considers 4-letter
+combinations, then 3-letter combinations, etc., and uses C<\1> to look for
+a repeat.  Although C<$1> and C<\1> represent the same thing, care should be
+taken to use matched variables C<$1>, C<$2>, ... only outside a regexp
+and backreferences C<\1>, C<\2>, ... only inside a regexp; not doing
+so may lead to surprising and/or undefined results.
+
+In addition to what was matched, Perl 5.6.0 also provides the
+positions of what was matched with the C<@-> and C<@+>
+arrays. C<$-[0]> is the position of the start of the entire match and
+C<$+[0]> is the position of the end. Similarly, C<$-[n]> is the
+position of the start of the C<$n> match and C<$+[n]> is the position
+of the end. If C<$n> is undefined, so are C<$-[n]> and C<$+[n]>. Then
+this code
+
+    $x = "Mmm...donut, thought Homer";
+    $x =~ /^(Mmm|Yech)\.\.\.(donut|peas)/; # matches
+    foreach $expr (1..$#-) {
+        print "Match $expr: '${$expr}' at position ($-[$expr],$+[$expr])\n";
+    }
+
+prints
+
+    Match 1: 'Mmm' at position (0,3)
+    Match 2: 'donut' at position (6,11)
+
+Even if there are no groupings in a regexp, it is still possible to
+find out what exactly matched in a string.  If you use them, perl
+will set C<$`> to the part of the string before the match, will set C<$&>
+to the part of the string that matched, and will set C<$'> to the part
+of the string after the match.  An example:
+
+    $x = "the cat caught the mouse";
+    $x =~ /cat/;  # $` = 'the ', $& = 'cat', $' = ' caught the mouse'
+    $x =~ /the/;  # $` = '', $& = 'the', $' = ' cat caught the mouse'
+
+In the second match, S<C<$` = ''> > because the regexp matched at the
+first character position in the string and stopped; it never saw the
+second 'the'.  It is important to note that using C<$`> and C<$'>
+slows down regexp matching quite a bit, and C< $& > slows it down to a
+lesser extent, because if they are used in one regexp in a program,
+they are generated for I<all> regexps in the program.  So if raw
+performance is a goal of your application, they should be avoided.
+If you need them, use C<@-> and C<@+> instead:
+
+    $` is the same as substr( $x, 0, $-[0] )
+    $& is the same as substr( $x, $-[0], $+[0]-$-[0] )
+    $' is the same as substr( $x, $+[0] )
+
+=head2 Matching repetitions
+
+The examples in the previous section display an annoying weakness.  We
+were only matching 3-letter words, or syllables of 4 letters or
+less.  We'd like to be able to match words or syllables of any length,
+without writing out tedious alternatives like
+C<\w\w\w\w|\w\w\w|\w\w|\w>.
+
+This is exactly the problem the B<quantifier> metacharacters C<?>,
+C<*>, C<+>, and C<{}> were created for.  They allow us to determine the
+number of repeats of a portion of a regexp we consider to be a
+match.  Quantifiers are put immediately after the character, character
+class, or grouping that we want to specify.  They have the following
+meanings:
+
+=over 4
+
+=item *
+
+C<a?> = match 'a' 1 or 0 times
+
+=item *
+
+C<a*> = match 'a' 0 or more times, i.e., any number of times
+
+=item *
+
+C<a+> = match 'a' 1 or more times, i.e., at least once
+
+=item *
+
+C<a{n,m}> = match at least C<n> times, but not more than C<m>
+times.
+
+=item *
+
+C<a{n,}> = match at least C<n> times
+
+=item *
+
+C<a{n}> = match exactly C<n> times
+
+=back
+
+Here are some examples:
+
+    /[a-z]+\s+\d*/;  # match a lowercase word, at least some space, and
+                     # any number of digits
+    /(\w+)\s+\1/;    # match doubled words of arbitrary length
+    /y(es)?/i;       # matches 'y', 'Y', or a case-insensitive 'yes'
+    $year =~ /\d{2,4}/;  # make sure year is at least 2 but not more
+                         # than 4 digits
+    $year =~ /\d{4}|\d{2}/;    # better match; throw out 3 digit dates
+    $year =~ /\d{2}(\d{2})?/;  # same thing written differently. However,
+                               # this produces $1 and the other does not.
+
+    % simple_grep '^(\w+)\1$' /usr/dict/words   # isn't this easier?
+    beriberi
+    booboo
+    coco
+    mama
+    murmur
+    papa
+
+For all of these quantifiers, perl will try to match as much of the
+string as possible, while still allowing the regexp to succeed.  Thus
+with C</a?.../>, perl will first try to match the regexp with the C<a>
+present; if that fails, perl will try to match the regexp without the
+C<a> present.  For the quantifier C<*>, we get the following:
+
+    $x = "the cat in the hat";
+    $x =~ /^(.*)(cat)(.*)$/; # matches,
+                             # $1 = 'the '
+                             # $2 = 'cat'
+                             # $3 = ' in the hat'
+
+This is what we might expect: the match finds the only C<cat> in the
+string and locks onto it.  Consider, however, this regexp:
+
+    $x =~ /^(.*)(at)(.*)$/; # matches,
+                            # $1 = 'the cat in the h'
+                            # $2 = 'at'
+                            # $3 = ''   (0 matches)
+
+One might initially guess that perl would find the C<at> in C<cat> and
+stop there, but that wouldn't give the longest possible string to the
+first quantifier C<.*>.  Instead, the first quantifier C<.*> grabs as
+much of the string as possible while still having the regexp match.  In
+this example, that means having the C<at> sequence with the final C<at>
+in the string.  The other important principle illustrated here is that
+when there are two or more elements in a regexp, the I<leftmost>
+quantifier, if there is one, gets to grab as much of the string as
+possible, leaving the rest of the regexp to fight over scraps.  Thus in
+our example, the first quantifier C<.*> grabs most of the string, while
+the second quantifier C<.*> gets the empty string.   Quantifiers that
+grab as much of the string as possible are called B<maximal match> or
+B<greedy> quantifiers.
+
+When a regexp can match a string in several different ways, we can use
+the principles above to predict which way the regexp will match:
+
+=over 4
+
+=item *
+
+Principle 0: Taken as a whole, any regexp will be matched at the
+earliest possible position in the string.
+
+=item *
+
+Principle 1: In an alternation C<a|b|c...>, the leftmost alternative
+that allows a match for the whole regexp will be the one used.
+
+=item *
+
+Principle 2: The maximal matching quantifiers C<?>, C<*>, C<+> and
+C<{n,m}> will in general match as much of the string as possible while
+still allowing the whole regexp to match.
+
+=item *
+
+Principle 3: If there are two or more elements in a regexp, the
+leftmost greedy quantifier, if any, will match as much of the string
+as possible while still allowing the whole regexp to match.  The next
+leftmost greedy quantifier, if any, will try to match as much of the
+string remaining available to it as possible, while still allowing the
+whole regexp to match.  And so on, until all the regexp elements are
+satisfied.
+
+=back
+
+As we have seen above, Principle 0 overrides the others - the regexp
+will be matched as early as possible, with the other principles
+determining how the regexp matches at that earliest character
+position.
+
+Here is an example of these principles in action:
+
+    $x = "The programming republic of Perl";
+    $x =~ /^(.+)(e|r)(.*)$/;  # matches,
+                              # $1 = 'The programming republic of Pe'
+                              # $2 = 'r'
+                              # $3 = 'l'
+
+This regexp matches at the earliest string position, C<'T'>.  One
+might think that C<e>, being leftmost in the alternation, would be
+matched, but C<r> produces the longest string in the first quantifier.
+
+    $x =~ /(m{1,2})(.*)$/;  # matches,
+                            # $1 = 'mm'
+                            # $2 = 'ing republic of Perl'
+
+Here, the earliest possible match is at the first C<'m'> in
+C<programming>. C<m{1,2}> is the first quantifier, so it gets to match
+a maximal C<mm>.
+
+    $x =~ /.*(m{1,2})(.*)$/;  # matches,
+                              # $1 = 'm'
+                              # $2 = 'ing republic of Perl'
+
+Here, the regexp matches at the start of the string. The first
+quantifier C<.*> grabs as much as possible, leaving just a single
+C<'m'> for the second quantifier C<m{1,2}>.
+
+    $x =~ /(.?)(m{1,2})(.*)$/;  # matches,
+                                # $1 = 'a'
+                                # $2 = 'mm'
+                                # $3 = 'ing republic of Perl'
+
+Here, C<.?> eats its maximal one character at the earliest possible
+position in the string, C<'a'> in C<programming>, leaving C<m{1,2}>
+the opportunity to match both C<m>'s. Finally,
+
+    "aXXXb" =~ /(X*)/; # matches with $1 = ''
+
+because it can match zero copies of C<'X'> at the beginning of the
+string.  If you definitely want to match at least one C<'X'>, use
+C<X+>, not C<X*>.
+
+Sometimes greed is not good.  At times, we would like quantifiers to
+match a I<minimal> piece of string, rather than a maximal piece.  For
+this purpose, Larry Wall created the S<B<minimal match> > or
+B<non-greedy> quantifiers C<??>, C<*?>, C<+?>, and C<{}?>.  These are
+the usual quantifiers with a C<?> appended to them.  They have the
+following meanings:
+
+=over 4
+
+=item *
+
+C<a??> = match 'a' 0 or 1 times. Try 0 first, then 1.
+
+=item *
+
+C<a*?> = match 'a' 0 or more times, i.e., any number of times,
+but as few times as possible
+
+=item *
+
+C<a+?> = match 'a' 1 or more times, i.e., at least once, but
+as few times as possible
+
+=item *
+
+C<a{n,m}?> = match at least C<n> times, not more than C<m>
+times, as few times as possible
+
+=item *
+
+C<a{n,}?> = match at least C<n> times, but as few times as
+possible
+
+=item *
+
+C<a{n}?> = match exactly C<n> times.  Because we match exactly
+C<n> times, C<a{n}?> is equivalent to C<a{n}> and is just there for
+notational consistency.
+
+=back
+
+Let's look at the example above, but with minimal quantifiers:
+
+    $x = "The programming republic of Perl";
+    $x =~ /^(.+?)(e|r)(.*)$/; # matches,
+                              # $1 = 'Th'
+                              # $2 = 'e'
+                              # $3 = ' programming republic of Perl'
+
+The minimal string that will allow both the start of the string C<^>
+and the alternation to match is C<Th>, with the alternation C<e|r>
+matching C<e>.  The second quantifier C<.*> is free to gobble up the
+rest of the string.
+
+    $x =~ /(m{1,2}?)(.*?)$/;  # matches,
+                              # $1 = 'm'
+                              # $2 = 'ming republic of Perl'
+
+The first string position that this regexp can match is at the first
+C<'m'> in C<programming>. At this position, the minimal C<m{1,2}?>
+matches just one C<'m'>.  Although the second quantifier C<.*?> would
+prefer to match no characters, it is constrained by the end-of-string
+anchor C<$> to match the rest of the string.
+
+    $x =~ /(.*?)(m{1,2}?)(.*)$/;  # matches,
+                                  # $1 = 'The progra'
+                                  # $2 = 'm'
+                                  # $3 = 'ming republic of Perl'
+
+In this regexp, you might expect the first minimal quantifier C<.*?>
+to match the empty string, because it is not constrained by a C<^>
+anchor to match the beginning of the word.  Principle 0 applies here,
+however.  Because it is possible for the whole regexp to match at the
+start of the string, it I<will> match at the start of the string.  Thus
+the first quantifier has to match everything up to the first C<m>.  The
+second minimal quantifier matches just one C<m> and the third
+quantifier matches the rest of the string.
+
+    $x =~ /(.??)(m{1,2})(.*)$/;  # matches,
+                                 # $1 = 'a'
+                                 # $2 = 'mm'
+                                 # $3 = 'ing republic of Perl'
+
+Just as in the previous regexp, the first quantifier C<.??> can match
+earliest at position C<'a'>, so it does.  The second quantifier is
+greedy, so it matches C<mm>, and the third matches the rest of the
+string.
+
+We can modify principle 3 above to take into account non-greedy
+quantifiers:
+
+=over 4
+
+=item *
+
+Principle 3: If there are two or more elements in a regexp, the
+leftmost greedy (non-greedy) quantifier, if any, will match as much
+(little) of the string as possible while still allowing the whole
+regexp to match.  The next leftmost greedy (non-greedy) quantifier, if
+any, will try to match as much (little) of the string remaining
+available to it as possible, while still allowing the whole regexp to
+match.  And so on, until all the regexp elements are satisfied.
+
+=back
+
+Just like alternation, quantifiers are also susceptible to
+backtracking.  Here is a step-by-step analysis of the example
+
+    $x = "the cat in the hat";
+    $x =~ /^(.*)(at)(.*)$/; # matches,
+                            # $1 = 'the cat in the h'
+                            # $2 = 'at'
+                            # $3 = ''   (0 matches)
+
+=over 4
+
+=item 0
+
+Start with the first letter in the string 't'.
+
+=item 1
+
+The first quantifier '.*' starts out by matching the whole
+string 'the cat in the hat'.
+
+=item 2
+
+'a' in the regexp element 'at' doesn't match the end of the
+string.  Backtrack one character.
+
+=item 3
+
+'a' in the regexp element 'at' still doesn't match the last
+letter of the string 't', so backtrack one more character.
+
+=item 4
+
+Now we can match the 'a' and the 't'.
+
+=item 5
+
+Move on to the third element '.*'.  Since we are at the end of
+the string and '.*' can match 0 times, assign it the empty string.
+
+=item 6
+
+We are done!
+
+=back
+
+Most of the time, all this moving forward and backtracking happens
+quickly and searching is fast.   There are some pathological regexps,
+however, whose execution time exponentially grows with the size of the
+string.  A typical structure that blows up in your face is of the form
+
+    /(a|b+)*/;
+
+The problem is the nested indeterminate quantifiers.  There are many
+different ways of partitioning a string of length n between the C<+>
+and C<*>: one repetition with C<b+> of length n, two repetitions with
+the first C<b+> length k and the second with length n-k, m repetitions
+whose bits add up to length n, etc.  In fact there are an exponential
+number of ways to partition a string as a function of length.  A
+regexp may get lucky and match early in the process, but if there is
+no match, perl will try I<every> possibility before giving up.  So be
+careful with nested C<*>'s, C<{n,m}>'s, and C<+>'s.  The book
+I<Mastering Regular Expressions> by Jeffrey Friedl gives a wonderful
+discussion of this and other efficiency issues.
+
+=head2 Building a regexp
+
+At this point, we have all the basic regexp concepts covered, so let's
+give a more involved example of a regular expression.  We will build a
+regexp that matches numbers.
+
+The first task in building a regexp is to decide what we want to match
+and what we want to exclude.  In our case, we want to match both
+integers and floating point numbers and we want to reject any string
+that isn't a number.
+
+The next task is to break the problem down into smaller problems that
+are easily converted into a regexp.
+
+The simplest case is integers.  These consist of a sequence of digits,
+with an optional sign in front.  The digits we can represent with
+C<\d+> and the sign can be matched with C<[+-]>.  Thus the integer
+regexp is
+
+    /[+-]?\d+/;  # matches integers
+
+A floating point number potentially has a sign, an integral part, a
+decimal point, a fractional part, and an exponent.  One or more of these
+parts is optional, so we need to check out the different
+possibilities.  Floating point numbers which are in proper form include
+123., 0.345, .34, -1e6, and 25.4E-72.  As with integers, the sign out
+front is completely optional and can be matched by C<[+-]?>.  We can
+see that if there is no exponent, floating point numbers must have a
+decimal point, otherwise they are integers.  We might be tempted to
+model these with C<\d*\.\d*>, but this would also match just a single
+decimal point, which is not a number.  So the three cases of floating
+point number sans exponent are
+
+   /[+-]?\d+\./;  # 1., 321., etc.
+   /[+-]?\.\d+/;  # .1, .234, etc.
+   /[+-]?\d+\.\d+/;  # 1.0, 30.56, etc.
+
+These can be combined into a single regexp with a three-way alternation:
+
+   /[+-]?(\d+\.\d+|\d+\.|\.\d+)/;  # floating point, no exponent
+
+In this alternation, it is important to put C<'\d+\.\d+'> before
+C<'\d+\.'>.  If C<'\d+\.'> were first, the regexp would happily match that
+and ignore the fractional part of the number.
+
+Now consider floating point numbers with exponents.  The key
+observation here is that I<both> integers and numbers with decimal
+points are allowed in front of an exponent.  Then exponents, like the
+overall sign, are independent of whether we are matching numbers with
+or without decimal points, and can be 'decoupled' from the
+mantissa.  The overall form of the regexp now becomes clear:
+
+    /^(optional sign)(integer | f.p. mantissa)(optional exponent)$/;
+
+The exponent is an C<e> or C<E>, followed by an integer.  So the
+exponent regexp is
+
+   /[eE][+-]?\d+/;  # exponent
+
+Putting all the parts together, we get a regexp that matches numbers:
+
+   /^[+-]?(\d+\.\d+|\d+\.|\.\d+|\d+)([eE][+-]?\d+)?$/;  # Ta da!
+
+Long regexps like this may impress your friends, but can be hard to
+decipher.  In complex situations like this, the C<//x> modifier for a
+match is invaluable.  It allows one to put nearly arbitrary whitespace
+and comments into a regexp without affecting its meaning.  Using it,
+we can rewrite our 'extended' regexp in the more pleasing form
+
+   /^
+      [+-]?         # first, match an optional sign
+      (             # then match integers or f.p. mantissas:
+          \d+\.\d+  # mantissa of the form a.b
+         |\d+\.     # mantissa of the form a.
+         |\.\d+     # mantissa of the form .b
+         |\d+       # integer of the form a
+      )
+      ([eE][+-]?\d+)?  # finally, optionally match an exponent
+   $/x;
+
+If whitespace is mostly irrelevant, how does one include space
+characters in an extended regexp? The answer is to backslash it
+S<C<'\ '> > or put it in a character class S<C<[ ]> >.  The same thing
+goes for pound signs, use C<\#> or C<[#]>.  For instance, Perl allows
+a space between the sign and the mantissa/integer, and we could add
+this to our regexp as follows:
+
+   /^
+      [+-]?\ *      # first, match an optional sign *and space*
+      (             # then match integers or f.p. mantissas:
+          \d+\.\d+  # mantissa of the form a.b
+         |\d+\.     # mantissa of the form a.
+         |\.\d+     # mantissa of the form .b
+         |\d+       # integer of the form a
+      )
+      ([eE][+-]?\d+)?  # finally, optionally match an exponent
+   $/x;
+
+In this form, it is easier to see a way to simplify the
+alternation.  Alternatives 1, 2, and 4 all start with C<\d+>, so it
+could be factored out:
+
+   /^
+      [+-]?\ *      # first, match an optional sign
+      (             # then match integers or f.p. mantissas:
+          \d+       # start out with a ...
+          (
+              \.\d* # mantissa of the form a.b or a.
+          )?        # ? takes care of integers of the form a
+         |\.\d+     # mantissa of the form .b
+      )
+      ([eE][+-]?\d+)?  # finally, optionally match an exponent
+   $/x;
+
+or written in the compact form,
+
+    /^[+-]?\ *(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?$/;
+
+This is our final regexp.  To recap, we built a regexp by
+
+=over 4
+
+=item *
+
+specifying the task in detail,
+
+=item *
+
+breaking down the problem into smaller parts,
+
+=item *
+
+translating the small parts into regexps,
+
+=item *
+
+combining the regexps,
+
+=item *
+
+and optimizing the final combined regexp.
+
+=back
+
+These are also the typical steps involved in writing a computer
+program.  This makes perfect sense, because regular expressions are
+essentially programs written in a little computer language that specifies
+patterns.
+
+=head2 Using regular expressions in Perl
+
+The last topic of Part 1 briefly covers how regexps are used in Perl
+programs.  Where do they fit into Perl syntax?
+
+We have already introduced the matching operator in its default
+C</regexp/> and arbitrary delimiter C<m!regexp!> forms.  We have used
+the binding operator C<=~> and its negation C<!~> to test for string
+matches.  Associated with the matching operator, we have discussed the
+single line C<//s>, multi-line C<//m>, case-insensitive C<//i> and
+extended C<//x> modifiers.
+
+There are a few more things you might want to know about matching
+operators.  First, we pointed out earlier that variables in regexps are
+substituted before the regexp is evaluated:
+
+    $pattern = 'Seuss';
+    while (<>) {
+        print if /$pattern/;
+    }
+
+This will print any lines containing the word C<Seuss>.  It is not as
+efficient as it could be, however, because perl has to re-evaluate
+C<$pattern> each time through the loop.  If C<$pattern> won't be
+changing over the lifetime of the script, we can add the C<//o>
+modifier, which directs perl to only perform variable substitutions
+once:
+
+    #!/usr/bin/perl
+    #    Improved simple_grep
+    $regexp = shift;
+    while (<>) {
+        print if /$regexp/o;  # a good deal faster
+    }
+
+If you change C<$pattern> after the first substitution happens, perl
+will ignore it.  If you don't want any substitutions at all, use the
+special delimiter C<m''>:
+
+    $pattern = 'Seuss';
+    while (<>) {
+        print if m'$pattern';  # matches '$pattern', not 'Seuss'
+    }
+
+C<m''> acts like single quotes on a regexp; all other C<m> delimiters
+act like double quotes.  If the regexp evaluates to the empty string,
+the regexp in the I<last successful match> is used instead.  So we have
+
+    "dog" =~ /d/;  # 'd' matches
+    "dogbert" =~ //;  # this matches the 'd' regexp used before
+
+The final two modifiers C<//g> and C<//c> concern multiple matches.
+The modifier C<//g> stands for global matching and allows the
+matching operator to match within a string as many times as possible.
+In scalar context, successive invocations against a string will have
+C<//g> jump from match to match, keeping track of position in the
+string as it goes along.  You can get or set the position with the
+C<pos()> function.
+
+The use of C<//g> is shown in the following example.  Suppose we have
+a string that consists of words separated by spaces.  If we know how
+many words there are in advance, we could extract the words using
+groupings:
+
+    $x = "cat dog house"; # 3 words
+    $x =~ /^\s*(\w+)\s+(\w+)\s+(\w+)\s*$/; # matches,
+                                           # $1 = 'cat'
+                                           # $2 = 'dog'
+                                           # $3 = 'house'
+
+But what if we had an indeterminate number of words? This is the sort
+of task C<//g> was made for.  To extract all words, form the simple
+regexp C<(\w+)> and loop over all matches with C</(\w+)/g>:
+
+    while ($x =~ /(\w+)/g) {
+        print "Word is $1, ends at position ", pos $x, "\n";
+    }
+
+prints
+
+    Word is cat, ends at position 3
+    Word is dog, ends at position 7
+    Word is house, ends at position 13
+
+A failed match or changing the target string resets the position.  If
+you don't want the position reset after failure to match, add the
+C<//c> modifier, as in C</regexp/gc>.  The current position in the string is
+associated with the string, not the regexp.  This means that different
+strings have different positions and their respective positions can be
+set or read independently.
+
+In list context, C<//g> returns a list of matched groupings, or if
+there are no groupings, a list of matches to the whole regexp.  So if
+we wanted just the words, we could use
+
+    @words = ($x =~ /(\w+)/g);  # matches,
+                                # $words[0] = 'cat'
+                                # $words[1] = 'dog'
+                                # $words[2] = 'house'
+
+Closely associated with the C<//g> modifier is the C<\G> anchor.  The
+C<\G> anchor matches at the point where the previous C<//g> match left
+off.  C<\G> allows us to easily do context-sensitive matching:
+
+    $metric = 1;  # use metric units
+    ...
+    $x = <FILE>;  # read in measurement
+    $x =~ /^([+-]?\d+)\s*/g;  # get magnitude
+    $weight = $1;
+    if ($metric) { # error checking
+        print "Units error!" unless $x =~ /\Gkg\./g;
+    }
+    else {
+        print "Units error!" unless $x =~ /\Glbs\./g;
+    }
+    $x =~ /\G\s+(widget|sprocket)/g;  # continue processing
+
+The combination of C<//g> and C<\G> allows us to process the string a
+bit at a time and use arbitrary Perl logic to decide what to do next.
+
+C<\G> is also invaluable in processing fixed length records with
+regexps.  Suppose we have a snippet of coding region DNA, encoded as
+base pair letters C<ATCGTTGAAT...> and we want to find all the stop
+codons C<TGA>.  In a coding region, codons are 3-letter sequences, so
+we can think of the DNA snippet as a sequence of 3-letter records.  The
+naive regexp
+
+    # expanded, this is "ATC GTT GAA TGC AAA TGA CAT GAC"
+    $dna = "ATCGTTGAATGCAAATGACATGAC";
+    $dna =~ /TGA/;
+
+doesn't work; it may match a C<TGA>, but there is no guarantee that
+the match is aligned with codon boundaries, e.g., the substring
+S<C<GTT GAA> > gives a match.  A better solution is
+
+    while ($dna =~ /(\w\w\w)*?TGA/g) {  # note the minimal *?
+        print "Got a TGA stop codon at position ", pos $dna, "\n";
+    }
+
+which prints
+
+    Got a TGA stop codon at position 18
+    Got a TGA stop codon at position 23
+
+Position 18 is good, but position 23 is bogus.  What happened?
+
+The answer is that our regexp works well until we get past the last
+real match.  Then the regexp will fail to match a synchronized C<TGA>
+and start stepping ahead one character position at a time, not what we
+want.  The solution is to use C<\G> to anchor the match to the codon
+alignment:
+
+    while ($dna =~ /\G(\w\w\w)*?TGA/g) {
+        print "Got a TGA stop codon at position ", pos $dna, "\n";
+    }
+
+This prints
+
+    Got a TGA stop codon at position 18
+
+which is the correct answer.  This example illustrates that it is
+important not only to match what is desired, but to reject what is not
+desired.
+
+B<Search and replace>
+
+Regular expressions also play a big role in B<search and replace>
+operations in Perl.  Search and replace is accomplished with the
+C<s///> operator.  The general form is
+C<s/regexp/replacement/modifiers>, with everything we know about
+regexps and modifiers applying in this case as well.  The
+C<replacement> is a Perl double quoted string that replaces in the
+string whatever is matched with the C<regexp>.  The operator C<=~> is
+also used here to associate a string with C<s///>.  If matching
+against C<$_>, the S<C<$_ =~> > can be dropped.  If there is a match,
+C<s///> returns the number of substitutions made, otherwise it returns
+false.  Here are a few examples:
+
+    $x = "Time to feed the cat!";
+    $x =~ s/cat/hacker/;   # $x contains "Time to feed the hacker!"
+    if ($x =~ s/^(Time.*hacker)!$/$1 now!/) {
+        $more_insistent = 1;
+    }
+    $y = "'quoted words'";
+    $y =~ s/^'(.*)'$/$1/;  # strip single quotes,
+                           # $y contains "quoted words"
+
+In the last example, the whole string was matched, but only the part
+inside the single quotes was grouped.  With the C<s///> operator, the
+matched variables C<$1>, C<$2>, etc.  are immediately available for use
+in the replacement expression, so we use C<$1> to replace the quoted
+string with just what was quoted.  With the global modifier, C<s///g>
+will search and replace all occurrences of the regexp in the string:
+
+    $x = "I batted 4 for 4";
+    $x =~ s/4/four/;   # doesn't do it all:
+                       # $x contains "I batted four for 4"
+    $x = "I batted 4 for 4";
+    $x =~ s/4/four/g;  # does it all:
+                       # $x contains "I batted four for four"
+
+If you prefer 'regex' over 'regexp' in this tutorial, you could use
+the following program to replace it:
+
+    % cat > simple_replace
+    #!/usr/bin/perl
+    $regexp = shift;
+    $replacement = shift;
+    while (<>) {
+        s/$regexp/$replacement/go;
+        print;
+    }
+    ^D
+
+    % simple_replace regexp regex perlretut.pod
+
+In C<simple_replace> we used the C<s///g> modifier to replace all
+occurrences of the regexp on each line and the C<s///o> modifier to
+compile the regexp only once.  As with C<simple_grep>, both the
+C<print> and the C<s/$regexp/$replacement/go> use C<$_> implicitly.
+
+A modifier available specifically to search and replace is the
+C<s///e> evaluation modifier.  C<s///e> wraps an C<eval{...}> around
+the replacement string and the evaluated result is substituted for the
+matched substring.  C<s///e> is useful if you need to do a bit of
+computation in the process of replacing text.  This example counts
+character frequencies in a line:
+
+    $x = "Bill the cat";
+    $x =~ s/(.)/$chars{$1}++;$1/eg;  # final $1 replaces char with itself
+    print "frequency of '$_' is $chars{$_}\n"
+        foreach (sort {$chars{$b} <=> $chars{$a}} keys %chars);
+
+This prints
+
+    frequency of ' ' is 2
+    frequency of 't' is 2
+    frequency of 'l' is 2
+    frequency of 'B' is 1
+    frequency of 'c' is 1
+    frequency of 'e' is 1
+    frequency of 'h' is 1
+    frequency of 'i' is 1
+    frequency of 'a' is 1
+
+As with the match C<m//> operator, C<s///> can use other delimiters,
+such as C<s!!!> and C<s{}{}>, and even C<s{}//>.  If single quotes are
+used C<s'''>, then the regexp and replacement are treated as single
+quoted strings and there are no substitutions.  C<s///> in list context
+returns the same thing as in scalar context, i.e., the number of
+matches.
+
+B<The split operator>
+
+The B<C<split> > function can also optionally use a matching operator
+C<m//> to split a string.  C<split /regexp/, string, limit> splits
+C<string> into a list of substrings and returns that list.  The regexp
+is used to match the character sequence that the C<string> is split
+with respect to.  The C<limit>, if present, constrains splitting into
+no more than C<limit> number of strings.  For example, to split a
+string into words, use
+
+    $x = "Calvin and Hobbes";
+    @words = split /\s+/, $x;  # $words[0] = 'Calvin'
+                               # $words[1] = 'and'
+                               # $words[2] = 'Hobbes'
+
+If the empty regexp C<//> is used, the regexp always matches and
+the string is split into individual characters.  If the regexp has
+groupings, then the list produced contains the matched substrings from the
+groupings as well.  For instance,
+
+    $x = "/usr/bin/perl";
+    @dirs = split m!/!, $x;  # $dirs[0] = ''
+                             # $dirs[1] = 'usr'
+                             # $dirs[2] = 'bin'
+                             # $dirs[3] = 'perl'
+    @parts = split m!(/)!, $x;  # $parts[0] = ''
+                                # $parts[1] = '/'
+                                # $parts[2] = 'usr'
+                                # $parts[3] = '/'
+                                # $parts[4] = 'bin'
+                                # $parts[5] = '/'
+                                # $parts[6] = 'perl'
+
+Since the first character of C<$x> matched the regexp, C<split> prepended
+an empty initial element to the list.
+
+If you have read this far, congratulations! You now have all the basic
+tools needed to use regular expressions to solve a wide range of text
+processing problems.  If this is your first time through the tutorial,
+why not stop here and play around with regexps a while...  S<Part 2>
+concerns the more esoteric aspects of regular expressions and those
+concepts certainly aren't needed right at the start.
+
+=head1 Part 2: Power tools
+
+OK, you know the basics of regexps and you want to know more.  If
+matching regular expressions is analogous to a walk in the woods, then
+the tools discussed in Part 1 are analogous to topo maps and a
+compass, basic tools we use all the time.  Most of the tools in part 2
+are analogous to flare guns and satellite phones.  They aren't used
+too often on a hike, but when we are stuck, they can be invaluable.
+
+What follows are the more advanced, less used, or sometimes esoteric
+capabilities of perl regexps.  In Part 2, we will assume you are
+comfortable with the basics and concentrate on the new features.
+
+=head2 More on characters, strings, and character classes
+
+There are a number of escape sequences and character classes that we
+haven't covered yet.
+
+There are several escape sequences that convert characters or strings
+between upper and lower case.  C<\l> and C<\u> convert the next
+character to lower or upper case, respectively:
+
+    $x = "perl";
+    $string =~ /\u$x/;  # matches 'Perl' in $string
+    $x = "M(rs?|s)\\."; # note the double backslash
+    $string =~ /\l$x/;  # matches 'mr.', 'mrs.', and 'ms.'
+
+C<\L> and C<\U> convert a whole substring, delimited by C<\L> or
+C<\U> and C<\E>, to lower or upper case:
+
+    $x = "This word is in lower case:\L SHOUT\E";
+    $x =~ /shout/;       # matches
+    $x = "I STILL KEYPUNCH CARDS FOR MY 360";
+    $x =~ /\Ukeypunch/;  # matches punch card string
+
+If there is no C<\E>, case is converted until the end of the
+string. The regexps C<\L\u$word> or C<\u\L$word> convert the first
+character of C<$word> to uppercase and the rest of the characters to
+lowercase.
+
+Control characters can be escaped with C<\c>, so that a control-Z
+character would be matched with C<\cZ>.  The escape sequence
+C<\Q>...C<\E> quotes, or protects most non-alphabetic characters.   For
+instance,
+
+    $x = "\QThat !^*&%~& cat!";
+    $x =~ /\Q!^*&%~&\E/;  # check for rough language
+
+It does not protect C<$> or C<@>, so that variables can still be
+substituted.
+
+With the advent of 5.6.0, perl regexps can handle more than just the
+standard ASCII character set.  Perl now supports B<Unicode>, a standard
+for encoding the character sets from many of the world's written
+languages.  Unicode does this by allowing characters to be more than
+one byte wide.  Perl uses the UTF-8 encoding, in which ASCII characters
+are still encoded as one byte, but characters greater than C<chr(127)>
+may be stored as two or more bytes.
+
+What does this mean for regexps? Well, regexp users don't need to know
+much about perl's internal representation of strings.  But they do need
+to know 1) how to represent Unicode characters in a regexp and 2) when
+a matching operation will treat the string to be searched as a
+sequence of bytes (the old way) or as a sequence of Unicode characters
+(the new way).  The answer to 1) is that Unicode characters greater
+than C<chr(127)> may be represented using the C<\x{hex}> notation,
+with C<hex> a hexadecimal integer:
+
+    use utf8;    # We will be doing Unicode processing
+    /\x{263a}/;  # match a Unicode smiley face :)
+
+Unicode characters in the range of 128-255 use two hexadecimal digits
+with braces: C<\x{ab}>.  Note that this is different than C<\xab>,
+which is just a hexadecimal byte with no Unicode
+significance.
+
+Figuring out the hexadecimal sequence of a Unicode character you want
+or deciphering someone else's hexadecimal Unicode regexp is about as
+much fun as programming in machine code.  So another way to specify
+Unicode characters is to use the S<B<named character> > escape
+sequence C<\N{name}>.  C<name> is a name for the Unicode character, as
+specified in the Unicode standard.  For instance, if we wanted to
+represent or match the astrological sign for the planet Mercury, we
+could use
+
+    use utf8;              # We will be doing Unicode processing
+    use charnames ":full"; # use named chars with Unicode full names
+    $x = "abc\N{MERCURY}def";
+    $x =~ /\N{MERCURY}/;   # matches
+
+One can also use short names or restrict names to a certain alphabet:
+
+    use utf8;              # We will be doing Unicode processing
+
+    use charnames ':full';
+    print "\N{GREEK SMALL LETTER SIGMA} is called sigma.\n";
+
+    use charnames ":short";
+    print "\N{greek:Sigma} is an upper-case sigma.\n";
+
+    use charnames qw(greek);
+    print "\N{sigma} is Greek sigma\n";
+
+A list of full names is found in the file Names.txt in the
+lib/perl5/5.6.0/unicode directory.
+
+The answer to requirement 2), as of 5.6.0, is that if a regexp
+contains Unicode characters, the string is searched as a sequence of
+Unicode characters.  Otherwise, the string is searched as a sequence of
+bytes.  If the string is being searched as a sequence of Unicode
+characters, but matching a single byte is required, we can use the C<\C>
+escape sequence.  C<\C> is a character class akin to C<.> except that
+it matches I<any> byte 0-255.  So
+
+    use utf8;              # We will be doing Unicode processing
+    use charnames ":full"; # use named chars with Unicode full names
+    $x = "a";
+    $x =~ /\C/;  # matches 'a', eats one byte
+    $x = "";
+    $x =~ /\C/;  # doesn't match, no bytes to match
+    $x = "\N{MERCURY}";  # two-byte Unicode character
+    $x =~ /\C/;  # matches, but dangerous!
+
+The last regexp matches, but is dangerous because the string
+I<character> position is no longer synchronized to the string I<byte>
+position.  This generates the warning 'Malformed UTF-8
+character'.  C<\C> is best used for matching the binary data in strings
+with binary data intermixed with Unicode characters.
+
+Let us now discuss the rest of the character classes.  Just as with
+Unicode characters, there are named Unicode character classes
+represented by the C<\p{name}> escape sequence.  Closely associated is
+the C<\P{name}> character class, which is the negation of the
+C<\p{name}> class.  For example, to match lower and uppercase
+characters,
+
+    use utf8;              # We will be doing Unicode processing
+    use charnames ":full"; # use named chars with Unicode full names
+    $x = "BOB";
+    $x =~ /^\p{IsUpper}/;   # matches, uppercase char class
+    $x =~ /^\P{IsUpper}/;   # doesn't match, char class sans uppercase
+    $x =~ /^\p{IsLower}/;   # doesn't match, lowercase char class
+    $x =~ /^\P{IsLower}/;   # matches, char class sans lowercase
+
+Here is the association between some Perl named classes and the
+traditional Unicode classes:
+
+    Perl class name  Unicode class name or regular expression
+
+    IsAlpha          /^[LM]/
+    IsAlnum          /^[LMN]/
+    IsASCII          $code <= 127
+    IsCntrl          /^C/
+    IsBlank          $code =~ /^(0020|0009)$/ || /^Z[^lp]/
+    IsDigit          Nd
+    IsGraph          /^([LMNPS]|Co)/
+    IsLower          Ll
+    IsPrint          /^([LMNPS]|Co|Zs)/
+    IsPunct          /^P/
+    IsSpace          /^Z/ || ($code =~ /^(0009|000A|000B|000C|000D)$/)
+    IsSpacePerl      /^Z/ || ($code =~ /^(0009|000A|000C|000D)$/)
+    IsUpper          /^L[ut]/
+    IsWord           /^[LMN]/ || $code eq "005F"
+    IsXDigit         $code =~ /^00(3[0-9]|[46][1-6])$/
+
+You can also use the official Unicode class names with the C<\p> and
+C<\P>, like C<\p{L}> for Unicode 'letters', or C<\p{Lu}> for uppercase
+letters, or C<\P{Nd}> for non-digits.  If a C<name> is just one
+letter, the braces can be dropped.  For instance, C<\pM> is the
+character class of Unicode 'marks'.
+
+C<\X> is an abbreviation for a character class sequence that includes
+the Unicode 'combining character sequences'.  A 'combining character
+sequence' is a base character followed by any number of combining
+characters.  An example of a combining character is an accent.   Using
+the Unicode full names, e.g., S<C<A + COMBINING RING> > is a combining
+character sequence with base character C<A> and combining character
+S<C<COMBINING RING> >, which translates in Danish to A with the circle
+atop it, as in the word Angstrom.  C<\X> is equivalent to C<\PM\pM*>,
+i.e., a non-mark followed by zero or more marks.
+
+As if all those classes weren't enough, Perl also defines POSIX style
+character classes.  These have the form C<[:name:]>, with C<name> the
+name of the POSIX class.  The POSIX classes are C<alpha>, C<alnum>,
+C<ascii>, C<cntrl>, C<digit>, C<graph>, C<lower>, C<print>, C<punct>,
+C<space>, C<upper>, and C<xdigit>, and two extensions, C<word> (a Perl
+extension to match C<\w>), and C<blank> (a GNU extension).  If C<utf8>
+is being used, then these classes are defined the same as their
+corresponding perl Unicode classes: C<[:upper:]> is the same as
+C<\p{IsUpper}>, etc.  The POSIX character classes, however, don't
+require using C<utf8>.  The C<[:digit:]>, C<[:word:]>, and
+C<[:space:]> correspond to the familiar C<\d>, C<\w>, and C<\s>
+character classes.  To negate a POSIX class, put a C<^> in front of
+the name, so that, e.g., C<[:^digit:]> corresponds to C<\D> and under
+C<utf8>, C<\P{IsDigit}>.  The Unicode and POSIX character classes can
+be used just like C<\d>, with the exception that POSIX character
+classes can only be used inside of a bracketed character class:
+
+    /\s+[abc[:digit:]xyz]\s*/;  # match a,b,c,x,y,z, or a digit
+    /^=item\s[[:digit:]]/;      # match '=item',
+                                # followed by a space and a digit
+    use utf8;
+    use charnames ":full";
+    /\s+[abc\p{IsDigit}xyz]\s+/;  # match a,b,c,x,y,z, or a digit
+    /^=item\s\p{IsDigit}/;        # match '=item',
+                                  # followed by a space and a digit
+
+Whew! That is all the rest of the characters and character classes.
+
+=head2 Compiling and saving regular expressions
+
+In Part 1 we discussed the C<//o> modifier, which compiles a regexp
+just once.  This suggests that a compiled regexp is some data structure
+that can be stored once and used again and again.  The regexp quote
+C<qr//> does exactly that: C<qr/string/> compiles the C<string> as a
+regexp and transforms the result into a form that can be assigned to a
+variable:
+
+    $reg = qr/foo+bar?/;  # reg contains a compiled regexp
+
+Then C<$reg> can be used as a regexp:
+
+    $x = "fooooba";
+    $x =~ $reg;     # matches, just like /foo+bar?/
+    $x =~ /$reg/;   # same thing, alternate form
+
+C<$reg> can also be interpolated into a larger regexp:
+
+    $x =~ /(abc)?$reg/;  # still matches
+
+As with the matching operator, the regexp quote can use different
+delimiters, e.g., C<qr!!>, C<qr{}> and C<qr~~>.  The single quote
+delimiters C<qr''> prevent any interpolation from taking place.
+
+Pre-compiled regexps are useful for creating dynamic matches that
+don't need to be recompiled each time they are encountered.  Using
+pre-compiled regexps, the C<simple_grep> program can be expanded into a
+program that matches multiple patterns:
+
+    % cat > multi_grep
+    #!/usr/bin/perl
+    # multi_grep - match any of <number> regexps
+    # usage: multi_grep <number> regexp1 regexp2 ... file1 file2 ...
+
+    $number = shift;
+    $regexp[$_] = shift foreach (0..$number-1);
+    @compiled = map qr/$_/, @regexp;
+    while ($line = <>) {
+        foreach $pattern (@compiled) {
+            if ($line =~ /$pattern/) {
+                print $line;
+                last;  # we matched, so move onto the next line
+            }
+        }
+    }
+    ^D
+
+    % multi_grep 2 last for multi_grep
+        $regexp[$_] = shift foreach (0..$number-1);
+            foreach $pattern (@compiled) {
+                    last;
+
+Storing pre-compiled regexps in an array C<@compiled> allows us to
+simply loop through the regexps without any recompilation, thus gaining
+flexibility without sacrificing speed.
+
+=head2 Embedding comments and modifiers in a regular expression
+
+Starting with this section, we will be discussing Perl's set of
+B<extended patterns>.  These are extensions to the traditional regular
+expression syntax that provide powerful new tools for pattern
+matching.  We have already seen extensions in the form of the minimal
+matching constructs C<??>, C<*?>, C<+?>, C<{n,m}?>, and C<{n,}?>.  The
+rest of the extensions below have the form C<(?char...)>, where the
+C<char> is a character that determines the type of extension.
+
+The first extension is an embedded comment C<(?#text)>.  This embeds a
+comment into the regular expression without affecting its meaning.  The
+comment should not have any closing parentheses in the text.  An
+example is
+
+    /(?# Match an integer:)[+-]?\d+/;
+
+This style of commenting has been largely superseded by the raw,
+freeform commenting that is allowed with the C<//x> modifier.
+
+The modifiers C<//i>, C<//m>, C<//s>, and C<//x> can also be embedded in
+a regexp using C<(?i)>, C<(?m)>, C<(?s)>, and C<(?x)>.  For instance,
+
+    /(?i)yes/;  # match 'yes' case insensitively
+    /yes/i;     # same thing
+    /(?x)(          # freeform version of an integer regexp
+             [+-]?  # match an optional sign
+             \d+    # match a sequence of digits
+         )
+    /x;
+
+Embedded modifiers can have two important advantages over the usual
+modifiers.  Embedded modifiers allow a custom set of modifiers for
+I<each> regexp pattern.  This is great for matching an array of regexps
+that must have different modifiers:
+
+    $pattern[0] = '(?i)doctor';
+    $pattern[1] = 'Johnson';
+    ...
+    while (<>) {
+        foreach $patt (@pattern) {
+            print if /$patt/;
+        }
+    }
+
+The second advantage is that embedded modifiers only affect the regexp
+inside the group the embedded modifier is contained in.  So grouping
+can be used to localize the modifier's effects:
+
+    /Answer: ((?i)yes)/;  # matches 'Answer: yes', 'Answer: YES', etc.
+
+Embedded modifiers can also turn off any modifiers already present
+by using, e.g., C<(?-i)>.  Modifiers can also be combined into
+a single expression, e.g., C<(?s-i)> turns on single line mode and
+turns off case insensitivity.
+
+=head2 Non-capturing groupings
+
+We noted in Part 1 that groupings C<()> had two distinct functions: 1)
+group regexp elements together as a single unit, and 2) extract, or
+capture, substrings that matched the regexp in the
+grouping.  Non-capturing groupings, denoted by C<(?:regexp)>, allow the
+regexp to be treated as a single unit, but don't extract substrings or
+set matching variables C<$1>, etc.  Both capturing and non-capturing
+groupings are allowed to co-exist in the same regexp.  Because there is
+no extraction, non-capturing groupings are faster than capturing
+groupings.  Non-capturing groupings are also handy for choosing exactly
+which parts of a regexp are to be extracted to matching variables:
+
+    # match a number, $1-$4 are set, but we only want $1
+    /([+-]?\ *(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?)/;
+
+    # match a number faster, only $1 is set
+    /([+-]?\ *(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?)/;
+
+    # match a number, get $1 = whole number, $2 = exponent
+    /([+-]?\ *(?:\d+(?:\.\d*)?|\.\d+)(?:[eE]([+-]?\d+))?)/;
+
+Non-capturing groupings are also useful for removing nuisance
+elements gathered from a split operation:
+
+    $x = '12a34b5';
+    @num = split /(a|b)/, $x;    # @num = ('12','a','34','b','5')
+    @num = split /(?:a|b)/, $x;  # @num = ('12','34','5')
+
+Non-capturing groupings may also have embedded modifiers:
+C<(?i-m:regexp)> is a non-capturing grouping that matches C<regexp>
+case insensitively and turns off multi-line mode.
+
+=head2 Looking ahead and looking behind
+
+This section concerns the lookahead and lookbehind assertions.  First,
+a little background.
+
+In Perl regular expressions, most regexp elements 'eat up' a certain
+amount of string when they match.  For instance, the regexp element
+C<[abc]> eats up one character of the string when it matches, in the
+sense that perl moves to the next character position in the string
+after the match.  There are some elements, however, that don't eat up
+characters (advance the character position) if they match.  The examples
+we have seen so far are the anchors.  The anchor C<^> matches the
+beginning of the line, but doesn't eat any characters.  Similarly, the
+word boundary anchor C<\b> matches, e.g., if the character to the left
+is a word character and the character to the right is a non-word
+character, but it doesn't eat up any characters itself.  Anchors are
+examples of 'zero-width assertions'.  Zero-width, because they consume
+no characters, and assertions, because they test some property of the
+string.  In the context of our walk in the woods analogy to regexp
+matching, most regexp elements move us along a trail, but anchors have
+us stop a moment and check our surroundings.  If the local environment
+checks out, we can proceed forward.  But if the local environment
+doesn't satisfy us, we must backtrack.
+
+Checking the environment entails either looking ahead on the trail,
+looking behind, or both.  C<^> looks behind, to see that there are no
+characters before.  C<$> looks ahead, to see that there are no
+characters after.  C<\b> looks both ahead and behind, to see if the
+characters on either side differ in their 'word'-ness.
+
+The lookahead and lookbehind assertions are generalizations of the
+anchor concept.  Lookahead and lookbehind are zero-width assertions
+that let us specify which characters we want to test for.  The
+lookahead assertion is denoted by C<(?=regexp)> and the lookbehind
+assertion is denoted by C<< (?<=fixed-regexp) >>.  Some examples are
+
+    $x = "I catch the housecat 'Tom-cat' with catnip";
+    $x =~ /cat(?=\s+)/;  # matches 'cat' in 'housecat'
+    @catwords = ($x =~ /(?<=\s)cat\w+/g);  # matches,
+                                           # $catwords[0] = 'catch'
+                                           # $catwords[1] = 'catnip'
+    $x =~ /\bcat\b/;  # matches 'cat' in 'Tom-cat'
+    $x =~ /(?<=\s)cat(?=\s)/; # doesn't match; no isolated 'cat' in
+                              # middle of $x
+
+Note that the parentheses in C<(?=regexp)> and C<< (?<=regexp) >> are
+non-capturing, since these are zero-width assertions.  Thus in the
+second regexp, the substrings captured are those of the whole regexp
+itself.  Lookahead C<(?=regexp)> can match arbitrary regexps, but
+lookbehind C<< (?<=fixed-regexp) >> only works for regexps of fixed
+width, i.e., a fixed number of characters long.  Thus
+C<< (?<=(ab|bc)) >> is fine, but C<< (?<=(ab)*) >> is not.  The
+negated versions of the lookahead and lookbehind assertions are
+denoted by C<(?!regexp)> and C<< (?<!fixed-regexp) >> respectively.
+They evaluate true if the regexps do I<not> match:
+
+    $x = "foobar";
+    $x =~ /foo(?!bar)/;  # doesn't match, 'bar' follows 'foo'
+    $x =~ /foo(?!baz)/;  # matches, 'baz' doesn't follow 'foo'
+    $x =~ /(?<!\s)foo/;  # matches, there is no \s before 'foo'
+
+=head2 Using independent subexpressions to prevent backtracking
+
+The last few extended patterns in this tutorial are experimental as of
+5.6.0.  Play with them, use them in some code, but don't rely on them
+just yet for production code.
+
+S<B<Independent subexpressions> > are regular expressions, in the
+context of a larger regular expression, that function independently of
+the larger regular expression.  That is, they consume as much or as
+little of the string as they wish without regard for the ability of
+the larger regexp to match.  Independent subexpressions are represented
+by C<< (?>regexp) >>.  We can illustrate their behavior by first
+considering an ordinary regexp:
+
+    $x = "ab";
+    $x =~ /a*ab/;  # matches
+
+This obviously matches, but in the process of matching, the
+subexpression C<a*> first grabbed the C<a>.  Doing so, however,
+wouldn't allow the whole regexp to match, so after backtracking, C<a*>
+eventually gave back the C<a> and matched the empty string.  Here, what
+C<a*> matched was I<dependent> on what the rest of the regexp matched.
+
+Contrast that with an independent subexpression:
+
+    $x =~ /(?>a*)ab/;  # doesn't match!
+
+The independent subexpression C<< (?>a*) >> doesn't care about the rest
+of the regexp, so it sees an C<a> and grabs it.  Then the rest of the
+regexp C<ab> cannot match.  Because C<< (?>a*) >> is independent, there
+is no backtracking and the independent subexpression does not give
+up its C<a>.  Thus the match of the regexp as a whole fails.  A similar
+behavior occurs with completely independent regexps:
+
+    $x = "ab";
+    $x =~ /a*/g;   # matches, eats an 'a'
+    $x =~ /\Gab/g; # doesn't match, no 'a' available
+
+Here C<//g> and C<\G> create a 'tag team' handoff of the string from
+one regexp to the other.  Regexps with an independent subexpression are
+much like this, with a handoff of the string to the independent
+subexpression, and a handoff of the string back to the enclosing
+regexp.
+
+The ability of an independent subexpression to prevent backtracking
+can be quite useful.  Suppose we want to match a non-empty string
+enclosed in parentheses up to two levels deep.  Then the following
+regexp matches:
+
+    $x = "abc(de(fg)h";  # unbalanced parentheses
+    $x =~ /\( ( [^()]+ | \([^()]*\) )+ \)/x;
+
+The regexp matches an open parenthesis, one or more copies of an
+alternation, and a close parenthesis.  The alternation is two-way, with
+the first alternative C<[^()]+> matching a substring with no
+parentheses and the second alternative C<\([^()]*\)>  matching a
+substring delimited by parentheses.  The problem with this regexp is
+that it is pathological: it has nested indeterminate quantifiers
+of the form C<(a+|b)+>.  We discussed in Part 1 how nested quantifiers
+like this could take an exponentially long time to execute if there
+was no match possible.  To prevent the exponential blowup, we need to
+prevent useless backtracking at some point.  This can be done by
+enclosing the inner quantifier as an independent subexpression:
+
+    $x =~ /\( ( (?>[^()]+) | \([^()]*\) )+ \)/x;
+
+Here, C<< (?>[^()]+) >> breaks the degeneracy of string partitioning
+by gobbling up as much of the string as possible and keeping it.   Then
+match failures fail much more quickly.
+
+=head2 Conditional expressions
+
+A S<B<conditional expression> > is a form of if-then-else statement
+that allows one to choose which patterns are to be matched, based on
+some condition.  There are two types of conditional expression:
+C<(?(condition)yes-regexp)> and
+C<(?(condition)yes-regexp|no-regexp)>.  C<(?(condition)yes-regexp)> is
+like an S<C<'if () {}'> > statement in Perl.  If the C<condition> is true,
+the C<yes-regexp> will be matched.  If the C<condition> is false, the
+C<yes-regexp> will be skipped and perl will move onto the next regexp
+element.  The second form is like an S<C<'if () {} else {}'> > statement
+in Perl.  If the C<condition> is true, the C<yes-regexp> will be
+matched, otherwise the C<no-regexp> will be matched.
+
+The C<condition> can have two forms.  The first form is simply an
+integer in parentheses C<(integer)>.  It is true if the corresponding
+backreference C<\integer> matched earlier in the regexp.  The second
+form is a bare zero width assertion C<(?...)>, either a
+lookahead, a lookbehind, or a code assertion (discussed in the next
+section).
+
+The integer form of the C<condition> allows us to choose, with more
+flexibility, what to match based on what matched earlier in the
+regexp. This searches for words of the form C<"$x$x"> or
+C<"$x$y$y$x">:
+
+    % simple_grep '^(\w+)(\w+)?(?(2)\2\1|\1)$' /usr/dict/words
+    beriberi
+    coco
+    couscous
+    deed
+    ...
+    toot
+    toto
+    tutu
+
+The lookbehind C<condition> allows, along with backreferences,
+an earlier part of the match to influence a later part of the
+match.  For instance,
+
+    /[ATGC]+(?(?<=AA)G|C)$/;
+
+matches a DNA sequence such that it either ends in C<AAG>, or some
+other base pair combination and C<C>.  Note that the form is
+C<< (?(?<=AA)G|C) >> and not C<< (?((?<=AA))G|C) >>; for the
+lookahead, lookbehind or code assertions, the parentheses around the
+conditional are not needed.
+
+=head2 A bit of magic: executing Perl code in a regular expression
+
+Normally, regexps are a part of Perl expressions.
+S<B<Code evaluation> > expressions turn that around by allowing
+arbitrary Perl code to be a part of a regexp.  A code evaluation
+expression is denoted C<(?{code})>, with C<code> a string of Perl
+statements.
+
+Code expressions are zero-width assertions, and the value they return
+depends on their environment.  There are two possibilities: either the
+code expression is used as a conditional in a conditional expression
+C<(?(condition)...)>, or it is not.  If the code expression is a
+conditional, the code is evaluated and the result (i.e., the result of
+the last statement) is used to determine truth or falsehood.  If the
+code expression is not used as a conditional, the assertion always
+evaluates true and the result is put into the special variable
+C<$^R>.  The variable C<$^R> can then be used in code expressions later
+in the regexp.  Here are some silly examples:
+
+    $x = "abcdef";
+    $x =~ /abc(?{print "Hi Mom!";})def/; # matches,
+                                         # prints 'Hi Mom!'
+    $x =~ /aaa(?{print "Hi Mom!";})def/; # doesn't match,
+                                         # no 'Hi Mom!'
+
+Pay careful attention to the next example:
+
+    $x =~ /abc(?{print "Hi Mom!";})ddd/; # doesn't match,
+                                         # no 'Hi Mom!'
+                                         # but why not?
+
+At first glance, you'd think that it shouldn't print, because obviously
+the C<ddd> isn't going to match the target string. But look at this
+example:
+
+    $x =~ /abc(?{print "Hi Mom!";})[d]dd/; # doesn't match,
+                                           # but _does_ print
+
+Hmm. What happened here? If you've been following along, you know that
+the above pattern should be effectively the same as the last one --
+enclosing the d in a character class isn't going to change what it
+matches. So why does the first not print while the second one does?
+
+The answer lies in the optimizations the REx engine makes. In the first
+case, all the engine sees are plain old characters (aside from the
+C<?{}> construct). It's smart enough to realize that the string 'ddd'
+doesn't occur in our target string before actually running the pattern
+through. But in the second case, we've tricked it into thinking that our
+pattern is more complicated than it is. It takes a look, sees our
+character class, and decides that it will have to actually run the
+pattern to determine whether or not it matches, and in the process of
+running it hits the print statement before it discovers that we don't
+have a match.
+
+To take a closer look at how the engine does optimizations, see the
+section L<"Pragmas and debugging"> below.
+
+More fun with C<?{}>:
+
+    $x =~ /(?{print "Hi Mom!";})/;       # matches,
+                                         # prints 'Hi Mom!'
+    $x =~ /(?{$c = 1;})(?{print "$c";})/;  # matches,
+                                           # prints '1'
+    $x =~ /(?{$c = 1;})(?{print "$^R";})/; # matches,
+                                           # prints '1'
+
+The bit of magic mentioned in the section title occurs when the regexp
+backtracks in the process of searching for a match.  If the regexp
+backtracks over a code expression and if the variables used within are
+localized using C<local>, the changes in the variables produced by the
+code expression are undone! Thus, if we wanted to count how many times
+a character got matched inside a group, we could use, e.g.,
+
+    $x = "aaaa";
+    $count = 0;  # initialize 'a' count
+    $c = "bob";  # test if $c gets clobbered
+    $x =~ /(?{local $c = 0;})         # initialize count
+           ( a                        # match 'a'
+             (?{local $c = $c + 1;})  # increment count
+           )*                         # do this any number of times,
+           aa                         # but match 'aa' at the end
+           (?{$count = $c;})          # copy local $c var into $count
+          /x;
+    print "'a' count is $count, \$c variable is '$c'\n";
+
+This prints
+
+    'a' count is 2, $c variable is 'bob'
+
+If we replace the S<C< (?{local $c = $c + 1;})> > with
+S<C< (?{$c = $c + 1;})> >, the variable changes are I<not> undone
+during backtracking, and we get
+
+    'a' count is 4, $c variable is 'bob'
+
+Note that only localized variable changes are undone.  Other side
+effects of code expression execution are permanent.  Thus
+
+    $x = "aaaa";
+    $x =~ /(a(?{print "Yow\n";}))*aa/;
+
+produces
+
+   Yow
+   Yow
+   Yow
+   Yow
+
+The result C<$^R> is automatically localized, so that it will behave
+properly in the presence of backtracking.
+
+This example uses a code expression in a conditional to match the
+article 'the' in either English or German:
+
+    $lang = 'DE';  # use German
+    ...
+    $text = "das";
+    print "matched\n"
+        if $text =~ /(?(?{
+                          $lang eq 'EN'; # is the language English?
+                         })
+                       the |             # if so, then match 'the'
+                       (die|das|der)     # else, match 'die|das|der'
+                     )
+                    /xi;
+
+Note that the syntax here is C<(?(?{...})yes-regexp|no-regexp)>, not
+C<(?((?{...}))yes-regexp|no-regexp)>.  In other words, in the case of a
+code expression, we don't need the extra parentheses around the
+conditional.
+
+If you try to use code expressions with interpolating variables, perl
+may surprise you:
+
+    $bar = 5;
+    $pat = '(?{ 1 })';
+    /foo(?{ $bar })bar/; # compiles ok, $bar not interpolated
+    /foo(?{ 1 })$bar/;   # compile error!
+    /foo${pat}bar/;      # compile error!
+
+    $pat = qr/(?{ $foo = 1 })/;  # precompile code regexp
+    /foo${pat}bar/;      # compiles ok
+
+If a regexp has (1) code expressions and interpolating variables, or
+(2) a variable that interpolates a code expression, perl treats the
+regexp as an error. If the code expression is precompiled into a
+variable, however, interpolating is ok. The question is, why is this
+an error?
+
+The reason is that variable interpolation and code expressions
+together pose a security risk.  The combination is dangerous because
+many programmers who write search engines often take user input and
+plug it directly into a regexp:
+
+    $regexp = <>;       # read user-supplied regexp
+    chomp $regexp;      # get rid of possible newline
+    $text =~ /$regexp/; # search $text for the $regexp
+
+If the C<$regexp> variable contains a code expression, the user could
+then execute arbitrary Perl code.  For instance, some joker could
+search for S<C<system('rm -rf *');> > to erase your files.  In this
+sense, the combination of interpolation and code expressions B<taints>
+your regexp.  So by default, using both interpolation and code
+expressions in the same regexp is not allowed.  If you're not
+concerned about malicious users, it is possible to bypass this
+security check by invoking S<C<use re 'eval'> >:
+
+    use re 'eval';       # throw caution out the door
+    $bar = 5;
+    $pat = '(?{ 1 })';
+    /foo(?{ 1 })$bar/;   # compiles ok
+    /foo${pat}bar/;      # compiles ok
+
+Another form of code expression is the S<B<pattern code expression> >.
+The pattern code expression is like a regular code expression, except
+that the result of the code evaluation is treated as a regular
+expression and matched immediately.  A simple example is
+
+    $length = 5;
+    $char = 'a';
+    $x = 'aaaaabb';
+    $x =~ /(??{$char x $length})/x; # matches, there are 5 of 'a'
+
+
+This final example contains both ordinary and pattern code
+expressions.   It detects if a binary string C<1101010010001...> has a
+Fibonacci spacing 0,1,1,2,3,5,...  of the C<1>'s:
+
+    $s0 = 0; $s1 = 1; # initial conditions
+    $x = "1101010010001000001";
+    print "It is a Fibonacci sequence\n"
+        if $x =~ /^1         # match an initial '1'
+                    (
+                       (??{'0' x $s0}) # match $s0 of '0'
+                       1               # and then a '1'
+                       (?{
+                          $largest = $s0;   # largest seq so far
+                          $s2 = $s1 + $s0;  # compute next term
+                          $s0 = $s1;        # in Fibonacci sequence
+                          $s1 = $s2;
+                         })
+                    )+   # repeat as needed
+                  $      # that is all there is
+                 /x;
+    print "Largest sequence matched was $largest\n";
+
+This prints
+
+    It is a Fibonacci sequence
+    Largest sequence matched was 5
+
+Ha! Try that with your garden variety regexp package...
+
+Note that the variables C<$s0> and C<$s1> are not substituted when the
+regexp is compiled, as happens for ordinary variables outside a code
+expression.  Rather, the code expressions are evaluated when perl
+encounters them during the search for a match.
+
+The regexp without the C<//x> modifier is
+
+    /^1((??{'0'x$s0})1(?{$largest=$s0;$s2=$s1+$s0;$s0=$s1;$s1=$s2;}))+$/;
+
+and is a great start on an Obfuscated Perl entry :-) When working with
+code and conditional expressions, the extended form of regexps is
+almost necessary in creating and debugging regexps.
+
+=head2 Pragmas and debugging
+
+Speaking of debugging, there are several pragmas available to control
+and debug regexps in Perl.  We have already encountered one pragma in
+the previous section, S<C<use re 'eval';> >, that allows variable
+interpolation and code expressions to coexist in a regexp.  The other
+pragmas are
+
+    use re 'taint';
+    $tainted = <>;
+    @parts = ($tainted =~ /(\w+)\s+(\w+)/); # @parts is now tainted
+
+The C<taint> pragma causes any substrings from a match with a tainted
+variable to be tainted as well.  This is not normally the case, as
+regexps are often used to extract the safe bits from a tainted
+variable.  Use C<taint> when you are not extracting safe bits, but are
+performing some other processing.  Both C<taint> and C<eval> pragmas
+are lexically scoped, which means they are in effect only until
+the end of the block enclosing the pragmas.
+
+    use re 'debug';
+    /^(.*)$/s;       # output debugging info
+
+    use re 'debugcolor';
+    /^(.*)$/s;       # output debugging info in living color
+
+The global C<debug> and C<debugcolor> pragmas allow one to get
+detailed debugging info about regexp compilation and
+execution.  C<debugcolor> is the same as C<debug>, except the debugging
+information is displayed in color on terminals that can display
+termcap color sequences.  Here is example output:
+
+    % perl -e 'use re "debug"; "abc" =~ /a*b+c/;'
+    Compiling REx `a*b+c'
+    size 9 first at 1
+       1: STAR(4)
+       2:   EXACT <a>(0)
+       4: PLUS(7)
+       5:   EXACT <b>(0)
+       7: EXACT <c>(9)
+       9: END(0)
+    floating `bc' at 0..2147483647 (checking floating) minlen 2
+    Guessing start of match, REx `a*b+c' against `abc'...
+    Found floating substr `bc' at offset 1...
+    Guessed: match at offset 0
+    Matching REx `a*b+c' against `abc'
+      Setting an EVAL scope, savestack=3
+       0 <> <abc>             |  1:  STAR
+                               EXACT <a> can match 1 times out of 32767...
+      Setting an EVAL scope, savestack=3
+       1 <a> <bc>             |  4:    PLUS
+                               EXACT <b> can match 1 times out of 32767...
+      Setting an EVAL scope, savestack=3
+       2 <ab> <c>             |  7:      EXACT <c>
+       3 <abc> <>             |  9:      END
+    Match successful!
+    Freeing REx: `a*b+c'
+
+If you have gotten this far into the tutorial, you can probably guess
+what the different parts of the debugging output tell you.  The first
+part
+
+    Compiling REx `a*b+c'
+    size 9 first at 1
+       1: STAR(4)
+       2:   EXACT <a>(0)
+       4: PLUS(7)
+       5:   EXACT <b>(0)
+       7: EXACT <c>(9)
+       9: END(0)
+
+describes the compilation stage.  C<STAR(4)> means that there is a
+starred object, in this case C<'a'>, and if it matches, goto line 4,
+i.e., C<PLUS(7)>.  The middle lines describe some heuristics and
+optimizations performed before a match:
+
+    floating `bc' at 0..2147483647 (checking floating) minlen 2
+    Guessing start of match, REx `a*b+c' against `abc'...
+    Found floating substr `bc' at offset 1...
+    Guessed: match at offset 0
+
+Then the match is executed and the remaining lines describe the
+process:
+
+    Matching REx `a*b+c' against `abc'
+      Setting an EVAL scope, savestack=3
+       0 <> <abc>             |  1:  STAR
+                               EXACT <a> can match 1 times out of 32767...
+      Setting an EVAL scope, savestack=3
+       1 <a> <bc>             |  4:    PLUS
+                               EXACT <b> can match 1 times out of 32767...
+      Setting an EVAL scope, savestack=3
+       2 <ab> <c>             |  7:      EXACT <c>
+       3 <abc> <>             |  9:      END
+    Match successful!
+    Freeing REx: `a*b+c'
+
+Each step is of the form S<C<< n <x> <y> >> >, with C<< <x> >> the
+part of the string matched and C<< <y> >> the part not yet
+matched.  The S<C<< |  1:  STAR >> > says that perl is at line number 1
+in the compilation list above.  See
+L<perldebguts/"Debugging regular expressions"> for much more detail.
+
+An alternative method of debugging regexps is to embed C<print>
+statements within the regexp.  This provides a blow-by-blow account of
+the backtracking in an alternation:
+
+    "that this" =~ m@(?{print "Start at position ", pos, "\n";})
+                     t(?{print "t1\n";})
+                     h(?{print "h1\n";})
+                     i(?{print "i1\n";})
+                     s(?{print "s1\n";})
+                         |
+                     t(?{print "t2\n";})
+                     h(?{print "h2\n";})
+                     a(?{print "a2\n";})
+                     t(?{print "t2\n";})
+                     (?{print "Done at position ", pos, "\n";})
+                    @x;
+
+prints
+
+    Start at position 0
+    t1
+    h1
+    t2
+    h2
+    a2
+    t2
+    Done at position 4
+
+=head1 BUGS
+
+Code expressions, conditional expressions, and independent expressions
+are B<experimental>.  Don't use them in production code.  Yet.
+
+=head1 SEE ALSO
+
+This is just a tutorial.  For the full story on perl regular
+expressions, see the L<perlre> regular expressions reference page.
+
+For more information on the matching C<m//> and substitution C<s///>
+operators, see L<perlop/"Regexp Quote-Like Operators">.  For
+information on the C<split> operation, see L<perlfunc/split>.
+
+For an excellent all-around resource on the care and feeding of
+regular expressions, see the book I<Mastering Regular Expressions> by
+Jeffrey Friedl (published by O'Reilly, ISBN 1-56592-257-3).
+
+=head1 AUTHOR AND COPYRIGHT
+
+Copyright (c) 2000 Mark Kvale
+All rights reserved.
+
+This document may be distributed under the same terms as Perl itself.
+
+=head2 Acknowledgments
+
+The inspiration for the stop codon DNA example came from the ZIP
+code example in chapter 7 of I<Mastering Regular Expressions>.
+
+The author would like to thank Jeff Pinyan, Andrew Johnson, Peter
+Haworth, Ronald J Kimball, and Joe Smith for all their helpful
+comments.
+
+=cut
+
diff --git a/contrib/perl5/pod/perlutil.pod b/contrib/perl5/pod/perlutil.pod
new file mode 100644
index 0000000000000..be7a345f79678
--- /dev/null
+++ b/contrib/perl5/pod/perlutil.pod
@@ -0,0 +1,185 @@
+=head1 NAME
+
+perlutil - utilities packaged with the Perl distribution
+
+=head1 DESCRIPTION
+
+Along with the Perl interpreter itself, the Perl distribution installs a
+range of utilities on your system. There are also several utilities
+which are used by the Perl distribution itself as part of the install
+process. This document exists to list all of these utilities, explain
+what they are for and provide pointers to each module's documentation,
+if appropriate.
+
+=head2 DOCUMENTATION
+
+=over 3
+
+=item L<perldoc|perldoc>
+
+The main interface to Perl's documentation is C<perldoc>, although
+if you're reading this, it's more than likely that you've already found
+it. F<perldoc> will extract and format the documentation from any file
+in the current directory, any Perl module installed on the system, or
+any of the standard documentation pages, such as this one. Use 
+C<perldoc E<lt>nameE<gt>> to get information on any of the utilities
+described in this document.
+
+=item L<pod2man|pod2man> and L<pod2text|pod2text>
+
+If it's run from a terminal, F<perldoc> will usually call F<pod2man> to
+translate POD (Plain Old Documentation - see L<perlpod> for an
+explanation) into a man page, and then run F<man> to display it; if
+F<man> isn't available, F<pod2text> will be used instead and the output
+piped through your favourite pager.
+
+=item L<pod2html|pod2html> and L<pod2latex|pod2latex>
+
+As well as these two, there are two other converters: F<pod2html>, which
+produces HTML pages from POD, and F<pod2latex>, which produces LaTeX
+files.
+
+=item L<pod2usage|pod2usage>
+
+If you just want to know how to use the utilities described here,
+F<pod2usage> will just extract the "USAGE" section; some of
+the utilities will automatically call F<pod2usage> on themselves when
+you call them with C<-help>.
+
+=item L<podselect|podselect>
+
+F<pod2usage> is a special case of F<podselect>, a utility to extract
+named sections from documents written in POD. For instance, while
+utilities have "USAGE" sections, Perl modules usually have "SYNOPSIS"
+sections: C<podselect -s "SYNOPSIS" ...> will extract this section for
+a given file.
+
+=item L<podchecker|podchecker>
+
+If you're writing your own documentation in POD, the F<podchecker>
+utility will look for errors in your markup.
+
+=item L<splain|splain>
+
+F<splain> is an interface to L<perldiag> - paste in your error message
+to it, and it'll explain it for you.
+
+=item L<roffitall|roffitall>
+
+The C<roffitall> utility is not installed on your system but lives in
+the F<pod/> directory of your Perl source kit; it converts all the
+documentation from the distribution to F<*roff> format, and produces a
+typeset PostScript or text file of the whole lot.
+
+=back
+
+=head2 CONVERTORS
+
+To help you convert legacy programs to Perl, we've included three
+conversion filters:
+
+=over 3
+
+=item L<a2p|a2p>
+
+F<a2p> converts F<awk> scripts to Perl programs; for example, C<a2p -F:>
+on the simple F<awk> script C<{print $2}> will produce a Perl program
+based around this code:
+
+    while (<>) {
+        ($Fld1,$Fld2) = split(/[:\n]/, $_, 9999);
+        print $Fld2;
+    }
+
+=item L<s2p|s2p>
+
+Similarly, F<s2p> converts F<sed> scripts to Perl programs. F<s2p> run
+on C<s/foo/bar> will produce a Perl program based around this:
+
+    while (<>) {
+        chomp;
+        s/foo/bar/g;
+        print if $printit;
+    }
+
+=item L<find2perl|find2perl>
+
+Finally, F<find2perl> translates C<find> commands to Perl equivalents which 
+use the L<File::Find|File::Find> module. As an example, 
+C<find2perl . -user root -perm 4000 -print> produces the following callback
+subroutine for C<File::Find>:
+
+    sub wanted {
+        my ($dev,$ino,$mode,$nlink,$uid,$gid);
+        (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
+        ($uid == $uid{'root'}) &&
+        (($mode & 0777) == 04000) &&
+        print("$name\n");
+    }
+
+=back
+
+As well as these filters for converting other languages, the
+L<pl2pm|pl2pm> utility will help you convert old-style Perl 4 libraries to 
+new-style Perl5 modules.
+
+=head2 Development
+
+There is a set of utilities which help you in developing Perl programs,
+and in particular, extending Perl with C.
+
+=over 3
+
+=item L<perlbug|perlbug>
+
+F<perlbug> is the recommended way to report bugs in the perl interpreter
+itself or any of the standard library modules back to the developers;
+please read through the documentation for F<perlbug> thoroughly before
+using it to submit a bug report.
+
+=item L<h2ph|h2ph>
+
+Back before Perl had the XS system for connecting with C libraries,
+programmers used to get library constants by reading through the C
+header files. You may still see C<require 'syscall.ph'> or similar
+around - the F<.ph> file should be created by running F<h2ph> on the
+corresponding F<.h> file. See the F<h2ph> documentation for more on how
+to convert a whole bunch of header files at once.
+
+=item L<c2ph|c2ph> and L<pstruct|pstruct>
+
+F<c2ph> and F<pstruct>, which are actually the same program but behave
+differently depending on how they are called, provide another way of
+getting at C with Perl - they'll convert C structures and union declarations
+to Perl code. This is deprecated in favour of F<h2xs> these days.
+
+=item L<h2xs|h2xs>
+
+F<h2xs> converts C header files into XS modules, and will try and write
+as much glue between C libraries and Perl modules as it can. It's also
+very useful for creating skeletons of pure Perl modules.
+
+=item L<dprofpp|dprofpp>
+
+Perl comes with a profiler, the F<Devel::DProf> module. The
+F<dprofpp> utility analyzes the output of this profiler and tells you
+which subroutines are taking up the most run time. See L<Devel::DProf>
+for more information.
+
+=item L<perlcc|perlcc>
+
+F<perlcc> is the interface to the experimental Perl compiler suite.
+
+=back
+
+=head2 SEE ALSO
+
+L<perldoc|perldoc>, L<pod2man|pod2man>, L<perlpod>,
+L<pod2html|pod2html>, L<pod2usage|pod2usage>, L<podselect|podselect>,
+L<podchecker|podchecker>, L<splain|splain>, L<perldiag>,
+L<roffitall|roffitall>, L<a2p|a2p>, L<s2p|s2p>, L<find2perl|find2perl>,
+L<File::Find|File::Find>, L<pl2pm|pl2pm>, L<perlbug|perlbug>,
+L<h2ph|h2ph>, L<c2ph|c2ph>, L<h2xs|h2xs>, L<dprofpp|dprofpp>,
+L<Devel::DProf>, L<perlcc|perlcc>
+
+=cut