diff options
Diffstat (limited to 'sntp/ag-tpl/0-old/Mdoc.pm')
-rw-r--r-- | sntp/ag-tpl/0-old/Mdoc.pm | 515 |
1 files changed, 515 insertions, 0 deletions
diff --git a/sntp/ag-tpl/0-old/Mdoc.pm b/sntp/ag-tpl/0-old/Mdoc.pm new file mode 100644 index 000000000000..61fffd6f615f --- /dev/null +++ b/sntp/ag-tpl/0-old/Mdoc.pm @@ -0,0 +1,515 @@ +=head1 NAME + +Mdoc - perl module to parse Mdoc macros + +=head1 SYNOPSIS + + use Mdoc qw(ns pp soff son stoggle mapwords); + +See mdoc2man and mdoc2texi for code examples. + +=head1 FUNCTIONS + +=over 4 + +=item def_macro( NAME, CODE, [ raw => 1, greedy => 1, concat_until => '.Xx' ] ) + +Define new macro. The CODE reference will be called by call_macro(). You can +have two distinct definitions for and inline macro and for a standalone macro +(i. e. 'Pa' and '.Pa'). + +The CODE reference is passed a list of arguments and is expected to return list +of strings and control characters (see C<CONSTANTS>). + +By default the surrouding "" from arguments to macros are removed, use C<raw> +to disable this. + +Normaly CODE reference is passed all arguments up to next nested macro. Set +C<greedy> to to pass everything up to the end of the line. + +If the concat_until is present, the line is concated until the .Xx macro is +found. For example the following macro definition + + def_macro('.Oo', gen_encloser(qw([ ]), concat_until => '.Oc' } + def_macro('.Cm', sub { mapwords {'($_)'} @_ } } + +and the following input + + .Oo + .Cm foo | + .Cm bar | + .Oc + +results in [(foo) | (bar)] + +=item get_macro( NAME ) + +Returns a hash reference like: + + { run => CODE, raw => [1|0], greedy => [1|0] } + +Where C<CODE> is the CODE reference used to define macro called C<NAME> + +=item parse_line( INPUT, OUTPUT_CODE, PREPROCESS_CODE ) + +Parse a line from the C<INPUT> filehandle. If a macro was detected it returns a +list (MACRO_NAME, @MACRO_ARGS), otherwise it calls the C<OUTPUT_CODE>, giving +caller a chance to modify line before printing it. If C<PREPROCESS_CODE> is +defined it calls it prior to passing argument to a macro, giving caller a +chance to alter them. if EOF was reached undef is returned. + +=item call_macro( MACRO, ARGS, ... ) + +Call macro C<MACRO> with C<ARGS>. The CODE reference for macro C<MACRO> is +called and for all the nested macros. Every called macro returns a list which +is appended to return value and returned when all nested macros are processed. +Use to_string() to produce a printable string from the list. + +=item to_string ( LIST ) + +Processes C<LIST> returned from call_macro() and returns formatted string. + +=item mapwords BLOCK ARRAY + +This is like perl's map only it calls BLOCK only on elements which are not +punctuation or control characters. + +=item space ( ['on'|'off] ) + +Turn spacing on or off. If called without argument it returns the current state. + +=item gen_encloser ( START, END ) + +Helper function for generating macros that enclose their arguments. + gen_encloser(qw({ })); +returns + sub { '{', ns, @_, ns, pp('}')} + +=item set_Bl_callback( CODE , DEFS ) + +This module implements the Bl/El macros for you. Using set_Bl_callback you can +provide a macro definition that should be executed on a .Bl call. + +=item set_El_callback( CODE , DEFS ) + +This module implements the Bl/El macros for you. Using set_El_callback you can +provide a macro definition that should be executed on a .El call. + +=item set_Re_callback( CODE ) + +The C<CODE> is called after a Rs/Re block is done. With a hash reference as a +parameter, describing the reference. + +=back + +=head1 CONSTANTS + +=over 4 + +=item ns + +Indicate 'no space' between to members of the list. + +=item pp ( STRING ) + +The string is 'punctuation point'. It means that every punctuation +preceeding that element is put behind it. + +=item soff + +Turn spacing off. + +=item son + +Turn spacing on. + +=item stoggle + +Toogle spacing. + +=item hs + +Print space no matter spacing mode. + +=back + +=head1 TODO + +* The concat_until only works with standalone macros. This means that + .Po blah Pc +will hang until .Pc in encountered. + +* Provide default macros for Bd/Ed + +* The reference implementation is uncomplete + +=cut + +package Mdoc; +use strict; +use warnings; +use List::Util qw(reduce); +use Text::ParseWords qw(quotewords); +use Carp; +use Exporter qw(import); +our @EXPORT_OK = qw(ns pp soff son stoggle hs mapwords gen_encloser nl); + +use constant { + ns => ['nospace'], + soff => ['spaceoff'], + son => ['spaceon'], + stoggle => ['spacetoggle'], + hs => ['hardspace'], +}; + +sub pp { + my $c = shift; + return ['pp', $c ]; +} +sub gen_encloser { + my ($o, $c) = @_; + return sub { ($o, ns, @_, ns, pp($c)) }; +} + +sub mapwords(&@) { + my ($f, @l) = @_; + my @res; + for my $el (@l) { + local $_ = $el; + push @res, $el =~ /^(?:[,\.\{\}\(\):;\[\]\|])$/ || ref $el eq 'ARRAY' ? + $el : $f->(); + } + return @res; +} + +my %macros; + +############################################################################### + +# Default macro definitions start + +############################################################################### + +def_macro('Xo', sub { @_ }, concat_until => '.Xc'); + +def_macro('.Ns', sub {ns, @_}); +def_macro('Ns', sub {ns, @_}); + +{ + my %reference; + def_macro('.Rs', sub { () } ); + def_macro('.%A', sub { + if ($reference{authors}) { + $reference{authors} .= " and @_" + } + else { + $reference{authors} = "@_"; + } + return (); + }); + def_macro('.%T', sub { $reference{title} = "@_"; () } ); + def_macro('.%O', sub { $reference{optional} = "@_"; () } ); + + sub set_Re_callback { + my ($sub) = @_; + croak 'Not a CODE reference' if not ref $sub eq 'CODE'; + def_macro('.Re', sub { + my @ret = $sub->(\%reference); + %reference = (); @ret + }); + return; + } +} + +def_macro('.Bl', sub { die '.Bl - no list callback set' }); +def_macro('.It', sub { die ".It called outside of list context - maybe near line $." }); +def_macro('.El', sub { die '.El requires .Bl first' }); + + +{ + my $elcb = sub { () }; + + sub set_El_callback { + my ($sub) = @_; + croak 'Not a CODE reference' if ref $sub ne 'CODE'; + $elcb = $sub; + return; + } + + sub set_Bl_callback { + my ($blcb, %defs) = @_; + croak 'Not a CODE reference' if ref $blcb ne 'CODE'; + def_macro('.Bl', sub { + + my $orig_it = get_macro('.It'); + my $orig_el = get_macro('.El'); + my $orig_bl = get_macro('.Bl'); + my $orig_elcb = $elcb; + + # Restore previous .It and .El on each .El + def_macro('.El', sub { + def_macro('.El', delete $orig_el->{run}, %$orig_el); + def_macro('.It', delete $orig_it->{run}, %$orig_it); + def_macro('.Bl', delete $orig_bl->{run}, %$orig_bl); + my @ret = $elcb->(@_); + $elcb = $orig_elcb; + @ret + }); + $blcb->(@_) + }, %defs); + return; + } +} + +def_macro('.Sm', sub { + my ($arg) = @_; + if (defined $arg) { + space($arg); + } else { + space() eq 'off' ? + space('on') : + space('off'); + } + () +} ); +def_macro('Sm', do { my $off; sub { + my ($arg) = @_; + if (defined $arg && $arg =~ /^(on|off)$/) { + shift; + if ($arg eq 'off') { soff, @_; } + elsif ($arg eq 'on') { son, @_; } + } + else { + stoggle, @_; + } +}} ); + +############################################################################### + +# Default macro definitions end + +############################################################################### + +sub def_macro { + croak 'Odd number of elements for hash argument <'.(scalar @_).'>' if @_%2; + my ($macro, $sub, %def) = @_; + croak 'Not a CODE reference' if ref $sub ne 'CODE'; + + $macros{ $macro } = { + run => $sub, + greedy => delete $def{greedy} || 0, + raw => delete $def{raw} || 0, + concat_until => delete $def{concat_until}, + }; + if ($macros{ $macro }{concat_until}) { + $macros{ $macros{ $macro }{concat_until} } = { run => sub { @_ } }; + $macros{ $macro }{greedy} = 1; + } + return; +} + +sub get_macro { + my ($macro) = @_; + croak "Macro <$macro> not defined" if not exists $macros{ $macro }; + +{ %{ $macros{ $macro } } } +} + +#TODO: document this +sub parse_opts { + my %args; + my $last; + for (@_) { + if ($_ =~ /^\\?-/) { + s/^\\?-//; + $args{$_} = 1; + $last = _unquote($_); + } + else { + $args{$last} = _unquote($_) if $last; + undef $last; + } + } + return %args; +} + +sub _is_control { + my ($el, $expected) = @_; + if (defined $expected) { + ref $el eq 'ARRAY' and $el->[0] eq $expected; + } + else { + ref $el eq 'ARRAY'; + } +} + +{ + my $sep = ' '; + + sub to_string { + if (@_ > 0) { + # Handle punctunation + my ($in_brace, @punct) = ''; + my @new = map { + if (/^([\[\(])$/) { + ($in_brace = $1) =~ tr/([/)]/; + $_, ns + } + elsif (/^([\)\]])$/ && $in_brace eq $1) { + $in_brace = ''; + ns, $_ + } + elsif ($_ =~ /^[,\.;:\?\!\)\]]$/) { + push @punct, ns, $_; + (); + } + elsif (_is_control($_, 'pp')) { + $_->[1] + } + elsif (_is_control($_)) { + $_ + } + else { + splice (@punct), $_; + } + } @_; + push @new, @punct; + + # Produce string out of an array dealing with the special control characters + # space('off') must but one character delayed + my ($no_space, $space_off) = 1; + my $res = ''; + while (defined(my $el = shift @new)) { + if (_is_control($el, 'hardspace')) { $no_space = 1; $res .= ' ' } + elsif (_is_control($el, 'nospace')) { $no_space = 1; } + elsif (_is_control($el, 'spaceoff')) { $space_off = 1; } + elsif (_is_control($el, 'spaceon')) { space('on'); } + elsif (_is_control($el, 'spacetoggle')) { space() eq 'on' ? + $space_off = 1 : + space('on') } + else { + if ($no_space) { + $no_space = 0; + $res .= "$el" + } + else { + $res .= "$sep$el" + } + + if ($space_off) { space('off'); $space_off = 0; } + } + } + $res + } + else { + ''; + } + } + + sub space { + my ($arg) = @_; + if (defined $arg && $arg =~ /^(on|off)$/) { + $sep = ' ' if $arg eq 'on'; + $sep = '' if $arg eq 'off'; + return; + } + else { + return $sep eq '' ? 'off' : 'on'; + } + } +} + +sub _unquote { + my @args = @_; + $_ =~ s/^"([^"]+)"$/$1/g for @args; + wantarray ? @args : $args[0]; +} + +sub call_macro { + my ($macro, @args) = @_; + my @ret; + + my @newargs; + my $i = 0; + + @args = _unquote(@args) if (!$macros{ $macro }{raw}); + + # Call any callable macros in the argument list + for (@args) { + if ($_ =~ /^[A-Z][a-z]+$/ && exists $macros{ $_ }) { + push @ret, call_macro($_, @args[$i+1 .. $#args]); + last; + } else { + if ($macros{ $macro }{greedy}) { + push @ret, $_; + } + else { + push @newargs, $_; + } + } + $i++; + } + + if ($macros{ $macro }{concat_until}) { + my ($n_macro, @n_args) = (''); + while (1) { + die "EOF was reached and no $macros{ $macro }{concat_until} found" + if not defined $n_macro; + ($n_macro, @n_args) = parse_line(undef, sub { push @ret, shift }); + if ($n_macro eq $macros{ $macro }{concat_until}) { + push @ret, call_macro($n_macro, @n_args); + last; + } + else { + $n_macro =~ s/^\.//; + push @ret, call_macro($n_macro, @n_args) if exists $macros{ $n_macro }; + } + } + } + + if ($macros{ $macro }{greedy}) { + #print "MACROG $macro (", (join ', ', @ret), ")\n"; + return $macros{ $macro }{run}->(@ret); + } + else { + #print "MACRO $macro (", (join ', ', @newargs), ")".(join ', ', @ret)."\n"; + return $macros{ $macro }{run}->(@newargs), @ret; + } +} + +{ + my ($in_fh, $out_sub, $preprocess_sub); + sub parse_line { + $in_fh = $_[0] if defined $_[0] || !defined $in_fh; + $out_sub = $_[1] if defined $_[1] || !defined $out_sub; + $preprocess_sub = $_[2] if defined $_[2] || !defined $preprocess_sub; + + croak 'out_sub not a CODE reference' + if not ref $out_sub eq 'CODE'; + croak 'preprocess_sub not a CODE reference' + if defined $preprocess_sub && not ref $preprocess_sub eq 'CODE'; + + while (my $line = <$in_fh>) { + chomp $line; + if ($line =~ /^\.[A-z][a-z0-9]+/ || $line =~ /^\.%[A-Z]/ || + $line =~ /^\.\\"/) + { + $line =~ s/ +/ /g; + my ($macro, @args) = quotewords(' ', 1, $line); + @args = grep { defined $_ } @args; + $preprocess_sub->(@args) if defined $preprocess_sub; + if ($macro && exists $macros{ $macro }) { + return ($macro, @args); + } else { + $out_sub->($line); + } + } + else { + $out_sub->($line); + } + } + return; + } +} + +1; +__END__ |