1290001Sglebius=head1 NAME
2290001Sglebius
3290001SglebiusMdoc - perl module to parse Mdoc macros
4290001Sglebius
5290001Sglebius=head1 SYNOPSIS
6290001Sglebius
7290001Sglebius    use Mdoc qw(ns pp soff son stoggle mapwords);
8290001Sglebius
9290001SglebiusSee mdoc2man and mdoc2texi for code examples.
10290001Sglebius
11290001Sglebius=head1 FUNCTIONS
12290001Sglebius
13290001Sglebius=over 4
14290001Sglebius
15290001Sglebius=item def_macro( NAME, CODE, [ raw => 1, greedy => 1, concat_until => '.Xx' ] )
16290001Sglebius
17290001SglebiusDefine new macro. The CODE reference will be called by call_macro(). You can
18290001Sglebiushave two distinct definitions for and inline macro and for a standalone macro
19290001Sglebius(i. e. 'Pa' and '.Pa').
20290001Sglebius
21290001SglebiusThe CODE reference is passed a list of arguments and is expected to return list
22290001Sglebiusof strings and control characters (see C<CONSTANTS>).
23290001Sglebius
24290001SglebiusBy default the surrouding "" from arguments to macros are removed, use C<raw>
25290001Sglebiusto disable this.
26290001Sglebius
27290001SglebiusNormaly CODE reference is passed all arguments up to next nested macro. Set
28290001SglebiusC<greedy> to to pass everything up to the end of the line.
29290001Sglebius
30290001SglebiusIf the concat_until is present, the line is concated until the .Xx macro is
31290001Sglebiusfound. For example the following macro definition
32290001Sglebius
33290001Sglebius    def_macro('.Oo', gen_encloser(qw([ ]), concat_until => '.Oc' }
34290001Sglebius    def_macro('.Cm', sub { mapwords {'($_)'} @_ } }
35290001Sglebius
36290001Sglebiusand the following input
37290001Sglebius
38290001Sglebius    .Oo
39290001Sglebius    .Cm foo |
40290001Sglebius    .Cm bar |
41290001Sglebius    .Oc
42290001Sglebius
43290001Sglebiusresults in [(foo) | (bar)]
44290001Sglebius
45290001Sglebius=item get_macro( NAME )
46290001Sglebius
47290001SglebiusReturns a hash reference like:
48290001Sglebius
49290001Sglebius    { run => CODE, raw => [1|0], greedy => [1|0] }
50290001Sglebius
51290001SglebiusWhere C<CODE> is the CODE reference used to define macro called C<NAME>
52290001Sglebius
53290001Sglebius=item parse_line( INPUT, OUTPUT_CODE, PREPROCESS_CODE )
54290001Sglebius
55290001SglebiusParse a line from the C<INPUT> filehandle. If a macro was detected it returns a
56290001Sglebiuslist (MACRO_NAME, @MACRO_ARGS), otherwise it calls the C<OUTPUT_CODE>, giving
57290001Sglebiuscaller a chance to modify line before printing it. If C<PREPROCESS_CODE> is
58290001Sglebiusdefined it calls it prior to passing argument to a macro, giving caller a
59290001Sglebiuschance to alter them.  if EOF was reached undef is returned.
60290001Sglebius
61290001Sglebius=item call_macro( MACRO, ARGS, ... )
62290001Sglebius
63290001SglebiusCall macro C<MACRO> with C<ARGS>. The CODE reference for macro C<MACRO> is
64290001Sglebiuscalled and for all the nested macros. Every called macro returns a list which
65290001Sglebiusis appended to return value and returned when all nested macros are processed.
66290001SglebiusUse to_string() to produce a printable string from the list.
67290001Sglebius
68290001Sglebius=item to_string ( LIST )
69290001Sglebius
70290001SglebiusProcesses C<LIST> returned from call_macro() and returns formatted string.
71290001Sglebius
72290001Sglebius=item mapwords BLOCK ARRAY
73290001Sglebius
74290001SglebiusThis is like perl's map only it calls BLOCK only on elements which are not
75290001Sglebiuspunctuation or control characters.
76290001Sglebius
77290001Sglebius=item space ( ['on'|'off] )
78290001Sglebius
79290001SglebiusTurn spacing on or off. If called without argument it returns the current state.
80290001Sglebius
81290001Sglebius=item gen_encloser ( START, END )
82290001Sglebius
83290001SglebiusHelper function for generating macros that enclose their arguments.
84290001Sglebius    gen_encloser(qw({ }));
85290001Sglebiusreturns
86290001Sglebius    sub { '{', ns, @_, ns, pp('}')}
87290001Sglebius
88290001Sglebius=item set_Bl_callback( CODE , DEFS )
89290001Sglebius
90290001SglebiusThis module implements the Bl/El macros for you. Using set_Bl_callback you can
91290001Sglebiusprovide a macro definition that should be executed on a .Bl call.
92290001Sglebius
93290001Sglebius=item set_El_callback( CODE , DEFS )
94290001Sglebius
95290001SglebiusThis module implements the Bl/El macros for you. Using set_El_callback you can
96290001Sglebiusprovide a macro definition that should be executed on a .El call.
97290001Sglebius
98290001Sglebius=item set_Re_callback( CODE )
99290001Sglebius
100290001SglebiusThe C<CODE> is called after a Rs/Re block is done. With a hash reference as a
101290001Sglebiusparameter, describing the reference.
102290001Sglebius
103290001Sglebius=back
104290001Sglebius
105290001Sglebius=head1 CONSTANTS
106290001Sglebius
107290001Sglebius=over 4
108290001Sglebius
109290001Sglebius=item ns
110290001Sglebius
111290001SglebiusIndicate 'no space' between to members of the list.
112290001Sglebius
113290001Sglebius=item pp ( STRING )
114290001Sglebius
115290001SglebiusThe string is 'punctuation point'. It means that every punctuation
116290001Sglebiuspreceeding that element is put behind it.
117290001Sglebius
118290001Sglebius=item soff
119290001Sglebius
120290001SglebiusTurn spacing off.
121290001Sglebius
122290001Sglebius=item son
123290001Sglebius
124290001SglebiusTurn spacing on.
125290001Sglebius
126290001Sglebius=item stoggle
127290001Sglebius
128290001SglebiusToogle spacing.
129290001Sglebius
130290001Sglebius=item hs
131290001Sglebius
132290001SglebiusPrint space no matter spacing mode.
133290001Sglebius
134290001Sglebius=back
135290001Sglebius
136290001Sglebius=head1 TODO
137290001Sglebius
138290001Sglebius* The concat_until only works with standalone macros. This means that
139290001Sglebius    .Po blah Pc
140290001Sglebiuswill hang until .Pc in encountered.
141290001Sglebius
142290001Sglebius* Provide default macros for Bd/Ed
143290001Sglebius
144290001Sglebius* The reference implementation is uncomplete
145290001Sglebius
146290001Sglebius=cut
147290001Sglebius
148290001Sglebiuspackage Mdoc;
149290001Sglebiususe strict;
150290001Sglebiususe warnings;
151290001Sglebiususe List::Util qw(reduce);
152290001Sglebiususe Text::ParseWords qw(quotewords);
153290001Sglebiususe Carp;
154290001Sglebiususe Exporter qw(import);
155290001Sglebiusour @EXPORT_OK = qw(ns pp soff son stoggle hs mapwords gen_encloser nl);
156290001Sglebius
157290001Sglebiususe constant {
158290001Sglebius    ns      => ['nospace'],
159290001Sglebius    soff    => ['spaceoff'],
160290001Sglebius    son     => ['spaceon'],
161290001Sglebius    stoggle => ['spacetoggle'],
162290001Sglebius    hs      => ['hardspace'],
163290001Sglebius};
164290001Sglebius
165290001Sglebiussub pp {
166290001Sglebius    my $c = shift;
167290001Sglebius    return ['pp', $c ];
168290001Sglebius}
169290001Sglebiussub gen_encloser {
170290001Sglebius    my ($o, $c) = @_;
171290001Sglebius    return sub { ($o, ns, @_, ns, pp($c)) };
172290001Sglebius}
173290001Sglebius
174290001Sglebiussub mapwords(&@) {
175290001Sglebius    my ($f, @l) = @_;
176290001Sglebius    my @res;
177290001Sglebius    for my $el (@l) {
178290001Sglebius        local $_ = $el;
179290001Sglebius        push @res, $el =~ /^(?:[,\.\{\}\(\):;\[\]\|])$/ || ref $el eq 'ARRAY' ?
180290001Sglebius                    $el : $f->();
181290001Sglebius    }
182290001Sglebius    return @res;
183290001Sglebius}
184290001Sglebius
185290001Sglebiusmy %macros;
186290001Sglebius
187290001Sglebius###############################################################################
188290001Sglebius
189290001Sglebius# Default macro definitions start
190290001Sglebius
191290001Sglebius###############################################################################
192290001Sglebius
193290001Sglebiusdef_macro('Xo',  sub { @_ }, concat_until => '.Xc');
194290001Sglebius
195290001Sglebiusdef_macro('.Ns', sub {ns, @_});
196290001Sglebiusdef_macro('Ns',  sub {ns, @_});
197290001Sglebius
198290001Sglebius{
199290001Sglebius    my %reference;
200290001Sglebius    def_macro('.Rs', sub { () } );
201290001Sglebius    def_macro('.%A', sub {
202290001Sglebius        if ($reference{authors}) {
203290001Sglebius            $reference{authors} .= " and @_"
204290001Sglebius        }
205290001Sglebius        else {
206290001Sglebius            $reference{authors} = "@_";
207290001Sglebius        }
208290001Sglebius        return ();
209290001Sglebius    });
210290001Sglebius    def_macro('.%T', sub { $reference{title} = "@_"; () } );
211290001Sglebius    def_macro('.%O', sub { $reference{optional} = "@_"; () } );
212290001Sglebius
213290001Sglebius    sub set_Re_callback {
214290001Sglebius        my ($sub) = @_;
215290001Sglebius        croak 'Not a CODE reference' if not ref $sub eq 'CODE';
216290001Sglebius        def_macro('.Re', sub {
217290001Sglebius            my @ret = $sub->(\%reference);
218290001Sglebius            %reference = (); @ret
219290001Sglebius        });
220290001Sglebius        return;
221290001Sglebius    }
222290001Sglebius}
223290001Sglebius
224290001Sglebiusdef_macro('.Bl', sub { die '.Bl - no list callback set' });
225290001Sglebiusdef_macro('.It', sub { die ".It called outside of list context - maybe near line $." });
226290001Sglebiusdef_macro('.El', sub { die '.El requires .Bl first' });
227290001Sglebius
228290001Sglebius
229290001Sglebius{
230290001Sglebius    my $elcb = sub { () };
231290001Sglebius
232290001Sglebius    sub set_El_callback {
233290001Sglebius        my ($sub) = @_;
234290001Sglebius        croak 'Not a CODE reference' if ref $sub ne 'CODE';
235290001Sglebius        $elcb = $sub;
236290001Sglebius        return;
237290001Sglebius    }
238290001Sglebius
239290001Sglebius    sub set_Bl_callback {
240290001Sglebius        my ($blcb, %defs) = @_;
241290001Sglebius        croak 'Not a CODE reference' if ref $blcb ne 'CODE';
242290001Sglebius        def_macro('.Bl', sub {
243290001Sglebius
244290001Sglebius            my $orig_it   = get_macro('.It');
245290001Sglebius            my $orig_el   = get_macro('.El');
246290001Sglebius            my $orig_bl   = get_macro('.Bl');
247290001Sglebius            my $orig_elcb = $elcb;
248290001Sglebius
249290001Sglebius            # Restore previous .It and .El on each .El
250290001Sglebius            def_macro('.El', sub {
251290001Sglebius                    def_macro('.El', delete $orig_el->{run}, %$orig_el);
252290001Sglebius                    def_macro('.It', delete $orig_it->{run}, %$orig_it);
253290001Sglebius                    def_macro('.Bl', delete $orig_bl->{run}, %$orig_bl);
254290001Sglebius                    my @ret = $elcb->(@_);
255290001Sglebius                    $elcb = $orig_elcb;
256290001Sglebius                    @ret
257290001Sglebius                });
258290001Sglebius            $blcb->(@_)
259290001Sglebius        }, %defs);
260290001Sglebius        return;
261290001Sglebius    }
262290001Sglebius}
263290001Sglebius
264290001Sglebiusdef_macro('.Sm', sub {
265290001Sglebius    my ($arg) = @_;
266290001Sglebius    if (defined $arg) {
267290001Sglebius        space($arg);
268290001Sglebius    } else {
269290001Sglebius        space() eq 'off' ?
270290001Sglebius            space('on') :
271290001Sglebius            space('off');
272290001Sglebius    }
273290001Sglebius    ()
274290001Sglebius} );
275290001Sglebiusdef_macro('Sm', do { my $off; sub {
276290001Sglebius    my ($arg) = @_;
277290001Sglebius    if (defined $arg && $arg =~ /^(on|off)$/) {
278290001Sglebius        shift;
279290001Sglebius        if    ($arg eq 'off') { soff, @_; }
280290001Sglebius        elsif ($arg eq 'on')  { son, @_; }
281290001Sglebius    }
282290001Sglebius    else {
283290001Sglebius        stoggle, @_;
284290001Sglebius    }
285290001Sglebius}} );
286290001Sglebius
287290001Sglebius###############################################################################
288290001Sglebius
289290001Sglebius# Default macro definitions end
290290001Sglebius
291290001Sglebius###############################################################################
292290001Sglebius
293290001Sglebiussub def_macro {
294290001Sglebius    croak 'Odd number of elements for hash argument <'.(scalar @_).'>' if @_%2;
295290001Sglebius    my ($macro, $sub, %def) = @_;
296290001Sglebius    croak 'Not a CODE reference' if ref $sub ne 'CODE';
297290001Sglebius
298290001Sglebius    $macros{ $macro } = {
299290001Sglebius        run          => $sub,
300290001Sglebius        greedy       => delete $def{greedy} || 0,
301290001Sglebius        raw          => delete $def{raw}    || 0,
302290001Sglebius        concat_until => delete $def{concat_until},
303290001Sglebius    };
304290001Sglebius    if ($macros{ $macro }{concat_until}) {
305290001Sglebius        $macros{ $macros{ $macro }{concat_until} } = { run => sub { @_ } };
306290001Sglebius        $macros{ $macro }{greedy}                  = 1;
307290001Sglebius    }
308290001Sglebius    return;
309290001Sglebius}
310290001Sglebius
311290001Sglebiussub get_macro {
312290001Sglebius    my ($macro) = @_;
313290001Sglebius    croak "Macro <$macro> not defined" if not exists $macros{ $macro };
314290001Sglebius    +{ %{ $macros{ $macro } } }
315290001Sglebius}
316290001Sglebius
317290001Sglebius#TODO: document this
318290001Sglebiussub parse_opts {
319290001Sglebius    my %args;
320290001Sglebius    my $last;
321290001Sglebius    for (@_) {
322290001Sglebius        if ($_ =~ /^\\?-/) {
323290001Sglebius            s/^\\?-//;
324290001Sglebius            $args{$_} = 1;
325290001Sglebius            $last = _unquote($_);
326290001Sglebius        }
327290001Sglebius        else {
328290001Sglebius            $args{$last} = _unquote($_) if $last;
329290001Sglebius            undef $last;
330290001Sglebius        }
331290001Sglebius    }
332290001Sglebius    return %args;
333290001Sglebius}
334290001Sglebius
335290001Sglebiussub _is_control {
336290001Sglebius    my ($el, $expected) = @_;
337290001Sglebius    if (defined $expected) {
338290001Sglebius        ref $el eq 'ARRAY' and $el->[0] eq $expected;
339290001Sglebius    }
340290001Sglebius    else {
341290001Sglebius        ref $el eq 'ARRAY';
342290001Sglebius    }
343290001Sglebius}
344290001Sglebius
345290001Sglebius{
346290001Sglebius    my $sep = ' ';
347290001Sglebius
348290001Sglebius    sub to_string {
349290001Sglebius        if (@_ > 0) {
350290001Sglebius            # Handle punctunation
351290001Sglebius            my ($in_brace, @punct) = '';
352290001Sglebius            my @new = map {
353290001Sglebius                if (/^([\[\(])$/) {
354290001Sglebius                    ($in_brace = $1) =~ tr/([/)]/;
355290001Sglebius                    $_, ns
356290001Sglebius                }
357290001Sglebius                elsif (/^([\)\]])$/ && $in_brace eq $1) {
358290001Sglebius                    $in_brace = '';
359290001Sglebius                    ns, $_
360290001Sglebius                }
361290001Sglebius                elsif ($_ =~ /^[,\.;:\?\!\)\]]$/) {
362290001Sglebius                    push @punct, ns, $_;
363290001Sglebius                    ();
364290001Sglebius                }
365290001Sglebius                elsif (_is_control($_, 'pp')) {
366290001Sglebius                    $_->[1]
367290001Sglebius                }
368290001Sglebius                elsif (_is_control($_)) {
369290001Sglebius                    $_
370290001Sglebius                }
371290001Sglebius                else {
372290001Sglebius                    splice (@punct), $_;
373290001Sglebius                }
374290001Sglebius            } @_;
375290001Sglebius            push @new, @punct;
376290001Sglebius
377290001Sglebius            # Produce string out of an array dealing with the special control characters
378290001Sglebius            # space('off') must but one character delayed
379290001Sglebius            my ($no_space, $space_off) = 1;
380290001Sglebius            my $res = '';
381290001Sglebius            while (defined(my $el = shift @new)) {
382290001Sglebius                if    (_is_control($el, 'hardspace'))   { $no_space = 1; $res .= ' ' }
383290001Sglebius                elsif (_is_control($el, 'nospace'))     { $no_space = 1;             }
384290001Sglebius                elsif (_is_control($el, 'spaceoff'))    { $space_off = 1;            }
385290001Sglebius                elsif (_is_control($el, 'spaceon'))     { space('on');               }
386290001Sglebius                elsif (_is_control($el, 'spacetoggle')) { space() eq 'on' ?
387290001Sglebius                                                            $space_off = 1 :
388290001Sglebius                                                            space('on')              }
389290001Sglebius                else {
390290001Sglebius                    if ($no_space) {
391290001Sglebius                        $no_space = 0;
392290001Sglebius                        $res .= "$el"
393290001Sglebius                    }
394290001Sglebius                    else {
395290001Sglebius                        $res .= "$sep$el"
396290001Sglebius                    }
397290001Sglebius
398290001Sglebius                    if ($space_off)    { space('off'); $space_off = 0; }
399290001Sglebius                }
400290001Sglebius            }
401290001Sglebius            $res
402290001Sglebius        }
403290001Sglebius        else {
404290001Sglebius            '';
405290001Sglebius        }
406290001Sglebius    }
407290001Sglebius
408290001Sglebius    sub space {
409290001Sglebius        my ($arg) = @_;
410290001Sglebius        if (defined $arg && $arg =~ /^(on|off)$/) {
411290001Sglebius            $sep = ' ' if $arg eq 'on';
412290001Sglebius            $sep = ''  if $arg eq 'off';
413290001Sglebius            return;
414290001Sglebius        }
415290001Sglebius        else {
416290001Sglebius            return $sep eq '' ? 'off' : 'on';
417290001Sglebius        }
418290001Sglebius    }
419290001Sglebius}
420290001Sglebius
421290001Sglebiussub _unquote {
422290001Sglebius    my @args = @_;
423290001Sglebius    $_ =~ s/^"([^"]+)"$/$1/g for @args;
424290001Sglebius    wantarray ? @args : $args[0];
425290001Sglebius}
426290001Sglebius
427290001Sglebiussub call_macro {
428290001Sglebius    my ($macro, @args) = @_;
429290001Sglebius    my @ret;
430290001Sglebius
431290001Sglebius    my @newargs;
432290001Sglebius    my $i = 0;
433290001Sglebius
434290001Sglebius    @args = _unquote(@args) if (!$macros{ $macro }{raw});
435290001Sglebius
436290001Sglebius    # Call any callable macros in the argument list
437290001Sglebius    for (@args) {
438290001Sglebius        if ($_ =~ /^[A-Z][a-z]+$/ && exists $macros{ $_ }) {
439290001Sglebius            push @ret, call_macro($_, @args[$i+1 .. $#args]);
440290001Sglebius            last;
441290001Sglebius        } else {
442290001Sglebius            if ($macros{ $macro }{greedy}) {
443290001Sglebius                push @ret, $_;
444290001Sglebius            }
445290001Sglebius            else {
446290001Sglebius                push @newargs, $_;
447290001Sglebius            }
448290001Sglebius        }
449290001Sglebius        $i++;
450290001Sglebius    }
451290001Sglebius
452290001Sglebius    if ($macros{ $macro }{concat_until}) {
453290001Sglebius        my ($n_macro, @n_args) = ('');
454290001Sglebius        while (1) {
455290001Sglebius            die "EOF was reached and no $macros{ $macro }{concat_until} found"
456290001Sglebius                if not defined $n_macro;
457290001Sglebius            ($n_macro, @n_args) = parse_line(undef, sub { push @ret, shift });
458290001Sglebius            if ($n_macro eq $macros{ $macro }{concat_until}) {
459290001Sglebius                push @ret, call_macro($n_macro, @n_args);
460290001Sglebius                last;
461290001Sglebius            }
462290001Sglebius            else {
463290001Sglebius                $n_macro =~ s/^\.//;
464290001Sglebius                push @ret, call_macro($n_macro, @n_args) if exists $macros{ $n_macro };
465290001Sglebius            }
466290001Sglebius        }
467290001Sglebius    }
468290001Sglebius
469290001Sglebius    if ($macros{ $macro }{greedy}) {
470290001Sglebius        #print "MACROG $macro (", (join ', ', @ret), ")\n";
471290001Sglebius        return $macros{ $macro }{run}->(@ret);
472290001Sglebius    }
473290001Sglebius    else {
474290001Sglebius        #print "MACRO $macro (", (join ', ', @newargs), ")".(join ', ', @ret)."\n";
475290001Sglebius        return $macros{ $macro }{run}->(@newargs), @ret;
476290001Sglebius    }
477290001Sglebius}
478290001Sglebius
479290001Sglebius{
480290001Sglebius    my ($in_fh, $out_sub, $preprocess_sub);
481290001Sglebius    sub parse_line {
482290001Sglebius        $in_fh          = $_[0] if defined $_[0] || !defined $in_fh;
483290001Sglebius        $out_sub        = $_[1] if defined $_[1] || !defined $out_sub;
484290001Sglebius        $preprocess_sub = $_[2] if defined $_[2] || !defined $preprocess_sub;
485290001Sglebius
486290001Sglebius        croak 'out_sub not a CODE reference'
487290001Sglebius            if not ref $out_sub eq 'CODE';
488290001Sglebius        croak 'preprocess_sub not a CODE reference'
489290001Sglebius            if defined $preprocess_sub && not ref $preprocess_sub eq 'CODE';
490290001Sglebius
491290001Sglebius        while (my $line = <$in_fh>) {
492290001Sglebius            chomp $line;
493290001Sglebius            if ($line =~ /^\.[A-z][a-z0-9]+/ || $line =~ /^\.%[A-Z]/ ||
494290001Sglebius                $line =~ /^\.\\"/)
495290001Sglebius            {
496290001Sglebius                $line =~ s/ +/ /g;
497290001Sglebius                my ($macro, @args) = quotewords(' ', 1, $line);
498290001Sglebius                @args = grep { defined $_ } @args;
499290001Sglebius                $preprocess_sub->(@args) if defined $preprocess_sub;
500290001Sglebius                if ($macro && exists $macros{ $macro }) {
501290001Sglebius                    return ($macro, @args);
502290001Sglebius                } else {
503290001Sglebius                    $out_sub->($line);
504290001Sglebius                }
505290001Sglebius            }
506290001Sglebius            else {
507290001Sglebius                $out_sub->($line);
508290001Sglebius            }
509290001Sglebius        }
510290001Sglebius        return;
511290001Sglebius    }
512290001Sglebius}
513290001Sglebius
514290001Sglebius1;
515290001Sglebius__END__
516