#!/usr/bin/env perl
use
open
qw(:std :utf8)
;
my
$text
;
if
(
@ARGV
) {
my
$file
=
shift
@ARGV
;
open
(
my
$fh
,
'<'
,
$file
) or
die
$!;
local
$/;
$text
= <
$fh
>;
close
$fh
;
}
else
{
local
$/;
$text
= <>;
}
sub
newlines_to_spaces {
my
$t
=
shift
;
$t
=~ s/\n/ /g;
return
$t
;
}
sub
do_semantic_markup_line {
my
(
$macro
,
$tag
) =
@_
;
$text
=~ s|
<
$macro
.*?>(.*?)</
$macro
>(~?[\.!\?,;:])?\s*
|
my
$punct
= $2;
$punct
=
defined
$punct
?
" $punct"
:
""
;
if
(
defined
$tag
) {
"\n.Sm -t $tag"
.
' "'
. newlines_to_spaces($1) .
$punct
. "\n"
}
else
{
"\n.Sm"
.
' "'
. newlines_to_spaces($1) .
$punct
. "\n"
}
|xseg;
}
sub
do_semantic_markup_block {
my
(
$macro
,
$tag
) =
@_
;
$text
=~ s|
<
$macro
.*?>(.*?)</
$macro
>(~?[\.!\?,;:])?\s*
|
my
$punct
= $2;
$punct
=
defined
$punct
?
" $punct"
:
""
;
if
(
defined
$tag
) {
"\n.Bm -t $tag\n$1\n.Em$punct\n"
}
else
{
"\n.Bm\n$1\n.Em$punct\n"
}
|xeg;
}
$text
=~ s|<p>||g;
$text
=~ s|</?body>||g;
$text
=~ s|</table>|\n.El\n|g;
$text
=~ s|</
tr
>||g;
$text
=~ s|</dl>|\n.El\n|g;
$text
=~ s|</ul>|\n.El\n|g;
$text
=~ s|</?dd>||g;
$text
=~ s|</li>||g;
$text
=~ s|<td>||g;
$text
=~ s
$text
=~ s|<table.*?>|\n.Bl -t table -columns 3\n|sg;
$text
=~ s|<ul.*?>|\n.Bl\n|sg;
$text
=~ s|<dl>|\n.Bl -t desc\n|g;
$text
=~ s|<
tr
>|\n.It\n|g;
$text
=~ s|<dt>(.*?)</dt>|
my
$text
= $1;
$text
=~ s/\n/ /g;
$text
=
"\n.It $1\n"
;
$text
;
|xesg;
$text
=~ s|<li>|\n.It\n|g;
$text
=~ s|<td>|\n.Ta\n|g;
$text
=~ s|<h1.*?>(.*?)</h1>|\n.Pt
"$1"
\n|sg;
$text
=~ s|<h2.*?>(.*?)</h2>|\n.Ch
"$1"
\n|sg;
$text
=~ s|<h3.*?>(.*?)</h3>|\n.Sh
"$1"
\n|sg;
$text
=~ s|<h4.*?>(.*?)</h4>|\n.Ss
"$1"
\n|sg;
$text
=~ s|<a\shref=
"(.*?)"
\s*>(.*?)</a>(~?[\.!\?,;:])?|
my
$punct
= $3;
my
$href
= $1;
$href
=~ s/\n/ /g;
my
$link_text
= $2;
$link_text
=~ s/\n/ /g;
$punct
=
defined
$punct
?
" $punct"
:
""
;
qq{\n.Lk "$href" "$link_text"$punct\n}
;
|sxeg;
do_semantic_markup_block(
"em"
);
do_semantic_markup_block(
"strong"
);
do_semantic_markup_block(
"code"
);
$text
=~ s/\n\s*\n+/\n/g;
$text
=~ s/[ \t]*$//mg;
$text
=~ s/^[ \t]*//mg;
print
$text
;