#! /usr/bin/perl
use
5.005_03;
use
vars
qw($VERSION $PROG @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS)
;
BEGIN {
@ISA
=
qw(Exporter)
;
}
%EXPORT_TAGS
= (
'all'
=> [
qw(
)
]
);
@EXPORT_OK
= (@{
$EXPORT_TAGS
{
'all'
}});
@EXPORT
=
qw(
run_txt2html
)
;
$PROG
=
'HTML::TextToHTML'
;
$VERSION
=
'2.02'
;
use
constant
TEXT_TO_HTML
=>
"TEXT_TO_HTML"
;
use
vars
qw($NONE $LIST $HRULE $PAR $PRE $END $BREAK $HEADER
$MAILHEADER $MAILQUOTE $CAPS $LINK $PRE_EXPLICIT $TABLE
$IND_BREAK $LIST_START $LIST_ITEM)
;
$NONE
= 0;
$LIST
= 1;
$HRULE
= 2;
$PAR
= 4;
$PRE
= 8;
$END
= 16;
$BREAK
= 32;
$HEADER
= 64;
$MAILHEADER
= 128;
$MAILQUOTE
= 256;
$CAPS
= 512;
$LINK
= 1024;
$PRE_EXPLICIT
= 2048;
$TABLE
= 4096;
$IND_BREAK
= 8192;
$LIST_START
= 16384;
$LIST_ITEM
= 32768;
use
vars
qw($LINK_NOCASE $LINK_EVAL $LINK_HTML $LINK_ONCE $LINK_SECT_ONCE)
;
$LINK_NOCASE
= 1;
$LINK_EVAL
= 2;
$LINK_HTML
= 4;
$LINK_ONCE
= 8;
$LINK_SECT_ONCE
= 16;
$OL
= 1;
$UL
= 2;
use
vars
qw(%char_entities %char_entities2)
;
%char_entities
= (
"\241"
,
"¡"
,
"\242"
,
"¢"
,
"\243"
,
"£"
,
"\244"
,
"¤"
,
"\245"
,
"¥"
,
"\246"
,
"¦"
,
"\247"
,
"§"
,
"\250"
,
"¨"
,
"\251"
,
"©"
,
"\252"
,
"ª"
,
"\253"
,
"«"
,
"\254"
,
"¬"
,
"\255"
,
"­"
,
"\256"
,
"®"
,
"\257"
,
"&hibar;"
,
"\260"
,
"°"
,
"\261"
,
"±"
,
"\262"
,
"²"
,
"\263"
,
"³"
,
"\264"
,
"´"
,
"\265"
,
"µ"
,
"\266"
,
"¶"
,
"\270"
,
"¸"
,
"\271"
,
"¹"
,
"\272"
,
"º"
,
"\273"
,
"»"
,
"\274"
,
"¼"
,
"\275"
,
"½"
,
"\276"
,
"¾"
,
"\277"
,
"¿"
,
"\300"
,
"À"
,
"\301"
,
"Á"
,
"\302"
,
"Â"
,
"\303"
,
"Ã"
,
"\304"
,
"Ä"
,
"\305"
,
"Å"
,
"\306"
,
"Æ"
,
"\307"
,
"Ç"
,
"\310"
,
"È"
,
"\311"
,
"É"
,
"\312"
,
"Ê"
,
"\313"
,
"Ë"
,
"\314"
,
"Ì"
,
"\315"
,
"Í"
,
"\316"
,
"Î"
,
"\317"
,
"Ï"
,
"\320"
,
"Ð"
,
"\321"
,
"Ñ"
,
"\322"
,
"Ò"
,
"\323"
,
"Ó"
,
"\324"
,
"Ô"
,
"\325"
,
"Õ"
,
"\326"
,
"Ö"
,
"\327"
,
"×"
,
"\330"
,
"Ø"
,
"\331"
,
"Ù"
,
"\332"
,
"Ú"
,
"\333"
,
"Û"
,
"\334"
,
"Ü"
,
"\335"
,
"Ý"
,
"\336"
,
"Þ"
,
"\337"
,
"ß"
,
"\340"
,
"à"
,
"\341"
,
"á"
,
"\342"
,
"â"
,
"\343"
,
"ã"
,
"\344"
,
"ä"
,
"\345"
,
"å"
,
"\346"
,
"æ"
,
"\347"
,
"ç"
,
"\350"
,
"è"
,
"\351"
,
"é"
,
"\352"
,
"ê"
,
"\353"
,
"ë"
,
"\354"
,
"ì"
,
"\355"
,
"í"
,
"\356"
,
"î"
,
"\357"
,
"ï"
,
"\360"
,
"ð"
,
"\361"
,
"ñ"
,
"\362"
,
"ò"
,
"\363"
,
"ó"
,
"\364"
,
"ô"
,
"\365"
,
"õ"
,
"\366"
,
"ö"
,
"\367"
,
"÷"
,
"\370"
,
"ø"
,
"\371"
,
"ù"
,
"\372"
,
"ú"
,
"\373"
,
"û"
,
"\374"
,
"ü"
,
"\375"
,
"ý"
,
"\376"
,
"þ"
,
"\377"
,
"ÿ"
,
);
%char_entities2
= (
"\267"
,
"·"
,);
use
vars
qw(@alignments @lc_alignments @xhtml_alignments)
;
@alignments
= (
''
,
''
,
' ALIGN="RIGHT"'
,
' ALIGN="CENTER"'
);
@lc_alignments
= (
''
,
''
,
' align="right"'
,
' align="center"'
);
@xhtml_alignments
= (
''
,
''
,
' style="text-align: right;"'
,
' style="text-align: center;"'
);
sub
new {
my
$invocant
=
shift
;
my
$self
= {};
my
$class
=
ref
(
$invocant
) ||
$invocant
;
init_our_data(
$self
);
bless
(
$self
,
$class
);
$self
->args(
@_
);
return
$self
;
}
sub
args {
my
$self
=
shift
;
my
%args
= ();
my
@arg_array
= ();
if
(
@_
&&
@_
== 1)
{
my
$aref
=
shift
;
@arg_array
= @{
$aref
};
}
elsif
(
@_
)
{
%args
=
@_
;
}
if
(
%args
) {
if
(
$self
->{debug}) {
print
STDERR
"========args(hash)========\n"
;
print
STDERR Dumper(
%args
);
}
foreach
my
$arg
(
keys
%args
) {
if
(
defined
$args
{
$arg
}) {
if
(
$arg
=~ /^-/) {
$arg
=~ s/^-//;
$arg
=~ s/^-//;
}
if
(
$self
->{debug}) {
print
STDERR
"--"
,
$arg
;
}
$self
->{
$arg
} =
$args
{
$arg
};
if
(
$self
->{debug}) {
print
STDERR
" "
,
$args
{
$arg
},
"\n"
;
}
}
}
}
elsif
(
@arg_array
) {
if
(
$self
->{debug}) {
print
STDERR
"========args(array)========\n"
;
print
STDERR Dumper(
@arg_array
);
}
my
$look_at_args
= 1;
while
(
@arg_array
&&
$look_at_args
) {
my
$arg
=
shift
@arg_array
;
if
(
$arg
=~ /^-/) {
$arg
=~ s/^-//;
$arg
=~ s/^-//;
if
(
$self
->{debug}) {
print
STDERR
"--"
,
$arg
;
}
if
(
$arg
eq
'debug'
||
$arg
eq
'eight_bit_clean'
||
$arg
eq
'escape_HTML_chars'
||
$arg
eq
'explicit_headings'
||
$arg
eq
'extract'
||
$arg
eq
'link_only'
||
$arg
eq
'lower_case_tags'
||
$arg
eq
'mailmode'
||
$arg
eq
'make_anchors'
||
$arg
eq
'make_links'
||
$arg
eq
'make_tables'
||
$arg
eq
'preserve_indent'
||
$arg
eq
'titlefirst'
||
$arg
eq
'unhyphenation'
||
$arg
eq
'use_mosaic_header'
||
$arg
eq
'use_preformat_marker'
||
$arg
eq
'verbose'
||
$arg
eq
'xhtml'
) {
$self
->{
$arg
} = 1;
if
(
$self
->{debug}) {
print
STDERR
"=true\n"
;
}
}
elsif
(
$arg
eq
'nodebug'
||
$arg
eq
'noeight_bit_clean'
||
$arg
eq
'noescape_HTML_chars'
||
$arg
eq
'noexplicit_headings'
||
$arg
eq
'noextract'
||
$arg
eq
'nolink_only'
||
$arg
eq
'nolower_case_tags'
||
$arg
eq
'nomailmode'
||
$arg
eq
'nomake_anchors'
||
$arg
eq
'nomake_links'
||
$arg
eq
'nomake_tables'
||
$arg
eq
'nopreserve_indent'
||
$arg
eq
'notitlefirst'
||
$arg
eq
'nounhyphenation'
||
$arg
eq
'nouse_mosaic_header'
||
$arg
eq
'nouse_preformat_marker'
||
$arg
eq
'noverbose'
||
$arg
eq
'noxhtml'
) {
$arg
=~ s/^
no
//;
$self
->{
$arg
} = 0;
if
(
$self
->{debug}) {
print
STDERR
" $arg=false\n"
;
}
}
else
{
my
$val
=
shift
@arg_array
;
if
(
$self
->{debug}) {
print
STDERR
"="
,
$val
,
"\n"
;
}
if
(
defined
$arg
&&
defined
$val
) {
if
(
$arg
eq
'infile'
||
$arg
eq
'custom_heading_regexp'
||
$arg
eq
'links_dictionaries'
) {
if
(
$val
eq
'CLEAR'
) {
$self
->{
$arg
} = [];
}
else
{
push
@{
$self
->{
$arg
}},
$val
;
}
}
elsif
(
$arg
eq
'file'
) {
if
(
$val
eq
'CLEAR'
) {
$self
->{infile} = [];
}
else
{
push
@{
$self
->{infile}},
$val
;
}
}
else
{
$self
->{
$arg
} =
$val
;
}
}
}
}
else
{
$look_at_args
= 0;
}
}
}
if
(
$self
->{debug})
{
print
STDERR Dumper(
$self
);
}
return
1;
}
sub
process_para ($$;%) {
my
$self
=
shift
;
my
$para
=
shift
;
my
%args
= (
close_tags
=>1,
is_fragment
=>0,
@_
);
$self
->do_init_call();
my
$para_action
=
$NONE
;
if
(
$self
->{__mode} &
$TABLE
) {
$self
->{__mode} ^=
$TABLE
;
}
if
(!
$self
->{link_only}) {
my
$para_len
=
length
(
$para
);
my
@para_lines
=
split
(/^/,
$para
);
my
@para_line_len
= ();
my
@para_line_indent
= ();
my
@para_line_action
= ();
my
$line
;
for
(
my
$i
= 0 ;
$i
<
@para_lines
;
$i
++) {
$line
=
$para_lines
[
$i
];
my
$ind
;
$line
=~ s/[ \011]*\015$//;
$line
=
$self
->untabify(
$line
);
push
@para_line_len
,
length
(
$line
);
if
(
$i
> 0) {
$ind
= count_indent(
$line
,
$para_line_indent
[
$i
- 1]);
push
@para_line_indent
,
$ind
;
}
else
{
$ind
= count_indent(
$line
, 0);
push
@para_line_indent
,
$ind
;
}
push
@para_line_action
, 0;
$para_lines
[
$i
] =
$line
;
}
if
(
$self
->{make_tables}) {
$self
->tablestuff(
rows_ref
=>\
@para_lines
,
para_len
=>
$para_len
);
}
my
$prev
=
''
;
my
$next
=
''
;
my
$prev_action
=
$self
->{__prev_para_action};
for
(
my
$i
= 0 ;
$i
<
@para_lines
;
$i
++) {
my
$prev_ref
;
my
$prev_action_ref
;
my
$prev_line_indent
;
my
$prev_line_len
;
if
(
$i
== 0) {
$prev_ref
= \
$prev
;
$prev_action_ref
= \
$prev_action
;
$prev_line_indent
= 0;
$prev_line_len
= 0;
}
else
{
$prev_ref
= \
$para_lines
[
$i
- 1];
$prev_action_ref
= \
$para_line_action
[
$i
- 1];
$prev_line_indent
=
$para_line_indent
[
$i
- 1];
$prev_line_len
=
$para_line_len
[
$i
- 1];
}
my
$next_ref
;
if
(
$i
==
@para_lines
- 1) {
$next_ref
= \
$next
;
}
else
{
$next_ref
= \
$para_lines
[
$i
+ 1];
}
if
(
$self
->{escape_HTML_chars} && !(
$self
->{__mode} &
$TABLE
)) {
$para_lines
[
$i
] = escape(
$para_lines
[
$i
]);
}
if
(
$self
->{mailmode}
&& !(
$self
->{__mode} & (
$PRE_EXPLICIT
|
$TABLE
))
&& !(
$para_line_action
[
$i
] &
$HEADER
))
{
$self
->mailstuff(
line_ref
=>\
$para_lines
[
$i
],
line_action_ref
=>\
$para_line_action
[
$i
],
prev_ref
=>
$prev_ref
,
prev_action_ref
=>
$prev_action_ref
,
next_ref
=>
$next_ref
);
}
if
((
$self
->{__mode} &
$PRE
)
&& (
$self
->{preformat_trigger_lines} != 0))
{
$self
->endpreformat(
mode_ref
=>\
$self
->{__mode},
para_lines_ref
=>\
@para_lines
,
para_action_ref
=>\
@para_line_action
,
ind
=>
$i
,
prev_ref
=>
$prev_ref
);
}
if
(!(
$self
->{__mode} &
$PRE
)) {
$self
->hrule(
para_lines_ref
=>\
@para_lines
,
para_action_ref
=>\
@para_line_action
,
ind
=>
$i
);
}
if
(@{
$self
->{custom_heading_regexp}} && !(
$self
->{__mode} &
$PRE
))
{
$self
->custom_heading(
para_lines_ref
=>\
@para_lines
,
para_action_ref
=>\
@para_line_action
,
ind
=>
$i
);
}
if
(!(
$self
->{__mode} & (
$PRE
|
$TABLE
))
&& !is_blank(
$para_lines
[
$i
]))
{
$self
->liststuff(
para_lines_ref
=>\
@para_lines
,
para_action_ref
=>\
@para_line_action
,
para_line_indent_ref
=>\
@para_line_indent
,
ind
=>
$i
,
prev_ref
=>
$prev_ref
);
}
if
(
!(
$para_line_action
[
$i
] &
(
$HEADER
|
$LIST
|
$MAILHEADER
|
$TABLE
))
&& !(
$self
->{__mode} & (
$LIST
|
$PRE
))
&&
$self
->{__preformat_enabled})
{
$self
->preformat(
mode_ref
=>\
$self
->{__mode},
line_ref
=>\
$para_lines
[
$i
],
line_action_ref
=>\
$para_line_action
[
$i
],
prev_ref
=>
$prev_ref
,
next_ref
=>
$next_ref
,
prev_action_ref
=>
$prev_action_ref
);
}
if
(!
$self
->{explicit_headings}
&& !(
$self
->{__mode} & (
$PRE
|
$HEADER
|
$TABLE
))
&& ${
$next_ref
} =~ /^\s*[=\-\*\.~\+]+\s*$/)
{
$self
->heading(
line_ref
=>\
$para_lines
[
$i
],
line_action_ref
=>\
$para_line_action
[
$i
],
next_ref
=>
$next_ref
);
}
$self
->paragraph(
mode_ref
=>\
$self
->{__mode},
line_ref
=>\
$para_lines
[
$i
],
line_action_ref
=>\
$para_line_action
[
$i
],
prev_ref
=>
$prev_ref
,
prev_action_ref
=>
$prev_action_ref
,
line_indent
=>
$para_line_indent
[
$i
],
prev_indent
=>
$prev_line_indent
,
is_fragment
=>
$args
{is_fragment},
ind
=>
$i
,
);
$self
->shortline(
mode_ref
=>\
$self
->{__mode},
line_ref
=>\
$para_lines
[
$i
],
line_action_ref
=>\
$para_line_action
[
$i
],
prev_ref
=>
$prev_ref
,
prev_action_ref
=>
$prev_action_ref
,
prev_line_len
=>
$prev_line_len
);
if
(!(
$self
->{__mode} & (
$PRE
|
$TABLE
))) {
$self
->caps(
line_ref
=>\
$para_lines
[
$i
],
line_action_ref
=>\
$para_line_action
[
$i
]);
}
if
(
$i
== 0 && !is_blank(
$prev
))
{
$line
=
$para_lines
[
$i
];
$para_lines
[
$i
] =
$prev
.
$line
;
}
if
(
$i
==
@para_lines
- 1 && !is_blank(
$next
))
{
$para_lines
[
$i
] .=
$next
;
}
}
$para_action
=
$para_line_action
[
$#para_line_action
];
$para
=
join
(
""
,
@para_lines
);
if
(
$self
->{xhtml})
{
my
$open_tag
= @{
$self
->{__tags}}[$
if
(
$open_tag
eq
'P'
)
{
$para
.=
$self
->close_tag(
'P'
);
}
}
if
(
$self
->{unhyphenation}
&& (
$para
=~ /[^\W\d_]\-\n\s*[^\W\d_]/s)
&& !(
$self
->{__mode} &
(
$PRE
|
$HEADER
|
$MAILHEADER
|
$TABLE
|
$BREAK
))
)
{
$self
->unhyphenate_para(\
$para
);
}
}
if
(
$self
->{make_links}
&& !is_blank(
$para
)
&& @{
$self
->{__links_table_order}})
{
$self
->make_dictionary_links(
line_ref
=>\
$para
,
line_action_ref
=>\
$para_action
);
}
if
(
$args
{close_tags}
&&
$self
->{__mode} &
$LIST
)
{
$self
->endlist(
num_lists
=>
$self
->{__listnum},
prev_ref
=>\
$para
,
line_action_ref
=>\
$para_action
);
}
if
(
$args
{close_tags} &&
$self
->{xhtml})
{
while
(@{
$self
->{__tags}})
{
$para
.=
$self
->close_tag(
''
);
}
}
if
(!
$self
->{eight_bit_clean}) {
my
@chars
=
split
(//,
$para
);
foreach
$_
(
@chars
) {
$_
=
$char_entities
{
$_
}
if
defined
(
$char_entities
{
$_
});
}
$para
=
join
(
""
,
@chars
);
}
$self
->{__prev_para_action} =
$para_action
;
return
$para
;
}
sub
txt2html ($;$) {
my
$self
=
shift
;
if
(
@_
) {
$self
->args(
@_
);
}
$self
->do_init_call();
my
$outhandle
;
my
$not_to_stdout
;
if
(
$self
->{outfile} eq
"-"
) {
$outhandle
=
*STDOUT
;
$not_to_stdout
= 0;
}
else
{
open
(HOUT,
"> "
.
$self
->{outfile}) ||
die
"Error: unable to open "
,
$self
->{outfile},
": $!\n"
;
$outhandle
=
*HOUT
;
$not_to_stdout
= 1;
}
local
$/ =
""
;
my
$para
=
''
;
my
$count
= 0;
foreach
my
$file
(@{
$self
->{infile}}) {
if
(-f
$file
&&
open
(IN,
$file
)) {
while
(<IN>) {
$para
=
$_
;
$para
=~ s/\n$//;
if
(
$count
== 0) {
$self
->do_file_start(
$outhandle
,
$para
);
}
$self
->clear_section_links();
$para
=
$self
->process_para(
$para
,
close_tags
=>0);
print
$outhandle
$para
,
"\n"
;
$count
++;
}
}
}
$self
->{__prev} =
""
;
if
(
$self
->{__mode} &
$LIST
)
{
$self
->endlist(
num_lists
=>
$self
->{__listnum},
prev_ref
=>\
$self
->{__prev},
line_action_ref
=>\
$self
->{__line_action})
}
print
$outhandle
$self
->{__prev};
if
(
$self
->{xhtml})
{
my
$open_tag
= @{
$self
->{__tags}}[$
while
(@{
$self
->{__tags}}
&&
$open_tag
ne
'BODY'
&&
$open_tag
ne
'HTML'
)
{
print
$outhandle
$self
->close_tag(
''
);
$open_tag
= @{
$self
->{__tags}}[$
}
print
$outhandle
"\n"
;
}
if
(
$self
->{append_file}) {
if
(-r
$self
->{append_file}) {
open
(APPEND,
$self
->{append_file});
while
(<APPEND>) {
print
$outhandle
$_
;
}
close
(APPEND);
}
else
{
print
STDERR
"Can't find or read file "
,
$self
->{append_file},
" to append.\n"
;
}
}
if
(!
$self
->{extract}) {
print
$outhandle
$self
->get_tag(
'BODY'
,
tag_type
=>
'end'
),
"\n"
;
print
$outhandle
$self
->get_tag(
'HTML'
,
tag_type
=>
'end'
),
"\n"
;
}
if
(
$not_to_stdout
) {
close
(
$outhandle
);
}
return
1;
}
sub
init_our_data ($) {
my
$self
=
shift
;
$self
->{debug} = 0;
$self
->{append_file} =
''
;
$self
->{append_head} =
''
;
$self
->{body_deco} =
''
;
$self
->{caps_tag} =
'STRONG'
;
$self
->{custom_heading_regexp} = [];
$self
->{default_link_dict} =
"$ENV{HOME}/.txt2html.dict"
;
$self
->{dict_debug} = 0;
$self
->{doctype} =
"-//W3C//DTD HTML 3.2 Final//EN"
;
$self
->{eight_bit_clean} = 0;
$self
->{escape_HTML_chars} = 1;
$self
->{explicit_headings} = 0;
$self
->{extract} = 0;
$self
->{hrule_min} = 4;
$self
->{indent_width} = 2;
$self
->{indent_par_break} = 0;
$self
->{infile} = [];
$self
->{links_dictionaries} = [];
$self
->{link_only} = 0;
$self
->{lower_case_tags} = 0;
$self
->{mailmode} = 0;
$self
->{make_anchors} = 1;
$self
->{make_links} = 1;
$self
->{make_tables} = 0;
$self
->{min_caps_length} = 3;
$self
->{outfile} =
'-'
;
$self
->{par_indent} = 2;
$self
->{preformat_trigger_lines} = 2;
$self
->{endpreformat_trigger_lines} = 2;
$self
->{preformat_start_marker} =
"^(:?(:?<)|<)PRE(:?(:?>)|>)\$"
;
$self
->{preformat_end_marker} =
"^(:?(:?<)|<)/PRE(:?(:?>)|>)\$"
;
$self
->{preformat_whitespace_min} = 5;
$self
->{prepend_file} =
''
;
$self
->{preserve_indent} = 0;
$self
->{short_line_length} = 40;
$self
->{style_url} =
''
;
$self
->{system_link_dict} =
'/usr/share/txt2html/txt2html.dict'
;
$self
->{tab_width} = 8;
$self
->{title} =
''
;
$self
->{titlefirst} = 0;
$self
->{underline_length_tolerance} = 1;
$self
->{underline_offset_tolerance} = 1;
$self
->{unhyphenation} = 1;
$self
->{use_mosaic_header} = 0;
$self
->{use_preformat_marker} = 0;
$self
->{xhtml} = 0;
$self
->{__file} =
""
;
my
%heading_styles
= ();
$self
->{__heading_styles} = \
%heading_styles
;
$self
->{__num_heading_styles} = 0;
my
%links_table
= ();
$self
->{__links_table} = \
%links_table
;
my
@links_table_order
= ();
$self
->{__links_table_order} = \
@links_table_order
;
my
@search_patterns
= ();
$self
->{__search_patterns} = \
@search_patterns
;
my
@repl_code
= ();
$self
->{__repl_code} = \
@repl_code
;
$self
->{__prev_para_action} = 0;
$self
->{__non_header_anchor} = 0;
$self
->{__mode} = 0;
$self
->{__listnum} = 0;
$self
->{__list_indent} =
""
;
$self
->{__call_init_done} = 0;
}
sub
deal_with_options ($) {
my
$self
=
shift
;
if
(
$self
->{links_dictionaries}) {
my
@dict_files
= @{
$self
->{links_dictionaries}};
$self
->args(
links_dictionaries
=>[]);
foreach
my
$ld
(
@dict_files
) {
if
(-r
$ld
) {
$self
->{
'make_links'
} = 1;
$self
->args([
'--links_dictionaries'
,
$ld
]);
}
else
{
print
STDERR
"Can't find or read link-file $ld\n"
;
}
}
}
if
(!
$self
->{make_links}) {
$self
->{
'links_dictionaries'
} = 0;
$self
->{
'system_link_dict'
} =
""
;
}
if
(
$self
->{append_file}) {
if
(!-r
$self
->{append_file}) {
print
STDERR
"Can't find or read "
,
$self
->{append_file},
"\n"
;
$self
->{append_file} =
''
;
}
}
if
(
$self
->{prepend_file}) {
if
(!-r
$self
->{prepend_file}) {
print
STDERR
"Can't find or read "
,
$self
->{prepend_file},
"\n"
;
$self
->{
'prepend_file'
} =
''
;
}
}
if
(
$self
->{append_head}) {
if
(!-r
$self
->{append_head}) {
print
STDERR
"Can't find or read "
,
$self
->{append_head},
"\n"
;
$self
->{
'append_head'
} =
''
;
}
}
if
(!
$self
->{outfile}) {
$self
->{
'outfile'
} =
"-"
;
}
$self
->{
'preformat_trigger_lines'
} = 0
if
(
$self
->{preformat_trigger_lines} < 0);
$self
->{
'preformat_trigger_lines'
} = 2
if
(
$self
->{preformat_trigger_lines} > 2);
$self
->{
'endpreformat_trigger_lines'
} = 1
if
(
$self
->{preformat_trigger_lines} == 0);
$self
->{
'endpreformat_trigger_lines'
} = 0
if
(
$self
->{endpreformat_trigger_lines} < 0);
$self
->{
'endpreformat_trigger_lines'
} = 2
if
(
$self
->{endpreformat_trigger_lines} > 2);
$self
->{__preformat_enabled} =
((
$self
->{endpreformat_trigger_lines} != 0)
||
$self
->{use_preformat_marker});
if
(
$self
->{use_mosaic_header}) {
my
$num_heading_styles
= 0;
my
%heading_styles
= ();
$heading_styles
{
"*"
} = ++
$num_heading_styles
;
$heading_styles
{
"="
} = ++
$num_heading_styles
;
$heading_styles
{
"+"
} = ++
$num_heading_styles
;
$heading_styles
{
"-"
} = ++
$num_heading_styles
;
$heading_styles
{
"~"
} = ++
$num_heading_styles
;
$heading_styles
{
"."
} = ++
$num_heading_styles
;
$self
->{__heading_styles} = \
%heading_styles
;
$self
->{__num_heading_styles} =
$num_heading_styles
;
}
if
(
$self
->{xhtml})
{
$self
->{
'lower_case_tags'
} = 1;
}
}
sub
is_blank ($) {
return
$_
[0] =~ /^\s*$/;
}
sub
escape ($) {
my
(
$text
) =
@_
;
$text
=~ s/&/
&
;/g;
$text
=~ s/>/
>
;/g;
$text
=~ s/</
<
;/g;
return
$text
;
}
sub
get_tag ($$;%) {
my
$self
=
shift
;
my
$in_tag
=
shift
;
my
%args
= (
tag_type
=>
'start'
,
inside_tag
=>
''
,
@_
);
my
$inside_tag
=
$args
{inside_tag};
my
$open_tag
= @{
$self
->{__tags}}[$
if
(!
defined
$open_tag
)
{
$open_tag
=
''
;
}
my
$tag_prefix
=
''
;
if
(
$self
->{xhtml})
{
if
(
$open_tag
eq
'P'
and
$in_tag
eq
'P'
and
$args
{tag_type} ne
'end'
)
{
$tag_prefix
=
$self
->close_tag(
'P'
);
}
elsif
(
$open_tag
eq
'P'
and
$in_tag
=~ /(HR|UL|OL|PRE|TABLE|^H)/)
{
$tag_prefix
=
$self
->close_tag(
'P'
);
}
elsif
(
$open_tag
eq
'LI'
and
$in_tag
eq
'LI'
and
$args
{tag_type} ne
'end'
)
{
$tag_prefix
=
$self
->close_tag(
'LI'
);
}
elsif
(
$open_tag
eq
'LI'
and
$in_tag
=~ /(UL|OL)/
and
$args
{tag_type} eq
'end'
)
{
$tag_prefix
=
$self
->close_tag(
'LI'
);
}
}
my
$out_tag
=
$in_tag
;
if
(
$args
{tag_type} eq
'end'
)
{
$out_tag
=
$self
->close_tag(
$in_tag
);
}
else
{
if
(
$self
->{lower_case_tags})
{
$out_tag
=~ s/(
$in_tag
)/\L$1/;
}
else
{
$out_tag
=~ s/(
$in_tag
)/\U$1/;
}
if
(
$args
{tag_type} eq
'empty'
)
{
if
(
$self
->{xhtml})
{
$out_tag
=
"<${out_tag}${inside_tag}/>"
;
}
else
{
$out_tag
=
"<${out_tag}${inside_tag}>"
;
}
}
else
{
push
@{
$self
->{__tags}},
$in_tag
;
$out_tag
=
"<${out_tag}${inside_tag}>"
;
}
}
$out_tag
=
$tag_prefix
.
$out_tag
;
if
(
$self
->{dict_debug} & 8)
{
print
STDERR
"open_tag = '${open_tag}', in_tag = '${in_tag}', tag_type = "
,
$args
{tag_type},
", inside_tag = '${inside_tag}', out_tag = '$out_tag'\n"
;
}
return
$out_tag
;
}
sub
close_tag ($$) {
my
$self
=
shift
;
my
$in_tag
=
shift
;
my
$open_tag
= @{
$self
->{__tags}}[$
if
(!
$in_tag
)
{
$in_tag
=
$open_tag
;
}
my
$out_tag
=
$in_tag
;
if
(
$self
->{lower_case_tags})
{
$out_tag
=~ s/(
$in_tag
)/\L$1/;
}
else
{
$out_tag
=~ s/(
$in_tag
)/\U$1/;
}
$out_tag
=
"<\/${out_tag}>"
;
if
(
$open_tag
eq
$in_tag
)
{
pop
@{
$self
->{__tags}};
}
if
(
$self
->{dict_debug} & 8)
{
print
STDERR
"close_tag: open_tag = '${open_tag}', in_tag = '${in_tag}', out_tag = '$out_tag'\n"
;
}
return
$out_tag
;
}
sub
hrule ($%) {
my
$self
=
shift
;
my
%args
= (
para_lines_ref
=>
undef
,
para_action_ref
=>
undef
,
ind
=>0,
@_
);
my
$para_lines_ref
=
$args
{para_lines_ref};
my
$para_action_ref
=
$args
{para_action_ref};
my
$ind
=
$args
{ind};
my
$hrmin
=
$self
->{hrule_min};
if
(
$para_lines_ref
->[
$ind
] =~ /^\s*([-_~=\*]\s*){
$hrmin
,}$/) {
my
$tag
=
$self
->get_tag(
"HR"
,
tag_type
=>
'empty'
);
$para_lines_ref
->[
$ind
] =
"$tag\n"
;
$para_action_ref
->[
$ind
] |=
$HRULE
;
}
elsif
(
$para_lines_ref
->[
$ind
] =~ /\014/) {
$para_action_ref
->[
$ind
] |=
$HRULE
;
my
$tag
=
$self
->get_tag(
"HR"
,
tag_type
=>
'empty'
);
$para_lines_ref
->[
$ind
] =~ s/\014/\n${tag}\n/g;
}
}
sub
shortline ($%) {
my
$self
=
shift
;
my
%args
= (
mode_ref
=>
undef
,
line_ref
=>
undef
,
line_action_ref
=>
undef
,
prev_ref
=>
undef
,
prev_action_ref
=>
undef
,
prev_line_len
=>0,
@_
);
my
$mode_ref
=
$args
{mode_ref};
my
$line_ref
=
$args
{line_ref};
my
$line_action_ref
=
$args
{line_action_ref};
my
$prev_ref
=
$args
{prev_ref};
my
$prev_action_ref
=
$args
{prev_action_ref};
my
$prev_line_len
=
$args
{prev_line_len};
my
$tag
=
$self
->get_tag(
'BR'
,
tag_type
=>
'empty'
);
if
(!(${
$mode_ref
} & (
$PRE
|
$LIST
|
$TABLE
))
&& !is_blank(${
$line_ref
})
&& !is_blank(${
$prev_ref
})
&& (
$prev_line_len
<
$self
->{short_line_length})
&& !(${
$line_action_ref
} & (
$END
|
$HEADER
|
$HRULE
|
$LIST
|
$IND_BREAK
|
$PAR
))
&& !(${
$prev_action_ref
} & (
$HEADER
|
$HRULE
|
$BREAK
|
$IND_BREAK
)))
{
${
$prev_ref
} .=
$tag
.
chop
(${
$prev_ref
});
${
$prev_action_ref
} |=
$BREAK
;
}
}
sub
mailstuff ($%) {
my
$self
=
shift
;
my
%args
= (
line_ref
=>
undef
,
line_action_ref
=>
undef
,
prev_ref
=>
undef
,
prev_action_ref
=>
undef
,
next_ref
=>
undef
,
@_
);
my
$line_ref
=
$args
{line_ref};
my
$line_action_ref
=
$args
{line_action_ref};
my
$prev_ref
=
$args
{prev_ref};
my
$prev_action_ref
=
$args
{prev_action_ref};
my
$next_ref
=
$args
{next_ref};
my
$tag
=
''
;
if
(((${
$line_ref
} =~ /^\w*
>
/)
|| (${
$line_ref
} =~ /^[\|:]/))
&& !is_blank(${
$next_ref
})
)
{
$tag
=
$self
->get_tag(
'BR'
,
tag_type
=>
'empty'
);
${
$line_ref
} =~ s/$/${tag}/;
${
$line_action_ref
} |= (
$BREAK
|
$MAILQUOTE
);
if
(!(${
$prev_action_ref
} & (
$BREAK
|
$MAILQUOTE
))) {
$tag
=
$self
->get_tag(
'P'
,
inside_tag
=>
" class='quote_mail'"
);
${
$prev_ref
} .=
$tag
;
${
$line_action_ref
} |=
$PAR
;
}
}
elsif
((${
$line_ref
} =~ /^(From:?)|(Newsgroups:) /)
&& is_blank(${
$prev_ref
}))
{
$self
->anchor_mail(
$line_ref
)
if
!(${
$prev_action_ref
} &
$MAILHEADER
);
chomp
${
$line_ref
};
$tag
=
$self
->get_tag(
'P'
);
my
$tag2
=
$self
->get_tag(
'BR'
,
tag_type
=>
'empty'
);
${
$line_ref
} =
"<!-- New Message -->\n$tag"
. ${
$line_ref
} .
"${tag2}\n"
;
${
$line_action_ref
} |= (
$BREAK
|
$MAILHEADER
|
$PAR
);
}
elsif
((${
$line_ref
} =~ /^[\w\-]*:/)
&& (${
$prev_action_ref
} &
$MAILHEADER
) && !is_blank(${
$next_ref
})
)
{
$tag
=
$self
->get_tag(
'BR'
,
tag_type
=>
'empty'
);
${
$line_ref
} =~ s/$/${tag}/;
${
$line_action_ref
} |= (
$BREAK
|
$MAILHEADER
);
}
elsif
((${
$line_ref
} =~ /^\s+\S/) &&
(${
$prev_action_ref
} &
$MAILHEADER
) && !is_blank(${
$next_ref
})
)
{
$tag
=
$self
->get_tag(
'BR'
,
tag_type
=>
'empty'
);
${
$line_ref
} =~ s/$/${tag}/;
${
$line_action_ref
} |= (
$BREAK
|
$MAILHEADER
);
}
}
sub
subtract_modes ($$) {
my
(
$vector
,
$mask
) =
@_
;
return
(
$vector
|
$mask
) -
$mask
;
}
sub
paragraph ($%) {
my
$self
=
shift
;
my
%args
= (
mode_ref
=>
undef
,
line_ref
=>
undef
,
line_action_ref
=>
undef
,
prev_ref
=>
undef
,
prev_action_ref
=>
undef
,
line_indent
=>0,
prev_indent
=>0,
is_fragment
=>0,
ind
=>0,
@_
);
my
$mode_ref
=
$args
{mode_ref};
my
$line_ref
=
$args
{line_ref};
my
$line_action_ref
=
$args
{line_action_ref};
my
$prev_ref
=
$args
{prev_ref};
my
$prev_action_ref
=
$args
{prev_action_ref};
my
$line_indent
=
$args
{line_indent};
my
$prev_indent
=
$args
{prev_indent};
my
$is_fragment
=
$args
{is_fragment};
my
$line_no
=
$args
{ind};
my
$tag
=
''
;
if
(!is_blank(${
$line_ref
})
&& !(${
$mode_ref
} & (
$PRE
|
$TABLE
))
&& !subtract_modes(${
$line_action_ref
},
$END
|
$MAILQUOTE
|
$CAPS
|
$BREAK
)
&& (is_blank(${
$prev_ref
})
|| (${
$line_action_ref
} &
$END
)
|| (
$line_indent
>
$prev_indent
+
$self
->{par_indent}))
&& !(
$is_fragment
&&
$line_no
== 0)
)
{
if
(
$self
->{indent_par_break}
&& !is_blank(${
$prev_ref
})
&& !(${
$line_action_ref
} &
$END
)
&& (
$line_indent
>
$prev_indent
+
$self
->{par_indent}))
{
$tag
=
$self
->get_tag(
'BR'
,
tag_type
=>
'empty'
);
${
$prev_ref
} .=
$tag
;
${
$prev_ref
} .=
" "
x
$line_indent
;
${
$line_ref
} =~ s/^ {
$line_indent
}//;
${
$prev_action_ref
} |=
$BREAK
;
${
$line_action_ref
} |=
$IND_BREAK
;
}
elsif
(
$self
->{preserve_indent})
{
$tag
=
$self
->get_tag(
'P'
);
${
$prev_ref
} .=
$tag
;
${
$prev_ref
} .=
" "
x
$line_indent
;
${
$line_ref
} =~ s/^ {
$line_indent
}//;
${
$line_action_ref
} |=
$PAR
;
}
else
{
$tag
=
$self
->get_tag(
'P'
);
${
$prev_ref
} .=
$tag
;
${
$line_action_ref
} |=
$PAR
;
}
}
elsif
(
$self
->{indent_par_break}
&& !(${
$mode_ref
} & (
$PRE
|
$TABLE
|
$LIST
))
&& !is_blank(${
$prev_ref
})
&& !(${
$line_action_ref
} &
$END
)
&& (${
$prev_action_ref
} & (
$IND_BREAK
|
$PAR
))
&& !subtract_modes(${
$line_action_ref
},
$END
|
$MAILQUOTE
|
$CAPS
)
&& (
$line_indent
>
$self
->{par_indent})
&& (
$line_indent
==
$prev_indent
)
)
{
$tag
=
$self
->get_tag(
'BR'
,
tag_type
=>
'empty'
);
${
$prev_ref
} .=
$tag
;
${
$prev_ref
} .=
" "
x
$line_indent
;
${
$line_ref
} =~ s/^ {
$line_indent
}//;
${
$prev_action_ref
} |=
$BREAK
;
${
$line_action_ref
} |=
$IND_BREAK
;
}
}
sub
count_indent ($$) {
my
(
$line
,
$prev_length
) =
@_
;
if
(is_blank(
$line
)) {
return
$prev_length
;
}
my
(
$ws
) =
$line
=~ /^( *)[^ ]/;
return
length
(
$ws
);
}
sub
listprefix ($) {
my
$line
=
shift
;
my
(
$prefix
,
$number
,
$rawprefix
);
return
(0, 0, 0)
if
(!(
$line
=~ /^\s*[-=o\*\267]+\s+\S/)
&& !(
$line
=~ /^\s*(\d+|[^\W\d_])[\.\)\]:]\s+\S/));
(
$number
) =
$line
=~ /^\s*(\d+|[^\W\d_])/;
$number
= 0
unless
defined
(
$number
);
if
(
$line
=~ /^\s
*o
\s/) {
$number
= 0;
}
if
(
$number
) {
(
$rawprefix
) =
$line
=~ /^(\s*(\d+|[^\W\d_]).)/;
$prefix
=
$rawprefix
;
$prefix
=~ s/(\d+|[^\W\d_])//;
}
else
{
(
$rawprefix
) =
$line
=~ /^(\s*[-=o\*\267]+.)/;
$prefix
=
$rawprefix
;
}
(
$prefix
,
$number
,
$rawprefix
);
}
sub
startlist ($%) {
my
$self
=
shift
;
my
%args
= (
prefix
=>
''
,
number
=>0,
rawprefix
=>
''
,
para_lines_ref
=>
undef
,
para_action_ref
=>
undef
,
ind
=>0,
prev_ref
=>
undef
,
@_
);
my
$prefix
=
$args
{prefix};
my
$number
=
$args
{number};
my
$rawprefix
=
$args
{rawprefix};
my
$para_lines_ref
=
$args
{para_lines_ref};
my
$para_action_ref
=
$args
{para_action_ref};
my
$ind
=
$args
{ind};
my
$prev_ref
=
$args
{prev_ref};
my
$tag
=
''
;
$self
->{__listprefix}->[
$self
->{__listnum}] =
$prefix
;
if
(
$number
) {
if
((
$number
ne
"1"
) && (
$number
ne
"a"
) && (
$number
ne
"A"
)) {
return
0;
}
$tag
=
$self
->get_tag(
'OL'
);
${
$prev_ref
} .=
$self
->{__list_indent} .
"${tag}\n"
;
$self
->{__list}->[
$self
->{__listnum}] =
$OL
;
}
else
{
$tag
=
$self
->get_tag(
'UL'
);
${
$prev_ref
} .=
$self
->{__list_indent} .
"${tag}\n"
;
$self
->{__list}->[
$self
->{__listnum}] =
$UL
;
}
$self
->{__listnum}++;
$self
->{__list_indent} =
" "
x
$self
->{__listnum} x
$self
->{indent_width};
$para_action_ref
->[
$ind
] |=
$LIST
;
$para_action_ref
->[
$ind
] |=
$LIST_START
;
$self
->{__mode} |=
$LIST
;
1;
}
sub
endlist ($%) {
my
$self
=
shift
;
my
%args
= (
num_lists
=>0,
prev_ref
=>
undef
,
line_action_ref
=>
undef
,
@_
);
my
$n
=
$args
{num_lists};
my
$prev_ref
=
$args
{prev_ref};
my
$line_action_ref
=
$args
{line_action_ref};
my
$tag
=
''
;
for
(;
$n
> 0 ;
$n
--,
$self
->{__listnum}--) {
$self
->{__list_indent} =
" "
x (
$self
->{__listnum} - 1) x
$self
->{indent_width};
if
(
$self
->{__list}->[
$self
->{__listnum} - 1] ==
$UL
) {
$tag
=
$self
->get_tag(
'UL'
,
tag_type
=>
'end'
);
${
$prev_ref
} .=
$self
->{__list_indent} .
"${tag}\n"
;
}
elsif
(
$self
->{__list}->[
$self
->{__listnum} - 1] ==
$OL
) {
$tag
=
$self
->get_tag(
'OL'
,
tag_type
=>
'end'
);
${
$prev_ref
} .=
$self
->{__list_indent} .
"${tag}\n"
;
}
else
{
print
STDERR
"Encountered list of unknown type\n"
;
}
}
${
$line_action_ref
} |=
$END
;
$self
->{__mode} ^=
$LIST
if
(!
$self
->{__listnum});
}
sub
continuelist ($%) {
my
$self
=
shift
;
my
%args
= (
para_lines_ref
=>
undef
,
para_action_ref
=>
undef
,
ind
=>0,
@_
);
my
$para_lines_ref
=
$args
{para_lines_ref};
my
$para_action_ref
=
$args
{para_action_ref};
my
$ind
=
$args
{ind};
my
$list_indent
=
$self
->{__list_indent};
my
$tag
=
''
;
if
(
$self
->{__list}->[
$self
->{__listnum} - 1] ==
$UL
&&
$para_lines_ref
->[
$ind
] =~ /^\s*[-=o\*\267]+\s*/)
{
$tag
=
$self
->get_tag(
'LI'
);
$para_lines_ref
->[
$ind
] =~ s/^\s*[-=o\*\267]+\s*/${list_indent}${tag}/;
$para_action_ref
->[
$ind
] |=
$LIST_ITEM
;
}
if
(
$self
->{__list}->[
$self
->{__listnum} - 1] ==
$OL
)
{
$tag
=
$self
->get_tag(
'LI'
);
$para_lines_ref
->[
$ind
] =~ s/^\s*(\d+|[^\W\d_]).\s*/${list_indent}${tag}/;
$para_action_ref
->[
$ind
] |=
$LIST_ITEM
;
}
$para_action_ref
->[
$ind
] |=
$LIST
;
}
sub
liststuff ($%) {
my
$self
=
shift
;
my
%args
= (
para_lines_ref
=>
undef
,
para_action_ref
=>
undef
,
para_line_indent_ref
=>
undef
,
ind
=>0,
prev_ref
=>
undef
,
@_
);
my
$para_lines_ref
=
$args
{para_lines_ref};
my
$para_action_ref
=
$args
{para_action_ref};
my
$para_line_indent_ref
=
$args
{para_line_indent_ref};
my
$ind
=
$args
{ind};
my
$prev_ref
=
$args
{prev_ref};
my
$i
;
my
(
$prefix
,
$number
,
$rawprefix
) = listprefix(
$para_lines_ref
->[
$ind
]);
if
(!
$prefix
) {
if
(
$ind
> 0 && !is_blank(
$para_lines_ref
->[
$ind
- 1]))
{
return
;
}
if
(
$self
->{__listnum}) {
$self
->endlist(
num_lists
=>
$self
->{__listnum},
prev_ref
=>
$prev_ref
,
line_action_ref
=>\
$para_action_ref
->[
$ind
]);
}
return
;
}
my
$prefix_alternate
;
if
(
length
(
""
.
$number
) > 1) {
$prefix_alternate
= (
" "
x (
length
(
""
.
$number
) - 1)) .
$prefix
;
}
for
(
$i
=
$self
->{__listnum} - 1 ;
(
$i
>= 0) && (
$prefix
ne
$self
->{__listprefix}->[
$i
]) ;
$i
--
)
{
if
(
length
(
""
.
$number
) > 1) {
last
if
$prefix_alternate
eq
$self
->{__listprefix}->[
$i
];
}
}
my
$islist
;
my
(
$total_prefix
) =
$para_lines_ref
->[
$ind
] =~ /^(\s*[\w=o\*-]+.\s*)/;
$islist
= 1;
$i
++;
if
((
$i
> 0) && (
$i
!=
$self
->{__listnum})) {
$self
->endlist(
num_lists
=>
$self
->{__listnum} -
$i
,
prev_ref
=>
$prev_ref
,
line_action_ref
=>\
$para_action_ref
->[
$ind
]);
$islist
= 0;
}
elsif
(!
$self
->{__listnum} || (
$i
!=
$self
->{__listnum})) {
if
((
$para_line_indent_ref
->[
$ind
] > 0)
||
$ind
== 0
|| (
$ind
> 0 && is_blank(
$para_lines_ref
->[
$ind
- 1]))
|| (
$ind
> 0
&&
$para_action_ref
->[
$ind
- 1] & (
$BREAK
|
$HEADER
|
$CAPS
))
)
{
$islist
=
$self
->startlist(
prefix
=>
$prefix
,
number
=>
$number
,
rawprefix
=>
$rawprefix
,
para_lines_ref
=>
$para_lines_ref
,
para_action_ref
=>
$para_action_ref
,
ind
=>
$ind
,
prev_ref
=>
$prev_ref
);
}
else
{
return
;
}
}
$self
->continuelist(
para_lines_ref
=>
$para_lines_ref
,
para_action_ref
=>
$para_action_ref
,
ind
=>
$ind
)
if
(
$self
->{__mode} &
$LIST
);
$para_line_indent_ref
->[
$ind
] =
length
(
$total_prefix
)
if
$islist
;
}
sub
tablestuff ($%) {
my
$self
=
shift
;
my
%args
= (
rows_ref
=>
undef
,
para_len
=>0,
@_
);
my
$rows_ref
=
$args
{rows_ref};
my
$para_len
=
$args
{para_len};
my
@rows
= @{
$rows_ref
};
my
@starts
;
my
@ends
;
my
$spaces
;
my
$max
= 0;
my
$min
=
$para_len
;
foreach
my
$row
(
@rows
) {
(
$spaces
|=
$row
) =~
tr
/ /\xff/c;
$min
=
length
$row
if
length
$row
<
$min
;
$max
=
length
$row
if
$max
<
length
$row
;
}
$spaces
=
substr
$spaces
, 0,
$min
;
push
(
@starts
, 0)
unless
$spaces
=~ /^ /;
while
(
$spaces
=~ /((?:^| ) +)(?=[^ ])/g) {
push
@ends
,
pos
(
$spaces
) -
length
$1;
push
@starts
,
pos
(
$spaces
);
}
shift
(
@ends
)
if
$spaces
=~ /^ /;
push
(
@ends
,
$max
);
if
(2 <=
@rows
and 2 <=
@starts
) {
$self
->{__mode} |=
$TABLE
;
my
@align
;
my
$cell
=
''
;
foreach
my
$col
(0 ..
$#starts
) {
my
@count
= (0, 0, 0, 0);
foreach
my
$row
(
@rows
) {
my
$width
=
$ends
[
$col
] -
$starts
[
$col
];
$cell
=
substr
$row
,
$starts
[
$col
],
$width
;
++
$count
[(
$cell
=~ /^ / ? 2 : 0) +
(
$cell
=~ / $/ ||
length
(
$cell
) <
$width
? 1 : 0)];
}
$align
[
$col
] = 0;
my
$population
=
$count
[1] +
$count
[2] +
$count
[3];
foreach
(1 .. 3) {
if
(
$count
[
$_
] * 2 >
$population
) {
$align
[
$col
] =
$_
;
last
;
}
}
}
foreach
my
$row
(
@rows
) {
$row
=
join
''
,
$self
->get_tag(
'TR'
), (
map
{
$cell
=
substr
$row
,
$starts
[
$_
],
$ends
[
$_
] -
$starts
[
$_
];
$cell
=~ s/^ +//;
$cell
=~ s/ +$//;
if
(
$self
->{escape_HTML_chars}) {
$cell
= escape(
$cell
);
}
(
$self
->get_tag(
'TD'
,
inside_tag
=>(
$self
->{xhtml}
?
$xhtml_alignments
[
$align
[
$_
]]
: (
$self
->{lower_case_tags}
?
$lc_alignments
[
$align
[
$_
]]
:
$alignments
[
$align
[
$_
]]))),
$cell
,
$self
->close_tag(
'TD'
));
} 0 ..
$#starts
),
$self
->close_tag(
'TR'
);
}
my
$tag
;
if
(
$self
->{xhtml})
{
$tag
=
$self
->get_tag(
'TABLE'
,
inside_tag
=>
' summary=""'
);
}
else
{
$tag
=
$self
->get_tag(
'TABLE'
);
}
$rows
[0] =
"${tag}\n"
.
$rows
[0];
$tag
=
$self
->get_tag(
'TABLE'
,
tag_type
=>
'end'
);
$rows
[
$#rows
] .=
"\n${tag}"
;
@{
$rows_ref
} =
@rows
;
return
1;
}
else
{
return
0;
}
}
sub
is_preformatted ($$) {
my
$self
=
shift
;
my
$line
=
shift
;
my
$pre_white_min
=
$self
->{preformat_whitespace_min};
my
$result
= ((
$line
=~ /\s{
$pre_white_min
,}\S+/o)
|| (
$line
=~ /\.{
$pre_white_min
,}\S+/o));
return
$result
;
}
sub
endpreformat ($%) {
my
$self
=
shift
;
my
%args
= (
mode_ref
=>
undef
,
para_lines_ref
=>
undef
,
para_action_ref
=>
undef
,
ind
=>0,
prev_ref
=>
undef
,
@_
);
my
$mode_ref
=
$args
{mode_ref};
my
$para_lines_ref
=
$args
{para_lines_ref};
my
$para_action_ref
=
$args
{para_action_ref};
my
$ind
=
$args
{ind};
my
$prev_ref
=
$args
{prev_ref};
my
$tag
=
''
;
if
(${
$mode_ref
} &
$PRE_EXPLICIT
) {
my
$pe_mark
=
$self
->{preformat_end_marker};
if
(
$para_lines_ref
->[
$ind
] =~ /
$pe_mark
/io) {
if
(
$ind
== 0)
{
$tag
=
$self
->get_tag(
'PRE'
,
tag_type
=>
'end'
);
$para_lines_ref
->[
$ind
] =
"${tag}\n"
;
}
else
{
$tag
=
$self
->get_tag(
'PRE'
,
tag_type
=>
'end'
);
$para_lines_ref
->[
$ind
- 1] .=
"${tag}\n"
;
$para_lines_ref
->[
$ind
] =
""
;
}
${
$mode_ref
} ^= ((
$PRE
|
$PRE_EXPLICIT
) & ${
$mode_ref
});
$para_action_ref
->[
$ind
] |=
$END
;
}
return
;
}
if
(!
$self
->is_preformatted(
$para_lines_ref
->[
$ind
])
&& (
$self
->{endpreformat_trigger_lines} == 1
|| (
$ind
+ 1 < @{
$para_lines_ref
}
&& !
$self
->is_preformatted(
$para_lines_ref
->[
$ind
+ 1]))
||
$ind
+ 1 >= @{
$para_lines_ref
}
)
)
{
if
(
$ind
== 0)
{
$tag
=
$self
->get_tag(
'PRE'
,
tag_type
=>
'end'
);
${
$prev_ref
} =
"${tag}\n"
;
}
else
{
$tag
=
$self
->get_tag(
'PRE'
,
tag_type
=>
'end'
);
$para_lines_ref
->[
$ind
- 1] .=
"${tag}\n"
;
}
${
$mode_ref
} ^= (
$PRE
& ${
$mode_ref
});
$para_action_ref
->[
$ind
] |=
$END
;
}
}
sub
preformat ($%) {
my
$self
=
shift
;
my
%args
= (
mode_ref
=>
undef
,
line_ref
=>
undef
,
line_action_ref
=>
undef
,
prev_ref
=>
undef
,
next_ref
=>
undef
,
prev_action_ref
=>
undef
,
@_
);
my
$mode_ref
=
$args
{mode_ref};
my
$line_ref
=
$args
{line_ref};
my
$line_action_ref
=
$args
{line_action_ref};
my
$prev_ref
=
$args
{prev_ref};
my
$next_ref
=
$args
{next_ref};
my
$prev_action_ref
=
$args
{prev_action_ref};
my
$tag
=
''
;
if
(
$self
->{use_preformat_marker}) {
my
$pstart
=
$self
->{preformat_start_marker};
if
(${
$line_ref
} =~ /
$pstart
/io) {
if
(${
$prev_ref
} =~ s/<P>$//)
{
pop
@{
$self
->{__tags}};
}
$tag
=
$self
->get_tag(
'PRE'
,
inside_tag
=>
" class='quote_explicit'"
);
${
$line_ref
} =
"${tag}\n"
;
${
$mode_ref
} |=
$PRE
|
$PRE_EXPLICIT
;
${
$line_action_ref
} |=
$PRE
;
return
;
}
}
if
(!(${
$line_action_ref
} &
$MAILQUOTE
)
&& !(${
$prev_action_ref
} &
$MAILQUOTE
)
&& (
$self
->{preformat_trigger_lines} == 0
|| (
$self
->is_preformatted(${
$line_ref
})
&& (
$self
->{preformat_trigger_lines} == 1
||
$self
->is_preformatted(${
$next_ref
})))
)
)
{
if
(${
$prev_ref
} =~ s/<P>$//)
{
pop
@{
$self
->{__tags}};
}
$tag
=
$self
->get_tag(
'PRE'
);
${
$line_ref
} =~ s/^/${tag}\n/;
${
$mode_ref
} |=
$PRE
;
${
$line_action_ref
} |=
$PRE
;
}
}
sub
make_new_anchor ($$) {
my
$self
=
shift
;
my
$heading_level
=
shift
;
my
(
$anchor
,
$i
);
return
sprintf
(
"%d"
,
$self
->{__non_header_anchor}++)
if
(!
$heading_level
);
$anchor
=
"section"
;
$self
->{__heading_count}->[
$heading_level
- 1]++;
for
(
$i
= @{
$self
->{__heading_count}} ;
$i
>
$heading_level
;
$i
--) {
$self
->{__heading_count}->[
$i
- 1] = 0;
}
for
(
$i
= 0 ;
$i
<
$heading_level
;
$i
++) {
$self
->{__heading_count}->[
$i
] = 1
if
!
$self
->{__heading_count}->[
$i
];
$anchor
.=
sprintf
(
"_%d"
,
$self
->{__heading_count}->[
$i
]);
}
chomp
(
$anchor
);
$anchor
;
}
sub
anchor_mail ($$) {
my
$self
=
shift
;
my
$line_ref
=
shift
;
if
(
$self
->{make_anchors}) {
my
(
$anchor
) =
$self
->make_new_anchor(0);
if
(
$self
->{lower_case_tags}) {
${
$line_ref
} =~ s/([^ ]*)/<a name=
"$anchor"
>$1<\/a>/;
}
else
{
${
$line_ref
} =~ s/([^ ]*)/<A NAME=
"$anchor"
>$1<\/A>/;
}
}
}
sub
anchor_heading ($$$) {
my
$self
=
shift
;
my
$level
=
shift
;
my
$line_ref
=
shift
;
if
(
$self
->{dict_debug} & 8) {
print
STDERR
"anchor_heading: "
, ${
$line_ref
},
"\n"
;
}
if
(
$self
->{make_anchors}) {
my
(
$anchor
) =
$self
->make_new_anchor(
$level
);
if
(
$self
->{lower_case_tags}) {
${
$line_ref
} =~ s/(<h.>)(.*)(<\/h.>)/$1<a name=
"$anchor"
>$2<\/a>$3/;
}
else
{
${
$line_ref
} =~ s/(<H.>)(.*)(<\/H.>)/$1<A NAME=
"$anchor"
>$2<\/A>$3/;
}
}
if
(
$self
->{dict_debug} & 8) {
print
STDERR
"anchor_heading(after): "
, ${
$line_ref
},
"\n"
;
}
}
sub
heading_level ($$) {
my
$self
=
shift
;
my
(
$style
) =
@_
;
$self
->{__heading_styles}->{
$style
} = ++
$self
->{__num_heading_styles}
if
!
$self
->{__heading_styles}->{
$style
};
$self
->{__heading_styles}->{
$style
};
}
sub
heading ($%) {
my
$self
=
shift
;
my
%args
= (
line_ref
=>
undef
,
line_action_ref
=>
undef
,
next_ref
=>
undef
,
@_
);
my
$line_ref
=
$args
{line_ref};
my
$line_action_ref
=
$args
{line_action_ref};
my
$next_ref
=
$args
{next_ref};
my
(
$hoffset
,
$heading
) = ${
$line_ref
} =~ /^(\s*)(.+)$/;
$hoffset
=
""
unless
defined
(
$hoffset
);
$heading
=
""
unless
defined
(
$heading
);
$heading
=~ s/&[^;]+;/X/g;
my
(
$uoffset
,
$underline
) = ${
$next_ref
} =~ /^(\s*)(\S+)\s*$/;
$uoffset
=
""
unless
defined
(
$uoffset
);
$underline
=
""
unless
defined
(
$underline
);
my
(
$lendiff
,
$offsetdiff
);
$lendiff
=
length
(
$heading
) -
length
(
$underline
);
$lendiff
*= -1
if
$lendiff
< 0;
$offsetdiff
=
length
(
$hoffset
) -
length
(
$uoffset
);
$offsetdiff
*= -1
if
$offsetdiff
< 0;
if
(is_blank(${
$line_ref
})
|| (
$lendiff
>
$self
->{underline_length_tolerance})
|| (
$offsetdiff
>
$self
->{underline_offset_tolerance}))
{
return
;
}
$underline
=
substr
(
$underline
, 0, 1);
$underline
.=
"C"
if
$self
->iscaps(${
$line_ref
});
${
$next_ref
} =
" "
;
$self
->{__heading_level} =
$self
->heading_level(
$underline
);
$self
->tagline(
"H"
.
$self
->{__heading_level},
$line_ref
);
$self
->anchor_heading(
$self
->{__heading_level},
$line_ref
);
${
$line_action_ref
} |=
$HEADER
;
}
sub
custom_heading ($%) {
my
$self
=
shift
;
my
%args
= (
para_lines_ref
=>
undef
,
para_action_ref
=>
undef
,
ind
=>0,
@_
);
my
$para_lines_ref
=
$args
{para_lines_ref};
my
$para_action_ref
=
$args
{para_action_ref};
my
$ind
=
$args
{ind};
my
(
$i
,
$level
);
for
(
$i
= 0 ;
$i
< @{
$self
->{custom_heading_regexp}} ;
$i
++) {
my
$reg
= ${
$self
->{custom_heading_regexp}}[
$i
];
if
(
$para_lines_ref
->[
$ind
] =~ /
$reg
/) {
if
(
$self
->{explicit_headings}) {
$level
=
$i
+ 1;
}
else
{
$level
=
$self
->heading_level(
"Cust"
.
$i
);
}
$self
->tagline(
"H"
.
$level
, \
$para_lines_ref
->[
$ind
]);
$self
->anchor_heading(
$level
, \
$para_lines_ref
->[
$ind
]);
$para_action_ref
->[
$ind
] |=
$HEADER
;
last
;
}
}
}
sub
unhyphenate_para ($$) {
my
$self
=
shift
;
my
$para_ref
=
shift
;
${
$para_ref
} =~
/(\s*)([^\W\d_]*)\-\n(\s*)([^\W\d_]+[\)\}\]\.,:;\'\"\>]*\s*)/s;
${
$para_ref
} =~
s/(\s*)([^\W\d_]*)\-\n(\s*)([^\W\d_]+[\)\}\]\.,:;\'\"\>]*\s*)/$1$2$4\n$3/gs;
}
sub
untabify ($$) {
my
$self
=
shift
;
my
$line
=
shift
;
while
(
$line
=~ /\011/) {
my
$tw
=
$self
->{tab_width};
$line
=~ s/\011/
" "
x (
$tw
- (
length
($`) %
$tw
))/e;
}
$line
;
}
sub
tagline ($$$) {
my
$self
=
shift
;
my
$tag
=
shift
;
my
$line_ref
=
shift
;
chomp
${
$line_ref
};
my
$tag1
=
$self
->get_tag(
$tag
);
my
$tag2
=
$self
->get_tag(
$tag
,
tag_type
=>
'end'
);
${
$line_ref
} =~ s/^\s*(.*)$/${tag1}$1${tag2}\n/;
}
sub
iscaps {
my
$self
=
shift
;
local
(
$_
) =
@_
;
my
$min_caps_len
=
$self
->{min_caps_length};
/^[^a-z\341\343\344\352\353\354\363\370\337\373\375\342\345\347\350\355\357\364\365\376\371\377\340\346\351\360\356\361\362\366\372\374<]*[A-Z\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\330\331\332\333\334\335\336]{
$min_caps_len
,}[^a-z\341\343\344\352\353\354\363\370\337\373\375\342\345\347\350\355\357\364\365\376\371\377\340\346\351\360\356\361\362\366\372\374<]*$/;
}
sub
caps {
my
$self
=
shift
;
my
%args
= (
line_ref
=>
undef
,
line_action_ref
=>
undef
,
@_
);
my
$line_ref
=
$args
{line_ref};
my
$line_action_ref
=
$args
{line_action_ref};
if
(
$self
->iscaps(${
$line_ref
})) {
$self
->tagline(
$self
->{caps_tag},
$line_ref
);
${
$line_action_ref
} |=
$CAPS
;
}
}
sub
glob2regexp {
my
(
$glob
) =
@_
;
$glob
=~ s/[^\w\[\]\*\?\|\\]/\\$&/g;
my
(
$regexp
,
$i
,
$len
,
$escaped
) = (
""
, 0,
length
(
$glob
), 0);
for
(;
$i
<
$len
;
$i
++) {
my
$char
=
substr
(
$glob
,
$i
, 1);
if
(
$escaped
) {
$escaped
= 0;
$regexp
.=
$char
;
next
;
}
if
(
$char
eq
"\\"
) {
$escaped
= 1;
next
;
$regexp
.=
$char
;
}
if
(
$char
eq
"?"
) {
$regexp
.=
"."
;
next
;
}
if
(
$char
eq
"*"
) {
$regexp
.=
".*"
;
next
;
}
$regexp
.=
$char
;
}
"\\b"
.
$regexp
.
"\\b"
;
}
sub
add_regexp_to_links_table ($$$$) {
my
$self
=
shift
;
my
(
$key
,
$URL
,
$switches
) =
@_
;
if
(!
$self
->{__links_table}->{
$key
}) {
push
(@{
$self
->{__links_table_order}}, (
$key
));
$self
->{__links_table}->{
$key
} =
$URL
;
$self
->{__links_switch_table}->{
$key
} =
$switches
;
my
$ind
= @{
$self
->{__links_table_order}} - 1;
print
STDERR
" ("
,
$ind
,
")\tKEY: $key\n\tVALUE: $URL\n\tSWITCHES: $switches\n\n"
if
(
$self
->{dict_debug} & 1);
}
else
{
if
(
$self
->{dict_debug} & 1) {
print
STDERR
" Skipping entry. Key already in table.\n"
;
print
STDERR
"\tKEY: $key\n\tVALUE: $URL\n\n"
;
}
}
}
sub
add_literal_to_links_table ($$$$) {
my
$self
=
shift
;
my
(
$key
,
$URL
,
$switches
) =
@_
;
$key
=~ s/(\W)/\\$1/g;
$key
=
"\\b$key\\b"
;
$self
->add_regexp_to_links_table(
$key
,
$URL
,
$switches
);
}
sub
add_glob_to_links_table ($$$$) {
my
$self
=
shift
;
my
(
$key
,
$URL
,
$switches
) =
@_
;
$self
->add_regexp_to_links_table(glob2regexp(
$key
),
$URL
,
$switches
);
}
sub
parse_dict ($$$) {
my
$self
=
shift
;
my
(
$dictfile
,
$dict
) =
@_
;
print
STDERR
"Parsing dictionary file $dictfile\n"
if
(
$self
->{dict_debug} & 1);
$dict
=~ s/^\
$dict
=~ s/^.*[^\\]:\s*$//mg;
if
(
$dict
=~ /->\s*->/) {
my
$message
=
"Two consecutive '->'s found in $dictfile\n"
;
my
$near
;
(
$near
) =
$dict
=~ /([\S ]*\s*->\s*->\s*\S*)/;
$message
.=
"\n$near\n"
if
$near
=~ /\S/;
die
$message
;
}
my
(
$key
,
$URL
,
$switches
,
$options
);
while
(
$dict
=~ /\s*(.+)\s+\-+([iehos]+\-+)?\>\s*(.*\S+)\s*\n/ig) {
$key
= $1;
$options
= $2;
$options
=
""
unless
defined
(
$options
);
$URL
= $3;
$switches
= 0;
$switches
+=
$LINK_NOCASE
if
$options
=~ /i/i;
$switches
+=
$LINK_EVAL
if
$options
=~ /e/i;
$switches
+=
$LINK_HTML
if
$options
=~ /h/i;
$switches
+=
$LINK_ONCE
if
$options
=~ /o/i;
$switches
+=
$LINK_SECT_ONCE
if
$options
=~ /s/i;
$key
=~ s/\s*$//;
if
(
$key
=~ m|^/|)
{
$key
=
substr
(
$key
, 1);
$key
=~ s|/$||;
$self
->add_regexp_to_links_table(
$key
,
$URL
,
$switches
);
}
elsif
(
$key
=~ /^\|/)
{
$key
=
substr
(
$key
, 1);
$key
=~ s/\|$//;
$key
=~ s|/|\\/|g;
$self
->add_regexp_to_links_table(
$key
,
$URL
,
$switches
);
}
elsif
(
$key
=~ /\"/) {
$key
=
substr
(
$key
, 1);
$key
=~ s/\
"$//; # Allow them to forget the closing "
$self
->add_literal_to_links_table(
$key
,
$URL
,
$switches
);
}
else
{
$self
->add_glob_to_links_table(
$key
,
$URL
,
$switches
);
}
}
}
sub
setup_dict_checking ($) {
my
$self
=
shift
;
my
(
$key
,
$URL
,
$switches
,
$options
,
$tag1
,
$tag2
);
my
(
$pattern
,
$href
,
$i
,
$r_sw
,
$code
,
$code_ref
);
for
(
$i
= 1 ;
$i
< @{
$self
->{__links_table_order}} ;
$i
++) {
$pattern
=
$self
->{__links_table_order}->[
$i
];
$key
=
$pattern
;
$switches
=
$self
->{__links_switch_table}->{
$key
};
$href
=
$self
->{__links_table}->{
$key
};
if
(!(
$switches
&
$LINK_HTML
))
{
$href
=~ s
if
(
$self
->{lower_case_tags})
{
$href
=
'<a href="'
.
$href
.
'">$&<\\/a>'
}
else
{
$href
=
'<A HREF="'
.
$href
.
'">$&<\\/A>'
}
}
else
{
if
(
$self
->{lower_case_tags})
{
$href
=~ s
$href
=~ s/(<)([A-Z]*)(>)/${1}\L${2}${3}/g;
$href
=~ s/(<)(A\s
*HREF
)([^>]*>)/$1\L$2$3/g;
}
$href
=~ s
}
$r_sw
=
"s"
;
$r_sw
.=
"i"
if
(
$switches
&
$LINK_NOCASE
);
$r_sw
.=
"e"
if
(
$switches
&
$LINK_EVAL
);
$code
=
"\$self->{__repl_code}->[$i] = sub {\nmy \$al = shift;\n\$al =~ s/$pattern/$href/$r_sw;\nreturn \$al; }\n"
;
print
STDERR
"$code"
if
(
$self
->{dict_debug} & 2);
eval
"$code"
;
if
(
$switches
&
$LINK_NOCASE
)
{
$self
->{__search_patterns}->[
$i
] =
qr/$pattern/
si;
}
else
{
$self
->{__search_patterns}->[
$i
] =
qr/$pattern/
s;
}
}
}
sub
in_link_context ($$$) {
my
$self
=
shift
;
my
(
$match
,
$before
) =
@_
;
return
1
if
$match
=~ m@</?A>
@i
;
my
(
$final_open
,
$final_close
);
if
(
$self
->{lower_case_tags}) {
$final_open
=
rindex
(
$before
,
"<a "
) - $[;
$final_close
=
rindex
(
$before
,
"</a>"
) - $[;
}
else
{
$final_open
=
rindex
(
$before
,
"<A "
) - $[;
$final_close
=
rindex
(
$before
,
"</A>"
) - $[;
}
return
1
if
(
$final_open
>= 0)
&& ((
$final_close
< 0)
|| (
$final_open
>
$final_close
)
);
$final_open
=
rindex
(
$before
,
"<"
) - $[;
$final_close
=
rindex
(
$before
,
">"
) - $[;
(
$final_open
>= 0)
&& ((
$final_close
< 0)
|| (
$final_open
>
$final_close
)
);
}
sub
clear_section_links ($) {
my
$self
=
shift
;
$self
->{__done_with_sect_link} = [];
}
sub
check_dictionary_links ($%) {
my
$self
=
shift
;
my
%args
= (
line_ref
=>
undef
,
line_action_ref
=>
undef
,
@_
);
my
$line_ref
=
$args
{line_ref};
my
$line_action_ref
=
$args
{line_action_ref};
my
(
$i
,
$pattern
,
$switches
,
$options
,
$repl_func
);
my
$key
;
my
$s_sw
;
my
$r_sw
;
my
(
$line_link
) = (${
$line_action_ref
} |
$LINK
);
my
(
$before
,
$linkme
,
$line_with_links
);
for
(
$i
= 1 ;
$i
< @{
$self
->{__links_table_order}} ;
$i
++) {
$pattern
=
$self
->{__links_table_order}->[
$i
];
$key
=
$pattern
;
$switches
=
$self
->{__links_switch_table}->{
$key
};
if
(
$switches
&
$LINK_ONCE
)
{
$line_with_links
=
""
;
while
(!
$self
->{__done_with_link}->[
$i
]
&& ${
$line_ref
} =~
$self
->{__search_patterns}->[
$i
])
{
$self
->{__done_with_link}->[
$i
] = 1;
$line_link
=
$LINK
if
(!
$line_link
);
$before
= $`;
$linkme
= $&;
${
$line_ref
} =
substr
(${
$line_ref
},
length
(
$before
) +
length
(
$linkme
));
if
(!
$self
->in_link_context(
$linkme
,
$line_with_links
.
$before
)) {
print
STDERR
"Link rule $i matches $linkme\n"
if
(
$self
->{dict_debug} & 4);
$repl_func
=
$self
->{__repl_code}->[
$i
];
$linkme
=
&$repl_func
(
$linkme
);
}
$line_with_links
.=
$before
.
$linkme
;
}
${
$line_ref
} =
$line_with_links
. ${
$line_ref
};
}
elsif
(
$switches
&
$LINK_SECT_ONCE
)
{
$line_with_links
=
""
;
while
(!
$self
->{__done_with_sect_link}->[
$i
]
&& ${
$line_ref
} =~
$self
->{__search_patterns}->[
$i
])
{
$self
->{__done_with_sect_link}->[
$i
] = 1;
$line_link
=
$LINK
if
(!
$line_link
);
$before
= $`;
$linkme
= $&;
${
$line_ref
} =
substr
(${
$line_ref
},
length
(
$before
) +
length
(
$linkme
));
if
(!
$self
->in_link_context(
$linkme
,
$line_with_links
.
$before
)) {
print
STDERR
"Link rule $i matches $linkme\n"
if
(
$self
->{dict_debug} & 4);
$repl_func
=
$self
->{__repl_code}->[
$i
];
$linkme
=
&$repl_func
(
$linkme
);
}
$line_with_links
.=
$before
.
$linkme
;
}
${
$line_ref
} =
$line_with_links
. ${
$line_ref
};
}
else
{
$line_with_links
=
""
;
while
(${
$line_ref
} =~
$self
->{__search_patterns}->[
$i
]) {
$line_link
=
$LINK
if
(!
$line_link
);
$before
= $`;
$linkme
= $&;
${
$line_ref
} =
substr
(${
$line_ref
},
length
(
$before
) +
length
(
$linkme
));
if
(!
$self
->in_link_context(
$linkme
,
$line_with_links
.
$before
)) {
print
STDERR
"Link rule $i matches $linkme\n"
if
(
$self
->{dict_debug} & 4);
$repl_func
=
$self
->{__repl_code}->[
$i
];
$linkme
=
&$repl_func
(
$linkme
);
}
$line_with_links
.=
$before
.
$linkme
;
}
${
$line_ref
} =
$line_with_links
. ${
$line_ref
};
}
}
${
$line_action_ref
} |=
$line_link
;
}
sub
load_dictionary_links ($) {
my
$self
=
shift
;
my
(
$dict
,
$contents
);
@{
$self
->{__links_table_order}} = 0;
%{
$self
->{__links_table}} = ();
foreach
$dict
(@{
$self
->{links_dictionaries}}) {
next
unless
$dict
;
open
(DICT,
"$dict"
) ||
die
"Can't open Dictionary file $dict\n"
;
$contents
=
""
;
$contents
.=
$_
while
(<DICT>);
close
(DICT);
$self
->parse_dict(
$dict
,
$contents
);
}
$self
->setup_dict_checking();
}
sub
make_dictionary_links ($%) {
my
$self
=
shift
;
my
%args
= (
line_ref
=>
undef
,
line_action_ref
=>
undef
,
@_
);
my
$line_ref
=
$args
{line_ref};
my
$line_action_ref
=
$args
{line_action_ref};
$self
->check_dictionary_links(
line_ref
=>
$line_ref
,
line_action_ref
=>
$line_action_ref
);
warn
$@
if
$@;
}
sub
do_file_start ($$$) {
my
$self
=
shift
;
my
$outhandle
=
shift
;
my
$para
=
shift
;
if
(!
$self
->{extract}) {
my
@para_lines
=
split
(/\n/,
$para
);
my
$first_line
=
$para_lines
[0];
if
(
$self
->{doctype})
{
if
(
$self
->{xhtml})
{
print
$outhandle
'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"'
,
"\n"
;
}
else
{
print
$outhandle
'<!DOCTYPE HTML PUBLIC "'
.
$self
->{doctype} . "\
">\n"
;
}
}
print
$outhandle
$self
->get_tag(
'HTML'
),
"\n"
;
print
$outhandle
$self
->get_tag(
'HEAD'
),
"\n"
;
if
(
$self
->{titlefirst} && !
$self
->{title}) {
my
(
$tit
) =
$first_line
=~ /^ *(.*)/;
$tit
=~ s/ *$//;
$tit
= escape(
$tit
)
if
$self
->{escape_HTML_chars};
$self
->{
'title'
} =
$tit
;
}
if
(!
$self
->{title}) {
$self
->{
'title'
} =
""
;
}
print
$outhandle
$self
->get_tag(
'TITLE'
),
$self
->{title},
$self
->get_tag(
'TITLE'
,
tag_type
=>
'end'
),
"\n"
;
if
(
$self
->{append_head}) {
open
(APPEND,
$self
->{append_head})
||
die
"Failed to open "
,
$self
->{append_head},
"\n"
;
while
(<APPEND>) {
print
$outhandle
$_
;
}
close
(APPEND);
}
if
(
$self
->{lower_case_tags})
{
print
$outhandle
$self
->get_tag(
'META'
,
tag_type
=>
'empty'
,
inside_tag
=>
" name=\"generator\" content=\"$PROG v$VERSION\""
),
"\n"
;
}
else
{
print
$outhandle
$self
->get_tag(
'META'
,
tag_type
=>
'empty'
,
inside_tag
=>
" NAME=\"generator\" CONTENT=\"$PROG v$VERSION\""
),
"\n"
;
}
if
(
$self
->{style_url})
{
my
$style_url
=
$self
->{style_url};
if
(
$self
->{lower_case_tags})
{
print
$outhandle
$self
->get_tag(
'LINK'
,
tag_type
=>
'empty'
,
inside_tag
=>
" rel=\"stylesheet\" type=\"text/css\" href=\"$style_url\""
),
"\n"
;
}
else
{
print
$outhandle
$self
->get_tag(
'LINK'
,
tag_type
=>
'empty'
,
inside_tag
=>
" REL=\"stylesheet\" TYPE=\"text/css\" HREF=\"$style_url\""
),
"\n"
;
}
}
print
$outhandle
$self
->get_tag(
'HEAD'
,
tag_type
=>
'end'
),
"\n"
;
if
(
$self
->{body_deco})
{
print
$outhandle
$self
->get_tag(
'BODY'
,
inside_tag
=>
$self
->{body_deco}),
"\n"
;
}
else
{
print
$outhandle
$self
->get_tag(
'BODY'
),
"\n"
;
}
}
if
(
$self
->{prepend_file}) {
if
(-r
$self
->{prepend_file}) {
open
(PREPEND,
$self
->{prepend_file});
while
(<PREPEND>) {
print
$outhandle
$_
;
}
close
(PREPEND);
}
else
{
print
STDERR
"Can't find or read file "
,
$self
->{prepend_file},
" to prepend.\n"
;
}
}
}
sub
do_init_call ($) {
my
$self
=
shift
;
if
(!
$self
->{__call_init_done}) {
push
(@{
$self
->{links_dictionaries}}, (
$self
->{default_link_dict}))
if
(
$self
->{make_links} && (-f
$self
->{default_link_dict}));
$self
->deal_with_options();
if
(
$self
->{make_links}) {
push
(@{
$self
->{links_dictionaries}}, (
$self
->{system_link_dict}))
if
-f
$self
->{system_link_dict};
$self
->load_dictionary_links();
}
$self
->{__non_header_anchor} = 0;
$self
->{__mode} = 0;
$self
->{__listnum} = 0;
$self
->{__list_indent} =
''
;
$self
->{__tags} = [];
$self
->{__call_init_done} = 1;
}
}
sub
run_txt2html {
my
(
$caller
) =
@_
;
my
$conv
= new HTML::TextToHTML(\
@ARGV
);
my
@args
= ();
foreach
my
$df
(
@ARGV
) {
push
@args
,
"--infile"
,
$df
;
}
$conv
->txt2html(\
@args
);
}
1;