# Philippe Verdret 1998-1999
use strict;
package RTF::HTML::Converter;

use RTF::Control;
use RTF::HTML::Converter::ansi;
use RTF::HTML::Converter::charmap;

@RTF::HTML::Converter::ISA = qw(RTF::Control);

use constant TRACE => 0;
use constant LIST_TRACE => 0;
use constant SHOW_STYLE_NOT_PROCESSED => 1;
use constant SHOW_STYLE => 0;	# insert style name in the output
use constant SHOW_RTF_LINE_NUMBER => 0;

use constant RTF_DEBUG => 0;



=head1 NAME

RTF::HTML::Converter - Perl extension for converting RTF into HTML

=head1 DESCRIPTION

Perl extension for converting RTF into HTML

=head1 SYNOPSIS

	use strict;
	use RTF::HTML::Converter;
	
	my $object = RTF::HTML::Converter->new(
	
		output => \*STDOUT
	
	);

	$object->parse_stream( \*RTF_FILE );
	
OR

	use strict;
	use RTF::HTML::Converter;
	
	my $object = RTF::HTML::Converter->new(
	
		output => \$string
	
	);

	$object->parse_string( $rtf_data );

=head1 METHODS

=head2 new()

Constructor method. Currently takes one named parameter, C<output>,
which can either be a reference to a filehandle, or a reference to
a string. This is where our HTML will end up.

=head2 parse_stream()

Read RTF in from a filehandle, and start processing it. Pass me
a reference to a filehandle.

=head2 parse_string()

Read RTF in from a string, and start processing it. Pass me a string.

=head1 JUST SO YOU KNOW

You can mix-and-match your output and input methods - nothing to stop
you outputting to a string when you've read from a filehandle...

=head1 AUTHOR

Peter Sergeant C<rtf.parser@clueball.com>, originally by Philippe Verdret

=head1 COPYRIGHT

Copyright 2004 B<Pete Sergeant>.

This program is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

=head1 CREDITS

This work was carried out under a grant generously provided by The Perl Foundation -
give them money!


=cut



# Symbol exported by the RTF::Ouptut module:
# %info: informations of the {\info ...}
# %par_props: paragraph properties
# $style: name of the current style or pseudo-style
# $event: start and end on the 'document' event
# $text: text associated to the current style
# %symbol: symbol translations
# %do_on_control: routines associated to RTF controls
# %do_on_event: routines associated to events
# output(): a stack oriented output routine (don't use print())

my $START_NEW_PARA = 1;		# some actions to do at the beginning of a new para

###########################################################################
my $N = "\n"; # Pretty-printing
				# some output parameters
my $TITLE_FLAG = 0;
my $LANG = 'en';
my $TABLE_BORDER = 1;

my $CURRENT_LI = 0;		# current list indent
my @LIST_STACK = ();		# stack of opened lists
my %LI_LEVEL = ();		# li -> list level

my %charmap_defaults = map({ sprintf("%02x", $_) => "&#$_;" } (0..255));

my %PAR_ALIGN = qw(
		 qc CENTER
		 ql LEFT
		 qr RIGHT
		 qj LEFT
		);
				# here put your style mappings
my %STYLES = ('Normal' => 'p',
	      'Abstract' => 'Blockquote', 
	      'PACSCode' => 'Code',
	      #'AuthGrp' => '', 
	      'Section' => 'H1',
	      'heading 1' => 'H1',
	      'heading 2' => 'H2',
	      'heading 3' => 'H3',
	      'heading 4' => 'H4',
	      'heading 5' => 'H5',
	      'heading 6' => 'H6', 
	      'Code' => 'pre',
	      'par' => 'p',	# default value
	     );
				# list names -> level
my %UL_STYLES = ('toc 1' => 1, 
		 'toc 2' => 2,
		 'toc 3' => 3,
		 'toc 4' => 4,
		 'toc 5' => 5,
		);

				# not used
my %UL_TYPES = qw(b7 disk
		  X square
		  Y circle
		 );

my %OL_STYLES = (
		);				
				# not used
my %OL_TYPES = (
		'pncard' => '1', # Cardinal numbering: One, Two, Three
		'pndec' => '1', # Decimal numbering: 1, 2, 3
		'pnucltr' => 'A', # Uppercase alphabetic numbering
		'pnlcltr' => 'a', # lowercase alphabetic numbering
		'pnucrm' =>  'I', # Uppercase roman numbering
		'pnlcrm' =>  'i', # Lowercase roman numbering
	       );
my $in_Field = -1;			# nested links are illegal, not used
my $in_Bookmark = -1;			# nested links are illegal, not used
%do_on_event = 
  (
   'document' => sub {		# Special action
     if ($event eq 'start') {
       output qq@<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" []>$N<html>$N<body>$N@;
     } else {
       my $author = $info{author};
       my $creatim = $info{creatim};
       my $revtim = $info{revtim};

       my $tag;
       while (@LIST_STACK) {
	 $tag = pop @LIST_STACK;
	 output "</$tag>" . $N;
       }
       $style = 'p';

       if ($LANG eq 'fr') {
	 output "<$style><b>Auteur</b> : $author</$style>\n" if $author;
	 output "<$style><b>Date de création</b> : $creatim</$style>\n" if $creatim;
	 output "<$style><b>Date de modification</b> : $revtim</$style>\n" if $revtim;
       } else {			# Default
	 output "<$style><b>Author</b> : $author</$style>\n" if $author;
	 output "<$style><b>Creation date</b>: $creatim</$style>\n" if $creatim;
	 output "<$style><b>Modification date</b>: $revtim</$style>\n" if $revtim;
       }
       output "</body>\n</html>\n";
     }
   },
				# Table processing
   'table' => sub {		# end of table
     if ($event eq 'end') {
       #print STDERR "end of table\n";
       $TABLE_BORDER ? output "<table BORDER>$N$text</table>$N"
	 :
	   output "<table>$N$text</table>$N";
     } else {
       #print STDERR "start of table\n";
       my $end;
       while (@LIST_STACK) {
	 $end .= '</' . pop(@LIST_STACK) . '>' . $N;
       }
       output ($end);
     }
   },
   'row' => sub {		# end of row
     #my $char_props = $_[SELF]->force_char_props('end');
     #output "$N<tr valign='top'>$text$char_props</tr>$N";
     if ($event eq 'end') {
       output "$N<tr valign='top'>$N$text$N</tr>$N";
     } else {
				# not defined
     }
   },
   'cell' => sub {		# end of cell
     if ($event eq 'end') {
       my $char_props = $_[SELF]->force_char_props('end');
       my $end;
       while (@LIST_STACK) {
	 $end .= '</' . pop(@LIST_STACK) . '>' . $N;
       }
       output "<td>$text$char_props$end</td>$N";
     } else {
       # not defined
     }
   },
				# PARAGRAPH STYLES
   #'Normal' => sub {},		# create one entry per style name???
   'par' => sub {		# Default rule: if no entry for a paragraph style
				# Paragraph styles
     #print STDERR "$style\n" if LIST_TRACE;
     return output($text) unless $text =~ /\S/;
     my ($tag_start, $tag_end, $before) = ('','','');

     if (defined(my $level = $UL_STYLES{$style})) { # registered list styles
       if ($level > @LIST_STACK) {
	 my $tag;
	 push @LIST_STACK, $tag = 'UL';
	 if (SHOW_STYLE) {
	   $before = "<$tag>[$style]" . $N;
	 } else {
	   $before = "<$tag>" . $N;
	 }
	 $tag_start = $tag_end = 'LI';
       } else {
	 $level = @LIST_STACK - $level;
	 while ($level-- > 0) {
	   $before .= '</' . pop(@LIST_STACK) . '>'. $N;
	 }
	 $tag_start = $tag_end = 'LI';       
       } 
     } else {
     }

     if ($tag_start eq '') {	# end of list
       while (@LIST_STACK) {
	 $before .= '</' . pop(@LIST_STACK) . '>' . $N;
       }
       $tag_start = $tag_end = $STYLES{$style} || do {
	 if (SHOW_STYLE_NOT_PROCESSED) {
	   use vars qw/%style_not_processed/;
				# todo: add count
	   unless (exists $style_not_processed{$style}) {
	     print STDERR "style not defined '$style'\n" if SHOW_STYLE_NOT_PROCESSED;
	     $style_not_processed{$style} = '';
	   }
	 }
	 $STYLES{'par'};
       };
       foreach (qw(qj qc ql qr)) { # for some html elements...
	 if ($par_props{$_}) {
	   $tag_start .= " ALIGN=$PAR_ALIGN{$_}";
	 }
       }
     }

     $_[SELF]->trace("$tag_start-$tag_end: $text") if TRACE;
     my $char_props = $_[SELF]->force_char_props('end');
     if (SHOW_RTF_LINE_NUMBER) {
       output "$N$before<$tag_start>[$.]$text$char_props</$tag_end>$N";
     } else {
       output "$N$before<$tag_start>$text$char_props</$tag_end>$N";
     }
     $START_NEW_PARA = 1;
   },
				# Hypertextuel links
#   'bookmark' => sub {
#     $_[SELF]->trace("bookmark $event $text") if TRACE;
#     if ($event eq 'end') {
#       return if $in_Bookmark--;
#       output("</a>");
#     } else {
#       return if ++$in_Bookmark;
#       output("<a name='$text'>");
#     }
#   },
#   'field' => sub {
#     my $id = $_[0];
#     $_[SELF]->trace("field $event $text") if TRACE;
#     if ($event eq 'end') {
#       return if $in_Field--;
#       output("$text</a>");
#     } else {
#       return if ++$in_Field;
#       output("<a href='#$id'>"); # doesn't work!
#     }
#   },
				# CHAR properties
   'b' => sub {			
     $style = 'b';
     if ($event eq 'end') {
       output "</$style>";
     } else {
       output "<$style>";
     }
   },
   'i' => sub {
     $style = 'i';
     if ($event eq 'end') {
       output "</$style>";
     } else {
       output "<$style>";
     }
   },
   'ul' => sub {		
     $style = 'u';
     if ($event eq 'end') {
       output "</$style>";
     } else {
       output "<$style>";
     }
   },
   'sub' => sub {
     $style = 'sub';
     if ($event eq 'end') {
       output "</$style>";
     } else {
       output "<$style>";
     }
   },
   'super' => sub {
     $style = 'sup';
     if ($event eq 'end') {
       output "</$style>";
     } else {
       output "<$style>";
     }
   },
   'strike' => sub {
     $style = 'strike';
     if ($event eq 'end') {
       output "</$style>";
     } else {
       output "<$style>";
     }
   },
  );

###############################################################################
# Could be used in a next release
# manage a minimal context for the tag generation
# gen_tags(EVENT, TAG_NAME, [ATTLIST])
#          EVENT: open|close
# return: a tag start|end
my %cant_nest = map { $_ => 1 } qw(a);
use constant GEN_TAGS_WARNS => 1;
my @element_stack = ();		
my %open_element = ();
sub gen_tags {			# manage a minimal context for tag outputs

	debug( 'gen_tags', @_ ) if RTF_DEBUG > 5;

  die "bad argument number"  unless (@_ >= 2);
  my ($eve, $tag, $att)  = @_;

  my $result = '';
  if ($eve eq 'open') {
    push @element_stack, $tag; # add a new node
    if ($open_element{$tag}++ and defined $cant_nest{$tag}) {
      #print STDERR "skip open $tag\n";
      $result = '';
    } else {
      $result = '<'. $tag . '>' . $N;
    }
  } else {			# close
    unless (@element_stack) {
      warn "no element to close on the '$tag' tag\n" if GEN_TAGS_WARNS;
      return $result;
    }
    my $opened_elt;
    while (1) {
      $opened_elt = pop @element_stack;
      if (--$open_element{$tag} >= 1 and defined $cant_nest{$tag}) {
	#print STDERR "skip close $opened_elt\n";
      } else {
	$result .= '</' . $opened_elt . '>' . $N;
      }
      last if $tag eq $opened_elt;
      unless (@element_stack) {
	warn "element stack is empty on $tag close\n" if GEN_TAGS_WARNS;
	return $result;
      }
    }
  }
  $result;
}
###############################################################################
# If you have an &<entity>; in your RTF document and if
# <entity> is a character entity, you'll see "&<entity>;" in the RTF document
# and the corresponding glyphe in the HTML document
# How to give a new definition to a control registered in %do_on_control:
# - method redefinition (could be the purist's solution)
# - $Control::do_on_control{control_word} = sub {}; 
# - when %do_on_control is exported write:
$do_on_control{'ansi'} =	# callback redefinition
  sub {
    # RTF: \'<hex value>
    # HTML: &#<dec value>;
    
    my @charmap_data = $_[SELF]->charmap_reader( $_[CONTROL] );

    my %charset = (		# general rule
		   %charmap_defaults,
				# and some specific defs
		   map({ s/^\s+//; split /\s+/ } @charmap_data)
		  );
    *char = sub { 
      my $char_props;
      if ($START_NEW_PARA) {
	$char_props = $_[SELF]->force_char_props('start');
	$START_NEW_PARA = 0;
      } else {
	$char_props = $_[SELF]->process_char_props();
      }
      output $char_props . $charset{$_[1]}
    } 
  };


				# symbol processing
				# RTF: \~
				# named chars
				# RTF: \ldblquote, \rdblquote
$symbol{'~'} = '&nbsp;';
$symbol{'tab'} = ' '; #'&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;';
$symbol{'ldblquote'} = '&laquo;';
$symbol{'rdblquote'} = '&raquo;';
$symbol{'line'} = '<br>';
sub symbol {	

		debug( 'symbol', @_ ) if RTF_DEBUG > 5;

		
  my $char_props;
  if ($START_NEW_PARA) {	
    $char_props = $_[SELF]->force_char_props('start');
    $START_NEW_PARA = 0;
  } else {
    $char_props = $_[SELF]->process_char_props();
  }
  if (defined(my $sym = $symbol{$_[1]}))  {
    output $char_props . $sym;
  } else {
    output $char_props . $_[1];		# as it
  }
}
				# Text
				# certainly do the same thing with the char() method
sub text {			# parser callback redefinition

	debug( 'text', @_ ) if RTF_DEBUG > 5;


  my $text = $_[1];
  my $char_props = '';
  if ($START_NEW_PARA) {	
    $char_props = $_[SELF]->force_char_props('start');
    $START_NEW_PARA = 0;
  } else {
    $char_props = $_[SELF]->process_char_props();
  }
  $text =~ s/&/&amp;/g;	
  $text =~ s/</&lt;/g;	
  $text =~ s/>/&gt;/g;	
  if (defined $char_props) { 
    output("$char_props$text");
  } else {
    output("$text");
  }
}

sub debug {

	my $function = shift;
	
	print STDERR "[RTF::HTML::Converter::$function]" . (join '|', @_ ) , "\n";

}

1;
__END__