#!/usr/local/bin/perl

$VERSION = '0.14';

=head1 NAME

g2b.pl - Simplified to Traditional Chinese converter

=head1 SYNOPSIS

B<g2b.pl> [ B<-p> ] [ B<-u> ] [ I<inputfile> ...] > I<outputfile>

=head1 USAGE

    % g2b.pl -p < gbk.txt > big5.txt
    % g2b.pl -pu < simp.txt > trad.txt

=head1 DESCRIPTION

The B<g2b.pl> utility reads files sequentially, converts them from
Simplified to Traditional Chinese, then writes them to the standard
output.  The I<inputfile> arguments are processed in command-line order.
If I<inputfile> is a single dash (C<->) or absent, this program reads
from the standard input.

The C<-p> switch enables rudimentary phrase-oriented substitution via a
small built-in lexicon.

The C<-u> switch specifies that both the input and output streams should
be UTF-8 encoded.  If not specified, the input stream is assumed to be
in GBK, and the output will be encoded in Big5.

=head1 CAVEATS

In pure-perl implementations (pre-5.8 perl or without a C compiler),
C<-p> and C<-u> cannot be used together.

=cut

use strict;
use warnings;
use Getopt::Std;

sub MAP ();

my %opts;
BEGIN {
    # Switches are parsed at compile time so that the `use constant`
    # declarations following this block can read %opts.
    getopts('hup', \%opts);

    # -h: hand this script to perldoc, then quit.
    $opts{h} and do { system('perldoc', $0); exit };

    # Route every warning to a handler that does nothing.
    $SIG{__WARN__} = sub { };
}

# UTF8 is true when -u was given.
use constant UTF8 => $opts{u};
# DICT is true when -p was given and either -u is off or perl is 5.8+.
use constant DICT => ($opts{p} and (!UTF8 or $] >= 5.008));

use Encode::HanConvert;

# Phrase lexicon (hash reference) and an alternation of its quoted keys,
# longest key first so that longer phrases win over their prefixes.
# Both stay undef when DICT is off.  Plain ternaries are used instead of
# `my ... if DICT`, whose behaviour perlsyn documents as undefined.
my $MAP  = DICT ? MAP() : undef;
my $KEYS = DICT
    ? join('|', map { quotemeta } sort { length($b) <=> length($a) } keys %$MAP)
    : undef;

# Convert every file named on the command line, in order; with no
# arguments, convert standard input.
if (@ARGV) {
    for my $path (@ARGV) {
        # A lone dash selects standard input, as described in the POD.
        if ($path eq '-') {
            convert(\*STDIN);
            next;
        }

        # Three-argument open: the name is taken literally and is never
        # parsed for mode characters or pipes.
        my $in;
        unless (open $in, '<', $path) {
            # warn() is muted by the __WARN__ handler installed in BEGIN,
            # so the failure is written to STDERR directly.
            print STDERR "Can't open $path: $!\n";
            next;
        }
        convert($in);
        close $in;
    }
}
else {
    convert(\*STDIN);
}

{
    # Set once the output layer has been pushed onto STDOUT.  Pushing an
    # :encoding layer again for every input file would stack the layers
    # and encode the output more than once.
    my $stdout_ready;

    # convert($fh): read $fh line by line, convert each line from
    # Simplified to Traditional Chinese, apply the phrase lexicon when
    # DICT is on, and print the result to STDOUT.
    sub convert {
        my ($fh) = @_;

        # perl 5.8+: conversion is done by I/O layers on the handles.
        if ($] >= 5.008) {
            binmode($fh, UTF8 ? ':encoding(simp-trad)' : ':encoding(gbk-trad)');
            unless ($stdout_ready) {
                binmode(STDOUT, UTF8 ? ':utf8' : ':encoding(big5)');
                $stdout_ready = 1;
            }
        }

        # A lexical line buffer keeps the caller's $_ untouched.
        while (defined(my $line = <$fh>)) {
            # Older perls: convert each line in place instead.
            unless ($] >= 5.008) {
                if (UTF8) { Encode::HanConvert::simp_to_trad($line) }
                else { Encode::HanConvert::gb_to_big5($line) }
            }
            if (DICT) { $line =~ s/($KEYS)/$MAP->{$1}/g }
            print $line;
        }
    }
}

use constant MAP => DICT && {
    map { UTF8 ? Encode::decode(big5 => $_) : $_ } reverse (
'¤A¤Ó' => '¥H¤Ó',
'¤A¤Óºô¸ô' => '¥H¤Óºôµ¸',
'¤E¤E­¼ªkªí' => '¤E¤Eªí',
'¤K¤Ø' => '¤K§`',
'¤XËC' => 'ÌÖËC',
'¤¤Â_ÂI' => 'Â_ÂI',
'¤¶­H' => '¤¶«`',
'¤¶­±' => '±µ¤f',
'¤¸¥ó' => '²Õ¥ó',
'¤½¨Æ¥]' => '¤½¤å¥]',
'¤Éµ¥¦Ò¸Õ' => '¤É¯Å¦Ò¸Õ',
'¤É¾­' => '¤É§Ç',
'¤Ì§x' => 'Ë®§x',
'¤ÓªÅ¤H' => '¯è¤Ñ­û',
'¤ÓªÅ¦ç' => '¦t©z­¸¦æªA',
'¤ÓªÅ±ô' => '¯è¤Ñ­¸¾÷',
'¤ÓªÅ²î' => '¦t©z­¸²î',
'¤à¤Ú«´¤Ò' => '¤àº¸¤Ú³ì¤Ò',
'¤á¤f¦Wï' => '¤á¤fï',
'¤ä´©' => '¤ä«ù',
'¤å¥ó§¨' => '¬¡­¶§¨',
'¤éº¸°Ò¥Á±Ú' => '¤é¦Õ°Ò¥Á±Ú',
'¤ñÂı׶ð' => '¤ñ¶ë¶ð',
'¤õ«÷' => '¤õ¨Ã',
'¤ù»y' => 'µü²Õ',
'¥\\¯àªí' => 'µæ³æ',
'¥]¨¦' => '­c¨¦',
'¥v¹FªL' => '´µ¤jªL',
'¥~¤ÓªÅ' => '¥~¼hªÅ¶¡',
'¥¦­Ì' => '¨e­Ì',
'¥¬§Æ' => '¥¬¤°',
'¥­¦æ§@·~' => '¨Ã¦æ¾Þ§@',
'¥­¦æ°ð' => '¨Ã¦æºÝ¤f',
'¥­¦æ½u' => '¨Ã¦æ½u',
'¥®¸X¶é' => '¥®¨à¶é',
'¥À­µ' => '¤¸­µ',
'¸ê®Æ' => '¼Æ¾Ú',
'¥ð¤h¹y' => '¥ð´µ´°',
'¥ò¤¶' => '¤¤¤¶',
'¥úºÐ¾÷' => '¥úÅX',
'¥þ§Î' => '¥þ¨¤',
'¦@¥Î' => '¦@¨É',
'¦B´Î' => '´Î¦B',
'¦C¦L' => '¥´¦L',
'¦Lªí¾÷' => '¥´¦L¾÷',
'¦V¥ú©Ê' => '¦V±Û¥ú©Ê',
'¦]¼Æ' => '¦]¤l',
'°j°é' => '´`Àô',
'¦^À³' => 'ÅTÀ³',
'¦h©ú¥§¥[' => '¦h¦Ì¥§¥[',
'¦r¦ê' => '¦r²Å¦ê',
'¦r­º' => '«eºó',
'¦sÀÉ' => '¦s½L',
'¦¬»È¾÷' => '¦¬´Ú¾÷',
'¦Ð¶q¯Å' => '»´¶q¯Å',
'¦ç¯Á¤ñ¨È' => '®J¶ë«X¤ñ¨È',
'¦è¤¸' => '¤½¤¸',
'¦ì§}' => '¦a§}',
'¦î¦C' => '¶¤¦C',
'¦õµý' => '¥ªµý',
'¦øªA¾¹' => 'ªA°È¾¹',
'§@·~¨t²Î' => '¾Þ§@¨t²Î',
'§B©ú¨u' => '§B©ú¿«',
'§C¶¥»y¨¥' => '§C¯Å»y¨¥',
'§Y®É' => '¹ê®É',
'§l¥ú©Ê' => '§l±Û¥ú©Ê',
'§»³£©Ô´µ' => '¬x³£©Ô´µ',
'§Ç¦C°ð' => '¦ê¦æºÝ¤f',
'§Ë°ó' => 'ÛÁ°ó',
'¨F©Ôªo' => '¦â©Ôªo',
'¨F¯Q¦aªü©Ô§B' => '¨F¯Sªü©Ô§B',
'¨H¬A' => '¨I¬A',
'¨§çM' => '×^çM',
'¨­¾úÁn' => '¥ßÅéÁn',
'¨¾¼g' => '¼g«OÅ@',
'¨ÓºÖ½u' => '¨Ó½Æ½u',
'¨ä¥L' => '¨ä¥¦',
'¨ó©w' => '¨óij',
'¨÷¶b' => 'ºu°Ê±ø',
'©PÃä' => '¥~³ò',
'©}¥ú©Ê' => '©}±Û¥ú©Ê',
'©Ý¸O' => 'Ý­¸O',
'ª`¸}' => '¸}ª`',
'ª`¸Ñ' => '§åª`',
'ª¡§É' => 'ɧÉ',
'ª«¥ó' => '¹ï¶H',
'ª­ª±' => 'ª­æö',
'ª¾ÃÑ¥÷¤l' => 'ª¾ÃѤÀ¤l',
'ª¿¥Û' => 'Öº¥Û',
'ª¿´¹¤ù' => 'Öºªä¤ù',
'ª¿¹q´¹Åé' => 'Öº´¹ÅéºÞ',
'ªÅ¥ÕÁä' => 'ªÅ®æÁä',
'ªô¦Nº¸' => '¥C¦Nº¸',
'ªù³w' => 'ªù®|',
'ªü©i´µ§§' => 'ªü©i´µ¯S®Ô',
'ªü´µ¤ÇÆF' => 'ªü¥q¤ÇªL',
'«c¤M' => 'îK¤M',
'«n¥Ê' => 'µf¥Ê',
'«Ë³Hë^' => '«Ë³Hßâ',
'«ü¼Ð' => '«ü°w',
'¬A©·' => '¬A¸¹',
'¬H½ü¦Ñ¤â' => 'åÁ½ü¦Ñ¤â',
'¬M®g' => '¬M¹³',
'¬È¥ú©Ê' => '¬È±Û¥ú©Ê',
'¬Û®e' => '­Ý®e',
'­C½Ï¸`' => '¸t½Ï¸`',
'­I¥ú©Ê' => '­I±Û¥ú©Ê',
'­^¤o' => '­^¦T',
'­p®É¾¹' => '©w®É¾¹',
'­pµ{¨®' => '¥X¯²¨®',
'­pºâ¾÷' => '­pºâ¾¹',
'­}´µ¥i' => '­}´µ¬ì',
'­«¼½' => '¦^©ñ',
'­¹¨ã' => 'À\\¨ã',
'­Ë·°' => '­Ë¾`',
'­Ô¾÷«Ç' => '­Ô¾÷¼Ó',
'®M¸Ë³nÅé' => '³n¥ó¥]',
'®y¼Ð' => '§¤¼Ð',
'®z¥ú©Ê' => '®z±Û¥ú©Ê',
'®°¶}' => 'ݵ¶}',
'®¸¦í' => 'ݳ¦í',
'®¿·b' => 'ѽ·b',
'®ü¨½' => '®ü¯@',
'¯B¤ô¦L' => '¤ô¦L',
'¯Q¥C' => '¯QËú',
'¯Q¨H¨H' => '¯Q¨I¨I',
'¯}­µ¦r' => '¦h­µ¦r',
'¯µÓD' => '¯µÄy',
'¯Á°¨§Q¨È' => '¯Á°¨¨½',
'¯Üż' => 'ÛQż',
'¯Þ»K' => 'Ëv»K',
'¯ã°©' => 'ìm°©',
'¯è¤ÓÁ`¸p' => '¯è¤ÑÁ`¸p',
'¯óµÚ' => '¯ó±g',
'°O¾ÐÅé' => '¤º¦s',
'°sùf»ó' => '°sêt»ó',
'°}¦C' => '¼Æ²Õ',
'°­ãÚ' => '°­ñÀ',
'°Æµ{¦¡' => '¤lµ{§Ç',
'°ÆÀɦW' => 'ÂX®i¦W',
'°Õ°Õ¶¤' => '©Ô©Ô¶¤',
'°ßŪ' => '¥uŪ',
'°ïÅ|' => '°ï´Ì',
'±M®×' => '¶µ¥Ø',
'±`¦¡' => '¨Òµ{',
'±d¤D¨f®æ' => '±d¯I¨f§J',
'±¶®|' => '§Ö±¶¤è¦¡',
'±½´y¾¹' => '±½´y»ö',
'±Ò°Ê' => '¿E¬¡',
'²¦¤ó©w²z' => '¤ÄªÑ©w²z',
'²¦¥d¯Á' => '²¦¥[¯Á',
'²Ë»Ã' => 'Ãæ»Ã',
'²ö¥¾¯S' => '²ö¤ã¯S',
'³J¥Á' => 'ãô¥Á',
'³n¤ù' => '½¦¨÷',
'³q°T¿ý' => '³q°Tï',
'³q¹D' => '«H¹D',
'³s½u' => 'Áp¾÷',
'³t­¹' => '§ÖÀ\\',
'³¦¹Z' => '³¦ö`',
'³±¨H¨H' => '³±¨I¨I',
'³»©K©K' => '³»¨í¨í',
'³Â¤l' => '·ò¤l',
'³Â¶ë½Ñ¶ë' => '°¨ÂĽѶë',
'³Â·ô¤£¾é' => '·ò·ô¤£¤F',
'³Ù¼M' => 'ë~¼M',
'´¨¯Ý' => 'Ý´¯Ý',
'´°´°' => '±¬±¬',
'´°·ü' => '±¬·ü',
'´¹¤ù' => 'ªä¤ù',
'´¼¼z' => '´¼¯à',
'´å¼Ð' => '¥ú¼Ð',
'µL¦k¤§¨a' => 'É^¦k¤§¨a',
'µf­X' => '¦è¬õ¬U',
'µn¥X' => 'ª`¾P',
'µwÅé' => 'µw¥ó',
'µ{¦¡' => 'µ{§Ç',
'µ{§Ç±±¨î' => '¹Lµ{±±¨î',
'µª¿ý¾÷' => '¿ý­µ¾÷',
'µªÂÐ' => 'µª½Æ',
'µ¬¤æ' => '­o¤æ',
'µ²¹Ù' => 'µ²¥ë',
'µÉÁy' => 'ÀSÁy',
'µÒ¬r' => '·Ï¬r',
'µøµ¡' => 'µ¡¤f',
'¶W³sµ²' => '¶W¯ÅÃì±µ',
'¶l»¼°Ï¸¹' => '¶l¬F½s½X',
'¶Â¨H¨H' => '¶Â¨I¨I',
'¶Ã¼Æ' => 'ÀH¾÷¼Æ',
'¶×¬y±Æ' => 'Á`½u',
'¶ì½¦' => '¶ì®Æ',
'¶ø§J©Ô²ü°¨¦{' => '«X§J©Ô²ü°¨¦{',
'¶ø·|' => '¶ø©e·|',
'·P¥ú©Ê' => '·P±Û¥ú©Ê',
'·s¨u¥¬®L' => '·s¨u¥¬¤°º¸',
'·t¨H¨H' => '·t¨I¨I',
'·¹®ð' => 'ÍB®ð',
'·Æ¹«' => '¹«¼Ð',
'¸q¤j§Q' => '·N¤j§Q',
'¸t¦a¤ú­ô' => '¸t¦a¨È­ô',
'¸ÑªR«×' => '¤À¿ë²v',
'¸Ñ½X' => 'Ķ½X',
'¸Õºâªí' => '¹q¤lªí®æ',
'¸ê°T' => '«H®§',
'¸ü¤J' => '¥[¸ü',
'¹Bºâ¤¸' => '¾Þ§@¼Æ',
'¹Bºâ¦¡' => 'ªí¹F¦¡',
'¹O¦~' => 'ëƦ~',
'¹P¥@' => 'èQ¥@',
'¹d´I' => '¥¨´I',
'¹h¹D' => 'ºôÃö',
'¹p®g¦Lªí¾÷' => '¿E¥ú¥´¦L¾÷',
'¹p®Ú' => '¨½®Ú',
'¹q´¹Åé' => '´¹ÅéºÞ',
'¹q¶Çµø°T' => '¹Ï¤å¹qµø',
'¹q¸£' => '­pºâ¾÷',
'¹q¸£µ{¦¡' => '­pºâ¾÷µ{§Ç',
'¹qÁç' => '¹q¶ºÁç',
'¹Æù' => '¹ÆÅo',
'¹Ï¥Ü' => '¹Ï¼Ð',
'ºG¿F' => 'ºG²H',
'ºP¦X' => '§é¦X',
'ºj¤f' => 'Âò¤f',
'ºtºâªk' => 'ºâªk',
'º®½E' => 'º®¶ó',
'º²©¥' => 'Âè©¥',
'ºÏ¤ù' => 'ºÏ½L',
'ºÏ­y' => 'ºÏ¹D',
'ºÏ°Ï' => '®°°Ï',
'ºÏºÐ' => 'ºÏ½L',
'ºÏºÐ¾÷' => 'ºÏ½LÅX°Ê¾¹',
'ºÏºÐÀÉ' => 'ºÏ½L¤å¥ó',
'ºÒ¯»§X' => '¾¥¯»²°',
'ºÖ°¨ªL' => 'ºÖº¸°¨ªL',
'ºÞï¶' => 'ºÞÆ_',
'ºô¸ô' => 'ºôµ¸',
'ééÄgÄg' => '»X»XÄïÄï',
'éÄg' => '»XÄï',
'»`ÁÊ' => '·jÁÊ',
'»«¤h' => '©b¹£',
'»®¾|³·¤Ò' => '»®¾|¾å¤Ò',
'»·ºÝ' => '»·µ{',
'»ñ±ù' => 'µÔÅÚ',
'»ò©f' => '¤\\©f',
'»ò»ò°Û°Û' => '¤\\¤\\°Û°Û',
'¼x¦¬' => '©º¦¬',
'¼Æ¾Ú¾÷' => '½Õ¨î¸Ñ½Õ¾¹',
'¼È¦s¾¹' => '½w¦s¾¹',
'¼Ðñ' => '¨÷¼Ð',
'¼Ò²Õ' => '¼Ò¶ô',
'¼ÒÀÀ' => '¥é¯u',
'¼ÒÀÀ­pºâ¾÷' => '¥é¯u­pºâ¾¹',
'½Y¥Û' => '½L¥Û',
'½d¥»' => '¼ÒªO',
'½k¨½½k¶î' => '­J¨½­J¶î',
'½k¶î' => '­J¶î',
'½u¤W§@·~' => 'Áp¾÷¾Þ§@',
'½çë¬' => '½çî°',
'½ð½ñ»R' => '½ðľ»R',
'¾A¤~' => '¾AÅ×',
'¾B¸n' => '«Ì½ª',
'¾KÂÈÂÈ' => '¾KâÀâÀ',
'¾¥¨H¨H' => '¾¥¨I¨I',
'¾À³ø' => 'Àð³ø',
'¾é¦p' => 'ÁA¦p',
'¿nÅé¹q¸ô' => '¶°¦¨¹q¸ô',
'¿Ã¹õ' => '«Ì¹õ',
'¿Ô¸ß' => '«t¸ß',
'¿Õ°Ò¦a' => '¿Õ°Ò©³',
'¿Õ°Ò²Ä' => '¿Õ°Ò©³',
'¿ý¼v' => '¿ý¹³',
'ÀJ®«' => 'ó箫',
'Àz°Þ' => 'ÀzÎB',
'ÁVÁÏ' => 'ðþÁÏ',
'ÁÍ¥ú©Ê' => 'Áͱۥú©Ê',
'Áå¿û' => '·Ò¿û',
'Á鿤' => 'ÄÁ¿¤',
'Á÷²N²O' => '¦B²N²O',
'Ân¶§' => '¨H¶§',
'²³ø' => 'ºt¥Ü¤å½Z',
'ÂǾ÷' => '­É¾÷',
'ÂȤÑ' => 'âÀ¤Ñ',
'Âà±ÊÂI' => 'Âà§éÂI',
'Âø°T' => '¾¸Án',
'Ã]Ã]' => '¥V¥V',
'ù¥¬ªL¥d' => 'ù¥¬º¸¥d',
'ù­ùù­ö' => 'Åo­ùÅo­ö',
'ù¼w®q' => 'ù±o®q',
'ùù­ö­ö' => 'ÅoÅo­ö­ö',
'ùù¶Û¶Û' => 'ÅoÅo¶Û¶Û',
'ÃѧO¦r' => '¼ÐÃѲÅ',
'Ãß½Ò' => '°k½Ò',
'Ãã¶×' => 'µü¶×',
'ÃöÁä¦r' => 'ÃöÁäµü',
'Ãþ¤ñ' => '¼ÒÀÀ',
'ÄTµß' => '¦±µß',
'Äâ±a«¬' => '«KÄ⦡',
'Äæ¦ì' => '¦r¬q',
'ÅI­Ü¹õ©²' => 'ñT­Ü¹õ©²',
'ÅJ®g' => '¿E¥ú',
'Åv§ú' => '¥OµP',
'Ū¥d¾÷' => '¥d¤ù¾\\Ū¾÷',
'ÆB¦¿' => 'ù¢¦¿',
'Êe¤Ó§g' => 'Ú®¤Ó§g',
'×ƦÞ' => '¾¡¦Þ',
'âé¤Y' => '¸A¤Y',
'带Ç' => '¼Â¦Ç',
'åø¤û' => '¼ù¤û',
'çÁ¦P' => '­JÛÂ',
'ìê¦ç' => '·o¦ç',
'₩X' => 'ø¥¦X',
'¦^ÂÐ' => '¦^Î`',
'¦ì¤¸²Õ' => '¦r¸`',
'Ãìµ²' => 'Ãì±µ',
'±K½X' => '¤f¥O',
'¡u' => '¡¥',
'¡v' => '¡¦',
'¡y' => '¡§',
'¡z' => '¡¨',
'Æg¬ü' => 'ÃÙ¬ü',
'Æg¹|' => 'ÃÙ¹|',
'Ægµü' => 'ÃÙµü',
'Âi«e' => '¥x«e',
'Âi¿O' => '¥x¿O',
'Âi¤W' => '¥x¤W',
'»OÆW' => '¥xÆW',
'»O¥_' => '¥x¥_',
'»O«n' => '¥x«n',
'»ä­·' => '¥x­·',
'Äѯ»' => '­±¯»',
'ÄÑ¥]' => '­±¥]',
'Äѱø' => '­±±ø',
'ª£ÄÑ' => 'ª£­±',
'¼´ÄÑ' => '¼´­±',
'¤ý«á' => '¤ý¦Z',
'¥À«á' => '¥À¦Z',
) }
__END__

=head1 SEE ALSO

L<b2g.pl>, L<Encode::HanConvert>

=head1 AUTHORS

Currently maintained by Kuang-che Wu E<lt>kcwu@csie.orgE<gt>.  Original author:
Audrey Tang E<lt>cpan@audreyt.orgE<gt>

=head1 COPYRIGHT

Copyright 2002-2009 by Audrey Tang E<lt>cpan@audreyt.orgE<gt>.
Copyright 2006 by Kuang-che Wu E<lt>kcwu@csie.orgE<gt>.

This program is free software; you can redistribute it and/or 
modify it under the same terms as Perl itself.

See L<http://www.perl.com/perl/misc/Artistic.html>

=cut