Multibyte/EUC_TW.pm - metacpan.org


            
              1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
—
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
              package String::Multibyte::EUC_TW;
use vars qw($VERSION);
$VERSION = '1.12';
+{
    # Name of the character set described by this table.
    charset => 'EUC-TW',

    # Regular expression source (kept as a string) for one character:
    # a single byte 0x00..0x7F, a pair of bytes each in 0xA1..0xFE, or
    # the four-byte form 0x8E, 0xA1..0xB0, 0xA1..0xFE, 0xA1..0xFE.
    regexp => '(?:[\x00-\x7F]|[\xA1-\xFE][\xA1-\xFE]|' .
        '\x8E[\xA1-\xB0][\xA1-\xFE][\xA1-\xFE])',

    # Compare two characters.  A shorter encoding always sorts before a
    # longer one; encodings of equal length are compared as strings.
    # Returns a negative, zero or positive number, like <=> and cmp.
    cmpchar => sub {
        my $by_length = length($_[0]) <=> length($_[1]);
        return $by_length if $by_length;
        return $_[0] cmp $_[1];
    },

    # Return the character that follows the given one in the order
    # 0x00..0x7F, 0xA1A1..0xFEFE, 0x8EA1A1A1..0x8EB0FEFE.
    #
    # Returns undef for 0x8EB0FEFE (nothing follows it).  Returns
    # nothing (bare return) when the argument is not 1, 2 or 4 long, or
    # when a four-long argument does not start with 0x8E.  Apart from
    # that, the bytes of the argument are not range-checked here.
    nextchar => sub {
        my ($char) = @_;
        my $width = length $char;

        if ($width == 1) {
            # The last single-byte character rolls over to the first
            # double-byte character.
            return "\xA1\xA1" if $char eq "\x7F";
            return chr(ord($char) + 1);
        }

        if ($width == 2) {
            # The last double-byte character rolls over to the first
            # four-byte character.
            return "\x8E\xA1\xA1\xA1" if $char eq "\xFE\xFE";

            my ($first, $second) = unpack('CC', $char);

            # Second byte exhausted: bump the first, restart the second.
            return chr($first + 1) . "\xA1" if $second == 0xFE;
            return pack('CC', $first, $second + 1);
        }

        if ($width == 4) {
            # Strip the 0x8E lead byte; without it this is not a
            # four-byte character at all.
            return unless $char =~ s/^\x8E//;

            my ($second, $third, $fourth) = unpack('CCC', $char);

            # Increment the lowest byte that is not yet at its maximum
            # and reset every byte below it to 0xA1.
            return pack('CCCC', 0x8E, $second, $third, $fourth + 1)
                if $fourth != 0xFE;
            return pack('CCCC', 0x8E, $second, $third + 1, 0xA1)
                if $third != 0xFE;
            return pack('CCCC', 0x8E, $second + 1, 0xA1, 0xA1)
                if $second != 0xB0;

            # End of the character set.  This is an explicit undef (not
            # a bare return), so list-context callers get one element.
            return undef;
        }

        # Any other length is not a character of this encoding.
        return;
    },
};
__END__
=head1 NAME

String::Multibyte::EUC_TW - internally used by String::Multibyte
for EUC-TW

=head1 SYNOPSIS

    use String::Multibyte;

    $euctw = String::Multibyte->new('EUC_TW');
    $euctw_length = $euctw->length($euctw_string);

=head1 DESCRIPTION

C<String::Multibyte::EUC_TW> is used for manipulation of strings
in EUC-TW.

Byte range of single-byte characters:
C<0x00..0x7F>.

First and second byte range of double-byte characters:
C<0xA1..0xFE>.

First byte of four-byte characters:
C<0x8E>.

Second byte range of four-byte characters:
C<0xA1..0xB0>.

Third and fourth byte range of four-byte characters:
C<0xA1..0xFE>.

Character order (invalid code points are excluded):
C<0x00..0x7F>, C<0xA1A1..0xFEFE>, C<0x8EA1A1A1..0x8EB0FEFE>.

=head1 CAVEAT

C1 controls other than SS2 (C<0x80..0x8D> and C<0x8F..0x9E>)
are not supported.

Plane 1 characters in G1 (e.g. C<0xA1A1>) and those in G2
(e.g. C<0x8EA1A1A1>) are not treated as equivalents.

=head1 SEE ALSO

L<String::Multibyte>

=cut

	Global
`s`	Focus search bar
`?`	Bring up this help dialog

	GitHub
`g` `p`	Go to pull requests
`g` `i`	Go to GitHub issues (only if GitHub is the preferred repository)

	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse

	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)