#!./perl
BEGIN {
pop
@INC
if
$INC
[-1] eq
'.'
}
my
%Opt
; getopts(
"ChH:e:f:t:s:pPv"
, \
%Opt
);
$Opt
{p} ||=
$Opt
{P};
$Opt
{e} ||=
'utf8'
;
$Opt
{f} ||=
$Opt
{e};
$Opt
{t} ||=
$Opt
{e};
$Opt
{h} and help();
my
(
$linebuf
,
$outbuf
);
my
$CPL
=
$Opt
{p} ? 64 : 8;
my
$linenum
;
my
$linesperheading
=
$Opt
{H};
my
$nchars
;
our
$PrevChunk
;
$Opt
{h} and help();
$Opt
{p} and do_perl(
$Opt
{s});
do_dump(
$Opt
{s});
exit
;
sub
do_perl{
my
$string
=
shift
;
$Opt
{P} and
print
"#!$^X -w\nprint\n"
;
unless
(
$string
){
while
(<>){
$linebuf
.= Encode::decode(
$Opt
{f},
$_
);
while
(
$linebuf
){
my
$chr
= render_p(
substr
(
$linebuf
, 0, 1,
''
));
length
(
$outbuf
) +
length
(
$chr
) >
$CPL
and print_P();
$outbuf
.=
$chr
;
}
}
$outbuf
and
print
print_P(
";"
);
}
else
{
while
(
$string
){
my
$chr
= render_p(
substr
(
$string
, 0, 1,
''
));
length
(
$outbuf
) +
length
(
$chr
) >
$CPL
and print_P();
$outbuf
.=
$chr
;
}
}
$outbuf
and
print
print_P(
";"
);
exit
;
}
sub
render_p{
my
(
$chr
,
$format
) =
@_
;
our
%S2pstr
;
$S2pstr
{
$chr
} and
return
$S2pstr
{
$chr
};
$chr
=~ /[\x20-\x7e]/ and
return
$chr
;
my
$fmt
= (
$chr
=~ /[\x00-\x1f\x7F]/) ?
q(\x%x)
:
q(\x{%x})
;
return
sprintf
$fmt
,
ord
(
$chr
);
}
sub
print_P{
my
$end
=
shift
;
$outbuf
or
return
;
print
'"'
, encode(
$Opt
{t},
$outbuf
),
'"'
;
my
$tail
=
$Opt
{P} ?
$end
?
"$end"
:
","
:
''
;
print
$tail
,
"\n"
;
$outbuf
=
''
;
}
sub
do_dump{
my
$string
=
shift
;
!
$Opt
{p} and
exists
$Opt
{H} and print_H();
unless
(
$string
){
while
(<>){
$linebuf
.= Encode::decode(
$Opt
{f},
$_
);
while
(
length
(
$linebuf
) >
$CPL
){
my
$chunk
=
substr
(
$linebuf
, 0,
$CPL
,
''
);
print_C(
$chunk
,
$linenum
++);
$Opt
{H} and
$linenum
%
$Opt
{H} ==
$CPL
-1 and print_S();
}
}
$linebuf
and print_C(
$linebuf
);
}
else
{
while
(
$string
){
my
$chunk
=
substr
(
$string
, 0,
$CPL
,
''
);
print_C(
$chunk
,
$linenum
++);
$Opt
{H} and
$linenum
%
$Opt
{H} ==
$CPL
-1 and print_S();
}
}
exit
;
}
sub
print_S{
print
"--------+------------------------------------------------"
;
if
(
$Opt
{C}){
print
"-+-----------------"
;
}
print
"\n"
;
}
sub
print_H{
print
" Offset 0 1 2 3 4 5 6 7"
;
if
(
$Opt
{C}){
print
" | 0 1 2 3 4 5 6 7"
;
}
print
"\n"
;
print_S;
}
sub
print_C{
my
(
$chunk
,
$linenum
) =
@_
;
if
(!
$Opt
{v} and
$chunk
eq
$PrevChunk
){
printf
"%08x *\n"
,
$linenum
*8;
return
;
}
$PrevChunk
=
$chunk
;
my
$end
=
length
(
$chunk
) - 1;
my
(
@ord
,
@chr
);
for
my
$i
(0..
$end
){
my
$chr
=
substr
(
$chunk
,
$i
,1);
my
$ord
=
ord
(
$chr
);
my
$fmt
=
$ord
<= 0xffff ?
" %04x"
:
" %05x"
;
push
@ord
, (
sprintf
$fmt
,
$ord
);
$Opt
{C} and
push
@chr
, render_c(
$chr
);
}
if
(++
$end
< 7){
for
my
$i
(
$end
..7){
push
@ord
, (
" "
x 6);
}
}
my
$line
=
sprintf
"%08x %s"
,
$linenum
*8,
join
(
''
,
@ord
);
$Opt
{C} and
$line
.=
sprintf
" | %s"
,
join
(
''
,
@chr
);
print
encode(
$Opt
{t},
$line
),
"\n"
;
}
sub
render_c{
my
(
$chr
,
$format
) =
@_
;
our
(
%S2str
,
$IsFullWidth
);
$chr
=~ /[\p{IsControl}\s]/o and
return
$S2str
{
$chr
} ||
" "
;
$chr
=~
$IsFullWidth
and
return
$chr
;
return
" "
.
$chr
;
}
sub
help{
my
$message
=
shift
;
my
$name
= basename($0);
$message
and
print
STDERR
"$name error: $message\n"
;
print
STDERR
<<"EOT";
Usage:
$name -[options...] [files...]
$name -[options...] -s "string"
$name -h
-h prints this message.
Inherited from hexdump;
-C Canonical unidump mode
-v prints the duplicate line as is. Without this option,
single "*" will be printed instead.
For unidump only
-p prints in perl literals that you can copy and paste directly
to your perl script.
-P prints in perl executable format!
-u prints a bunch of "Uxxxx,". Handy when you want to pass your
characters in mailing lists.
IO Options:
-e io_encoding same as "-f io_encoding -t io_encoding"
-f from_encoding convert the source stream from this encoding
-t to_encoding print to STDOUT in this encoding
-s string "string" will be converted instead of STDIN.
-H nline prints separater for each nlines of output.
0 means only the table headding be printed.
EOT
exit
;
}
BEGIN{
our
%S2pstr
= (
"\\"
=>
'\\\\'
,
"\0"
=>
'\0'
,
"\t"
=>
'\t'
,
"\n"
=>
'\n'
,
"\r"
=>
'\r'
,
"\v"
=>
'\v'
,
"\a"
=>
'\a'
,
"\e"
=>
'\e'
,
"\""
=>
qq(\\\")
,
"\'"
=>
qq(\\\')
,
'$'
=>
'\$'
,
"@"
=>
'\@'
,
"%"
=>
'\%'
,
);
our
%S2str
= (
qq(\x00)
=>
q(\0)
,
qq(\x01)
=>
q(^A)
,
qq(\x02)
=>
q(^B)
,
qq(\x03)
=>
q(^C)
,
qq(\x04)
=>
q(^D)
,
qq(\x05)
=>
q(^E)
,
qq(\x06)
=>
q(^F)
,
qq(\x07)
=>
q(\a)
,
qq(\x08)
=>
q(^H)
,
qq(\x09)
=>
q(\t)
,
qq(\x0A)
=>
q(\n)
,
qq(\x0B)
=>
q(\v)
,
qq(\x0C)
=>
q(^L)
,
qq(\x0D)
=>
q(\r)
,
qq(\x0E)
=>
q(^N)
,
qq(\x0F)
=>
q(^O)
,
qq(\x10)
=>
q(^P)
,
qq(\x11)
=>
q(^Q)
,
qq(\x12)
=>
q(^R)
,
qq(\x13)
=>
q(^S)
,
qq(\x14)
=>
q(^T)
,
qq(\x15)
=>
q(^U)
,
qq(\x16)
=>
q(^V)
,
qq(\x17)
=>
q(^W)
,
qq(\x18)
=>
q(^X)
,
qq(\x19)
=>
q(^Y)
,
qq(\x1A)
=>
q(^Z)
,
qq(\x1B)
=>
q(\e)
,
qq(\x1C)
=>
"^\\"
,
qq(\x1D)
=>
"^\]"
,
qq(\x1E)
=>
q(^^)
,
qq(\x1F)
=>
q(^_)
,
);
our
$IsFullWidth
=
qr/^[
\x{1100}-\x{1159}
\x{115F}-\x{115F}
\x{2329}-\x{232A}
\x{2E80}-\x{2E99}
\x{2E9B}-\x{2EF3}
\x{2F00}-\x{2FD5}
\x{2FF0}-\x{2FFB}
\x{3000}-\x{303E}
\x{3041}-\x{3096}
\x{3099}-\x{30FF}
\x{3105}-\x{312C}
\x{3131}-\x{318E}
\x{3190}-\x{31B7}
\x{31F0}-\x{321C}
\x{3220}-\x{3243}
\x{3251}-\x{327B}
\x{327F}-\x{32CB}
\x{32D0}-\x{32FE}
\x{3300}-\x{3376}
\x{337B}-\x{33DD}
\x{3400}-\x{4DB5}
\x{4E00}-\x{9FA5}
\x{33E0}-\x{33FE}
\x{A000}-\x{A48C}
\x{AC00}-\x{D7A3}
\x{A490}-\x{A4C6}
\x{F900}-\x{FA2D}
\x{FA30}-\x{FA6A}
\x{FE30}-\x{FE46}
\x{FE49}-\x{FE52}
\x{FE54}-\x{FE66}
\x{FE68}-\x{FE6B}
\x{FF01}-\x{FF60}
\x{FFE0}-\x{FFE6}
\x{20000}-\x{2A6D6}
]$/
xo;
}