#!/usr/bin/perl
use
Encode
qw [
encode_utf8 decode_utf8 ] ;
& main () ;
exit
0 ;
sub
uniq ( @ ) {
my
%seen
;
grep
{ not
$seen
{
$_
}++ }
@_
} ;
sub
ngram ( $ ) {
my
@ret
;
my
@c
=
split
// ,
$_
, -1 ;
for
my
$i
(0 ..
@c
-
$o
{n} ) {
my
$tmp
=
''
;
$tmp
.=
$c
[
$i
+
$_
]
for
0 ..
$o
{n} - 1 ;
push
@ret
,
$tmp
;
}
return
uniq
@ret
if
$o
{u} ;
return
@ret
;
}
sub
main ( ) {
$o
{n} //=2 ;
$o
{m} //=12 ;
my
%F
= () ;
while
( <> ) {
chomp
;
$_
= decode_utf8 (
$_
) ;
my
@ngrams
= ngram
$_
;
$F
{
$_
} ++
for
@ngrams
;
}
my
$q
=
$o
{m} ;
for
(
sort
{
$F
{
$b
} <=>
$F
{
$a
} }
keys
%F
) {
print
encode_utf8 (
"$F{$_}\t$_\n"
) ;
--
$q
or
last
;
}
}
BEGIN {
our
$VERSION
= 0.01 ;
$Getopt::Std::STANDARD_HELP_VERSION
= 1 ;
grep
{ m/--help/}
@ARGV
and
*VERSION_MESSAGE
=
sub
{} ;
}
sub
HELP_MESSAGE {
$ARGV
[1] //=
''
;
open
my
$FH
,
'<'
, $0 ;
while
(<
$FH
>){
s/\$0/
$Script
/g ;
print
$_
if
s/^=head1// .. s/^=cut// and
$ARGV
[1] =~ /^o(p(t(i(o(ns?)?)?)?)?)?$/i ? m/^\s+\-/ : 1;
}
close
$FH
;
exit
0 ;
}