#!perl
our $DATE = '2018-05-01'; # DATE
our $VERSION = '0.007'; # VERSION
use 5.010001;
use strict;
use Getopt::Long qw(:config bundling no_ignore_case);
my %Opts = (
action => 'rank',
ignore_leading_blanks => 0,
calc_percentile => [],
reverse => 0,
sort => 'ascii',
field_separator => "\t",
sort_field => 0,
show_rank => 1,
show_percentile => 0,
rank => 'default',
# TODO: --dictionary-order, -d
# TODO: --ignore-nonprinting, -i
# TODO: --human-numeric-sort, -h
# TODO: --version-sort, -V
# TODO: --percentile-format=s
);
sub parse_cmdline {
my $res = GetOptions(
'action=s' => \$Opts{action},
'calc-percentile=s' => sub {
$Opts{action} = 'calc-percentile';
$Opts{calc_percentile} = [split /\s*,\s*/, $_[1]];
},
'ignore-leading-blanks|b' => \$Opts{ignore_leading_blanks},
'ignore-case|f' => \$Opts{ignore_case},
'reverse|r' => \$Opts{reverse},
'field-separator|t=s' => \$Opts{field_separator},
'numeric-sort|n' => sub { $Opts{sort} = 'numeric' },
'sort=s' => \$Opts{sort},
'sort-field=i' => \$Opts{sort_field},
'rank=s' => \$Opts{rank},
'show-rank!' => \$Opts{show_rank},
'show-percentile!' => \$Opts{show_percentile},
'-p' => sub { $Opts{show_percentile} = 1 },
'help|h' => sub {
print <<USAGE;
Usage:
rank [OPTIONS]... [INPUT]...
rank --help
For more details, see the manpage/documentation.
USAGE
exit 0;
},
'version|v' => sub {
no warnings 'once';
say "rank version ".($main::VERSION // 'dev');
exit 0;
},
);
exit 99 if !$res;
}
sub run {
my $fs = $Opts{field_separator};
my @data; # elem: [sortkey, $orig_line]
my $re_split_fields = qr/\Q$fs\E/;
while (defined (my $line = <>)) {
if ($Opts{ignore_leading_blanks}) {
$line =~ s/\A\s+//;
}
my @fields = split $re_split_fields, $line;
my $sortkey = $fields[$Opts{sort_field}] // '';
if ($Opts{ignore_case}) { $sortkey = lc $sortkey };
push @data, [$sortkey, $line];
}
my $sortsub;
if ($Opts{sort} eq 'numeric') {
if ($Opts{reverse}) {
$sortsub = sub { $_[0] <=> $_[1] };
} else {
$sortsub = sub { $_[1] <=> $_[0] };
}
} else {
if ($Opts{reverse}) {
$sortsub = sub { $_[1] cmp $_[0] };
} else {
$sortsub = sub { $_[0] cmp $_[1] };
}
}
@data = sort { $sortsub->($a->[0], $b->[0]) } @data;
# exact percentiles to calculate
my @percentiles;
for (@{ $Opts{calc_percentile} }) {
my $p = $_+0;
die "rank: Invalid percentile '$_', must be (0,100]\n"
unless $p > 0 && $p <= 100;
push @percentiles, $p;
}
@percentiles = sort {$b <=> $a} @percentiles;
# if we're using the 'no-skip' ranking system, we'll need to calculate
# the lowest ranking (highest number)first
my $lowest_rank;
if ($Opts{rank} eq 'no-skip') {
my $prev_sortkey;
for my $row_num (0..$#data) {
my $item = $data[$row_num];
my $sortkey = $item->[0];
if (!defined($prev_sortkey) ||
$sortsub->($sortkey, $prev_sortkey) > 0) {
$lowest_rank //= 0;
$lowest_rank++;
}
$prev_sortkey = $sortkey;
}
}
my $rank;
my $prev_sortkey;
my $prev_percentile;
for my $row_num (0..$#data) {
my $item = $data[$row_num];
my $sortkey = $item->[0];
my $percentile;
if ($Opts{rank} eq 'default') {
if (!defined($prev_sortkey) ||
$sortsub->($sortkey, $prev_sortkey) > 0) {
$rank = $row_num+1;
}
$percentile = (@data - $rank + 1)/@data * 100;
} elsif ($Opts{rank} eq 'no-skip') {
if (!defined($prev_sortkey) ||
$sortsub->($sortkey, $prev_sortkey) > 0) {
$rank //= 0;
$rank++;
}
$percentile = ($lowest_rank - $rank+1) / $lowest_rank * 100;
} else { # no-same
$rank = $row_num+1;
$percentile = (@data - $rank + 1)/@data * 100;
}
if ($Opts{action} eq 'rank') {
print $rank, $fs if $Opts{show_rank};
printf "%.3f%s", $percentile, $fs
if $Opts{show_percentile};
print $item->[1];
} elsif ($Opts{action} eq 'calc-percentile') {
while (1) {
last unless @percentiles;
if ($percentile == $percentiles[0]) {
printf "%.3f\t%.8f\n", $percentile, $sortkey;
shift @percentiles;
} elsif ($percentile < $percentiles[0] || $row_num == $#data) {
if (defined $prev_percentile) {
# linear interpolation
my $val = $prev_sortkey +
($percentiles[0]-$prev_percentile) / ($percentile-$prev_percentile) * ($sortkey - $prev_sortkey);
printf "%.3f\t%.8f\n", $percentiles[0], $val;
} else {
printf "%.3f\t%s\n", $percentile, "N/A";
}
shift @percentiles;
} else {
last;
}
}
$prev_percentile = $percentile;
}
$prev_sortkey = $sortkey;
}
}
# MAIN
parse_cmdline();
run();
1;
# ABSTRACT: Rank lines of text
# PODNAME: rank
__END__
=pod
=encoding UTF-8
=head1 NAME
rank - Rank lines of text
=head1 VERSION
This document describes version 0.007 of rank (from Perl distribution App-rank), released on 2018-05-01.
=head1 SYNOPSIS
rank [OPTION]... [FILE]...
=head1 DESCRIPTION
C<rank> ranks lines of text, by default using the first field as the sort key
(can be changed with C<--sort-field> option). When there are multiple lines that
have the same sort key, C<rank> will assign the same rank to the lines. Finally,
the lines will be displayed with the ranks, starting from the highest ranked
(1).
Sample input:
21 ujang
30 budi
50 atang
75 robi
89 parjiyem
77 nono
75 tedi
Sample output using C<rank -n> (a.k.a. C<rank --numeric-sort>):
1 89 parjiyem
2 77 nono
3 75 robi
3 75 tedi
5 50 atang
6 30 budi
7 21 ujang
Sample output using C<rank -np> (a.k.a. C<rank --numeric-sort
--show-percentile>):
1 100.000 89 parjiyem
2 85.714 77 nono
3 71.429 75 robi
3 71.429 75 tedi
5 42.857 50 atang
6 28.571 30 budi
7 14.286 21 ujang
Sample output using C<rank -n --calc-percentile 5,25,50,75,95>):
95.000 84.80000000
75.000 75.50000000
50.000 56.25000000
25.000 27.75000000
5.000 15.15000000
=head1 EXIT CODES
0 on success.
255 on I/O error.
99 on command-line options error.
=head1 OPTIONS
=over
=item * --action=s (default: rank)
Valid values: C<rank>, C<calc-percentile>
The default action is to show ranking (C<rank>).
Action C<calc-percentile> will calculate specific percentiles. Usually you just
specify C<--calc-percentile a,b,c,...> to set action to C<calc-percentile>.
=item * --calc-percentile=f1,f2,...
Imply C<--action=calc-percentile>. Calculate specific percentile(s). Will use
linear interpolation to get the percentile values.
=item * --reverse, -r
=item * --ignore-leading-blanks, -b
=item * --ignore-case, -i
=item * --field-separator, -f (default: Tab)
=item * --sort=s
Valid values: C<numeric>, C<ascii>.
=item * --numeric-sort, -n
Shortcut for C<--sort=numeric>. By default numeric sort order is descending
instead of ascending. To do ascending sort, add C<-r>.
=item * --sort-field=i (default: 0)
=item * rank=s (default: default)
Change ranking system. The default is to have all lines that have the same sort
keys to be ranked the same, but the next ranking will skip numbers:
1 89 parjiyem
2 77 nono
3 75 robi
3 75 tedi
5 50 atang
6 30 budi
7 21 ujang
The C<no-skip> ranking system will not do these skips:
1 89 parjiyem
2 77 nono
3 75 robi
3 75 tedi
4 50 atang
5 30 budi
6 21 ujang
The C<no-same> ranking system will assign different ranks to lines that have the
same sort key:
1 89 parjiyem
2 77 nono
3 75 robi
4 75 tedi
5 50 atang
6 30 budi
7 21 ujang
=item * --no-show-rank
=item * --show-percentile, -p
=item * --help, -h
=item * --version, -v
=back
=head1 FAQ
=head1 HOMEPAGE
Please visit the project's homepage at L<https://metacpan.org/release/App-rank>.
=head1 SOURCE
Source repository is at L<https://github.com/perlancar/perl-App-rank>.
=head1 BUGS
Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=App-rank>
When submitting a bug or request, please include a test-file or a
patch to an existing test-file that illustrates the bug or desired
feature.
=head1 SEE ALSO
B<sort>
=head1 AUTHOR
perlancar <perlancar@cpan.org>
=head1 COPYRIGHT AND LICENSE
This software is copyright (c) 2018 by perlancar@cpan.org.
This is free software; you can redistribute it and/or modify it under
the same terms as the Perl 5 programming language system itself.
=cut