The Perl and Raku Conference 2025: Greenville, South Carolina - June 27-29 Learn more

#!/usr/bin/env perl
# Copyright (c) 2015,2020 Christian Jaeger, copying@christianjaeger.ch
# This is free software. See the file COPYING.md that came bundled
# with this file.
use strict;
use warnings FATAL => 'uninitialized';
# find modules from functional-perl working directory (not installed)
use Cwd 'abs_path';
our ($mydir, $myname);
BEGIN {
my $location = (-l $0) ? abs_path($0) : $0;
$location =~ /(.*?)([^\/]+?)_?\z/s or die "?";
($mydir, $myname) = ($1, $2);
}
use lib "$mydir/../lib";
use FP::Predicates 'is_natural0';
use Chj::xopen 'glob_to_fh';
use FP::IOStream 'fh_to_chunks';
use FP::Stream ":all";
use FP::List ":all";
use FP::Lazy ":all";
use POSIX 'ESPIPE';
our $BUFSIZ = 1024 * 16;
sub usage {
print STDERR map {"$_\n"} @_ if @_;
print "$myname n [m]
Skip n bytes of input, output the rest. If m is given, don't output
the last m bytes. Example: `echo -n 'Hi World' | skip 3 2` -> 'Wor'
--bufsize n use n instead of default $BUFSIZ as buffer size
--silent don't give error if the input is too short
";
exit @_ ? 1 : 0;
}
sub resource_rss {
@_ == 0 or fp_croak_arity 0;
require BSD::Resource;
(BSD::Resource::getrusage(BSD::Resource::RUSAGE_SELF()))[2]
}
# ---- sliding buffer ----------------------------------------
# Handle a sliding buffer window to make sure we don't output the
# input too soon (so that when we hit EOF, we've got at least m bytes
# still buffered). Do this by using a lazy list of chunks as the input
# and keep two pointers into it as the window.
# `chunks_change_tail` returns a stream of the unmodified chunks until
# passing through one more would make the sum of the remaining chunks
# till the end of the stream smaller than $minsize; pass the remainder
# to &$fn($tail, $remainingsize) and use its result as the tail of the
# output stream. (See test cases below for illustration.)
sub chunks_change_tail {
@_ == 3 or fp_croak_arity 3;
my ($chunks, $minsize, $fn) = @_;
weaken $_[0];
is_null $chunks and die "got empty input";
# start and rest are parts of the same stream of chunks,
# windowsize is the number of bytes between them
my $next;
$next = sub {
@_ == 3 or fp_croak_arity 3;
my ($start, $rest, $windowsize) = @_;
my $next = $next;
lazy {
NEXT: {
FORCE $start, $rest; # optional (since is_null, first
# etc. are forcing promises
# anyway), but might reduce
# overhead slightly?
my $first = first $start;
my $lenfirst = length $first;
my $reserve = $windowsize - $minsize;
if ($lenfirst <= $reserve) {
cons($first,
&$next(rest($start), $rest, $windowsize - $lenfirst))
} else {
if (is_null $rest) {
&$fn($start, $windowsize)
} else {
#&$next($start, rest $rest, $windowsize + length first $rest)
$windowsize = $windowsize + length first $rest;
$rest = rest $rest;
redo NEXT;
}
}
}
}
};
Weakened($next)->($chunks, $chunks, 0);
}
sub test_with_size {
@_ == 1 or fp_croak_arity 1;
my ($size) = @_;
stream_to_array chunks_change_tail(array_to_stream(["foo", "bars", "baz"]),
$size, sub { my ($tail, $len) = @_; cons $len, $tail });
}
TEST { test_with_size 1 }
['foo', 'bars', 3, 'baz'];
TEST {
dumperequal(test_with_size(2), test_with_size(1))
and dumperequal(test_with_size(3), test_with_size(1))
}
1;
TEST { test_with_size 4 }
['foo', 7, 'bars', 'baz'];
TEST { test_with_size 999 }
[10, 'foo', 'bars', 'baz'];
# ------------------------------------------------------------
our $verbose = 0;
our $opt_repl;
our $opt_leaktest;
our $opt_silent;
GetOptions(
"verbose" => \$verbose,
"help" => sub {usage},
"bufsize=n" => \$BUFSIZ,
"repl" => \$opt_repl,
"leaktest" => \$opt_leaktest,
"silent" => \$opt_silent,
) or exit 1;
# called for testing or debugging:
if ($opt_repl) {
require FP::Repl;
FP::Repl::repl();
exit 0
}
if (perhaps_run_tests "main") {
exit 0;
}
# called as tool:
usage unless (@ARGV == 1 or @ARGV == 2);
our ($n, $maybe_m) = @ARGV;
is_natural0 $n or usage "n must be a non-negative integer";
if (defined $maybe_m) {
is_natural0 $maybe_m or usage "m must be a non-negative integer";
}
our $mem_start = resource_rss if $opt_leaktest;
our $mem_end;
our $in = glob_to_fh * STDIN;
our $out = glob_to_fh * STDOUT;
if ($n) {
$in->seek($n) || do {
if ($! == ESPIPE) {
# non-seekable device
my $nbufs = int($n / $BUFSIZ);
my $nrest = $n % $BUFSIZ;
$nbufs * $BUFSIZ + $nrest == $n or die "Perl can't calculate?";
my $buf;
for (1 .. $nbufs) {
$in->xsysreadcompletely($buf, $BUFSIZ);
}
$in->xsysreadcompletely($buf, $nrest);
# XX: seek does not complain when file is too short; this
# does. This is inconsistent. Also, check $opt_silent ?
} else {
die "seek: $!";
}
};
}
if ($maybe_m) {
my $chunks = force fh_to_chunks $in, $BUFSIZ;
if (is_null $chunks) {
die "$myname: no remainder left after skipping $n byte(s)\n"
unless $opt_silent;
} else {
my $chunks2 = chunks_change_tail(
$chunks, $maybe_m,
sub {
my ($rest, $remainingsize) = @_;
$mem_end = resource_rss if $opt_leaktest;
if ($remainingsize < $maybe_m) {
if ($opt_silent) {
null
} else {
die "$myname: only $remainingsize byte(s) left "
. "after skipping $n byte(s)\n"
}
} else {
my $last = first $rest;
cons(substr($last, 0, $remainingsize - $maybe_m), null)
}
}
);
stream_for_each sub {
$out->xprint($_[0])
}, $chunks2;
}
} else {
$in->xsendfile_to($out);
$in->xclose;
}
$out->xclose;
if ($opt_leaktest and (($mem_end - $mem_start) / $mem_start > 1.5)) {
die "there was a leak";
}