——#!/usr/bin/perl
=begin metadata
Name: fold
Description: wrap each input line to fit specified width
Author: Clinton Pierce, clintp@geeksalad.org
Author: Tom Christiansen, tchrist@perl.com
License: perl
=end metadata
=cut
# The standard fold(1), implemented in Perl
# copyright 1999, Clinton A. Pierce
# Freely redistributable under the Perl Artistic License
# Severely hacked at by Tom Christiansen:
# code reformatting & rearranging, simplification, beautification
# added pragmata
# "screaming" code
# argument parsing
# pod documentation
use
strict;
my
$Program
= basename($0);
for
(0 ..
$#ARGV
) {
last
if
$ARGV
[
$_
] eq
'--'
;
$ARGV
[
$_
] =
"-w$1"
if
$ARGV
[
$_
] =~ m/\A\-([0-9]+)\z/;
}
my
%opt
;
getopts(
'bsw:'
, \
%opt
) or usage();
my
$Tabstop
= 8;
# sane tab stops
my
$Width
=
defined
$opt
{
'w'
} ?
$opt
{
'w'
} : 80;
my
$Byte_Only
=
$opt
{
'b'
};
my
$Space_Break
=
$opt
{
's'
};
sub
usage {
warn
qq[
usage: $Program [-bs]
[-w width] [file ...]
-s
split
lines on whitespace where possible
-b count bytes, not characters
-w WIDTH maximum
length
of lines on output
];
exit
EX_FAILURE;
}
unless
(
$Width
&&
$Width
=~ /^\d+$/) {
warn
"$Program: illegal width value `$Width'\n"
;
usage();
}
if
(
$Space_Break
&&
$Width
<
$Tabstop
) {
warn
"$Program: width must be greater than $Tabstop with the -s option\n"
;
usage();
}
my
$func
=
$Byte_Only
? \
&fold_file_byte
: \
&fold_file
;
my
$rc
= EX_SUCCESS;
for
(
@ARGV
) {
if
(-d
$_
) {
warn
"$Program: '$_': is a directory\n"
;
$rc
= EX_FAILURE;
next
;
}
my
$fh
;
unless
(
open
$fh
,
'<'
,
$_
) {
warn
"$Program: failed to open '$_': $!\n"
;
$rc
= EX_FAILURE;
next
;
}
if
(
$func
->(
$fh
) != 0) {
$rc
= EX_FAILURE;
}
unless
(
close
$fh
) {
warn
"$Program: failed to close '$_': $!\n"
;
$rc
= EX_FAILURE;
}
}
unless
(
@ARGV
) {
if
(
$func
->(
*STDIN
) != 0) {
$rc
= EX_FAILURE;
}
}
exit
$rc
;
########
# If we are not in byte-only mode, we have to calculate
# the new column based on the spec. This is superslow.
sub
adj_col {
my
(
$col
,
$char
) =
@_
;
# algorithm from BSD fold --tchrist
if
(
$char
eq
"\b"
) {
$col
--
if
$col
}
elsif
(
$char
eq
"\r"
) {
$col
= 0; }
elsif
(
$char
eq
"\t"
) {
$col
+=
$Tabstop
- (
$col
%
$Tabstop
) }
else
{
$col
++ }
return
$col
;
}
sub
fold_file_byte {
my
$input
=
shift
;
# the following hack allows us to dispense with the
# slow getc() and the hairy adj_col() code because we
# don't care about \t and \b anymore. This small adjustment
# provides a screaming 3,000% speedup, so seems worth it!
# --tchrist
my
$soft
=
"(.{0,$Width})(?=\b.)"
;
# XXX: \b != \s
my
$hard
=
"(.{$Width})(?=.)"
;
if
(
$Space_Break
) {
while
(<
$input
>) {
APPLYRE:
while
(
length
>
$Width
) {
if
(s/
$soft
//o || s/
$hard
//o) {
"$1\n"
;
}
else
{
last
APPLYRE;
}
}
;
}
}
else
{
while
(<
$input
>) {
s/
$hard
/$1\n/go;
;
}
}
return
0;
}
# run fold on a given file
sub
fold_file {
my
$input
=
shift
;
my
(
$column
,
$char
,
$output
);
$column
= 0;
CHAR:
# bytewise processing. The horror! The horror!
while
(
defined
(
$char
=
getc
(
$input
))) {
if
(
$char
eq
"\n"
) {
$output
,
"\n"
;
$output
=
""
;
$column
= 0;
next
CHAR;
}
ADJUST: {
$column
= adj_col(
$column
,
$char
);
if
(
$column
>
$Width
) {
if
(
$Space_Break
) {
for
(
my
$i
=
length
(
$output
);
$i
>= 0;
$i
--) {
if
(
substr
(
$output
,
$i
, 1) =~ /\s/) {
substr
(
$output
, 0,
$i
+1),
"\n"
;
$output
=
substr
(
$output
,
$i
+1);
for
(
$column
=
$i
= 0;
$i
<
length
(
$output
);
$i
++) {
$column
= adj_col(
$column
,
substr
(
$output
,
$i
, 1));
}
redo
ADJUST;
}
}
$output
,
"\n"
;
$output
=
""
;
$column
= 0;
redo
ADJUST;
}
else
{
"$output\n"
;
$output
=
$char
;
$column
= adj_col(0,
$char
);
}
}
else
{
$output
.=
$char
;
}
}
# ADJUST goto
}
return
0;
}
__END__
=head1 NAME
fold - wrap each input line to fit specified width
=head1 SYNOPSIS
B<fold> [B<-bs>] [B<-w> I<width>] [I<file> ...]
=head1 DESCRIPTION
The I<fold> command reads lines from the specified files (or standard
input if none are specified) and writes them to the standard output with
newlines inserted into lines longer than the specified column width.
The default column width is 80, but this may be overridden using the
B<-w> flag. For historical reasons, the width may be specified directly,
as in C<fold -72>, omitting the B<-w>.
The B<-s> flag causes breaks to occur after whitespace rather than in
the middle of a word. This produces a ragged right edge, but is much
nicer to look at.
The B<-b> flag makes the program ignore embedded backspaces, tabs, and
carriage returns when deciding where to split. This makes it run about
thirty times faster. You might want to get used to using B<-b>.
Current locale settings will be honored in determining what
is meant by "whitespace" and "word characters".
=head1 BUGS
POSIX 1003.2 states that a newline will never be inserted
immediately before or after a backspace or a carriage return,
but this is not checked for.
=head1 SEE ALSO
expand(1), fmt(1)
=head1 AUTHORS
Clinton Pierce and Tom Christiansen.
This code is freely modifiable and freely redistributable under Perl's
Artistic License.