From Code to Community: Sponsoring The Perl and Raku Conference 2025 Learn more

#!/usr/bin/perl
=begin metadata
Name: strings
Description: extract strings
Author: Nathan Scott Thompson, quimby at city-net dot com
License:
=end metadata
=cut
# Copyright 1999 Nathan Scott Thompson <quimby at city-net dot com>
=head1 NAME
strings - extract strings
=head1 SYNOPSIS
strings [-afo] [-n length] [-t {d|o|x}] [file ...]
=head1 DESCRIPTION
C<strings> prints strings gleaned from the given files
(or from standard input.) By default strings of less than 4 characters
are ignored. A string is considered to be any sequence of graphical ASCII
characters (plus space and tab).
Options:
-f Print the file name with each string.
-n length Set the minimum length of strings to print; 4 by default.
-o Print each string with the octal offset in the file.
(Same as -to).
-t {d|o|x} Print each string with the offset in the file;
specify decimal, octal or hexadecimal respectively.
The -a option is included for compatibility with older versions but does
nothing.
=head1 BUGS
Things are seldom what they seem,
Skim milk masquerades as cream;
Highlows pass as patent leathers;
Jackdaws strut in peacock's feathers.
=cut
use strict;
use File::Basename qw(basename);
use Getopt::Std qw(getopts);
use vars qw($opt_a $opt_f $opt_o $opt_n $opt_t);
use constant EX_SUCCESS => 0;
use constant EX_FAILURE => 1;
my $Program = basename($0);
sub usage {
warn "usage: $Program [-afo] [-n length] [-t {d|o|x}] [file ...]\n";
exit EX_FAILURE;
}
getopts( 'afon:t:' ) or usage();
if (defined $opt_n) {
if ($opt_n !~ m/\A[0-9]\Z/ || $opt_n == 0) {
warn "$Program: invalid minimum string length '$opt_n'\n";
exit EX_FAILURE;
}
} else {
$opt_n = 4;
}
if ($opt_o) {
$opt_t = 'o';
} elsif (defined $opt_t) {
my %EXPECT = (
'd' => 1,
'o' => 1,
'x' => 1,
);
usage() unless $EXPECT{$opt_t};
}
my $offset_format = "\%07$opt_t ";
# Consider all graphic characters plus space and tab to be printable.
# Escape all punctuation characters out of paranoia.
my $punctuation = join '\\', split //, q/`~!@#$%^&*()-+={}|[]\:";'<>?,.\//;
my $printable = '\w \t' . $punctuation;
my $chunksize = 4096; # whatever
for my $filename ( @ARGV )
{
next if -d $filename;
my $in;
unless (open $in, '<', $filename) {
warn "$Program: Can't open '$filename': $!\n";
exit EX_FAILURE;
}
binmode $in;
scanfile($in, $filename);
close $in;
}
unless (@ARGV) {
scanfile(*STDIN, '<stdin>');
}
exit EX_SUCCESS;
sub scanfile {
my ($fh, $filename) = @_;
my $offset = 0;
while ($_ or read($fh, $_, $chunksize))
{
$offset += length($1) if s/^([^$printable]+)//o;
my $string = '';
do {
$string .= $1 if s/^([$printable]+)//o;
} until ($_ or !read($fh, $_, $chunksize));
if ( length($string) >= $opt_n )
{
print $filename, ':' if $opt_f;
printf $offset_format, $offset if $opt_t;
print $string, "\n";
}
$offset += length($string);
}
}