NAME
Forks::Super - extensions and convenience methods to manage background processes
VERSION
Version 0.97
SYNOPSIS
use
Forks::Super;
# --- familiar use - parent returns PID>0, child returns zero
$pid
=
fork
();
die
"fork failed"
unless
defined
$pid
;
if
(
$pid
> 0) {
# parent code
}
else
{
# child code
}
# --- wait for a child process to finish
$w
=
wait
;
# blocking wait on any child,
# child exit status in $?
$w
=
waitpid
$pid
,0;
# blocking wait on specific child
$w
=
waitpid
$pid
,WNOHANG;
# non-blocking, use with POSIX ':sys_wait_h'
$w
=
waitpid
0,
$flag
;
# wait on any process in current process group
waitall;
# block until all children are finished
# -------------- helpful extensions ---------------------
# fork directly to a shell command. Child doesn't return.
$pid
=
fork
{
cmd
=>
"./myScript 17 24 $n"
};
$pid
=
fork
{
exec
=> [
"/bin/prog"
,
$file
,
"-x"
, 13 ] };
$pid
=
fork
[
"./myScript"
, 17, 24,
$n
];
# new syntax in v0.72
# --- fork directly to a Perl subroutine. Child doesn't return.
$pid
=
fork
{
sub
=>
$methodNameOrRef
,
args
=> [
@methodArguments
] };
$pid
=
fork
{
sub
=> \
&subroutine
,
args
=> [
@args
] };
$pid
=
fork
{
sub
=>
sub
{
"anonymous sub"
},
args
=> [
@args
] };
$pid
=
fork
sub
{ CODE },
%other_options
;
# new syntax in v0.72
# --- impose a time limit on the child process
$pid
=
fork
{
cmd
=>
$cmd
,
timeout
=> 30 };
# kill child if not done in 30s
$pid
=
fork
{
sub
=>
$subRef
,
args
=> [
@args
],
expiration
=> 1260000000 };
# complete 8am Dec 5, 2009 UTC
# --- wait and waitpid support timeouts, too
$pid
=
wait
3.0;
print "No child reaped in 5s"
if
waitpid
0, 0, 5.0 ==
&Forks::Super::Wait::TIMEOUT
;
# --- run a child process starting from a different directory
$pid
=
fork
{
dir
=>
"some/other/directory"
,
cmd
=> [
"command"
,
"--that"
,
"--runs=somewhere"
,
"else"
] };
# --- obtain standard file handles for the child process
$pid
=
fork
{
child_fh
=>
"in,out,err,:utf8"
};
if
(
$pid
== 0) {
# child process
sleep
1;
$x
= <STDIN>;
# read from parent's $pid->{child_stdin} (output handle)
print rand
() > 0.5 ?
"Yes\n"
:
"No\n"
if
$x
eq
"Clean your room\n"
;
sleep
2;
$i_can_haz_ice_cream
= <STDIN>;
if
(
$i_can_haz_ice_cream
!~ /you can have ice cream/ &&
rand
() < 0.5) {
print STDERR
'@#$&#$*&#$*&'
,
"\n"
;
}
exit
0;
}
# else parent process
$child_stdin
=
$pid
->{child_stdin};
$child_stdin
=
$Forks::Super::CHILD_STDIN
{
$pid
};
# alternate, deprecated
print $child_stdin
"Clean your room\n"
;
sleep
2;
$child_stdout
=
$pid
->{child_stdout};
# -or- $child_stdout = $Forks::Super::CHILD_STDOUT{$pid}; # deprecated
$child_response
= <
$child_stdout
>;
# -or-: Forks::Super::read_stdout($pid);
if
(
$child_response
eq
"Yes\n"
) {
print $child_stdin
"Good boy. You can have ice cream.\n"
;
}
else
{
print $child_stdin
"Bad boy. No ice cream for you.\n"
;
sleep
2;
$child_err
= Forks::Super::read_stderr(
$pid
);
# -or- $child_err = $pid->read_stderr();
# -or- $child_err = readline($pid->{child_stderr});
print $child_stdin
"And no back talking!\n"
if
$child_err
;
}
# --- retrieve variable values from a child process
$pid1
=
fork
{
share
=> [ \
$scalar
, \
@list
],
sub
=> \
&method
};
$pid2
=
fork
{
share
=> [ \
@list
, \
%hash
],
sub
=> \
&someOtherMethod
};
waitpid
$pid1
, 0;
waitpid
$pid2
, 0;
# now $scalar is set to value in 1st job, @list has values from both jobs,
# and %hash has values from 2nd job
# ---------- manage jobs and system resources ---------------
# --- run 100 tasks but fork blocks while there are already 5 active jobs
$Forks::Super::MAX_PROC
= 5;
$Forks::Super::ON_BUSY
=
'block'
;
for
(
$i
=0;
$i
<100;
$i
++) {
$pid
=
fork
{
cmd
=>
$task
[
$i
] };
}
# --- jobs fail (without blocking) if the system is too busy
$Forks::Super::MAX_LOAD
= 2.0;
$Forks::Super::ON_BUSY
=
'fail'
;
$pid
=
fork
{
cmd
=>
$task
};
if
(
$pid
> 0) {
print "'$task' is running\n"
}
elsif
(
$pid
< 0) {
print "current CPU load > 2.0: didn't start '$task'\n"
}
# $Forks::Super::MAX_PROC setting can be overridden.
# Start job immediately if < 3 jobs running
$pid
=
fork
{
sub
=>
'MyModule::MyMethod'
,
args
=> [
@b
],
max_proc
=> 3 };
# --- try to fork no matter how busy the system is
$pid
=
fork
{
sub
=> \
&MyMethod
,
force
=> 1 }
# when system is busy, queue jobs. When system becomes less busy,
# some jobs on the queue will start.
# if job is queued, return value from fork() is a very negative number
$Forks::Super::ON_BUSY
=
'queue'
;
$pid
=
fork
{
cmd
=>
$command
};
$pid
=
fork
{
cmd
=>
$useless_command
,
queue_priority
=> -5 };
$pid
=
fork
{
cmd
=>
$important_command
,
queue_priority
=> 5 };
$pid
=
fork
{
cmd
=>
$future_job
,
delay
=> 20 };
# queue job for at least 20s
# --- assign descriptive names to tasks
$pid1
=
fork
{
cmd
=>
$command
,
name
=>
"my task"
};
$pid2
=
waitpid
"my task"
, 0;
$num_signalled
= Forks::Super::
kill
'TERM'
,
"my task"
;
$pid1
=
fork
{
cmd
=>
$command1
,
name
=>
'task 1'
};
$pid2
=
fork
{
cmd
=>
$command2
,
name
=>
'task 2'
};
$pid
=
waitpid
-1, 0;
print "Task that just finished was $pid->{name}\n"
;
# task 1 or task 2
# --- run callbacks at various points of job life-cycle
$pid
=
fork
{
cmd
=>
$command
,
callback
=> \
&on_complete
};
$pid
=
fork
{
sub
=>
$sub
,
args
=> [
@args
],
callback
=> {
start
=>
'on_start'
,
finish
=> \
&on_complete
,
queue
=>
sub
{
print "Job $_[1] queued\n"
} } };
# --- set up dependency relationships
$pid1
=
fork
{
cmd
=>
$job1
};
$pid2
=
fork
{
depend_on
=>
$pid1
,
cmd
=>
$job2
};
# queue until job 1 is complete
$pid3
=
fork
{ ... };
$pid4
=
fork
{
depend_start
=> [
$pid2
,
$pid3
],
cmd
=>
$job4
};
# queue until jobs 2,3 have started
$pid5
=
fork
{
cmd
=>
$job5
,
name
=>
"group C"
};
$pid6
=
fork
{
cmd
=>
$job6
,
name
=>
"group C"
};
$pid7
=
fork
{
depend_on
=>
"group C"
,
cmd
=>
$job7
};
# wait for jobs 5 & 6 to complete
# --- manage OS settings on jobs -- may not be available on all systems
$pid1
=
fork
{
os_priority
=> 10 };
# like nice(1) on Un*x
$pid2
=
fork
{
cpu_affinity
=> 0x5 };
# background task to prefer CPUs #0,2
# --- job information
$state
= Forks::Super::state(
$pid
);
# ACTIVE | DEFERRED | COMPLETE | REAPED
$status
= Forks::Super::status(
$pid
);
# exit status ($?) for completed jobs
# --- return value from fork is object that just looks like a process id
# --- see Forks::Super::Job
$job
=
fork
{ ... };
$state
=
$job
->{state};
if
(
$job
->is_complete) {
$status
=
$job
->{status};
}
# --- evaluate long running expressions in the background
$result
= bg_eval { a_long_running_calculation() };
# sometime later ...
print "Result was $result\n"
;
$result
= bg_qx(
"./long_running_command"
);
# ... do something else for a while and when you need the output ...
print "output of long running command was: $result\n"
;
# if you need bg_eval or bg_qx functionality in list context ...
tie
%result
, BG_EVAL,
sub
{ long_running_calc_that_returns_hash() };
tie
@output
, BG_QX,
"./long_running_cmd"
;
# --- convenience methods, compare to IPC::Open2, IPC::Open3
my
(
$fh_in
,
$fh_out
,
$pid
,
$job
) = Forks::Super::open2(
@command
);
my
(
$fh_in
,
$fh_out
,
$fh_err
,
$pid
,
$job
)
= Forks::Super::open3(
@command
, {
timeout
=> 60 });
# --- parallel grep, map operations ---
@files_with_errors
= pgrep {
qx(cat $_)
=~ /error/i }
@files
;
@result
= pmap { long_running_calculation(
$_
) } {
timeout
=> 10},
@list
;
# --- run a background process as a *daemon*
$job
=
fork
{
cmd
=>
$cmd
,
daemon
=> 1 };
DESCRIPTION
This package provides new definitions for the Perl functions fork, wait, and waitpid with richer functionality. The new features are designed to make it more convenient to spawn background processes and more convenient to manage them to get the most out of your system's resources.
fork
$pid = fork( \%options )
Attempts to spawn a new process. On success, it returns a Forks::Super::Job object with information about the background task to the calling process. This object is overloaded so that in any numeric or string context, it will behave like the process id of the new process, and lets Forks::Super::fork be used as a drop-in replacement for the builtin Perl fork call.
With no arguments, it behaves the same as the Perl fork() system call:
creating a new process running the same program at the same execution point
returning an object to the parent that behaves like the process id (PID) of the new child process in any boolean, numeric, or string context (on Windows, the value is a pseudo-process ID, typically a negative number).
returning 0 to the child process
returning
undef
if the fork call was unsuccessful
Options for instructing the child process
The fork
call supports three options, "cmd", "exec", and "sub" (or sub
/args
) that will instruct the child process to carry out a specific task. The child process will not return from the fork
call if any of these options are used.
cmd
$child_pid = fork { cmd => $shell_command }
$child_pid = fork { cmd => \@shell_command }
-
On successful launch of the child process, runs the specified shell command in the child process with the Perl system() function. When the system call is complete, the child process exits with the same exit status that was returned by the system call.
Returns the PID of the child process to the parent process. Does not return from the child process, so you do not need to check the fork() return value to determine whether code is executing in the parent or child process.
See "Alternate fork syntax", below, for an alternate way of specifying a command to run in a background process.
exec
$child_pid = fork { exec => $shell_command }
$child_pid = fork { exec => \@shell_command }
-
Like the "cmd" option, but the background process launches the shell command with exec instead of with system.
Using exec instead of cmd will usually spawn one fewer process. Prior to v0.55, the "timeout" and "expiration" options (see "Options for simple job management") could not be used with the exec option, but that incompatibility has been fixed.
sub
$child_pid = fork { sub => $subName [, args => \@args ] }
$child_pid = fork { sub => \&subReference [, args => \@args ] }
$child_pid = fork { sub => sub { ... code ... } [, args => \@args ] }
-
On successful launch of the child process, fork invokes the specified Perl subroutine with the specified set of method arguments (if provided) in the child process. If the subroutine completes normally, the child process exits with a status of zero. If the subroutine exits abnormally (i.e., if it dies, or if the subroutine invokes exit with a non-zero argument), the child process exits with non-zero status.
Returns the PID of the child process to the parent process. Does not return from the child process, so you do not need to check the fork() return value to determine whether code is running in the parent or child process.
See "Alternate fork syntax", below, for an alternate way of specifying a subroutine to run in the child process.
If neither the "cmd", "exec", nor the "sub" option is provided to the fork call, then the fork() call behaves like a standard Perl fork()
call, returning the child PID to the parent and also returning zero to a new child process.
Options for simple job management
timeout
expiration
fork { timeout => $delay_in_seconds }
fork { expiration => $timestamp_in_seconds_since_epoch_time }
-
Puts a deadline on the child process and terminates the child if it has not completed by the deadline. With the timeout option, you specify that the child process should not survive longer than the specified number of seconds. With expiration, you are specifying an epoch time (like the one returned by the time function) as the child process's deadline.
If the setpgrp() system call is implemented on your system, then this module will try to reset the process group ID of the child process. On timeout, the module will attempt to kill off all subprocesses of the expiring child process.
If the deadline is some time in the past (if the timeout is not positive, or the expiration is earlier than the current time), then the child process will die immediately after it is created.
This feature usually uses Perl's alarm call and installs its own handler for SIGALRM, but an alternate "poor man's alarm" is available. If you wish to use the timeout or expiration feature with a child sub that also uses alarm/SIGALRM, or on a system that has issues with alarm, you can also pass the option use_alternate_alarm => 1 to force Forks::Super to use the alternate alarm.
If you have installed the DateTime::Format::Natural module, then you may also specify the timeout and expiration options using natural language:
$pid
=
fork
{
timeout
=>
"in 5 minutes"
,
sub
=> ... };
$pid
=
fork
{
expiration
=>
"next Wednesday"
,
cmd
=>
$long_running_cmd
};
dir
fork { dir => $directory }
fork { chdir => $directory }
-
Causes the child process to be run from a different directory than the parent.
If the specified directory does not exist or if the chdir call fails (e.g., if the caller does not have permission to change to the directory), then the child process immediately exits with a non-zero status.
chdir and dir are synonyms.
env
umask
fork { umask => $mask }
-
Sets the "umask" of the background process to specify the default permissions of files and directories created by the background process. See "umask" in perlfunc and umask(1).
As it is with the Perl builtin function, the $mask argument is a number, usually given in octal form, but it is not a string of octal digits. So
fork
{
umask
=>
"0775"
, ... }
will probably not do what you want. Instead, use one of
fork
{
umask
=> 0775, ... }
fork
{
umask
=> 509, ... }
# 509 == 0775
fork
{
umask
=>
oct
"0775"
, ... }
delay
start_after
fork { delay => $delay_in_seconds }
fork { start_after => $timestamp_in_epoch_time }
-
Prepares a child process to be spawned at some time in the future. The return value is an object which resolves to a very negative number. See the section on "Deferred processes" for information on what you can do with this return value.
A deferred job will start no earlier than its appointed time in the future. Depending on the circumstances when the queued jobs are examined, the actual start time of the job could be significantly later than the appointed time.
A job may have both a minimum start time (through delay or start_after options) and a maximum end time (through "timeout" and "expiration"). Jobs with inconsistent times (end time is not later than start time) will be killed off as soon as they are created.
As with the "timeout" and "expiration" options, the delay and start_after options can be expressed in natural language if you have installed the DateTime::Format::Natural module.
$pid
=
fork
{
start_after
=>
"12:25pm tomorrow"
,
sub
=> ... };
$pid
=
fork
{
delay
=>
"in 7 minutes"
,
cmd
=> ... };
child_fh
$pid = fork { child_fh => $fh_spec }
$pid = fork { child_fh => [ @fh_spec ] }
-
Launches a child process and makes the child process's STDIN, STDOUT, and/or STDERR file handles available to the parent process in the instance members $pid->{child_stdin}, $pid->{child_stdout}, and $pid->{child_stderr}, or in the package variables $Forks::Super::CHILD_STDIN{$pid}, $Forks::Super::CHILD_STDOUT{$pid}, and/or $Forks::Super::CHILD_STDERR{$pid}. $pid is the numeric return value from the fork call. This feature makes it possible, even convenient, for a parent process to communicate with a child, as this contrived example shows.
$pid
=
fork
{
sub
=> \
&pig_latinize
,
timeout
=> 10,
child_fh
=>
"all"
};
# in the parent, $Forks::Super::CHILD_STDIN{$pid} ($pid->{child_stdin})
# is an **output** file handle
print
{
$pid
->{child_stdin}}
"The blue jay flew away in May\n"
;
sleep
2;
# give child time to start up and get ready for input
# and $Forks::Super::CHILD_STDOUT{$pid} ($pid->{child_stdout}) and
# $Forks::Super::CHILD_STDERR{$pid} ($pid->{child_stderr})
# are **input** handles.
$result
= < {
$pid
->{child_stdout} } >;
print
"Pig Latin translator says: "
,
"$result\n"
;
# ==> eThay ueblay ayjay ewflay awayay inay ayMay\n
@errors
=
readline
(
$pid
->{child_stderr} );
print
"Pig Latin translator complains: @errors\n"
if
@errors
> 0;
sub
pig_latinize {
for
(;;) {
while
(<STDIN>) {
foreach
my
$word
(
split
/\s+/) {
if
(
$word
=~ /^qu/i) {
print
substr
(
$word
,2) .
substr
(
$word
,0,2) .
"ay"
;
# STDOUT
}
elsif
(
$word
=~ /^([b-df-hj-np-tv-z][b-df-hj-np-tv-xz]*)/i) {
my
$prefix
= $1;
$word
=~ s/[b-df-hj-np-tv-z][b-df-hj-np-tv-xz]*//i;
print
$word
.
$prefix
.
"ay"
;
}
elsif
(
$word
=~ /^[aeiou]/i) {
print
$word
.
"ay"
;
}
else
{
print
STDERR
"Didn't recognize this word: $word\n"
;
}
print
" "
;
}
print
"\n"
;
}
}
}
The set of file handles to make available are specified either as a non-alphanumeric delimited string, or list reference. This spec may contain one or more of the words:
in
out
err
join
all
socket
pipe
block
:<layer>
in, out, and err mean that the child's STDIN, STDOUT, and STDERR, respectively, will be available in the parent process through the file handles in $Forks::Super::CHILD_STDIN{$pid}, $Forks::Super::CHILD_STDOUT{$pid}, and $Forks::Super::CHILD_STDERR{$pid}, where $pid is the child's process ID.
all is a convenient way to specify in, out, and err.
join specifies that the child's STDOUT and STDERR will be returned through the same file handle, available as both $Forks::Super::CHILD_STDOUT{$pid} and $Forks::Super::CHILD_STDERR{$pid}.
If socket is specified, then local sockets will be used to pass data between parent and child instead of temporary files.
If pipe is specified, then local pipes will be used to pass data between parent and child instead of temporary files.
If block is specified, then the read end of each file handle will block until input is available. Note that this can lead to deadlock unless the I/O of the write end of the file handle is carefully managed.
:<layer> may be any valid PerlIO I/O layer, such as :crlf, :utf8, :gzip, etc. Some I/O layers may not work well with socket and pipe IPC. And of course they will not work well with Perl <=v5.6 and its poorer support for I/O layers.
See also: "write_stdin", "read_stdout", "read_stderr".
Socket handles vs. file handles vs. pipes
Here are some things to keep in mind when deciding whether to use sockets, pipes, or regular files for parent-child IPC:
Using regular files is implemented everywhere and is the most portable and robust scheme for IPC. Sockets and pipes are best suited for Unix-like systems, and may have limitations on non-Unix systems.
Sockets and pipes have a performance advantage, especially at child process start-up.
Temporary files use disk space; sockets and pipes use memory. One of these might be a relatively scarce resource on your system.
Socket input buffers have limited capacity. Write operations can block if the socket reader is not vigilant. Pipe input buffers are often even smaller (as small as 512 bytes on some modern systems).
The Forks/Super/SysInfo.pm file that is created at build time will have information about the socket and pipe capacity of your system, if you are interested.
On Windows, sockets and pipes are blocking, and care must be taken to prevent your script from reading on an empty socket. In addition, sockets to the input/output streams of external programs on Windows are a little flaky, so you are almost always better off using file handles for IPC if your Windows program needs external commands (the cmd or exec options to Forks::Super::fork).
Socket and file handle gotchas
Some things to keep in mind when using socket or file handles to communicate with a child process.
Care should be taken before calling close on a socket handle. The same socket handle can be used for both reading and writing. Don't close a handle when you are only done with one half of the socket operations.
In general, the Forks::Super module knows whether a file handle is associated with a file, a socket, or a pipe, and the "close_fh" function provides a safe way to close the file handles associated with a background task:
Forks::Super::close_fh(
$pid
);
# close all STDxxx handles
Forks::Super::close_fh(
$pid
,
'stdin'
);
# close STDIN only
Forks::Super::close_fh(
$pid
,
'stdout'
,
'stderr'
);
# don't close STDIN
# --- OO interface
$pid
->close_fh;
$pid
->close_fh(
'stdin'
);
$pid
->close_fh(
'stdout'
,
'stderr'
);
The test Forks::Super::Util::is_socket($handle) can determine whether $handle is a socket handle or a regular file handle. The test Forks::Super::Util::is_pipe($handle) can determine whether $handle is reading from or writing to a pipe.
IPC in this module is asynchronous. In general, you cannot tell whether the parent/child has written anything to be read in the child/parent. So getting undef when reading from the $pid->{child_stdout} handle does not necessarily mean that the child has finished (or even started!) writing to its STDOUT. Check out the seek HANDLE,0,1 trick in the perlfunc documentation for seek about reading from a handle after you have already read past the end. You may find it useful for your parent and child processes to follow some convention (for example, a special token like "__EOF__") to denote the end of input.
There is a limit to how many file handles your process can have open at one time. Sometimes that limit is quite small (I'm looking at you, default configuration of Solaris!) If your program creates many child processes and you use file handles or socket handles for interprocess communication with them, you could run out of file handles. When this happens, you will see warning messages like "Too many open files while opening ..." or sometimes a cryptic "Can't locate Scalar/Util.pm in @INC (@INC contains: ...)" message.
When you are finished with I/O operations on your job, you should call
Forks::Super::close_fh(
$pid
)
or
$pid
->dispose
to close the I/O handles and make them available for other processes. If you set "$Forks::Super::ON_TOO_MANY_OPEN_FILEHANDLES" to the value
$Forks::Super::ON_TOO_MANY_OPEN_FILEHANDLES
=
'rescue'
;
(also)
then
Forks::Super
will try to determine when your program is approaching the limit of open file handles, and will try to determine which file handles can be safely closed.
stdin
fork { stdin => $input }
-
Provides the data in $input as the child process's standard input. Equivalent to, but a little more efficient than:
$pid
=
fork
{
child_fh
=>
"in"
,
sub
=>
sub
{ ... } };
Forks::Super::write_stdin(
$pid
,
$input
);
$input
may either be a scalar, a reference to a scalar, or a reference to an array.
stdout
stderr
fork { stdout => \$output }
fork { stderr => \$errput }
-
On completion of the background process, loads the standard output and standard error of the child process into the given scalar references. If you do not need to use the child's output while the child is running, it could be more convenient to use this construction than calling Forks::Super::read_stdout($pid) (or
readline($pid->{child_stdout})
) to obtain the child's output.
retries
fork { retries => $max_retries }
-
If the underlying system fork call fails (returns undef), pause for a short time and retry up to $max_retries times.
This feature is probably not that useful. A failed fork call usually indicates some bad system condition (too many processes, system out of memory or swap space, impending kernel panic, etc.) where your expectations of recovery should not be too high.
Options for complicated job management
The fork()
call from this module supports options that help to manage child processes or groups of child processes in ways to better manage your system's resources. For example, you may have a lot of tasks to perform in the background, but you don't want to overwhelm your (possibly shared) system by running them all at once. There are features to control how many, how, and when your jobs will run.
name
fork { name => $name }
-
Attaches a string identifier to the job. The identifier can be used for several purposes:
to obtain a Forks::Super::Job object representing the background task through the Forks::Super::Job::get or Forks::Super::Job::getByName methods.
as the first argument to "waitpid" to wait on a job or jobs with specific names
as an argument to Forks::Super::kill to signal a job or group of jobs by name
to identify and establish dependencies between background tasks. See the "depend_on" and "depend_start" parameters below.
if supported by your system, the name attribute will change the argument area used by the ps(1) program and change the way the background process is displayed in your process viewer. (See $PROGRAM_NAME in perlvar about overriding the special $0 variable.)
Each job need not be assigned a unique name. Calls to "waitpid" by name will wait for any job with the specified name, and calls to "kill" by name will signal all of the jobs with the specified name.
max_proc
fork { max_proc => $max_simultaneous_jobs }
fork { max_proc => \&subroutine }
-
Specifies the maximum number of background processes that should run simultaneously. If a fork call is attempted while there are already the maximum number of child processes running, then the fork() call will either block (until some child processes complete), fail (return a negative value without spawning the child process), or queue the job (returning a very negative value called a job ID), according to the specified "on_busy" behavior (see "on_busy", below). See the "Deferred processes" section for information about how queued jobs are handled.
On any individual fork call, the maximum number of processes may be overridden by also specifying max_proc or "force" options.
$Forks::Super::MAX_PROC
= 8;
# launch 2nd job only when system is very not busy
# always launch 3rd job no matter how busy we are
$pid1
=
fork
{
sub
=>
'method1'
};
$pid2
=
fork
{
sub
=>
'method2'
,
max_proc
=> 1 };
$pid3
=
fork
{
sub
=>
'method3'
,
force
=> 1 };
Setting the max_proc parameter to zero or a negative number will disable the check for too many simultaneous processes. Also see the "force" option, below.
max_fork is a synonym for max_proc.
Also see $Forks::Super::MAX_PROC in MODULE VARIABLES, which globally specifies the desired maximum number of simultaneous background processes when a max_proc parameter is not supplied to the fork call.
Since v0.77, the max_proc parameter may be assigned a code reference to a subroutine that returns the (possibly dynamic) number of simultaneous background processes allowed. See $Forks::Super::MAX_PROC in MODULE VARIABLES for a use case and demonstration.
max_load
fork { max_load => $max_cpu_load }
-
Specifies a maximum CPU load threshold at which this job can be started. The fork command will not spawn a new job while the current system CPU load is larger than this threshold. CPU load checks are disabled if this value is set to zero or to a negative number.
Note that the metric of "CPU load" is different on different operating systems. On Windows (including Cygwin), the metric is CPU utilization, which is always a value between 0 and 1. On Unix-ish systems, the metric is the 1-minute system load average, which could be a value larger than 1. Also note that the 1-minute average load measurement has a lot of inertia -- after a CPU intensive task starts or stops, it will take at least several seconds for that change to impact the 1-minute utilization.
If your system does not have a well-behaved uptime(1) command, then it is recommended to install the Sys::CpuLoadX module to use this feature. The Sys::CpuLoadX module is only available bundled with Forks::Super and otherwise cannot be downloaded from CPAN.
Also see $Forks::Super::MAX_LOAD in MODULE VARIABLES, which specifies the maximum CPU load for launching a job when the max_load parameter is not provided to fork.
on_busy
fork { on_busy => "block" | "fail" | "queue" }
-
Dictates the behavior of fork in the event that the module is not allowed to launch the specified job for whatever reason. If you are using Forks::Super to throttle (see max_proc, $Forks::Super::MAX_PROC) or impose dependencies on (see depend_start, depend_on) background processes, then failure to launch a job should be expected.
block
-
If the module cannot create a new child process for the specified job, it will wait and periodically retry to create the child process until it is successful. Unless a system fork call is attempted and fails, fork calls that use this behavior will return a positive PID.
fail
-
If the module cannot immediately create a new child process for the specified job, the fork call will return with a small negative value.
queue
-
If the module cannot create a new child process for the specified job, the job will be deferred, and an attempt will be made to launch the job at a later time. See "Deferred processes" below. The return value will be a very negative number (job ID).
Note that jobs that use any of the "delay", "start_after", "depend_on", or "depend_start" options ignore this setting and always put the job on the deferred job queue (unless a different on_busy attribute is explicitly provided).
Also see $Forks::Super::ON_BUSY in MODULE VARIABLES, which specifies the busy behavior when an on_busy parameter is not supplied to the fork call.
force
fork { force => $bool }
-
If the force option is set, the fork call will disregard the usual criteria for deciding whether a job can spawn a child process, and will always attempt to create the child process.
queue_priority
fork { queue_priority => $priority }
-
In the event that a job cannot immediately create a child process and is put on the job queue (see "Deferred processes"), the queue_priority specifies the relative priority of the job on the job queue. In general, eligible jobs with high priority values will be started before jobs with lower priority values.
depend_on
depend_start
fork { depend_on => $id }
fork { depend_on => [ $id_1, $id_2, ... ] }
fork { depend_start => $id }
fork { depend_start => [ $id_1, $id_2, ... ] }
-
Indicates a dependency relationship between the job in this fork call and one or more other jobs. The identifiers may be process/job IDs or "name" attributes (see above) from earlier fork calls.
If a fork call specifies a depend_on option, then that job will be deferred until all of the child processes specified by the process or job IDs have completed. If a fork call specifies a depend_start option, then that job will be deferred until all of the child processes specified by the process or job IDs have started.
Invalid process and job IDs in a depend_on or depend_start setting will produce a warning message but will not prevent a job from starting.
Dependencies are established at the time of the fork call and can only apply to jobs that are known at run time. So for example, in this code,
$job1
=
fork
{
cmd
=>
$cmd
,
name
=>
"job1"
,
depend_on
=>
"job2"
};
$job2
=
fork
{
cmd
=>
$cmd
,
name
=>
"job2"
,
depend_on
=>
"job1"
};
at the time the first job is created, the job named "job2" has not been created yet, so the first job will not have a dependency (and a warning will be issued when the job is created). This may be a limitation but it also guarantees that there will be no circular dependencies.
When a dependency identifier is a name attribute that applies to multiple jobs, the job will be dependent on all existing jobs with that name:
# Job 3 will not start until BOTH job 1 and job 2 are done
$job1
=
fork
{
name
=>
"Sally"
, ... };
$job2
=
fork
{
name
=>
"Sally"
, ... };
$job3
=
fork
{
depend_on
=>
"Sally"
, ... };
# all of these jobs have the same name and depend on ALL previous jobs
$job4
=
fork
{
name
=>
"Ralph"
,
depend_start
=>
"Ralph"
, ...};
# no dependencies
$job5
=
fork
{
name
=>
"Ralph"
,
depend_start
=>
"Ralph"
, ...};
# depends on Job 4
$job6
=
fork
{
name
=>
"Ralph"
,
depend_start
=>
"Ralph"
, ...};
# depends on 4 and 5
The default "on_busy" behavior for jobs with dependencies is to go on to the job queue, ignoring the value of "ON_BUSY" in $Forks::Super::ON_BUSY (but not ignoring the
on_busy
attribute passed to the job, if any).
can_launch
fork { can_launch => \&methodName }
fork { can_launch => sub { ... anonymous sub ... } }
-
Supply a user-specified function to determine when a job is eligible to be started. The function supplied should return 0 if a job is not eligible to start and non-zero if it is eligible to start.
During a fork call or when the job queue is being examined, the user's can_launch method will be invoked with a single Forks::Super::Job argument containing information about the job to be launched. User code may make use of the default launch determination method by invoking the _can_launch method of the job object:
# Running on a BSD system with the uptime(1) call.
# Want to block jobs when the current CPU load
# (1 minute) is greater than 4 and respect all other criteria:
fork
{
cmd
=>
$my_command
,
can_launch
=>
sub
{
$job
=
shift
;
# a Forks::Super::Job object
return
0
if
!
$job
->_can_launch;
# default
$cpu_load
= (
split
/\s+/,`uptime`)[-3];
# get 1 minute avg CPU load
return
0
if
$cpu_load
> 4.0;
# system too busy. let's wait
return
1;
} }
callback
fork { callback => $subroutineName }
fork { callback => sub { BLOCK } }
fork { callback => { start => ..., finish => ..., queue => ..., fail => ... } }
-
Install callbacks to be run as certain events in the life cycle of a background process occur. The first two forms of this option are equivalent to
fork
{
callback
=> {
finish
=> ... } }
and specify code that will be executed when a background process is complete and the module has received its SIGCHLD event. A start callback is executed just after a new process is spawned. A queue callback is run if and only if the job is deferred for any reason (see "Deferred processes") and the job is placed onto the job queue for the first time. And the fail callback is run if the job is not going to be launched (that is, a case where the fork call would return -1).
Callbacks are invoked with two arguments: the Forks::Super::Job object that was created with the original fork call, and the job's ID (the return value from fork).
You should keep your callback functions short and sweet, like you do for your signal handlers. Sometimes callbacks are invoked from a signal handler, and the processing of other signals could be delayed if the callback functions take too long to run.
share
-
Allows variables in the parent process to be updated when the child exits.
Input is a listref of references -- scalar, list, or hash references -- that may be updated in a child process. When the child process finishes, the values of these variables in the parent are updated with the values that were in the child on its exit. The value of a scalar variable will be overwritten with the child value, arrays and hashes will be appended with the child values.
use
Forks::Super;
my
$a
=
'old value'
;
my
@a
= 1..5;
my
%a
= (
abc
=>
'def'
);
$job
=
fork
{
share
=> [ \
$a
, \
@a
, \
%a
],
sub
=>
sub
{
$a
=
'new value'
;
@a
=
qw(foo bar)
;
%a
= (
bar
=>
'foo'
,
19
=> 42);
}
};
waitpid
$job
, 0;
print
"\$a now contains $a\n"
;
# scalar overwritten => 'new value'
print
"\@a now contains @a\n"
;
# list appended => 1 2 3 4 5 foo bar
print
"\%a now contains "
,
keys
%a
,
"\n"
;
# hash appended => abc,bar,19
This option is not meaningful when used with the
cmd
orexec
options.If you use the
share
option in perl's "taint" mode, you will also need to pass anuntaint => 1
option to thefork
call.
remote
fork { remote => 'hostname', cmd => \@cmd, ... }
fork { remote => '[user[:pass]@]host[:port]', cmd => \@cmd, ... }
fork { remote => \%remote_opts, cmd => \@cmd, ... }
fork { remote => [host1,host2,...], cmd => \@cmd, ... }
fork { remote => [\%opts1,\%opts2,...], cmd => \@cmd, ... }
-
Runs the external command specified in
@cmd
on a remote host withssh
(other protocols likersh
may be supported in the future).Forks::Super
will connect to the remote host in a background process and run the command through the Net::OpenSSH module or other available method.The
remote
parameter value is either a remote host specification, or a reference to an array of remote host specifications. A remote host specification can be a simple scalar consisting of a hostname or IP address with optional username, password, or portremote
=>
'machine73.example.com'
remote
=>
'root@machine72'
remote
=>
'bob:pwdofbob@172.14.18.119:30022'
or it can be a hash reference with a
host
key and optional entries foruser
,port
,password
, and other options accepted by the constructor for Net::OpenSSHremote
=> {
host
=>
'172.14.18.119'
,
user
=>
'bob'
,
proto
=>
'ssh'
,
port
=> 30022,
key_path
=>
"$ENV{HOME}/.ssh/id_dsa"
}
A
host
parameter is required.user
andport
values default to the user executing the current program, and the default ssh port. Apassword
parameter need not be used when a sufficient password-less public key authentication scheme is in place.If the
remote
parameter value is an array reference, then the elements of that array are considered separate allowable remote host specifications. When a background job is ready to be launched,Forks::Super
will iterate over the specifications in a random order looking for a specification that can be used to run the job on a remote host.The
remote
feature only works with thecmd
style calls tofork
. For other styles offork
calls, the information in theremote
option will be ignored.A background process run on the local host has a different impact on the local machine's resources than a process run on a remote host, so a different scheme to decide when a job can be started is used for remote jobs. See "%MAX_PROC" in MODULE VARIABLES.
suspend
fork { suspend => 'subroutineName' }
fork { suspend => \&subroutineName }
fork { suspend => sub { ... anonymous sub ... } }
-
Registers a callback function that can indicate when a background process should be suspended and when it should be resumed. The callback function will receive one argument -- the Forks::Super::Job object that owns the callback -- and is expected to return a numerical value. The callback function will be evaluated periodically (for example, during the productive downtime of a "wait"/"waitpid" call or
Forks::Super::Util::pause()
call).When the callback function returns a negative value and the process is active, the process will be suspended.
When the callback function returns a positive value while the process is suspended, the process will be resumed.
When the callback function returns 0, the job will remain in its current state.
my
$pid
=
fork
{
cmd
=>
"run-the-heater"
,
suspend
=>
sub
{
my
$t
= get_temperature()->Fahrenheit;
if
(
$t
< 68) {
return
+1;
# too cold, make sure heater is on
}
elsif
(
$t
> 72) {
return
-1;
# too warm, suspend the heater process
}
else
{
return
0;
# leave it on or off
}
}
};
sync
fork { sync => $n }
fork { sync => 'string' }
fork { sync => \@list }
-
Creates one or more synchronization objects that will be accessible to both the parent and child processes.
The argument to the
sync
option is either a number, a string consisting of'C'
,'P'
, and'N'
characters, or a list reference consisting of'C'
,'P'
, and'N'
elements. For a string or list reference input, the number of synchronization objects created will be the length of the string or length of the list. The values 'C', 'P', and 'N' determine which process initially has exclusive access to each synchronization object after the fork.'C'
means that the child process should begin with exclusive access to the resource,'P'
means that the parent process should begin with exclusive access to the resource, and'N'
means that neither process should have access to the resource after the fork.Both of these calls create 3 synchronization objects to be shared between a parent and child process. The first resource is initially held by the parent, the second resource is initially held by the child, and the third resource is not held by either process:
$pid
=
fork
{
sync
=>
'PCN'
};
$pid
=
fork
{
sync
=> [
'P'
,
'C'
,
'N'
] };
Using the
sync
option with a numeric value will create that number of synchronization objects, with none of the objects initially held by either the parent or child process. That is, these three uses of thesync
option are equivalent:$pid
=
fork
{
sync
=> 2 };
$pid
=
fork
{
sync
=>
'NN'
};
$pid
=
fork
{
sync
=> [
'N'
,
'N'
] };
After the fork, the parent and child processes can acquire and release exclusive access to these objects with the acquire and release methods of the Forks::Super::Job object.
Synchronization objects are useful for coordinating activity between a parent and child processes. You could use a synchronization object to coordinate appending to a common file, for example.
# in parent:
$job
->acquire(0);
open
my
$fh
,
'>>'
,
$common_file
;
print
$fh
$some_message_from_parent
;
close
$fh
;
$job
->release(0);
# in child:
Forks::Super->acquire(0);
open
my
$fh
,
'>>'
,
$common_file
;
print
$fh
$some_message_from_child
;
close
$fh
;
Forks::Super->release(0);
os_priority
fork { os_priority => $priority }
-
On supported operating systems, and after the successful creation of the child process, attempt to set the operating system priority of the child process, using your operating system's notion of what priority is.
On unsupported systems, this option is ignored.
cpu_affinity
fork { cpu_affinity => $bitmask }
fork { cpu_affinity => [ @list_of_processors ] }
-
On supported operating systems with multiple cores, and after the successful creation of the child process, attempt to set the child process's CPU affinity.
In the scalar style of this option, each bit of the bitmask represents one processor. Set a bit to 1 to allow the process to use the corresponding processor, and set it to 0 to disallow the corresponding processor.
For example, to bind a new child process to use CPU #s 2 and 3 on a system with (at least) 4 processors, you would call one of
fork
{
cpu_affinity
=> 12 , ... } ;
# 12 = (1<<2) + (1<<3)
fork
{
cpu_affinity
=> [2,3] , ... };
There may be additional restrictions on the range of valid values for the
cpu_affinity
option imposed by the operating system. See the Sys::CpuAffinity docs for discussion of some of these restrictions.This feature requires the Sys::CpuAffinity module. The
Sys::CpuAffinity
module is bundled withForks::Super
, or it may be obtained from CPAN.
daemon
fork { daemon => 1 }
-
Launches the background process as a daemon, partially severing the relationship between the parent and child process.
Features of daemon process:
closes all open file descriptors from the parent
begins in root directory
"/"
unless the dir => ...
option is specified
has umask of zero unless umask => ... option specified
daemon will not be affected by signals to the parent
The following restrictions apply to
daemon
processes:
the
finish
callback (see callbacks), if any, will never be called for a daemon
the Forks::Super::Job::is_XXX, state methods may not give correct results for a daemon
the Forks::Super::Job::status method will not work on a daemon
you cannot use "waitpid" on a daemon process
on MSWin32, must be used with
cmd => ...
or exec => ...
option
on MSWin32, this feature requires Win32::Process
Also note that
daemon
processes will not count against the$Forks::Super::MAX_PROC
limits.
debug
undebug
fork { debug => $bool }
fork { undebug => $bool }
-
Overrides the debugging setting in
$Forks::Super::DEBUG
(see DEBUG under MODULE VARIABLES) for this specific job. If specified, thedebug
parameter controls only whether the module will output debugging information related to the job created by thisfork
call.Normally, the debugging settings of the parent, including the job-specific settings, are inherited by child processes. If the
undebug
option is specified with a non-zero parameter value, then debugging will be disabled in the child process.Also see $Forks::Super::DEBUG in MODULE VARIABLES, which specifies the debug settings for a job when the
debug
parameter is not supplied, and debug settings for messages that are not related to a particular background job.
emulate
fork { emulate => $bool }
-
When emulation mode is enabled, a call to
fork
does not actually spawn a new process, but instead runs the job to completion in the parent process and returns a job object that is already in the completed state.When specified, the value for the parameter
emulate
overrides the emulation mode setting in$Forks::Super::EMULATION_MODE
for a specific job.One use case for emulation mode is when you are debugging a script with the perl debugger. Using the debugger with multi-process programs is tricky, and having all Perl code execute in the main process can be helpful.
Also see $EMULATION_MODE in MODULE VARIABLES, which specifies the emulation mode for a job when the
emulate
parameter is not supplied.Not all options to
fork
are compatible with emulation mode.
Deferred processes
Whenever some condition exists that prevents a fork()
call from immediately starting a new child process, one option is to defer the job. Deferred jobs are placed on a queue. At periodic intervals, in response to periodic events, or whenever you invoke the Forks::Super::Deferred::check_queue
method in your code, the queue will be examined to see if any deferred jobs are eligible to be launched.
Job ID
When a fork()
call fails to spawn a child process but instead defers the job by adding it to the queue, the fork()
call will return a unique, large negative number called the job ID. The number will be negative and large enough (<= -1_000_000) so that it can be distinguished from any possible PID, Windows pseudo-process ID, process group ID, or fork()
failure code.
Although the job ID is not the actual ID of a system process, it may be used like a PID as an argument to "waitpid", as a dependency specification in another fork
call's "depend_on" or "depend_start" option, or the other module methods used to retrieve job information (See "Obtaining job information" below). Once a deferred job has been started, it will be possible to obtain the actual PID (or on Windows, the actual pseudo-process ID) of the process running that job.
Job priority
Every job on the queue will have a priority value. A job's priority may be set explicitly by including the "queue_priority" option in the fork()
call, or it will be assigned a default priority near zero. Every time the queue is examined, the queue will be sorted by this priority value and an attempt will be made to launch each job in this order. Note that different jobs may have different criteria for being launched, and it is possible that an eligible low priority job may be started before an ineligible higher priority job.
Queue examination
Certain events in the SIGCHLD
handler or in the "wait", "waitpid", and/or "waitall" methods will cause the list of deferred jobs to be evaluated and to start eligible jobs. But this configuration does not guarantee that the queue will be examined on a timely or frequent enough basis. The user may invoke the
Forks::Super::Deferred::check_queue()
method at any time to force the queue to be examined.
Alternate fork syntax
Since v0.72, the fork
function recognizes these additional syntaxes:
fork \&code, %options
fork \&code, \%options
If the first argument to fork
is a code reference, then it is treated like a "sub" argument, and is equivalent to the call
fork
{
sub
=> \
&code
,
%options
}
This style of call resembles the async function in Coro.
fork \@cmd, %options
fork \@cmd, \%options
If the first argument to fork
is an array reference, then it is treated like a "cmd"
argument, and is equivalent to the call
fork
{
cmd
=> \
@cmd
,
%options
}
OTHER FUNCTIONS
Process monitoring and signalling
wait
$reaped_pid = wait [$timeout]
-
Like the Perl wait system call, blocks until a child process terminates and returns the PID of the deceased process, or
-1
if there are no child processes remaining to reap. The exit status of the child is returned in $?.This version of the
wait
call can take an optional$timeout
argument, which specifies the maximum length of time in seconds to wait for a process to complete. If a timeout is supplied and no process completes before the timeout expires, then thewait
function returns the value-1.5
(you can also test if the return value of the function is the same as Forks::Super::TIMEOUT, which is a constant to indicate that a wait call timed out).If
wait
(or "waitpid" or "waitall") is called when all jobs are either complete or suspended, and there is at least one suspended job, then the behavior is governed by the setting of the $Forks::Super::WAIT_ACTION_ON_SUSPENDED_JOBS variable.
waitpid
$reaped_pid = waitpid $pid, $flags [, $timeout]
-
Waits for a child with a particular PID or a child from a particular process group to terminate and returns the PID of the deceased process, or
-1
if there is no suitable child process to reap. If the return value contains a PID, then $? is set to the exit status of that process.The first argument (
$pid
) may be a numeric process identifier, a Forks::Super::Job object, a process group id, or a job name (specified with a "name" argument when the process was created).A
$pid
value of-1
waits for the first available child process to terminate.A
$pid
value of0
waits for the first available child from the same process group as the calling process.A negative
$pid
that is not recognized as a valid job ID will be interpreted as a process group ID, and thewaitpid
function will wait on the first available child from that process group.$pid
may be a job object that represents a deferred process (see "Deferred processes"). In that case,waitpid
can wait for the process to start and then wait for the process to terminate.On some^H^H^H^H every modern system that I know about, a
$flags
value ofPOSIX::WNOHANG
is supported to perform a non-blocking wait. See the Perl waitpid documentation.If the optional
$timeout
argument is provided, thewaitpid
function will block for at most$timeout
seconds, and return -1.5
(or Forks::Super::TIMEOUT) if a suitable process is not reaped in that time.
waitall
$count = waitall [$timeout]
-
Blocking wait for all child processes, including deferred jobs that have not started at the time of the
waitall
call. Return value is the number of processes that were waited on.If the optional
$timeout
argument is supplied, the function will block for at most$timeout
seconds before returning.
kill
$num_signalled = Forks::Super::kill $signal, @jobsOrPids
-
A cross-platform process signalling function. Sends "signals" to the background processes specified by process IDs, job names, or Forks::Super::Job objects. Returns the number of jobs that were successfully signalled.
This method "does what you mean" with respect to terminating, suspending, or resuming processes. This method can be used to signal to jobs in the job queue (that don't even have a proper process id yet), or signal processes on Windows systems (which do not have a Unix-like signals framework). The appropriate Windows API calls are used to communicate with Windows processes and threads. It is highly recommended that you install the Win32::API module for this purpose.
See also the Forks::Super::Job::suspend and resume methods. It is preferable (out of portability concerns) to use these methods
$job
->suspend;
$job
->resume;
rather than
Forks::Super::kill
.
Forks::Super::
kill
'STOP'
,
$job
;
Forks::Super::
kill
'CONT'
,
$job
;
kill_all
$num_signalled = Forks::Super::kill_all $signal
-
Sends a "signal" (see expanded meaning of "signal" in "kill", above) to all active processes spawned from the
Forks::Super
module. Returns the number of processes signalled.
isValidPid
Forks::Super::isValidPid( $pid )
-
Tests whether the return value of a
fork
call indicates that a background process has been successfully created or not. On POSIX-y systems it is sufficient to check whether $pid
is a positive integer, but isValidPid
is a more portable way to test the return value as it also identifies pseudo-process IDs on Windows systems, which are typically negative numbers. isValidPid
will return false for a large negative process id, which the fork
call returns to indicate that a job has been deferred (see "Deferred processes"). Of course it is possible that the job will run later and have a valid process id associated with it.
Lazy background evaluation
bg_eval
$result = bg_eval { BLOCK }
$result = bg_eval { BLOCK } { option => value, ... }
-
Launches a block of code in a background process and returns immediately. The next time the result of the function call is referenced, interprocess communication is used to retrieve the result of the child process, waiting until the child finishes, if necessary.
$result
= bg_eval {
sleep
3;
return
42 };
# returns immediately
print
"Result was $result\n"
;
# takes 3 seconds to execute
With the
bg_eval
function, you can perform other tasks while waiting for the results of another task to be available.$result
= bg_eval {
sleep
5;
return
[1,2,3] };
do_thing_that_takes_about_5_seconds();
print
"Result was @$result\n"
;
# this line probably runs immediately
The background process is spawned with the
Forks::Super::fork
call, and will block, fail, or defer a job in accordance with all the other rules of this module. Additional options may be passed tobg_eval
that will be provided to thefork
call. Most valid options to thefork
call are also valid for thebg_eval
call, including timeouts, delays, job dependencies, names, and callbacks. This example will populate $result
with the value undef
if the bg_eval
operation takes longer than 20 seconds.
# run task in background, but timeout after 20 seconds
$result
= bg_eval {
download_from_teh_Internet(
$url
,
@options
)
} {
timeout
=> 20,
os_priority
=> 3 };
do_something_else();
if
(!
defined
(
$result
)) {
# operation probably timed out ...
}
else
{
# operation probably succeeded, use $result
}
An additional option that is recognized by
bg_eval
(and "bg_qx", see below) isuntaint
. If you are running perl in "taint" mode, the value(s) returned bybg_eval
andbg_qx
are likely to be "tainted". If the code or command run withbg_eval
orbg_qx
is trusted, you can pass theuntaint
option (with a true value assigned to it) so that the values returned bybg_eval
andbg_qx
will be taint clean.Calls to
bg_eval
(and "bg_qx") will populate the variables$Forks::Super::LAST_JOB
and$Forks::Super::LAST_JOB_ID
with the Forks::Super::Job object and the job id, respectively, for the job created by thebg_eval
/bg_qx
call. See "LAST_JOB" in MODULE VARIABLES below.Since v0.74, the value returned by the background code block may be a blessed object.
# ok since v0.74
$result
= bg_eval {
sleep
10; Some::Object->new };
$result
->someMethod();
List context is not supported directly by the
bg_eval
function, but the Forks::Super::bg_eval tied class provides a way to evaluate a code block asynchronously in list context.See also: "bg_qx".
bg_qx
$result = bg_qx $command
$result = bg_qx $command, { option => value, ... }
$result = bg_qx [@command]
$result = bg_qx [@command], { option => value, ... }
-
Launches an external program and returns immediately. Execution of the command continues in a background process. When the command completes, interprocess communication copies the output of the command into the result (left hand side) variable. If the result variable is referenced again before the background process is complete, the program will wait until the background process completes. A job that fails or otherwise produces no output will return the empty string (
""
).Think of this command as a background version of Perl's backticks or qx() function (albeit one that can only work in scalar context).
The background job will be spawned with the
Forks::Super::fork
call, and the command can block, fail, or defer a background job in accordance with all of the other rules of this module. Additional options may be passed tobg_qx
that will be provided to thefork
call. For example,$result
= bg_qx
"nslookup joe.schmoe.com"
, {
timeout
=> 15 }
will run
nslookup
in a background process for up to 15 seconds. The next time$result
is referenced in the program, it will contain all of the output produced by the process up until the time it was terminated. Most valid options for thefork
call are also valid options forbg_qx
, including timeouts, delays, job dependencies, names, and callbacks. The only invalid options forbg_qx
are "cmd", "sub", "exec", and "child_fh".Like "bg_eval", a call to
bg_qx
will populate the variables$Forks::Super::LAST_JOB
and$Forks::Super::LAST_JOB_ID
with the Forks::Super::Job object and the job id, respectively, for the job created by thebg_qx
call. See "LAST_JOB" under MODULE VARIABLES below.The
bg_qx
function does not directly support list context, but see the Forks::Super::bg_qx tied class for a way to evaluate the output of an external command in list context asynchronously.See also: "bg_eval".
Forks%3A%3ASuper%3A%3Abg_eval_tied_class
Forks%3A%3ASuper%3A%3Abg_qx_tied_class
Forks::Super::bg_eval tied class
Forks::Super::bg_qx tied class
tie $result, 'Forks::Super::bg_eval', sub { CODE }, \%options
tie @result, 'Forks::Super::bg_eval', sub { CODE }, \%options
tie %result, 'Forks::Super::bg_eval', sub { CODE }, \%options
tie $output, 'Forks::Super::bg_qx', $command, \%options
tie @output, 'Forks::Super::bg_qx', $command, \%options
tie %output, 'Forks::Super::bg_qx', $command, \%options
-
Alternative calls to "bg_eval" and "bg_qx" functions that also work in list context.
Instead of calling
my
$result
= long_running_function(
$arg1
,
$arg2
);
my
@output
=
qx(some long running command)
;
my
%hash
= long_running_function_that_returns_hash();
you could say
tie
$result
,
'Forks::Super::bg_eval'
,
sub
{long_running_function(
$arg1
,
$arg2
)};
tie
@output
,
'Forks::Super::bg_qx'
,
qq[some long running command]
;
tie
%hash
,
'Forks::Super::bg_eval'
,
sub
{long_running_func_returns_hash()};
The result of each of these expressions is to tie a variable to the result of a background process. Like
bg_qx
andbg_eval
, these expressions spawn a background process and return immediately. Also likebg_qx
andbg_eval
, the module retrieves the results of the background operation the next time the tied variables are evaluated, waiting for the background process to finish if necessary.Like other bg_qx and bg_eval calls, these expressions respect most of the additional options that you can pass to Forks::Super::fork.
tie
@output
,
'Forks::Super::bg_qx'
,
"ssh me@remotehost who"
,{
timeout
=> 10 };
tie
%result
,
'Forks::Super::bg_eval'
,\
&my_function
,{
cpu_affinity
=> 0x2 };
Note: the constants
BG_QX
andBG_EVAL
are exported by default, and provide a convenient shorthand for "Forks::Super::bg_qx
" and "Forks::Super::bg_eval
", respectively. So you could rewrite the previous two expressions astie
@output
, BG_QX,
'ssh me@remotehost who'
, {
timeout
=> 10 };
tie
%result
, BG_EVAL, \
&my_function
, {
cpu_affinity
=> 0x2 };
Interprocess communication functions
read_stdout
read_stderr
$line = Forks::Super::read_stdout($pid [,%options] )
@lines = Forks::Super::read_stdout($pid [,%options] )
$line = Forks::Super::read_stderr($pid [, %options])
@lines = Forks::Super::read_stderr($pid [, %options] )
$line = $job->read_stdout( [%options] )
@lines = $job->read_stdout( [%options] )
$line = $job->read_stderr( [%options])
@lines = $job->read_stderr( [%options] )
-
For jobs that were started with the
child_fh => "out"
andchild_fh => "err"
options enabled, read data from the STDOUT and STDERR file handles of child processes.Aside from the more readable syntax, these functions may be preferable to some alternate ways of reading from an interprocess I/O handle
$line
= < {
$Forks::Super::CHILD_STDOUT
{
$pid
}} >;
@lines
= < {
$job
->{child_stdout}} >;
@lines
= < {
$Forks::Super::CHILD_STDERR
{
$pid
}} >;
$line
= < {
$job
->{child_stderr}} >;
because the
read_stdout
and read_stderr
functions will
clear the EOF condition when the parent is reading from the handle faster than the child is writing to it
not block (unless blocking has been specifically enabled on the I/O handle or in the read call).
work correctly in list context (prior to Perl v5.18, a limitation with overloading means that
<$overloaded_object>
was always called in scalar context).
Functions work in both scalar and list context. If there is no data to read on the file handle, but the child process is still active and could put more data on the file handle, these functions return
""
(empty string) in scalar context and()
(empty list) in list context. If there is no more data on the file handle and the child process is finished, the return values of the functions will beundef
.These methods all take any number of arbitrary key-value pairs as additional arguments. There are currently three recognized options to these methods:
block => 0 | 1
Determines whether blocking I/O is used on the file, socket, or pipe handle. If enabled, the read_stdXXX function will hang until input is available or until the module can determine that the process creating input for that handle has completed. Blocking I/O can lead to deadlocks unless you are careful about managing the process creating input for the handle. The default mode is non-blocking.
warn => 0 | 1
If warnings on the read_stdXXX function are disabled, then some warning messages (reading from a closed handle, reading from a non-existent/unconfigured handle) will be suppressed. Enabled by default.
Note that the output of the child process may be buffered, and data on the channel that
read_stdout
andread_stderr
read from may not be available until the child process has produced a lot of output, or until the child process has finished. Forks::Super
will make an effort to autoflush the file handles that write from one process and are read in another process, but assuring that arbitrary external commands will flush their output regularly is beyond the scope of this module.
timeout => $num_seconds
On an otherwise non-blocking file handle, waits up to the specified number of seconds for input to become available.
getc_stdout
getc_stderr
$char = $job->getc_stdout( [%options] )
$char = $job->getc_stderr( [%options] )
-
Retrieves a single character from a child process output stream, if available. Supports the same
block
,timeout
, andwarn
options as the "read_stdout" and "read_stderr" functions.
<$job>
For perl v5.8.8 or better, the <>
operator has been overloaded for the Forks::Super::Job package such that calling
<
$job
>
is equivalent to calling
$job
->read_stdout()
Note in Perls older than v5.18.0, due to a limitation on overloading, this construction is always interpreted in scalar context.
close_fh
Forks::Super::close_fh($pid)
Forks::Super::close_fh($pid, 'stdin', 'stdout', 'stderr')
-
Closes the specified open file handles and socket handles for interprocess communication with the specified child process. With no additional arguments, closes all open handles for the process.
Most operating systems impose a hard limit on the number of file handles that can be opened in a process simultaneously, so you should use this function when you are finished communicating with a child process so that you don't run into that limit.
See also "close_fh" in Forks::Super::Job.
open2
open3
($in,$out,$pid,$job) = Forks::Super::open2( @command [, \%options ] )
($in,$out,$err,$pid,$job) = Forks::Super::open3( @command [, \%options] )
-
Starts a background process and returns file handles to the process's standard input and standard output (and standard error in the case of the
open3
call). Also returns the process id and the Forks::Super::Job object associated with the background process.Compare these methods to the main functions of the IPC::Open2 and IPC::Open3 modules.
Many of the options that can be passed to
Forks::Super::fork
can also be passed toForks::Super::open2
andForks::Super::open3
:# run a command but kill it after 30 seconds
(
$in
,
$out
,
$pid
) =
Forks::Super::open2(
"ssh me\@mycomputer ./runCommand.sh"
,
{
timeout
=> 30 });
# invoke a callback when command ends
(
$in
,
$out
,
$err
,
$pid
,
$job
) =
Forks::Super::open3(
@cmd
,
{
callback
=>
sub
{
print
"\@cmd finished!\n"
}});
Obtaining job information
Forks::Super::Job::get
$job = Forks::Super::Job::get($pid)
-
Returns a
Forks::Super::Job
object associated with process ID or job ID$pid
. See Forks::Super::Job for information about the methods and attributes of these objects.Returns
undef
if$pid
is not a valid job ID.This subroutine is mainly redundant since v0.41, where the default return value of
fork
is an overloadedForks::Super::Job
object instead of a simple scalar process id.
Forks::Super::Job::getByName
@jobs = Forks::Super::Job::getByName($name)
-
Returns zero or more
Forks::Super::Job
objects with the specified job name. A job has a name if a "name" parameter was provided in theForks::Super::fork
call.
state
$state = Forks::Super::state($pid)
-
Returns the state of the job specified by the given process ID, job ID, or job name. See "state" in Forks::Super::Job.
status
$status = Forks::Super::status($pid)
-
Returns the exit status of a completed child process represented by process ID, job ID, or
name
attribute. Aside from being a permanent store of the exit status of a job, using this method might be a more reliable indicator of a job's status than checking$?
after a "wait" or "waitpid" call, because it is possible for this module'sSIGCHLD
handler to temporarily corrupt the$?
value while it is checking for deceased processes.
PREFORK, POSTFORK
PREFORK { ... };
POSTFORK { ... };
POSTFORK_PARENT { ... };
POSTFORK_CHILD { ... };
Sets up one or more code blocks that are run before and after the system call to fork
. Use cases for these functions include setting up I/O handles, database connections, or any other resource that doesn't play nicely across a fork
.
POSTFORK
blocks are executed by both parent and child processes immediately after the fork
. POSTFORK_PARENT
blocks are only executed in the parent and POSTFORK_CHILD
blocks are only executed in the child process.
PREFORK
blocks are executed first-in, first-out.
POSTFORK
, POSTFORK_PARENT
, and POSTFORK_CHILD
blocks are executed last-in, first-out.
Miscellaneous functions
pmap
@result = pmap BLOCK LIST
@result = pmap BLOCK \%opts, LIST
-
Like the
map BLOCK LIST
syntax for the builtin map function, evaluatesBLOCK
in list context for each element ofLIST
and returns the list value composed of the results of each evaluation. Each evaluation is performed in a background process, so the evaluations may be done in parallel.Each element of the list is aliased to
$_
prior to evaluation of theBLOCK
, just like withmap
.@result
= Forks::Super::pmap { evaluate_element(
$_
) }
@elements
;
If the first element after
BLOCK
is an unblessed hash reference, it will be treated as a set of options to be passed to the underlying "fork" call.# process a set of URLs, but skip any URLs that take more than 10s
@result
= pmap {; {
url
=>
$_
,
result
=>process_url(
$_
)} } {
timeout
=> 10},
@urls
;
In the edge case that your
LIST
might contain hash references but you don't want the first one to be interpreted as a set of options to fork
, you can always safely pass a reference to an empty hash# always safe to do this
@result
= pmap { CODE } {},
@list
;
pgrep
@match = pgrep BLOCK LIST
@match = pgrep BLOCK \%opts, LIST
$count = pgrep BLOCK LIST
$count = pgrep BLOCK \%opts, LIST
-
Like the
grep BLOCK LIST
syntax for the builtin grep function, evaluatesBLOCK
in list context for each element ofLIST
and returns the subset of list elements for which the code block evaluated to true. Each evaluation is performed in a background process, so the evaluations may be done in parallel. Each element of the list is aliased to
$_
prior to evaluation of theBLOCK
, just like withgrep
.@success
= Forks::Super::pgrep { evaluate(
$_
) =~ /OK/ }
@elements
;
If the first element after
BLOCK
is an unblessed hash reference, it will be treated as a set of options to be passed to the underlying "fork" call. In the edge case that yourLIST
might contain hash references but you don't want the first one to be interpreted as a set of options tofork
, you can always safely pass a reference to an empty hash# always safe to do this
@match
= pgrep { CODE } {},
@list
;
In scalar context, returns the number of elements in the list for which the evaluation is true, just like
grep
.
pause
Forks::Super::pause($delay)
-
A productive drop-in replacement for the Perl sleep system call (or Time::HiRes::sleep, if available). On systems like Windows that lack a proper method for handling
SIGCHLD
events, theForks::Super::pause
method will occasionally reap child processes that have completed and attempt to dispatch jobs on the queue.On other systems, using
Forks::Super::pause
is less vulnerable thansleep
to interruptions from this module (See "BUGS AND LIMITATIONS" below).
init_pkg
deinit_pkg
Forks::Super->deinit_pkg
Forks::Super->init_pkg
-
RT#124316 identified an issue where the
SIGCHLD
handler used byForks::Super
would interfere with a piped open. That is, theclose
call in code likemy
$pid
=
open
my
$fh
,
"|-"
,
"some command you expect to work ..."
;
...
close
$fh
or
die
"..."
;
could fail because
Forks::Super
'sSIGCHLD
handler might reap the process before the implicitwaitpid
call in theclose
function gets to it.In some situations -- say, near the end of your program, when you are not going to use
Forks::Super::fork
anymore, but you still have a reason to call a piped open -- it is desirable and appropriate to uninstallForks::Super
'sSIGCHLD
handler. TheForks::Super::deinit_pkg
is provided for this purpose.Forks::Super::deinit_pkg;
Forks::Super->deinit_pkg;
Either one of these calls will uninstall the
SIGCHLD
handler and revert thefork
,waitpid
,wait
, andkill
functions to Perl's builtin behaviors. It is a kludgy attempt to "uninstall" the module, or at least several features of the module, to work around the issue with piped opens. The
init_pkg
function, invoked as eitherForks::Super::init_pkg
orForks::Super->init_pkg
, installs features of the module into your program. It is automatically called when theForks::Super
module is imported. So it is not necessary for users to call it explicitly, unless they have previously calleddeinit_pkg
and wish to re-enable features of the module.
MODULE VARIABLES
Most module variables may be initialized on the use Forks::Super
line
# set max simultaneous procs to 5, allow children to call CORE::fork()
use Forks::Super MAX_PROC => 5, CHILD_FORK_OK => -1;
or they may be set explicitly at run-time:
$Forks::Super::ON_BUSY
=
'queue'
;
$Forks::Super::IPC_DIR
=
"/home/joe/temp-ipc-files"
;
Many module variables govern global settings that affect all fork
calls. But many can be overridden by a parameter setting in any specific fork
call.
$Forks::Super::ON_BUSY
=
'queue'
;
$j1
=
fork
{
sub
=> ... };
# put on queue if busy
$j2
=
fork
{
sub
=> ..., on_busy =>
'block'
};
# block if busy
Module variables that may be of interest include:
MAX_PROC
$Forks::Super::MAX_PROC = int
-
The maximum number of simultaneous background processes that can be spawned by
Forks::Super
. If afork
call is attempted while there are already at least this many active background processes, the behavior of thefork
call will be determined by the value in $Forks::Super::ON_BUSY or by the "on_busy" option passed to thefork
call.The "force" option passed to a
fork
call overrides this setting. The value might also not be respected if the user supplies a code reference in the "can_launch" option and the user-supplied code does not test whether there are already too many active processes. Since v0.77, the package variable
$Forks::Super::MAX_PROC
or themax_proc
parameter tofork
may be assigned a code reference. When the module needs to know the maximum number of allowed background processes, it will invoke the subroutine and expect it to return an integer. Here's a demonstration of how you could configure a multi-process program to use fewer resources between 9:00am and 5:00pm: $Forks::Super::MAX_PROC
=
sub
{
my
@lt
=
localtime
;
my
$hour
=
$lt
[2];
$hour
>= 9 &&
$hour
< 17 ? 4 : 16;
};
%MAX_PROC
%Forks::Super::MAX_PROC
-
Since v0.75. The maximum number of simultaneous background processes that can be spawned by
Forks::Super
and run on a remote host. The keys of this hash are remote hostnames, and the values are integers specifying how many jobs can be dispatched to those hosts (see the "remote" option). The key"DEFAULT"
can be used to provide a default maximum for hosts otherwise not specified. If a maximum process count for a remote hostname is not specified in%MAX_PROC
and there is not a"DEFAULT"
setting in%MAX_PROC
, the maximum number of processes that can be dispatched to the host defaults to$Forks::Super::MAX_PROC
.
MAX_LOAD
$Forks::Super::MAX_LOAD = $max_cpu_utilization
-
The threshold CPU load at which jobs created by a
fork
call will be deferred. The metric of "CPU load" means different things on different operating systems. See the discussion under the "max_load" parameter tofork
for details.
ON_BUSY
$Forks::Super::ON_BUSY = 'block' | 'fail' | 'queue'
-
Determines behavior of a
fork
call when the system is too busy to create another background process.If this value is set to
block
, thenfork
will wait until the system is no longer too busy and then launch the background process. The return value will be a normal process ID value (assuming there was no system error in creating a new process).If the value is set to
fail
, thefork
call will return immediately without launching the background process. The return value will be-1
. AForks::Super::Job
object will not be created.If the value is set to
queue
, then thefork
call will create a "deferred" job that will be queued and run at a later time. Also see the "queue_priority" option tofork
to set the urgency level of a job in case it is deferred. The return value will be a large and negative job ID.This value will be ignored in favor of an "on_busy" option supplied to the
fork
call.
CHILD_FORK_OK
$Forks::Super::CHILD_FORK_OK = -1 | 0 | +1
-
Spawning a child process from another child process with this module has its pitfalls, and this capability is disabled by default: you will get a warning message and the
fork()
call will fail if you try it.To override this behavior, set
$Forks::Super::CHILD_FORK_OK
to a non-zero value. Setting it to a positive value will allow you to use all the functionality of this module from a child process (with the obvious caveat that you cannotwait
on the child process of a child process from the main process).Setting
$Forks::Super::CHILD_FORK_OK
to a negative value will disable the functionality of this module in child processes but will reenable the Perl builtinfork()
system call. Note that this module will not have any preconceptions about which is the "parent process" until the first call to
Forks::Super::fork
. This means it is possible to useForks::Super
functionality in processes that were not spawned byForks::Super
, say, by an explicitCORE::fork()
call:
1: use Forks::Super;
2:
$Forks::Super::CHILD_FORK_OK
= 0;
3:
4:
$child1
= CORE::
fork
();
5:
if
(
$child1
== 0) {
6:
# OK -- child1 is still a valid "parent process"
7:
$grandchild1
= Forks::Super::
fork
{ ... };
8: ...;
9:
exit
;
10: }
11:
$child2
= Forks::Super::
fork
();
12:
if
(
$child2
== 0) {
13:
# NOT OK - parent of child2 is now "the parent"
14:
$grandchild2
= Forks::Super::
fork
{ ... };
15: ...;
16:
exit
;
17: }
18:
$child3
= CORE::
fork
();
19:
if
(
$child3
== 0) {
20:
# NOT OK - call in line 11 made parent of child3 "the parent"
21:
$grandchild3
= Forks::Super::
fork
{ ... };
22: ...;
23:
exit
;
24: }
More specifically, this means it is OK to use the
Forks::Super
module in a daemon process:use
Forks::Super;
$Forks::Super::CHILD_FORK_OK
= 0;
CORE::
fork
() &&
exit
;
$daemon_child
= Forks::Super::
fork
();
# ok
DEBUG
$Forks::Super::DEBUG = bool
-
To see the internal workings of the
Forks::Super
module, set$Forks::Super::DEBUG
to a non-zero value. Information messages will be written to theForks::Super::Debug::DEBUG_FH
file handle. By defaultForks::Super::Debug::DEBUG_FH
is aliased toSTDERR
, but it may be reset by the module user at any time.Debugging behavior may be overridden for specific jobs if the "debug" or "undebug" option is provided to
fork
.
EMULATION_MODE
$Forks::Super::EMULATION_MODE = bool
-
When emulation mode is enabled, the
fork
call does not actually spawn a new process, but instead runs the job to completion in the foreground process and returns a job object that is already in the completed state.One use case for emulation mode is when you are debugging a script with the perl debugger. Using the debugger with multi-process programs is tricky, and having all Perl code execute in the main process can be helpful.
The default emulation mode may be overridden for specific jobs if the "emulate" option is provided to
fork
.Not all options to
fork
are compatible with emulation mode.
CHILD_STDxxx
%CHILD_STDxxx
%Forks::Super::CHILD_STDIN
%Forks::Super::CHILD_STDOUT
%Forks::Super::CHILD_STDERR
-
Deprecated. See Note, below.
In jobs that request access to the child process file handles, these hash arrays contain file handles to the standard input and output streams of the child. The file handles for particular jobs may be looked up in these tables by process ID or job ID for jobs that were deferred.
Remember that from the perspective of the parent process,
$Forks::Super::CHILD_STDIN{$pid}
is an output file handle (what you print to this file handle can be read in the child's STDIN), and$Forks::Super::CHILD_STDOUT{$pid}
and$Forks::Super::CHILD_STDERR{$pid}
are input file handles (for reading what the child wrote to STDOUT and STDERR).As with any asynchronous communication scheme, you should be aware of how to clear the EOF condition on file handles that are being simultaneously written to and read from by different processes. A construction like this works on most systems:
# in parent, reading STDOUT of a child
for
(;;) {
while
(<{
$Forks::Super::CHILD_STDOUT
{
$pid
}}>) {
print
"Child $pid said: $_"
;
}
# EOF reached, but child may write more to file handle later.
sleep
1;
seek
$Forks::Super::CHILD_STDOUT
{
$pid
}, 0, 1;
}
The Forks::Super::Job object provides the methods
write_stdin(@msg)
,read_stdout(\%options)
, andread_stderr(\%options)
for object oriented read and write operations to and from a child's IPC file handles. These methods can adjust their behavior based on the type of IPC channel (file, socket, or pipe) or other idiosyncrasies of your operating system (#@$%^&*! Windows), so using those methods is preferred to using the file handles directly. Note that handles for background process IPC are also available through the Forks::Super::Job object (the return value from
Forks::Super::fork
), in$pid
->{child_stdin}
$pid
->{child_stdout}
$pid
->{child_stderr}
This usage should be preferred to
$CHILD_STDxxx{...}
.
ALL_JOBS
@Forks::Super::ALL_JOBS
%Forks::Super::ALL_JOBS
-
List of all
Forks::Super::Job
objects that were created fromfork()
calls, including deferred and failed jobs. Both process IDs and job IDs (for jobs that were deferred at one time) can be used to look up Job objects in the %Forks::Super::ALL_JOBS
table.
IPC_DIR
$Forks::Super::IPC_DIR
-
A directory where temporary files to be shared among processes for interprocess communication (IPC) can be created. If not specified,
Forks::Super
will try to guess a good directory such as an OS-appropriate temporary directory or your home directory as a suitable store for these files.$Forks::Super::IPC_DIR
is a tied variable and an assignment to it will fail if the RHS is not suitable for use as a temporary IPC file store.Forks::Super
will look for the environment variableIPC_DIR
and for anIPC_DIR
parameter on module import (that is, use Forks::Super IPC_DIR => '/some/directory') for suggestions about where to store the IPC files.
Setting this value to
"undef"
(the string literal"undef"
, not the Perl special valueundef
) will disable file-based interprocess communication for your program. The module will fall back to using sockets or pipes (probably sockets) for all IPC. Some features of this distribution may not work or may not work properly if file-based IPC is disabled.
QUEUE_INTERRUPT
$Forks::Super::QUEUE_INTERRUPT
-
On systems with mostly-working signal frameworks, this module installs a signal handler the first time that a task is deferred. The signal that is trapped is defined in the variable
$Forks::Super::QUEUE_INTERRUPT
. The default value isUSR1
, and it may be overridden directly or set on module import$Forks::Super::QUEUE_INTERRUPT
=
'USR2'
;
You would only worry about resetting this variable if you (including other modules that you import) are making use of an existing
SIGUSR1
handler.Since v0.40 this variable is generally not used unless
1. your system has a POSIX-y signal framework, and
2. Time::HiRes::setitimer is not implemented for your system.
TIMEOUT
Forks::Super::TIMEOUT
-
A possible return value from "wait" and "waitpid" functions when a timeout argument is supplied. The value indicating a timeout should not collide with any other possible value from those functions, and should be recognizable as not an actual process ID.
my
$pid
=
wait
10.0;
# Forks::Super::wait with timeout
if
(
$pid
== Forks::Super::TIMEOUT) {
# no tasks have finished in the last 10 seconds ...
}
else
{
# task has finished, process id in $pid.
}
LAST_JOB
LAST_JOB_ID
$Forks::Super::LAST_JOB
$Forks::Super::LAST_JOB_ID
-
Calls to the "bg_eval" and "bg_qx" functions launch a background process and set the variables
$Forks::Super::LAST_JOB_ID
to the job's process ID and$Forks::Super::LAST_JOB
to the job's Forks::Super::Job object. These functions do not explicitly return the job id, so these variables provide a convenient way to query the state of the jobs launched by these functions. Some
bash
users will immediately recognize the parallels between these variables and the special bash$!
variable, which captures the process id of the last job to be run in the background.
WAIT_ACTION_ON_SUSPENDED_JOBS
$Forks::Super::Wait::WAIT_ACTION_ON_SUSPENDED_JOBS
-
Governs the action of a call to "wait", "waitpid", or "waitall" in the case when all remaining jobs are in the
SUSPENDED
orDEFERRED-SUSPENDED
state (see "state" in Forks::Super::Job). Allowable values for this variable arewait
-
Causes the call to "wait"/"waitpid" to block indefinitely until those jobs start and one or more of them is completed. In this case it is presumed that the queue monitor is running periodically and conditions that allow those jobs to get started will occur. This is the default setting for this variable.
fail
-
Causes the "wait"/"waitpid" call to return with the special (negative) value
Forks::Super::Wait::ONLY_SUSPENDED_JOBS_LEFT
. resume
-
Causes one of the suspended jobs to be resumed. It is presumed that this job will complete and allow the "wait"/"waitpid" function to return.
ON_TOO_MANY_OPEN_FILEHANDLES
$Forks::Super::ON_TOO_MANY_OPEN_FILEHANDLES = 'rescue' | 'fail'
-
Open file handles are a scarce computing resource, and a script that launches many small jobs with
Forks::Super
and is not meticulous about calling "close_fh" or "dispose" on those jobs may bump up against this limit. The module variable$Forks::Super::ON_TOO_MANY_OPEN_FILEHANDLES
dictates what happens whenForks::Super
detects that you are getting close to this limit. This variable can have two possible values:$Forks::Super::ON_TOO_MANY_OPEN_FILEHANDLES = 'fail'
-
This is the default. With this setting,
Forks::Super
will allow you to attempt to open more file handles, and not do anything special about it on failure. $Forks::Super::ON_TOO_MANY_OPEN_FILEHANDLES = 'rescue'
-
With this setting,
Forks::Super
will attempt to close some open file handles from other jobs when it detects that it is getting close to the maximum number of open file handles. Unread output from child processes may be lost if this safeguard kicks in.
EXPORTS
This module always exports the "fork", "wait", "waitpid", and "waitall" functions, overloading the Perl system calls with the same names. Mixing Forks::Super
calls with the similarly-named Perl calls is strongly discouraged, but you can access the original builtin functions at CORE::fork
, CORE::wait
, etc.
Functions that can be exported to the caller's package include
Forks::Super::bg_eval
Forks::Super::bg_qx
Forks::Super::isValidPid
Forks::Super::open2
Forks::Super::open3
Forks::Super::pause
Forks::Super::pgrep
Forks::Super::pmap
Forks::Super::read_stderr
Forks::Super::read_stdout
Module variables that can be exported are (these module variables are deprecated):
%Forks::Super::CHILD_STDIN
%Forks::Super::CHILD_STDOUT
%Forks::Super::CHILD_STDERR
The special tag :var
will export all three of these hash tables to the calling namespace.
The tag :all
will export all the functions and variables listed above.
The Forks::Super::kill
function cannot be exported for now, while I think through the implications of overloading yet another Perl system call.
IMPORT CONFIG
Many of these settings have been mentioned in other parts of this document, but here is a summary of the configuration that can be done on the use Forks::Super ...
line
MAX_PROC => integer | subroutine that returns integer
Initializes $Forks::Super::MAX_PROC
, which governs the maximum number of simultaneous background processes managed by this module. When a new process is requested and this limit has been reached, the fork
call will fail, block (until at least one current process finishes), or queue, depending on the setting of $Forks::Super::ON_BUSY
. See "MAX_PROC" under MODULE VARIABLES.
ON_BUSY => 'block' | 'fail' | 'queue'
Sets $Forks::Super::ON_BUSY
, which governs the behavior of fork
when the limit of simultaneous background processes has been reached. See "ON_BUSY" under MODULE VARIABLES.
CHILD_FORK_OK => -1 | 0 | 1
Sets $Forks::Super::CHILD_FORK_OK
, which governs the behavior of Forks::Super::fork
when called from a child process. See "CHILD_FORK_OK" in "MODULE VARIABLES".
DEBUG => boolean
Turns module debugging on and off. On the import line, this configuration overrides the value of $ENV{FORKS_SUPER_DEBUG}
(see "ENVIRONMENT").
QUEUE_MONITOR_FREQ => num_seconds
Sets $Forks::Super::Deferred::QUEUE_MONITOR_FREQ
, which governs how frequently the main process should be interrupted to examine the queue of jobs that have not started yet. See Forks::Super::Deferred.
QUEUE_INTERRUPT => signal_name
Sets $Forks::Super::QUEUE_INTERRUPT
, the name of the signal used by Forks::Super
to periodically examine the queue of background jobs that have not started yet. The default setting is USR1
, but you should change this if you wish to use SIGUSR1
for other purposes in your program. This setting does not have any effect on MSWin32 systems.
IPC_DIR => directory, FH_DIR => directory
Use the specified directory for temporary interprocess communication files used by Forks::Super
. Overrides settings of $ENV{IPC_DIR}
or $ENV{FH_DIR}
.
CONFIG => file, CONFIG_FILE => file
Loads module configuration out of the specified file. The file is expected to contain key-value pairs for the same parameters documented in this section. Parameter names in the configuration file are not case sensitive.
# sample Forks::Super config file
max_proc=10
IPC_DIR=/home/mob/.forks-super-ipc
ENVIRONMENT
Forks::Super
makes use of the following optional variables from your environment.
- FORKS_SUPER_DEBUG
-
If set, sets the default value of
$Forks::Super::DEBUG
(see "MODULE VARIABLES") to true. - FORKS_SUPER_QUEUE_DEBUG
-
If set and true, sends additional information about the status of the queue (see "Deferred processes") to standard output. This setting is independent of the
$ENV{FORKS_SUPER_DEBUG}
/$Forks::Super::DEBUG
setting. - FORKS_DONT_CLEANUP
-
If set and true, the program will not remove the temporary files used for interprocess communication. This setting can be helpful if you want to analyze the messages that were sent between processes after the fact.
- FORKS_SUPER_CONFIG
-
Forks::Super
will probe your system for available functions, Perl modules, and external programs and try suitable workarounds when the desired feature is not available. With$ENV{FORKS_SUPER_CONFIG}
, you can commandForks::Super
to assume that certain features are available (or are not available) on your system. This is a little bit helpful for testing; I don't know whether it would be helpful for anything else. See the source forForks/Super/Config.pm
for more information about how$ENV{FORKS_SUPER_CONFIG}
is used. - FORKS_SUPER_JOB_OVERLOAD
-
Specifies whether the
fork
call will return an overloaded Forks::Super::Job object instead of a scalar process identifier. See "OVERLOADING" in Forks::Super::Job. Since v0.41 overloading is enabled by default. If theFORKS_SUPER_JOB_OVERLOAD
variable is set, it will override this default. - FORKS_SUPER_ENABLE_DUMP
-
If set, will invoke the Forks::Super::Debug::enable_dump function and enable a Java Virtual Machine-like feature to report the status of all the background jobs your program has created. If this variable contains the name of a signal, then that signal will be trapped by your program to produce the process dump. If the variable value is not a signal name but is a true value, then the program will produce a process dump in response to a
SIGQUIT
. See Forks::Super::Debug.This feature can also be enabled on import of
Forks::Super
by passing anENABLE_DUMP
parameter on import, like
use Forks::Super ENABLE_DUMP => 'QUIT';
- IPC_DIR
-
Specifies a directory for storing temporary files for interprocess communication. See "IPC_DIR" in "MODULE VARIABLES".
DIAGNOSTICS
fork() not allowed in child process ...
-
When the package variable
$Forks::Super::CHILD_FORK_OK
is zero, this package does not allow thefork()
method to be called from a child process. Set$Forks::Super::CHILD_FORK_OK
to change this behavior. quick timeout
-
A job was configured with a timeout/expiration time such that the deadline for the job occurred before the job was even launched. The job was killed immediately after it was spawned.
Job start/Job dependency <nnn> for job <nnn> is invalid. Ignoring.
-
A process id or job id that was specified as a "depend_on" or "depend_start" option did not correspond to a known job.
Job <nnn> reaped before parent initialization.
-
A child process finished quickly and was reaped by the parent process
SIGCHLD
handler before the parent process could even finish initializing the job state. The state of the job in the parent process might be unavailable or corrupt for a short time, but eventually it should be all right. could not open file handle to provide child STDIN/STDOUT/STDERR
child was not able to detect STDIN file ... Child may not have any input to read.
could not open file handle to write child STDIN
could not open file handle to read child STDOUT/STDERR
-
Initialization of file handles for a child process failed. The child process will continue, but it will be unable to receive input from the parent through the
$Forks::Super::CHILD_STDIN{pid}
(pid->{child_stdin}
) file handle, or pass output to the parent through the file handles$Forks::Super::CHILD_STDOUT{pid}
and$Forks::Super::CHILD_STDERR{pid}
(pid->{child_stdout}
andpid->{child_stderr}
).
INCOMPATIBILITIES
This module requires its own SIGCHLD
handler. Installing other SIGCHLD
handlers may cause undefined behavior, though if you are used to setting
$SIG
{CHLD} =
'IGNORE'
in your code, you should still be OK.
DEPENDENCIES
The Win32::API module is required for many features of this module on Windows and is strongly encouraged.
Otherwise, there are no hard dependencies on non-core modules. Some features, especially operating-system specific functions, depend on some modules (Win32::Process and Win32 for Wintel systems, for example), but the module will compile without those modules. Attempts to use these features without the necessary modules will be silently ignored.
BUGS AND LIMITATIONS
Special tips for Windows systems
On POSIX systems (including Cygwin), programs using the Forks::Super
module are interrupted when a child process completes. A callback function performs some housekeeping and may perform other duties like trying to dispatch items from the list of deferred jobs.
Forks::Super::pause
vs. sleep
Windows systems do not have the signal handling capabilities of other systems, and so other things equal, a script running on Windows will not perform the housekeeping tasks as frequently as a script on other systems.
The method Forks::Super::pause
can be used as a drop in replacement for the Perl sleep
call. In a pause
function call, the program will check on active child processes, reap the ones that have completed, and attempt to dispatch jobs on the queue.
Calling pause
with an argument of 0 is also a valid way of invoking the child handler function on Windows. When used this way, pause
returns immediately after running the child handler.
Forks::Super::isValidPid($pid)
vs. if ($pid > 0)
Child processes are implemented differently in Windows than in POSIX systems. The CORE::fork
and Forks::Super::fork
calls from Windows will usually return a pseudo-process ID to the parent process, and this will be a negative number. The Unix idiom of testing whether a fork
call returns a positive number needs to be modified on Windows systems by testing whether Forks::Super::isValidPid($pid)
returns true, where $pid
is the return value from a Forks::Super::fork
call.
Interference with piped open
As documented in RT#124316, Forks::Super
sets a relatively heavy SIGCHLD
handler, which can cause a race condition when you call close
on a piped filehandle
open
my
$fh
,
'|-'
,
"command you expect to work ..."
;
...
close
$fh
or
die
;
Sometimes, a waitpid
call inside the signal handler will reap the process before the close
call. If that happens, the close
call will fail (and set $!
to "No child processes
" and $?
to -1).
If this behavior is undesired, and there are no calls to "fork" between the piped open
and close
statements, the workaround is to call the "deinit_pkg" function and disable the problematic features of the module.
Leftover temporary files and directories
In programs that use the interprocess communication features, the module will usually but not always do a good job of cleaning up after itself. You may find directories called .fhfork<nnn>
that may or may not be empty scattered around your filesystem.
You can invoke this module as one of:
$ perl -MForks::Super=cleanse
$ perl -MForks::Super=cleanse,<directory>
to run a function that will clean up these directories.
Interrupted system calls
A typical script using this module will have a lot of behind-the-scenes signal handling as child processes finish and are reaped. These frequent interruptions can affect the execution of the rest of your program. For example, in this script:
1: use Forks::Super;
2: fork { sub => sub { sleep 2 } };
3: sleep 5;
4: # ... program continues ...
the sleep
call in line 3 is probably going to get interrupted before 5 seconds have elapsed as the end of the child process spawned in line 2 will interrupt execution and invoke the SIGCHLD handler. In some cases there are tedious workarounds:
3a:
$stop_sleeping_at
=
time
+ 5;
3b:
sleep
1
while
time
<
$stop_sleeping_at
;
In this distribution, the Forks::Super::pause call provides an interruption-resistant alternative to sleep
.
3: Forks::Super::pause(5);
The pause
call itself has the limitation that it may sleep for longer than the desired time, because the pause
function executes "productive" code that can run for an arbitrarily long time.
Idiosyncratic behavior on some systems
The system implementation of fork'ing and wait'ing varies from platform to platform. This module has been extensively tested on Cygwin, Windows, and Linux, but less so on other systems. It is possible that some features will not work as advertised. Please report any problems you encounter to <mob@cpan.org> and I'll see what I can do about it.
Other bugs or feature requests
Feel free to report other bugs or feature requests to bug-forks-super at rt.cpan.org
or through the web interface at http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Forks-Super. This includes any cases where you think the documentation might not be keeping up with the development. I will be notified, and then you'll automatically be notified of progress on your bug as I make changes.
SEE ALSO
There are reams of other modules on CPAN for managing background processes. See Parallel::*, Proc::Parallel, Proc::Queue, Proc::Fork, Proc::Launcher, MCE. Also Win32::Job.
Inspiration for "bg_eval" function from Acme::Fork::Lazy.
Forks::Queue provides a mechanism to manipulate a queue object from different processes simultaneously, and can be helpful to implement a dynamic queue for a supervisor-worker model with processes, not unlike what Thread::Queue can do for threads.
AUTHOR
Marty O'Brien, <mob@cpan.org>
LICENSE AND COPYRIGHT
Copyright (c) 2009-2018, Marty O'Brien.
This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself, either Perl version 5.8.8 or, at your option, any later version of Perl 5 you may have available.
See http://dev.perl.org/licenses/ for more information.