### INCLUDES ##################################################################
# safe Perl
use strict;
use Carp;
use Moose;
with 'HPCI::Stage',
HPCI::get_extra_roles('SGE', 'stage');
sub _build_cluster_specific_valid_resources {
return (
h_vmem => 'mem',
h_rt => 'h_time',
s_rt => 's_time'
)
}
sub _build_cluster_specific_default_resources {
return ( h_vmem => '2G' )
};
sub _build_cluster_specific_default_retry_resources {
h_vmem => [qw(2G 4G 8G 16G 32G)]
};
has '+runs' => (
isa => 'ArrayRef[HPCD::SGE::Run]'
);
has '_run_class' => (
is => 'ro',
isa => 'Str',
init_arg => undef,
default => "HPCD::SGE::Run"
);
around '_run_args' => sub {
my $orig = shift;
my $self = shift;
return (
(
$self->$orig(), resources_required => $self->_use_resources_required
)
);
};
has '+name' => (
writer => '_set_name',
trigger => \&_name_filter
);
sub _name_filter {
my( $self, $new, $old ) = @_;
my $filter = $new;
$filter =~ s/[^A-Za-z0-9\-\.\_]//g;
$self->_set_name( $filter ) if $filter ne $new;
}
has 'extra_sge_args_string' => (
is => 'ro',
isa => 'Str',
default => ''
);
has 'memory_too_small' => (
is => 'ro',
isa => 'CodeRef',
default => sub { return sub { return 0 } }
);
sub _analyse_completion_state {
my $self = shift;
my $run = shift;
my $stats = $run->stats;
$self->debug( "Stats from finished stage(" . $self->name . "): " . Dumper($stats) );
my $vmem_retry = $self->_vmem_usage( $run );
my $new_state =
0 == $stats->{exit_status} ? 'pass'
: $vmem_retry ? 'retry'
: 'fail';
$self->_set_state($new_state);
}
sub _can_retry_from_vmem {
my ($self, $res, $dres) = @_;
return 0
unless $self->_use_retry_resources_required
and my $res_avail = $self->_use_retry_resources_required->{h_vmem};
for my $nres (@$res_avail) {
$self->info( "Considering ($nres) as resource replacement for ($dres)" );
next unless $res < _res_to_num($nres);
$self->info( "Accepted ($nres) as resource replacement for ($dres)" );
$self->_use_resources_required->{h_vmem} = $nres;
return 1;
}
$self->info( "No resource replacement found for ($dres)" );
return 0;
}
sub _res_to_num {
my $str = shift;
return 0 unless my ($val, $unit) = ($str =~ /^(\d+(?:\.\d+)?)([KMG])?$/);
if ($unit) {
$unit = uc $unit;
if ($unit eq 'K') {
return $val * 1024;
}
elsif ($unit eq 'M') {
return $val * 1024 * 1024;
}
elsif ($unit eq 'G') {
return $val * 1024 * 1024 * 1024;
}
else {
return 0;
}
}
return $val;
}
sub _num_to_res {
my $num = shift;
my $codes = ' KMG';
my $unit = 0;
while ($unit < length($codes)-1 && $num >= 1024) {
$num /= 1024;
++$unit;
}
$num = int($num) == $num ? int($num) : sprintf( "%.2f", $num );
$num .= substr( $codes, $unit, 1 );
return $num;
}
has 'retry_mem_percent' => (
isa => 'Int',
is => 'ro',
default => 99
);
sub _vmem_usage {
my $self = shift;
my $run = shift;
my $stats = $run->stats;
my ( $req, $used ) = map { _res_to_num($_) }
$self->_use_resources_required->{h_vmem},
$stats->{peak_virtual_memory};
my $dreq = _num_to_res($req);
my $dused = _num_to_res($used);
if ($stats->{granted_pe} !~ /^(NONE|unknown)/ && $stats->{slots} =~ /^\d+/) {
$used /= $stats->{slots};
$dused = _num_to_res($used) . ' per pe';
}
my $pct = ( $used / $req ) * 100;
my $warn = ($pct < 50 && $req > _res_to_num('2G'))
? " ** Requested resource too large"
: "";
my $loglevel = $warn ? 'warn' : 'info';
$self->$loglevel( sprintf "Resource(%s): Requested(%s) Usage(%s) %.2f%%%s Stage(%s), Run(%s)",
'h_vmem', $dreq, $dused, $pct, $warn, $run->stage->name, $run->index );
my $stderr = $run->_stderr;
my $cmd = "tail -3 $stderr";
my @last = `$cmd`;
if ($last[-1] =~ /Out of memory\!/
|| ($last[-2] =~ /^Error: cannot allocate vector of size /
&& $last[-1] =~ /Execution halted/)
|| $pct >= $self->retry_mem_percent
|| $self->memory_too_small->($stats, $stderr)) {
return $self->_can_retry_from_vmem(
$pct > 100
? ( $used, $dused )
: ( $req, $dreq )
);
}
return 0;
}
1;