my
$a
= sequence 5;
is_pdl
$a
->stdv, pdl( 1.4142135623731 ),
"standard deviation of $a"
;
is_pdl
$a
->stdv_unbiased, pdl( 1.58113883008419 ),
"unbiased standard deviation of $a"
;
is_pdl
$a
->var, pdl( 2 ),
"variance of $a"
;
is_pdl
$a
->var_unbiased, pdl( 2.5 ),
"unbiased variance of $a"
;
is_pdl
$a
->se, pdl( 0.707106781186548 ),
"standard error of $a"
;
is_pdl
$a
->ss, pdl( 10 ),
"sum of squared deviations from the mean of $a"
;
is_pdl
$a
->skew, pdl( 0 ),
"sample skewness of $a"
;
is_pdl
$a
->skew_unbiased, pdl( 0 ),
"unbiased sample skewness of $a"
;
is_pdl
$a
->kurt, pdl( -1.3 ),
"sample kurtosis of $a"
;
is_pdl
$a
->kurt_unbiased, pdl( -1.2 ),
"unbiased sample kurtosis of $a"
;
{
my
$x
= pdl [(0.001) x 6];
my
$var
=
$x
->var;
ok
$var
>= 0,
'var >= 0'
or diag
"var = $var"
;
my
$stdv
=
$x
->stdv;
ok
$stdv
>= 0,
'stdv >= 0'
or diag
"stdv = $stdv"
;
}
is_pdl
$_
->ss, ((
$_
-
$_
->avg)**2)->sumover,
"ss for $_"
for
pdl(
'[1 1 1 1 2 3 4 4 4 4 4 4]'
),
pdl(
'[1 2 2 2 3 3 3 3 4 4 5 5]'
),
pdl(
'[1 1 1 2 2 3 3 4 4 5 5 5]'
);
my
$a_bad
= sequence 6;
$a_bad
->setbadat(-1);
is_pdl
$a_bad
->stdv, pdl( 1.4142135623731 ),
"standard deviation of $a_bad"
;
is_pdl
$a_bad
->stdv_unbiased, pdl( 1.58113883008419 ),
"unbiased standard deviation of $a_bad"
;
is_pdl
$a_bad
->var, pdl( 2 ),
"variance of $a_bad"
;
is_pdl
$a_bad
->var_unbiased, pdl( 2.5 ),
"unbiased variance of $a_bad"
;
is_pdl
$a_bad
->se, pdl( 0.707106781186548 ),
"standard error of $a_bad"
;
is_pdl
$a_bad
->ss, pdl( 10 ),
"sum of squared deviations from the mean of $a_bad"
;
is_pdl
$a_bad
->skew, pdl( 0 ),
"sample skewness of $a_bad"
;
is_pdl
$a_bad
->skew_unbiased, pdl( 0 ),
"unbiased sample skewness of $a_bad"
;
is_pdl
$a_bad
->kurt, pdl( -1.3 ),
"sample kurtosis of $a_bad"
;
is_pdl
$a_bad
->kurt_unbiased, pdl( -1.2 ),
"unbiased sample kurtosis of $a_bad"
;
my
$b
= pdl
'0 0 0 1 1'
;
is_pdl
$a
->cov(
$b
), pdl( 0.6 ),
"sample covariance of $a and $b"
;
is_pdl
$a
->corr(
$b
), pdl( 0.866025403784439 ),
"Pearson correlation coefficient of $a and $b"
;
is_pdl
$a
->n_pair(
$b
), indx( 5 ),
"Number of good pairs between $a and $b"
;
is_pdl
$a
->corr(
$b
)->t_corr( 5 ), pdl( 3 ),
"t significance test of Pearson correlation coefficient of $a and $b"
;
is_pdl
$a
->corr_dev(
$b
), pdl( 0.903696114115064 ),
"correlation calculated from dev_m values of $a and $b"
;
my
$b_bad
= pdl
'BAD 0 0 1 1 1'
;
is_pdl
$a_bad
->cov(
$b_bad
), pdl( 0.5 ),
"sample covariance with bad data of $a_bad and $b_bad"
;
is_pdl
$a_bad
->corr(
$b_bad
), pdl( 0.894427190999916 ),
"Pearson correlation coefficient with bad data of $a_bad and $b_bad"
;
is_pdl
$a_bad
->n_pair(
$b_bad
), indx( 4 ),
"Number of good pairs between $a_bad and $b_bad with bad values taken into account"
;
is_pdl
$a_bad
->corr(
$b_bad
)->t_corr( 4 ), pdl( 2.82842712474619 ),
"t signifiance test of Pearson correlation coefficient with bad data of $a_bad and $b_bad"
;
is_pdl
$a_bad
->corr_dev(
$b_bad
), pdl( 0.903696114115064 ),
"correlation calculated from dev_m values with bad data of $a_bad and $b_bad"
;
my
(
$t
,
$df
) =
$a
->t_test(
$b
);
is_pdl
$t
, pdl( 2.1380899352994 ),
"t-test between $a and $b - 't' output"
;
is_pdl
$df
, pdl( 8 ),
"t-test between $a and $b - 'df' output"
;
(
$t
,
$df
) =
$a
->t_test_nev(
$b
);
is_pdl
$t
, pdl( 2.1380899352994 ),
"t-test with non-equal variance between $a and $b - 't' output"
;
is_pdl
$df
, pdl( 4.94637223974763 ),
"t-test with non-equal variance between $a and $b - 'df' output"
;
(
$t
,
$df
) =
$a
->t_test_paired(
$b
);
is_pdl
$t
, pdl( 3.13785816221094 ),
"paired sample t-test between $a and $b - 't' output"
;
is_pdl
$df
, pdl( 4 ),
"paired sample t-test between $a and $b - 'df' output"
;
(
$t
,
$df
) =
$a_bad
->t_test(
$b_bad
);
is_pdl
$t
, pdl( 1.87082869338697 ),
"t-test with bad values between $a_bad and $b_bad - 't' output"
;
is_pdl
$df
, pdl( 8 ),
"t-test with bad values between $a_bad and $b_bad - 'd' output"
;
(
$t
,
$df
) =
$a_bad
->t_test_nev(
$b_bad
);
is_pdl
$t
, pdl( 1.87082869338697 ),
"t-test with non-equal variance with bad values between $a_bad and $b_bad - 't' output"
;
is_pdl
$df
, pdl( 4.94637223974763 ),
"t-test with non-equal variance with bad values between $a_bad and $b_bad - 'df' output"
;
(
$t
,
$df
) =
$a_bad
->t_test_paired(
$b_bad
);
is_pdl
$t
, pdl( 4.89897948556636 ),
"paired sample t-test with bad values between $a_bad and $b_bad - 't' output"
;
is_pdl
$df
, pdl( 3 ),
"paired sample t-test with bad values between $a_bad and $b_bad - 'df' output"
;
{
my
(
$data
,
$idv
,
$ido
) = rtable(\
*DATA
, {
V
=>0});
is_pdl
$data
, pdl '
[ 5 BAD BAD 2 BAD 5 BAD 9 4 BAD BAD BAD 5 BAD]
[ 7 BAD 3 7 0 BAD 0 8 BAD 0 3 0 BAD 0]
[BAD BAD BAD BAD BAD 1 BAD 1 BAD BAD BAD BAD 1 BAD]
[BAD BAD BAD BAD BAD 0 BAD 5 BAD BAD BAD BAD 0 BAD]
[BAD BAD 0 BAD 2 BAD 0 BAD BAD 0 0 2 BAD 0]
';
}
{
my
$a
= pdl '
0.045 0.682 0.290 0.024 0.598 0.321 0.772 0.375 0.237 0.811;
0.356 0.094 0.925 0.139 0.701 0.849 0.689 0.109 0.240 0.847;
0.822 0.492 0.351 0.860 0.400 0.243 0.313 0.011 0.437 0.480
';
is_pdl
$a
->cov_table,
$a
->cov(
$a
->dummy(1)),
'cov_table'
;
$a
->setbadat(4,0);
is_pdl
$a
->cov_table,
$a
->cov(
$a
->dummy(1)),
'cov_table bad val'
;
}
{
my
$a
= pdl '
0.045 0.682 0.290 0.024 0.598 0.321 0.772 0.375 0.237 0.811;
0.356 0.094 0.925 0.139 0.701 0.849 0.689 0.109 0.240 0.847;
0.822 0.492 0.351 0.860 0.400 0.243 0.313 0.011 0.437 0.480
';
is_pdl
$a
->corr_table,
$a
->corr(
$a
->dummy(1)),
"Square Pearson correlation table"
;
$a
->setbadat(4,0);
is_pdl
$a
->corr_table,
$a
->corr(
$a
->dummy(1)),
"Square Pearson correlation table with bad data"
;
}
{
my
$a
= pdl([0,1,2,3,4], [0,0,0,0,0]);
$a
=
$a
->setvaltobad(0);
ok
$a
->stdv->nbad,
"Bad value input to stdv makes the stdv itself bad"
;
}
SKIP: {
skip
'no PDL::GSL::CDF'
, 1
if
$@;
my
$x
= pdl(1, 2);
my
$n
= pdl(2, 10);
my
$p
= .5;
is_pdl binomial_test(
$x
,
$n
,
$p
), pdl(0.75, 0.9892578125),
'binomial_test'
;
}
{
my
$a
= sequence 10, 2;
my
$factor
= sequence(10) > 4;
my
$ans
= pdl( [[0..4], [10..14]], [[5..9], [15..19]] );
my
(
$a_
,
$l
) =
$a
->group_by(
$factor
);
is_pdl
$a_
,
$ans
,
'group_by single factor equal n'
;
is_deeply
$l
, [0, 1],
'group_by single factor label'
;
$a
= sequence 10,2;
$factor
= qsort sequence(10) % 3;
$ans
= pdl( [1.5, 11.5], [5, 15], [8, 18] );
is_pdl
$a
->group_by(
$factor
)->average,
$ans
,
'group_by single factor unequal n'
;
$a
= sequence 10;
my
@factors
= ( [
qw( a a a a b b b b b b )
], [
qw(0 1 0 1 0 1 0 1 0 1)
] );
$ans
= pdl
'[ 0 2 BAD; 1 3 BAD ], [ 4 6 8; 5 7 9 ]'
;
(
$a_
,
$l
) =
$a
->group_by(
@factors
);
is_pdl
$a_
,
$ans
,
'group_by multiple factors'
;
is_deeply
$l
, [[
qw(a_0 a_1)
], [
qw( b_0 b_1 )
]],
'group_by multiple factors label'
;
}
{
my
@a
=
qw(a a b b c c)
;
my
$a
= PDL::Stats::Basic::code_ivs( \
@a
);
my
$ans
= pdl( 0,0,1,1,2,2 );
is_pdl
$a
,
$ans
,
'code_ivs'
;
$a
[-1] =
undef
;
my
$a_bad
= PDL::Stats::Basic::code_ivs( \
@a
);
my
$ans_bad
= pdl
'0 0 1 1 2 BAD'
;
is_pdl
$a_bad
,
$ans_bad
,
'code_ivs with missing value undef correctly coded'
;
$a
[-1] =
'BAD'
;
$a_bad
= PDL::Stats::Basic::code_ivs( \
@a
);
is_pdl
$a_bad
,
$ans_bad
,
'code_ivs with missing value BAD correctly coded'
;
}
done_testing();