# Declare the test plan: three tests when the installed CWB is at least
# v3.4.21 (numeric version 3.004021), otherwise skip the whole file.
plan(
    $CWB::CWBVersion >= 3.004_021
    ? (tests    => 3)
    : (skip_all => "only available in CWB v3.4.21 or newer")
);
# Locations used by this test: registry directory, data directory of the
# newly encoded corpus, and the input file handed to the encoder.
our ($reg_dir, $data_dir, $vrt_file) = (
    "tmp/registry",
    "tmp/vss_an",
    "data/vrt/VeryShortStories.vrt",
);

# Create the registry directory on first use.
-d $reg_dir or mkdir $reg_dir;
# Create the encoder object under test ("VSS_AN" is presumably the corpus
# ID -- confirm against the CWB::Encoder documentation).
# Uses an explicit class-method call instead of indirect object syntax,
# and passes the class name to isa_ok() as a quoted string rather than a
# bareword, so the statement also compiles under "use strict".
our $enc = CWB::Encoder->new("VSS_AN");
isa_ok($enc, "CWB::Encoder", "create CWB::Encoder object");
# Configure the encoder.  Each entry is [ method name, arguments... ];
# the setters are invoked on $enc in exactly the order listed.
my @encoder_settings = (
    [ registry        => $reg_dir ],
    [ dir             => $data_dir ],
    [ overwrite       => 1 ],
    [ longname        => "Very Short Stories" ],
    [ info            => "Info file for corpus VSS (Very Short Stories)\n" ],
    [ charset         => "latin1" ],
    [ language        => "en" ],
    [ perm            => "640" ],
    [ p_attributes    => qw(word pos lemma) ],
    [ null_attributes => "collection" ],
    [ s_attributes    => qw(chapter:0+num s:0) ],
    [ auto_null       => 1 ],
    [ encode_options  => "-q" ],
    [ memory          => 100 ],
    [ validate        => 1 ],
    [ verbose         => 0 ],
    [ debug           => 0 ],
);
for my $setting (@encoder_settings) {
    my ($method, @args) = @{$setting};
    $enc->$method(@args);
}
# Encode and index the corpus, timing the run.
# The eval block returns 1 on success so that failure is detected from the
# return value; the exception text is saved immediately (before anything
# else can overwrite $@) and reported when the test fails, instead of
# being silently discarded.
our $T0 = time;
my $encode_ok    = eval { $enc->encode($vrt_file); 1 };
my $encode_error = $@;
our $elapsed     = time - $T0;    # measured right after encoding finishes

ok($encode_ok, "corpus encoding and indexing")
    or diag("encoding failed: $encode_error");
diag(sprintf "VSS corpus encoded in %.1f seconds", $elapsed);
# Reference corpus against which the newly created data files are compared.
our $ref_dir     = "data/vss";
our $ref_regfile = "data/registry/vss";

# Open the reference directory.  DirHandle->new returns undef when the
# directory cannot be opened, so fail here with a clear message instead of
# later with "Can't call method ... on an undefined value".
our $dh = DirHandle->new($ref_dir)
    or die "cannot open reference directory '$ref_dir': $!";

my $ok = 1;              # cleared as soon as any data file check fails
my $old_huffcode = 0;    # not used by the code visible here; kept in case later code reads it
# Validate the data files written by the encoder against the reference
# directory.  For every entry of the reference directory that also exists
# as a regular file in $data_dir:
#   - files belonging to one of the listed attributes must be identical to
#     the reference file;
#   - any other file should not have been created at all.
#
# The attribute pattern is kept on a single line: a regex literal broken
# across lines without /x contains literal newlines, which stops the
# "pos" alternative from matching real file names.
my $expected_re = qr/^(?:word|pos|lemma|collection|chapter(?:_num)?|s)\./;

while (defined(my $filename = $dh->read)) {
    next if $filename =~ /^\./;    # skip ".", ".." and hidden entries

    my $ref_file = "$ref_dir/$filename";
    my $new_file = "$data_dir/$filename";

    # NOTE(review): reference files without a counterpart in $data_dir are
    # skipped here, so a data file the encoder failed to create is never
    # reported (the former "failed to create data file" branch was
    # unreachable for that reason).  Confirm whether this filter was meant
    # to test $ref_file instead.
    next unless -f $new_file;

    if ($filename =~ $expected_re) {
        # compare() returns 0 only when both files are identical
        if (compare($new_file, $ref_file) != 0) {
            diag("data file '$filename' is corrupt");
            $ok = 0;
        }
    }
    else {
        diag("file '$filename' should not have been created");
        $ok = 0;
    }
}
$dh->close;

ok($ok, "validation of created data files");