#!/usr/bin/perl
use
5.010001;
use
POSIX
qw(setlocale LC_ALL)
;
POSIX::setlocale(LC_ALL,
"C"
);
my
$dsl
=
<<'END_OF_SOURCE';
:default ::= action => [ values ] bless => ::lhs
lexeme default = action => [ value ] bless => ::name
S ::= NP VP period bless => S
NP ::= NN bless => NP
| NNS bless => NP
| DT NN bless => NP
| NN NNS bless => NP
| NNS CC NNS bless => NP
VP ::= VBZ NP bless => VP
| VP VBZ NNS bless => VP
| VP CC VP bless => VP
| VP VP CC VP bless => VP
| VBZ bless => VP
period ~ '.'
:discard ~ whitespace
whitespace ~ [\s]+
CC ~ 'and'
DT ~ 'a' | 'an'
NN ~ 'panda'
NNS ~ 'shoots' | 'leaves'
VBZ ~ 'eats' | 'shoots' | 'leaves'
END_OF_SOURCE
my
$grammar
= Marpa::R3::Scanless::G->new(
{
bless_package
=>
'PennTags'
,
source
=> \
$dsl
, } );
my
$full_expected
=
<<'END_OF_OUTPUT';
(S (NP (DT a) (NN panda))
(VP (VBZ eats) (NP (NNS shoots) (CC and) (NNS leaves)))
(. .))
(S (NP (DT a) (NN panda))
(VP (VP (VBZ eats) (NP (NNS shoots))) (CC and) (VP (VBZ leaves)))
(. .))
(S (NP (DT a) (NN panda))
(VP (VP (VBZ eats)) (VP (VBZ shoots)) (CC and) (VP (VBZ leaves)))
(. .))
END_OF_OUTPUT
my
$sentence
=
'a panda eats shoots and leaves.'
;
my
@actual
= ();
my
$recce
= Marpa::R3::Scanless::R->new( {
grammar
=>
$grammar
} );
$recce
->
read
( \
$sentence
);
while
(
defined
(
my
$value_ref
=
$recce
->value() ) ) {
my
$value
=
$value_ref
? ${
$value_ref
}->bracket() :
'No parse'
;
push
@actual
,
$value
;
}
Marpa::R3::Test::is( (
join
"\n"
,
sort
@actual
) .
"\n"
,
$full_expected
,
'Ambiguous English sentence using value()'
);
my
$panda_grammar
= Marpa::R3::Scanless::G->new(
{
bless_package
=>
'PennTags'
,
source
=> \
$dsl
, } );
my
$panda_recce
= Marpa::R3::Scanless::R->new( {
grammar
=>
$panda_grammar
} );
$panda_recce
->
read
( \
$sentence
);
my
$asf
= Marpa::R3::ASF->new( {
slr
=>
$panda_recce
} );
my
$full_result
=
$asf
->traverse( {}, \
&full_traverser
);
my
$pruned_result
=
$asf
->traverse( {}, \
&pruning_traverser
);
sub
full_traverser {
my
(
$glade
,
$scratch
) =
@_
;
my
$rule_id
=
$glade
->rule_id();
my
$symbol_id
=
$glade
->symbol_id();
my
$symbol_name
=
$panda_grammar
->symbol_name(
$symbol_id
);
if
( not
defined
$rule_id
) {
my
$literal
=
$glade
->literal();
my
$penn_tag
= penn_tag(
$symbol_name
);
return
[
"($penn_tag $literal)"
];
}
my
@return_value
= ();
CHOICE:
while
(1) {
my
@results
=
$glade
->all_choices();
if
(
$symbol_name
eq
'[:start]'
) {
return
[
map
{
join
q{}
, @{
$_
} }
@results
];
}
my
$join_ws
=
q{ }
;
$join_ws
=
qq{\n }
if
$symbol_name
eq
'S'
;
push
@return_value
,
map
{
'('
. penn_tag(
$symbol_name
) .
q{ }
. (
join
$join_ws
, @{
$_
} ) .
')'
}
@results
;
last
CHOICE
if
not
defined
$glade
->
next
();
}
return
\
@return_value
;
}
my
$cooked_result
=
join
"\n"
, (
sort
@{
$full_result
}),
q{}
;
Marpa::R3::Test::is(
$cooked_result
,
$full_expected
,
'Ambiguous English sentence using ASF'
);
sub
penn_tag {
my
(
$symbol_name
) =
@_
;
return
q{.}
if
$symbol_name
eq
'period'
;
return
$symbol_name
;
}
sub
pruning_traverser {
my
(
$glade
,
$scratch
) =
@_
;
my
$rule_id
=
$glade
->rule_id();
my
$symbol_id
=
$glade
->symbol_id();
my
$symbol_name
=
$panda_grammar
->symbol_name(
$symbol_id
);
if
( not
defined
$rule_id
) {
my
$literal
=
$glade
->literal();
my
$penn_tag
= penn_tag(
$symbol_name
);
return
"($penn_tag $literal)"
;
}
my
@return_value
=
$glade
->rh_values();
return
(
join
q{ }
,
@return_value
) .
"\n"
if
$symbol_name
eq
'[:start]'
;
my
$join_ws
=
q{ }
;
$join_ws
=
qq{\n }
if
$symbol_name
eq
'S'
;
my
$penn_tag
= penn_tag(
$symbol_name
);
return
"($penn_tag "
. (
join
$join_ws
,
@return_value
) .
')'
;
}
my
$pruned_expected
=
<<'END_OF_OUTPUT';
(S (NP (DT a) (NN panda))
(VP (VBZ eats) (NP (NNS shoots) (CC and) (NNS leaves)))
(. .))
END_OF_OUTPUT
Marpa::R3::Test::is(
$pruned_result
,
$pruned_expected
,
'Ambiguous English sentence using ASF: pruned'
);
sub
contents {
join
(
$_
[0],
map
{
$_
->bracket() } @{
$_
[1] } );
}
sub
PennTags::S::bracket {
"(S "
. contents(
"\n "
,
$_
[0] ) .
")"
}
sub
PennTags::NP::bracket {
"(NP "
. contents(
' '
,
$_
[0] ) .
")"
}
sub
PennTags::VP::bracket {
"(VP "
. contents(
' '
,
$_
[0] ) .
")"
}
sub
PennTags::PP::bracket {
"(PP "
. contents(
' '
,
$_
[0] ) .
")"
}
sub
PennTags::CC::bracket {
"(CC $_[0]->[0])"
}
sub
PennTags::DT::bracket {
"(DT $_[0]->[0])"
}
sub
PennTags::IN::bracket {
"(IN $_[0]->[0])"
}
sub
PennTags::NN::bracket {
"(NN $_[0]->[0])"
}
sub
PennTags::NNS::bracket {
"(NNS $_[0]->[0])"
}
sub
PennTags::VB::bracket {
"(VB $_[0]->[0])"
}
sub
PennTags::VBP::bracket {
"(VBP $_[0]->[0])"
}
sub
PennTags::VBZ::bracket {
"(VBZ $_[0]->[0])"
}
sub
PennTags::period::bracket {
"(. .)"
}