lib/Treex/Core/Node/EffectiveRelations.pm


            
              1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
—
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
              package Treex::Core::Node::EffectiveRelations;
$Treex::Core::Node::EffectiveRelations::VERSION = '2.20210102';
use Moose::Role;
# with Moose >= 2.00, this must be present also in roles
use MooseX::SemiAffordanceAccessor;
use Treex::Core::Log;
has is_member => (
    is            => 'rw',
    isa           => 'Bool',
    documentation => 'Is this node a member of a coordination (i.e. conjunct) or apposition?',
);
# Shared modifiers of coordinations can be distinguished in PDT style
# just based on the fact they are hanged on the conjunction (coord. head).
# However, in other styles (e.g. Stanford) this attribute might be useful.
has is_shared_modifier => (
    is            => 'rw',
    isa           => 'Bool',
    documentation => 'Is this node a shared modifier of a coordination?',
);
requires 'is_coap_root';
# Implementation details:
# Members of a coordination can be distinguished from shared modifiers
# according to the attribute is_member. The only problem is when some
# members are hanged on prepositions(afun=AuxP) or subord. conjunctions(AuxC).
# The PDT style is that Aux[CP] nodes can never have is_member=1, resulting in
# e.g. "It was in(parent=and) Prague(parent=in,is_member=1)
#       and(parent=was) in(parent=and) London(parent=in,is_member=1)."
# The style adpoted in Treex is that members have always is_member=1
# no matter what afun they have. This results in:
#  "It was in(parent=and,is_member=1) Prague(parent=in)
#   and(parent=was) in(parent=and,is_member=1) London(parent=in)."
# Both annotation styles have their pros and cons.
sub get_echildren {
    my ( $self, $arg_ref ) = @_;
    if ( !defined $arg_ref ) {
        $arg_ref = {};
    }
    log_fatal('Incorrect number of arguments') if @_ > 2;
    my @echildren;
    if ( $self->_can_apply_eff($arg_ref) ) {
        # 1) Get my own effective children (i.e. I am their only eff. parent).
        # These are in my subtree.
        @echildren = $self->_get_my_own_echildren($arg_ref);
        # 2) Add shared effective children
        # (i.e. I am their eff. parent, but not the only one).
        # This can happen only if I am member of a coordination
        # and these eff. children are shared modifiers of the coordination.
        push @echildren, $self->_get_shared_echildren($arg_ref);
    } else {
        @echildren = $self->get_children();
    }
    # 3) Process eventual switches (ordered=>1, add_self=>1,...)
    #return @echildren if !$arg_ref; TODO this cannot happen now, see $arg_ref = {} if !defined $arg_ref;
    delete $arg_ref->{dive};
    delete $arg_ref->{or_topological};
    delete $arg_ref->{ignore_incorrect_tree_structure};
    return $self->_process_switches( $arg_ref, @echildren );
}
sub get_eparents {
    my ( $self, $arg_ref ) = @_;
    if ( !defined $arg_ref ) {
        $arg_ref = {};
    }
    log_fatal('Incorrect number of arguments') if @_ > 2;
    # Get effective parents
    my @eparents = $self->_can_apply_eff($arg_ref) ?
        $self->_get_eparents($arg_ref) : ($self->get_parent());
    # Process eventual switches (ordered=>1, add_self=>1,...)
    delete $arg_ref->{dive};
    delete $arg_ref->{or_topological};
    delete $arg_ref->{ignore_incorrect_tree_structure};
    return $self->_process_switches( $arg_ref, @eparents );
}
sub _get_eparents {
    my ( $self, $arg_ref ) = @_;
    # 0) Check if there is a topological parent.
    # Otherwise, there is no chance getting effective parents.
    if ( !$self->get_parent() ) {
        my $id = $self->id;
        #TODO: log_fatal if !$robust
        log_warn( "The node $id has no effective nor topological parent, using the root", 1 );
        return $self->get_root();
    }
    # 1) If $self is a member of a coordination/aposition,
    # get the highest node representing $self -- i.e. the coord/apos root.
    # Otherwise, let $node be $self.
    my $node = $self->_get_transitive_coap_root($arg_ref) || $self;
    # 2) Get the parent
    $node = $node->get_parent() or return $self->_fallback_parent($arg_ref);
    # 3) If it is a node to be dived, look above for the first non-dive ancestor.
    while ( $arg_ref->{dive}->($node) ) {
        $node = $node->get_parent() or return $self->_fallback_parent($arg_ref);
    }
    # If $node is not a head of a coordination/aposition,
    # it is the effective parent we are looking for.
    return $node if !$node->is_coap_root();
    # Otherwise, there can be more than one effective parent.
    # All effective parents (of $self) are shared modifiers
    # of the coordination rooted in $node.
    my @eff = $node->get_coap_members($arg_ref);
    return @eff if @eff;
    return $self->_fallback_parent($arg_ref);
}
# --- Utility methods for get_echildren and get_eparents
sub _is_auxCP {
    my ($self) = @_;
    my $afun = $self->afun || '';
    return $afun =~ /^Aux[CP]$/;
}
sub _get_direct_coap_root {
    my ( $self, $arg_ref ) = @_;
    my $parent = $self->get_parent() or return;
    return $parent if $self->get_attr('is_member');
    return if !$arg_ref->{dive} || $arg_ref->{dive}->($self);
    while ( $arg_ref->{dive}->($parent) ) {
        return $parent->get_parent() if $parent->get_attr('is_member');
        $parent = $parent->get_parent() or return;
    }
    return;
}
sub _get_transitive_coap_root {
    my ( $self, $arg_ref ) = @_;
    my $root = $self->_get_direct_coap_root($arg_ref) or return;
    while ( $root->get_attr('is_member') ) {
        $root = $root->_get_direct_coap_root($arg_ref) or return;
    }
    return $root;
}
sub _can_apply_eff {
    my ( $self, $arg_ref ) = @_;
    if ( !$arg_ref->{dive} ) {
        $arg_ref->{dive} = sub {0};
    }
    elsif ( $arg_ref->{dive} eq 'AuxCP' ) {
        $arg_ref->{dive} = \&_is_auxCP;
    }
    my $error = $arg_ref->{dive}->($self)
        ? 'a node that is "to be dived"'
        : $self->is_coap_root() ? 'coap root' : 0;
    return 1 if !$error;
    return 0 if $arg_ref->{or_topological};    #TODO: document
    my $method_name = ( caller 1 )[3];
    my $id          = $self->id;
    log_warn( "$method_name called on $error ($id). Fallback to topological one.", 1 );
    return 0;
}
# used only if there is an error in the tree structure,
# eg. there is a coordination with no members or a non-root node with no parent
sub _fallback_parent {
    my ( $self, $arg_ref ) = @_;
    my $id = $self->get_attr('id');
    log_warn "The node $id has no effective parent, using the topological one."
        if ( !$arg_ref || !$arg_ref->{ignore_incorrect_tree_structure} );
    return $self->get_parent();
}
# Get my own effective children (i.e. I am their only eff. parent).
sub _get_my_own_echildren {
    my ( $self, $arg_ref ) = @_;
    my @members = ();
    my @queue   = $self->get_children();
    while (@queue) {
        my $node = shift @queue;
        if ( $arg_ref->{dive}->($node) ) {
            push @queue, $node->get_children();
        }
        elsif ( $node->is_coap_root() ) {
            push @members, $node->get_coap_members($arg_ref);
            #push @queue, grep { $_->get_attr('is_member') } $node->get_children();
        }
        else {
            push @members, $node;
        }
    }
    return @members;
}
# Get shared effective children
# (i.e. I am their eff. parent but not the only one).
sub _get_shared_echildren {
    my ( $self, $arg_ref ) = @_;
    # Only members of coord/apos can have shared eff. children
    my $coap_root = $self->_get_direct_coap_root($arg_ref) or return ();
    my @shared_echildren = ();
    # All shared modifiers of $coap_root are eff. children of $self.
    # We must process all possibly nested coap_roots.
    #  In the first iteration, $self is one of children of $coap_root.
    #  (In case of "diving", it's not $self, but its governing Aux[CP].)
    #  However, it has is_member==1, so it won't get into @shared_echildren.
    #  Similarly for other iterations.
    while ($coap_root) {
        push @shared_echildren,
            map  { $_->get_coap_members($arg_ref) }
            grep { !$_->get_attr('is_member') }
            $coap_root->get_children();
        $coap_root = $coap_root->_get_direct_coap_root($arg_ref);
    }
    return @shared_echildren;
}
sub get_coap_members {
    my ( $self, $arg_ref ) = @_;
    log_fatal('Incorrect number of arguments') if @_ > 2;
    return $self if !$self->is_coap_root();
    my $direct_only = $arg_ref->{direct_only};
    my $dive = $arg_ref->{dive} || sub {0};
    if ( $dive eq 'AuxCP' ) { $dive = \&_is_auxCP; }
    my @members = ();
    my @queue = grep { $_->is_member } $self->get_children();
    while (@queue) {
        my $node = shift @queue;
        if ( $dive->($node) ) {
            push @queue, $node->get_children();
        }
        elsif ( !$direct_only && $node->is_coap_root() ) {
            push @queue, grep { $_->is_member } $node->get_children();
        }
        else {
            push @members, $node;
        }
    }
    return @members;
}
use List::MoreUtils "any";
sub is_echild_of {
    my ($potential_echild, $eparent, $arg_ref) = @_;
    my @echildren = $eparent->get_echildren( $arg_ref );
    return any { $_ == $potential_echild } @echildren;
}
my @eff_args = qw(dive or_topological ignore_incorrect_tree_structure);
# Remove args relevant only to eff relations from arg_ref and return them
sub _extract_eff_args {
    my ($self, $arg_ref) = @_;
    my $eff_arg_ref = {};
    foreach my $eff_arg (@eff_args) {
        if (defined $arg_ref->{$eff_arg}) {
            $eff_arg_ref->{$eff_arg} = $arg_ref->{$eff_arg};
            delete $arg_ref->{$eff_arg};
        }
    }
    return $eff_arg_ref;
}
sub get_esiblings {
    my ( $self, $arg_ref ) = @_;
    log_fatal('Incorrect number of arguments') if @_ > 2;
    if ( !defined $arg_ref ) {
        $arg_ref = {};
    }
    my $eff_arg_ref = $self->_extract_eff_args($arg_ref);
    my @eparents = $self->get_eparents($eff_arg_ref);
     
    my %esiblings_hash;
    foreach my $eparent (@eparents) {
        my @esiblings = grep { $_ ne $self } $eparent->get_echildren($eff_arg_ref);
        foreach my $esibling (@esiblings) {
            $esiblings_hash{$esibling->id} = $esibling;
        }
    }
    my @esiblings = values %esiblings_hash;
    return $self->_process_switches($arg_ref, @esiblings);
}
1;
__END__
=encoding utf-8
=head1 NAME
Treex::Core::Node::EffectiveRelations
=head1 VERSION
version 2.20210102
=head1 DESCRIPTION
Moose role for nodes with a notion of so called
I<effective> parents and I<effective> children.
This notion is used both
on the a-layer (L<Treex::Core::Node::A>) and
on the t-layer (L<Treex::Core::Node::T>).
TODO: explain it, some examples, reference to PDT manual
Eg. in the sentence "Martin and Rudolph came", the tree structure is
 came(and(Martin,Rudolph))
where "and" is a head of a coordination with members "Martin" and "Rudolph".
See what the methods C<get_children>, C<get_echildren>, C<get_parent>
and C<get_eparents> return when called on various nodes
(some pseudocode used for better readability):
 # CHILDREN AND EFFECTIVE CHILDREN
 "came"->get_children()
 # returns "and"
 "came"->get_echildren()
 # returns ("Martin", "Rudolph")
 "and"->get_children()
 # returns ("Martin", "Rudolph")
 "and"->get_echildren()
 # returns ("Martin", "Rudolph") and issues a warning:
 # get_echildren called on coap root ([id_of_and]). Fallback to topological one.
 "and"->get_echildren({or_topological => 1})
 # returns ("Martin", "Rudolph") with no warning
 # PARENTS AND EFFECTIVE PARENTS
 "Rudolph"->get_parent()
 # returns "and"
 "Rudolph"->get_eparents()
 # returns "came"
 "and"->get_parent()
 # returns "came"
 "and"->get_eparents()
 # returns "came" and issues a warning:
 # get_eparents called on coap root ([id_of_and]). Fallback to topological one.
 "and"->get_eparents({or_topological => 1})
 # returns "came" with no warning
Note that to skip prepositions and subordinating conjunctions on the a-layer,
you must use option C<dive>, e.g.:
    my $eff_children = $node->get_echildren({dive=>'AuxCP'});
Methods C<get_eparents> and C<get_echildren> produce a warning
"C<called on coap root ($id). Fallback to topological one.>"
when called on a root of coordination or apposition,
because effective children/parents are not properly defined in this case.
This warning can be supressed by option C<or_topological>.
=head1 METHODS
=over
=item my @effective_children = $node->get_echildren($arg_ref?)
Returns a list of effective children of the C<$node>. It means that
a) instead of coordination/aposition heads, their members are returned
b) shared modifiers of a coord/apos (technically hanged on the head of coord/apos)
   count as effective children of the members.
OPTIONS:
=over
=item dive=>$sub_ref
Using C<dive>, you can define nodes to be skipped (or I<dived>).
C<dive> is a reference to a subroutine that decides
whether the given node should be skipped or not.
Typically this is used for prepositions and subord. conjunctions on a-layer.
You can set C<dive> to the string C<AuxCP> which is a shortcut
for C<sub {my $self=shift;return $self->afun =~ /^Aux[CP]$/;}>.
=item or_topological
If the notion of effective child is not defined
(if C<$node> is a head of coordination),
return the topological children without warnings.
=item ordered, add_self, following_only, preceding_only, first_only, last_only
You can specify the same options as in
L<Treex::Core::Node::get_children()|Treex::Core::Node/get_children>.
=back
=item my @effective_parents = $node->get_eparents($arg_ref?)
Returns a list of effective parents of the C<$node>.
OPTIONS
=over
=item dive
see L<get_echildren>
=item or_topological
If the notion of effective parent is not defined
(if C<$node> is a head of coordination),
return the topological parent without warnings.
=item ignore_incorrect_tree_structure
If there is an error in the tree structure,
eg. there is a coordination with no members or a non-root node with no parent,
a warning is issued
(C<The node [node_id] has no effective parent, using the topological one.>).
This option supresses the warning.
Use thoughtfully (preferably do not use at all).
=item ordered, add_self, following_only, preceding_only, first_only, last_only
You can specify the same options as in
L<Treex::Core::Node::get_children()|Treex::Core::Node/get_children>.
=back
=item $node->get_coap_members($arg_ref?)
If the node is a coordination/apposition head (see
L<Node::A::is_coap_root()|Node::A/is_coap_root> and
L<Node::T::is_coap_root()|Node::T/is_coap_root>) a list of all coordinated
members is returned. Otherwise, the node itself is returned.
OPTIONS
=over
=item direct_only
In case of nested coordinations return only "first-level" members.
The default is to return I<transitive> members.
For example "(A and B) or C":
 $or->get_coap_members();                 # returns A,B,C
 $or->get_coap_members({direct_only=>1}); # returns and,C
=item dive
see L<get_echildren>
=back
=back
=head1 AUTHOR
Martin Popel <popel@ufal.mff.cuni.cz>
=head1 COPYRIGHT AND LICENSE
Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles University in Prague
This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)