lib/Build/Hopen/G/DAG.pm


            
              1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
—
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
—
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
—
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
—
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
—
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
—
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
—
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
—
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
—
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
              # Build::Hopen::G::DAG - hopen build graph
package Build::Hopen::G::DAG;
use Build::Hopen::Base;
use Build::Hopen qw(hlog $QUIET);
our $VERSION = '0.000007'; # TRIAL
use parent 'Build::Hopen::G::Op';
use Class::Tiny {
    goals   => sub { [] },
    default_goal => undef,
    # Private attributes with simple defaults
    #_node_by_name => sub { +{} },   # map from node names to nodes in either
    #                                # _init_graph or _graph
    # Private attributes - initialized by BUILD()
    _graph  => undef,   # L<Graph> instance
    _final   => undef,  # The graph sink - all goals have edges to this
    #Initialization operations
    _init_graph => undef,   # L<Graph> for initializations
    _init_first => undef,   # Graph node for initialization - the first
                            # init operation to be performed.
    # TODO? also support fini to run operations after _graph runs?
};
use Build::Hopen::G::Goal;
use Build::Hopen::G::Link;
use Build::Hopen::G::Node;
use Build::Hopen::G::CollectOp;
use Build::Hopen::Util::Data qw(forward_opts);
use Build::Hopen::Arrrgs;
use Graph;
use Storable ();
# Class data {{{1
use constant {
    LINKS => 'link_list',    # Graph edge attr: array of BHG::Link instances
};
# A counter used for making unique names
my $_id_counter = 0;    # threads: make shared
# }}}1
# Docs {{{1
=head1 NAME
Build::Hopen::G::DAG - A hopen build graph
=head1 SYNOPSIS
This class encapsulates the DAG for a particular set of one or more goals.
It is itself a L<Build::Hopen::G::Op> so that it can be composed into
other DAGs.
=head1 ATTRIBUTES
=head2 goals
Arrayref of the goals for this DAG.
=head2 default_goal
The default goal for this DAG.
=head2 _graph
The actual L<Graph>.  If you find that you have to use it, please open an
issue so we can see about providing a documented API for your use case!
=head2 _final
The node to which all goals are connected.
=head2 _init_graph
A separate L<Graph> of operations that will run before all the operations
in L</_graph>.  This is because I don't want to add an edge to every
single node just to force the topological sort to work out.
=head2 _init_first
The first node to be run in _init_graph.
=head1 FUNCTIONS
=cut
# }}}1
=head2 _run
Traverses the graph.  The DAG is similar to a subroutine in this respect.
The outputs from all the goals
of the DAG are aggregated and provided as the outputs of the DAG.
The output is a hash keyed by the name of each goal, with each goal's outputs
as the values under that name.  Usage:
    my $hrOutputs = $dag->run([-context=>$scope][, other options])
C<$scope> must be a L<Build::Hopen::Scope> or subclass if provided.
Other options are as L<Build::Hopen::Runnable/run>.
=cut
# The implementation of run().  $self->scope has already been linked to the context.
sub _run {
    my ($self, %args) = parameters('self', [qw(; phase generator)], @_);
    my $retval = {};
    # --- Get the initialization ops ---
    my @init_order = eval { $self->_init_graph->toposort };
    die "Initializations contain a cycle!" if $@;
    @init_order = () if $self->_init_graph->vertices == 1;  # no init nodes => skip
    # --- Get the runtime ops ---
    my @order = eval { $self->_graph->toposort };
        # TODO someday support multi-core-friendly topo-sort, so nodes can run
        # in parallel until they block each other.
    die "Graph contains a cycle!" if $@;
    # Remove _final from the order for now - I don't yet know what it means
    # to traverse _final.
    die "Last item in order isn't _final!  This might indicate a bug in hopen."
        unless $order[$#order] == $self->_final;
    pop @order;
    # --- Traverse ---
    # Note: while hacking, please make sure Goal nodes can appear
    # anywhere in the graph.
    hlog { my $x = 'Traversing DAG ' . $self->name; $x, '*' x (78-length($x)) };
    my $graph = $self->_init_graph;
    foreach my $node (@init_order, undef, @order) {
        if(!defined($node)) {   # undef is the marker between init and run
            $graph = $self->_graph;
            next;
        }
        # Inputs to this node.  These are different from the DAG's inputs.
        # The scope stack is (outer to inner) DAG's inputs, DAG's overrides,
        # then $node_inputs, then the individual node's overrides.
        my $node_inputs = Build::Hopen::Scope::Hash->new;
            # TODO make this a BH::Scope::Inputs once it's implemented
        $node_inputs->outer($self->scope);
            # Data specifically being provided to the current node, e.g.,
            # on input edges, beats the scope of the DAG as a whole.
        $node_inputs->local(true);
            # A CollectOp won't reach above the node's inputs by default.
        # Iterate over each node's edges and process any Links
        foreach my $pred ($graph->predecessors($node)) {
            hlog { ('From', $pred->name, 'to', $node->name) };
            # Goals do not feed outputs to other Goals.  This is so you can
            # add edges between Goals to set their order while keeping the
            # data for each Goal separate.
            # TODO add tests for this.  Also TODO decide whether this is
            # actually the Right Thing!
            next if eval { $pred->DOES('Build::Hopen::G::Goal') };
            my $links = $graph->get_edge_attribute($pred, $node, LINKS);
            unless($links) {    # Simple case: predecessor's outputs become our inputs
                $node_inputs->add(%{$pred->outputs});
                next;
            }
            # More complex case: Process all the links
            my $hrPredOutputs = $pred->outputs;
                # In one test, outputs was undef if not on its own line.
            my $link_inputs = Build::Hopen::Scope::Hash->new->add(%{$hrPredOutputs});
                # All links get the same outer scope --- they are parallel,
                # not in series.
            $link_inputs->outer($self->scope);
                # The links run at the same scope level as the node.
            $link_inputs->local(true);
            # Run the links in series - not parallel!
            my $link_outputs = $link_inputs->as_hashref(-levels=>'local');
            foreach my $link (@$links) {
                hlog { ('From', $pred->name, 'via', $link->name, 'to', $node->name) };
                $link_outputs = $link->run(
                    -context=>$link_inputs,
                    forward_opts(\%args, {'-'=>1}, 'phase')
                    # Generator not passed to links.
                );
            } #foreach incoming link
            $node_inputs->add(%{$link_outputs});
                # TODO specify which set these are.
        } #foreach predecessor node
        my $step_output = $node->run(-context=>$node_inputs,
            forward_opts(\%args, {'-'=>1}, 'phase', 'generator')
        );
        $node->outputs($step_output);
        # Give the Generator a chance, and stash the results if necessary.
        if(eval { $node->DOES('Build::Hopen::G::Goal') }) {
            $args{generator}->visit_goal($node) if $args{generator};
            # Save the result if there is one.  Don't save {}.
            # use $node->outputs, not $step_output, since the generator may
            # alter $node->outputs.
            $retval->{$node->name} = $node->outputs if keys %{$node->outputs};
        } else {
            $args{generator}->visit_node($node) if $args{generator};
        }
    } #foreach node in topo-sort order
    return $retval;
} #run()
=head1 ADDING DATA
=head2 goal
Creates a goal of the DAG.  Goals are names for sequences of operations,
akin to top-level Makefile targets.  Usage:
    my $goalOp = $dag->goal('name')
Returns a passthrough operation representing the goal.  Any inputs passed into
that operation are provided as outputs of the DAG under the corresponding name.
The first call to C<goal()> also sets L</default_goal>.
=cut
sub goal {
    my $self = shift or croak 'Need an instance';
    my $name = shift or croak 'Need a goal name';
    my $goal = Build::Hopen::G::Goal->new(name => $name);
    $self->_graph->add_vertex($goal);
    #$self->_node_by_name->{$name} = $goal;
    $self->_graph->add_edge($goal, $self->_final);
    $self->default_goal($goal) unless $self->default_goal;
    return $goal;
} #goal()
=head2 connect
   - C<DAG:connect(<op1>, <out-edge>, <in-edge>, <op2>)>:
     connects output C<< out-edge >> of operation C<< op1 >> as input C<< in-edge >> of
     operation C<< op2 >>.  No processing is done between output and input.
     - C<< out-edge >> and C<< in-edge >> can be anything usable as a table index,
       provided that table index appears in the corresponding operation's
       descriptor.
   - C<DAG:connect(<op1>, <op2>)>: creates a dependency edge from C<< op1 >> to
     C<< op2 >>, indicating that C<< op1 >> must be run before C<< op2 >>.
     Does not transfer any data from C<< op1 >> to C<< op2 >>.
   - C<DAG:connect(<op1>, <Link>, <op2>)>: Connects C<< op1 >> to
     C<< op2 >> via L<Build::Hopen::G::Link> C<< Link >>.
TODO return the name of the edge?  The edge instance itself?  Maybe a
fluent interface to the DAG for chaining C<connect> calls?
=cut
sub connect {
    my $self = shift or croak 'Need an instance';
    my ($op1, $out_edge, $in_edge, $op2) = @_;
    my $link;
    if(!defined($in_edge)) {    # dependency edge
        $op2 = $out_edge;
        $out_edge = false;      # No outputs
        $in_edge = false;       # No inputs
    } elsif(!defined($op2)) {
        $op2 = $in_edge;
        $link = $out_edge;
        $out_edge = false;      # No outputs TODO
        $in_edge = false;       # No inputs TODO
    }
#    # Create the link
#    unless($link) {
#        $link = Build::Hopen::G::Link->new(
#            name => 'link_' . $op1->name . '_' . $op2->name,
#            in => [$out_edge],      # Output of op1
#            out => [$in_edge],      # Input to op2
#        );
#    }
    hlog { 'DAG::connect(): Edge from', $op1->name,
            'via', $link ? $link->name : '(no link)',
            'to', $op2->name };
    # Add it to the graph (idempotent)
    $self->_graph->add_edge($op1, $op2);
    #$self->_node_by_name->{$_->name} = $_ foreach ($op1, $op2);
    # Save the BHG::Link as an edge attribute (not idempotent!)
    my $attrs = $self->_graph->get_edge_attribute($op1, $op2, LINKS) || [];
    push @$attrs, $link if $link;
    $self->_graph->set_edge_attribute($op1, $op2, LINKS, $attrs);
    return undef;   # TODO decide what to return
} #connect()
=head2 add
Add a regular node to the graph.  An attempt to add the same node twice will be
ignored.  Usage:
    my $node = Build::Hopen::G::Op->new(name=>"whatever");
    $dag->add($node);
Returns the node, for the sake of chaining.
=cut
sub add {
    my $self = shift or croak 'Need an instance';
    my $node = shift or croak 'Need a node';
    return if $self->_graph->has_vertex($node);
    hlog { __PACKAGE__, 'adding', Dumper($node) } 2;
    $self->_graph->add_vertex($node);
    #$self->_node_by_name->{$node->name} = $node if $node->name;
    return $node;
} #add()
=head2 init
Add an initialization operation to the graph.  Initialization operations run
before all other operations.  An attempt to add the same initialization
operation twice will be ignored.  Usage:
    my $op = Build::Hopen::G::Op->new(name=>"whatever");
    $dag->init($op[, $first]);
If C<$first> is truthy, the op will be run before anything already in the
graph.  However, later calls to C<init()> with C<$first> set will push
operations even before C<$op>.
Returns the node, for the sake of chaining.
=cut
sub init {
    my $self = shift or croak 'Need an instance';
    my $op = shift or croak 'Need an op';
    my $first = shift;
    return if $self->_init_graph->has_vertex($op);
    $self->_init_graph->add_vertex($op);
    #$self->_node_by_name->{$op->name} = $op;
    if($first) {    # $op becomes the new _init_first node
        $self->_init_graph->add_edge($op, $self->_init_first);
        $self->_init_first($op);
    } else {    # Not first, so can happen anytime.  Add it after the
                # current first node.
        $self->_init_graph->add_edge($self->_init_first, $op);
    }
    return $op;
} #init()
=head1 ACCESSORS
=head2 empty
Returns truthy if the only nodes in the graph are internal nodes.
Intended for use by hopen files.
=cut
sub empty {
    my $self = shift or croak 'Need an instance';
    return ($self->_graph->vertices == 1);
        # _final is the node in an empty() graph.
        # We don't check the _init_graph since empty() is intended
        # for use by hopen files, not toolsets.
} #empty()
=head1 OTHER
=head2 BUILD
Initialize the instance.
=cut
sub BUILD {
    #use Data::Dumper;
    #say Dumper(\@_);
    my $self = shift or croak 'Need an instance';
    my $hrArgs = shift;
    # DAGs always have names
    $self->name('__R_DAG_' . $_id_counter++) unless $self->has_custom_name;
    # Graph of normal operations
    my $graph = Graph->new( directed => true,
                            refvertexed => true);
    my $final = Build::Hopen::G::Node->new(
                                    name => '__R_DAG_ROOT' . $_id_counter++);
    $graph->add_vertex($final);
    $self->_graph($graph);
    $self->_final($final);
    # Graph of initialization operations
    my $init_graph = Graph->new( directed => true,
                            refvertexed => true);
    my $init = Build::Hopen::G::CollectOp->new(
                                    name => '__R_DAG_INIT' . $_id_counter++);
    $init_graph->add_vertex($init);
    $self->_init_graph($init_graph);
    $self->_init_first($init);
} #BUILD()
1;
# Rest of the docs {{{1
__END__
=head1 IMPLEMENTATION
Each DAG has a hidden "root" node.  All outputs have edges from the root node.
The traversal order is reverse topological from the root node, but is not
constrained beyond that.  Generators can ask for the nodes in root-first or
root-last order.
The DAG is built backwards from the outputs toward the inputs, although calls
to L</output> and L</connect> can appear in any order in the C<hopen> file as
long as everything is hooked in by the end of the file.
The following is in flux:
 - C<DAG>: A class representing a DAG.  An instance called C<main> represents
   what will be generated.
   - C<DAG:set_default(<goal>)>: make C<< goal >> the default goal of this DAG
     (default target).
   - C<DAG:inject(<op1>,<op2>[, after/before'])>: Returns an operation that
     lives on the edge between C<op1> and C<op2>.  If the third parameter is
     false, C<'before'>, or omitted, the new operation will be the first
     operation on that edge.  If the third parameter is true or C<'after'>,
     the new operation will be the last operation on that edge.  Any number
     of operations can be injected on any edge.
=cut
# }}}1
# vi: set fdm=marker: #
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)