lib/Text/Tradition/Analysis.pm


            
              1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
—
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
—
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
—
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
—
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
—
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
—
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
—
926
927
928
929
930
931
932
933
934
935
              package Text::Tradition::Analysis;
use strict;
use warnings;
use Algorithm::Diff;  # for word similarity measure
use Encode qw/ decode_utf8 encode_utf8 /;
use Exporter 'import';
use Graph;
use JSON qw/ to_json decode_json /;
use LWP::UserAgent;
use Set::Scalar;
use Text::Tradition::Analysis::Result;
use Text::Tradition::Stemma;
use TryCatch;
use vars qw/ @EXPORT_OK $VERSION /;
@EXPORT_OK = qw/ run_analysis group_variants analyze_variant_location wit_stringify /;
$VERSION = "1.1";
my $SOLVER_URL = 'http://byzantini.st/cgi-bin/graphcalc.cgi';
my $unsolved_problems = {};
=head1 NAME
Text::Tradition::Analysis - functions for stemma analysis of a tradition
=head1 DESCRIPTION
Text::Tradition is a library for representation and analysis of collated
texts, particularly medieval ones.  Where the Collation is the central
feature of a Tradition, it may also have one or more stemmata associated
with it, and these stemmata may be analyzed. This package provides the
following modules:
=over 4
=item * L<Text::Tradition::HasStemma> - a role that will be composed into
Text::Tradition objects, providing the ability for Text::Tradition::Stemma
objects to be associated with them.
=item * L<Text::Tradition::Stemma> - an object class that represents stemma
hypotheses, both rooted (with a single archetype) and unrooted (e.g.
phylogenetic trees).
=item * Text::Tradition::Analysis (this package). Provides functions for
the analysis of a given stemma against the collation within a given
Tradition.
=back
=head1 SYNOPSIS
  use Text::Tradition;
  use Text::Tradition::Analysis qw/ run_analysis analyze_variant_location /;
  my $t = Text::Tradition->new( 
    'name' => 'this is a text',
    'input' => 'TEI',
    'file' => '/path/to/tei_parallel_seg_file.xml' );
  $t->add_stemma( 'dotfile' => $stemmafile );
  my $variant_data = run_analysis( $tradition );
     
=head1 SUBROUTINES
=head2 run_analysis( $tradition, %opts )
Runs the analysis described in analyze_variant_location on every location in the 
collation of the given tradition, with the given options. These include:
=over 4
=item * stemma_id - Specify which of the tradition's stemmata to use. Default
is 0 (i.e. the first).
=item * ranks - Specify a list of location ranks to analyze; exclude the rest.
=item * merge_types - Specify a list of relationship types, where related readings 
should be treated as identical for the purposes of analysis.
=item * exclude_type1 - Exclude those ranks whose groupings have only type-1 variants.
=back
=begin testing
use Text::Tradition;
use Text::Tradition::Analysis qw/ run_analysis analyze_variant_location /;
my $datafile = 't/data/florilegium_tei_ps.xml';
my $tradition = Text::Tradition->new( 'input' => 'TEI',
                                      'name' => 'test0',
                                      'file' => $datafile );
my $s = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
is( ref( $s ), 'Text::Tradition::Stemma', "Added stemma to tradition" );
my %expected_genealogical = (
        1 => 0,
        2 => 1,
        3 =>  0,
        5 =>  0,
        7 =>  0,
        8 =>  0,
        10 => 0,
        13 => 1,
        33 => 0,
        34 => 0,
        37 => 0,
        60 => 0,
        81 => 1,
        84 => 0,
        87 => 0,
        101 => 0,
        102 => 0,
        122 => 1,
        157 => 0,
        166 => 1,
        169 => 1,
        200 => 0,
        216 => 1,
        217 => 1,
        219 => 1,
        241 => 1,
        242 => 1,
        243 => 1,
);
my $data = run_analysis( $tradition, calcdsn => 'dbi:SQLite:dbname=t/data/analysis.db' );
my $c = $tradition->collation;
foreach my $row ( @{$data->{'variants'}} ) {
        # Account for rows that used to be "not useful"
        unless( exists $expected_genealogical{$row->{'id'}} ) {
                $expected_genealogical{$row->{'id'}} = 1;
        }
        my $gen_bool = $row->{'genealogical'} ? 1 : 0;
        is( $gen_bool, $expected_genealogical{$row->{'id'}}, 
                "Got correct genealogical flag for row " . $row->{'id'} );
        # Check that we have the right row with the right groups
        my $rank = $row->{'id'};
        foreach my $rdghash ( @{$row->{'readings'}} ) {
                # Skip 'readings' that aren't really
                next unless $c->reading( $rdghash->{'readingid'} );
                # Check the rank
                is( $c->reading( $rdghash->{'readingid'} )->rank, $rank, 
                        "Got correct reading rank" );
                # Check the witnesses
                my @realwits = sort $c->reading_witnesses( $rdghash->{'readingid'} );
                my @sgrp = sort @{$rdghash->{'group'}};
                is_deeply( \@sgrp, \@realwits, "Reading analyzed with correct groups" );
        }
}
is( $data->{'variant_count'}, 58, "Got right total variant number" );
# TODO Make something meaningful of conflict count, maybe test other bits
=end testing
=cut
sub run_analysis {
        my( $tradition, %opts ) = @_;
        my $c = $tradition->collation;
        my $aclabel = $c->ac_label;
        my $stemma_id = $opts{'stemma_id'} || 0;
        my @ranks = ref( $opts{'ranks'} ) eq 'ARRAY' ? @{$opts{'ranks'}} : ();
        my $collapse = Set::Scalar->new();
        if( $opts{'merge_types'} && ref( $opts{'merge_types'} ) eq 'ARRAY' ) {
                $collapse->insert( @{$opts{'merge_types'}} );
        } elsif( $opts{'merge_types'} ) {
                $collapse->insert( $opts{'merge_types'} );
        }
         
        # Make sure we have a lookup DB for graph calculation results; this will die
        # if calcdir or calcdsn isn't passed.
        my $dir;
        if( exists $opts{'calcdir'} ) {
                $dir = delete $opts{'calcdir'}
        } elsif ( exists $opts{'calcdsn'} ) {
                eval { require Text::Tradition::Directory };
                if( $@ ) {
                        throw( "Could not instantiate a directory for " . $opts{'calcdsn'}
                                . ": $@" );
                }
                $dir = Text::Tradition::Directory->new( dsn => $opts{'calcdsn'} );
        }
        # Get the stemma        
        my $stemma = $tradition->stemma( $stemma_id );
        # Figure out which witnesses we are working with - that is, the ones that
        # appear both in the stemma and in the tradition. All others are 'lacunose'
        # for our purposes.
        my $lacunose = Set::Scalar->new( $stemma->hypotheticals );
        my $stemma_wits = Set::Scalar->new( $stemma->witnesses );
        my $tradition_wits = Set::Scalar->new( map { $_->sigil } $tradition->witnesses );
        $lacunose->insert( $stemma_wits->symmetric_difference( $tradition_wits )->members );
        # Find and mark 'common' ranks for exclusion, unless they were
        # explicitly specified.
        unless( @ranks ) {
                my %common_rank;
                foreach my $rdg ( $c->common_readings ) {
                        $common_rank{$rdg->rank} = 1;
                }
                @ranks = grep { !$common_rank{$_} } ( 1 .. $c->end->rank-1 );
        }
         
        # Group the variants to send to the solver
        my @groups;
        my @use_ranks;
        my %lacunae;
        my $moved = {};
        foreach my $rank ( @ranks ) {
                my $missing = $lacunose->clone();
                my $rankgroup = group_variants( $tradition, $rank, $missing, $moved, $collapse );
                # Filter out any empty rankgroups 
                # (e.g. from the later rank for a transposition)
                next unless keys %$rankgroup;
                # Get the graph for this rankgroup
                my $rankgraph = _graph_for_grouping( $stemma, $rankgroup, $missing, $aclabel );
                if( $opts{'exclude_type1'} ) {
                        # Check to see whether this is a "useful" group.
                        next unless _useful_variant( $rankgroup, $rankgraph, $aclabel );
                }
                push( @use_ranks, $rank );
                push( @groups, { grouping => $rankgroup, graph => $rankgraph } );
                $lacunae{$rank} = $missing;
        }
        # Run the solver
        my $answer;
        try {
                $answer = solve_variants( $dir, @groups );
        } catch ( Text::Tradition::Error $e ) {
                if( $e->message =~ /IDP/ ) {
                        # Something is wrong with the solver; make the variants table anyway
                        $answer->{'variants'} = [];
                        map { push( @{$answer->{'variants'}}, _init_unsolved( $_, 'IDP error' ) ) }
                                @groups;
                } else {
                        # Something else is wrong; error out.
                        $e->throw;
                }
        }
        # Do further analysis on the answer
        my $conflict_count = 0;
        my $reversion_count = 0;
        foreach my $idx ( 0 .. $#use_ranks ) {
                my $location = $answer->{'variants'}->[$idx];
                # Add the rank back in
                my $rank = $use_ranks[$idx];
                $location->{'id'} = $rank;
                # Note what our lacunae are
                my %lmiss;
                map { $lmiss{$_} = 1 } @{$lacunae{$use_ranks[$idx]}};
                $location->{'missing'} = [ keys %lmiss ];
                 
                # Run the extra analysis we need.
                ## TODO We run through all the variants in this call, so
                ## why not add the reading data there instead of here below?
                my $graph = $groups[$idx]->{graph};
                analyze_location( $tradition, $graph, $location, \%lmiss );
                my @layerwits;
                # Do the final post-analysis tidying up of the data.
                foreach my $rdghash ( @{$location->{'readings'}} ) {
                        $conflict_count++ if $rdghash->{'is_conflict'};
                        $reversion_count++ if $rdghash->{'is_reverted'};
                        # Add the reading text back in, setting display value as needed
                        my $rdg = $c->reading( $rdghash->{'readingid'} );
                        if( $rdg ) {
                                $rdghash->{'text'} = $rdg->text . 
                                        ( $rdg->rank == $rank ? '' : ' [' . $rdg->rank . ']' );
                                if( $rdg->does( 'Text::Tradition::Morphology' ) ) {
                                        $rdghash->{'is_ungrammatical'} = $rdg->grammar_invalid;
                                        $rdghash->{'is_nonsense'} = $rdg->is_nonsense;
                                }
                        }
                        # Remove lacunose witnesses from this reading's list now that the
                        # analysis is done 
                        my @realgroup;
                        map { push( @realgroup, $_ ) unless $lmiss{$_} } @{$rdghash->{'group'}};
                        $rdghash->{'group'} = \@realgroup;
                        # Note any layered witnesses that appear in this group
                        foreach( @realgroup ) {
                                if( $_ =~ /^(.*)\Q$aclabel\E$/ ) {
                                        push( @layerwits, $1 );
                                }
                        }
                }
                $location->{'layerwits'} = \@layerwits if @layerwits;
        }
        $answer->{'conflict_count'} = $conflict_count;
        $answer->{'reversion_count'} = $reversion_count;
         
        return $answer;
}
=head2 group_variants( $tradition, $rank, $lacunose, $transposed, $merge_relationship_types )
Groups the variants at the given $rank of the collation, treating any
relationships in the set $merge_relationship_types as equivalent. 
$lacunose should be a reference to an array, to which the sigla of lacunose
witnesses at this rank will be appended; $transposed should be a reference
to a hash, wherein the identities of transposed readings and their
relatives will be stored.
Returns a hash $group_readings where $rdg is attested by the witnesses listed 
in $group_readings->{$rdg}.
=cut
# Return group_readings, groups, lacunose
sub group_variants {
        my( $tradition, $rank, $lacunose, $transposed, $collapse ) = @_;
        my $c = $tradition->collation;
        my $aclabel = $c->ac_label;
        my $table = $c->alignment_table;
        # Get the alignment table readings
        my %readings_at_rank;
        my $check_for_gaps = Set::Scalar->new();
        my %moved_wits;
        my $has_transposition;
        foreach my $tablewit ( @{$table->{'alignment'}} ) {
                my $rdg = $tablewit->{'tokens'}->[$rank-1];
                my $wit = $tablewit->{'witness'};
                # Exclude the witness if it is "lacunose" which if we got here
                # means "not in the stemma".
                next if _is_lacunose( $wit, $lacunose, $aclabel );
                # Note if the witness is actually in a lacuna
                if( $rdg && $rdg->{'t'}->is_lacuna ) {
                        _add_to_witlist( $wit, $lacunose, $aclabel );
                # Otherwise the witness either has a positive reading...
                } elsif( $rdg ) {
                        # If the reading has been counted elsewhere as a transposition, ignore it.
                        if( $transposed->{$rdg->{'t'}->id} ) {
                                # TODO Does this cope with three-way transpositions?
                                map { $moved_wits{$_} = 1 } @{$transposed->{$rdg->{'t'}->id}};
                                next;
                        }
                        # Otherwise, record it...
                        $readings_at_rank{$rdg->{'t'}->id} = $rdg->{'t'};
                        # ...and grab any transpositions, and their relations.
                        my @transp = grep { $_->rank != $rank } $rdg->{'t'}->related_readings();
                        foreach my $trdg ( @transp ) {
                                next if exists $readings_at_rank{$trdg->id};
                                $has_transposition = 1;
                                my @affected_wits = _table_witnesses( 
                                        $table, $trdg, $lacunose, $aclabel );
                                next unless @affected_wits;
                                map { $moved_wits{$_} = 1 } @affected_wits;
                                $transposed->{$trdg->id} = 
                                        [ _table_witnesses( $table, $rdg->{'t'}, $lacunose, $aclabel ) ];
                                $readings_at_rank{$trdg->id} = $trdg;
                        }
                # ...or it is empty, ergo a gap.
                } else {
                        _add_to_witlist( $wit, $check_for_gaps, $aclabel );
                }
        }
        my $gap_wits = Set::Scalar->new();
        map { _add_to_witlist( $_, $gap_wits, $aclabel ) 
                unless $moved_wits{$_} } $check_for_gaps->members;
                 
        # Group the readings, collapsing groups by relationship if needed.      
        my $grouped_readings = {};
        foreach my $rdg ( values %readings_at_rank ) {
                # Skip readings that have been collapsed into others.
                next if exists $grouped_readings->{$rdg->id} 
                        && $grouped_readings->{$rdg->id} eq 'COLLAPSE';
                # Get the witness list, including from readings collapsed into this one.
                my @wits = _table_witnesses( $table, $rdg, $lacunose, $aclabel );
                if( $collapse && $collapse->size ) {
                        my $filter = sub { $collapse->has( $_[0]->type ) };
                        foreach my $other ( $rdg->related_readings( $filter ) ) {
                                my @otherwits = _table_witnesses( $table, $other, $lacunose, $aclabel );
                                push( @wits, @otherwits );
                                $grouped_readings->{$other->id} = 'COLLAPSE';
                        }
                }
                $grouped_readings->{$rdg->id} = Set::Scalar->new( @wits );
        }
        if( $gap_wits->members ) {
                $grouped_readings->{'(omitted)'} = $gap_wits;
        }
         
        # Get rid of our collapsed readings
        map { delete $grouped_readings->{$_} if(
                         $grouped_readings->{$_} eq 'COLLAPSE'
                         || $grouped_readings->{$_}->is_empty ) } 
                keys %$grouped_readings;
                 
        # If something was transposed, check the groups for doubled-up readings
        if( $has_transposition ) {
                # print STDERR "Group for rank $rank:\n";
                # map { print STDERR "\t$_: " . join( ' ' , @{$grouped_readings->{$_}} ) . "\n" } 
                #       keys %$grouped_readings;
                _check_transposed_consistency( $c, $rank, $transposed, $grouped_readings );
        }
         
        # Return the result
        return $grouped_readings;
}
# Helper function to query the alignment table for all witnesses (a.c. included)
# that have a given reading at its rank.
sub _table_witnesses {
        my( $table, $trdg, $lacunose, $aclabel ) = @_;
        my $tableidx = $trdg->rank - 1;
        my $has_reading = Set::Scalar->new();
        foreach my $row ( @{$table->{'alignment'}} ) {
                my $wit = $row->{'witness'};
                next if _is_lacunose( $wit, $lacunose, $aclabel );
                my $rdg = $row->{'tokens'}->[$tableidx];
                next unless exists $rdg->{'t'} && defined $rdg->{'t'};
                _add_to_witlist( $wit, $has_reading, $aclabel )
                        if $rdg->{'t'}->id eq $trdg->id;
        }
        return $has_reading->members;
}
# Helper function to see if a witness is lacunose even if we are asking about
# the a.c. version
sub _is_lacunose {
        my ( $wit, $lac, $acstr ) = @_;
        if( $wit =~ /^(.*)\Q$acstr\E$/ ) {
                $wit = $1;
        }
        return $lac->has( $wit );
}
# Helper function to ensure that X and X a.c. never appear in the same list.
sub _add_to_witlist {
        my( $wit, $list, $acstr ) = @_;
        if( $wit =~ /^(.*)\Q$acstr\E$/ ) {
                # Don't add X a.c. if we already have X 
                return if $list->has( $1 );
        } else {
                # Delete X a.c. if we are about to add X
                $list->delete( $wit.$acstr );
        }
        $list->insert( $wit );
}
sub _check_transposed_consistency {
        my( $c, $rank, $transposed, $groupings ) = @_;
        my %seen_wits;
        my %thisrank;
        # Note which readings are actually at this rank, and which witnesses
        # belong to which reading.
        foreach my $rdg ( keys %$groupings ) {
                my $rdgobj = $c->reading( $rdg );
                # Count '(omitted)' as a reading at this rank
                $thisrank{$rdg} = 1 if !$rdgobj || $rdgobj->rank == $rank;
                map { push( @{$seen_wits{$_}}, $rdg ) } @{$groupings->{$rdg}};
        }
        # Our work is done if we have no witness belonging to more than one
        # reading.
        my @doubled = grep { scalar @{$seen_wits{$_}} > 1 } keys %seen_wits;
        return unless @doubled;
        # If we have a symmetric related transposition, drop the non-rank readings.
        if( @doubled == scalar keys %seen_wits ) {
                foreach my $rdg ( keys %$groupings ) {
                        if( !$thisrank{$rdg} ) {
                                my $groupstr = wit_stringify( $groupings->{$rdg} );
                                my ( $matched ) = grep { $groupstr eq wit_stringify( $groupings->{$_} ) }
                                        keys %thisrank;
                                delete $groupings->{$rdg};
                                # If we found a group match, assume there is a symmetry happening.
                                # TODO think more about this
                                # print STDERR "*** Deleting symmetric reading $rdg\n";
                                unless( $matched ) {
                                        delete $transposed->{$rdg};
                                        warn "Found problem in evident symmetry with reading $rdg";
                                }
                        }
                }
        # Otherwise 'unhook' the transposed reading(s) that have duplicates.
        } else {
                foreach my $dup ( @doubled ) {
                        foreach my $rdg ( @{$seen_wits{$dup}} ) {
                                next if $thisrank{$rdg};
                                next unless exists $groupings->{$rdg};
                                # print STDERR "*** Deleting asymmetric doubled-up reading $rdg\n";
                                delete $groupings->{$rdg};
                                delete $transposed->{$rdg};
                        }
                }
                # and put any now-orphaned readings into an 'omitted' reading.
                foreach my $wit ( keys %seen_wits ) {
                        unless( grep { exists $groupings->{$_} } @{$seen_wits{$wit}} ) {
                                $groupings->{'(omitted)'} = Set::Scalar->new()
                                         unless exists $groupings->{'(omitted)'};
                                _add_to_witlist( $wit, $groupings->{'(omitted)'}, $c->ac_label );
                        }
                }
        }
}
# For the given grouping, return its situation graph based on the stemma.
sub _graph_for_grouping {
        my( $stemma, $grouping, $lacunose, $aclabel ) = @_;
        my $acwits = [];
        my $extant = {};
        foreach my $gs ( values %$grouping ) {
                map { 
                        if( $_ =~ /^(.*)\Q$aclabel\E$/ ) {
                                push( @$acwits, $1 ) unless $lacunose->has( $1 );
                        } else {
                                $extant->{$_} = 1 unless $lacunose->has( $_ );
                        }
                } $gs->members;
        }
        my $graph;
        try {
                # contig contains all extant wits and all hypothetical wits
                # needed to make up the groups.
                $graph = $stemma->situation_graph( $extant, $acwits, $aclabel );
        } catch ( Text::Tradition::Error $e ) {
                throw( "Could not extend graph with given extant and a.c. witnesses: "
                        . $e->message );
        } catch {
                throw( "Could not extend graph with a.c. witnesses @$acwits" );
        }
        return $graph;
}
=head2 solve_variants( $calcdir, @groups ) 
Looks up the set of groups in the answers provided by the external graph solver 
service and returns a cleaned-up answer, adding the rank IDs back where they belong.
The answer has the form 
  { "variants" => [ array of variant location structures ],
    "variant_count" => total,
    "conflict_count" => number of conflicts detected,
    "genealogical_count" => number of solutions found }
     
=cut
sub solve_variants {
        my( @groups ) = @_;
         
        # Are we using a local result directory, or did we pass an empty value
        # for the directory?
        my $dir;
        unless( ref( $groups[0] ) eq 'HASH' ) {
                $dir = shift @groups;
        }
        ## For each graph/group combo, make a Text::Tradition::Analysis::Result
        ## object so that we can send it off for IDP lookup.
        my $variants = [];
        my $genealogical = 0; # counter
        # TODO Optimize for unique graph problems
        my %problems;
        foreach my $graphproblem ( @groups ) {
                # Construct the calc result key and look up its answer
                my $problem = Text::Tradition::Analysis::Result->new(
                        graph => $graphproblem->{'graph'},
                        setlist => [ values %{$graphproblem->{'grouping'}} ] );
                if( exists $problems{$problem->object_key} ) {
                        $problem = $problems{$problem->object_key};
                } else {
                        $problems{$problem->object_key} = $problem;
                }
                $graphproblem->{'object'} = $problem;
        }
         
        my %results;
        if( $dir ) {
                my $scope = $dir->new_scope;
                map { $results{$_} = $dir->lookup( $_ ) || $problems{$_} } keys %problems;
        } else {
                my $json = JSON->new->allow_blessed->convert_blessed->utf8->encode( 
                        [ values %problems ] );
                # Send it off and get the result
                # print STDERR "Sending request: " . decode_utf8( $json ) . "\n";
                my $ua = LWP::UserAgent->new();
                my $resp = $ua->post( $SOLVER_URL, 'Content-Type' => 'application/json', 
                                                          'Content' => $json );      
                my $answer;     
                if( $resp->is_success ) {
                        $answer = decode_json( $resp->content );
                        throw( "Unexpected answer from IDP: $answer" ) unless ref( $answer ) eq 'ARRAY';
                } else {
                        throw( "IDP solver returned " . $resp->status_line . " / " . $resp->content
                                . "; cannot run graph analysis" );
                }
                # One more sanity check
                throw( "Something went wrong with answer symmetricity" )
                        unless keys( %problems ) == @$answer;
                # Convert the results
                foreach my $a ( @$answer ) {
                        my $r = Text::Tradition::Analysis::Result->new( $a );
                        $results{$r->object_key} = $r;
                }
        }
         
        # We now have a single JSON-encoded Result object per problem sent. Fold its
        # answers into our variant info structure.
        foreach my $graphproblem ( @groups ) {
                my $result = $results{$graphproblem->{'object'}->object_key}
                        || $graphproblem->{'object'};
                 
                # Initialize the result structure for this graph problem
                my $vstruct;
                if( $result->status eq 'OK' ) {
                        $vstruct = { readings => [] };
                        push( @$variants, $vstruct );
                } else {
                        push( @$variants, _init_unsolved( $graphproblem, $result->status ) );
                        next;
                }
                                 
                # 1. Did the group evaluate as genealogical?
                $vstruct->{genealogical} = $result->is_genealogical;
                $genealogical++ if $result->is_genealogical;
                 
                # 2. What are the calculated minimum groupings for each variant loc?
                foreach my $rid ( keys %{$graphproblem->{grouping}} ) {
                        my $inputset = $graphproblem->{grouping}->{$rid};
                        my $minset = $result->minimum_grouping_for( $inputset );
                        push( @{$vstruct->{readings}}, { readingid => $rid, group => $minset } );
                }
                 
                # 3. What are the sources and classes calculated for each witness?
                $vstruct->{witcopy_types} = { $result->classes };
                $vstruct->{reading_roots} = {};
                map { $vstruct->{reading_roots}->{$_} = 1 } $result->sources;
                 
        }
         
        return { 'variants' => $variants, 
                         'variant_count' => scalar @$variants,
                         'genealogical_count' => $genealogical };
}
sub _init_unsolved {
        my( $graphproblem, $status ) = @_;
        my $vstruct = { 'readings' => [] };
        $vstruct->{'unsolved'} = $status;
        foreach my $rid ( keys %{$graphproblem->{grouping}} ) {
                push( @{$vstruct->{readings}}, { readingid => $rid, 
                        group => [ $graphproblem->{grouping}->{$rid}->members ] } );
        }
        return $vstruct;
}
=head2 analyze_location ( $tradition, $graph, $location_hash )
Given the tradition, its stemma graph, and the solution from the graph solver,
work out the rest of the information we want.  For each reading we need missing, 
conflict, reading_parents, independent_occurrence, followed, not_followed,
and follow_unknown.  Alters the location_hash in place.
=cut
sub analyze_location {
        my ( $tradition, $graph, $variant_row, $lacunose ) = @_;
        my $c = $tradition->collation;
         
        if( exists $variant_row->{'unsolved'} ) {
                return;
        }
        my $reading_roots = delete $variant_row->{'reading_roots'};
        my $classinfo = delete $variant_row->{'witcopy_types'};
         
        # Make a hash of all known node memberships, and make the subgraphs.
        my $contig = {};
        my $subgraph = {};
        my $acstr = $c->ac_label;
        my @acwits;
         
        # Note which witnesses positively belong to which group. This information
        # comes ultimately from the IDP solver.
        # Also make a note of the reading's roots.
    foreach my $rdghash ( @{$variant_row->{'readings'}} ) {
        my $rid = $rdghash->{'readingid'};
        my @roots;
        foreach my $wit ( @{$rdghash->{'group'}} ) {
                $contig->{$wit} = $rid;
            if( $wit =~ /^(.*)\Q$acstr\E$/ ) {
                push( @acwits, $1 );
            }
            if( exists $reading_roots->{$wit} && $reading_roots->{$wit} ) {
                push( @roots, $wit );
            }
        }
                $rdghash->{'independent_occurrence'} = \@roots;
        }
                         
        # Now that we have all the node group memberships, calculate followed/
    # non-followed/unknown values for each reading.  Also figure out the
    # reading's evident parent(s).
    foreach my $rdghash ( @{$variant_row->{'readings'}} ) {
        my $rid = $rdghash->{'readingid'};
        my $rdg = $c->reading( $rid );
        my @roots = @{$rdghash->{'independent_occurrence'}};
        my @reversions;
        if( $classinfo ) {
                @reversions = grep { $classinfo->{$_} eq 'revert' } 
                        $rdghash->{'group'}->members;
                $rdghash->{'reversions'} = \@reversions;
        }
        my @group = @{$rdghash->{'group'}};
         
        # Start figuring things out.  
        $rdghash->{'followed'} = scalar( @group ) 
                - ( scalar( @roots ) + scalar( @reversions ) );
        # Find the parent readings, if any, of this reading.
        my $sourceparents = _find_reading_parents( $rid, $graph, $contig, @roots );
                # Work out relationships between readings and their non-followed parent.
                _resolve_parent_relationships( $c, $rid, $rdg, $sourceparents );
                $rdghash->{'source_parents'} = $sourceparents;
                if( @reversions ) {
                        my $revparents = _find_reading_parents( $rid, $graph, $contig, @reversions );
                        _resolve_parent_relationships( $c, $rid, $rdg, $revparents );
                        $rdghash->{'reversion_parents'} = $revparents;
                }
                 
                # Find the number of times this reading was altered, and the number of
                # times we're not sure.
                my( %nofollow, %unknownfollow );
                foreach my $wit ( @{$rdghash->{'group'}} ) {
                        foreach my $wchild ( $graph->successors( $wit ) ) {
                                if( $reading_roots->{$wchild} && $contig->{$wchild}
                                        && $contig->{$wchild} ne $rid ) {
                                        # It definitely changed here.
                                        $nofollow{$wchild} = 1;
                                } elsif( !($contig->{$wchild}) ) {
                                        # The child is a hypothetical node not definitely in
                                        # any group. Answer is unknown.
                                        $unknownfollow{$wchild} = 1;
                                } # else it is either in our group, or it is a non-root node in a 
                                  # known group and therefore is presumed to have its reading from 
                                  # its group, not this link.
                        }
                }
                $rdghash->{'not_followed'} = keys %nofollow;
                $rdghash->{'follow_unknown'} = keys %unknownfollow;
                 
                # Now say whether this reading represents a conflict.
                unless( $variant_row->{'genealogical'} ) {
                        $rdghash->{'is_conflict'} = @roots != 1;
                        $rdghash->{'is_reverted'} = scalar @reversions;
                }               
    }
}
sub _find_reading_parents {
        my( $rid, $graph, $contig, @list ) = @_;
        my $parenthash = {};
        foreach my $wit ( @list ) {
                # Look in the stemma graph to find this witness's extant or known-reading
                # immediate ancestor(s), and look up the reading that each ancestor holds.
                my @check = $graph->predecessors( $wit );
                while( @check ) {
                        my @next;
                        foreach my $wparent( @check ) {
                                my $preading = $contig->{$wparent};
                                if( $preading && $preading ne $rid ) {
                                        $parenthash->{$preading} = 1;
                                } else {
                                        push( @next, $graph->predecessors( $wparent ) );
                                }
                        }
                        @check = @next;
                }
        }
        return $parenthash;
}
sub _resolve_parent_relationships {
        my( $c, $rid, $rdg, $rdgparents ) = @_;
        foreach my $p ( keys %$rdgparents ) {
                # Resolve the relationship of the parent to the reading, and
                # save it in our hash.
                my $pobj = $c->reading( $p );
                my $prep = $pobj ? $pobj->id . ' (' . $pobj->text . ')' : $p;
                my $phash = { 'label' => $prep };
                if( $pobj ) {
                        my $rel = $c->get_relationship( $p, $rid );
                        if( $rel ) {
                                _add_to_hash( $rel, $phash );
                        } elsif( $rdg ) {
                                # First check for a transposed relationship
                                if( $rdg->rank != $pobj->rank ) {
                                        foreach my $ti ( $rdg->related_readings( 'transposition' ) ) {
                                                next unless $ti->text eq $rdg->text;
                                                $rel = $c->get_relationship( $ti, $pobj );
                                                if( $rel ) {
                                                        _add_to_hash( $rel, $phash, 1 );
                                                        last;
                                                }
                                        }
                                        unless( $rel ) {
                                                foreach my $ti ( $pobj->related_readings( 'transposition' ) ) {
                                                        next unless $ti->text eq $pobj->text;
                                                        $rel = $c->get_relationship( $ti, $rdg );
                                                        if( $rel ) {
                                                                _add_to_hash( $rel, $phash, 1 );
                                                                last;
                                                        }
                                                }
                                        }
                                }
                                unless( $rel ) {
                                        # and then check for sheer word similarity.
                                        my $rtext = $rdg->text;
                                        my $ptext = $pobj->text;
                                        if( similar( $rtext, $ptext ) ) {
                                                # say STDERR "Words $rtext and $ptext judged similar";
                                                $phash->{relation} = { type => 'wordsimilar' };
                                        } 
                                }
                        } else {
                                $phash->{relation} = { type => 'deletion' };
                        }
                        # Get the attributes of the parent object while we are here
                        $phash->{'text'} = $pobj->text if $pobj;
                        if( $pobj && $pobj->does('Text::Tradition::Morphology') ) {
                                $phash->{'is_nonsense'} = $pobj->is_nonsense;
                                $phash->{'is_ungrammatical'} = $pobj->grammar_invalid;
                        }
                } elsif( $p eq '(omitted)' ) {
                        $phash->{relation} = { type => 'addition' };
                }
                # Save it
                $rdgparents->{$p} = $phash;
        }
}
sub _add_to_hash {
        my( $rel, $phash, $is_transposed ) = @_;
        $phash->{relation} = { type => $rel->type };
        $phash->{relation}->{transposed} = 1 if $is_transposed;
        $phash->{relation}->{annotation} = $rel->annotation
                if $rel->has_annotation;
}
=head2 similar( $word1, $word2 )
Use Algorithm::Diff to get a sense of how close the words are to each other.
This will hopefully handle substitutions a bit more nicely than Levenshtein.
=cut
#!/usr/bin/env perl
sub similar {
        my( $word1, $word2 ) = sort { length($a) <=> length($b) } @_;
        my @let1 = split( '', lc( $word1 ) );
        my @let2 = split( '', lc( $word2 ) );
        my $diff = Algorithm::Diff->new( \@let1, \@let2 );
        my $mag = 0;
        while( $diff->Next ) {
                if( $diff->Same ) {
                        # Take off points for longer strings
                        my $cs = $diff->Range(1) - 2;
                        $cs = 0 if $cs < 0;
                        $mag -= $cs;
                } elsif( !$diff->Items(1) ) {
                        $mag += $diff->Range(2);
                } elsif( !$diff->Items(2) ) {
                        $mag += $diff->Range(1);
                } else {
                        # Split the difference for substitutions
                        my $c1 = $diff->Range(1) || 1;
                        my $c2 = $diff->Range(2) || 1;
                        my $cd = ( $c1 + $c2 ) / 2;
                        $mag += $cd;
                }
        }
        return ( $mag <= length( $word1 ) / 2 );
}
sub _useful_variant {
        my( $rankgroup, $rankgraph, $acstr ) = @_;
        # Sort by group size and return
        my $is_useful = 0;
        foreach my $rdg ( keys %$rankgroup ) {
                my @wits = $rankgroup->{$rdg}->members;
                if( @wits > 1 ) {
                        $is_useful++;
                } else {
                        $is_useful++ unless( $rankgraph->is_sink_vertex( $wits[0] )
                                || $wits[0] =~ /\Q$acstr\E$/ );
                }
        }
        return $is_useful > 1;
}
=head2 wit_stringify( $groups )
Takes an array of witness groupings and produces a string like
['A','B'] / ['C','D','E'] / ['F']
=cut
sub wit_stringify {
    my $groups = shift;
    my @gst;
    # If we were passed an array of witnesses instead of an array of 
    # groupings, then "group" the witnesses first.
    unless( ref( $groups->[0] ) ) {
        my $mkgrp = [ $groups ];
        $groups = $mkgrp;
    }
    foreach my $g ( @$groups ) {
        push( @gst, '[' . join( ',', map { "'$_'" } @$g ) . ']' );
    }
    return join( ' / ', @gst );
}
1;
sub throw {
        Text::Tradition::Error->throw( 
                'ident' => 'Analysis error',
                'message' => $_[0],
        );
}
=head1 LICENSE
This package is free software and is provided "as is" without express
or implied warranty.  You can redistribute it and/or modify it under
the same terms as Perl itself.
=head1 AUTHOR
Tara L Andrews E<lt>aurum@cpan.orgE<gt>
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)