Perl x Open Food Facts Hackathon: Paris, France - May 24-25 Learn more

use Encode qw( decode_utf8 );
use Graph;
use IPC::Run qw/ run binary /;
use Moose;
=head1 NAME
Text::Tradition::Stemma - a representation of a I<stemma codicum> for a Text::Tradition
=head1 SYNOPSIS
use Text::Tradition;
my $t = Text::Tradition->new(
'name' => 'this is a text',
'input' => 'TEI',
'file' => '/path/to/tei_parallel_seg_file.xml' );
my $s = $tradition->add_stemma( dotfile => '/path/to/stemma.dot' );
=head1 DESCRIPTION
Text::Tradition is a library for representation and analysis of collated
texts, particularly medieval ones. The Stemma is a representation of the
copying relationships between the witnesses in a Tradition, modelled with
a connected rooted directed acyclic graph (CRDAG).
=head1 DOT SYNTAX
The easiest way to define a stemma is to use a special form of the 'dot'
syntax of GraphViz.
Each stemma opens with the line
digraph Stemma {
and continues with a list of all manuscript witnesses in the stemma, whether
extant witnesses or missing archetypes or hyparchetypes. Each of these is
listed by its sigil on its own line, e.g.:
alpha [ class=hypothetical ]
1 [ class=hypothetical,label=* ]
Ms4 [ class=extant ]
Extant witnesses are listed with class=extant; missing or postulated witnesses
are listed with class=hypothetical. Anonymous hyparchetypes must be given a
unique name or number, but can be represented as anonymous with the addition
of 'label=*' to their lines. Greek letters or other special characters may be
used as names, but they must always be wrapped in double quotes.
Links between manuscripts are then listed with arrow notation, as below. These
lines show the direction of copying, one step at a time, for the entire stemma.
alpha -> 1
1 -> Ms4
The final line in the definition should be the closing brace:
}
Thus for a set of extant manuscripts A, B, and C, where A and B were copied
from the archetype O and C was copied from B, the definition would be:
digraph Stemma {
O [ class=hypothetical]
A [ class=extant ]
B [ class=extant ]
C [ class=extant ]
O -> A
O -> B
B -> C
}
=head1 CONSTRUCTOR
=head2 new
The constructor. This should generally be called from Text::Tradition, but
if called directly it takes the following options:
=over
=item * dot - A filehandle open to a DOT representation of the stemma graph.
=item * graph - If no DOT specification is given, you can pass a Graph object
instead. The vertices of the graph should have an attribute 'class' set to
either of the values 'extant' or 'hypothetical'.
=item * is_undirected - If the graph specification (or graph object) is for an
undirected graph (e.g. a phylogenetic tree), this should be set.
=back
=begin testing
use TryCatch;
use_ok( 'Text::Tradition::Stemma' );
# Try to create a bad graph
TODO: {
local $TODO = "cannot use stdout redirection trick with FastCGI";
my $baddotfh;
open( $baddotfh, 't/data/besoin_bad.dot' ) or die "Could not open test dotfile";
try {
my $stemma = Text::Tradition::Stemma->new( dot => $baddotfh );
ok( 0, "Created broken stemma from dotfile with syntax error" );
} catch( Text::Tradition::Error $e ) {
like( $e->message, qr/^Error trying to parse/, "Syntax error in dot threw exception" );
}
}
# Create a good graph
my $dotfh;
open( $dotfh, 't/data/florilegium.dot' ) or die "Could not open test dotfile";
binmode( $dotfh, ':utf8' );
my $stemma = Text::Tradition::Stemma->new( dot => $dotfh );
is( ref( $stemma ), 'Text::Tradition::Stemma', "Created stemma from good dotfile" );
is( scalar $stemma->witnesses, 13, "Found correct number of extant witnesses" );
is( scalar $stemma->hypotheticals, 8, "Found correct number of extant hypotheticals" );
my $found_unicode_sigil;
foreach my $h ( $stemma->hypotheticals ) {
$found_unicode_sigil = 1 if $h eq "\x{3b1}";
}
ok( $found_unicode_sigil, "Found a correctly encoded Unicode sigil" );
# TODO Create stemma from graph, create stemma from undirected graph,
# create stemma from incompletely-specified graph
=end testing
=cut
has collation => (
is => 'ro',
isa => 'Text::Tradition::Collation',
clearer => 'clear_collation', # interim measure to remove refs in DB
weak_ref => 1,
);
has graph => (
is => 'rw',
isa => 'Graph',
predicate => 'has_graph',
);
has is_undirected => (
is => 'ro',
isa => 'Bool',
default => undef,
writer => 'set_undirected',
);
sub BUILD {
my( $self, $args ) = @_;
# If we have been handed a dotfile, initialize it into a graph.
if( exists $args->{'dot'} ) {
$self->_graph_from_dot( $args->{'dot'} );
} else {
}
}
before 'graph' => sub {
my $self = shift;
if( @_ ) {
# Make sure all unclassed graph nodes are marked extant.
my $g = $_[0];
throw( "Cannot set graph to a non-Graph object" )
unless ref( $g ) eq 'Graph';
foreach my $v ( $g->vertices ) {
unless( $g->has_vertex_attribute( $v, 'class' ) ) {
$g->set_vertex_attribute( $v, 'class', 'extant' );
}
}
$self->set_undirected( $g->is_undirected );
}
};
sub _graph_from_dot {
my( $self, $dotfh ) = @_;
my $reader = Graph::Reader::Dot->new();
# Redirect STDOUT in order to trap any error messages - syntax errors
# are evidently not fatal.
# TODO This breaks under FastCGI/Apache; reconsider.
my $reader_out;
#my $saved_stderr;
#open $saved_stderr, ">&STDOUT";
#close STDOUT;
#open STDOUT, ">", \$reader_out;
my $graph = $reader->read_graph( $dotfh );
#close STDOUT;
#open STDOUT, ">", \$saved_stderr;
if( $reader_out && $reader_out =~ /error/s ) {
throw( "Error trying to parse dot: $reader_out" );
} elsif( !$graph ) {
throw( "Failed to create graph from dot" );
}
$self->graph( $graph );
}
=head1 METHODS
=head2 as_dot( \%options )
Returns a normal dot representation of the stemma layout, suitable for rendering
with GraphViz. Options include:
=over
=item * graph - A hashref of global graph options.
=item * node - A hashref of global node options.
=item * edge - A hashref of global edge options.
=back
See the GraphViz documentation for the list of available options.
=cut
sub as_dot {
my( $self, $opts ) = @_;
## See if we are including any a.c. witnesses in this graph.
my $graph = $self->graph;
if( exists $opts->{'layerwits'} ) {
my $extant = {};
map { $extant->{$_} = 1 } $self->witnesses;
$graph = $self->situation_graph( $extant, $opts->{'layerwits'} );
}
# Get default and specified options
my %graphopts = (
# 'ratio' => 1,
'bgcolor' => 'transparent',
);
my %nodeopts = (
'fontsize' => 11,
'style' => 'filled',
'fillcolor' => 'white',
'color' => 'white',
'shape' => 'ellipse', # Shape for the extant nodes
);
my %edgeopts = (
'arrowhead' => 'none',
);
@graphopts{ keys %{$opts->{'graph'}} } = values %{$opts->{'graph'}}
if $opts->{'graph'};
@nodeopts{ keys %{$opts->{'node'}} } = values %{$opts->{'node'}}
if $opts->{'node'};
@edgeopts{ keys %{$opts->{'edge'}} } = values %{$opts->{'edge'}}
if $opts->{'edge'};
my $gdecl = $graph->is_directed ? 'digraph' : 'graph';
my @dotlines;
push( @dotlines, "$gdecl stemma {" );
## Print out the global attributes
push( @dotlines, _make_dotline( 'graph', %graphopts ) ) if keys %graphopts;
push( @dotlines, _make_dotline( 'edge', %edgeopts ) ) if keys %edgeopts;
push( @dotlines, _make_dotline( 'node', %nodeopts ) ) if keys %nodeopts;
# Add each of the nodes.
foreach my $n ( $graph->vertices ) {
my %vattr = ( 'id' => $n ); # Set the SVG element ID to the sigil itself
if( $graph->has_vertex_attribute( $n, 'label' ) ) {
$vattr{'label'} = $graph->get_vertex_attribute( $n, 'label' );
}
push( @dotlines, _make_dotline( $n, %vattr ) );
}
# Add each of our edges.
foreach my $e ( $graph->edges ) {
my( $from, $to ) = map { _dotquote( $_ ) } @$e;
my $connector = $graph->is_directed ? '->' : '--';
push( @dotlines, " $from $connector $to;" );
}
push( @dotlines, '}' );
return join( "\n", @dotlines );
}
=head2 alter_graph( $dotstring )
Alters the graph of this stemma according to the definition specified
in $dotstring.
=cut
sub alter_graph {
my( $self, $dotstring ) = @_;
my $dotfh;
open $dotfh, '<', \$dotstring;
binmode $dotfh, ':utf8';
$self->_graph_from_dot( $dotfh );
}
=head2 editable( $opts )
=head2 editable_graph( $graph, $opts )
Returns a version of the graph rendered in our definition format. The
output separates statements with a newline; set $opts->{'linesep'} to the
empty string or to a space if the result is to be sent via JSON.
If a situational version of the stemma is required, the arguments for
situation_graph should be passed via $opts->{'extant'} and $opts->{'layerwits'}.
=cut
sub editable {
my( $self, $opts ) = @_;
my $graph = $self->graph;
## See if we need an editable version of a situational graph.
if( exists $opts->{'layerwits'} || exists $opts->{'extant'} ) {
my $extant = delete $opts->{'extant'} || {};
my $layerwits = delete $opts->{'layerwits'} || [];
$graph = $self->situation_graph( $extant, $layerwits );
}
return editable_graph( $graph, $opts );
}
sub editable_graph {
my( $graph, $opts ) = @_;
# Create the graph
my $join = ( $opts && exists $opts->{'linesep'} ) ? $opts->{'linesep'} : "\n";
my $gdecl = $graph->is_undirected ? 'graph' : 'digraph';
my @dotlines;
push( @dotlines, "$gdecl stemma {" );
my @real; # A cheap sort
foreach my $n ( sort $graph->vertices ) {
my $c = $graph->get_vertex_attribute( $n, 'class' );
$c = 'extant' unless $c;
if( $c eq 'extant' ) {
push( @real, $n );
} else {
push( @dotlines, _make_dotline( $n, 'class' => $c ) );
}
}
# Now do the real ones
foreach my $n ( @real ) {
push( @dotlines, _make_dotline( $n, 'class' => 'extant' ) );
}
foreach my $e ( sort _by_vertex $graph->edges ) {
my( $from, $to ) = map { _dotquote( $_ ) } @$e;
my $conn = $graph->is_undirected ? '--' : '->';
push( @dotlines, " $from $conn $to;" );
}
push( @dotlines, '}' );
return join( $join, @dotlines );
}
sub _make_dotline {
my( $obj, %attr ) = @_;
my @pairs;
foreach my $k ( keys %attr ) {
my $v = _dotquote( $attr{$k} );
push( @pairs, "$k=$v" );
}
return sprintf( " %s [ %s ];", _dotquote( $obj ), join( ', ', @pairs ) );
}
sub _dotquote {
my( $str ) = @_;
return $str if $str =~ /^[A-Za-z0-9]+$/;
$str =~ s/\"/\\\"/g;
$str = '"' . $str . '"';
return $str;
}
sub _by_vertex {
return $a->[0].$a->[1] cmp $b->[0].$b->[1];
}
=head2 situation_graph( $extant, $layered )
Returns a graph which is the original stemma graph with all witnesses not
in the %$extant hash marked as hypothetical, and witness layers added to
the graph according to the list in @$layered. A layered (a.c.) witness is
added as a parent of its main version, and additionally shares all other
parents and children with that version.
=cut
sub situation_graph {
my( $self, $extant, $layerwits, $layerlabel ) = @_;
my $graph = $self->graph->copy;
foreach my $vertex ( $graph->vertices ) {
# Set as extant any vertex that is extant in the stemma AND
# exists in the $extant hash.
my $class = 'hypothetical';
$class = 'extant' if exists $extant->{$vertex} && $extant->{$vertex} &&
$self->graph->get_vertex_attribute( $vertex, 'class' ) ne 'hypothetical';
$graph->set_vertex_attribute( $vertex, 'class', $class );
}
# For each 'layered' witness in the layerwits array, add it to the graph
# as an ancestor of the 'main' witness, and otherwise with the same parent/
# child links as its main analogue.
# TOOD Handle case where B is copied from A but corrected from C
$layerlabel = ' (a.c.)' unless $layerlabel;
foreach my $lw ( @$layerwits ) {
# Add the layered witness and set it with the same attributes as
# its 'main' analogue
throw( "Cannot add a layer to a hypothetical witness $lw" )
unless $graph->get_vertex_attribute( $lw, 'class' ) eq 'extant';
my $lwac = $lw . $layerlabel;
$graph->add_vertex( $lwac );
$graph->set_vertex_attributes( $lwac,
$graph->get_vertex_attributes( $lw ) );
# Set it as ancestor to the main witness
$graph->add_edge( $lwac, $lw );
# Give it the same ancestors and descendants as the main witness has,
# bearing in mind that those ancestors and descendants might also just
# have had a layered witness defined.
foreach my $v ( $graph->predecessors( $lw ) ) {
next if $v eq $lwac; # Don't add a loop
$graph->add_edge( $v, $lwac );
$graph->add_edge( $v.$layerlabel, $lwac )
if $graph->has_vertex( $v.$layerlabel );
}
foreach my $v ( $graph->successors( $lw ) ) {
next if $v eq $lwac; # but this shouldn't occur
$graph->add_edge( $lwac, $v );
$graph->add_edge( $lwac, $v.$layerlabel )
if $graph->has_vertex( $v.$layerlabel );
}
}
return $graph;
}
=head2 as_svg
Returns an SVG representation of the graph, calling as_dot first.
=cut
sub as_svg {
my( $self, $opts ) = @_;
my $dot = $self->as_dot( $opts );
my @cmd = ( '-Tsvg' );
unshift( @cmd, $self->is_undirected ? 'neato' : 'dot' );
my $svg;
my $dotfile = File::Temp->new();
binmode $dotfile, ':utf8';
print $dotfile $dot;
close $dotfile;
push( @cmd, $dotfile->filename );
run( \@cmd, ">", binary(), \$svg );
return decode_utf8( $svg );
}
=head2 witnesses
Returns a list of the extant witnesses represented in the stemma.
=cut
sub witnesses {
my $self = shift;
my @wits = grep { $self->graph->get_vertex_attribute( $_, 'class' ) eq 'extant' }
$self->graph->vertices;
return @wits;
}
=head2 hypotheticals
Returns a list of the hypothetical witnesses represented in the stemma.
=cut
sub hypotheticals {
my $self = shift;
my @wits = grep
{ $self->graph->get_vertex_attribute( $_, 'class' ) eq 'hypothetical' }
$self->graph->vertices;
return @wits;
}
=head2 root_graph( $root_vertex )
If the stemma graph is undirected, make it directed with $root_vertex at the root.
If it is directed, re-root it.
=cut
sub root_graph {
my( $self, $rootvertex ) = @_;
my $graph;
if( $self->is_undirected ) {
$graph = $self->graph;
} else {
# Make an undirected version of this graph.
$graph = $self->graph->undirected_copy();
}
my $rooted = Graph->new();
$rooted->add_vertex( $rootvertex );
my @next = ( $rootvertex );
while( @next ) {
my @children;
foreach my $v ( @next ) {
# Place its not-placed neighbors (ergo children) in the tree
# and connect them
foreach my $n ( grep { !$rooted->has_vertex( $_ ) }
$graph->neighbors( $v ) ) {
$rooted->add_vertex( $n );
$rooted->add_edge( $v, $n );
push( @children, $n );
}
}
@next = @children;
}
# Set the vertex classes
map { $rooted->set_vertex_attribute( $_, 'class', 'hypothetical' ) }
$self->graph->hypotheticals;
map { $rooted->set_vertex_class( $_, 'class', 'extant' ) }
$self->graph->witnesses;
return $rooted;
}
sub throw {
Text::Tradition::Error->throw(
'ident' => 'Stemma error',
'message' => $_[0],
);
}
no Moose;
__PACKAGE__->meta->make_immutable;
1;
=head1 LICENSE
This package is free software and is provided "as is" without express
or implied warranty. You can redistribute it and/or modify it under
the same terms as Perl itself.
=head1 AUTHOR
Tara L Andrews E<lt>aurum@cpan.orgE<gt>