package String::Diff;
use strict;
use base qw(Exporter);
our @EXPORT_OK = qw( diff_fully diff diff_merge diff_regexp );
BEGIN {
local $@;
if ($ENV{STRING_DIFF_PP}) {
$@ = 1;
} else {
eval "use Algorithm::Diff::XS qw( sdiff );"; ## no critic
}
if ($@) {
eval "use Algorithm::Diff qw( sdiff );"; ## no critic
die $@ if $@;
}
}
our $VERSION = '0.04';
our %DEFAULT_MARKS = (
remove_open => '[',
remove_close => ']',
append_open => '{',
append_close => '}',
separator => '', # for diff_merge
);
sub diff_fully {
my($old, $new, %opts) = @_;
my $old_diff = [];
my $new_diff = [];
if ($opts{linebreak}) {
my @diff = sdiff( map{ my @l = map { ( $_, "\n") } split /\n/, $_; pop @l; [ @l ]} $old, $new);
for my $line (@diff) {
if ($line->[0] eq 'c') {
# change
my($old_diff_tmp, $new_diff_tmp) = _fully($line->[1], $line->[2]);
push @{ $old_diff }, @{ $old_diff_tmp };
push @{ $new_diff }, @{ $new_diff_tmp };
} elsif ($line->[0] eq '-') {
# remove
push @{ $old_diff }, ['-', $line->[1]];
} elsif ($line->[0] eq '+') {
# append
push @{ $new_diff }, ['+', $line->[2]];
} else {
# unchage
push @{ $old_diff }, ['u', $line->[1]];
push @{ $new_diff }, ['u', $line->[2]];
}
}
} else {
($old_diff, $new_diff) = _fully($old, $new);
}
wantarray ? ($old_diff, $new_diff) : [ $old_diff, $new_diff];
}
sub _fully {
my($old, $new) = @_;
return ([], []) unless $old || $new;
my @old_diff = ();
my @new_diff = ();
my $old_str;
my $new_str;
my @diff = sdiff( map{ $_ ? [ split //, $_ ] : [] } $old, $new);
my $last_mode = $diff[0]->[0];
for my $line (@diff) {
if ($last_mode ne $line->[0]) {
push @old_diff, [$last_mode, $old_str] if defined $old_str;
push @new_diff, [$last_mode, $new_str] if defined $new_str;
# skip concut ## i forget s mark
push @old_diff, ['s', ''] unless defined $old_str;
push @new_diff, ['s', ''] unless defined $new_str;
$old_str = $new_str = undef;
}
$old_str .= $line->[1];
$new_str .= $line->[2];
$last_mode = $line->[0];
}
push @old_diff, [$last_mode, $old_str] if defined $old_str;
push @new_diff, [$last_mode, $new_str] if defined $new_str;
@old_diff = _fully_filter('-', @old_diff);
@new_diff = _fully_filter('+', @new_diff);
return (\@old_diff, \@new_diff);
}
sub _fully_filter {
my($c_mode, @diff) = @_;
my @filter = ();
my $last_line = ['', ''];
for my $line (@diff) {
$line->[0] = $c_mode if $line->[0] eq 'c';
if ($last_line->[0] eq $line->[0]) {
$last_line->[1] .= $line->[1];
next;
}
push @filter, $last_line if length $last_line->[1];
$last_line = $line;
}
push @filter, $last_line if length $last_line->[1];
@filter;
}
sub diff {
my($old, $new, %opts) = @_;
my($old_diff, $new_diff) = diff_fully($old, $new, %opts);
%opts = (%DEFAULT_MARKS, %opts);
my $old_str = _str($old_diff, %opts);
my $new_str = _str($new_diff, %opts);
wantarray ? ($old_str, $new_str) : [ $old_str, $new_str];
}
sub _str {
my($diff, %opts) = @_;
my $str = '';
for my $parts (@{ $diff }) {
if ($parts->[0] eq '-') {
$str .= "$opts{remove_open}$parts->[1]$opts{remove_close}";
} elsif ($parts->[0] eq '+') {
$str .= "$opts{append_open}$parts->[1]$opts{append_close}";
} else {
$str .= $parts->[1];
}
}
$str;
}
sub diff_merge {
my($old, $new, %opts) = @_;
my($old_diff, $new_diff) = diff_fully($old, $new, %opts);
%opts = (%DEFAULT_MARKS, %opts);
my $old_c = 0;
my $new_c = 0;
my $str = '';
LOOP:
while (scalar(@{ $old_diff }) > $old_c && scalar(@{ $new_diff }) > $new_c) {
my $old_str = $opts{regexp} ? quotemeta $old_diff->[$old_c]->[1] : $old_diff->[$old_c]->[1];
my $new_str = $opts{regexp} ? quotemeta $new_diff->[$new_c]->[1] : $new_diff->[$new_c]->[1];
if ($old_diff->[$old_c]->[0] eq 'u' && $new_diff->[$new_c]->[0] eq 'u') {
$str .= $old_str;
$old_c++;
$new_c++;
} elsif ($old_diff->[$old_c]->[0] eq '-' && $new_diff->[$new_c]->[0] eq '+') {
$str .= "$opts{remove_open}$old_str";
$str .= "$opts{remove_close}$opts{separator}$opts{append_open}" unless $opts{regexp};
$str .= $opts{separator} if $opts{regexp};
$str .= "$new_str$opts{append_close}";
$old_c++;
$new_c++;
} elsif ($old_diff->[$old_c]->[0] eq 'u' && $new_diff->[$new_c]->[0] eq '+') {
$str .= "$opts{append_open}$new_str$opts{append_close}";
$new_c++;
} elsif ($old_diff->[$old_c]->[0] eq '-' && $new_diff->[$new_c]->[0] eq 'u') {
$str .= "$opts{remove_open}$old_str$opts{remove_close}";
$old_c++;
}
}
$str .= _list_gc($old_diff, $old_c, %opts);
$str .= _list_gc($new_diff, $new_c, %opts);
$str;
}
sub _list_gc {
my($diff, $c, %opts) = @_;
my $str = '';
while (scalar(@{ $diff }) > $c) {
my $_str = $opts{regexp} ? quotemeta $diff->[$c]->[1] : $diff->[$c]->[1];
if ($diff->[$c]->[0] eq '-') {
$str .= "$opts{remove_open}$_str$opts{remove_close}";
} elsif ($diff->[$c]->[0] eq '+') {
$str .= "$opts{append_open}$_str$opts{append_close}";
} else {
$str .= $_str;
}
$c++;
}
$str;
}
my %regexp_opts = (
remove_open => '(?:',
remove_close => ')',
append_open => '(?:',
append_close => ')',
separator => '|',
regexp => 1,
);
sub diff_regexp {
my($old, $new, %opts) = @_;
diff_merge($old, $new, %opts, %regexp_opts);
}
1;
__END__
=head1 NAME
String::Diff - Simple diff to String
=head1 SYNOPSIS
use String::Diff;
use String::Diff qw( diff_fully diff diff_merge diff_regexp );# export functions
# simple diff
my($old, $new) = String::Diff::diff('this is Perl', 'this is Ruby');
print "$old\n";# this is [Perl]
print "$new\n";# this is {Ruby}
my $diff = String::Diff::diff('this is Perl', 'this is Ruby');
print "$diff->[0]\n";# this is [Perl]
print "$diff->[1]\n";# this is {Ruby}
my $diff = String::Diff::diff('this is Perl', 'this is Ruby',
remove_open => '<del>',
remove_close => '</del>',
append_open => '<ins>',
append_close => '</ins>',
);
print "$diff->[0]\n";# this is <del>Perl</del>
print "$diff->[1]\n";# this is <ins>Ruby</ins>
# merged
my $diff = String::Diff::diff_merge('this is Perl', 'this is Ruby');
print "$diff\n";# this is [Perl]{Ruby}
my $diff = String::Diff::diff_merge('this is Perl', 'this is Ruby',
remove_open => '<del>',
remove_close => '</del>',
append_open => '<ins>',
append_close => '</ins>',
);
print "$diff\n";# this is <del>Perl</del><ins>Ruby</ins>
# change to default marks
%String::Diff::DEFAULT_MARKS = (
remove_open => '<del>',
remove_close => '</del>',
append_open => '<ins>',
append_close => '</ins>',
separator => '&lt;-OLD|NEW-&gt;', # for diff_merge
);
# generated for regexp
my $diff = String::Diff::diff_regexp('this is Perl', 'this is Ruby');
print "$diff\n";# this\ is\ (?:Perl|Ruby)
# detailed list
my $diff = String::Diff::diff_fully('this is Perl', 'this is Ruby');
for my $line (@{ $diff->[0] }) {
print "$line->[0]: '$line->[1]'\n";
}
# u: 'this is '
# -: 'Perl'
for my $line (@{ $diff->[1] }) {
print "$line->[0]: '$line->[1]'\n";
}
# u: 'this is '
# +: 'Ruby'
=head1 DESCRIPTION
String::Diff is the difference of a consecutive string is made.
after general diff is done, the difference in the line is searchable.
the mark of the addition and the deletion can be freely changed.
the color is colored to the terminal with ANSI, using the HTML display it.
after the line is divided, diff is taken when 'linebreak' option is specified.
my($old_string, $new_string) = String::Diff::diff_fully('this is Perl', 'this is Ruby', linebreak => 1);
my($old_string, $new_string) = String::Diff::diff('this is Perl', 'this is Ruby', linebreak => 1);
my $string = String::Diff::diff_merge('this is Perl', 'this is Ruby', linebreak => 1);
my $string = String::Diff::diff_regexp('this is Perl', 'this is Ruby', linebreak => 1);
In diff and diff_merge methods the mark of the difference can be changed.
my $diff = String::Diff::diff('this is Perl', 'this is Ruby',{
remove_open => '<del>',
remove_close => '</del>',
append_open => '<ins>',
append_close => '</ins>',
});
=head1 METHODS
=over 4
=item diff_fully
the list that divides diff according to the mark is returnd.
my($old_string, $new_string) = String::Diff::diff_fully('this is Perl', 'this is Ruby');
=item diff
abd the mark of the deletion and the addition is given to the string.
=item diff_merge
old and new string is merged with diff.
=item diff_regexp
the regular expression to which old string and new string are matched with regexp is returned.
=back
=head1 AUTHOR
Kazuhiro Osawa E<lt>ko@yappo.ne.jpE<gt>
=head1 SEE ALSO
L<Algorithm::Diff>
=head1 LICENSE
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=cut