package App::Greple::Regions;

use v5.14;
use warnings;
use Carp;
use Data::Dumper;
$Data::Dumper::Sortkeys = 1;

use Exporter 'import';
our @EXPORT      = qw(REGION_INSIDE REGION_OUTSIDE
		      REGION_UNION  REGION_INTERSECT
		      match_regions
		      classify_regions
		      select_regions
		      filter_regions
		      merge_regions
		      reverse_regions
		      match_borders
		      borders_to_regions
		      );
our %EXPORT_TAGS = ( );
our @EXPORT_OK   = qw();

use constant {

    REGION_AREA_MASK  => 1,
    REGION_INSIDE     => 0,
    REGION_OUTSIDE    => 1,

    REGION_SET_MASK   => 2,
    REGION_INTERSECT  => 0,
    REGION_UNION      => 2,
};

sub new {
    my $class = shift;

    my $obj = bless {
	FLAG => undef,
	SPEC => undef,
    }, $class;

    $obj->configure(@_) if @_;

    $obj;
}

sub configure {
    my $obj = shift;
    while (@_ >= 2) {
	$obj->{$_[0]} = $_[1];
	splice @_, 0, 2;
    }
}

sub spec { $_[0]->{SPEC} }
sub flag { $_[0]->{FLAG} }
sub is_x {
    my($p, $mask, $set) = @_;
    ((ref $p ? $p->flag : $p) & $mask) == $set;
}
sub is_union     { is_x $_[0], REGION_SET_MASK,  REGION_UNION     }
sub is_intersect { is_x $_[0], REGION_SET_MASK,  REGION_INTERSECT }
sub is_inside    { is_x $_[0], REGION_AREA_MASK, REGION_INSIDE    }
sub is_outside   { is_x $_[0], REGION_AREA_MASK, REGION_OUTSIDE   }

package App::Greple::Regions::Holder {

    sub new {
	my $class = shift;
	bless [], $class;
    }

    sub append {
	my $obj = shift;
	push @$obj, App::Greple::Regions->new(@_);
    }

    sub regions {
	my $obj = shift;
	@$obj;
    }

    sub union {
	grep { $_->is_union } shift->regions;
    }

    sub intersect {
	grep { $_->is_intersect } shift->regions;
    }
}

sub match_regions {
    my %arg = @_;
    my $pattern = $arg{pattern} // croak "Parameter error";
    my $regex = ref $pattern eq 'Regexp' ? $pattern : qr/$pattern/m;
    my @regions;

    no warnings 'utf8';

    while (/$regex/gp) {
	##
	## this is much faster than:
	## my($s, $e) = ($-[0], $+[0]);
	##
	## calling pos() cost is not neglective, either.
	##
	my $pos = pos();
	push @regions, [ $pos - length(${^MATCH}), $pos ];
    }
    @regions;
}

sub classify_regions {
    my $opt = ref $_[0] eq 'HASH' ? shift : {};

    $opt->{strict} and goto &classify_regions_strict;

    my @list = @{+shift};
    my @by = @{+shift};
    my @table;
    for (my $i = 0; $i < @by; $i++) {
	my($from, $to) = @{$by[$i]};
	while (@list and $list[0][1] < $from) {
	    shift @list;
	}
	while (@list and $list[0][1] == $from and $list[0][0] < $from) {
	    shift @list;
	}
	my $t = $table[$i] = [];
	for (my $i = 0; ($i < @list) and ($list[$i][0] < $to); $i++) {
	    push @$t, [ @{$list[$i]} ];
	}
    }
    @table;
}

sub classify_regions_strict {
    my @list = @{+shift};
    my @by = @{+shift};
    my @table;
    for (my $i = 0; $i < @by; $i++) {
	my($from, $to) = @{$by[$i]};
	while (@list and $list[0][0] < $from) {
	    shift @list;
	}
	my $t = $table[$i] = [];
	while (@list and $list[0][0] < $to and $list[0][1] <= $to) {
	    push @$t, shift @list;
	}
    }
    @table;
}

sub select_regions {
    my $opt = ref $_[0] eq 'HASH' ? shift : {};
    my($list, $by, $flag) = @_;
    my($inside, $outside) = ([], []);
    my $target = is_inside($flag) ? $inside : $outside;
    my $overlap = $opt->{strict} ? [] : $target;
    filter_regions($list, $by, $inside, $overlap, $outside);
    @$target;
}

##
## Split @input into @inside, @overlap, @outside by @filter and return
## their pointers.
##
## 4th and 5th result is corresponding entry of @filter for @inside
## and @overlap.
##
sub filter_regions {
    my @input = @{+shift};
    my @filter = @{+shift};
    my($inside, $overlap, $outside) = (shift//[], shift//[], shift//[]);
    my($inside_match, $overlap_match) = ([], []);

    for (my $i = 0; $i < @filter; $i++) {
	my($from, $to) = @{$filter[$i]};
	while (@input and $input[0][0] < $from and $input[0][1] <= $from) {
	    push @$outside, shift @input;
	}
	while (@input and $input[0][0] < $from) {
	    push @$overlap, shift @input;
	    $overlap_match->[$#{$overlap}] = $filter[$i];
	}
	while (@input and $input[0][0] < $to and $input[0][1] <= $to) {
	    push @$inside, shift @input;
	    $inside_match->[$#{$inside}] = $filter[$i];
	}
	while (@input and $input[0][0] < $to) {
	    push @$overlap, shift @input;
	    $overlap_match->[$#{$overlap}] = $filter[$i];
	}
    }
    push @$outside, splice @input;
    ($inside, $overlap, $outside, $inside_match, $overlap_match);
}

sub merge_regions {
    my $option = ref $_[0] eq 'HASH' ? shift : {};
    my $nojoin = $option->{nojoin};
    my @in = $option->{destructive} ? @_ : map { [ @$_ ] } @_;
    unless ($option->{nosort}) {
	@in = sort({$a->[0] <=> $b->[0] || $b->[1] <=> $a->[1]
			||  (@$a > 2 ? $a->[2] <=> $b->[2] : 0)
		   } @in);
    }
    my @out;
    push(@out, shift @in) if @in;
    while (@in) {
	my $top = shift @in;

	if ($out[-1][1] > $top->[0]) {
	    $out[-1][1] = $top->[1] if $out[-1][1] < $top->[1];
	}
	elsif (!$nojoin
	       and $out[-1][1] == $top->[0]
	       ##
	       ## don't connect regions in different pattern group
	       ##
	       and (@$top < 3 or $out[-1][2] == $top->[2])
	    ) {
	    $out[-1][1] = $top->[1] if $out[-1][1] < $top->[1];
	}
	else {
	    push @out, $top;
	}
    }
    @out;
}

use List::Util qw(pairmap);

sub reverse_regions {
    my $option = ref $_[0] eq 'HASH' ? shift : {};
    my($from, $max) = @_;
    my @reverse = do {
	pairmap { [ $a, $b ] }
	0, map( { $_->[0] => $_->[1] } @$from ), $max
    };
    return @reverse if $option->{leave_empty};
    grep { $_->[0] != $_->[1] } @reverse
}

sub match_borders {
    my $regex = shift;
    my @border = (0);
    while (/$regex/gp) {
	my $pos = pos();
	for my $i ($pos - length(${^MATCH}), $pos) {
	    push @border, $i if $border[-1] != $i;
	}
    }
    push @border, length if $border[-1] != length;
    @border;
}

sub borders_to_regions {
    return () if @_ < 2;
    map { [ $_[$_-1], $_[$_] ] } 1..$#_;
}

1;