########################################################################
# Author:  Patrik Lambert (lambert@talp.ucp.es)
# Description: Provides method to cut and process a part of an alignment
#   in a sentence pair.
#
#-----------------------------------------------------------------------
#
#  Copyright 2004 by Patrik Lambert
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
########################################################################

package Lingua::AlignmentSlice;
@ISA = ("Lingua::Alignment"); # Inherits from Alignment

use strict;
use Lingua::AlSetLib;
use Dumpvalue;

#a slice or fraction of an Alignment object:
#inherits from an Alignment object
#zero is the point where to insert the slice in the father alignment
#indices are the token indices that really contain information in the slice. Only paste those ones.
#sub new2 {
#	my ($pkg,$al) = @_;
#	my $this = $pkg->Lingua::Alignment::new();

#	$this->{father}=$al;
#	$this->{zeroSource}=0;
#	$this->{zeroTarget}=0;
#	$this->{sourceIndices}={};
#	$this->{targetIndices}={};
#    return $this;    
#}

sub new {
	my ($pkg,$al) = @_;
	my $this = Lingua::Alignment->new();

	$this->{father}=$al;
	$this->{zeroSource}=0;
	$this->{zeroTarget}=0;
	$this->{sourceIndices}={};
	$this->{targetIndices}={};
    return bless $this,$pkg;    
}

sub setZero {
	my ($this,$zeroSource,$zeroTarget)=@_;
	$this->{zeroSource}=$zeroSource;
	$this->{zeroTarget}=$zeroTarget;
}

sub paste {
	my ($this,$al)=@_;
	if (!defined($al)){$al=$this->{father}}
	my %side=("source"=>"target","target"=>"source");
	my ($j,$i,$idx);
	my ($al_j,$al_i);
	my ($length,$side,$Side,$reverseSide,$ReverseSide);
	my @nullNotIn;
	my @nullIn;

#	print "slice to paste:\n";
#	main::dumpValue($this);
	while (($side,$reverseSide)= each(%side)){		
#		print "PASTE:$side\n";
#		print "ANTES DE PROCESAR NULL:\n";
#		main::dumpValue($al->{$side."Al"});
#		print "PROCESS NULL...\n";
		$Side=ucfirst($side);
		$ReverseSide=ucfirst($reverseSide);
	# 1 process NULL links
		if ($this->{$side."Indices"}{0}){
			$length=@{$al->{$side."Al"}[0]};
			#list links to null of the indices not included in perturbation
			@nullNotIn=();
			@nullIn=();
			foreach $i (@{$al->{$side."Al"}[0]}){
#				print "zero $ReverseSide:".$this->{"zero".$ReverseSide}."  i:$i\n";
				if (!$this->{$reverseSide."Indices"}{$i-$this->{"zero".$ReverseSide}}){
					push @nullNotIn,$i;
				}
			}
			#replace the other ones (those included in the perturbation) by their alignemnent in the perturbation
			@{$al->{$side."Al"}[0]}=@nullNotIn;
			foreach $i (@{$this->{$side."Al"}[0]}){
				push @{$al->{$side."Al"}[0]},$i+$this->{"zero".$ReverseSide};	
			}
#			print "null not in:",join(" ",@nullNotIn),"\n";
		}
#		print "DESPUÉS DE PROCESAR NULL:\n";
#		main::dumpValue($al->{$side."Al"});
#		print "PROCESS NO NULL...\n";

	# 2 process NO-null links
		foreach $j (keys %{$this->{$side."Indices"}}){
			if ($j>0){
				$al_j = $j+$this->{"zero".$Side};
				# if $j is in the perturbation all indices linked to it are also, 
				# so we can replace the entire array by that of the perturation:
				$al->{$side."Al"}[$al_j]=[]; 
				foreach $i (@{$this->{$side."Al"}[$j]}){
					if ($i>0){
						$al_i = $i+$this->{"zero".$ReverseSide};
					}else{
						$al_i = 0;	
					}
#					print "j i:$j $i->push $side.Al[$al_j],$al_i\n"; 
					push @{$al->{$side."Al"}[$al_j]},$al_i;	
				}
			}
		} #foreach $j...	
#		print "DESPUÉS DE PROCESAR NO NULL:\n";
#		main::dumpValue($al->{$side."Al"});
	} #while
}

sub clone {
	my $this = shift;
	my $clone = $this->Lingua::Alignment::clone();

	$clone->{father}=$this->{father};
	$clone->{zeroSource}=$this->{zeroSource};
	$clone->{zeroTarget}=$this->{zeroTarget};
	$clone->{sourceIndices}=$this->{sourceIndices};
	$clone->{targetIndices}=$this->{targetIndices};
	
	return bless $clone,"Lingua::AlignmentSlice";
}
##############################################################################
### SYMMETRIZATION SUBS (make sense only for GIZA type alignments)         ###
##############################################################################                        

sub sparse{
	my ($alSlice,$side) = @_;
	
	return ( (@{$alSlice->{$side."Words"}}-scalar(keys %{$alSlice->{$side."Indices"}}))>3 );
}
# SYMMETRIZATION RULE FUNCTION:	
# input: AlignmentSlice object (modAl)
# action: modAl is corrected by application of the rule, to symAl are added new symmetric links and al remains unchanged
# returns the number of applications of the rule in alignment
sub applyOneToMany_1 {
	my ($alSlice) = @_;
	my %side=("source","target","target","source");
	my ($source,$target);
	my ($j,$i,$k);
	my $ruleApplicationNb=0;
	my %candidate;
	my $failed = 0;

	for (($source,$target)= each (%side)){	
		for ($j=0; $j<@{$alSlice->{$source."Al"}};$j++){
			#select the $j's linked to various $i's
			if (defined($j)){
				if (@{$alSlice->{$source."Al"}[$j]}>1){
					($failed,$k) = (0,0);
					%candidate=();
					# for each $i linked to $j we look if the reverse link exists in targetAl and if $i is not linked to another source word
					while (!$failed && $k<@{$alSlice->{$source."Al"}[$j]}){
						$i = $alSlice->{$source."Al"}->[$j][$k];
						if (@{$alSlice->{$target."Al"}[$i]}==1 && $alSlice->{$target."Al"}->[$i][0]==$j){
							$candidate{$i} = 1;	
						}elsif (@{$alSlice->{$target."Al"}[$i]}>=1){	#$i is linked to some $j', $j' different from $j
							$failed = 1;
						}
						$k++;	
					}
					# if the conditions are present, we apply the rule
					if (!$failed && %candidate>0){
						foreach $i (@{$alSlice->{$source."Al"}[$j]}){
							#indicate modified word:
	#						${$modAl->{$target."Words"}}[$i]='#'.${$modAl->{$target."Words"}}[$i].'#';	
	#						${$symAl->{$target."Words"}}[$i]='#'.${$symAl->{$target."Words"}}[$i].'#';	
							if (! $candidate{$i}){
	#							push @{$modAl->{$target."Al"}[$i]},$j;
	#							push @{$symAl->{$target."Al"}[$i]},$j; 	
	#							push @{$symAl->{$source."Al"}[$j]},$i; 	
							}else{
								$ruleApplicationNb++;	
								#indicate modified word:
	#							${$modAl->{$source."Words"}}[$j]='#'.${$modAl->{$source."Words"}}[$j].'#';	
	#							${$symAl->{$source."Words"}}[$j]='#'.${$symAl->{$source."Words"}}[$j].'#';	
							}
						}	
					}
				} #if
			}
		} #for
	}	#for source,target
	return $ruleApplicationNb;		
}

#look for source|target tokens aligned with every target|source index in the slice
sub applyOneToMany_2 {
	my ($alSlice) = @_;
	my %side=("source"=>"target","target"=>"source");
	my ($source,$target);
	my ($j,$i,$k);
	my $ruleApplicationNb=0;
	my @candidate;
	my $failed;
	my @toModify = ();

	while (($source,$target)= each %side){
#		print "\n$source $target :\n";
		if (@toModify==0){
			#1 select the $j's linked to all $i's
			#  we want only one $j in this situation so sourceAl==2
			#  we eliminate situations where $j is linked to two $i's situated very far apart (ie: indices<<words)
			if (@{$alSlice->{$source."Al"}}==2 && @{$alSlice->{$source."Al"}[0]}==0 && (@{$alSlice->{$target."Words"}}-scalar(keys %{$alSlice->{$target."Indices"}}))<4){
				$j=1;
#				print "al:",join(" ",@{$alSlice->{$source."Al"}[$j]})." indices:",join(" ",keys %{$alSlice->{$target."Indices"}}),"\n";
				if ( @{$alSlice->{$source."Al"}[$j]}==scalar(keys %{$alSlice->{$target."Indices"}}) ){
					#2 check there is one $i with reverse link to $j (from Giza constraint there can be only one) and no $i linked to other word
					@candidate=();
					$failed=0;
					while (!$failed && $k<@{$alSlice->{$source."Al"}[$j]}){
						$i = $alSlice->{$source."Al"}->[$j][$k];
						# $i can only be aligned to $j or not linked
						if (@{$alSlice->{$target."Al"}[$i]}==1 && $alSlice->{$target."Al"}->[$i][0]==$j){
#							print "	1st class\n";
							$candidate[0]=$i;	
						}else{	#$i is linked to nothing
#							print "	modify $i\n";
							push @toModify,$i;
						}
						$k++;	
					}
				}	# if
			} 	#for $j...
			if (@candidate>0){	
				foreach $i (@toModify){
					push @{$alSlice->{$target."Al"}[$i]},$j;
					$ruleApplicationNb=1;	
				}
			}
		} #if @toModify==0
	} #while source,target
	return $ruleApplicationNb;		
}

sub selectSubgroups {
    my ($alSlice,$groups,$subGroups,$globals) = @_;
    my $sourceSize = @{$alSlice->{sourceWords}}-1;
    my $targetSize = @{$alSlice->{targetWords}}-1;
    my ($side,$k,$minK,$j,$i,$group,$subGroup,$count,$numWords);
    my ($sourceCandidate,$targetCandidate);
    my %candidates = ("source",[],"target",[]);
    my %subCandidates = ("source",[],"target",[]);
    my @allWords;
    my @words;
    my $dumper=new Dumpvalue;
    if ($sourceSize >0 && $targetSize >0 && ($sourceSize>1 || $targetSize>1) && !$alSlice->sparse("source") && !$alSlice->sparse("target")){
	#select frases contains in the group
	foreach $side (("source","target")){
	    # push group candidates	
	    @allWords = @{$alSlice->{$side."Words"}};
	    shift @allWords;
	    #prepare group to be splitted between punctuation, for subGroups hash
	    $group=join(" ",@allWords)." ";			
	    $group =~ s/([¡¿!\?\.,;:] )+/$1/g;
	    @allWords = split /[¡¿!\?\.,;:] /g,$group;
	    # remove puntuation to put it in group hash:
	    $group =~ s/[¡¿!\?\.,;:] //g;
	    $group =~ s/\s+$//g;
	    
	    push @{$candidates{$side}},$group;

	    if ($globals->{onlyGroups}==0){
		foreach $group (@allWords){
		    $group =~ s/\s+$//;
#				print "group:$group:\n";
		    @words = split / /,$group;
		    $numWords = scalar(@words);
		    #push subgroup candidates
		    if ($numWords<3){$minK=1}
		    elsif($numWords<5){$minK=2}
		    else{$minK=2};
#				print "minK:$minK numWords:$numWords\n";
		    for ($k=$minK;$k<=$numWords;$k++){
			for ($j=0;$j<$numWords-$k+1;$j++){
			    @words = split / /,$group;
			    $subGroup = join(" ",splice @words,$j,$k);
			    push @{$subCandidates{$side}},$subGroup;
#						print "	subGroup:$subGroup\n";
			}
		    }
		} #foreach $candidate (@allWords)
		foreach $sourceCandidate (@{$subCandidates{source}}){
		    foreach $targetCandidate (@{$subCandidates{target}}){
#				print "$sourceCandidate -- $targetCandidate\n";
			$subGroups->{"$sourceCandidate | $targetCandidate"}=1;
		    }
		}
	    }		
	    foreach $sourceCandidate (@{$candidates{source}}){
		foreach $targetCandidate (@{$candidates{target}}){
		    # number of words may change after filtering punctuation:
		    $sourceSize=scalar(split / /,$sourceCandidate);
		    $targetSize=scalar(split / /,$targetCandidate);
		    if ($sourceSize >0 && $targetSize >0 && ($sourceSize>1 || $targetSize>1)){
			$groups->{"$sourceCandidate | $targetCandidate"}=1;
		    }
		}
	    }
	}
    }
    return ($groups,$subGroups);
}

sub applyGrouping {
    my ($alSlice,$groupKeys,$subGroupKeys,$globals) = @_;
    my $myGroupKeys=$groupKeys;
    my $sourceSize = @{$alSlice->{sourceWords}};
    my $targetSize = @{$alSlice->{targetWords}};
    my ($side,$reverseSide,$k,$minK,$j,$i,$idx,$interPunctuationIdx,$pushed,$candidate,$refToCand,$word);
    my %side=("source"=>"target","target"=>"source");
    my ($refToSourceCand,$sourceCandidate,$refToTargetCand,$targetCandidate,$bestMatch);
    my %candidates;
    my %cands;
    my %targetCands;
    my @allWords;
    my @words;
    my ($regExp,$num,$match,$numMatches);
    my @matches;
    my ($nscan,$modifications,$modified)=(0,1,0);
    my %toProcess;
    my ($numSourceIndicesToProcess,$numTargetIndicesToProcess);
    my ($first_j,$first_i); 
    my @sourceCandidateTokens;
    my @targetCandidateTokens;
    my %crossLinksPatterns;
    my $clone=$alSlice->clone();
    my $lastChance=0;
    my @grepMatch;
    my %grepMatches;
    my $dumper = new Dumpvalue;
    my $verbose = $globals->{verbose};

    if ($sourceSize >1 && $targetSize >1 && !$alSlice->sparse("source") && !$alSlice->sparse("target")){
	my $defaultActionGrouping=$globals->{defaultActionGrouping};
	$alSlice->$defaultActionGrouping();
	#once you have the intersection, it's easy to list the reciprocal links
	$clone->intersect();
	for ($j=0;$j<@{$clone->{sourceAl}};$j++){
	    if (defined($clone->{sourceAl}[$j])){
		foreach $i (@{$clone->{sourceAl}[$j]}){
		    $crossLinksPatterns{$clone->{sourceWords}[$j].'.*\|.*'.$clone->{targetWords}[$i]}=1;	
		}
	    }	
	}
	if ($verbose > 1){
	    print $alSlice->sourceSentence." | ".$alSlice->targetSentence."\n";
	    if ($verbose > 2){
		print "recip links:\n";
		print $dumper->dumpValue(\%crossLinksPatterns);
	    }
	}
	$toProcess{sourceIndices}={%{$alSlice->{sourceIndices}}};
	$toProcess{targetIndices}={%{$alSlice->{targetIndices}}};
	delete($toProcess{sourceIndices}{0});
	delete($toProcess{targetIndices}{0});
	$toProcess{sourceWords}=[@{$alSlice->{sourceWords}}];
	$toProcess{targetWords}=[@{$alSlice->{targetWords}}];
	shift @{$toProcess{sourceWords}};	#remove NULL word
	shift @{$toProcess{targetWords}};
	$toProcess{sourceWordPos}=[1..scalar(@{$toProcess{sourceWords}})];
	$toProcess{targetWordPos}=[1..scalar(@{$toProcess{targetWords}})];		
	$numSourceIndicesToProcess = scalar(keys %{$toProcess{sourceIndices}});
	$numTargetIndicesToProcess = scalar(keys %{$toProcess{targetIndices}});
	
	while ( $numSourceIndicesToProcess>0 && $numTargetIndicesToProcess>0 ){
#	    print "s words:",join(" ",@{$toProcess{sourceWords}})," - t words:",join(" ",@{$toProcess{targetWords}}),"\n";
#	    print "ind to process s:",join(" ",keys %{$toProcess{sourceIndices}})," - target:",join(" ",keys %{$toProcess{targetIndices}}),"\n";
		# IF FAILED RETURN	
	    if (!$lastChance && !$modifications){
		$lastChance=1;
		if ($globals->{onlyGroups}==1){
		    return -1;
		}
	    }
	    %grepMatches=();
#			print "$nscan: myGroupKeys:$myGroupKeys lastChance:$lastChance\n";
	    %candidates = ("source",[],"target",[]);
	    
	    # SELECT FRASES CONTAINED IN THE GROUP (if the number of indices is different (ie modifications==1)
	    if ($nscan==0 || $modifications){
		while (($side,$reverseSide)=each %side){
		    #target words
		    $targetCands{$side} = " |".join("|",@{$toProcess{$reverseSide."Words"}});
		    $targetCands{$side} =~ s/\|[\(\)\?¿!¡\.,]//g;	#remove punctuation marks
		    $targetCands{$side} =~ s/([\\\(\)\[\{\^\$\*\+\?\.])/\\$1/g; #escape special characters
		    if ($verbose > 2){print "targetCands $side:$targetCands{$side}\n";}
		    #source group candidates
		    @allWords=();
		    @{$allWords[0]}=();
		    $interPunctuationIdx = 0;
		    for ($idx=0;$idx<@{$toProcess{$side."Words"}};$idx++){
			if ($toProcess{$side."Words"}[$idx]=~/[\(\)\?¿!¡\.,]/ || !$alSlice->{$side."Indices"}{$toProcess{$side."WordPos"}[$idx]}){
#						if ($toProcess{$side."Words"}[$idx]=~/[\(\)\?¿!¡\.,]/){
			    unless ($idx==0 ||$idx==@{$toProcess{$side."Words"}}-1 || $pushed==0){
				$interPunctuationIdx++;
				$pushed=0;
			    }
			}else{
			    push @{$allWords[$interPunctuationIdx]},{"pos" => $toProcess{$side."WordPos"}[$idx],"txt" => $toProcess{$side."Words"}[$idx]};
			    $pushed=1;
			}
		    }
		    @{$cands{$side}}=();
		    for ($interPunctuationIdx=0;$interPunctuationIdx<@allWords;$interPunctuationIdx++){
			if (scalar(keys %{$toProcess{$side."Indices"}})<4){$minK=1}
			elsif(scalar(keys %{$toProcess{$side."Indices"}})<5){$minK=2}
			else{$minK=3};
			#select subgroups for grouping candidates
			for ($k=$minK;$k<=@{$allWords[$interPunctuationIdx]} && $k<7;$k++){
			    for ($j=0;$j<=@{$allWords[$interPunctuationIdx]}-$k;$j++){
				@words=@{$allWords[$interPunctuationIdx]};
				# substitute words not in slice indices by "blank" (ie '[^ ]?')
#				for ($idx=0;$idx<@words;$idx++){
#			            if (!$alSlice->{$side."Indices"}{$words[$idx]->{pos}}){
#				       $words[$idx]->{txt} ='[^ ]+';
#				    }  
#				}
				push @{$cands{$side}},[splice @words,$j,$k];
#								print " -j:$j -k:$k -s words to process:",@{$toProcess{$side."Words"}}-$k,"\n";
								#if split 3 in 1-2 or 5 in 2-3 for instance put left-over as candidate:
				if (@words>0 && @words<$minK && ( $j==0 || $j==@{$allWords[$interPunctuationIdx]}-$k)){
				    push @{$cands{$side}},[@words];
				}
			    }
			}
		    }
		}#foreach side
		}#if modifications
			
#			print "targetCands source:",$targetCands{source},"\n";
#			print "targetCands target:",$targetCands{target},"\n";
			
		# FILTER POSSIBLE CANDIDATES
		foreach $side (("source","target")){
		    foreach $refToCand (@{$cands{$side}}){
			$candidate = printGroup($refToCand);
			if ($candidate !~ /^(\[\^ \]\+ ?)+$/){
			    $candidate =~ s/(\[\^ \]\+ )/\($1\){0,1}/g;
			    $candidate =~ s/( \[\^ \]\+)/\($1\){0,1}/g;
			    $candidate =~ s/([\\\(\)\[\{\^\$\*\+\?\.])/\\$1/g; #escape special characters 
			    
			    if ($verbose>2){
				print "cand: $candidate\n";
			    }
			    if ($side eq "source"){
				if (!$lastChance){
				    $regExp = '^\d+ \| '.$candidate.' \| ('.$targetCands{$side}.')+$';
				}else{
				    $regExp = $candidate.'.*\|.*('.$targetCands{$side}.')+';	
					}
			    }else{
				if (!$lastChance){
				    $regExp = '^\d+ \| ('.$targetCands{$side}.')+ \| '.$candidate.'$';
				}else{
				    $regExp = '('.$targetCands{$side}.')+.*\|.*'.$candidate;	
				}
			    }
			    $regExp =~ s/\?/\\\?/g;
			    if ($verbose > 2){print "$regExp\n";}
			    @grepMatch = grep(/$regExp/,@$myGroupKeys); 
			    if (@grepMatch>0){
				push @{$candidates{$side}},$refToCand;
				foreach $match (@grepMatch){
					    $grepMatches{$match}=1;
					}
			    }
			}
		    } #foreach @cands
			    
		    foreach $refToCand (@{$candidates{$side}}){
			$candidate= printGroup($refToCand);
#					print "	",$candidate."\n";
		    }
		} #foreach $side
	    @matches=();
	    
	    # CROSS POSSIBLE CANDIDATES AND IF THEY MATCH PUSH INTO @MATCHES
	    foreach $refToSourceCand (@{$candidates{source}}){
		foreach $refToTargetCand (@{$candidates{target}}){
		    $sourceCandidate=printGroup($refToSourceCand);
		    $targetCandidate=printGroup($refToTargetCand);
		    if ($verbose > 2){
			print "s t:$sourceCandidate - $targetCandidate		indices: s $numSourceIndicesToProcess -t $numTargetIndicesToProcess\n";
		    }
		    # for subGroupKeys, eliminate candidates with one word each side, except if they are the only words to process
		    if (!$lastChance){
			$regExp = '^\d+ \| '.$sourceCandidate.' \| '.$targetCandidate.'$';
		    }else{
			$regExp = ' '.$sourceCandidate.' .*\|.* '.$targetCandidate;
		    }				
#					print "$regExp\n";
		    @grepMatch= grep(/$regExp/,keys %grepMatches);						
		    if (@grepMatch>0){
			$numMatches=0;
			foreach $match (@grepMatch){
#								print "		$match\n";
			    ($num)=split(" \\| ",$match);
			    $numMatches+=$num;
			}
#							print "numMatches:$numMatches\n";
			push @matches, [$numMatches,[@$refToSourceCand],[@$refToTargetCand]];
		    }
		}
	    }
	    if ($verbose>2){
		print "MATCHES:\n";
		print $dumper->dumpValue(\@matches);
	    }
		# ANALYSE AND COMBINES MATCHES TO SELECT THE BEST MATCH
	    if (@matches ==0){
		$modifications=0;
		if ($lastChance){
		    if ($modified){return $modified}
		    else {return -1};	
		}
	    }else{
		$modifications=1;
		if (@matches ==1){
		    $bestMatch=@matches[0];	
		}else {
		    if ($lastChance){
			$bestMatch=searchBestSubGroupMatch(\@matches,\%crossLinksPatterns);	
		    }else{
			$bestMatch=searchBestGroupMatch(\@matches,$verbose);	
		    }
		} #if @matches==1
		($num,$refToSourceCand,$refToTargetCand)=@$bestMatch;
		if ($verbose>0){
		    print "***bestMatch:",printGroup($refToSourceCand),"--",printGroup($refToTargetCand),"\n";
		}
		#apply grouping:
		#see if there is something left to process for the next step:
		for ($j=0;$j<@{$toProcess{sourceWordPos}};$j++){
		    if ($toProcess{sourceWordPos}[$j]==$refToSourceCand->[0]{pos}){
			$first_j=$j;
			last;
		    }
		}
		for ($i=0;$i<@{$toProcess{targetWordPos}};$i++){
		    if ($toProcess{targetWordPos}[$i]==$refToTargetCand->[0]{pos}){
			$first_i=$i;
			last;
		    }
		}
		
#				print "sourceWords:",join(" ",@{$toProcess{sourceWords}})," -splice: ",$first_j,",",scalar(@$refToSourceCand),"\n";
#				print "targetWords:",join(" ",@{$toProcess{targetWords}})," -splice: ",$first_i,",",scalar(@$refToTargetCand),"\n";
		splice @{$toProcess{sourceWords}},$first_j,scalar(@$refToSourceCand);
		splice @{$toProcess{sourceWordPos}},$first_j,scalar(@$refToSourceCand);
		splice @{$toProcess{targetWords}},$first_i,scalar(@$refToTargetCand);
		splice @{$toProcess{targetWordPos}},$first_i,scalar(@$refToTargetCand);
#				print "s wordPos:",join(" ",@{$toProcess{sourceWordPos}}),"\n";
#				print "t wordPos:",join(" ",@{$toProcess{targetWordPos}}),"\n";
		for ($j=$refToSourceCand->[0]{pos};$j<=$refToSourceCand->[@$refToSourceCand-1]{pos};$j++){
		    delete $toProcess{sourceIndices}{$j};
		}
#				print "\nsourceWords 2:",join(" ",@{$toProcess{sourceWords}}),"\n";
		for ($i=$refToTargetCand->[0]{pos};$i<=$refToTargetCand->[@$refToTargetCand-1]{pos};$i++){
		    delete $toProcess{targetIndices}{$i};
		}
#				print "targetWords 2:",join(" ",@{$toProcess{targetWords}}),"\n";
		$numSourceIndicesToProcess = scalar(keys %{$toProcess{sourceIndices}});
		$numTargetIndicesToProcess = scalar(keys %{$toProcess{targetIndices}});

		$alSlice->group($refToSourceCand,$refToTargetCand,$globals->{extendGroups});
#				print "modifs:".$modifications."\n";
		if ($modifications>0){
		    $modified=1;	
		}
	    } #if @matches==0 else
	    $nscan++;
#			print "modif:$modifications\n";
#			print "sourceInd:",scalar(keys %{$toProcess{sourceIndices}}),"\n";
#			print "targetInd:",scalar(keys %{$toProcess{targetIndices}}),"\n";
			
	} #while no changes and areas to cover
	return 1;				
    } 	#general if
    return 0;
}

sub printGroup{
    my $refToGroup = shift;
    my $candidate="";
    my $word;
    foreach $word (@$refToGroup){
	$candidate=$candidate." ".$word->{txt};
    }
    $candidate =~ s/^ //;
    return $candidate;	
}

sub searchBestGroupMatch{
    my $matches = shift;
    my $verbose = shift;
    my @matchNums = ();
    my ($match,$idx,$length);
    my ($minMatch,$maxMatch);
    my @bestCandidates;
    my %maxLength;
    
    foreach $match (@$matches){
	if ($verbose>1){
	    print $match->[0];
	}
	push @matchNums,$match->[0];	
    }
    ($minMatch,$maxMatch) = Lingua::AlSetLib::minmax( \@matchNums );
    @bestCandidates = ();
    foreach $match (@$matches){
	if ($match->[0]==$maxMatch){
	    push @bestCandidates,$match;
	}	
    }
    if (@bestCandidates>1){
	$maxLength{"length"}=0;
	for ($idx=0;$idx<@bestCandidates;$idx++){
	    $length = @{$bestCandidates[$idx]->[1]}+@{$bestCandidates[$idx]->[2]};
#			print "len:$length\n";
	    if ($length>$maxLength{"length"}){
		$maxLength{"idx"}=$idx;
		$maxLength{"length"}=$length;
	    }
	}
	$bestCandidates[0]=$bestCandidates[$maxLength{"idx"}];	
    }
    return $bestCandidates[0];	
}

sub searchBestSubGroupMatch{
    my ($matches,$crossLinkedWords) = @_;
    my %matchNums = ();
    my @sortedMatchNums;
    my ($match,$idx,$k,$length,$maxMatch);
    my $thisOneWithCross=0;
    
    my @bestCandidates;
    my @finalBest;
    my @candsWithoutCrossLink;
    my @candsWithCrossLink;
    my @currentWithoutCrossLink;
    my $regExp;
    
    # sort all matchcounts retrieved:
    foreach $match (@$matches){
	$matchNums{$match->[0]}=1;	
    }
    @sortedMatchNums=reverse (sort { $a <=> $b; } keys %matchNums);
#	print "\nMATCHNUMS: ",join(" ",@sortedMatchNums),"\n\n";	

    for ($idx=0;$idx<@sortedMatchNums && $idx<2;$idx++){
	#list matches of current matchcount
	@bestCandidates = ();
	@currentWithoutCrossLink=();
	foreach $match (@$matches){
	    if ($match->[0]==$sortedMatchNums[$idx]){
		push @bestCandidates,$match;
	    }	
	}
	foreach $match (@bestCandidates){
#			print "		$match:",printGroup($match->[1]),"--",printGroup($match->[2]),"\n";
	    #look if contains cross link
	    $thisOneWithCross=0;
	    foreach $regExp (keys %$crossLinkedWords){
		if ((printGroup($match->[1]).' | '.printGroup($match->[2]))=~/$regExp/){
#					print "$regExp --> $match\n";
		    $thisOneWithCross++;	
		    last;	
		}	
	    }
	    if ($thisOneWithCross){
		push @candsWithCrossLink,$match;
	    }else{
		push @currentWithoutCrossLink,$match;
	    }
	}
	if (@candsWithCrossLink>0){
	    last;
	}else{
	    push @{$candsWithoutCrossLink[$idx]},@currentWithoutCrossLink;
	}
    }
    for ($idx=0;$idx<@candsWithoutCrossLink;$idx++){
	foreach $match (@{$candsWithoutCrossLink[$idx]}){
#			print "$idx without		match:",printGroup($match->[1]),"--",printGroup($match->[2]),"\n";
	}	
    }
    foreach $match (@candsWithCrossLink){
#		print "with		match:",printGroup($match->[1]),"--",printGroup($match->[2]),"\n";
    }	
    if (@candsWithCrossLink>0){
	push @finalBest,clusterGroups(\@candsWithCrossLink);
	if ($idx>0 && @candsWithoutCrossLink>0){
	    push @finalBest,clusterGroups($candsWithoutCrossLink[0]);
	}		
    }else{
	for ($k=0;$k<@candsWithoutCrossLink;$k++){
	    push @finalBest,clusterGroups($candsWithoutCrossLink[$k]);
	}
    }
#	print "num finals:",scalar(@finalBest),"\n";
    return clusterGroups(\@finalBest); 		
}

sub clusterGroups{
    my $groups = shift;
    my ($match,$group,$word,$k,$idx,$l);
    my %clusterPositions;
    my %isInCluster;
    my %cluster;
    my @right;
    
    #1.Search positions included in cluster, in each side. Start with first match in group.
    foreach $k (1,2){
	foreach $word (@{$groups->[0][$k]}){
	    $clusterPositions{$k}{$word->{pos}}=1;			
	}
    }
    for ($idx=1;$idx<@$groups;$idx++){
	%isInCluster=();
	foreach $k (1,2){
	    foreach $word (@{$groups->[$idx][$k]}){
		if ($clusterPositions{$k}{$word->{pos}}){
		    $isInCluster{$k}=1;
		    last;
		}	
	    }
	}
	if ( $isInCluster{1} || $isInCluster{2} ){
	    foreach $k (1,2){
		foreach $word (@{$groups->[$idx][$k]}){
		    $clusterPositions{$k}{$word->{pos}}=1;
		}
	    }
	}
    } #for $idx
    
    #2. Build cluster
    foreach $k (1,2){
	foreach $word (@{$groups->[0][$k]}){ #we started with first match in group.
	    push @{$cluster{$k}},$word;
	    delete($clusterPositions{$k}->{$word->{pos}});	
	}
	for ($idx=1;$idx<@$groups && scalar(keys %{$clusterPositions{$k}})>0;$idx++){
	    $group=$groups->[$idx][$k];
	    foreach $word (@$group){
		if ($clusterPositions{$k}{$word->{pos}}){
		    #insert in cluster
		    for ($l=0;$l<@{$cluster{$k}};$l++){
			if ($cluster{$k}->[$l]{pos}>$word->{pos}){
			    last;	
			}
		    }
		    @right=splice @{$cluster{$k}},$l;
		    @{$cluster{$k}}=(@{$cluster{$k}},$word,@right);
		    #delete key
		    delete($clusterPositions{$k}->{$word->{pos}});	
		}	
	    }
	} #for each group to cluster
    } #foreach $k (source,target)
    return [$groups->[0][0],$cluster{1},$cluster{2}];
}

sub group{
    my ($alSlice,$refToSourceCand,$refToTargetCand,$extendGroup) = @_;
    my ($j,$i);
    my $nLinks=0;
    my $first_j=$refToSourceCand->[0]{pos};
    my $last_j=$first_j+@$refToSourceCand-1;
    my $first_i=$refToTargetCand->[0]{pos};
    my $last_i=$first_i+@$refToTargetCand-1;
    my @sourceExtensions;
    my @targetExtensions;
    my ($minMatch,$maxMatch);
    
    if ($extendGroup==1){
	#first we extend the group to cross links aligned with some member of the group
	for ($j=1;$j<@{$alSlice->{sourceAl}};$j++){
	    if (defined($alSlice->{sourceAl}[$j]) && @{$alSlice->{sourceAl}[$j]}>0 && ($j<$first_j || $j>$last_j)){
		for ($i=$first_i;$i<=$last_i;$i++){
		    if ($alSlice->isCrossLink($j,$i)){
			if ($j<$first_j){$first_j=$j}
			if ($j>$last_j){$last_j=$j}
		    }
		}
	    }
	}
	for ($i=1;$i<@{$alSlice->{targetAl}};$i++){
	    if (defined($alSlice->{targetAl}[$i]) && @{$alSlice->{targetAl}[$i]}>0 && ($i<$first_i || $i>$last_i)){
		for ($j=$first_j;$j<=$last_j;$j++){
		    if ($alSlice->isCrossLink($j,$i)){
			if ($i<$first_i){$first_i=$i}
			if ($i>$last_i){$last_i=$i}
		    }
		}
	    }
	}
    }
    #then we group
    for ($j=$first_j;$j<=$last_j;$j++){
	for ($i=$first_i;$i<=$last_i;$i++){
	    if ($alSlice->{sourceIndices}->{$j} && $alSlice->{targetIndices}->{$i}){
		if (!$alSlice->isIn("sourceAl",$j,$i)){
		    push @{$alSlice->{sourceAl}[$j]},$i;	
		    $nLinks++;
		}	
		if (!$alSlice->isIn("targetAl",$i,$j)){
		    push @{$alSlice->{targetAl}[$i]},$j;	
		    $nLinks++;
		}
	    } 	
	} #for
    } #for
    return $nLinks;
}

sub processNull {
	my $this=shift;
	
		
}
1;