# Module version. The variable is assigned twice in a row; the second
# assignment overwrites the first, so '112.055' is the value seen at runtime.
# NOTE(review): presumably the underscore form is kept only so that tooling
# scanning the source can recognise a developer release -- confirm before
# removing either line.
$Bio::EnsEMBL::IdMapping::InternalIdMapper::BaseMapper::VERSION
=
'112.0_55'
;
$Bio::EnsEMBL::IdMapping::InternalIdMapper::BaseMapper::VERSION
=
'112.055'
;
# Silence "use of uninitialized value" warnings for the remainder of this
# lexical scope (i.e. every sub defined below).
no
warnings
'uninitialized'
;
# Inherit from the IdMapping base object.
our
@ISA
=
qw(Bio::EnsEMBL::IdMapping::BaseObject)
;
# Threshold used by scores_similar(): two scores count as "similar" when
# their difference, relative to their mean, is below this ratio.
use
constant
SIMILAR_SCORE_RATIO
=> 0.01;
# basic_mapping
#
# Build a one-to-one list of mappings from a scored mapping matrix.
#
# Arguments:
#   $matrix       - a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix
#   $mapping_name - name used for the serialised cache file
#                   ("<mapping_name>.ser" under "<basedir>/mapping")
#
# Returns the Bio::EnsEMBL::IdMapping::MappingList.  If the list reports
# itself as loaded right after construction (AUTO_LOAD is on), it is
# returned as-is and no mapping is computed.
#
# Throws if $matrix is missing or of the wrong class, or if no mapping
# name is given.
sub basic_mapping {
    my ( $self, $matrix, $mapping_name ) = @_;

    if ( !$matrix
        || !$matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix') )
    {
        throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
    }
    if ( !$mapping_name ) {
        throw('Need a name for serialising the mapping.');
    }

    my $dump_path = path_append( $self->conf->param('basedir'), 'mapping' );

    my $mappings = Bio::EnsEMBL::IdMapping::MappingList->new(
        -DUMP_PATH  => $dump_path,
        -CACHE_FILE => "${mapping_name}.ser",
        -AUTO_LOAD  => 1,
    );

    # A previously serialised result short-circuits the whole computation.
    if ( $mappings->loaded ) {
        $self->logger->info(
            "Read existing mappings from ${mapping_name}.ser.\n");
        return $mappings;
    }

    # Sources and targets that already take part in an accepted mapping.
    my %sources_done;
    my %targets_done;

    # Walk the entries from the highest score to the lowest.
    my @queue =
        sort { $b->score <=> $a->score } @{ $matrix->get_all_Entries };

    while ( my $entry = shift(@queue) ) {

        # Each source and each target may be mapped at most once.
        next if $sources_done{ $entry->source };
        next if $targets_done{ $entry->target };

        # Leave this entry alone while a better-scoring, still unmapped
        # competitor exists for its source or its target.
        next
            if $self->higher_score_exists( $entry, $matrix, \%sources_done,
                \%targets_done );

        # ambiguous_mapping() fills these with the competing ids.
        my $rival_sources = [];
        my $rival_targets = [];

        if (
            $self->ambiguous_mapping(
                $entry, $matrix, $rival_sources, $rival_targets
            )
            )
        {
            # Competitors that are already mapped no longer make the
            # entry ambiguous; only the remaining ones count.
            $rival_sources =
                $self->filter_sources( $rival_sources, \%sources_done );
            $rival_targets =
                $self->filter_targets( $rival_targets, \%targets_done );

            next if @{$rival_sources} || @{$rival_targets};
        }

        # Accept the entry and reserve both of its ends.
        $mappings->add_Entry($entry);
        $sources_done{ $entry->source } = 1;
        $targets_done{ $entry->target } = 1;
    }

    $mappings->write_to_file;

    return $mappings;
}
# higher_score_exists
#
# Tell whether $entry is outscored by another entry in $matrix that shares
# its target (but has a different source) or shares its source (but has a
# different target), where that other source/target is not yet flagged in
# $sources_done / $targets_done.
#
# Arguments:
#   $entry        - object providing source(), target() and score()
#   $matrix       - object providing get_sources_for_target(),
#                   get_targets_for_source() and get_score()
#   $sources_done - hashref keyed by source ids that are already mapped
#   $targets_done - hashref keyed by target ids that are already mapped
#
# Returns 1 if a strictly higher-scoring competitor exists, 0 otherwise.
sub higher_score_exists {
    my ( $self, $entry, $matrix, $sources_done, $targets_done ) = @_;

    my $source = $entry->source;
    my $target = $entry->target;
    my $score  = $entry->score;

    # Competitors for the same target.
    for my $rival ( @{ $matrix->get_sources_for_target($target) } ) {
        next if $rival == $source;
        next if $sources_done->{$rival};
        return 1 if $matrix->get_score( $rival, $target ) > $score;
    }

    # Competitors for the same source.
    for my $rival ( @{ $matrix->get_targets_for_source($source) } ) {
        next if $rival == $target;
        next if $targets_done->{$rival};
        return 1 if $matrix->get_score( $source, $rival ) > $score;
    }

    return 0;
}
# ambiguous_mapping
#
# Check whether $entry competes with other entries in $matrix whose score
# is similar to (as judged by scores_similar()) or higher than its own.
#
# Arguments:
#   $entry         - object providing source(), target() and score()
#   $matrix        - object providing get_sources_for_target(),
#                    get_targets_for_source() and get_score()
#   $other_sources - arrayref; competing source ids for the entry's target
#                    are pushed onto it
#   $other_targets - arrayref; competing target ids for the entry's source
#                    are pushed onto it
#
# Returns 1 if at least one competitor was recorded, 0 otherwise.
sub ambiguous_mapping {
    my ( $self, $entry, $matrix, $other_sources, $other_targets ) = @_;

    my $source = $entry->source;
    my $target = $entry->target;
    my $score  = $entry->score;

    my $is_ambiguous = 0;

    # Other sources that map onto the same target.
    for my $candidate ( @{ $matrix->get_sources_for_target($target) } ) {
        my $candidate_score = $matrix->get_score( $candidate, $target );

        next if $candidate == $source;
        next
            unless $self->scores_similar( $score, $candidate_score )
            || $score < $candidate_score;

        $is_ambiguous = 1;
        push @{$other_sources}, $candidate;
    }

    # Other targets that the same source maps onto.
    for my $candidate ( @{ $matrix->get_targets_for_source($source) } ) {
        my $candidate_score = $matrix->get_score( $source, $candidate );

        next if $candidate == $target;
        next
            unless $self->scores_similar( $score, $candidate_score )
            || $score < $candidate_score;

        $is_ambiguous = 1;
        push @{$other_targets}, $candidate;
    }

    return $is_ambiguous;
}
# scores_similar
#
# Decide whether two scores are close enough to be treated as equivalent.
#
# Arguments:
#   $s1, $s2 - the two scores to compare
#
# Returns a true value when the absolute difference of the scores, taken
# relative to their mean, is below SIMILAR_SCORE_RATIO; a false value
# otherwise.  A first score of exactly 1 is never considered similar to a
# second score below 1.
sub scores_similar {
    my ( $self, $s1, $s2 ) = @_;

    # An exact match (score 1) always wins over a merely close one.
    return 0 if ( $s1 == 1 and $s2 < 1 );

    my $diff = abs( $s1 - $s2 );
    my $sum  = $s1 + $s2;

    # Guard against "Illegal division by zero": when the scores sum to
    # zero (e.g. both are 0 or undefined) the relative difference is
    # undefined, so fall back to a plain equality check.
    if ( $sum == 0 ) {
        return ( $diff == 0 ) ? 1 : 0;
    }

    # Difference relative to the mean of the two scores.
    my $pc = 2 * $diff / $sum;

    return ( $pc < SIMILAR_SCORE_RATIO );
}
# filter_sources
#
# Drop from a list of source ids those that are already flagged as done.
#
# Arguments:
#   $other_sources - arrayref of source ids
#   $sources_done  - hashref keyed by source id; a true value means "done"
#
# Returns the original arrayref untouched when either argument is empty;
# otherwise a new arrayref holding only the ids without a true value in
# $sources_done.
sub filter_sources {
    my ( $self, $other_sources, $sources_done ) = @_;

    # Nothing to filter, or nothing to filter against.
    return $other_sources unless scalar @{$other_sources};
    return $other_sources unless scalar keys %{$sources_done};

    return [ grep { !$sources_done->{$_} } @{$other_sources} ];
}
# filter_targets
#
# Drop from a list of target ids those that are already flagged as done.
#
# Arguments:
#   $other_targets - arrayref of target ids
#   $targets_done  - hashref keyed by target id; a true value means "done"
#
# Returns the original arrayref untouched when either argument is empty;
# otherwise a new arrayref holding only the ids without a true value in
# $targets_done.
sub filter_targets {
    my ( $self, $other_targets, $targets_done ) = @_;

    # Nothing to filter, or nothing to filter against.
    return $other_targets unless scalar @{$other_targets};
    return $other_targets unless scalar keys %{$targets_done};

    return [ grep { !$targets_done->{$_} } @{$other_targets} ];
}
1;