# warn sprintf("Stop codon is '%s'\n",substr($trans->translateable_seq,-3));
#$support->log_verbose("Stops found in $tsi ($tname)\n",1);
$log_object->_save_log('log_verbose', '', $gsi, '', $tsi, '', "Stops found in $tsi ($tname)");
# find out where and how many stops there are
my@found_stops;
my$mrna= $trans->translateable_seq;
my$offset= 0;
my$tstop;
while($pseq=~ /^([^\*]*)\*(.*)/) {
my$pseq1_f= $1;
$pseq= $2;
my$seq_flag= 0;
$offset+= length($pseq1_f) * 3;
my$stop= substr($mrna, $offset, 3);
my$aaoffset= int($offset/ 3)+1;
push(@found_stops, [ $stop, $aaoffset]);
$tstop.= "$aaoffset ";
$offset+= 3;
}
# are all stops TGA...?
my$num_stops= scalar(@found_stops);
my$num_tga= 0;
my$positions;
foreachmy$stop(@found_stops) {
$positions.= $stop->[0]."(".$stop->[1].") ";
if($stop->[0] eq $scodon) {
$num_tga++;
}
}
my$source= $gene->source;
#...no - an internal stop codon error in the database...
if($num_tga< $num_stops) {
if($sourceeq 'havana') {
#$support->log_warning("INTERNAL STOPS HAVANA: Transcript $tsi ($tname) from gene $gname has non \'$scodon\' stop codons [$mod_date]:\nSequence = $orig_seq\nStops at $positions)\n\n");
$log_object->_save_log('log_warning', '', $gsi, 'TRANSCRIPT', $tsi, 'VQCT_internal_stop', "INTERNAL STOPS HAVANA: Transcript $tsi ($tname) from gene $gname has non \'$scodon\' stop codons [$mod_date]: Sequence = $orig_seq Stops at $positions)");
}
else{
#$support->log_warning("INTERNAL STOPS EXTERNAL: Transcript $tsi ($tname) from gene $gname has non \'$scodon\' stop codons[$mod_date]:\nSequence = $orig_seq\nStops at $positions)\n\n");
$log_object->_save_log('log_warning', '', $gsi, 'TRANSCRIPT', $tsi, 'VQCT_internal_stop', "INTERNAL STOPS EXTERNAL: Transcript $tsi ($tname) from gene $gname has non \'$scodon\' stop codons[$mod_date]: Sequence = $orig_seq Stops at $positions)");
}
}
#...yes - check remarks
else{
my$flag_remark= 0; # 1 if word seleno has been used
my$flag_remark2= 0; # 1 if existing remark has correct numbering
#$support->log_warning("seleno remark for $tsi stored as Annotation_remark not hidden remark) [$mod_date]\n");
$log_object->_save_log('log_warning', '', $gsi, '', $tsi, 'VQCT_wrong_selC_coord', "seleno remark for $tsi stored as Annotation_remark not hidden remark) [$mod_date]");
$annot_stops=$1;
}
elsif($text=~ /^$alabel2(.*)/) {
my$maybe= $1;
if($maybe=~ /^\s*\d+(\s+\d+)*\s*$/) {
$annot_stops=$maybe;
} else{
$log_object->_save_log('log', '', $gene->stable_id, '', $tsi, '', "Maybe annotated stop in incorrect format, maybe just a remark that happens to begin '$alabel2'".
" -- might need to investigate: '$alabel2$maybe' [$mod_date]");
}
}
}
}
#check the location of the annotated edits matches actual stops in the sequence
$log_object->_save_log('log_warning', '', $gene->stable_id, 'DNA', $tsi, 'VQCT_wrong_selC_coord', "DNA: Annotated stop for transcript tsi ($tname) is in DNA not peptide coordinates) [$mod_date]");
$log_object->_save_log('log_warning', '', $gene->stable_id, 'PEPTIDE', $tsi, 'VQCT_wrong_selC_coord', "PEPTIDE: Annotated stop for transcript $tsi ($tname) is out by one) [$mod_date]");
$log_object->_save_log('log', '', $gene->stable_id, 'TRANSCRIPT', $tsi, '', "Annotated stop for transcript $tsi ($tname) known to be a stop codon. Ok. [$mod_date]");
$log_object->_save_log('log', '', $gene->stable_id, 'TRANSCRIPT', $tsi, '', "Annotated stop for transcript $tsi ($tname) known to be a terminal Sec. Ok. [$mod_date]");
} else{
$log_object->_save_log('log_warning', '', $gene->stable_id, 'TRANSCRIPT', $tsi, '', "Annotated stop for transcript $tsi ($tname) \"$offset\" matches actual stop codon yet has no entry in script config to disambiguate it. Please investigate and add appropriate entry to config arrays in add_selcys.pl. [$mod_date]");
}
}
else{
$log_object->_save_log('log_warning', '', $gene->stable_id, 'TRANSCRIPT', $tsi, 'VQCT_wrong_selC_coord', "Annotated stop for transcript $tsi ($tname) \"$offset\" does not match a TGA codon) [$mod_date]");
push@annotated_stops, $offset;
}
}
$i++;
}
}
#check location of found stops matches annotated ones - any new ones are reported
#$support->log_verbose("Transcript $tsi ($tname) has potential selenocysteines but has been discounted by annotators:\n\t".$seen_transcripts->{$tsi}.") [$mod_date]\n");
$log_object->_save_log('log_verbose', '', $gene->stable_id, '', $tsi, 'VQCT_pot_selC', "Transcript $tsi ($tname) has potential selenocysteines but has been discounted by annotators: ".$seen_transcripts->{$tsi}->[0].") [$mod_date]");
}
else{
#$support->log("POTENTIAL SELENO ($seq) in $tsi ($tname, gene $gname) found at $pos [$mod_date]\n");
$log_object->_save_log('log', '', $gene->stable_id, '', $tsi, 'VQCT_pot_selC', "POTENTIAL SELENO ($seq) in $tsi ($tname, gene $gname) found at $pos [$mod_date]");
}
}
}
}
}
}
sub_save_log{
my$self=shift;
my$log_type= shift;
my$chrom_name=shift|| '';
my$gsi=shift|| '';
my$type=shift|| '';
my$tsi=shift|| '';
my$tag=shift|| '';
my$txt=shift|| '';
$self->$log_type($txt."\n");
}
#details of annotators comments
__DATA__
OTTHUMT00000144659 = FIXED- changed to transcript
OTTHUMT00000276377 = FIXED- changed to transcript
OTTHUMT00000257741 = FIXED- changed to nmd
OTTHUMT00000155694 = NOT_FIXED- should be nmd but external annotation but cannot be fixed
OTTHUMT00000155695 = NOT_FIXED- should be nmd but external annotation but cannot be fixed
OTTHUMT00000282573 = FIXED- changed to unprocessed pseudogene
OTTHUMT00000285227 = FIXED- changed start site
OTTHUMT00000151008 = FIXED- incorrect trimming of CDS, removed extra stop codon