Hide Show 25 lines of Pod
use
Carp
qw(cluck croak carp)
;
Hide Show 17 lines of Pod
sub
entry2liveseq {
my
(
$self
,
%args
) =
@_
;
my
(
$getswissprotinfo
)=(
$args
{-getswissprotinfo});
if
(
defined
(
$getswissprotinfo
)) {
if
((
$getswissprotinfo
ne 0)&&(
$getswissprotinfo
ne 1)) {
carp
"-getswissprotinfo argument can take only boolean (1 or 0) values. Setting it to 0, i.e. not trying to retrieve SwissProt information...."
;
$getswissprotinfo
=0;
}
}
else
{
$getswissprotinfo
=1;
}
my
$hashref
=
$self
->{
'hash'
};
unless
(
$hashref
) {
return
(0); }
my
@translationobjects
=
$self
->hash2liveseq(
$hashref
,
$getswissprotinfo
);
my
$test_transl
=0;
if
(
$test_transl
) {
$self
->test_transl(
$hashref
,\
@translationobjects
);}
return
@translationobjects
;
}
Hide Show 54 lines of Pod
sub
gene2liveseq {
my
(
$self
,
%args
) =
@_
;
my
(
$gene_name
,
$flanking
,
$getswissprotinfo
,
$cds_position
)=(
$args
{-gene_name},
$args
{-flanking},
$args
{-getswissprotinfo},
$args
{-position});
my
$input
;
unless
((
$gene_name
)||(
$cds_position
)) {
carp
"Gene_Name or Position not specified for gene2liveseq loading function"
;
return
(0);
}
if
((
$gene_name
)&&(
$cds_position
)) {
carp
"Gene_Name and Position cannot be given together, use one"
;
return
(0);
}
elsif
(
$gene_name
) {
$input
=
$gene_name
;
}
else
{
$input
=
"cds-position:"
.
$cds_position
;
}
if
(
defined
(
$getswissprotinfo
)) {
if
((
$getswissprotinfo
ne 0)&&(
$getswissprotinfo
ne 1)) {
carp
"-getswissprotinfo argument can take only boolean (1 or 0) values. Setting it to 0, i.e. not trying to retrieve SwissProt information...."
;
$getswissprotinfo
=0;
}
}
else
{
$getswissprotinfo
=1;
}
if
(
defined
(
$flanking
)) {
unless
(
$flanking
>= 0) {
carp
"No sense in specifying a number < 0 for flanking regions to be created for gene2liveseq loading function"
;
return
(0);
}
}
else
{
$flanking
=500;
}
my
$hashref
=
$self
->{
'hash'
};
unless
(
$hashref
) {
return
(0); }
my
$gene
=
$self
->hash2gene(
$hashref
,
$input
,
$flanking
,
$getswissprotinfo
);
unless
(
$gene
) {
carp
"gene2liveseq produced error"
;
return
(0);
}
return
$gene
;
}
sub
test_transl {
my
(
$self
,
$entry
)=
@_
;
my
@features
=@{
$entry
->{
'Features'
}};
my
@translationobjects
=@{
$_
[1]};
my
(
$i
,
$translation
);
my
(
$obj_transl
,
$hash_transl
);
my
@cds
=@{
$entry
->{
'CDS'
}};
foreach
$translation
(
@translationobjects
) {
$obj_transl
=
$translation
->seq;
$hash_transl
=
$cds
[
$i
]->{
'qualifiers'
}->{
'translation'
};
unless
(
$obj_transl
eq
$hash_transl
) {
cluck
"Serious error: Translation from the Entry does not match Translation from object's seq for CDS at position $i"
;
carp
"\nEntry's transl: "
,
$hash_transl
,
"\n"
;
carp
"\nObject's transl: "
,
$obj_transl
,
"\n"
;
exit
;
}
$i
++;
}
}
sub
hash2liveseq {
my
(
$self
,
$entry
,
$getswissprotinfo
)=
@_
;
my
$i
;
my
@transcripts
;
my
$dna
=Bio::LiveSeq::DNA->new(
-seq
=>
$entry
->{
'Sequence'
} );
$dna
->alphabet(
lc
(
$entry
->{
'Molecule'
}));
$dna
->display_id(
$entry
->{
'ID'
});
$dna
->accession_number(
$entry
->{
'AccNumber'
});
$dna
->desc(
$entry
->{
'Description'
});
my
@cds
=@{
$entry
->{
'CDS'
}};
my
(
$swissacc
,
$swisshash
);
my
@swisshashes
;
for
$i
(0..
$#cds
) {
push
(
@transcripts
,
$cds
[
$i
]->{
'range'
});
if
(
$getswissprotinfo
) {
$swissacc
=
$cds
[
$i
]->{
'qualifiers'
}->{
'db_xref'
};
$swisshash
=
$self
->get_swisshash(
$swissacc
);
push
(
@swisshashes
,
$swisshash
);
}
}
my
@translations
=(
$self
->transexonscreation(
$dna
,\
@transcripts
));
my
$translation
;
my
$j
=0;
foreach
$translation
(
@translations
) {
if
(
$swisshashes
[
$j
]) {
$self
->swisshash2liveseq(
$swisshashes
[
$j
],
$translation
);
}
$j
++;
}
return
(
@translations
);
}
sub
hash2gene {
my
(
$self
,
$entry
,
$input
,
$flanking
,
$getswissprotinfo
)=
@_
;
my
$entryfeature
;
my
$genefeatureshash
;
my
@cds
=@{
$entry
->{
'CDS'
}};
if
(
index
(
$input
,
"cds-position:"
) == 0 ) {
my
$cds_position
=
substr
(
$input
,13);
if
((
$cds_position
>= 1)&&(
$cds_position
<=
scalar
(
@cds
))) {
$genefeatureshash
=
$self
->_findgenefeatures(
$entry
,
undef
,
$cds_position
,
$getswissprotinfo
);
}
}
else
{
$genefeatureshash
=
$self
->_findgenefeatures(
$entry
,
$input
,
undef
,
$getswissprotinfo
);
}
unless
((
$genefeatureshash
)&&(
scalar
(@{
$genefeatureshash
->{
'genefeatures'
}}))) {
my
@genes
=
$self
->genes(
$entry
);
my
$cds_number
=
scalar
(
@cds
);
warn
"Warning! Not even one genefeature found
for
/
$input
/....
The genes present in this entry are:\n\t
@genes
\n
The number of CDS in this entry is:\n\t
$cds_number
\n";
return
(0);
}
my
(
$min
,
$max
)=
$self
->rangeofarray(@{
$genefeatureshash
->{
'labels'
}});
my
$seqlength
=
$entry
->{
'SeqLength'
};
my
(
$mindna
,
$maxdna
);
if
(
$min
-
$flanking
< 1) {
$mindna
=1;
}
else
{
$mindna
=
$min
-
$flanking
;
}
if
(
$max
+
$flanking
>
$seqlength
) {
$maxdna
=
$seqlength
;
}
else
{
$maxdna
=
$max
+
$flanking
;
}
my
$subseq
=
substr
(
$entry
->{
'Sequence'
},
$mindna
-1,
$maxdna
-
$mindna
+1);
my
$dna
=Bio::LiveSeq::DNA->new(
-seq
=>
$subseq
,
-offset
=>
$mindna
);
$dna
->alphabet(
lc
(
$entry
->{
'Molecule'
}));
$dna
->source(
$entry
->{
'Organism'
});
$dna
->display_id(
$entry
->{
'ID'
});
$dna
->accession_number(
$entry
->{
'AccNumber'
});
$dna
->desc(
$entry
->{
'Description'
});
my
@transcripts
=@{
$genefeatureshash
->{
'transcripts'
}};
unless
(
@transcripts
) {
cluck
"no CDS feature found for /$input/...."
;
return
(0);
}
my
@translationobjs
=
$self
->transexonscreation(
$dna
,\
@transcripts
);
my
@transcriptobjs
;
my
$translation
;
my
$j
=0;
my
@ttables
=@{
$genefeatureshash
->{
'ttables'
}};
my
@swisshashes
=@{
$genefeatureshash
->{
'swisshashes'
}};
foreach
$translation
(
@translationobjs
) {
push
(
@transcriptobjs
,
$translation
->get_Transcript);
if
(
$ttables
[
$j
]) {
$translation
->get_Transcript->translation_table(
$ttables
[
$j
]);
}
if
(
$swisshashes
[
$j
]) {
$self
->swisshash2liveseq(
$swisshashes
[
$j
],
$translation
);
}
$j
++;
}
my
%gene
;
$gene
{DNA}=
$dna
;
$gene
{Transcripts}=\
@transcriptobjs
;
$gene
{Translations}=\
@translationobjs
;
my
@exonobjs
;
my
@intronobjs
;
my
@repeatunitobjs
;
my
@repeatregionobjs
;
my
@primtranscriptobjs
;
my
(
$object
,
$range
,
$start
,
$end
,
$strand
);
my
@exons
=@{
$genefeatureshash
->{
'exons'
}};
my
@exondescs
=@{
$genefeatureshash
->{
'exondescs'
}};
if
(
@exons
) {
my
$exoncount
= 0;
foreach
$range
(
@exons
) {
(
$start
,
$end
,
$strand
)=@{
$range
};
$object
= Bio::LiveSeq::Exon->new(
-seq
=>
$dna
,
-start
=>
$start
,
-end
=>
$end
,
-strand
=>
$strand
);
if
(
$object
!= -1) {
$object
->desc(
$exondescs
[
$exoncount
])
if
defined
$exondescs
[
$exoncount
];
$exoncount
++;
push
(
@exonobjs
,
$object
);
}
else
{
$exoncount
++;
}
}
$gene
{Exons}=\
@exonobjs
;
}
my
@introns
=@{
$genefeatureshash
->{
'introns'
}};
my
@introndescs
=@{
$genefeatureshash
->{
'introndescs'
}};
if
(
@introns
) {
my
$introncount
= 0;
foreach
$range
(
@introns
) {
(
$start
,
$end
,
$strand
)=@{
$range
};
$object
=Bio::LiveSeq::Intron->new(
-seq
=>
$dna
,
-start
=>
$start
,
-end
=>
$end
,
-strand
=>
$strand
);
if
(
$object
!= -1) {
$object
->desc(
$introndescs
[
$introncount
]);
$introncount
++;
push
(
@intronobjs
,
$object
);
}
else
{
$introncount
++;
}
}
$gene
{Introns}=\
@intronobjs
;
}
my
@prim_transcripts
=@{
$genefeatureshash
->{
'prim_transcripts'
}};
if
(
@prim_transcripts
) {
foreach
$range
(
@prim_transcripts
) {
(
$start
,
$end
,
$strand
)=@{
$range
};
$object
=Bio::LiveSeq::Prim_Transcript->new(
-seq
=>
$dna
,
-start
=>
$start
,
-end
=>
$end
,
-strand
=>
$strand
);
if
(
$object
!= -1) {
push
(
@primtranscriptobjs
,
$object
); }
}
$gene
{Prim_Transcripts}=\
@primtranscriptobjs
;
}
my
@repeat_regions
=@{
$genefeatureshash
->{
'repeat_regions'
}};
my
@repeat_regions_family
=@{
$genefeatureshash
->{
'repeat_regions_family'
}};
if
(
@repeat_regions
) {
my
$k
=0;
foreach
$range
(
@repeat_regions
) {
(
$start
,
$end
,
$strand
)=@{
$range
};
$object
=Bio::LiveSeq::Repeat_Region->new(
-seq
=>
$dna
,
-start
=>
$start
,
-end
=>
$end
,
-strand
=>
$strand
);
if
(
$object
!= -1) {
$object
->desc(
$repeat_regions_family
[
$k
]);
$k
++;
push
(
@repeatregionobjs
,
$object
);
}
else
{
$k
++;
}
}
$gene
{Repeat_Regions}=\
@repeatregionobjs
;
}
my
@repeat_units
=@{
$genefeatureshash
->{
'repeat_units'
}};
my
@repeat_units_family
=@{
$genefeatureshash
->{
'repeat_units_family'
}};
if
(
@repeat_units
) {
my
$k
=0;
foreach
$range
(
@repeat_units
) {
(
$start
,
$end
,
$strand
)=@{
$range
};
$object
=Bio::LiveSeq::Repeat_Unit->new(
-seq
=>
$dna
,
-start
=>
$start
,
-end
=>
$end
,
-strand
=>
$strand
);
if
(
$object
!= -1) {
$object
->desc(
$repeat_units_family
[
$k
]);
$k
++;
push
(
@repeatunitobjs
,
$object
);
}
else
{
$k
++;
}
}
$gene
{Repeat_Units}=\
@repeatunitobjs
;
}
my
$gene_name
=
$genefeatureshash
->{
'gene_name'
};
return
(Bio::LiveSeq::Gene->new(
-name
=>
$gene_name
,
-features
=>\
%gene
,
-upbound
=>
$min
,
-downbound
=>
$max
));
}
sub
rangeofarray {
my
$self
=
shift
;
my
@array
=
@_
;
my
(
$max
,
$min
,
$element
);
$min
=
$max
=
shift
(
@array
);
foreach
$element
(
@array
) {
$element
= 0
unless
defined
$element
;
if
(
$element
<
$min
) {
$min
=
$element
;
}
if
(
$element
>
$max
) {
$max
=
$element
;
}
}
return
(
$min
,
$max
);
}
sub
transexonscreation {
my
$self
=
shift
;
my
$dna
=
$_
[0];
my
@transcripts
=@{
$_
[1]};
my
(
@transexons
,
$start
,
$end
,
$strand
,
$exonref
,
$exonobj
,
$transcript
,
$transcriptobj
);
my
$translationobj
;
my
@translationobjects
;
foreach
$transcript
(
@transcripts
) {
foreach
$exonref
(@{
$transcript
}) {
(
$start
,
$end
,
$strand
)=@{
$exonref
};
$exonobj
=Bio::LiveSeq::Exon->new(
-seq
=>
$dna
,
-start
=>
$start
,
-end
=>
$end
,
-strand
=>
$strand
);
push
(
@transexons
,
$exonobj
);
}
$transcriptobj
=Bio::LiveSeq::Transcript->new(
-exons
=> \
@transexons
);
if
(
$transcriptobj
!= -1) {
$translationobj
=Bio::LiveSeq::Translation->new(
-transcript
=>
$transcriptobj
);
@transexons
=();
push
(
@translationobjects
,
$translationobj
);
}
}
return
(
@translationobjects
);
}
Hide Show 13 lines of Pod
sub
printswissprot {
my
(
$self
,
$entry
)=
@_
;
unless
(
$entry
) {
return
;
}
printf
"ID: %s\n"
,
$entry
->{
'ID'
};
printf
"ACC: %s\n"
,
$entry
->{
'AccNumber'
};
printf
"GENE: %s\n"
,
$entry
->{
'Gene'
};
printf
"DES: %s\n"
,
$entry
->{
'Description'
};
printf
"ORG: %s\n"
,
$entry
->{
'Organism'
};
printf
"SEQLN: %s\n"
,
$entry
->{
'SeqLength'
};
printf
"SEQ: %s\n"
,
substr
(
$entry
->{
'Sequence'
},0,64);
if
(
$entry
->{
'Features'
}) {
my
@features
=@{
$entry
->{
'Features'
}};
my
$i
;
for
$i
(0..
$#features
) {
print
"|"
,
$features
[
$i
]->{
'name'
},
"|"
;
print
" at "
,
$features
[
$i
]->{
'location'
},
": "
;
print
""
,
$features
[
$i
]->{
'desc'
},
"\n"
;
}
}
}
Hide Show 10 lines of Pod
sub
printembl {
my
(
$self
,
$entry
)=
@_
;
unless
(
$entry
) {
$entry
=
$self
->{
'hash'
};
}
my
(
$i
,
$featurename
);
printf
"ID: %s\n"
,
$entry
->{
'ID'
};
printf
"ACC: %s\n"
,
$entry
->{
'AccNumber'
};
printf
"MOL: %s\n"
,
$entry
->{
'Molecule'
};
printf
"DIV: %s\n"
,
$entry
->{
'Division'
};
printf
"DES: %s\n"
,
$entry
->{
'Description'
};
printf
"ORG: %s\n"
,
$entry
->{
'Organism'
};
printf
"SEQLN: %s\n"
,
$entry
->{
'SeqLength'
};
printf
"SEQ: %s\n"
,
substr
(
$entry
->{
'Sequence'
},0,64);
my
@features
=@{
$entry
->{
'Features'
}};
my
@cds
=@{
$entry
->{
'CDS'
}};
print
"\nFEATURES\nNumber of CDS: "
,
scalar
(
@cds
),
" (out of "
,
$entry
->{
'FeaturesNumber'
},
" total features)\n"
;
my
(
$exonref
,
$transcript
);
my
(
$qualifiernumber
,
$qualifiers
,
$key
);
my
(
$start
,
$end
,
$strand
);
my
$j
=0;
for
$i
(0..
$#features
) {
$featurename
=
$features
[
$i
]->{
'name'
};
if
(
$featurename
eq
"CDS"
) {
print
"|CDS| number $j at feature position: $i\n"
;
$transcript
=
$features
[
$i
]->{
'range'
};
foreach
$exonref
(@{
$transcript
}) {
(
$start
,
$end
,
$strand
)=@{
$exonref
};
print
"\tExon: start $start end $end strand $strand\n"
;
}
$j
++;
}
else
{
print
"|$featurename| at feature position: $i\n"
;
print
"\trange: "
;
print
join
(
"\t"
,@{
$features
[
$i
]->{
'range'
}}),
"\n"
;
}
$qualifiernumber
=
$features
[
$i
]->{
'qual_number'
};
$qualifiers
=
$features
[
$i
]->{
'qualifiers'
};
foreach
$key
(
keys
(%{
$qualifiers
})) {
print
"\t\t"
,
$key
,
": "
;
print
$qualifiers
->{
$key
},
"\n"
;
}
}
}
Hide Show 10 lines of Pod
sub
genes {
my
(
$self
,
$entry
)=
@_
;
unless
(
$entry
) {
$entry
=
$self
->{
'hash'
};
}
my
@entryfeatures
=@{
$entry
->{
'Features'
}};
my
(
$genename
,
$genenames
,
$entryfeature
);
for
$entryfeature
(
@entryfeatures
) {
$genename
=
$entryfeature
->{
'qualifiers'
}->{
'gene'
};
if
(
$genename
) {
if
(
index
(
$genenames
,
$genename
) == -1) {
$genenames
.=
$genename
.
" "
;
}
}
}
return
(
split
(/ /,
$genenames
));
}
sub
swisshash2liveseq {
my
(
$self
,
$entry
,
$translation
)=
@_
;
my
$translength
=
$translation
->
length
;
$translation
->desc(
$translation
->desc .
$entry
->{
'Description'
});
$translation
->display_id(
"SWISSPROT:"
.
$entry
->{
'ID'
});
$translation
->accession_number(
"SWISSPROT:"
.
$entry
->{
'AccNumber'
});
$translation
->name(
$entry
->{
'Gene'
});
$translation
->source(
$entry
->{
'Organism'
});
my
@aarangeobjects
;
my
(
$start
,
$end
,
$aarangeobj
,
$feature
);
my
@features
;
my
@newfeatures
;
if
(
$entry
->{
'Features'
}) {
@features
=@{
$entry
->{
'Features'
}};
}
my
$cleavedmet
=0;
foreach
$feature
(
@features
) {
if
((
$feature
->{
'name'
} eq
"INIT_MET"
)&&(
$feature
->{
'location'
} eq
"0 0"
)) {
$cleavedmet
=1;
$translation
->{
'offset'
}=
"1"
;
}
else
{
push
(
@newfeatures
,
$feature
);
}
}
my
$swissseq
=
$entry
->{
'Sequence'
};
my
$liveseqtransl
=
$translation
->seq;
chop
$liveseqtransl
;
my
$translated
=
substr
(
$liveseqtransl
,
$cleavedmet
);
my
(
$liveseq_aa
,
$swiss_aa
,
$codes_aa
)=
$self
->_get_alignment(
$translated
,
$swissseq
);
if
((
index
(
$liveseq_aa
,
"-"
) != -1)||(
index
(
$swiss_aa
,
"-"
) != -1)) {
print
"LIVE-SEQ=\'$liveseq_aa\'\nIDENTITY=\'$codes_aa\'\nSWS-PROT=\'$swiss_aa\'\n"
;
carp
"Nucleotides translation and SwissProt translation are different in size, cannot attach the SwissSequence to the EMBL one, cannot add any AminoAcidRange object/Domain information!"
;
return
;
}
@features
=
@newfeatures
;
foreach
$feature
(
@features
) {
(
$start
,
$end
)=
split
(/ /,
$feature
->{
'location'
});
$aarangeobj
=Bio::LiveSeq::AARange->new(
-start
=>
$start
+
$cleavedmet
,
-end
=>
$end
+
$cleavedmet
,
-name
=>
$feature
->{
'name'
},
-description
=>
$feature
->{
'description'
},
-translation
=>
$translation
,
-translength
=>
$translength
);
if
(
$aarangeobj
!= -1) {
push
(
@aarangeobjects
,
$aarangeobj
);
}
}
$translation
->{
'aa_ranges'
}=\
@aarangeobjects
;
}
sub
get_swisshash {
return
(0);
}
sub
_findgenefeatures {
my
(
$self
,
$entry
,
$gene_name
,
$cds_position
,
$getswissprotinfo
)=
@_
;
my
@entryfeatures
=@{
$entry
->{
'Features'
}};
my
@exons
;
my
@introns
;
my
@prim_transcripts
;
my
@transcripts
;
my
@repeat_units
;
my
@repeat_regions
;
my
@repeat_units_family
;
my
@repeat_regions_family
;
my
$rpt_family
;
my
$entryfeature
;
my
@genefeatures
;
my
$desc
;
my
@exondescs
;
my
@introndescs
;
my
(
$swissacc
,
$swisshash
);
my
@swisshashes
;
my
@ttables
;
my
(
$name
,
$exon
);
my
@range
;
my
@cdsexons
;
my
@labels
;
my
@cds
=@{
$entry
->{
'CDS'
}};
my
$skipgenematch
=0;
if
(
scalar
(
@cds
) == 1) {
$skipgenematch
=1;
}
my
(
$cds_begin
,
$cds_end
,
$proximity
);
if
(
$cds_position
) {
my
@cds_exons
=@{
$cds
[
$cds_position
-1]->{
'range'
}};
(
$cds_begin
,
$cds_end
)=(
$cds_exons
[0]->[0],
$cds_exons
[-1]->[1]);
$gene_name
=
$cds
[
$cds_position
-1]->{
'qualifiers'
}->{
'gene'
};
unless
(
$skipgenematch
) {
carp
"--DEBUG-- cdsbegin $cds_begin cdsend $cds_end--------"
;
}
$proximity
=100;
}
for
$entryfeature
(
@entryfeatures
) {
if
((
$skipgenematch
)||((
$cds_position
)&&(
$self
->_checkfeatureproximity(
$entryfeature
->{
'range'
},
$cds_begin
,
$cds_end
,
$proximity
)))||(!(
$cds_position
)&&(
$entryfeature
->{
'qualifiers'
}->{
'gene'
} eq
"$gene_name"
))) {
push
(
@genefeatures
,
$entryfeature
);
my
@range
=@{
$entryfeature
->{
'range'
}};
$name
=
$entryfeature
->{
'name'
};
my
%qualifierhash
=%{
$entryfeature
->{
'qualifiers'
}};
if
(
$name
eq
"CDS"
) {
if
(
$getswissprotinfo
) {
$swissacc
=
$entryfeature
->{
'qualifiers'
}->{
'db_xref'
};
$swisshash
=
$self
->get_swisshash(
$swissacc
);
push
(
@swisshashes
,
$swisshash
);
}
push
(
@ttables
,
$entryfeature
->{
'qualifiers'
}->{
'transl_table'
});
for
$exon
(
@range
) {
push
(
@labels
,
$exon
->[0],
$exon
->[1]);
}
push
(
@transcripts
,
$entryfeature
->{
'range'
});
}
else
{
if
(
$entryfeature
->{
'locationtype'
} &&
$entryfeature
->{
'locationtype'
} eq
"joined"
) {
@range
=(
$range
[0]->[0],
$range
[-1]->[1]);
}
push
(
@labels
,
$range
[0],
$range
[1]);
if
(
$name
eq
"exon"
) {
$desc
=
$entryfeature
->{
'qualifiers'
}->{
'number'
};
if
(
$entryfeature
->{
'qualifiers'
}->{
'note'
}) {
if
(
$desc
) {
$desc
.=
"|"
.
$entryfeature
->{
'qualifiers'
}->{
'note'
};
}
else
{
$desc
=
$entryfeature
->{
'qualifiers'
}->{
'note'
};
}
}
push
(
@exondescs
,
$desc
||
"unknown"
);
push
(
@exons
,\
@range
);
}
if
(
$name
eq
"intron"
) {
$desc
=
$entryfeature
->{
'qualifiers'
}->{
'number'
};
if
(
$desc
) {
$desc
.=
"|"
.
$entryfeature
->{
'qualifiers'
}->{
'note'
};
}
else
{
$desc
=
$entryfeature
->{
'qualifiers'
}->{
'note'
};
}
push
(
@introndescs
,
$desc
||
"unknown"
);
push
(
@introns
,\
@range
);
}
if
((
$name
eq
"prim_transcript"
)||(
$name
eq
"mRNA"
)) {
push
(
@prim_transcripts
,\
@range
); }
if
(
$name
eq
"repeat_unit"
) {
push
(
@repeat_units
,\
@range
);
$rpt_family
=
$entryfeature
->{
'qualifiers'
}->{
'rpt_family'
};
push
(
@repeat_units_family
,
$rpt_family
||
"unknown"
);
}
if
(
$name
eq
"repeat_region"
) {
push
(
@repeat_regions
,\
@range
);
$rpt_family
=
$entryfeature
->{
'qualifiers'
}->{
'rpt_family'
};
push
(
@repeat_regions_family
,
$rpt_family
||
"unknown"
);
}
}
}
}
unless
(
$gene_name
) {
$gene_name
=
"cds-position:"
.
$cds_position
; }
my
%genefeatureshash
;
$genefeatureshash
{gene_name}=
$gene_name
;
$genefeatureshash
{genefeatures}=\
@genefeatures
;
$genefeatureshash
{labels}=\
@labels
;
$genefeatureshash
{ttables}=\
@ttables
;
$genefeatureshash
{swisshashes}=\
@swisshashes
;
$genefeatureshash
{transcripts}=\
@transcripts
;
$genefeatureshash
{exons}=\
@exons
;
$genefeatureshash
{exondescs}=\
@exondescs
;
$genefeatureshash
{introns}=\
@introns
;
$genefeatureshash
{introndescs}=\
@introndescs
;
$genefeatureshash
{prim_transcripts}=\
@prim_transcripts
;
$genefeatureshash
{repeat_units}=\
@repeat_units
;
$genefeatureshash
{repeat_regions}=\
@repeat_regions
;
$genefeatureshash
{repeat_units_family}=\
@repeat_units_family
;
$genefeatureshash
{repeat_regions_family}=\
@repeat_regions_family
;
return
(\
%genefeatureshash
);
}
sub
_checkfeatureproximity {
my
(
$self
,
$range
,
$cds_begin
,
$cds_end
,
$proximity
)=
@_
;
my
@range
=@{
$range
};
my
(
$begin
,
$end
,
$strand
);
if
(
ref
(
$range
[0]) eq
"ARRAY"
) {
(
$begin
,
$end
,
$strand
)=(
$range
[0]->[0],
$range
[-1]->[1],
$range
[0]->[2]);
}
else
{
(
$begin
,
$end
,
$strand
)=
@range
;
}
if
(
$cds_begin
>
$cds_end
) {
(
$cds_begin
,
$cds_end
)=(
$cds_end
,
$cds_begin
);
}
if
(
$strand
== -1) {
(
$begin
,
$end
)=(
$end
,
$begin
);
}
if
((
$cds_begin
-
$end
)>
$proximity
) {
carp
"--DEBUG-- feature rejected: begin $begin end $end -------"
;
return
(0);
}
if
((
$begin
-
$cds_end
)>
$proximity
) {
carp
"--DEBUG-- feature rejected: begin $begin end $end -------"
;
return
(0);
}
carp
"--DEBUG-- feature accepted: begin $begin end $end -------"
;
return
(1);
}
sub
_get_alignment {
my
(
$self
,
$seq1
,
$seq2
)=
@_
;
my
$fastafile1
=
"/tmp/tmpfastafile1"
;
my
$fastafile2
=
"/tmp/tmpfastafile2"
;
my
$grepcut
=
'egrep -v "[[:digit:]]|^ *$|sequences" | cut -c8-'
;
my
$alignprogram
=
"/usr/local/etc/bioinfo/fasta2/align -s /usr/local/etc/bioinfo/fasta2/idnaa.mat $fastafile1 $fastafile2 2>/dev/null | $grepcut"
;
open
my
$TMPFASTAFILE1
,
">$fastafile1"
|| croak
"Cannot write into $fastafile1 for aa alignment"
;
open
my
$TMPFASTAFILE2
,
">$fastafile2"
|| croak
"Cannot write into $fastafile1 for aa alignment"
;
print
$TMPFASTAFILE1
">firstseq\n$seq1\n"
;
print
$TMPFASTAFILE2
">secondseq\n$seq2\n"
;
close
$TMPFASTAFILE1
;
close
$TMPFASTAFILE2
;
my
$alignment
=`
$alignprogram
`;
my
@alignlines
=
split
(/\n/,
$alignment
);
my
(
$linecount
,
$seq1_aligned
,
$seq2_aligned
,
$codes
);
for
(
$linecount
=0;
$linecount
<
@alignlines
;
$linecount
+=3) {
$seq1_aligned
.=
$alignlines
[
$linecount
];
$codes
.=
$alignlines
[
$linecount
+1];
$seq2_aligned
.=
$alignlines
[
$linecount
+2];
}
return
(
$seq1_aligned
,
$seq2_aligned
,
$codes
);
}
sub
_common_novelaasequence2gene {
my
(
$species_codon_usage
,
$ttabid
,
$aasequence
,
$gene_name
)=
@_
;
my
@species_codon_usage
=@{
$species_codon_usage
};
my
@codon_usage_label
=
qw (cga
cgc cgg cgt aga agg cta ctc ctg ctt tta ttg tca tcc tcg
tct agc agt aca acc acg act cca ccc ccg cct gca gcc gcg gct gga
ggc ggg ggt gta gtc gtg gtt aaa aag aac aat caa cag cac cat gaa
gag gac gat tac tat tgc tgt ttc ttt ata atc att atg tgg taa tag
tga);
my
(
$i
,
$j
);
my
%codon_usage_value
;
my
$aa_codon_total
;
for
(
$i
=0;
$i
<64;
$i
++) {
$codon_usage_value
{
$codon_usage_label
[
$i
]}=
$species_codon_usage
[
$i
];
}
my
$CodonTable
= Bio::Tools::CodonTable->new (
-id
=>
$ttabid
);
my
@aminoacids
=
split
(//,
uc
(
$aasequence
));
my
@alt_codons
;
my
(
$relativeusage
,
$dnasequence
,
$chosen_codon
,
$dice
,
$partial
,
$thiscodon
);
for
$i
(
@aminoacids
) {
@alt_codons
=
$CodonTable
->revtranslate(
$i
);
unless
(
@alt_codons
) {
carp
"No reverse translation possible for aminoacid \'$i\'"
;
$dnasequence
.=
"???"
;
}
else
{
$aa_codon_total
=0;
for
$j
(
@alt_codons
) {
$aa_codon_total
+=
$codon_usage_value
{
$j
};
}
$dice
=
rand
(1);
$partial
=0;
$chosen_codon
=
""
;
CODONCHOICE:
for
$j
(0..
@alt_codons
) {
$thiscodon
=
$alt_codons
[
$j
];
$relativeusage
=(
$codon_usage_value
{
$thiscodon
}/
$aa_codon_total
);
if
(
$dice
<
$relativeusage
+
$partial
) {
$chosen_codon
=
$thiscodon
;
last
CODONCHOICE;
}
else
{
$partial
+=
$relativeusage
;
}
}
unless
(
$chosen_codon
) {
$chosen_codon
=
$alt_codons
[-1];
}
$dnasequence
.=
$chosen_codon
;
}
}
my
$dna
= Bio::LiveSeq::DNA->new(
-seq
=>
$dnasequence
);
my
$min
=1;
my
$max
=
length
(
$dnasequence
);
my
$exon
= Bio::LiveSeq::Exon->new(
-seq
=>
$dna
,
-start
=>
$min
,
-end
=>
$max
,
-strand
=> 1);
my
@exons
=(
$exon
);
my
$transcript
= Bio::LiveSeq::Transcript->new(
-exons
=> \
@exons
);
$transcript
->translation_table(
$ttabid
);
my
@transcripts
=(
$transcript
);
my
$translation
= Bio::LiveSeq::Translation->new(
-transcript
=>
$transcript
);
my
@translations
=(
$translation
);
my
%features
=(
DNA
=>
$dna
,
Transcripts
=> \
@transcripts
,
Translations
=> \
@translations
);
my
$gene
= Bio::LiveSeq::Gene->new(
-name
=>
$gene_name
,
-features
=> \
%features
,
-upbound
=>
$min
,
-downbound
=>
$max
);
unless
(
$gene
) {
carp
"Error in Gene creation phase"
;
return
(0);
}
return
$gene
;
}
1;