$Bio::EnsEMBL::Utils::SeqDumper::VERSION
=
'112.0.0_58'
;
$Bio::EnsEMBL::Utils::SeqDumper::VERSION
=
'112.0.058'
;
use
Fcntl
qw( SEEK_SET SEEK_END )
;
my
$DUMP_HANDLERS
=
{
'FASTA'
=> \
&dump_fasta
,
'EMBL'
=> \
&dump_embl
,
'GENBANK'
=> \
&dump_genbank
};
my
@COMMENTS
=
(
'This sequence was annotated by ###SOURCE###. Please visit '
.
'All feature locations are relative to the first (5\') base '
.
'of the sequence in this file. The sequence presented is '
.
'always the forward strand of the assembly. Features '
.
'that lie outside of the sequence contained in this file '
.
'have clonal location coordinates in the format: '
.
'<clone accession>.<version>:<start>..<end>'
,
'The /gene indicates a unique id for a gene, /note="transcript_id=..."'
.
' a unique id for a transcript, /protein_id a unique id for a peptide '
.
'and note="exon_id=..." a unique id for an exon. These ids are '
.
'maintained wherever possible between versions.'
,
'All the exons and transcripts in Ensembl are confirmed by '
.
'similarity to either protein or cDNA sequences.'
);
sub
new {
my
(
$caller
,
$slice
,
$params
) =
@_
;
my
$class
=
ref
(
$caller
) ||
$caller
;
my
$feature_types
= {
'gene'
=> 1,
'genscan'
=> 1,
'repeat'
=> 1,
'similarity'
=> 1,
'variation'
=> 1,
'contig'
=> 1,
'marker'
=> 1,
'estgene'
=> 0,
'vegagene'
=> 0};
my
$self
=
bless
{
'feature_types'
=>
$feature_types
},
$class
;
foreach
my
$p
(
sort
keys
%{
$params
|| {}}) {
$self
->{
$p
} =
$params
->{
$p
};
}
exists
$self
->{
'chunk_factor'
} and
defined
$self
->{
'chunk_factor'
}
or
$self
->{
'chunk_factor'
} = 1000;
return
$self
;
}
sub
enable_feature_type {
my
(
$self
,
$type
) =
@_
;
$type
|| throw(
"type arg is required"
);
if
(
exists
(
$self
->{
'feature_types'
}->{
$type
})) {
$self
->{
'feature_types'
}->{
$type
} = 1;
}
else
{
warning(
"unknown feature type '$type'\n"
.
"valid types are: "
.
join
(
','
,
keys
%{
$self
->{
'feature_types'
}}));
}
}
sub
attach_database {
my
(
$self
,
$name
,
$db
) =
@_
;
$name
|| throw(
"name arg is required"
);
unless
(
$db
&&
ref
(
$db
) &&
$db
->isa(
'Bio::EnsEMBL::DBSQL::DBAdaptor'
)) {
throw(
"db arg must be a Bio::EnsEMBL::DBSQL::DBAdaptor not a [$db]"
);
}
$self
->{
'attached_dbs'
}->{
$name
} =
$db
;
}
sub
get_database {
my
(
$self
,
$name
) =
@_
;
$name
|| throw(
"name arg is required"
);
return
$self
->{
'attached_dbs'
}->{
$name
};
}
sub
remove_database {
my
(
$self
,
$name
) =
@_
;
$name
|| throw(
"name arg is required"
);
if
(
exists
$self
->{
'attached_dbs'
}->{
$name
}) {
return
delete
$self
->{
'attached_dbs'
}->{
$name
};
}
return
undef
;
}
sub
disable_feature_type {
my
(
$self
,
$type
) =
@_
;
$type
|| throw(
"type arg is required"
);
if
(
exists
(
$self
->{
'feature_types'
}->{
$type
})) {
$self
->{
'feature_types'
}->{
$type
} = 0;
}
else
{
warning(
"unknown feature type '$type'\n"
.
"valid types are: "
.
join
(
','
,
keys
%{
$self
->{
'feature_types'
}}));
}
}
sub
is_enabled {
my
(
$self
,
$type
) =
@_
;
$type
|| throw(
"type arg is required"
);
if
(
exists
(
$self
->{
'feature_types'
}->{
$type
})) {
return
$self
->{
'feature_types'
}->{
$type
};
}
else
{
warning(
"unknown feature type '$type'\n"
.
"valid types are: "
.
join
(
','
,
keys
%{
$self
->{
'feature_types'
}}));
}
}
sub
dump
{
my
(
$self
,
$slice
,
$format
,
$outfile
,
$seq
,
$no_append
) =
@_
;
$format
|| throw(
"format arg is required"
);
$slice
|| throw(
"slice arg is required"
);
my
$dump_handler
=
$DUMP_HANDLERS
->{
uc
(
$format
)};
unless
(
$dump_handler
) {
throw(
"No dump handler is defined for format $format\n"
);
}
my
$FH
= IO::File->new;;
if
(
$outfile
) {
my
$mode
= (
$no_append
) ?
'>'
:
'>>'
;
$FH
->
open
(
"${mode}${outfile}"
) or throw(
"Could not open file $outfile: $!"
);
}
else
{
$FH
= \
*STDOUT
;
}
&$dump_handler
(
$self
,
$slice
,
$FH
,
$seq
);
$FH
->
close
if
(
$outfile
);
}
sub
dump_embl {
my
$self
=
shift
;
my
$slice
=
shift
;
my
$FH
=
shift
;
my
$len
=
$slice
->
length
;
my
$version
;
my
$acc
;
my
$cs
=
$slice
->coord_system();
my
$name_str
=
$cs
->name() .
' '
.
$slice
->seq_region_name();
$name_str
.=
' '
.
$cs
->version
if
(
$cs
->version);
my
$start
=
$slice
->start;
my
$end
=
$slice
->end;
my
$slice_adaptor
=
$slice
->adaptor();
my
$full_slice
=
$slice
->adaptor->fetch_by_region(
$cs
->name,
$slice
->seq_region_name,
undef
,
undef
,
undef
,
$cs
->version);
my
$entry_name
=
$slice
->seq_region_name();
if
(
$full_slice
->name eq
$slice
->name) {
$name_str
.=
' full sequence'
;
$acc
=
$slice
->seq_region_name();
my
@acc_ver
=
split
(/\./,
$acc
);
if
(
@acc_ver
== 2) {
$acc
=
$acc_ver
[0];
$version
=
$acc_ver
[0] .
'.'
.
$acc_ver
[1];
}
elsif
(
@acc_ver
== 1 &&
$cs
->version()) {
$version
=
$acc
.
'.'
.
$cs
->version();
}
else
{
$version
=
$acc
;
}
}
else
{
$name_str
.=
' partial sequence'
;
$acc
=
$slice
->name();
$version
=
$acc
;
}
$acc
=
$slice
->name();
$: = (
" \t\n-,"
);
seek
(
$FH
, 0, SEEK_END);
my
$EMBL_HEADER
=
'@< ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<~
';
my
$VALUE
=
"$entry_name standard; DNA; HTG; $len BP."
;
$self
->
write
(
$FH
,
$EMBL_HEADER
,
'ID'
,
$VALUE
);
$self
->
print
(
$FH
,
"XX\n"
);
$self
->
write
(
$FH
,
$EMBL_HEADER
,
'AC'
,
$acc
);
$self
->
print
(
$FH
,
"XX\n"
);
$self
->
write
(
$FH
,
$EMBL_HEADER
,
'SV'
,
$version
);
$self
->
print
(
$FH
,
"XX\n"
);
$self
->
write
(
$FH
,
$EMBL_HEADER
,
'DT'
,
$self
->_date_string);
$self
->
print
(
$FH
,
"XX\n"
);
my
$meta_container
=
$slice
->adaptor()->db()->get_MetaContainer();
$self
->
write
(
$FH
,
$EMBL_HEADER
,
'DE'
,
$meta_container
->get_scientific_name() .
" $name_str $start..$end annotated by Ensembl"
);
$self
->
print
(
$FH
,
"XX\n"
);
$self
->
write
(
$FH
,
$EMBL_HEADER
,
'KW'
,
'.'
);
$self
->
print
(
$FH
,
"XX\n"
);
my
$species_name
=
$meta_container
->get_scientific_name();
if
(
my
$cn
=
$meta_container
->get_common_name()) {
$species_name
.=
" ($cn)"
;
}
$self
->
write
(
$FH
,
$EMBL_HEADER
,
'OS'
,
$species_name
);
my
@cls
= @{
$meta_container
->get_classification()};
$self
->
write
(
$FH
,
$EMBL_HEADER
,
'OC'
,
join
(
'; '
,
reverse
(
@cls
)) .
'.'
);
$self
->
print
(
$FH
,
"XX\n"
);
my
$annotation_source
=
$self
->annotation_source(
$meta_container
);
foreach
my
$comment
(
@COMMENTS
) {
$comment
=~ s/\
$self
->
write
(
$FH
,
$EMBL_HEADER
,
'CC'
,
$comment
);
$self
->
print
(
$FH
,
"XX\n"
);
}
$self
->
print
(
$FH
,
"FH Key Location/Qualifiers\n"
);
my
$FEATURE_TABLE
=
'FT ^<<<<<<<<<<<<<<<^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<~
';
$self
->_dump_feature_table(
$slice
,
$FH
,
$FEATURE_TABLE
);
$self
->
print
(
$FH
,
"XX\n"
);
my
$sq_offset
=
tell
(
$FH
);
$sq_offset
== -1 and throw
"Unable to get offset for output fh"
;
$self
->
print
(
$FH
,
"SQ Sequence ########## BP; ########## A; ########## C; ########## G; ########## T; ########## other;\n"
);
my
$acgt
=
$self
->write_embl_seq(
$slice
,
$FH
);
$self
->
print
(
$FH
,
"//\n"
);
my
$end_of_entry_offset
=
tell
(
$FH
);
$end_of_entry_offset
== -1 and throw
"Unable to get offset for output fh"
;
seek
(
$FH
,
$sq_offset
, SEEK_SET)
or throw
"Cannot seek backward to sequence header position"
;
$self
->
print
(
$FH
,
sprintf
"SQ Sequence %10d BP; %10d A; %10d C; %10d G; %10d T; %10d other;"
,
$acgt
->{tot},
$acgt
->{a},
$acgt
->{c},
$acgt
->{g},
$acgt
->{t},
$acgt
->{n});
seek
(
$FH
,
$end_of_entry_offset
, SEEK_SET)
or throw
"Cannot seek forward to end of entry"
;
$: =
" \n-"
;
}
sub
dump_genbank {
my
(
$self
,
$slice
,
$FH
) =
@_
;
$: =
" \t\n-,"
;
my
$GENBANK_HEADER
=
'^<<<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<~~
';
my
$GENBANK_SUBHEADER
=
' ^<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<~~
';
my
$GENBANK_FT
=
' ^<<<<<<<<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<~~
';
my
$version
;
my
$acc
;
my
$cs
=
$slice
->coord_system();
my
$name_str
=
$cs
->name() .
' '
.
$slice
->seq_region_name();
$name_str
.=
' '
.
$cs
->version
if
(
$cs
->version);
my
$slice_adaptor
=
$slice
->adaptor();
my
$full_slice
=
$slice
->adaptor->fetch_by_region(
$cs
->name,
$slice
->seq_region_name,
undef
,
undef
,
undef
,
$cs
->version);
my
$entry_name
=
$slice
->seq_region_name();
if
(
$full_slice
->name eq
$slice
->name) {
$name_str
.=
' full sequence'
;
$acc
=
$slice
->seq_region_name();
my
@acc_ver
=
split
(/\./,
$acc
);
if
(
@acc_ver
== 2) {
$acc
=
$acc_ver
[0];
$version
=
$acc_ver
[0] .
$acc_ver
[1];
}
elsif
(
@acc_ver
== 1 &&
$cs
->version()) {
$version
=
$acc
.
$cs
->version();
}
else
{
$version
=
$acc
;
}
}
else
{
$name_str
.=
' partial sequence'
;
$acc
=
$slice
->name();
$version
=
$acc
;
}
$acc
=
$slice
->name();
my
$length
=
$slice
->
length
;
my
$start
=
$slice
->start();
my
$end
=
$slice
->end();
my
$date
=
$self
->_date_string;
my
$meta_container
=
$slice
->adaptor()->db()->get_MetaContainer();
seek
(
$FH
, 0, SEEK_END);
my
$tag
=
'LOCUS'
;
my
$value
=
"$entry_name $length bp DNA HTG $date"
;
$self
->
write
(
$FH
,
$GENBANK_HEADER
,
$tag
,
$value
);
$tag
=
"DEFINITION"
;
$value
=
$meta_container
->get_scientific_name() .
" $name_str $start..$end reannotated via EnsEMBL"
;
$self
->
write
(
$FH
,
$GENBANK_HEADER
,
$tag
,
$value
);
$self
->
write
(
$FH
,
$GENBANK_HEADER
,
'ACCESSION'
,
$acc
);
$self
->
write
(
$FH
,
$GENBANK_HEADER
,
'VERSION'
,
$version
);
$self
->
write
(
$FH
,
$GENBANK_HEADER
,
'KEYWORDS'
,
'.'
);
my
$common_name
=
$meta_container
->get_common_name();
$common_name
=
$meta_container
->get_scientific_name()
unless
$common_name
;
$self
->
write
(
$FH
,
$GENBANK_HEADER
,
'SOURCE'
,
$common_name
);
my
@cls
= @{
$meta_container
->get_classification()};
shift
@cls
;
$self
->
write
(
$FH
,
$GENBANK_SUBHEADER
,
'ORGANISM'
,
$meta_container
->get_scientific_name());
$self
->
write
(
$FH
,
$GENBANK_SUBHEADER
,
''
,
join
(
'; '
,
reverse
@cls
) .
"."
);
my
$annotation_source
=
$self
->annotation_source(
$meta_container
);
foreach
my
$comment
(
@COMMENTS
) {
$comment
=~ s/\
$self
->
write
(
$FH
,
$GENBANK_HEADER
,
'COMMENT'
,
$comment
);
}
$self
->
print
(
$FH
,
"FEATURES Location/Qualifiers\n"
);
$self
->_dump_feature_table(
$slice
,
$FH
,
$GENBANK_FT
);
my
$sq_offset
=
tell
(
$FH
);
$sq_offset
== -1 and throw
"Unable to get offset for output fh"
;
$self
->
print
(
$FH
,
"BASE COUNT ########## a ########## c ########## g ########## t ########## n\nORIGIN\n"
);
my
$acgt
=
$self
->write_genbank_seq(
$slice
,
$FH
);
$self
->
print
(
$FH
,
"//\n"
);
my
$end_of_entry_offset
=
tell
(
$FH
);
$end_of_entry_offset
== -1 and throw
"Unable to get offset for output fh"
;
seek
(
$FH
,
$sq_offset
, SEEK_SET)
or throw
"Cannot seek backward to sequence header position"
;
$self
->
print
(
$FH
,
sprintf
"BASE COUNT %10d a %10d c %10d g %10d t %10d n"
,
$acgt
->{a},
$acgt
->{c},
$acgt
->{g},
$acgt
->{t},
$acgt
->{n});
seek
(
$FH
,
$end_of_entry_offset
, SEEK_SET)
or throw
"Cannot seek forward to end of entry"
;
$: =
" \n-"
;
}
sub
_dump_feature_table {
my
$self
=
shift
;
my
$slice
=
shift
;
my
$FH
=
shift
;
my
$FORMAT
=
shift
;
my
$lite
=
$slice
->adaptor->db->remove_db_adaptor(
'lite'
);
my
$meta
=
$slice
->adaptor->db->get_MetaContainer;
my
@ff
= (
$FH
,
$FORMAT
);
my
$value
;
my
$classification
=
join
(
', '
, @{
$meta
->get_classification()});
$self
->
write
(
@ff
,
'source'
,
"1.."
.
$slice
->
length
());
$self
->
write
(
@ff
,
''
,
'/organism="'
.
$meta
->get_scientific_name().
'"'
);
$self
->
write
(
@ff
,
''
,
'/db_xref="taxon:'
.
$meta
->get_taxonomy_id().
'"'
);
my
@gene_slices
;
if
(
$self
->is_enabled(
'gene'
)) {
push
@gene_slices
,
$slice
;
}
my
$gene_dbs
= {
'vegagene'
=>
'vega'
,
'estgene'
=>
'estgene'
};
foreach
my
$gene_type
(
keys
%$gene_dbs
) {
if
(
$self
->is_enabled(
$gene_type
)) {
my
$db
=
$self
->get_database(
$gene_dbs
->{
$gene_type
});
if
(
$db
) {
my
$sa
=
$db
->get_SliceAdaptor();
push
@gene_slices
,
$sa
->fetch_by_name(
$slice
->name());
}
else
{
warning(
"A ["
.
$gene_dbs
->{
$gene_type
} .
"] database must be "
.
"attached to this SeqDumper\n(via a call to "
.
"attach_database) to retrieve genes of type [$gene_type]"
);
}
}
}
foreach
my
$gene_slice
(
@gene_slices
) {
my
@genes
= @{
$gene_slice
->get_all_Genes(
undef
,
undef
, 1)};
while
(
my
$gene
=
shift
@genes
) {
$value
=
$self
->features2location( [
$gene
] );
$self
->
write
(
@ff
,
'gene'
,
$value
);
$self
->
write
(
@ff
,
""
,
'/gene='
.
$gene
->stable_id_version() );
if
(
defined
(
$gene
->display_xref)){
$self
->
write
(
@ff
,
""
,
'/locus_tag="'
.
$gene
->display_xref->display_id.
'"'
);
}
my
$desc
=
$gene
->description;
if
(
defined
(
$desc
) and
$desc
ne
""
){
$desc
=~ s/\"//;
$self
->
write
(
@ff
,
""
,
'/note="'
.
$gene
->description.
'"'
);
}
foreach
my
$transcript
(@{
$gene
->get_all_Transcripts}) {
my
$translation
=
$transcript
->translation;
if
(
$translation
) {
$value
=
$self
->features2location(
$transcript
->get_all_Exons);
$self
->
write
(
@ff
,
'mRNA'
,
$value
);
$self
->
write
(
@ff
,
''
,
'/gene="'
.
$gene
->stable_id_version().
'"'
);
$self
->
write
(
@ff
,
''
,
'/standard_name="'
.
$transcript
->stable_id_version().
'"'
);
$value
=
$self
->features2location(
$transcript
->get_all_translateable_Exons);
$self
->
write
(
@ff
,
'CDS'
,
$value
);
my
$codon_start
=
$self
->transcript_to_codon_start(
$transcript
);
$self
->
write
(
@ff
,
''
,
qq{/codon_start="${codon_start}
"})
if
$codon_start
> 1;
my
$codon_table_id
=
$self
->_get_codon_table_id(
$slice
);
if
(
$codon_table_id
> 1){
$self
->
write
(
@ff
,
''
,
'/transl_table='
.
$codon_table_id
);
}
$self
->
write
(
@ff
,
''
,
'/gene="'
.
$gene
->stable_id_version().
'"'
);
$self
->
write
(
@ff
,
''
,
'/protein_id="'
.
$translation
->stable_id_version().
'"'
);
$self
->
write
(
@ff
,
''
,
'/note="transcript_id='
.
$transcript
->stable_id_version().
'"'
);
foreach
my
$dbl
(@{
$transcript
->get_all_DBLinks}) {
my
$db_xref
=
'/db_xref="'
.
$dbl
->dbname().
':'
.
$dbl
->primary_id().
'"'
;
my
$go_db_xref
=
'/db_xref="'
.
$dbl
->primary_id().
'"'
;
$value
= (
$dbl
->dbname()=~/GO/) ?
$go_db_xref
:
$db_xref
;
$self
->
write
(
@ff
,
''
,
$value
);
}
my
$pep
=
$transcript
->translate();
if
(
$pep
) {
$value
=
'/translation="'
.
$pep
->seq().
'"'
;
$self
->
write
(
@ff
,
''
,
$value
);
}
}
else
{
$value
=
$self
->features2location(
$transcript
->get_all_Exons);
$self
->
write
(
@ff
,
'misc_RNA'
,
$value
);
$self
->
write
(
@ff
,
''
,
'/gene="'
.
$gene
->stable_id_version().
'"'
);
foreach
my
$dbl
(@{
$transcript
->get_all_DBLinks}) {
$value
=
'/db_xref="'
.
$dbl
->dbname().
':'
.
$dbl
->primary_id().
'"'
;
$self
->
write
(
@ff
,
''
,
$value
);
}
$self
->
write
(
@ff
,
''
,
'/note="'
.
$transcript
->biotype().
'"'
);
$self
->
write
(
@ff
,
''
,
'/standard_name="'
.
$transcript
->stable_id_version().
'"'
);
}
}
}
foreach
my
$gene
(@{
$gene_slice
->get_all_Genes(
undef
,
undef
,1)}) {
foreach
my
$exon
(@{
$gene
->get_all_Exons}) {
$self
->
write
(
@ff
,
'exon'
,
$self
->features2location([
$exon
]));
$self
->
write
(
@ff
,
''
,
'/note="exon_id='
.
$exon
->stable_id_version().
'"'
);
}
}
}
if
(
$self
->is_enabled(
'genscan'
)) {
my
@genscan_exons
;
my
@transcripts
= @{
$slice
->get_all_PredictionTranscripts(
undef
,1)};
while
(
my
$transcript
=
shift
@transcripts
) {
my
$exons
=
$transcript
->get_all_Exons();
push
@genscan_exons
,
@$exons
;
$self
->
write
(
@ff
,
'mRNA'
,
$self
->features2location(
$exons
));
$self
->
write
(
@ff
,
''
,
'/product="'
.
$transcript
->translate()->seq().
'"'
);
$self
->
write
(
@ff
,
''
,
'/note="identifier='
.
$transcript
->stable_id_version.
'"'
);
$self
->
write
(
@ff
,
''
,
'/note="Derived by automated computational'
.
' analysis using gene prediction method:'
.
$transcript
->analysis->logic_name .
'"'
);
}
}
if
(
$self
->is_enabled(
'variation'
) &&
$slice
->can(
'get_all_VariationFeatures'
)) {
my
@variations
= @{
$slice
->get_all_VariationFeatures()};
while
(
my
$snp
=
shift
@variations
) {
my
$ss
=
$snp
->start;
my
$se
=
$snp
->end;
next
if
(
$ss
< 1 ||
$se
>
$slice
->
length
);
$self
->
write
(
@ff
,
'variation'
,
"$ss..$se"
);
$self
->
write
(
@ff
,
''
,
'/replace="'
.
$snp
->allele_string.
'"'
);
my
$rs_id
=
$snp
->variation_name();
my
$db
=
$snp
->source_name();
$self
->
write
(
@ff
,
''
,
"/db_xref=\"$db:$rs_id\""
);
}
}
if
(
$self
->is_enabled(
'similarity'
)) {
foreach
my
$sim
(@{
$slice
->get_all_SimilarityFeatures}) {
$self
->
write
(
@ff
,
'misc_feature'
,
$self
->features2location([
$sim
]));
$self
->
write
(
@ff
,
''
,
'/note="match: '
.
$sim
->hseqname.
' : '
.
$sim
->hstart.
'..'
.
$sim
->hend.
'('
.
$sim
->hstrand.
')"'
);
}
}
if
(
$self
->is_enabled(
'repeat'
)) {
my
@rfs
= @{
$slice
->get_all_RepeatFeatures()};
while
(
my
$repeat
=
shift
@rfs
) {
$self
->
write
(
@ff
,
'repeat_region'
,
$self
->features2location([
$repeat
]));
$self
->
write
(
@ff
,
''
,
'/note="'
.
$repeat
->repeat_consensus->name.
' repeat: matches '
.
$repeat
->hstart.
'..'
.
$repeat
->hend .
'('
.
$repeat
->hstrand.
') of consensus"'
);
}
}
if
(
$self
->is_enabled(
'marker'
) &&
$slice
->can(
'get_all_MarkerFeatures'
)) {
my
@markers
= @{
$slice
->get_all_MarkerFeatures()};
while
(
my
$mf
=
shift
@markers
) {
$self
->
write
(
@ff
,
'STS'
,
$self
->features2location([
$mf
]));
if
(
$mf
->marker->display_MarkerSynonym) {
$self
->
write
(
@ff
,
''
,
'/standard_name="'
.
$mf
->marker->display_MarkerSynonym->name .
'"'
);
}
my
@synonyms
= @{
$mf
->marker->get_all_MarkerSynonyms};
@synonyms
=
grep
{
$_
->source }
@synonyms
;
foreach
my
$synonym
(
@synonyms
) {
$self
->
write
(
@ff
,
''
,
'/db_xref="'
.
$synonym
->source.
':'
.
$synonym
->name.
'"'
);
}
$self
->
write
(
@ff
,
''
,
'/note="map_weight='
.
$mf
->map_weight.
'"'
);
}
}
if
(
$self
->is_enabled(
'contig'
)) {
foreach
my
$segment
(@{
$slice
->project(
'seqlevel'
)}) {
my
(
$start
,
$end
,
$slice
) =
@$segment
;
$self
->
write
(
@ff
,
'misc_feature'
,
$start
.
'..'
.
$end
);
$self
->
write
(
@ff
,
''
,
'/note="contig '
.
$slice
->seq_region_name .
' '
.
$slice
->start .
'..'
.
$slice
->end .
'('
.
$slice
->strand .
')"'
);
}
}
$slice
->adaptor->db->add_db_adaptor(
'lite'
,
$lite
)
if
$lite
;
}
sub
transcript_to_codon_start {
my
(
$self
,
$transcript
) =
@_
;
my
$start_phase
=
$transcript
->start_Exon()->phase();
return
(
$start_phase
== 1 ) ? 3 :
(
$start_phase
== 2 ) ? 2 :
(
$start_phase
== 0 ) ? 1 :
-1;
}
sub
_get_codon_table_id{
my
(
$self
,
$slice
) =
@_
;
my
$codon_table_id
= 1;
my
$codon_table_attributes
=
$slice
->get_all_Attributes(
"codon_table"
);
if
(@{
$codon_table_attributes
}) {
$codon_table_id
=
$codon_table_attributes
->[0]->value;
}
return
$codon_table_id
;
}
sub
dump_fasta {
my
$self
=
shift
;
my
$slice
=
shift
;
my
$FH
=
shift
;
my
$id
=
$slice
->seq_region_name;
my
$seqtype
=
'dna'
;
my
$idtype
=
$slice
->coord_system->name;
my
$location
=
$slice
->name;
my
$start
= 1;
my
$end
=
$slice
->
length
();
my
$header
=
">$id $seqtype:$idtype $location\n"
;
$self
->
print
(
$FH
,
$header
);
my
$FORMAT
= '^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
';
my
$cur
=
$start
;
while
(
$cur
<=
$end
) {
my
$to
=
$cur
+ 59_999;
$to
=
$end
if
(
$to
>
$end
);
my
$seq
=
$slice
->subseq(
$cur
,
$to
);
$cur
=
$to
+ 1;
$self
->
write
(
$FH
,
$FORMAT
,
$seq
);
}
}
sub
features2location {
my
$self
=
shift
;
my
$features
=
shift
;
my
@join
= ();
foreach
my
$f
(
@$features
) {
my
$slice
=
$f
->slice;
my
$start
=
$f
->start();
my
$end
=
$f
->end();
my
$strand
=
$f
->strand();
if
(
$start
>= 1 &&
$end
<=
$slice
->
length
) {
if
(
$strand
== 1) {
push
@join
,
"$start..$end"
;
}
else
{
push
@join
,
"complement($start..$end)"
;
}
}
else
{
my
@fs
= ();
my
$projection
=
$f
->project(
'seqlevel'
);
foreach
my
$segment
(
@$projection
) {
my
$slice
=
$segment
->[2];
my
$slc_start
=
$slice
->start();
my
$slc_end
=
$slice
->end();
my
$seq_reg
=
$slice
->seq_region_name();
if
(
$slice
->strand == 1) {
push
@join
,
"$seq_reg:$slc_start..$slc_end"
;
}
else
{
push
@join
,
"complement($seq_reg:$slc_start..$slc_end)"
;
}
}
}
}
my
$out
=
join
','
,
@join
;
if
(
scalar
@join
> 1) {
$out
=
"join($out)"
;
}
return
$out
;
}
sub
annotation_source {
my
(
$self
,
$meta_container
) =
@_
;
my
@providers
= ();
my
@provider_urls
= ();
foreach
( @{
$meta_container
->list_value_by_key(
'annotation.provider_name'
)} ) {
push
@providers
,
$_
if
$_
ne
''
;
}
foreach
( @{
$meta_container
->list_value_by_key(
'annotation.provider_url'
)} ) {
push
@provider_urls
,
$_
if
$_
ne
''
;
}
if
( !
scalar
(
@providers
) ) {
foreach
( @{
$meta_container
->list_value_by_key(
'assembly.provider_name'
)} ) {
push
@providers
,
$_
if
$_
ne
''
;
}
foreach
( @{
$meta_container
->list_value_by_key(
'assembly.provider_url'
)} ) {
push
@provider_urls
,
$_
if
$_
ne
''
;
}
}
if
( !
scalar
(
@providers
) ) {
push
@providers
,
'Ensembl'
;
}
my
$annotation_source
=
join
(
' and '
,
@providers
);
if
(
@provider_urls
) {
$annotation_source
.=
' '
.
sprintf
(
q{(%s)}
,
join
(
', '
,
@provider_urls
));
}
return
$annotation_source
;
}
sub
_date_string {
my
$self
=
shift
;
my
(
$sec
,
$min
,
$hour
,
$mday
,
$mon
,
$year
) =
localtime
(
time
());
my
$month
= (
'JAN'
,
'FEB'
,
'MAR'
,
'APR'
,
'MAY'
,
'JUN'
,
'JUL'
,
'AUG'
,
'SEP'
,
'OCT'
,
'NOV'
,
'DEC'
)[
$mon
];
$year
+= 1900;
return
"$mday-$month-$year"
;
}
sub
write
{
my
(
$self
,
$FH
,
$FORMAT
,
@values
) =
@_
;
while
(
defined
(
$values
[-1]) and
$values
[-1] ne
''
) {
formline
(
$FORMAT
,
@values
);
$self
->
print
(
$FH
, $^A );
$^A =
''
;
}
}
sub
write_genbank_seq {
my
$self
=
shift
;
my
$FH
=
shift
;
my
$seq
=
shift
;
my
$base_total
=
shift
;
$base_total
||= 0;
my
$GENBANK_SEQ
=
'@>>>>>>>> ^<<<<<<<<< ^<<<<<<<<< ^<<<<<<<<< ^<<<<<<<<< ^<<<<<<<<< ^<<<<<<<<<~
';
my
$total
= -59 +
$base_total
;
while
(
$$seq
) {
$total
+= 60;
formline
(
$GENBANK_SEQ
,
$total
,
$$seq
,
$$seq
,
$$seq
,
$$seq
,
$$seq
,
$$seq
);
$self
->
print
(
$FH
, $^A );
$^A =
''
;
}
}
sub
write_genbank_seq {
my
$self
=
shift
;
my
$slice
=
shift
;
my
$FH
=
shift
;
my
$width
= 60;
my
$chunk_size
=
$self
->{
'chunk_factor'
} *
$width
;
$chunk_size
> 0 or throw
"Invalid chunk size: check chunk_factor parameter"
;
my
$start
= 1;
my
$end
=
$slice
->
length
;
my
$total
= -59;
my
$here
=
$start
;
my
$GENBANK_SEQ
=
'@>>>>>>>> ^<<<<<<<<< ^<<<<<<<<< ^<<<<<<<<< ^<<<<<<<<< ^<<<<<<<<< ^<<<<<<<<<~
';
my
$acgt
;
while
(
$here
<=
$end
) {
my
$there
=
$here
+
$chunk_size
- 1;
$there
=
$end
if
(
$there
>
$end
);
my
$sseq
=
$slice
->subseq(
$here
,
$there
);
$acgt
->{a} +=
$sseq
=~
tr
/Aa//;
$acgt
->{c} +=
$sseq
=~
tr
/Cc//;
$acgt
->{g} +=
$sseq
=~
tr
/Gg//;
$acgt
->{t} +=
$sseq
=~
tr
/Tt//;
while
(
$sseq
) {
$total
+= 60;
formline
(
$GENBANK_SEQ
,
$total
,
$sseq
,
$sseq
,
$sseq
,
$sseq
,
$sseq
,
$sseq
);
$self
->
print
(
$FH
, $^A);
$^A =
''
;
}
$here
=
$there
+ 1;
}
$acgt
->{n} =
$end
- (
$acgt
->{a} +
$acgt
->{c} +
$acgt
->{g} +
$acgt
->{t});
$acgt
->{tot} =
$end
;
return
$acgt
;
}
sub
write_embl_seq {
my
$self
=
shift
;
my
$slice
=
shift
;
my
$FH
=
shift
;
my
$width
= 60;
my
$chunk_size
=
$self
->{
'chunk_factor'
} *
$width
;
$chunk_size
> 0 or throw
"Invalid chunk size: check chunk_factor parameter"
;
my
$start
= 1;
my
$end
=
$slice
->
length
;
my
$total
=
$end
;
my
$here
=
$start
;
my
$EMBL_SEQ
=
' ^<<<<<<<<< ^<<<<<<<<< ^<<<<<<<<< ^<<<<<<<<< ^<<<<<<<<< ^<<<<<<<<< @>>>>>>>>>~
';
my
$acgt
;
while
(
$here
<=
$end
) {
my
$there
=
$here
+
$chunk_size
- 1;
$there
=
$end
if
(
$there
>
$end
);
my
$sseq
=
$slice
->subseq(
$here
,
$there
);
$acgt
->{a} +=
$sseq
=~
tr
/Aa//;
$acgt
->{c} +=
$sseq
=~
tr
/Cc//;
$acgt
->{g} +=
$sseq
=~
tr
/Gg//;
$acgt
->{t} +=
$sseq
=~
tr
/Tt//;
while
(
$sseq
) {
$total
-= 60;
$total
= 0
if
$total
< 0;
formline
(
$EMBL_SEQ
,
$sseq
,
$sseq
,
$sseq
,
$sseq
,
$sseq
,
$sseq
,
$end
-
$total
);
$self
->
print
(
$FH
, $^A);
$^A =
''
;
}
$here
=
$there
+ 1;
}
$acgt
->{n} =
$end
- (
$acgt
->{a} +
$acgt
->{c} +
$acgt
->{g} +
$acgt
->{t});
$acgt
->{tot} =
$end
;
return
$acgt
;
}
sub
print
{
my
(
$self
,
$FH
,
$string
) =
@_
;
if
(!
print
$FH
$string
){
print
STDERR
"Problem writing to disk\n"
;
print
STDERR
"the string is $string\n"
;
die
"Could not write to file handle"
;
}
}
1;