# Pattern sources used to classify the lines of a record and the
# ';'-separated items of a SEQUENCE line.  Each value is a plain string (not
# a compiled regex) that next_cluster interpolates into a match.  Unless
# noted otherwise, the text of interest is captured in $1.
my %line_is = (

    # "ID", whitespace, then an identifier made of two or three word
    # characters, a dot and digits (for example "Hs.2").
    ID => q{ID\s+(\w{2,3}\.\d+)},

    # Record-level tags: the tag itself, whitespace, then everything from the
    # first non-blank character onwards.
    (
        map { my $tag = $_; ( $tag => $tag . q{\s+(\S.*)} ) } qw(
            TITLE GENE CYTOBAND MGI LOCUSLINK HOMOL EXPRESS RESTR_EXPR
            GNM_TERMINUS CHROMOSOME STS TXMAP PROTSIM SCOUNT SEQUENCE
        )
    ),

    # Accession item: $1 is the run of word characters after "ACC=", $2 is
    # the optional ".digits" suffix (leading dot included).
    ACC => q{ACC=(\w+)(\.\d+)?},

    # Remaining KEY=value items: the key, "=", optional whitespace, then the
    # value from its first non-blank character onwards.
    (
        map { my $key = $_; ( $key => $key . q{=\s*(\S.*)} ) } qw(
            NID PID CLONE END LID MGC SEQTYPE TRACE PERIPHERAL
        )
    ),

    # A line that starts with "//" closes a record; nothing is captured.
    DELIMITER => q{^//},
);
# Initialise the parser object.
#
# Hands every argument after the invocant to the parent class's
# _initialize, then makes sure a cluster factory is set: when
# cluster_factory() returns a false value, a Bio::Cluster::ClusterFactory
# created with -type => 'Bio::Cluster::UniGene' is installed through the
# same accessor.  A factory that is already present is left untouched.
sub _initialize {
    my $self      = shift;
    my @init_args = @_;

    $self->SUPER::_initialize(@init_args);

    my $current_factory = $self->cluster_factory();
    if ( !$current_factory ) {
        my $default_factory = Bio::Cluster::ClusterFactory->new(
            -type => 'Bio::Cluster::UniGene',
        );
        $self->cluster_factory($default_factory);
    }
}
{
    # Compiled versions of the %line_is pattern strings, filled in on the
    # first call so the patterns are not recompiled for every line.
    my %compiled_re;

    # Read the next record from the input and turn it into a cluster object.
    #
    # One record is fetched with $self->_readline while the input record
    # separator is "\n//".  Each line of the record is tried against the
    # record-level patterns of %line_is in a fixed order; the first pattern
    # that matches decides how the line is handled.  When the delimiter line
    # is reached, the object is created through
    # $self->cluster_factory->create_object and filled with the collected
    # values.
    #
    # Returns: the created object; nothing (bare return) when _readline
    # yields a false value; undef when the record has no delimiter line.
    sub next_cluster {
        my ($self) = @_;

        local $/ = "\n//";
        my $entry = $self->_readline;
        return unless $entry;

        %compiled_re =
          map { my $tag = $_; ( $tag => qr/$line_is{$tag}/x ) } keys %line_is
          unless %compiled_re;

        # Record-level tags in the order they are tried; first match wins,
        # so the order is significant.
        my @record_tags = qw(
            ID TITLE GENE CYTOBAND MGI LOCUSLINK HOMOL EXPRESS RESTR_EXPR
            GNM_TERMINUS CHROMOSOME TXMAP STS PROTSIM SCOUNT SEQUENCE
            DELIMITER
        );

        # Item tags of a SEQUENCE line, likewise in the order they are tried.
        my @member_tags =
          qw(ACC NID PID CLONE END LID MGC SEQTYPE TRACE PERIPHERAL);

        # Tags whose captured text is stored unchanged under the tag's name.
        my %is_plain_field = map { $_ => 1 } qw(
            ID TITLE GENE CYTOBAND MGI HOMOL RESTR_EXPR GNM_TERMINUS SCOUNT
        );

        my ( %field, @locuslink, @express, @chromosome );
        my ( @sts, @txmap, @protsim, @members );

        # Tags that may occur on several lines; every occurrence is appended.
        my %appended_to = (
            CHROMOSOME => \@chromosome,
            TXMAP      => \@txmap,
            STS        => \@sts,
            PROTSIM    => \@protsim,
        );

        my $cluster;

        for my $line ( split /\n/, $entry ) {

            # Find the first record-level pattern that matches this line.
            my ( $tag, $text );
            for my $candidate (@record_tags) {
                next unless $line =~ $compiled_re{$candidate};
                ( $tag, $text ) = ( $candidate, $1 );
                last;
            }
            next unless defined $tag;

            if ( $is_plain_field{$tag} ) {
                $field{$tag} = $text;
            }
            elsif ( my $list = $appended_to{$tag} ) {
                push @{$list}, $text;
            }
            elsif ( $tag eq 'LOCUSLINK' ) {
                @locuslink = split /;/, $text;
            }
            elsif ( $tag eq 'EXPRESS' ) {

                # A single leading ';' would yield an empty first element.
                ( my $tissues = $text ) =~ s/^;//;
                @express = split /\s*;/, $tissues;
            }
            elsif ( $tag eq 'SEQUENCE' ) {

                # One member per SEQUENCE line: a hash built from the
                # ';'-separated KEY=value items, keyed by the lower-cased
                # item tag.
                my %member;
                for my $item ( split /;/, $text ) {
                    for my $candidate (@member_tags) {
                        next unless $item =~ $compiled_re{$candidate};
                        if ( $candidate eq 'ACC' ) {
                            $member{acc} = $1;

                            # $2 is ".N"; keep only what follows the dot.
                            $member{version} = substr( $2, 1 )
                              if defined $2;
                        }
                        else {
                            $member{ lc $candidate } = $1;
                        }
                        last;
                    }
                }
                push @members, \%member;
            }
            else {

                # DELIMITER: build the object from what has been collected.
                $cluster = $self->cluster_factory->create_object(
                    -display_id  => $field{ID},
                    -description => $field{TITLE},
                    -size        => $field{SCOUNT},
                    -members     => \@members,
                );

                # Setters are called in this order.  Scalar fields are set
                # only when the record supplied them; list fields are always
                # set, possibly to an empty list.
                my @assignments = (
                    [ gene         => $field{GENE} ],
                    [ cytoband     => $field{CYTOBAND} ],
                    [ mgi          => $field{MGI} ],
                    [ locuslink    => \@locuslink ],
                    [ homol        => $field{HOMOL} ],
                    [ express      => \@express ],
                    [ restr_expr   => $field{RESTR_EXPR} ],
                    [ gnm_terminus => $field{GNM_TERMINUS} ],
                    [ chromosome   => \@chromosome ],
                    [ sts          => \@sts ],
                    [ txmap        => \@txmap ],
                    [ protsim      => \@protsim ],
                );
                for my $assignment (@assignments) {
                    my ( $setter, $value ) = @{$assignment};
                    $cluster->$setter($value) if defined $value;
                }
            }
        }

        return $cluster;
    }
}
1;