The Perl and Raku Conference 2025: Greenville, South Carolina - June 27-29 Learn more

# POD documentation - main docs before the code
=head1 NAME
GenOO::Spliceable - Role for a region that can be spliced
=head1 SYNOPSIS
# This role provides regions with the splicing attributes and methods
=head1 DESCRIPTION
An object that consumes this role gets splicing attributes and methods such as exons and
introns. The key attributes of this class are "splice_starts" and "splice_stops"
which are sorted arrays of coordinates that define the intervals for exons.
-------------EXON_1----------- ------------EXON_2------------
SPLICE_START_1...SPLICE_STOP_1...INTRON...SPLICE_START_2...SPLICE_STOP_2...INTRON...
=head1 EXAMPLES
# Get the location information on the reference sequence
$obj_with_role->exons;
$obj_with_role->introns;
# Check if a position is within an exon or an intron
$obj_with_role->is_position_within_exon(120); # 1/0
$obj_with_role->is_position_within_intron(120); # 0/1
# Get the length of the exonic region
$obj_with_role->exonic_length;
=cut
# Let the code begin...
$GenOO::Spliceable::VERSION = '1.5.2';
# Define new data type
subtype 'SortedArrayRef', as 'ArrayRef', where { _sorted_array() };
# Define coercions to new data type
coerce 'SortedArrayRef', from 'ArrayRef', via { [sort {$a <=> $b} @{$_}] };
coerce 'SortedArrayRef', from 'Str' , via { [sort {$a <=> $b} (split(/\D+/,$_))] };
# Define attributes
has 'splice_starts' => (
isa => 'SortedArrayRef',
is => 'ro',
writer => '_set_splice_starts',
required => 1,
coerce => 1
);
has 'splice_stops' => (
isa => 'SortedArrayRef',
is => 'ro',
writer => '_set_splice_stops',
required => 1,
coerce => 1
);
has 'exons' => (
isa => 'ArrayRef',
is => 'ro',
builder => '_create_exons',
init_arg => undef,
lazy => 1,
);
has 'introns' => (
isa => 'ArrayRef',
is => 'ro',
builder => '_create_introns',
init_arg => undef,
lazy => 1,
);
# Define consumed roles
sub BUILD {
my $self = shift;
$self->_sanitize_splice_starts_and_stops;
}
#######################################################################
######################## Interface Methods ########################
#######################################################################
sub is_position_within_exon {
my ($self, $position) = @_;
my $exons = $self->exons;
foreach my $exon (@$exons) {
if ($exon->contains_position($position)) {
return 1;
}
}
return 0;
}
sub is_position_within_intron {
my ($self, $position) = @_;
my $introns = $self->introns;
foreach my $intron (@$introns) {
if ($intron->contains_position($position)) {
return 1;
}
}
return 0;
}
sub exon_exon_junctions {
my ($self) = @_;
my @junctions;
my @junction_starts;
my @junction_stops;
my $exons = $self->exons;
if (@$exons > 1) {
for (my $i=0;$i<@$exons-1;$i++) {
push @junction_starts, $$exons[$i]->stop;
push @junction_stops, $$exons[$i+1]->start;
}
}
my $junctions_count = @junction_starts == @junction_stops ? @junction_starts : die "Junctions starts are not of the same size as junction stops\n";
for (my $i=0;$i<$junctions_count;$i++) {
push @junctions, GenOO::Junction->new(
species => $self->species,
strand => $self->strand,
chromosome => $self->chromosome,
start => $junction_starts[$i],
stop => $junction_stops[$i],
part_of => $self,
);
}
return \@junctions;
}
sub exonic_sequence {
my ($self) = @_;
if (defined $self->sequence) {
my $exonic_sequence = '';
my $seq = $self->strand == 1 ? $self->sequence : reverse($self->sequence);
foreach my $exon (@{$self->exons}) {
$exonic_sequence .= substr($seq, ($exon->start - $self->start), $exon->length);
}
if ($self->strand == 1) {
return $exonic_sequence;
}
else {
return reverse($exonic_sequence);
}
}
}
sub exonic_length {
my ($self) = @_;
my $length = 0;
foreach my $exon (@{$self->exons}) {
$length += $exon->length;
}
return $length;
}
sub intronic_length {
my ($self) = @_;
my $length = 0;
foreach my $intron (@{$self->introns}) {
$length += $intron->length;
}
return $length;
}
sub relative_exonic_position {
my ($self, $abs_pos) = @_;
if ($self->is_position_within_exon($abs_pos)) {
my $relative_pos = $abs_pos - $self->start;
foreach my $intron (@{$self->introns}) {
if ($intron->stop < $abs_pos) {
$relative_pos -= $intron->length;
}
else {
last;
}
}
return $relative_pos;
}
else {
return undef;
}
}
sub set_splice_starts_and_stops {
my ($self, $splice_starts, $splice_stops) = @_;
$self->_set_splice_starts($splice_starts);
$self->_set_splice_stops($splice_stops);
$self->_sanitize_splice_starts_and_stops;
}
#######################################################################
####################### Private Methods ############################
#######################################################################
sub _create_exons {
my ($self) = @_;
my $exon_starts = $self->splice_starts;
my $exon_stops = $self->splice_stops;
my @exons;
for (my $i=0;$i<@{$exon_starts};$i++) {
push @exons, GenOO::Exon->new({
strand => $self->strand,
chromosome => $self->rname,
start => $$exon_starts[$i],
stop => $$exon_stops[$i],
part_of => $self
});
}
return \@exons;
}
sub _create_introns {
my ($self) = @_;
my $exon_starts = $self->splice_starts;
my $exon_stops = $self->splice_stops;
my @introns;
if ($self->start < $$exon_starts[0]) {
push @introns, GenOO::Intron->new({
strand => $self->strand,
chromosome => $self->rname,
start => $self->start,
stop => $$exon_starts[0] - 1,
part_of => $self,
});
}
for (my $i=1;$i<@{$exon_starts};$i++) {
push @introns, (GenOO::Intron->new({
strand => $self->strand,
chromosome => $self->rname,
start => ${$exon_stops}[$i-1] + 1,
stop => ${$exon_starts}[$i] - 1,
part_of => $self,
}));
}
if ($self->stop > $$exon_stops[-1]) {
push @introns, (GenOO::Intron->new({
strand => $self->strand,
chromosome => $self->rname,
start => $$exon_stops[-1] + 1,
stop => $self->stop,
part_of => $self,
}));
}
return \@introns;
}
sub _sanitize_splice_starts_and_stops {
my ($self) = @_;
my $splice_starts = $self->splice_starts;
my $splice_stops = $self->splice_stops;
if (@$splice_starts != @$splice_stops) {
die "Error: Spice starts array is not of the same size as splice_stops (".scalar @$splice_starts."!=".scalar @$splice_stops.")\n";
}
my $index = 0;
while ($index < (@$splice_starts-1)) {
if ($$splice_stops[$index] == $$splice_starts[$index+1] - 1) {
$$splice_stops[$index] = $$splice_stops[$index+1];
splice @$splice_starts, $index+1, 1;
splice @$splice_stops, $index+1, 1;
}
else {
$index++;
}
}
}
#######################################################################
####################### Private Routines ###########################
#######################################################################
sub _sanitize_splice_coords_within_limits {
my ($pre_splice_starts, $pre_splice_stops, $start, $stop) = @_;
my @splice_starts;
my @splice_stops;
for (my $i=0;$i<@$pre_splice_starts;$i++) {
if ($$pre_splice_stops[$i] < $start) {
next;
}
elsif ($$pre_splice_starts[$i] > $stop) {
next;
}
else { #if the exon overlaps or is contained in the UTR5
if ($start >= $$pre_splice_starts[$i]) {
push @splice_starts, $start;
}
else {
push @splice_starts, $$pre_splice_starts[$i];
}
if ($stop < $$pre_splice_stops[$i]) {
push @splice_stops, $stop;
}
else {
push @splice_stops, $$pre_splice_stops[$i];
}
}
}
return \@splice_starts, \@splice_stops;
}
sub _sorted_array {
my $arrayref = $_;
if (@{$arrayref} > 1){
for (my $i = 1; $i < @{$arrayref}; $i++){
if ($$arrayref[$i] < $$arrayref[$i-1]){
return 0;
}
}
return 1;
}
return 1;
}
1;