#!/usr/bin/env perl
use
5.012;
if
( -e
"$RealBin/../lib/Proch/N50.pm"
and -e
"$RealBin/../Changes"
) {
use
lib
"$RealBin/../lib"
;
}
our
%program
= (
'NAME'
=>
'FASTX N50'
,
'AUTHOR'
=>
'Andrea Telatin'
,
'MAIL'
=>
'andrea.telatin@gmail.com'
,
'VERSION'
=>
'4.2.0'
,
);
my
$hasJSON
=
undef
;
my
$t
;
local
$Term::ANSIColor::AUTORESET
= 1;
my
$opt_separator
=
"\t"
;
my
$opt_format
=
'default'
;
my
%formats
= (
'default'
=>
'Prints only N50 for single file, TSV for multiple files'
,
'tsv'
=>
'Tab separated output (file, seqs, total size, N50, min, max)'
,
'json'
=>
'JSON (JavaScript Object Notation) output'
,
'csv'
=>
'Alias for tsv and --separator ","'
,
'custom'
=>
'Custom format with --template STRING'
,
'screen'
=>
'Screen friendly table (requires Text::ASCIITable)'
,
'short'
=>
'Not implemented'
,
'full'
=>
'Not implemented'
,
);
my
(
$opt_help
,
$opt_version
,
$opt_debug
,
$opt_color
,
$opt_nonewline
,
$opt_noheader
,
$opt_pretty
,
$opt_basename
,
$opt_template
,
$opt_fullpath
,
$opt_thousand_separator
,
$opt_ne
,
$opt_format_screen
,
$opt_format_json
,
$opt_sort_by
,
$opt_reverse_sort
,
);
$opt_sort_by
=
'N50'
;
$opt_reverse_sort
=
undef
;
my
%valid_sort_keys
= (
'auN'
=> 1,
'Ne'
=> 1,
'N90'
=> 1,
'N75'
=> 1,
'N50'
=> 1,
'min'
=> 1,
'max'
=> 1,
'seqs'
=> 1,
'size'
=> 1,
'path'
=> 1,
);
$valid_sort_keys
{
"N$opt_ne"
} = 1
if
(
defined
$opt_ne
);
my
$tab
=
"\t"
;
my
$new
=
"\n"
;
my
$_opt
= GetOptions(
'a|abspath'
=> \
$opt_fullpath
,
'b|basename'
=> \
$opt_basename
,
'c|color'
=> \
$opt_color
,
'd|debug'
=> \
$opt_debug
,
'e|Ne=i'
=> \
$opt_ne
,
'f|format=s'
=> \
$opt_format
,
'h|help'
=> \
$opt_help
,
'j|json'
=> \
$opt_format_json
,
'n|nonewline'
=> \
$opt_nonewline
,
'o|sortby=s'
=> \
$opt_sort_by
,
'p|pretty'
=> \
$opt_pretty
,
'r|reverse'
=> \
$opt_reverse_sort
,
's|separator=s'
=> \
$opt_separator
,
'q|thousands-sep'
=> \
$opt_thousand_separator
,
't|template=s'
=> \
$opt_template
,
'u|noheader'
=> \
$opt_noheader
,
'v|version'
=> \
$opt_version
,
'x|screen'
=> \
$opt_format_screen
,
);
$opt_sort_by
=
'N50'
if
(
$opt_sort_by
eq
'n50'
);
pod2usage( {
-exitval
=> 0,
-verbose
=> 2 } )
if
$opt_help
;
version()
if
defined
$opt_version
;
our
%output_object
;
our
%output_print
;
if
(
$opt_format
eq
'list'
) {
say
STDERR
"AVAILABLE OUTPUT FORMATS:"
;
for
my
$f
(
sort
keys
%formats
) {
if
(
$opt_color
) {
print
BOLD,
$f
,
"\t"
;
if
(
$formats
{
$f
} eq
'Not implemented'
) {
say
RED
$formats
{
$f
}, RESET;
}
else
{
say
RESET,
$formats
{
$f
};
}
}
else
{
say
"$f\t$formats{$f}"
;
}
}
exit
;
}
unless
(
defined
$ARGV
[0] or
$opt_version
) {
say
STDERR
"\n Usage: n50 file.fa ... \n Type n50 --help for full manual"
;
}
die
"Error: Please specify either -x (--screen) or -j (--json)\n"
if
(
$opt_format_screen
and
$opt_format_json
);
die
"Error: -x (--screen) or -j (--json) are incompatible with -f (--format)\n"
if
(
$opt_format
ne
'default'
and (
$opt_format_screen
or
$opt_format_json
) );
$opt_format
=
'screen'
if
(
$opt_format_screen
);
$opt_format
=
'json'
if
(
$opt_format_json
);
unless
(
defined
$valid_sort_keys
{
$opt_sort_by
} ) {
say
STDERR
" FATAL ERROR: Invalid sort key for -o ($opt_sort_by)"
;
say
STDERR
" Valid sort keys are: "
,
join
(
', '
,
keys
%valid_sort_keys
);
die
;
}
my
%sorters
= (
asc
=>
sub
{
$output_object
{
$b
}{
$opt_sort_by
} <=>
$output_object
{
$a
}{
$opt_sort_by
};
},
desc
=>
sub
{
$output_object
{
$a
}{
$opt_sort_by
} <=>
$output_object
{
$b
}{
$opt_sort_by
};
},
string_asc
=>
sub
{
$a
cmp
$b
},
string_desc
=>
sub
{
$b
cmp
$a
},
);
my
$sorting_order
;
if
(
$opt_sort_by
eq
'path'
) {
$sorting_order
=
'string_asc'
;
$sorting_order
=
'string_desc'
if
(
$opt_reverse_sort
);
}
else
{
$sorting_order
=
'asc'
;
$sorting_order
=
'desc'
if
(
$opt_reverse_sort
);
}
debug(
"Sorting by <$opt_sort_by>: order=$sorting_order"
);
die
(
"Unexpected fatal error:\nInvalid sort function \"$sorting_order\".\n"
)
if
( not
defined
$sorters
{
$sorting_order
} );
my
$sort_function
=
$sorters
{
$sorting_order
};
if
(
defined
$opt_format
) {
$opt_format
=
lc
(
$opt_format
);
if
( !
$formats
{
$opt_format
} ) {
my
@list
=
sort
keys
(
%formats
);
die
" FATAL ERROR:\n Output format not valid (--format '$opt_format').\n Use one of the following: "
.
join
(
', '
,
@list
) .
".\n"
;
}
if
(
$opt_format
eq
'json'
) {
$hasJSON
=
eval
{
JSON::PP->
import
();
1;
};
die
"FATAL ERROR: Please install perl module JSON::PP first [e.g. cpanm JSON::PP]\n"
unless
(
$hasJSON
);
}
if
(
$opt_format
eq
'screen'
) {
my
$has_table
=
eval
{
Text::ASCIITable->
import
();
$t
= Text::ASCIITable->new();
my
@cols
= (
'File'
,
'Seqs'
,
'Total bp'
,
'N50'
,
'min'
,
'max'
,
'N75'
,
'N90'
,
'auN'
);
push
@cols
,
"N$opt_ne"
if
(
defined
$opt_ne
);
$t
->setCols(
@cols
);
1;
};
if
( !
$has_table
) {
die
"ERROR:\nFormat 'screen' requires Text::ASCIITable installed.\n"
;
}
}
if
(
$opt_format
eq
'custom'
and !
defined
$opt_template
) {
print
STDERR
" [WARNING] There is no default template at the moment!\n"
;
print
STDERR
" Specify a --template STRING with --format custom or no output will be printed\n"
;
}
if
(
$formats
{
$opt_format
} eq
'Not implemented'
) {
print
STDERR
" WARNING: Format '$opt_format' not implemented yet. Switching to 'tsv'.\n"
;
$opt_format
=
'tsv'
;
}
}
foreach
my
$file
(
@ARGV
) {
if
( ( ! -e
"$file"
) and (
$file
ne
'-'
) ) {
die
" FATAL ERROR:\n File not found ($file).\n"
;
}
elsif
(-d
"$file"
) {
print
STDERR
"WARNING: Ignoring directory $file.\n"
;
next
;
}
elsif
(
$file
eq
'-'
) {
$file
=
'-'
;
}
else
{
open
STDIN,
'<'
,
"$file"
||
die
" FATAL ERROR:\n Unable to open file for reading ($file).\n"
;
}
my
$JSON
= 0;
$JSON
= 1
if
(
defined
$opt_format
and
$opt_format
=~ /JSON/ );
my
$FileStats
= Proch::N50::getStats(
$file
,
$JSON
,
$opt_ne
) || 0;
if
( !
$FileStats
->{status} ) {
print
STDERR
"[WARNING]\tError parsing \"$file\". Skipped.\n"
;
if
(
$opt_debug
) {
print
Dumper
$FileStats
;
}
next
;
}
say
Dumper
$FileStats
if
(
$opt_debug
);
if
( !
defined
$FileStats
->{auN} ) {
say
STDERR
"Fatal error: 'auN' statistics not calculated parsing \"$file\". Proch::N50 > 1.2.0 required, "
,
$Proch::N50::VERSION
,
" found."
;
say
STDERR Dumper
$FileStats
;
say
STDERR
$Proch::N50::VERSION
;
die
;
}
my
$n50
=
$FileStats
->{N50} + 0;
my
$n
=
$FileStats
->{seqs} + 0;
my
$slen
=
$FileStats
->{size} + 0;
my
$min
=
$FileStats
->{min} + 0;
my
$max
=
$FileStats
->{max} + 0;
my
$n75
=
$FileStats
->{N75} + 0;
my
$n90
=
$FileStats
->{N90} + 0;
my
$auN
=
$FileStats
->{auN} + 0;
my
$Ne
=
defined
$opt_ne
?
$FileStats
->{Ne} :
undef
;
say
STDERR
"[$file]\tTotalSize:$slen;N50:$n50;Sequences:$n"
if
(
$opt_debug
);
$file
= basename(
$file
)
if
(
$opt_basename
);
$file
= File::Spec->rel2abs(
$file
)
if
(
$opt_fullpath
);
my
%metrics
= (
'seqs'
=>
$n
,
'N50'
=>
$n50
,
'size'
=>
$slen
,
'min'
=>
$min
,
'max'
=>
$max
,
'auN'
=>
$auN
,
'N75'
=>
$n75
,
'N90'
=>
$n90
,
);
$metrics
{
"N$opt_ne"
} =
$Ne
if
(
defined
$opt_ne
);
if
(
defined
$output_object
{
$file
} ) {
print
STDERR
" WARNING: "
,
"Overwriting '$file': multiple items with the same filename. Try not using -b/--basename.\n"
;
}
$output_object
{
$file
} = \
%metrics
;
$output_print
{
$file
} = {};
for
my
$key
(
keys
%{
$output_object
{
$file
} } ) {
if
(
$opt_thousand_separator
or
$opt_format
eq
'screen'
) {
$output_print
{
$file
}{
$key
} = thousands(
$output_object
{
$file
}{
$key
} );
}
else
{
$output_print
{
$file
}{
$key
} =
$output_object
{
$file
}{
$key
} ;
}
}
}
my
$file_num
=
scalar
keys
%output_object
;
if
( not
$opt_format
or
$opt_format
eq
'default'
) {
debug(
"Activating format <default>"
);
if
(
$file_num
== 1 ) {
my
@keys
=
keys
%output_object
;
if
(
$opt_nonewline
) {
print
$opt_thousand_separator
? thousands(
$output_object
{
$keys
[0] }{
'N50'
} )
:
$output_object
{
$keys
[0] }{
'N50'
};
}
else
{
say
$opt_thousand_separator
? thousands(
$output_object
{
$keys
[0] }{
'N50'
} )
:
$output_object
{
$keys
[0] }{
'N50'
};
}
}
else
{
foreach
my
$r
(
sort
$sort_function
keys
%output_object
) {
say
$r
,
$opt_separator
,
$opt_thousand_separator
? thousands(
$output_print
{
$r
}{
'N50'
} )
:
$output_print
{
$r
}{
'N50'
};
}
}
}
elsif
(
$opt_format
eq
'json'
) {
my
$json
= JSON::PP->new->ascii->allow_nonref;
my
$json_printed
=
$json
->encode( \
%output_print
);
if
(
$opt_pretty
) {
$json_printed
=
$json
->pretty->encode( \
%output_print
);
}
say
$json_printed
;
}
elsif
(
$opt_format
eq
'tsv'
or
$opt_format
eq
'csv'
) {
$opt_separator
=
','
if
(
$opt_format
eq
'csv'
);
my
@fields
= (
'path'
,
'seqs'
,
'size'
,
'N50'
,
'min'
,
'max'
,
'N75'
,
'N90'
,
'auN'
);
push
(
@fields
,
"N$opt_ne"
)
if
(
defined
$opt_ne
);
say
'#'
,
join
(
$opt_separator
,
@fields
)
if
( !
defined
$opt_noheader
);
foreach
my
$r
(
sort
$sort_function
keys
%output_object
) {
print
$r
,
$opt_separator
;
for
(
my
$i
= 1 ;
$i
<=
$#fields
;
$i
++ ) {
if
(
$opt_thousand_separator
) {
print
'"'
if
(
$opt_format
eq
'csv'
or
$opt_separator
eq
','
);
print
thousands(
$output_object
{
$r
}{
$fields
[
$i
] } )
if
(
defined
$output_object
{
$r
}{
$fields
[
$i
] } );
print
'"'
if
(
$opt_format
eq
'csv'
or
$opt_separator
eq
','
);
}
else
{
print
$output_object
{
$r
}{
$fields
[
$i
] }
if
(
defined
$output_object
{
$r
}{
$fields
[
$i
] } );
}
if
( (
$i
==
$#fields
) and ( not
$opt_nonewline
) ) {
print
"\n"
;
}
else
{
print
$opt_separator
;
}
}
}
}
elsif
(
$opt_format
eq
'custom'
) {
my
@fields
= (
'seqs'
,
'size'
,
'N50'
,
'min'
,
'max'
,
'N75'
,
'N90'
,
'auN'
);
push
(
@fields
,
"N$opt_ne"
)
if
(
defined
$opt_ne
);
foreach
my
$r
(
sort
$sort_function
keys
%output_object
) {
my
$output_string
=
''
;
$output_string
.=
$opt_template
if
(
defined
$opt_template
);
$output_string
=~ s/(\{new\}|\{n\}|\\n)/
$new
/g
if
(
$output_string
=~ /(\{new\}|\{n\}|\\n)/ );
$output_string
=~ s/(\{tab\}|\{t\}|\\t)/
$tab
/g
if
(
$output_string
=~ /(\{tab\}|{t}|\\t)/ );
$output_string
=~ s/\{path\}/
$r
/g
if
(
$output_string
=~ /\{path\}/ );
foreach
my
$f
(
@fields
) {
$output_string
=~ s/\{
$f
\}/
$output_print
{
$r
}{
$f
}/g;
}
print
$output_string
;
}
}
elsif
(
$opt_format
eq
'screen'
) {
my
@fields
= (
'path'
,
'seqs'
,
'size'
,
'N50'
,
'min'
,
'max'
,
'N75'
,
'N90'
,
'auN'
);
push
(
@fields
,
"N$opt_ne"
)
if
(
defined
$opt_ne
);
foreach
my
$r
(
sort
$sort_function
keys
%output_object
) {
my
@array
;
push
(
@array
,
$r
);
for
(
my
$i
= 1 ;
$i
<=
$#fields
;
$i
++ ) {
if
(
defined
$output_print
{
$r
}{
$fields
[
$i
] } ) {
push
(
@array
,
$output_print
{
$r
}{
$fields
[
$i
] } )
}
else
{
say
Dumper
$output_print
{
$r
};
die
"$fields[$i] not defined"
;
}
}
$t
->addRow(
@array
);
}
print
$t
;
}
sub
debug {
return
unless
defined
$opt_debug
;
my
(
$message
,
$title
) =
@_
;
$title
=
'INFO'
unless
defined
$title
;
$title
=
uc
(
$title
);
printMessage(
$message
,
$title
,
'green'
,
'yellow'
);
return
1;
}
sub
printMessage {
my
(
$message
,
$title
,
$title_color
,
$message_color
) =
@_
;
if
(!
defined
$title_color
) {
$title_color
=
'reset'
;
}
elsif
(! colorvalid(
$title_color
)) {
say
STDERR
"$title_color not valid title color"
;
$title_color
=
'reset'
;
}
if
(!
defined
$message_color
) {
$message_color
=
'reset'
;
}
elsif
(! colorvalid(
$message_color
)) {
say
STDERR
"$message_color not valid title color"
;
$message_color
=
'reset'
;
}
print
STDERR colored(
"$title"
,
$title_color
),
"\t"
if
(
$title
);
say
colored(
"$message"
,
$message_color
);
return
1;
}
sub
version {
printMessage(
"$program{NAME}, ver. $program{VERSION}"
,
''
,
undef
,
'bold blue'
);
printMessage(
"Program to calculate N50 from multiple FASTA/FASTQ files,\n"
.
);
printMessage(
"Using Proch::N50 $Proch::N50::VERSION"
,
''
,
undef
,
'red'
);
return
$program
{VERSION};
}
sub
n50fromHash {
my
(
$hash_ref
,
$total
) =
@_
;
my
$tlen
= 0;
foreach
my
$s
(
sort
{
$a
<=>
$b
}
keys
%{
$hash_ref
} ) {
$tlen
+=
$s
* ${
$hash_ref
}{
$s
};
return
$s
if
(
$tlen
> (
$total
/ 2 ) );
}
return
0;
}
sub
thousands {
my
(
$number
) =
@_
;
my
(
$int
,
$dec
) =
split
/\./,
$number
;
$int
=~ s/(\d{1,3}?)(?=(\d{3})+$)/$1,/g;
$dec
=
$dec
?
".$dec"
:
''
;
return
"$int$dec"
;
}
sub
readfq {
my
(
$fh
,
$aux
) =
@_
;
@$aux
= [
undef
, 0 ]
if
( !(
@$aux
) );
return
if
(
$aux
->[1] );
if
( !
defined
(
$aux
->[0] ) ) {
while
(<
$fh
>) {
chomp
;
if
(
substr
(
$_
, 0, 1 ) eq
'>'
||
substr
(
$_
, 0, 1 ) eq
'@'
) {
$aux
->[0] =
$_
;
last
;
}
}
if
( !
defined
(
$aux
->[0] ) ) {
$aux
->[1] = 1;
return
;
}
}
my
$name
=
''
;
if
(
defined
$_
) {
$name
= /^.(\S+)/ ? $1 :
''
;
}
my
$seq
=
''
;
my
$c
;
$aux
->[0] =
undef
;
while
(<
$fh
>) {
chomp
;
$c
=
substr
(
$_
, 0, 1 );
last
if
(
$c
eq
'>'
||
$c
eq
'@'
||
$c
eq
'+'
);
$seq
.=
$_
;
}
$aux
->[0] =
$_
;
$aux
->[1] = 1
if
( !
defined
(
$aux
->[0] ) );
return
(
$name
,
$seq
)
if
(
$c
ne
'+'
);
my
$qual
=
''
;
while
(<
$fh
>) {
chomp
;
$qual
.=
$_
;
if
(
length
(
$qual
) >=
length
(
$seq
) ) {
$aux
->[0] =
undef
;
return
(
$name
,
$seq
,
$qual
);
}
}
$aux
->[1] = 1;
return
(
$name
,
$seq
);
}