#!/usr/bin/env perl
use strict;
use feature qw/state say/;
use 5.010;
my $args = new Getopt::Declare q{
[strict]
[mutex: -d -e -l -plot]
[mutex: -e -l -p -plot]
-d[ump] dump the header (-d); dump the header and the profile (-dp)
-e[xtract] extract the entire scan as binary data (will print by default)
-l[ist] print or extract the peak list [required]
-p[rofile] print or extract the scan profile as a 2-column table [required]
-plot plot the profile (and the called peaks, if available) [required]
-v convert f -> M/z [requires: -p]
-z add empty bins [requires: -p || -plot]
-n[umber] <n:+i> select scan number <n> [required]
-mz <low:+n> .. <high:+n> limit the plot to the range of M/z values between <low> and <high> [requires: (-p && -v) || -plot]
<file> input file [required]
}
or exit(-1);
my $file = $args->{"<file>"};
-e $file or die "file '$file' does not exist";
-f $file or die "'$file' is not a plain file";
-s $file or die "'$file' has zero size";
# -----------------------------------------------------------------------------
open INPUT, "<$file" or die "can't open '$file': $!";
binmode INPUT;
my $header = Finnigan::FileHeader->decode(\*INPUT);
my $VERSION = $header->version;
my $seq_row = Finnigan::SeqRow->decode(\*INPUT, $VERSION);
my $cas_info = Finnigan::CASInfo->decode(\*INPUT);
my $rfi = Finnigan::RawFileInfo->decode(\*INPUT, $VERSION);
my $data_addr = $rfi->preamble->data_addr;
my $run_header_addr = $rfi->preamble->run_header_addr;
# fast-forward to RunHeader
seek INPUT, $run_header_addr, 0;
my $run_header = Finnigan::RunHeader->decode(\*INPUT, $VERSION);
my $scan_index_addr = $run_header->scan_index_addr;
my $trailer_addr = $run_header->trailer_addr;
# fast-forward to ScanIndex
seek INPUT, $scan_index_addr, 0;
my $scan_index;
# this code is not fool-proof and is not finished! It assumes that
# there are exactly as many entries in ScanIndex as would fit
# between $first_scan and $last_scan. In other words, the internal
# indices and links are not checked.
my $first_scan = $run_header->sample_info->first_scan;
my $last_scan = $run_header->sample_info->last_scan;
my $n = $args->{-n}{"<n>"};
die "index $n is not in the range of available scan numbers ($first_scan .. $last_scan)"
unless $n >= $first_scan and $n <= $last_scan;
# get the first entry
my $entry = Finnigan::ScanIndexEntry->decode(\*INPUT, $VERSION);
my $size = $entry->size;
if ($n > 1) {
seek INPUT, $scan_index_addr + ($n - 1)*$size, 0;
$entry = Finnigan::ScanIndexEntry->decode(\*INPUT, $VERSION);
}
$scan_index = $entry->values;
# Now go read the trailer. Because the trailer records are of variable
# size, they are not directly addressable and all of them must be
# read, up to the highest number in the range.
seek INPUT, $trailer_addr, 0;
# read the number of ScanEvent records in the file
my $rec;
my $bytes_to_read = 4;
my $nbytes = read INPUT, $rec, $bytes_to_read;
$nbytes == $bytes_to_read
or die "could not read all $bytes_to_read bytes of the trailer scan events count at $trailer_addr";
my $trailer_length = unpack 'V', $rec;
if ($trailer_length == 0) {
print STDERR "warning: zero trailer length at file offset " . (tell(INPUT) - $nbytes) . "\n";
$trailer_length = $last_scan - $first_scan + 1;
}
# read all scan events preceding the desired one (they are variable-size structures)
my $scan_event;
foreach my $i ( 1 .. $trailer_length) {
$scan_event = Finnigan::ScanEvent->decode(\*INPUT, $VERSION) or die "could not decode ScanEvent at file offset" . tell(INPUT);
next if $i < $n;
last;
}
# start of the scan data (packet header)
seek INPUT, $data_addr + $scan_index->{offset}, 0;
my $ph = Finnigan::PacketHeader->decode(\*INPUT);
my $profile;
$profile = Finnigan::Profile->decode(\*INPUT, $ph->layout) if $ph->profile_size;
my $peaks;
$peaks = Finnigan::Peaks->decode(\*INPUT) if $ph->peak_list_size;
# dump the header
if ( exists $args->{-d} ) {
$ph->dump();
}
# least the centroids
if ( exists $args->{-l} ) {
if ( $peaks ) {
if ( exists $args->{-e} ) {
say STDERR sprintf("%d (%x) .. %d (%x)", $peaks->addr, $peaks->addr, $peaks->addr + $peaks->size, $peaks->addr + $peaks->size);
seek INPUT, $peaks->addr, 0;
$nbytes = read INPUT, $rec, $peaks->size;
$nbytes == $peaks->size
or die "could not read all " . $peaks->size . " bytes of the peak list at " . $peaks->addr;
print $rec;
}
else {
$peaks->list;
}
exit(0); # the job is done
}
else {
say STDERR "this scan has no called peaks";
}
}
# various ways to look at the profile
if ( exists $args->{-p} or exists $args->{-plot}) {
unless ( $profile ) {
say STDERR "this scan has no profile";
exit; # done with the task
}
if ( exists $args->{-p} ) {
if ( exists $args->{-e} ) {
say STDERR sprintf("%d (%x) .. %d (%x)", $profile->addr, $profile->addr, $profile->addr + $profile->size, $profile->addr + $profile->size);
seek INPUT, $profile->addr, 0;
$nbytes = read INPUT, $rec, $profile->size;
$nbytes == $profile->size
or die "could not read all " . $profile->size . " bytes of the peak list at " . $profile->addr;
print $rec;
}
else {
if ( exists $args->{-d} ) {
say "--------------------------------------";
say "Profile:";
say " first value: " . $profile->first_value;
say " step: " . $profile->step;
say " number of chunks: " . $profile->nchunks;
say " number of bins: " . $profile->nbins;
foreach my $i ( 0 .. $profile->nchunks - 1 ) {
say " --- Chunk $i ---";
say $profile->chunk->[$i]->dump;
}
}
else {
my $range = [$ph->low_mz, $ph->high_mz];
if ( exists $args->{-mz} ) {
$range = [$args->{-mz}{"<low>"}, $args->{-mz}{"<high>"}];
}
if ( exists $args->{-v} ) {
$profile->set_converter($scan_event->converter);
$profile->print_bins($range, $args->{-z});
}
else {
if ( $scan_event->nparam == 0 ) {
# this is a (naive) sign that the profile has already been converted
$profile->set_converter(sub{shift});
}
$profile->print_bins(undef, $args->{-z});
}
}
}
}
else {
# must plot, then
my $range = [$ph->low_mz, $ph->high_mz];
if ( exists $args->{-mz} ) {
$range = [$args->{-mz}{"<low>"}, $args->{-mz}{"<high>"}];
}
if ( $scan_event->nparam == 0 ) {
# this is a (naive) sign that the profile has already been converted
say qq{profile_input <- "f\ts};
$profile->set_converter(sub{shift});
$profile->print_bins($range, $args->{-z}); # no conversion
say q{"};
my $peaks_exist = $peaks ? "centroids are present" : "no centroids";
say <<END;
p <- read.table(textConnection(profile_input), header=TRUE)
postscript(file="", command="cat", paper="special", onefile=F, horizontal=F, width=8, height=6, pointsize=15)
plot(p, type="h", main="converted profile ($peaks_exist)", xlab=expression(M/z), ylab="abundance")
END
}
else {
$profile->set_converter($scan_event->converter);
say qq{profile_input <- "mz\ts};
$profile->print_bins($range, $args->{-z});
say q{"};
say qq{centroid_input <- "mz\ts};
$peaks->list();
say q{"};
say <<END;
p <- read.table(textConnection(profile_input), header=TRUE)
c <- read.table(textConnection(centroid_input), header=TRUE)
postscript(file="", command="cat", paper="special", onefile=F, horizontal=F, width=8, height=6, pointsize=15)
plot(p, type="h", main="software-converted profile and peak centroids", xlab=expression(M/z), ylab="abundance")
points(c)
END
}
}
}
__END__
=head1 NAME
uf-scan - examine scan data in a single MS scan
=head1 SYNOPSIS
uf-scan [options] file
Options:
-d[ump] dump the packet header (-d); dump the header and the profile (-dp)
-e[xtract] extract the entire scan data as a binary chunk
-l[ist] list the called peaks [required]
-p[rofile] print the scan profile as a 2-column table [required]
-plot plot the profile (along with the called peaks, if available) [required]
-v convert f -> M/z [requires: -p]
-z add empty bins [requires: -p]
-n[umber] <n:+i> select scan number <n> [required]
-mz <low:+n> .. <high:+n> limit the plot to the range of M/z values between <low> and <high> [requires: -plot]
<file> input file [required]
=head1 OPTIONS
=over 4
=item B<-help>
Print a brief help message and exits.
=item B<-l[ist]>
Get the peak list from the scan number B<-n>, if they are available. If the called peaks are not present, uf-scan will print a message to that effect and exit.
=item B<-p[rofile]>
Prints the scan profile for scan number B<-n>, if it is available. If the profile is not present, uf-scan will print a message to that effect and exit.
=item B<-plot>
This option generates an R script to plot the profile. Prints the scan profile for scan number B<-n>, if it is available. If the profile is not present, uf-scan will print a message to that effect and exit.
=item B<-n[umber]>
Gives the number of a single scan to process
=item B<-e[xtract]>
Used with either B<-l> or B<-p>, this option option clips the binary data chunk corresponding to either the peak list or to the scan profile and sends it to STDOUT.
=back
=head1 DESCRIPTION
B<uf-scan> can be used to list or plot the scan data for a single scan. The B<-profile> option instructs B<uf-scan> to print the profile data, the B<-list> option lists the peaks, and the B<-plot> option writes an R script to plot the profile and peak centroids, if both kinds of data are present in the raw file, or just the profile if the centroids are not present.
Options B<-profile>, B<-list> and B<-plot> are mutually exclusive.
To convert the raw scan data into M/z values, use the B<-v> option.
Option B<-z> fills the gaps between the profile peaks with zeroes, to create a continuous table.
=head1 SEE ALSO
Finnigan::PacketHeader
Finingan::Profile
Finingan::Peaks
Finnigan::Scan
L<uf-mzxml>
=head1 EXAMPLES
=over 4
=item Print all raw profile bins in the 1st scan:
uf-scan -p -n 1 sample.raw
=item Extracts the entire scan profile in the binary form:
uf-scan -ep -n 1 sample.raw
=item Same as above, except the bin values will be converted into M/z:
uf-scan -pv -n 1 sample.raw
=item Same as above, but in addition, all empty bins are wirtten out as well:
uf-scan -pvz -n 1 sample.raw
=item Print the list of centroids in the 1st scan:
uf-scan -l -n 1 sample.raw
Note that B<uf-scan> does not calculate the peak centroids from the
profile; it only lists the existing centroids if they are present.
=item This command will call R to plot the profile in the given range of M/z values:
uf-scan -plot -n 1 -mz 445.0 .. 445.2 sample.raw | R --vanilla --slave > plot.eps
If called peaks are present, they will be shown as dots on the graph.
=item To see the amount of correction applied to each bin in a scan:
uf-scan -d -p -n 18588 sample.raw | grep fudge | cut -f 5
=back
=cut