scripts/searchio/fastam9_to_table.PLS


            
              1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
              #!/usr/bin/perl -w
=head1 NAME 
fastm9_to_table  - turn FASTA -m 9 output into NCBI -m 9 tabular output
=head1 SYNOPSIS
 fastm9_to_table [-e evaluefilter] [-b bitscorefilter] [--header] [-o outfile] inputfile1 inputfile2 ... 
=head1 DESCRIPTION
Comand line options:
  -e/--evalue   evalue    -- filter by evalue
  -b/--bitscore bitscore  -- filter by bitscore
  --header                -- boolean flag to print column header 
  -o/--out                -- optional outputfile to write data, 
                             otherwise will write to STDOUT
  -h/--help               -- show this documentation
Not technically a SearchIO script as this doesn't use any Bioperl
components but is a useful and fast.  The output is tabular output
with the standard NCBI -m9 columns.
 queryname
 hit name
 percent identity
 alignment length
 number mismatches 
 number gaps
 query start  (if on rev-strand start > end)
 query end 
 hit start (if on rev-strand start > end)
 hit end 
 evalue
 bit score
Additionally 3 more columns are provided
 fasta score
 sw-score
 percent similar
 query length
 hit length
 query gaps
 hit gaps
=head1 AUTHOR - Jason Stajich
Jason Stajich jason_at_bioperl-dot-org
=cut
use strict;
use Getopt::Long;
my $hitsection = 0;
my %data;
my ($evalue,$bitscore,$header,$outfile) = ( 10, 0);
GetOptions(
           'b|bitscore|bits:f'   => \$bitscore,
           'e|evalue:f'          => \$evalue,
           'header'              => \$header,
           'o|out|outfile:s'     => \$outfile,
           'h|help'              => sub { exec('perldoc',$0); exit; }
           );
my $outfh;
if( $outfile ) { 
    open($outfh, ">$outfile") || die("$outfile: $!");
} else { 
    $outfh = \*STDOUT; 
}
            
# query start -- an0
# query en    -- ax0
# hit start   -- an1
# hit end     -- ax1
my @fields = qw(qname hname percid alen mmcount gapcount 
                qstart qend hstart hend evalue score bits fs sw-score 
                percsim qlen hlen qgap hgap);
print $outfh "#",uc(join("", map{ sprintf("%-10s",$_) } @fields)), "\n" if $header;
while(<>) {
    my $linestr = $_;
    if( /^\s*\d+>>>(\S+).+/ ) {        
        $data{'qname'} = $1;
        if( /\-\s+(\d+)\s+(aa|nt)\s+$/ ){
            $data{'qlen'} = $1;
        }
    } elsif( $hitsection && /^>>>\Q$data{'qname'}/ ) { 
        $hitsection = 0;
    } elsif( /^The best scores are:/ ) {               
        $hitsection = 1;
    } elsif( /^\s+$/ ) {
    } elsif( $hitsection ) {
        if( s/^(\S+)\s+(.+)\(\s*(\d+)\)\s+// ) {
            my ($hit, $desc,$hitlen) = ($1,$2,$3);
            my ($dir) = ( s/^\[(r|f)\]\s+// );
            my @line = split(/\s+/,$_);
            $data{'hname'}    = $hit;
            $data{'hlen'}     = $hitlen;
            $data{'score'}    = shift @line;
            $data{'bits'}     = shift @line;
            $data{'evalue'}   = shift @line;
            $data{'percid'}   = shift @line;
             
            $data{'percsim'}  = shift @line;
            $data{'sw-score'} = shift @line;
            $data{'alen'}     = shift @line;
            $data{'qstart'}   = shift @line;
            $data{'qend'}     = shift @line;
            $data{'pn0'}      = shift @line; # pn0
            $data{'px0'}      = shift @line; # px0
            $data{'hstart'}   = shift @line; # an1 
            $data{'hend'}     = shift @line; # ax1
            $data{'pn1'}      = shift @line; # pn1
            $data{'px1'}      = shift @line; # px1
            # query + hit gaps
            $data{'qgap'}     = shift @line;
            $data{'hgap'}     = shift @line;
            $data{'gapcount'} = $data{'qgap'} + $data{'hgap'};
            $data{'fs'}       = shift @line;
             
            $data{'mmcount'} = $data{'alen'} - ( int($data{'percid'} * $data{'alen'}) + $data{'gapcount'});
            #next if( $data{'evalue'} > $evalue || 
        #            $data{'bits'} < $bitscore );
             
            for ( $data{'percid'}, $data{'percsim'} ) {
                $_ = sprintf("%.2f",$_*100);
            }
            print $outfh join( "\t",map { $data{$_} } @fields),"\n";
        } else { 
            # print STDERR "unrecognized line \n$linestr";
        }
    } else { 
        # warn("skipping a line like this: $_");
    }
}
	Global
`s`	Focus search bar
`?`	Bring up this help dialog
	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)
	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse
	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)