scripts/venn - metacpan.org


            
              1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
—
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
              #!/usr/bin/perl 
use 5.010;
use strict ;
use warnings ; 
use List::Util qw[ sum sum0 ];
use Getopt::Std ; 
use Term::ANSIColor qw/:constants color/ ; $Term::ANSIColor::AUTORESET = 1 ; 
@ARGV = ( ( grep m/^-/ , @ARGV ) , ( grep !m/^-/ , @ARGV ) ) if ! grep /^--$/ , @ARGV ; # 
getopts '=v',  \my %o ; 
do { select STDERR ; HELP_MESSAGE () } if  ! @ARGV ; 
# READING 
my @fq ; # 各ファイルにおいて、各行の文字列の頻度表を格納する
my %fq_ ; # 全ファイルにおいて、..
my $N = 0 ;
my $dum = <> if $o{'='} ;
while ( <> ) { 
    chomp ; 
    $fq[$N]{$_} ++ ; 
    $fq_{$_} ++ ;
    if ( eof ) { $N++ ; my $dum = <> if $o{'='} && ! eof() } ; #<-- eofの括弧ある無しを使い分けた
}
#print $N, "\n" ;
# Summing 
my %bfq ; # 添え字は、どの集合に含まれるかを2進数で考えた数 2番目の添え字はファイル番号 0始まり
my %bfq_ ; 
my %bfq_min ; 
my %bfq_max ;# 最小と最大を具体的な値として格納
for my $k ( keys %fq_ ) { 
    my @which = grep { exists $fq[$_]{$k} } 0 .. $N-1 ; # その文字列をどのファイルが持つか
    my $B = sum0 map { 1 << $_ } @which ;
    $bfq_ { $B } ++ ; 
    $bfq { $B } [ $_ ] += $fq [ $_ ] { $k } for @which ; 
    next unless $o{v} ;
    $bfq_min{$B} //= $k ; $bfq_min{$B} = $k if $bfq_min{$B} gt $k ; 
    $bfq_max{$B} //= $k ; $bfq_max{$B} = $k if $bfq_max{$B} lt $k ; 
}
# Printing
print join ( "\t", "cardinal", (map {"seq$_"} 1 .. $N) , $o{v} ? ('min','max') : () ) , "\n" ; 
for my $B ( sort { $a <=> $b } keys %bfq_ ) { 
    my @tmp = map { $_ // 0 } map { $bfq { $B } [$_] } 0 .. $N -1   ;
    push @tmp , $bfq_min{$B} , $bfq_max{$B}  if $o{v} ;
    print join "\t" , $bfq_{$B} , @tmp ; #, 
    print "\n" ;
}
# ヘルプの扱い
sub VERSION_MESSAGE {}
sub HELP_MESSAGE {
    use FindBin qw[ $Script ] ; 
    $ARGV[1] //= '' ;
    open my $FH , '<' , $0 ;
    while(<$FH>){
        s/\$0/$Script/g ;
        print $_ if s/^=head1// .. s/^=cut// and $ARGV[1] =~ /^o(p(t(i(o(ns?)?)?)?)?)?$/i ? m/^\s+\-/ : 1;
    }
    close $FH ;
    exit 0 ;
}
=encoding utf8
=head1
     
    $0 ファイル名の並び
    入力: 改行区切りで値の書き込まれた1個またはそれ以上のファイル
    出力:
      ファイルが n 個入力として与えられた場合、それらn個のファイルに
      出現した値について、それがどのファイルに出現したかに応じて
      2^n -1 通りに分類し、各分類(出力の各行(縦方向)に相当)において
      異なる値が何通り出現したか(横方向の第1列目)、それらの値がi番目の
      ファイルに何回出現したか(横方向の第i+1列目)の数を出力する。
    オプション: 
       -v : 出力の各行において、右側の2列に、各分類の文字列としての最小値と最大値も出力する。
       -= : 入力の各ファイルにおいて、1行目を読み飛ばす。
    利用例(実験例) : 
       cat somefile | venn 
           # somefile の行数と、異なる行の値の個数が分かる。
       venn -v <(seq 1 3)  <(seq 3 5)  <(seq 5 18) 
           # <( .. ) はプロセス置換なので、Unix-like のシェルでないと動かない可能性はある。
       venn -v <(saikoro)  <(saikoro) <(saikoro)
           # saikoro はこの$0を作った著者がこの$0と共に提供される別のプログラム。
=cut

	Global
`s`	Focus search bar
`?`	Bring up this help dialog

	GitHub
`g` `p`	Go to pull requests
`g` `i`	go to github issues (only if github is preferred repository)

	POD
`g` `a`	Go to author
`g` `c`	Go to changes
`g` `i`	Go to issues
`g` `d`	Go to dist
`g` `r`	Go to repository/SCM
`g` `s`	Go to source
`g` `b`	Go to file browse

	Search terms
module: (e.g. module:Plugin)
distribution: (e.g. distribution:Dancer auth)
author: (e.g. author:SONGMU Redis)
version: (e.g. version:1.00)