#!/usr/bin/perl
# Output mode switches, read by check_collisions():
#   $DUMP_WITH_KEY - when true, print each colliding file list followed by its key
#   $COMMANDS      - otherwise, when true, print shell commands for each collision
# With both false only the plain file list is printed.
our $DUMP_WITH_KEY = 0;
our $COMMANDS      = 1;

# Collision tables. Each is keyed by the concatenation of the first two,
# three or four block digests of a file; the value is the space-prefixed
# list of files that produced that key.
my ($hashes2, $hashes3, $hashes4) = ({}, {}, {});

# Every file that is going to be examined.
my @files;
# Expand the command-line arguments into the list of files to examine.
# A directory argument is walked recursively and contributes every plain
# file found beneath it; any other argument is taken as a file name as is.
use File::Find;    # core module that provides find()

foreach my $file (@ARGV) {
    if (-d $file) {
        find(\&wanted, $file);
    }
    else {
        push(@files, $file);
    }
}

# File::Find callback. For each entry visited, $_ holds the entry being
# tested and $File::Find::name its path including the starting directory;
# plain files are appended to @files.
sub wanted {
    push(@files, $File::Find::name) if -f $_;
    return;
}
# Fingerprint every collected file and record it in the three collision
# tables.
foreach my $file (@files) {
    # do_one() reads its input from STDIN, so the file is attached to that
    # handle. The three-argument form keeps characters in the file name
    # from being interpreted as an open mode.
    unless (open(STDIN, '<', $file)) {
        warn "$file cannot be opened: $!";
        # Skip it: fingerprinting a handle that failed to open would hash
        # an empty body for every unreadable file and report them all as
        # duplicates of one another.
        next;
    }
    my @hash = do_one();
    close STDIN;

    # Keys of increasing strictness: the first two, three and four block
    # digests concatenated.
    my $hash2 = $hash[0] . $hash[1];
    my $hash3 = $hash2 . $hash[2];
    my $hash4 = $hash3 . $hash[3];

    # Values are space-prefixed, space-separated file lists; that is the
    # format check_collisions() parses.
    $hashes4->{$hash4} .= " " . $file;
    $hashes3->{$hash3} .= " " . $file;
    $hashes2->{$hash2} .= " " . $file;
}
# Report collisions from the strictest table (four matching blocks) to the
# loosest (two matching blocks), then stop.
# A named loop variable is used on purpose: check_collisions() must never
# see one of the tables aliased through $_.
foreach my $table ($hashes4, $hashes3, $hashes2) {
    check_collisions($table);
}
exit;
# check_collisions($db)
#
# Print every entry of a collision table that lists more than one file.
#
#   $db - hash reference; each value is a space-separated list of file
#         names (with one leading space) that share the key.
#
# The output format is selected by two globals:
#   $DUMP_WITH_KEY true : "<file list> [<key>]"
#   $COMMANDS true      : a shell command line that echoes the first file
#                         together with the number of duplicates and
#                         removes the remaining files with rm -f
#   neither             : "<file list>"
#
# Any further arguments are ignored. The return value is not meaningful.
#
# File names in generated commands are single-quoted so that shell
# metacharacters in a name cannot alter the command, and "--" stops a name
# beginning with "-" from being read as an option.
#
# Limitation: the lists are space-separated, so a file name that itself
# contains a space is seen as several names.
sub check_collisions {
    my ($db) = @_;

    foreach my $k (sort keys %{$db}) {
        my $list = $db->{$k};

        # Only entries naming at least two files are collisions.
        next unless $list =~ /\S \S/;
        $list =~ s/^ //;

        if ($DUMP_WITH_KEY) {
            print "$list [$k]\n";
        }
        elsif ($COMMANDS) {
            # One separator per duplicate beyond the first file.
            my $count = ($list =~ tr/ //);
            my ($keep, @dups) = grep { length } split(/ /, $list);

            my $label   = _shell_quote("$keep : $count dups");
            my $victims = join(' ', map { _shell_quote($_) } @dups);
            print "echo $label; rm -f -- $victims\n";
        }
        else {
            print "$list\n";
        }

        # Drop the reported key from all three tables.
        # NOTE(review): the tables are keyed by 4, 3 and 2 concatenated
        # digests respectively, so a key taken from one table presumably
        # never exists in the other two -- confirm whether suppressing
        # repeated reports at the looser levels was the intent.
        delete $hashes4->{$k};
        delete $hashes3->{$k};
        delete $hashes2->{$k};
    }
    return;
}

# Wrap a string in single quotes for the shell, escaping any single quote
# it contains.
sub _shell_quote {
    my ($text) = @_;
    $text =~ s/'/'\\''/g;
    return "'$text'";
}
# do_one()
#
# Read one message from STDIN and fingerprint its body.
#
# Input up to and including the first empty line is skipped; everything
# after it is the body. Some substrings are blanked out of the body, the
# remaining text is split into lines, and the lines are divided into four
# consecutive blocks whose sizes differ by at most one line (earlier blocks
# receive the extra lines). Every line therefore belongs to exactly one
# block, and a body shorter than four lines still yields digests that
# depend on its content.
#
# Takes no arguments. Returns a list of four sha1_base64 digests, one per
# block, in body order.
sub do_one {
    # sha1_base64 comes from the core Digest::SHA module; loading it here
    # keeps this routine self-contained.
    use Digest::SHA qw(sha1_base64);

    my $block_count = 4;

    # Skip everything up to and including the first empty line.
    while (my $line = <STDIN>) {
        last if $line =~ /^$/;
    }

    my $str = join('', <STDIN>);

    # Blank out substrings that presumably differ between otherwise
    # identical messages.
    $str =~ s/<[^>]+?>/ /igs;                 # anything in angle brackets
    $str =~ s/"[^\"\s]+\?[^\"\s]+\"/ /igs;     # quoted token containing '?'
    $str =~ s/\S+\?\S+/ /igs;                  # bare token containing '?'
    $str =~ s/\S+\@\S+/ /igs;                  # token containing '@'
    $str =~ s/TRCK:\S+//;                      # first "TRCK:..." token only

    # These two are anchored to the whole string, so they only fire when
    # the entire remaining body is one long token; the body is then emptied.
    $str =~ s/^[a-z0-9]{6,}[-_a-z0-9]{12,}[a-z0-9]{6,}\s*\z//is;
    $str =~ s/^\s*\S{24,}\s*\z//is;

    my @data = split(/\n/, $str);

    # Integer share per block plus the number of blocks that get one more
    # line. A fractional length handed to splice would be truncated, which
    # silently drops trailing lines and leaves short bodies with four empty
    # blocks.
    my $base  = int(@data / $block_count);
    my $extra = @data % $block_count;

    my @ret;
    for my $i (0 .. $block_count - 1) {
        my $take = $base + ($i < $extra ? 1 : 0);
        push(@ret, sha1_base64(join('', splice(@data, 0, $take))));
    }
    return @ret;
}