#!/usr/local/perl5.005_56.Mar06/bin/perl -w
eval 'exec perl -w -S $0 "$@"'
if 0;
use strict;
use File::Path;
use Getopt::Long;
require WAIT::Config;
require WAIT::Database;
require WAIT::Document::Find;
require WAIT::Parse::HTML;
# ---------------------------------------------------------------------
# Main program: parse the options, optionally drop the table (-clean),
# open or create the database and the table, then hand every document
# found under the document root to index().
# ---------------------------------------------------------------------

# Option defaults; GetOptions overwrites the entries given on the
# command line.
my %OPT = (
    database => 'DB',
    dir      => $WAIT::Config->{WAIT_home} || '/tmp',
    table    => 'kbox',
    clean    => 0,
    remove   => 0,
);

GetOptions(\%OPT,
           'database=s',
           'dir=s',
           'table=s',
           'clean!',
           'remove',
          )
    or die "Usage: $0 [-database name] [-dir directory] [-table name]"
         . " [-clean] [-remove]\n";

# Path whose existence decides between opening and creating the database.
my $db_dir = "$OPT{dir}/$OPT{database}";

my $db;

# -clean: drop the table, remove its directory and stop. No indexing is
# done in the same run.
if ($OPT{clean} and -d $db_dir) {
    eval {
        my $tmp = WAIT::Database->open(name        => $OPT{database},
                                       'directory' => $OPT{dir})
            or die "Could not open database $OPT{database}: $@";
        my $tbl = $tmp->table(name => $OPT{table});
        $tbl->drop if $tbl;
        $tmp->close;
        my $table_dir = "$db_dir/$OPT{table}";
        rmtree($table_dir, 1, 1) if -d $table_dir;
    };
    # Cleaning remains best-effort (the script still exits normally), but
    # a failure is now reported instead of being swallowed by the eval.
    warn "Could not clean table $OPT{table}: $@" if $@;
    exit;
}

if (-d $db_dir) {
    $db = WAIT::Database->open(name        => $OPT{database},
                               'directory' => $OPT{dir})
        or die "Could not open database $OPT{database}: $@";
} else {
    $db = WAIT::Database->create(name        => $OPT{database},
                                 'directory' => $OPT{dir})
        or die "Could not create database $OPT{database}: $@";
}

# Parser used both as the table layout and, in index(), to split a
# document into its fields.
my $layout = WAIT::Parse::HTML->new;

# Filter pipelines for the inverted indexes. $stem and $text differ only
# in the trailing 'Stem' step. Each gets its own option hash so the two
# structures stay independent of each other.
my $stem = [{
             'prefix'    => ['isotr', 'isolc'],
             'intervall' => ['isotr', 'isolc'],
            },
            'decode_entities', 'isotr', 'isolc', 'split2', 'stop', 'Stem'];
my $text = [{
             'prefix'    => ['isotr', 'isolc'],
             'intervall' => ['isotr', 'isolc'],
            },
            'decode_entities', 'isotr', 'isolc', 'split2', 'stop'];

# %D is tied to the document finder and iterated with each() below.
# NOTE(review): the document root is hard-coded and positional
# command-line arguments are not consulted here.
# NOTE(review): the file-name pattern is unanchored, so it accepts any
# name that merely contains ".htm" -- confirm whether /\.html?$/ was
# intended.
my %D;
my $access = tie(%D, 'WAIT::Document::Find', sub { $_[0] =~ /\.htm/; },
                 "/usr/local/etc/httpd/htdocs/berlin");
die "Could not tie to WAIT::Document::Find: $@" unless defined $access;

# Re-use the table when it exists, otherwise create it.
# 'title' appears twice in invindex on purpose of the original author:
# once with the $stem pipeline and once with the $text pipeline.
my $tb = $db->table(name => $OPT{table}) ||
    $db->create_table
        (name     => $OPT{table},
         attr     => ['docid', 'headline', 'size'],
         keyset   => [['docid']],
         layout   => $layout,
         access   => $access,
         invindex =>
         [
          'text'  => $stem,
          'title' => $stem,
          'title' => $text,
         ]
        );
die "Could not open or create table $OPT{table}\n" unless $tb;

while (my ($path, $content) = each %D) {
    # The leading '&' is required: a plain index(...) would call Perl's
    # builtin string-search function instead of the sub defined below.
    &index($path, $content);
}

$db->close();
exit;
# Counter of documents that passed all checks in index(). The declaration
# sits after the main program's exit, so the statement itself is never
# run; the variable simply starts out undefined and is incremented from
# there.
my $NO;

# index($did, $value)
#
# Add one document to the table $tb, or delete it when -remove is set.
#   $did   - used as the 'docid' key and as the file name for the size test
#   $value - document content; skipped when undefined
# Prints a one-line status to STDOUT for every skipped document
# ("duplicate", "missing", "too small", "unavailable"); for a processed
# document prints "ok [n]" to STDERR and the first 80 characters of the
# headline to STDOUT.
# Returns nothing for a skipped document, otherwise whatever the table's
# insert/delete method returns.
# The name shadows Perl's builtin index, so it has to be called as
# &index(...).
sub index {
    my ($did, $value) = @_;

    my $already_there = $tb->have('docid' => $did);

    # Adding requires the docid to be new; removing requires it to exist.
    if ($already_there) {
        unless ($OPT{remove}) {
            print "duplicate\n";
            return;
        }
    } else {
        if ($OPT{remove}) {
            print "missing\n";
            return;
        }
    }

    # Files smaller than 100 bytes are not processed.
    unless (-s $did >= 100) {
        print "too small\n";
        return;
    }

    if (not defined $value) {
        print "unavailable\n";
        return;
    }

    printf STDERR "ok [%d]\n", ++$NO;

    my $record = $layout->split($value);
    $record->{size} = length($value);

    # Headline: the title if there is one, else the docid; whitespace runs
    # are collapsed and a leading blank removed.
    my $headline = $record->{title} || $did;
    for ($headline) {
        s/\s+/ /g;
        s/^\s+//;
    }
    printf "%s\n", substr($headline,0,80);

    # Same argument list either way; only the table method differs.
    my $action = $OPT{remove} ? 'delete' : 'insert';
    return $tb->$action('docid' => $did, headline => $headline, %{$record});
}
__END__
## ###################################################################
## pod
## ###################################################################
=head1 NAME
index_html - generate a manual database for sman
=head1 SYNOPSIS
B<index_html>
[B<-database> I<database name>]
[B<-dir> I<database directory>]
[B<-table> I<name>]
[B<-clean>]
[B<-remove>]
[I<mandir> ...]
=head1 DESCRIPTION
B<Index_html> generates/updates databases for B<sman>(1). If
I<mandir>s are specified, these are used. Otherwise the configured
default directories are indexed.
=head2 OPTIONS
=over 10
=item B<-database> I<database name>
Change the default database name to I<database name>.
=item B<-dir> I<database directory>
Change the default database directory to I<database directory>.
=item B<-table> I<name>
Use I<name> instead of C<kbox> as table name.
=item B<-clean>
Drop the table from the B<database>, remove its directory, and exit.
No indexing is done in the same run.
=item B<-remove>
Remove the selected directories from the database instead of
adding/updating. This works only for the manuals which are unchanged
since the indexing.

=back

=head1 SEE ALSO
L<sman>.
=head1 AUTHOR
Ulrich Pfeifer E<lt>F<pfeifer@ls6.informatik.uni-dortmund.de>E<gt>