use
5.008006;
# Package-level configuration: inheritance, export lists and version.

# Inherit import() from Exporter.
our @ISA = ('Exporter');

# Nothing is exportable: the 'all' tag is an empty list.
our %EXPORT_TAGS = ( 'all' => [] );

# Symbols available on request -- whatever the 'all' tag lists (none).
our @EXPORT_OK = @{ $EXPORT_TAGS{'all'} };

# Symbols exported by default (none).
our @EXPORT = ();

# Module version string.
our $VERSION = '0.31';
# Constructor.
#
# Called as a class method; the first argument is the package to bless into.
# Returns a new object whose _uri, _wikiwords and _wikiindex slots are
# undefined and whose _extension slot defaults to "html".
sub new {
    my ($package) = @_;

    my %fields = (
        _uri       => undef,
        _wikiwords => undef,
        _wikiindex => undef,
        _extension => "html",
    );

    return bless( \%fields, $package );
}
# Combined getter/setter for the file extension stored in _extension.
#
# With a defined argument the stored extension is replaced first; in every
# case the (possibly updated) stored value is returned.
sub extension {
    my $self      = shift;
    my $extension = shift;

    if ( defined $extension ) {
        $self->{_extension} = $extension;
    }

    return $self->{_extension};
}
# Combined getter/setter for the base URI stored in _uri.
#
# Despite the name, calling it without an argument simply reads the value.
# A defined argument replaces the stored URI before it is returned.
sub seturi {
    my $self = shift;
    my $uri  = shift;

    if ( defined $uri ) {
        $self->{_uri} = $uri;
    }

    return $self->{_uri};
}
# Get or set the list of wiki index entries held in the _wikiindex slot.
#
# Arguments:
#   $self      - the object (a hash reference)
#   @wikiindex - optional; when non-empty, replaces the stored list
#
# Returns the stored list (its element count in scalar context). When
# nothing has been stored yet, returns an empty list (undef in scalar
# context).
sub wikiindex {
    my ( $self, @wikiindex ) = @_;

    # An empty argument list means "get"; this accessor cannot clear the
    # stored list.
    @{ $self->{_wikiindex} } = @wikiindex if @wikiindex;

    # defined(@array) is a fatal compile-time error on Perl 5.22 and later,
    # so test the stored reference rather than the dereferenced array.
    my $stored = $self->{_wikiindex};
    return unless defined $stored;

    return @{$stored};
}
# Get or set the list of wiki words (page names) held in the _wikiwords slot.
#
# Arguments:
#   $self      - the object (a hash reference)
#   @wikiwords - optional; when non-empty, replaces the stored list
#
# Returns the stored list (its element count in scalar context). When
# nothing has been stored yet, returns an empty list (undef in scalar
# context).
sub wikiwords {
    my ( $self, @wikiwords ) = @_;

    # An empty argument list means "get"; this accessor cannot clear the
    # stored list.
    @{ $self->{_wikiwords} } = @wikiwords if @wikiwords;

    # defined(@array) is a fatal compile-time error on Perl 5.22 and later,
    # so test the stored reference rather than the dereferenced array.
    my $stored = $self->{_wikiwords};
    return unless defined $stored;

    return @{$stored};
}
# Placeholder URL encoder.
#
# Currently a pass-through: the string is handed back unchanged and no
# escaping is performed. The invocant is ignored.
sub urlencode {
    my ( undef, $str ) = @_;
    return $str;
}
# Decode percent-escapes in a string.
#
# Every "%XX" sequence (two hexadecimal digits, either case) is replaced by
# the single character with that code; everything else is left as is.
# The invocant is ignored. Returns the decoded copy.
sub urldecode {
    my $self    = shift;
    my $decoded = shift;

    $decoded =~ s/%([A-Fa-f0-9]{2})/pack('C', hex($1))/seg;

    return $decoded;
}
# makeflatpages($self, $folder)
#
# Fetches every page named by $self->wikiwords() from the base URI returned
# by $self->seturi() and writes each one as a flat file
# "$folder/<decoded word>.<extension>".
#
# Per word, as far as the visible code shows:
#   * sleeps 7 seconds, then prints a progress line;
#   * words matching "http://" are only reported and skipped;
#   * otherwise the page is fetched with $extractor->gethtml($uri.$word,
#     "tagclass=wiki-content"), links are rewritten, and category names are
#     pushed onto @categories (which is not read again in this sub);
#   * pages whose text matches /Category:Exclude/ are reported and not
#     written;
#   * all other pages are written through the bareword handle FILEHANDLE:
#     a <title>/stylesheet/PHP-include preamble, the contents of
#     "header.html" (read from the current directory), an <h1> with the
#     decoded word, the page text, a "Retrieved from" footer and a PHP
#     footer include; then the sub sleeps 6 more seconds.
#
# Dies if the output file cannot be opened. The open of "header.html" is
# not checked.
#
# NOTE(review): four regular expressions in this sub (the link rewrite, the
# split producing @rawcategories, the category match, and the
# "href=\"Category:" rewrite) are cut off mid-pattern in this copy of the
# file -- restore them from the original before relying on this code.
# NOTE(review): the two string literals around "Retrieved from" and
# "</a>...</div>" contain bare double quotes; presumably these were
# &quot; entities originally -- confirm.
# NOTE(review): `!$1 eq ""` parses as `(!$1) eq ""`, i.e. it is true when
# $1 is a true value -- confirm that is the intent.
# NOTE(review): the die message interpolates "$self->extension()" inside a
# string, which does not call the method, and uses $word rather than
# $cleanword.
# NOTE(review): Data::Dumper and HTML::Extract are used here but no
# use/require for them is visible in this view.
sub
makeflatpages{
my
(
$self
,
$folder
) =
@_
;
my
@wikiwords
=
$self
->wikiwords();
# Debug output: dump the list of words about to be processed.
print
"Wikiwords"
.Data::Dumper->Dump([
@wikiwords
]).
"\n"
;
my
$extractor
=new HTML::Extract();
# seturi() called without an argument acts as a getter for the base URI.
my
$uri
=
$self
->seturi();
# Greedy match: $2 ends up as the text between the last two '/' in $uri.
$uri
=~/(.*)\/(.*)\//;
my
$uriextension
=$2;
my
@categories
;
foreach
my
$word
(
@wikiwords
){
# Pause before handling each word.
sleep
7;
print
"Working on $word with $uri\n"
;
# Words containing "http://" are reported and skipped, not fetched.
if
(
$word
=~/http\:\/\/(.*)/){
print
"Looking at $word (ignore) \n"
;
}
else
{
print
"Looking at $uri$word (get page) \n"
;
# Fetch the page at $uri.$word, passing "tagclass=wiki-content" to gethtml.
my
$text
=
$extractor
->gethtml(
$uri
.
$word
,
"tagclass=wiki-content"
);
my
$ext
=
$self
->extension();
$text
=~s/\"\/
$uriextension
\/([0-9A-z\-\_\:\%\&\.\,\;\+\
my
@rawcategories
=
split
(/href=\"([0-9A-z\-\_\:\%\&\.\,\;\+\
foreach
my
$category
(
@rawcategories
) {
$category
=~/(^[0-9A-Za-z\-\_\:\%\&\.\,\;\+\
if
(!$1 eq
""
){
push
(
@categories
,$1);
}
}
if
(
$text
=~ /Category:Exclude/){
print
"Not printing $word (excluded)\n"
;
}
else
{
$text
=~s/href=\"Category:([0-9A-z\-\_\%\&\.\,\;\+\
my
$contexturi
=
$uri
;
$contexturi
=~s/details/context/;
$text
=~s/\/confluence\/display\/context\//
$contexturi
/g;
my
$cleanword
=
$self
->urldecode(
$word
);
open
(FILEHANDLE,
">$folder/$cleanword."
.
$self
->extension()) ||
die
(
"cannot open file: ($folder/$word.$self->extension()) "
. $!);
print
FILEHANDLE "<title>
$word
</title><
link
rel=stylesheet href=\"style.css\" type=\"text/css\"> </head><body>\n
<?php include(
'header.inc'
); ?>";
open
(FILE2,
"<header.html"
);
my
@rawheader
=<FILE2>;
my
$header
=
join
(
''
,
@rawheader
);
close
(FILE2);
print
FILEHANDLE
"$header\n<div id=\"column-content\">\n<h1 class=\"firstHeading\">"
.
$self
->urldecode(
$word
).
"</h1>"
;
print
FILEHANDLE
"\n$text\n"
;
print
FILEHANDLE
"<div class=\"printfooter\"> Retrieved from "<a href=\""
.
$uri
.
$word
.
"\">"
.
$self
->urlencode(
$uri
.
$word
).
"</a>"</div>"
;
print
FILEHANDLE
"\n<?php include('footer.inc'); ?>\n</div> </body></html>"
;
close
(FILEHANDLE);
sleep
6;
}
}
}
}
1;