$VERSION
=
sprintf
"%d"
,
q$Revision: 3385 $
=~ /(\d+)/;
Hide Show 25 lines of Pod
BEGIN {
my
$members
= [
'stylesheet'
,
'css'
,
'html'
,
'html_tree'
,
'query'
,
'strip_attrs'
,
'leave_style'
,
'warns_as_errors'
,
'content_warnings'
,
'post_fetch_filter'
];
foreach
my
$member
(@{
$members
}) {
no
strict
'refs'
;
*{
'_'
.
$member
} =
sub
{
my
(
$self
,
$value
) =
@_
;
$self
->_check_object();
$self
->{
$member
} =
$value
if
defined
(
$value
);
return
$self
->{
$member
};
}
}
}
Hide Show 23 lines of Pod
sub
new {
my
(
$proto
,
$params
) =
@_
;
my
$class
=
ref
(
$proto
) ||
$proto
;
my
$self
= {
stylesheet
=>
undef
,
css
=> CSS::Inliner::Parser->new({
warns_as_errors
=>
$$params
{warns_as_errors}}),
html
=>
undef
,
html_tree
=>
$$params
{html_tree} || HTML::TreeBuilder->new(),
query
=>
undef
,
content_warnings
=>
undef
,
strip_attrs
=> (
defined
(
$$params
{strip_attrs}) &&
$$params
{strip_attrs}) ? 1 : 0,
leave_style
=> (
defined
(
$$params
{leave_style}) &&
$$params
{leave_style}) ? 1 : 0,
warns_as_errors
=> (
defined
(
$$params
{warns_as_errors}) &&
$$params
{warns_as_errors}) ? 1 : 0,
post_fetch_filter
=> (
defined
(
$$params
{post_fetch_filter}) &&
ref
(
$$params
{post_fetch_filter}) eq
'CODE'
) ?
$$params
{post_fetch_filter} :
undef
};
bless
$self
,
$class
;
return
$self
;
}
Hide Show 24 lines of Pod
sub
fetch_file {
my
(
$self
,
$params
) =
@_
;
$self
->_check_object();
unless
(
$params
&&
$$params
{url}) {
croak
"You must pass in hash params that contain a url argument"
;
}
my
$html
=
$self
->_fetch_html({
url
=>
$$params
{url}});
$self
->
read
({
html
=>
$html
});
return
();
}
Hide Show 13 lines of Pod
sub
read_file {
my
(
$self
,
$params
) =
@_
;
$self
->_check_object();
unless
(
$params
&&
$$params
{filename}) {
croak
"You must pass in hash params that contain a filename argument"
;
}
open
FILE,
"<"
,
$$params
{filename} or
die
$!;
my
$html
=
do
{
local
( $/ ) ; <FILE> } ;
$self
->
read
({
html
=>
$html
});
return
();
}
Hide Show 17 lines of Pod
sub
read
{
my
(
$self
,
$params
) =
@_
;
$self
->_check_object();
unless
(
$params
&&
$$params
{html}) {
croak
"You must pass in hash params that contains html data"
;
}
$self
->_html_tree()->store_comments(1);
$self
->_html_tree()->parse(
$$params
{html});
$self
->_init_query();
my
$stylesheet
=
$self
->_parse_stylesheet();
$self
->_html(
$$params
{html});
$self
->_stylesheet(
$stylesheet
);
return
();
}
Hide Show 10 lines of Pod
sub
inlinify {
my
(
$self
,
$params
) =
@_
;
$self
->_check_object();
$self
->_content_warnings({});
unless
(
$self
->_html() &&
$self
->_html_tree()) {
croak
"You must instantiate and read in your content before inlinifying"
;
}
my
$html
;
if
(
defined
$self
->_css()) {
$self
->_css()->
read
({
css
=>
$self
->_stylesheet()});
my
@css_warnings
= @{
$self
->_css()->content_warnings()};
my
%content_warns
=
map
{
$_
=> 1}
@css_warnings
;
$self
->_content_warnings(\
%content_warns
);
my
%matched_elements
;
my
$count
= 0;
foreach
my
$entry
(@{
$self
->_css()->get_entries()}) {
my
$selector
=
$$entry
{selector};
my
$properties
=
$$entry
{properties};
if
(
$selector
=~ /[\w\*]:(?:(active|focus|hover|
link
|visited|
after
|
before
|selection|target|first-line|first-letter|first-child|first-child))\b/io) {
$self
->_report_warning({
info
=>
"The pseudo-class ':$1' cannot be supported inline"
});
next
;
}
if
(
$selector
=~ /^\@/io) {
$self
->_report_warning({
info
=>
"The directive '$selector' cannot be supported inline"
});
next
;
}
my
$query_result
;
eval
{
$query_result
=
$self
->query({
selector
=>
$selector
});
};
if
($@) {
$self
->_report_warning({
info
=> $@->info() });
next
;
}
my
$specificity
=
$self
->specificity({
selector
=>
$selector
});
foreach
my
$element
(@{
$query_result
->get_elements()}) {
$matched_elements
{
$element
->address()} ||= [];
my
%match_info
= (
rule
=>
$selector
,
element
=>
$element
,
specificity
=>
$specificity
,
position
=>
$count
,
css
=>
$properties
);
push
(@{
$matched_elements
{
$element
->address()}}, \
%match_info
);
$count
++;
}
}
foreach
my
$matches
(
values
%matched_elements
) {
my
$element
=
$matches
->[0]->{element};
my
@sorted_matches
=
sort
{
$a
->{specificity} <=>
$b
->{specificity} ||
$a
->{position} <=>
$b
->{position} }
@$matches
;
my
%new_style
;
foreach
my
$match
(
@sorted_matches
) {
%new_style
= (
%new_style
, %{
$match
->{css}});
}
if
(
defined
(
$element
->attr(
'style'
))) {
my
$cur_style
=
$self
->_split({
style
=>
$element
->attr(
'style'
)});
%new_style
= (
%new_style
, %{
$cur_style
});
}
$element
->attr(
'style'
,
$self
->_expand({
properties
=> \
%new_style
}));
}
$self
->_collapse_inline_styles();
$html
=
$self
->_html_tree()->as_HTML(
q@^\n\r\t !\#\$%\(-;=?-~'@
,' ',{});
}
else
{
$html
=
$self
->{html};
}
return
$html
.
"\n"
;
}
Hide Show 8 lines of Pod
sub
query {
my
(
$self
,
$params
) =
@_
;
$self
->_check_object();
unless
(
$self
->_query()) {
$self
->_init_query();
}
return
$self
->_query()->query(
$$params
{selector});
}
Hide Show 8 lines of Pod
sub
specificity {
my
(
$self
,
$params
) =
@_
;
$self
->_check_object();
unless
(
$self
->_query()) {
$self
->_init_query();
}
return
$self
->_query()->get_specificity(
$$params
{selector});
}
Hide Show 13 lines of Pod
sub
content_warnings {
my
(
$self
,
$params
) =
@_
;
$self
->_check_object();
my
@content_warnings
=
keys
%{
$self
->_content_warnings()};
return
\
@content_warnings
;
}
sub
_check_object {
my
(
$self
,
$params
) =
@_
;
unless
(
ref
$self
) {
croak
"You must instantiate this class in order to properly use it"
;
}
return
();
}
sub
_report_warning {
my
(
$self
,
$params
) =
@_
;
$self
->_check_object();
if
(
$self
->_warns_as_errors()) {
croak
$$params
{info};
}
else
{
my
$warnings
=
$self
->_content_warnings();
$$warnings
{
$$params
{info}} = 1;
}
return
();
}
sub
_fetch_url {
my
(
$self
,
$params
) =
@_
;
$self
->_check_object();
my
$ua
= LWP::UserAgent->new;
$ua
->agent(
"Mozilla/4.0 ("
.
$ua
->agent .
")"
);
$ua
->protocols_allowed( [
'http'
,
'https'
] );
my
$uri
= URI->new(
$$params
{url});
my
$req
= HTTP::Request->new(
'GET'
,
$uri
);
my
$res
=
$ua
->request(
$req
);
if
(!
$res
->is_success()) {
die
'There was an error in fetching the document for '
.
$uri
.
' : '
.
$res
->message;
}
if
(
$res
->content_type ne
'text/html'
&&
$res
->content_type ne
'text/css'
) {
die
'The web site address you entered is not an HTML document.'
;
}
my
$content
=
$res
->content ||
''
;
$content
=~ s|</?html>||gi;
my
$baseref
=
$res
->base;
return
(
$content
,
$baseref
);
}
sub
_fetch_html {
my
(
$self
,
$params
) =
@_
;
$self
->_check_object();
my
(
$content
,
$baseref
) =
$self
->_fetch_url({
url
=>
$$params
{url} });
if
(
my
$post_fetch_filter
=
$self
->_post_fetch_filter()) {
$content
=
&$post_fetch_filter
({
html
=>
$content
});
}
my
$doc
= HTML::TreeBuilder->new();
$doc
->parse(
$content
);
$doc
->
eof
;
$self
->_changelink_relative({
content
=>
$doc
->content,
baseref
=>
$baseref
});
$self
->_expand_stylesheet({
content
=>
$doc
,
html_baseref
=>
$baseref
});
my
$html
=
$doc
->as_HTML(
q@^\n\r\t !\#\$%\(-;=?-~'@
,' ',{});
return
$html
;
}
sub
_changelink_relative {
my
(
$self
,
$params
) =
@_
;
$self
->_check_object();
my
$base
=
$$params
{baseref};
foreach
my
$i
(@{
$$params
{content}}) {
next
unless
ref
$i
eq
'HTML::Element'
;
if
(
$i
->tag eq
'img'
or
$i
->tag eq
'frame'
or
$i
->tag eq
'input'
or
$i
->tag eq
'script'
) {
if
(
$i
->attr(
'src'
) and
$base
) {
my
$uri
= URI->new(
$i
->attr(
'src'
));
$i
->attr(
'src'
,
$uri
->
abs
(
$base
));
}
}
elsif
(
$i
->tag eq
'form'
and
$base
) {
my
$uri
= URI->new(
$i
->attr(
'action'
));
$i
->attr(
'action'
,
$uri
->
abs
(
$base
));
}
elsif
((
$i
->tag eq
'a'
or
$i
->tag eq
'area'
or
$i
->tag eq
'link'
) and
$i
->attr(
'href'
) and
$i
->attr(
'href'
) !~ /^\
my
$uri
= URI->new(
$i
->attr(
'href'
));
my
$newuri
=
$base
?
$uri
->
abs
(
$base
) :
$uri
;
$i
->attr(
'href'
,
$newuri
->as_string());
}
elsif
(
$i
->tag eq
'td'
and
$i
->attr(
'background'
) and
$base
) {
my
$uri
= URI->new(
$i
->attr(
'background'
));
$i
->attr(
'background'
,
$uri
->
abs
(
$base
));
}
if
(
defined
$i
->content) {
$self
->_changelink_relative({
content
=>
$i
->content,
baseref
=>
$base
});
}
}
}
sub
__fix_relative_url {
my
(
$self
,
$params
) =
@_
;
$self
->_check_object();
my
$uri
= URI->new(
$$params
{url});
return
$$params
{prefix} .
"'"
.
$uri
->
abs
(
$$params
{base})->as_string .
"'"
;
}
sub
_expand_stylesheet {
my
(
$self
,
$params
) =
@_
;
$self
->_check_object();
my
$doc
=
$$params
{content};
my
$stylesheets
= ();
my
$head
=
$doc
->look_down(
"_tag"
,
"head"
);
my
@style
=
$head
->look_down(
'_tag'
,
'style'
,
'href'
,
qr/^https?:\/
\//);
my
@link
=
$head
->look_down(
'_tag'
,
'link'
,
'rel'
,
'stylesheet'
);
my
@stylesheets
= (
@style
,
@link
);
foreach
my
$i
(
@link
) {
my
(
$content
,
$baseref
) =
$self
->_fetch_url({
url
=>
$i
->attr(
'href'
) });
$content
=~ s/(url\()[
"']?((?:(?!https?:\/\/)(?!\))[^"
'])*)["']?(?=\))/
$self
->__fix_relative_url({
prefix
=> $1,
url
=> $2,
base
=>
$baseref
})/exsgi;
my
$stylesheet
= HTML::Element->new(
'style'
,
type
=>
"text/css"
,
rel
=>
"stylesheet"
);
$stylesheet
->push_content(
$content
);
$i
->replace_with(
$stylesheet
);
}
foreach
my
$i
(
@style
) {
my
$baseref
=
$$params
{html_baseref};
my
$content
=
$i
->content();
$content
=~ s/(url\()[
"']?((?:(?!https?:\/\/)(?!\))[^"
'])*)["']?(?=\))/
$self
->__fix_relative_url({
prefix
=> $1,
url
=> $2,
base
=>
$baseref
})/exsgi;
my
$stylesheet
= HTML::Element->new(
'style'
);
$stylesheet
->push_content(
$content
);
$i
->replace_with(
$stylesheet
);
}
return
();
}
sub
_parse_stylesheet {
my
(
$self
,
$params
) =
@_
;
$self
->_check_object();
my
$stylesheet
=
''
;
my
$head
=
$self
->_html_tree()->look_down(
"_tag"
,
"head"
);
my
@style
=
$head
->look_down(
'_tag'
,
'style'
,
'type'
,
'text/css'
);
my
@link
=
$head
->look_down(
'_tag'
,
'link'
,
'rel'
,
'stylesheet'
,
'type'
,
'text/css'
);
if
(
scalar
@link
) {
die
'Inliner only supports link tags if you fetch the document from a remote source'
;
}
foreach
my
$i
(
@style
) {
if
((
$i
->tag eq
'style'
) && (!
$i
->attr(
'media'
) ||
$i
->attr(
'media'
) =~ m/\b(all|screen)\b/)) {
foreach
my
$item
(
$i
->content_list()) {
$item
=~ s/<!--//mg;
$item
=~ s/-->//mg;
$stylesheet
.=
$item
;
}
}
unless
(
$self
->_leave_style()) {
$i
->
delete
();
}
}
return
$stylesheet
;
}
sub
_collapse_inline_styles {
my
(
$self
,
$params
) =
@_
;
$self
->_check_object();
my
$content
=
exists
(
$$params
{content}) ?
$$params
{content} : [
$self
->_html_tree()];
foreach
my
$i
(@{
$content
}) {
next
unless
(
ref
$i
eq
'HTML::Element'
||
ref
$i
eq
'HTML::TreeBuilder'
);
if
(
$i
->attr(
'style'
)) {
my
$existing_styles
=
$i
->attr(
'style'
);
$existing_styles
=~
tr
/\n\t/ /;
my
$styles
=
$self
->_split({
style
=>
$existing_styles
});
my
$collapsed_style
=
''
;
foreach
my
$key
(
sort
keys
%{
$styles
}) {
$collapsed_style
.=
$key
.
': '
.
$$styles
{
$key
} .
'; '
;
}
$collapsed_style
=~ s/\s*$//;
$i
->attr(
'style'
,
$collapsed_style
);
}
if
(
$self
->_strip_attrs()) {
$i
->attr(
'id'
,
undef
);
$i
->attr(
'class'
,
undef
);
}
if
(
defined
$i
->content) {
$self
->_collapse_inline_styles({
content
=>
$i
->content()});
}
}
}
sub
_init_query {
my
(
$self
,
$params
) =
@_
;
$self
->_check_object();
$self
->{query} = HTML::Query->new(
$self
->_html_tree());
return
();
}
sub
_expand {
my
(
$self
,
$params
) =
@_
;
$self
->_check_object();
my
$properties
=
$$params
{properties};
my
$inline
=
''
;
foreach
my
$key
(
keys
%{
$properties
}) {
$inline
.=
$key
.
':'
.
$$properties
{
$key
} .
';'
;
}
return
$inline
;
}
sub
_split {
my
(
$self
,
$params
) =
@_
;
$self
->_check_object();
my
$style
=
$params
->{style};
my
%split
;
foreach
(
grep
{ /\S/ }
split
/\;/,
$style
) {
unless
( /^\s*([\w._-]+)\s*:\s*(.*?)\s*$/ ) {
$self
->_report_warning({
info
=>
"Invalid or unexpected property '$_' in style '$style'"
});
}
$split
{
lc
$1} = $2;
}
return
\
%split
;
}
1;
Hide Show 23 lines of Pod