use
5.020;
our
$VERSION
=
'2.09'
;
has
translation
=>
'NIV'
;
has
reference
=>
sub
{ Bible::Reference->new(
bible
=>
'Protestant'
) };
has
ua
=>
sub
{
my
$ua
= Mojo::UserAgent->new(
max_redirects
=> 3 );
$ua
->transactor->name( __PACKAGE__ .
'/'
. ( __PACKAGE__->VERSION //
'2.0'
) );
return
$ua
;
};
sub
translations (
$self
) {
my
$translations
;
$self
->ua->get(
$self
->url )->result->dom->find(
'select.search-dropdown option'
)->
each
(
sub
{
my
$class
=
$_
->attr(
'class'
) ||
''
;
if
(
$class
eq
'lang'
) {
my
@language
=
$_
->text =~ /\-{3}(.+)\s\(([^\)]+)\)\-{3}/;
push
(
@$translations
, {
language
=>
$language
[0],
acronym
=>
$language
[1],
} );
}
elsif
( not
$class
) {
my
@translation
=
$_
->text =~ /\s*(.+)\s\(([^\)]+)\)/;
push
( @{
$translations
->[-1]{translations} }, {
translation
=>
$translation
[0],
acronym
=>
$translation
[1],
} );
}
} );
return
$translations
;
}
sub
structure (
$self
,
$translation
=
$self
->translation ) {
return
$self
->ua->get(
$self
->url->clone->path(
$self
->url->path .
'bcv/'
)->query( {
version
=>
$translation
} )
)->result->json->{data}[0];
}
sub
_retag (
$tag
,
$retag
) {
$tag
->tag(
$retag
);
delete
$tag
->attr->{
$_
}
for
(
keys
%{
$tag
->attr } );
}
sub
fetch (
$self
,
$reference
,
$translation
=
$self
->translation ) {
my
$runs
=
$self
->reference
->acronyms(0)->require_chapter_match(0)->require_book_ucfirst(0)
->clear->in(
$reference
)->as_runs;
$reference
=
$runs
->[0]
unless
(
@$runs
!= 1 or
$runs
->[0] !~ /\w\s*\d/ );
my
$result
=
$self
->ua->get(
$self
->url->query( {
version
=>
$translation
,
search
=>
$reference
,
} )
)->result;
croak(
$translation
.
' "'
. (
$reference
//
'(undef)'
) .
'" did not match a chapter or run of verses'
)
if
(
$result
->dom->at(
'div.content-section'
) );
return
Mojo::ByteStream->new(
$result
->body )->decode->to_string;
}
sub
parse (
$self
,
$html
) {
return
unless
(
$html
);
my
$dom
= Mojo::DOM->new(
$html
);
my
$ref_display
=
$dom
->at(
'div.bcv div.dropdown-display-text'
);
croak(
'source appears to be invalid; check your inputs'
)
unless
(
$ref_display
and
$ref_display
->text );
my
$reference
=
$ref_display
->text;
my
$translation
=
$dom
->at(
'div.passage-col'
)->attr(
'data-translation'
);
croak(
'EXB (Extended Bible) translation not supported'
)
if
(
$translation
eq
'EXB'
);
my
$block
=
$dom
->at(
'div.passage-text div.passage-content div:first-child'
);
$block
->find(
'*[data-link]'
)->
each
(
sub
{
delete
$_
->attr->{
'data-link'
} } );
$html
=
$block
->to_string;
$html
=~ s`<sup>(\d+)</sup>.<
sub
>(\d+)</
sub
>`$1/$2`g;
$html
=~ s`(?:
<
;){2,}(.*?)(?:\x{2019}
>
;|(?:
>
;){2,})`\x{201c}$1\x{201d}`g;
$html
=~ s`(?:
<
;)(.*?)(?:
>
;|\x{2019})`\x{2018}$1\x{2019}`g;
$html
=~ s`\\\w+``g;
$html
=~ s/(?:\.\s*){2,}\./\x{2026}/;
$html
=~ s/\x{200a}//g;
$block
= Mojo::DOM->new(
$html
)->at(
'div'
);
$_
->parent->strip
if
(
$_
=
$block
->find(
'div.poetry > h2'
)->first );
$block
->descendant_nodes->
grep
(
sub
{
$_
->type eq
'comment'
} )->
each
(
'remove'
);
$block
->find(
'.il-text, hidden, hr, .translation-note, span.inline-note, a.full-chap-link, b.inline-h3, top1'
)->
each
(
'remove'
);
$block
->find(
'.std-text, hgroup, b, em, versenum, char'
)->
each
(
'strip'
);
$block
->find(
'i, .italic, .trans-change, .idiom, .catch-word, selah, span.selah'
)
->
each
(
sub
{ _retag(
$_
,
'i'
) } );
$block
->find(
'.woj, u.jesus-speech'
)->
each
(
sub
{ _retag(
$_
,
'woj'
) } );
$block
->find(
'.divine-name, .small-caps'
)->
each
(
sub
{ _retag(
$_
,
'small_caps'
) } );
$block
->find(
'sup'
)->
grep
(
sub
{
length
$_
->text == 1 } )->
each
(
sub
{
$_
->content(
'-'
.
$_
->content );
$_
->strip;
} );
$self
->reference->acronyms(1)->require_chapter_match(1)->require_book_ucfirst(1);
my
$footnotes
=
$block
->at(
'div.footnotes'
);
if
(
$footnotes
) {
$footnotes
->find(
'a.bibleref'
)->
each
(
sub
{
(
my
$ref
=
$_
->attr(
'data-bibleref'
) //
''
) =~ s/\.(\d+)\.(\d+)/ $1:$2/g;
$_
->replace(
$ref
);
} );
$footnotes
->remove;
$footnotes
= {
map
{
'#'
.
$_
->attr(
'id'
) =>
$self
->reference->clear->in(
$_
->at(
'span'
)->all_text
)->as_text
}
$footnotes
->find(
'ol li'
)->
each
};
}
my
$crossrefs
=
$block
->at(
'div.crossrefs'
);
if
(
$crossrefs
) {
$crossrefs
->find(
'a.bibleref'
)->
each
(
sub
{
(
my
$ref
=
$_
->attr(
'data-bibleref'
) //
''
) =~ s/\.(\d+)\.(\d+)/ $1:$2/g;
$_
->replace(
$ref
);
} );
$crossrefs
->remove;
$crossrefs
= {
map
{
'#'
.
$_
->attr(
'id'
) =>
$self
->reference->clear->in(
$_
->at(
'a:last-child'
)->attr(
'data-bibleref'
)
)->refs
}
$crossrefs
->find(
'ol li'
)->
each
};
}
$block
->find(
'span.text > a.bibleref'
)
->
map
(
'parent'
)
->
grep
(
sub
{
$_
->content =~ /^\[<a/ } )
->
each
(
sub
{
$_
->find(
'a'
)->
each
(
sub
{
(
my
$ref
=
$_
->attr(
'data-bibleref'
) //
''
) =~ s/\.(\d+)\.(\d+)/ $1:$2/g;
$_
->replace(
$ref
);
} );
my
$content
=
$_
->content;
$content
=~ s|\s+\[([^\]]+)\]|
'<footnote>'
.
$self
->reference->clear->in($1)->as_text .
'</footnote>'
|ge;
$_
->content(
$content
);
} );
$block
->find(
'i > a.bibleref, crossref > a.bibleref'
)
->
map
(
'parent'
)
->
grep
(
sub
{
$_
->children->size == 1 } )
->
each
(
sub
{
my
$a
=
$_
->at(
'a:last-child'
);
(
my
$ref
=
$_
->attr(
'data-bibleref'
) //
''
) =~ s/\.(\d+)\.(\d+)/ $1:$2/g;
$_
->tag(
'sup'
);
$_
->attr({
'class'
=>
'crossreference'
,
'data-cr'
=>
$a
->attr(
'data-bibleref'
),
});
$crossrefs
= {
$a
->attr(
'data-bibleref'
) =>
$self
->reference->clear->in(
$ref
)->refs
};
} );
$block
->find(
'a.bibleref'
)->
each
(
'strip'
);
$block
->find(
'sup.crossreference, sup.footnote'
)->
each
(
sub
{
if
(
$_
->attr(
'class'
) eq
'footnote'
) {
$_
->replace(
(
$footnotes
->{
$_
->attr(
'data-fn'
) } )
?
'<footnote>'
.
$footnotes
->{
$_
->attr(
'data-fn'
) } .
'</footnote>'
:
''
);
}
elsif
(
$_
->attr(
'class'
) eq
'crossreference'
) {
$_
->replace(
(
$crossrefs
->{
$_
->attr(
'data-cr'
) } )
?
'<crossref>'
.
$crossrefs
->{
$_
->attr(
'data-cr'
) } .
'</crossref>'
:
''
);
}
} );
$block
->find(
'footnote, crossref'
)->
each
(
sub
{
_retag(
$_
,
$_
->tag );
my
$previous
=
$_
;
$previous
=
$previous
->previous_node
while
(
$previous
and
$previous
->tag and
(
$previous
->tag eq
'crossref'
or
$previous
->tag eq
'footnote'
)
);
my
$previous_char
=
substr
( ( (
$previous
) ?
$previous
->all_text ||
$previous
->content :
''
), -1 );
my
$next
=
$_
;
$next
=
$next
->next_node
while
(
$next
and
$next
->tag and
(
$next
->tag eq
'crossref'
or
$next
->tag eq
'footnote'
)
);
my
$next_char
=
substr
( ( (
$next
) ?
$next
->all_text ||
$next
->content :
''
), 0, 1 );
$_
->append(
' '
)
if
(
length
$previous_char
and
length
$next_char
and
$previous_char
=~ /[:;,\w\!\.\?]/ and
$next_char
=~ /\w/
);
} );
_retag(
$block
,
'obml'
);
$block
->child_nodes->first->prepend(
$block
->new_tag(
'reference'
,
$reference
) );
$block
->find(
'h3.chapter'
)->
each
(
'remove'
);
$block
->find(
'h2 + h3'
)->
each
(
sub
{
$_
->tag(
'h4'
) } );
$block
->find(
'h2, h3'
)->
each
(
sub
{ _retag(
$_
,
'header'
) } );
$block
->find(
'h4'
)->
each
(
sub
{ _retag(
$_
,
'sub_header'
) } );
$block
->find(
'.versenum'
)->
grep
(
sub
{
$_
->text =~ /^\s*\(/ } )->
each
(
'remove'
);
$block
->find(
'.chapternum + .versenum'
)->
each
(
sub
{
$_
->previous->remove } );
$block
->find(
'.chapternum + i > .versenum'
)->
each
(
sub
{
$_
->parent->previous->remove } );
$block
->find(
'.chapternum'
)->
each
(
sub
{
_retag(
$_
,
'verse_number'
);
$_
->content(1);
} );
$block
->find(
'.versenum'
)->
each
(
sub
{
_retag(
$_
,
'verse_number'
);
my
$verse_number
=
$_
->content;
$verse_number
=~ s/^.*://g;
(
$verse_number
) =
$verse_number
=~ /(\d+)/;
$_
->content(
$verse_number
);
} );
$block
->find(
'span.text'
)->
each
(
sub
{ _retag(
$_
,
'text'
) } );
$block
->find(
'table'
)->
each
(
sub
{
$_
->find(
'tr'
)->
each
(
sub
{
$_
->find(
'th'
)->
each
(
'remove'
);
unless
(
$_
->child_nodes->size ) {
$_
->strip;
}
else
{
$_
->replace(
join
(
''
,
'<text>'
,
$_
->find(
'td text'
)->
map
(
'content'
)->
join
(
', '
),
(
(
$_
->find(
'td text'
)->
map
(
'text'
)->
last
=~ /\W$/ ) ?
''
:
(
$_
->following_nodes->size ) ?
'; '
:
'.'
),
( (
$_
->following_nodes->size ) ?
'</text> '
:
'</text>'
),
) );
}
} );
$_
->tag(
'div'
);
$_
->content(
'<p>'
.
$_
->content .
'</p>'
);
} );
$block
->find(
'ul, ol'
)->
each
(
sub
{
$_
->find(
'li'
)->
each
(
sub
{
$_
->tag(
'text'
);
$_
->find(
'text > text'
)->
each
(
'strip'
);
$_
->append_content(
'<br>'
)
if
(
$_
->
next
and
$_
->
next
->tag eq
'li'
);
} );
$_
->tag(
'div'
);
$_
->attr(
class
=>
'left-1'
);
$_
->content(
'<p>'
.
$_
->content .
'</p>'
);
} );
$block
->find(
join
(
', '
,
map
{
'div.left-'
.
$_
} 1 .. 9 ) )->
each
(
sub
{
my
(
$left
) =
$_
->attr(
'class'
) =~ /\bleft\-(\d+)/;
$_
->find(
'text'
)->
each
(
sub
{
$_
->attr(
indent
=>
$left
) } );
$_
->strip;
} );
$block
->find(
'div.poetry'
)->
each
(
sub
{
$_
->attr(
class
=>
'indent-1'
) } );
$block
->find(
join
(
', '
,
map
{
'.indent-'
.
$_
} 1 .. 9 ) )->
each
(
sub
{
my
(
$indent
) =
$_
->attr(
'class'
) =~ /\bindent\-(\d+)/;
$_
->find(
'text'
)->
each
(
sub
{
$_
->attr(
indent
=>
$indent
+ (
$_
->attr(
'indent'
) || 0 ) );
} );
$_
->strip;
} );
$block
->find(
join
(
', '
,
map
{
'.indent-'
.
$_
.
'-breaks'
} 1 .. 5 ) )->
each
(
'remove'
);
$block
->find(
'text[indent]'
)->
each
(
sub
{
my
$level
=
$_
->attr(
'indent'
);
_retag(
$_
,
'indent'
);
$_
->attr(
level
=>
$level
);
} );
$block
->find(
'text'
)->
each
(
'strip'
);
$block
->find(
'indent + indent'
)->
each
(
sub
{
if
(
$_
->previous->attr(
'level'
) eq
$_
->attr(
'level'
) ) {
$_
->previous->append_content(
' '
.
$_
->content );
$_
->remove;
}
} );
$block
->find(
'p'
)->
each
(
sub
{ _retag(
$_
,
'p'
) } );
$block
->at(
'p'
)->prepend_content(
'<verse_number>1</verse_number>'
)
if
(
$block
->at(
'p'
) and not
$block
->at(
'p'
)->at(
'verse_number'
) );
$block
->find(
'div, span, u, sup, bk, verse, start-chapter'
)->
each
(
'strip'
);
$_
->
each
(
'strip'
)
while
(
$_
=
$block
->find(
'i > i'
) and
$_
->size );
$html
= html_unescape(
$block
->to_string );
$html
=~ s/<p>[ ]+/<p>/g;
$html
=~ s/,([A-z]|<i>)/, $1/g;
$html
=~ s/([A-z])(<i>)/$1 $2/g;
$html
=~ s/([a-z])([A-Z])/$1 $2/g;
$html
=~ s/([a-z])([:;,!?])([A-Za-z])/$1$2 $3/g;
return
$html
;
}
sub
get (
$self
,
$reference
,
$translation
=
$self
->translation ) {
return
Bible::OBML->new->html(
$self
->parse(
$self
->fetch(
$reference
,
$translation
) ) );
}
1;