my
$ALPHANUM
=
'\p{Letter}\p{Number}\pM'
;
my
$reserved
=
q{;/?:@&=+$,[]#}
;
my
$mark
=
q{-_.!~*'()}
;
my
$unreserved
=
"A-Za-z0-9\Q$mark\E"
;
my
$uric
=
quotemeta
(
$reserved
) .
$unreserved
.
"%"
;
my
%im_types
= (
yahoo
=>
'yahoo'
,
ymsgr
=>
'yahoo'
,
callto
=>
'callto'
,
skype
=>
'callto'
,
callme
=>
'callto'
,
aim
=>
'aim'
,
msn
=>
'msn'
,
asap
=>
'asap'
,
);
my
$im_re
=
join
'|'
,
keys
%im_types
;
sub
create_grammar {
my
$all_phrases
= [
qw(waflphrase asis wikilink a im mail tt b i del)
];
my
$all_blocks
= [
qw(
pre wafl_block
hr hx
waflparagraph
ul ol
blockquote table
p empty
else
)
];
return
{
_all_blocks
=>
$all_blocks
,
_all_phrases
=>
$all_phrases
,
top
=> {
blocks
=>
$all_blocks
,
},
empty
=> {
match
=>
qr/^\s*\n/
,
filter
=>
sub
{
my
$node
=
shift
;
$node
->{type} =
''
;
},
},
wafl_block
=> {
match
=>
qr/(?:^\.([\w\-]+)\ *\n)((?:.*\n)*?)(?:\.\1\ *\n|\z)/
,
},
p
=> {
match
=>
qr/^( # Capture whole thing
(?m:
^(?! # All consecutive lines *not* starting with
(?:
[\#\-\*]+[\ ] |
[\^\|\>] |
\.\w+\s*\n |
\{[^\}]+\}\s*\n
)
)
.*\S.*\n
)+
)
(\s*\n)* # and all blank lines after
/
x,
phrases
=>
$all_phrases
,
filter
=>
sub
{
chomp
},
},
else
=> {
match
=>
qr/^(.*)\n/
,
phrases
=> [],
filter
=>
sub
{
my
$node
=
shift
;
$node
->{type} =
'p'
;
},
},
pre
=> {
match
=>
qr/^(?m:^\.pre\ *\n)((?:.*\n)*?)(?m:^\.pre\ *\n)(?:\s*\n)?/
,
},
blockquote
=> {
match
=>
qr/^((?m:^>.*\n)+)(\s*\n)?/
,
blocks
=>
$all_blocks
,
filter
=>
sub
{
s/^>\ ?//gm;
},
},
waflparagraph
=> {
match
=>
qr/^\{(.*)\}[\ \t]*\n(?:\s*\n)?/
,
filter
=>
sub
{
my
$node
=
shift
;
my
(
$function
,
$options
) =
split
/[: ]/,
$node
->{text}, 2;
my
$replacement
=
defined
$1 ? $1 :
''
;
$options
=
''
unless
defined
$options
;
$options
=~ s/\s*(.*?)\s*/
$replacement
/;
$node
->{attributes}{function} =
$function
;
$node
->{attributes}{options} =
$options
;
undef
$_
;
},
},
hx
=> {
match
=>
qr/^(\^+) *(.*?)(\s+=+)?\s*?\n+/
,
phrases
=>
$all_phrases
,
filter
=>
sub
{
my
$node
=
shift
;
$node
->{type} =
'h'
.
length
(
$node
->{1});
$_
=
$node
->{text} =
$node
->{2};
},
},
ul
=> {
match
=> re_list(
'[\*\-\+]'
),
blocks
=> [
qw(ul ol subl li)
],
filter
=>
sub
{ s/^[\*\-\+\
},
ol
=> {
match
=> re_list(
'\#'
),
blocks
=> [
qw(ul ol subl li)
],
filter
=>
sub
{ s/^[\*\
},
subl
=> {
type
=>
'li'
,
match
=>
qr/^( # Block must start at beginning
# Capture everything in $1
(.*)\n # Capture the whole first line
[\*\#]+\ .*\n # Line starting with one or more bullet
(?:[\*\#]+\ .*\n)* # Lines starting with '*' or '#'
)(?:\s*\n)?/
x,
blocks
=> [
qw(ul ol li2)
],
},
li
=> {
match
=>
qr/(.*)\n/
,
phrases
=>
$all_phrases
,
},
li2
=> {
type
=>
''
,
match
=>
qr/(.*)\n/
,
phrases
=>
$all_phrases
,
},
hr
=> {
match
=>
qr/^--+(?:\s*\n)?/
,
},
table
=> {
match
=>
qr/^(
(
(?m:^\|.*\|\ \n(?=\|))
|
(?m:^\|.*\|\ \ +\n)
|
(?ms:^\|.*?\|\n)
)+
)(?:\s*\n)?/
x,
blocks
=> [
'tr'
],
},
tr
=> {
match
=>
qr/^((?m:^\|.*?\|(?:\n| \n(?=\|)| +\n)))/
s,
blocks
=> [
'td'
],
filter
=>
sub
{ s/\s+\z// },
},
td
=> {
match
=>
qr/\|?\s*(.*?)\s*\|\n?/
s,
phrases
=>
$all_phrases
,
},
wikilink
=> {
match
=>
qr/
(?:"([^"]*)"\s*)?(?:^|(?<=[^$ALPHANUM]))\[(?=[^\s\[\]])
(.*?)
\](?=[^$ALPHANUM]|\z)
/
x,
filter
=>
sub
{
my
$node
=
shift
;
$node
->{attributes}{target} =
$node
->{2};
$_
=
$node
->{1} ||
$node
->{2};
},
},
b
=> {
match
=> re_huggy(
q{\*}
),
phrases
=>
$all_phrases
,
},
tt
=> {
match
=> re_huggy(
q{\`}
),
},
i
=> {
match
=> WikiText::Socialtext::Parser::re_huggy(
q{\_}
),
phrases
=>
$all_phrases
,
},
del
=> {
match
=> re_huggy(
q{\-}
),
phrases
=>
$all_phrases
,
},
im
=> {
match
=>
qr/(\b(?:$im_re)\:[^\s\>\)]+)/
,
filter
=>
sub
{
my
$node
=
shift
;
my
(
$type
,
$id
) =
split
/:/,
$node
->{text}, 2;
$node
->{attributes}{type} =
$type
;
$node
->{attributes}{id} =
$id
;
undef
$_
;
},
},
waflphrase
=> {
match
=>
qr/
(?:^|(?<=[\s\-]))
(?:"(.+?)")?
\{
([\w-]+)
(?=[\:\ \}])
(?:\s*:)?
\s*(.*?)\s*
\}
(?=[^A-Za-z0-9]|\z)
/
x,
filter
=>
sub
{
my
$node
=
shift
;
my
(
$label
,
$function
,
$options
) = @{
$node
}{
qw(1 2 3)
};
$label
||=
''
;
$node
->{attributes}{function} =
$function
;
$node
->{attributes}{options} =
$options
;
$_
=
$label
;
},
},
mail
=> {
match
=>
qr/
(?:"([^"]*)"\s*)?
<?
(?:mailto:)?
([\w+%\-\.]+@(?:[\w\-]+\.)+[\w\-]+)
>?
/
x,
filter
=>
sub
{
my
$node
=
shift
;
$_
=
$node
->{1} ||
$node
->{2};
$node
->{attributes}{address} =
$node
->{2};
},
},
a
=> {
type
=>
'hyperlink'
,
match
=>
qr{
(?:"([^"]*)"\s*)?
<?
(
(?:http|https|ftp|irc|file):
(?://)?
[$uric]+
[A-Za-z0-9/#]
)
>?
}
x,
filter
=>
sub
{
my
$node
=
shift
;
$_
=
$node
->{1} ||
$node
->{2};
$node
->{attributes}{target} =
$node
->{2};
},
},
asis
=> {
match
=>
qr/
\{\{
(.*?)
\}\}(\}*)
/
xs,
filter
=>
sub
{
my
$node
=
shift
;
$node
->{type} =
''
;
$_
=
$node
->{1} .
$node
->{2};
},
},
};
}
sub
re_huggy {
my
$brace1
=
shift
;
my
$brace2
=
shift
||
$brace1
;
qr/
(?:^|(?<=[^{$ALPHANUM}$brace1]))$brace1(?=\S)(?!$brace2)
(.*?)
$brace2(?=[^{$ALPHANUM}$brace2]|\z)
/
x;
}
sub
re_list {
my
$bullet
=
shift
;
return
qr/^( # Block must start at beginning
# Capture everything in $1
^$bullet+\ .*\n # Line starting with one or more bullet
(?:[\*\-\+\#]+\ .*\n)* # Lines starting with '*' or '#'
)(?:\s*\n)?/
x,
}
1;