use
vars
qw( $VERSION $debug )
;
$VERSION
= 0.1 ;
$debug
= 0 ;
'COPY_MODE'
,
'DOCTYPE'
,
'HEADER'
,
'IN_FH'
,
'IN_NAME'
,
'WORK_NAME'
,
'WORK_FH'
,
'REV'
,
'STACK'
,
'UNDECODED_CONTENT'
,
) ;
sub
new {
my
$class
=
shift
;
$class
=
ref
$class
||
$class
;
my
VCP::Source::revml
$self
=
$class
->SUPER::new(
@_
) ;
$self
->{COPY_MODE} = 1 ;
my
(
$spec
,
$args
) =
@_
;
my
$parsed_spec
=
$self
->parse_repo_spec(
$spec
) ;
my
$save_doctype
;
{
local
*ARGV
=
$args
;
GetOptions(
'dtd|version'
=>
sub
{
$self
->{DOCTYPE} = RevML::Doctype->new(
shift
@$args
) ;
},
'save-doctype'
=> \
$save_doctype
,
) or
$self
->usage_and_exit ;
}
$self
->{DOCTYPE} = RevML::Doctype->new
unless
$self
->{DOCTYPE} ;
if
(
$save_doctype
) {
$self
->{COPY_MODE} = 0 ;
$self
->{DOCTYPE}->save_as_pm ;
}
my
@errors
;
my
$files
=
$parsed_spec
->{FILES} ;
$self
->{IN_NAME} =
defined
$files
&&
length
$files
?
$files
:
'-'
;
if
(
$self
->{IN_NAME} eq
'-'
) {
$self
->{IN_FH} = \
*STDIN
;
}
else
{
$self
->{IN_FH} = Symbol::gensym ;
open
(
$self
->{IN_FH},
"<$self->{IN_NAME}"
)
or
die
"$!: $self->{IN_NAME}\n"
;
}
$self
->{WORK_FH} = Symbol::gensym ;
die
join
(
''
,
@errors
)
if
@errors
;
return
$self
;
}
sub
dest_expected {
my
VCP::Source::revml
$self
=
shift
;
return
$self
->{COPY_MODE} ;
}
sub
handle_header {
my
VCP::Source::revml
$self
=
shift
;
$self
->{HEADER} =
shift
;
return
;
}
sub
get_rev {
my
VCP::Source::revml
$self
=
shift
;
my
VCP::Rev
$r
;
(
$r
) =
@_
;
}
sub
copy_revs {
my
VCP::Source::revml
$self
=
shift
;
$self
->revs( VCP::Revs->new ) ;
$self
->parse_revml_file ;
$self
->dest->sort_revs(
$self
->revs ) ;
my
VCP::Rev
$r
;
while
(
$r
=
$self
->revs->
shift
) {
$self
->get_rev(
$r
) ;
$self
->dest->handle_rev(
$r
) ;
}
}
sub
parse_revml_file {
my
VCP::Source::revml
$self
=
shift
;
my
@stack
;
$self
->{STACK} = \
@stack
;
my
$char_handler
=
sub
{
my
$expat
=
shift
;
my
$pelt
=
$stack
[-1] ;
my
$tag
=
$pelt
->{NAME} ;
my
$content
=
$pelt
->{CONTENT} ;
if
(
defined
$content
) {
if
(
@$content
&&
$content
->[-1]->{TYPE} eq
'PCDATA'
) {
$content
->[-1]->{PCDATA} .=
$_
[0] ;
}
else
{
push
@$content
, {
TYPE
=>
'PCDATA'
,
PCDATA
=>
$_
[0] } ;
}
}
my
$sub
=
"$tag\_characters"
;
$self
->
$sub
(
@_
)
if
$self
->can(
$sub
) ;
} ;
my
$p
= XML::Parser->new(
Handlers
=> {
Start
=>
sub
{
my
$expat
=
shift
;
my
$tag
=
shift
;
if
(
$tag
eq
"char"
) {
while
(
@_
) {
my
(
$attr
,
$value
) = (
shift
,
shift
) ;
if
(
$attr
eq
"code"
) {
if
(
$value
=~ s{^0x}{} ) {
$value
=
chr
(
hex
(
$value
) ) ;
}
else
{
$value
=
chr
(
$value
) ;
}
$char_handler
->(
$expat
,
$value
) ;
}
}
return
;
}
push
@stack
, {
NAME
=>
$tag
,
ATTRS
=> {
@_
},
CONTENT
=> !
$self
->can(
"$tag\_characters"
) ? [] :
undef
,
} ;
my
$sub
=
"start_$tag"
;
$self
->
$sub
(
@_
)
if
$self
->can(
$sub
) ;
},
End
=>
sub
{
my
$expat
=
shift
;
my
$tag
=
shift
;
return
if
$tag
eq
"char"
;
die
"Unexpected </$tag>, expected </$stack[-1]>\n"
unless
$tag
eq
$stack
[-1]->{NAME} ;
my
$sub
=
"end_$tag"
;
$self
->
$sub
(
@_
)
if
$self
->can(
$sub
) ;
my
$elt
=
pop
@stack
;
if
(
@stack
&&
$stack
[-1]->{NAME} =~ /^rev(ml)?$/
&&
defined
$elt
->{CONTENT}
) {
if
(
$tag
eq
'label'
) {
push
@{
$stack
[-1]->{labels}},
$elt
;
}
else
{
$stack
[-1]->{
$tag
} =
$elt
;
}
}
},
Char
=>
$char_handler
,
},
) ;
$p
->parse(
$self
->{IN_FH} ) ;
}
sub
start_rev {
my
VCP::Source::revml
$self
=
shift
;
for
(
map
(
$self
->{STACK}->[-2]->{
$_
},
grep
/^[a-z_0-9]+$/,
keys
%{
$self
->{STACK}->[-2]}
) ) {
$self
->{HEADER}->{
$_
->{NAME}} =
$_
->{CONTENT}->[0]->{PCDATA} ;
}
$self
->{REV} =
undef
;
}
sub
init_rev_meta {
my
VCP::Source::revml
$self
=
shift
;
my
$rev_elt
=
$self
->{STACK}->[-2] ;
my
VCP::Rev
$r
= VCP::Rev->new() ;
for
(
grep
/^[a-z_0-9]+$/,
keys
%$rev_elt
) {
if
(
$_
eq
'labels'
) {
$r
->labels(
map
$_
->{CONTENT}->[0]->{PCDATA}, @{
$rev_elt
->{labels}}
) ;
}
else
{
my
$out_key
=
$_
;
$r
->
$out_key
(
$rev_elt
->{
$_
}->{CONTENT}->[0]->{PCDATA} ) ;
}
}
$r
->work_path(
$self
->work_path(
$r
->name,
$r
->rev_id ) ) ;
$self
->mkpdir(
$r
->work_path ) ;
$self
->{REV} =
$r
;
return
;
}
sub
start_delete {
my
VCP::Source::revml
$self
=
shift
;
$self
->init_rev_meta ;
$self
->{REV}->action(
"delete"
) ;
$self
->{REV}->work_path(
undef
) ;
}
sub
start_move {
my
VCP::Source::revml
$self
=
shift
;
$self
->init_rev_meta ;
$self
->{REV}->action(
"move"
) ;
$self
->{REV}->work_path(
undef
) ;
die
"<move> unsupported"
;
}
sub
start_content {
my
VCP::Source::revml
$self
=
shift
;
$self
->init_rev_meta ;
$self
->{REV}->action(
"edit"
) ;
$self
->{WORK_NAME} =
$self
->{REV}->work_path ;
$self
->{UNDECODED_CONTENT} =
""
;
sysopen
$self
->{WORK_FH},
$self
->{WORK_NAME}, O_WRONLY | O_CREAT | O_TRUNC
or
die
"$!: $self->{WORK_NAME}"
;
}
sub
content_characters {
my
VCP::Source::revml
$self
=
shift
;
if
(
$self
->{STACK}->[-1]->{ATTRS}->{encoding} eq
"base64"
) {
$self
->{UNDECODED_CONTENT} .=
shift
;
if
(
$self
->{UNDECODED_CONTENT} =~ s{(.*\n)}{} ) {
syswrite
(
$self
->{WORK_FH}, decode_base64( $1 ) )
or
die
"$! writing $self->{WORK_NAME}"
;
}
}
elsif
(
$self
->{STACK}->[-1]->{ATTRS}->{encoding} eq
"none"
) {
syswrite
$self
->{WORK_FH},
$_
[0]
or
die
"$! writing $self->{WORK_NAME}"
;
}
else
{
die
"vcp: unknown encoding '$self->{STACK}->[-1]->{ATTRS}->{encoding}'\n"
;
}
return
;
}
sub
end_content {
my
VCP::Source::revml
$self
=
shift
;
if
(
length
$self
->{UNDECODED_CONTENT} ) {
syswrite
(
$self
->{WORK_FH}, decode_base64(
$self
->{UNDECODED_CONTENT} ) )
or
die
"$! writing $self->{WORK_NAME}"
;
}
close
$self
->{WORK_FH} or
die
"$! closing $self->{WORK_NAME}"
;
if
(
$self
->none_seen ) {
$self
->dest->handle_header(
$self
->{HEADER} ) ;
}
$self
->seen(
$self
->{REV} ) ;
}
sub
start_delta {
my
VCP::Source::revml
$self
=
shift
;
$self
->init_rev_meta ;
my
$r
=
$self
->{REV} ;
$r
->action(
'edit'
) ;
$self
->{WORK_NAME} =
$self
->work_path(
$r
->name,
'delta'
) ;
sysopen
$self
->{WORK_FH},
$self
->{WORK_NAME}, O_WRONLY | O_CREAT | O_TRUNC
or
die
"$!: $self->{WORK_NAME}"
;
}
*delta_characters
= \
&content_characters
;
*delta_characters
= \
&content_characters
;
sub
end_delta {
my
VCP::Source::revml
$self
=
shift
;
close
$self
->{WORK_FH} or
die
"$! closing $self->{WORK_NAME}"
;
my
VCP::Rev
$r
=
$self
->{REV} ;
my
$is_first
=
$self
->none_seen ;
my
VCP::Rev
$saw
=
$self
->seen(
$r
) ;
die
"No original content to patch for "
,
$r
->name,
","
,
" revision "
,
$r
->rev_id
unless
defined
$saw
;
if
( -s
$self
->{WORK_NAME} ) {
my
$patchout
;
$self
->run(
[
'patch'
,
'-o'
,
$r
->work_path,
'-u'
,
$saw
->work_path,
$self
->{WORK_NAME}
],
\
undef
,
\
$patchout
,
)
or
die
"$! patching "
,
$saw
->work_path,
" up to rev "
,
$r
->rev_id,
",\n$patchout"
;
$patchout
=~ s/^missing header
for
.*$//m ;
$patchout
=~ s/^patching file.*$//m ;
debug
"vcp: patch output:\n$patchout"
if
debugging
$self
;
unlink
$self
->{WORK_NAME} or
warn
"$! unlinking $self->{WORK_NAME}\n"
;
}
else
{
debug
"vcp: linking "
,
$saw
->work_path,
", "
,
$r
->work_path
if
debugging
$self
;
link
$saw
->work_path,
$r
->work_path
or
die
"vcp: $! linking "
,
$saw
->work_path,
", "
,
$r
->work_path
}
if
(
$is_first
) {
$self
->dest->handle_header(
$self
->{HEADER} ) ;
}
}
sub
end_time {
my
VCP::Source::revml
$self
=
shift
;
my
$timestr
=
$self
->{STACK}->[-1]->{CONTENT}->[0]->{PCDATA} ;
confess
"Malformed time value $timestr\n"
unless
$timestr
=~ /^\d\d\d\d(\D\d\d){5}/ ;
confess
"Non-UTC time value $timestr\n"
unless
substr
$timestr
, -1 eq
'Z'
;
my
@f
=
split
( /\D/,
$timestr
) ;
--
$f
[1] ;
$self
->{STACK}->[-1]->{CONTENT}->[0]->{PCDATA} = timegm(
reverse
@f
) ;
}
*end_mod_time
=
*end_mod_time
= \
&end_time
;
sub
end_digest {
my
VCP::Source::revml
$self
=
shift
;
$self
->init_rev_meta
unless
defined
$self
->{REV} ;
my
$r
=
$self
->{REV} ;
my
$original_digest
=
$self
->{STACK}->[-1]->{CONTENT}->[0]->{PCDATA} ;
my
$d
= Digest::MD5->new() ;
if
(
$r
->is_base_rev ) {
$self
->dest->handle_header(
$self
->{HEADER} )
if
$self
->none_seen ;
return
unless
$self
->dest->backfill(
$r
) ;
my
VCP::Rev
$saw
=
$self
->seen(
$r
) ;
warn
"I've seen "
,
$r
->name,
" before"
if
$saw
;
}
my
$work_path
=
$r
->work_path ;
sysopen
F,
$work_path
, O_RDONLY
or
die
"vcp: $! opening '$work_path' for digestion\n"
;
$d
->addfile( \
*F
) ;
close
F ;
my
$reconstituted_digest
=
$d
->b64digest ;
unless
(
$original_digest
eq
$reconstituted_digest
) {
my
$reject_file_name
=
$r
->name ;
$reject_file_name
=~ s{[^A-Za-z0-9 -.]+}{-}g ;
$reject_file_name
=~ s{^-+}{}g ;
my
$reject_file_path
= File::Spec->catfile(
File::Spec->tmpdir,
$reject_file_name
) ;
link
$work_path
,
$reject_file_path
or
die
"vcp: digest check failed for "
,
$r
->as_string,
"\nvcp: failed to leave copy in '$reject_file_path': $!\n"
;
die
"vcp: digest check failed for "
,
$r
->as_string,
"\nvcp: copy left in '$reject_file_path'\n"
;
}
}
sub
end_rev {
my
VCP::Source::revml
$self
=
shift
;
$self
->revs->add(
$self
->{REV} )
unless
$self
->{REV}->is_base_rev ;
$self
->{REV} =
undef
;
}
1 ;