use strict;

package XML::Handler::Trees;
use vars qw/$VERSION/;
$VERSION = '0.02';

package XML::Handler::Tree;

sub new {
  my $class = ref($_[0]) || $_[0];
  bless {},$class;
}

sub start_document {
  my $self=shift;
  $self->{Lists}=[];
  $self->{Curlist}=$self->{Tree}=[];
}

sub start_element {
  my ($self,$element)=@_;
  my $newlist;
  if (exists $element->{LocalName}) {
    # namespaces are available!
    $newlist = [{}];
    foreach my $attr (values %{$element->{Attributes}}) {
      if ($attr->{NamespaceURI}) {
        $newlist->[0]{"{$attr->{NamespaceURI}}$attr->{LocalName}"} = $attr->{Value};
      }
      else {
        $newlist->[0]{$attr->{Name}} = $attr->{Value};
      }
    }
  }
  elsif (ref $element->{Attributes} eq 'HASH') {
    $newlist=[{map {$_=>$element->{Attributes}{$_}} keys %{$element->{Attributes}}}];
  }
  else {
    $newlist=[{map {$_=>$element->{Attributes}{$_}{Value}} keys %{$element->{Attributes}}}];
  }
  push @{ $self->{Lists} }, $self->{Curlist};
  if (exists($element->{LocalName}) && $element->{NamespaceURI}) {
    push @{ $self->{Curlist} }, "{$element->{NamespaceURI}}$element->{LocalName}" => $newlist;
  }
  else {
    push @{ $self->{Curlist} }, $element->{Name} => $newlist;
  }
  $self->{Curlist} = $newlist;
}

sub end_element {
  my ($self,$element)=@_;
  $self->{Curlist}=pop @{$self->{Lists}};
}

sub characters {
  my ($self,$text)=@_;
  my $clist = $self->{Curlist};
  my $pos = $#$clist;
  if ($pos>0 and $clist->[$pos-1] eq '0') {
    $clist->[$pos].=$text->{Data};
  }
  else {
    push @$clist,0=>$text->{Data};
  }
}

sub comment {}

sub processing_instruction {}

sub end_document {
  my $self=shift;
  delete $self->{Curlist};
  delete $self->{Lists};
  $self->{Tree};
}

package XML::Handler::EasyTree;

sub new {
  my $class=shift;
  $class=ref($class) || $class;
  my $self={Noempty=>0,Latin=>0,Searchable=>0,@_};
  $self->{Noempty}||=$self->{Searchable};
  bless $self,$class;
}

sub start_document {
  my $self = shift;
  $self->{Lists} = [];
  $self->{Curlist} = $self->{Tree} = [];
}

sub start_element {
  my ($self,$element)=@_;
  $self->checkempty();
  my $newlist=[];
  my $newnode;
  if ($self->{Searchable}) {
    $newnode= XML::Handler::EasyTree::Searchable->new( Name => $self->nsname($element), Content => $newlist );
  }
  else {
    $newnode={type=>'e',attrib=>{},name=>$self->nsname($element),content=>$newlist};
  }
  if (exists $element->{LocalName}) {
    while (my ($name,$obj) = each %{$element->{Attributes}}) {
      $newnode->{attrib}{$name} = $self->encode($obj->{Value});
    }
  }
  elsif (ref $element->{Attributes} eq 'HASH') {
    while (my ($name,$val)=each %{$element->{Attributes}}) {
      $newnode->{attrib}{$self->nsname($name)}=$self->encode($val);
    }
  }
  else {
    foreach my $att (keys %{$element->{Attributes}}) {
      $newnode->{attrib}{$self->nsname($element->{Attributes}{$att})}=$self->encode($element->{Attributes}{$att}{Value});
    }
  }
  push @{ $self->{Lists} }, $self->{Curlist};
  push @{ $self->{Curlist} }, $newnode;
  $self->{Curlist} = $newlist;
}

sub end_element {
  my $self=shift;
  $self->checkempty();
  $self->{Curlist}=pop @{$self->{Lists}};
}

sub characters {
  my ($self,$text)=@_;
  my $clist=$self->{Curlist};
  if (!@$clist || $clist->[-1]{type} ne 't') {
    push @$clist,{type=>'t',content=>''};
  }
  $clist->[-1]{content}.=$self->encode($text->{Data});
}

sub processing_instruction {
  my ($self,$pi)=@_;
  $self->checkempty();
  my $clist=$self->{Curlist};
  push @$clist,{type=>'p',target=>$self->encode($pi->{Target}),content=>$self->encode($pi->{Data})};
}

sub comment {}

sub end_document {
  my $self = shift;
  $self->checkempty();
  delete $self->{Curlist};
  delete $self->{Lists};
  if ($self->{Searchable}) {
    return XML::Handler::EasyTree::Searchable->new( Name => '__TOPLEVEL__', Content => $self->{Tree} );
  } 
  $self->{Tree};
}

sub nsname {
  my ($self,$name)=@_;
  if (ref $name) {
    if (defined $name->{NamespaceURI}) {
      $name="{$name->{NamespaceURI}}$name->{LocalName}";
    }
    else {
      $name=$name->{Name};
    }
  }
  return $self->encode($name);    
}

sub encode {
  my ($self,$text)=@_;
  if ($self->{Latin}) {
    $text=~s{([\xc0-\xc3])(.)}{
      my $hi = ord($1);
      my $lo = ord($2);
      chr((($hi & 0x03) <<6) | ($lo & 0x3F))
     }ge;
  }
  $text;
}

sub checkempty() {
  my $self=shift;
  if ($self->{Noempty}) {
    my $clist=$self->{Curlist};
    if (@$clist && $clist->[-1]{type} eq 't' && $clist->[-1]{content}=~/^\s+$/) {
      pop @$clist;
    }
  }
}

package XML::Handler::EasyTree::Searchable;

#
# new() returns a new node with the same structure at the `newnode'
# hashref
#
# Usage: XML::Handler::EasyTree::Searchable->new( Name => $name, Content => $content );
#
sub new {
  my $type = shift;
  my $class = ref($type) || $type || die "must supply a object type" ;

  my %opts = @_;

  my $name = $opts{Name} || '';
  my $content = $opts{Content} || undef;

  return bless ( {
		  type    => 'e',
		  attrib  => {},
		  name    => $name, 
		  content => $content,
		 }, $class);
}

#
# name() returns the name of the node. Ideally, it should return a
# "fully qualified" name, but it doesn't
#
sub name {
  my $self = shift;
  return $self->{name};
}

#
# value() returns the value associated with an object
#
sub value {
  my $self = shift;

  return( undef )
    unless( ( exists $self->{content} ) && ( defined $self->{content} ) );

  my $possible = $self->{content};

  die "not an array" unless( "$possible" =~ /ARRAY/ );

  $possible = $possible->[0];

  return( undef )
    unless( ( exists $possible->{type} ) && ( $possible->{type} eq 't' ) );

  return( undef )
    unless( ( exists $possible->{content} ) && ( defined $possible->{content} ) );

  return $possible->{content};
}

#
# usage: $newobj = $obj->child( $name );
#
# child() returns a child (elements only) of the object with the $name
#
# for the case where there is more than one child that match $name,
# the array context semantics haven't been completely worked out:
# - in an array context, all children are returned.
# - in scalar context, the first child matching $name is returned.
#
# In a scalar context, The XML::Parser::SimpleObj class returns an
# object containing all the children matching $name, unless there is
# only one child in which case it returns that child (see commented
# code). I find that behavior confusing.
# 
sub child {
  my $self = shift;
  my $spec = shift || '';

  my $array = $self->{content};

  my @rv;
  if( $spec ) {
    @rv = grep { $_->{name} eq $spec } grep { $_->{type} eq 'e' } @$array;
  } else {
    @rv = grep { $_->{type} eq 'e' } @$array;
  }

  my $num = scalar( @rv );
  
  if( wantarray() ) {
    return @rv; 
  } else {
    return '' unless( $num );
    return $rv[0] if( $num == 1 );
    # my $class = ref( $self );
    # return $class->new( Name => "__magic_child_list_object__", Content => [ @rv ] );
  }
}

#
# usage: @children = $obj->children( $name );
#
# children() returns a list of all children (elements only) of the
# $obj that match $name -- in the order in which they appeared in the
# original xml text.
#
sub children {
  my $self = shift;
  my $array = $self->{content};
  my $spec = shift || '';


  my @rv;
  if( $spec ) {
    @rv = grep { $_->{name} eq $spec } grep { $_->{type} eq 'e' } @$array;
  } else {
    @rv = grep { $_->{type} eq 'e' } @$array;
  }

  return @rv;
}

#
# usage: @children_names = $obj->children_names();
#
# children_names() returns a list of all the names of the objects
# children (elements only) in the order in which they appeared in the
# original text
#
sub children_names {
  my $self = shift;
  my $array = $self->{content};

  return map { $_->{name} } grep { $_->{type} eq 'e' } @$array;
}

#
# usage: $attrib = $obj->attribute( $att_name );
#
# attribute() returns the string associated with the attribute of the
# object. If not found returns a null string.
#
sub attribute {
  my $self = shift;
  my $spec = shift || return '';

  return '' unless( ( exists $self->{attrib} ) && ( defined $self->{attrib} ) );

  my $attrib = $self->{attrib};
  return '' unless( ( exists $attrib->{$spec} ) && ( defined $attrib->{$spec} ) );

  return $attrib->{$spec};
}
  
#
# usage: @attribute_list = $obj->attribute_list();
#
# attribute_list() returns a list (in no particular order) of the
# attribute names associated with the object
#
sub attribute_list {
  my $self = shift;

  return '' unless( ( exists $self->{attrib} ) && ( defined $self->{attrib} ) );

  my $attrib = $self->{attrib};
  return '' unless( "$attrib" =~ /HASH/ );

  return keys %$attrib;
}

#
# usage: $text = $obj->dump_tree();
#
# dump_tree() returns a textual representation (in xml form) of the
# object's heirarchy. Only elements are processed.
#
#
sub dump_tree {
  my $self = shift;
  my %opts = @_;

  my $pretty = delete $opts{-pretty};

  my $name	= $self->name();
  my $value	= $self->value();
  my @children	= $self->children();

  my $text = '';
  unless( $name eq '__TOPLEVEL__' ) {
      $text .= "<$name";
      for my $att ( $self->attribute_list() ) {
	  $text .= sprintf( " %s=\"%s\"", $att, encode($self->attribute( $att )) );
      }
      $text .= ">";

      if( $value ) {
	  $text .= encode($value);
      }
  }

  
  for my $child ( @children ) {
    $text .= $child->dump_tree();
  }

  unless( $name eq '__TOPLEVEL__' ) {
      $text .= "</$name>";
  }

  return $text;
}

#
# usage: $text = $obj->pretty_dump_tree();
#
# pretty_dump_tree() is identical to dump_tree(), except that newline
# and indentation embellishments are added
#
sub pretty_dump_tree {
  my $self = shift;
  my $tab = shift || 0;

  my $indent = " " x ( 2 * $tab );

  my $name	= $self->name();
  my $value	= $self->value();
  my @children	= $self->children();

  my $text = '';
  unless( $name eq '__TOPLEVEL__' ) {
      $text .= "$indent<$name";
      for my $att ( $self->attribute_list() ) {
	  $text .= sprintf( " %s=\"%s\"", $att, encode($self->attribute( $att )) );
      }
      $text .= ">";
      
      if( defined $value ) {
	  $text .= encode($value);
	  $text .= "</$name>\n";
	  return $text;
      } else {
	  $text .= "\n";
      }
  }

  for my $child ( @children ) {
    $text .= $child->pretty_dump_tree( $tab + 1 );
  }

  unless( $name eq '__TOPLEVEL__' ) {
      $text .= "$indent</$name>\n";
  }

  return $text;
}

sub encode {
  my $encstr=shift;
  my %encodings=('&'=>'amp','<'=>'lt','>'=>'gt','"'=>'quot',"'"=>'apos');
  $encstr=~s/([&<>"'])/&$encodings{$1};/g;
  $encstr;
}

package XML::Handler::TreeBuilder;

use vars qw(@ISA);
@ISA=qw(XML::Element);

sub new {
  require XML::Element; 
  my $class = ref($_[0]) || $_[0];
  my $self = XML::Element->new('NIL');
  $self->{'_element_class'} = 'XML::Element';
  $self->{'_store_comments'}     = 0;
  $self->{'_store_pis'}          = 0;
  $self->{'_store_declarations'} = 0;
  $self->{_stack}=[];
  bless $self, $class;
}
  
sub start_document {}

sub start_element {
  my ($self,$element)=@_;
  my @attlist;
  if (exists $element->{LocalName}) {
    @attlist=map {$_=>$element->{Attributes}{$_}{Value}} keys %{$element->{Attributes}};
  }
  elsif (ref $element->{Attributes} eq 'HASH') {
    @attlist=map {$_=>$element->{Attributes}{$_}} keys %{$element->{Attributes}};
  } 
  else {
    @attlist=map {$_=>$element->{Attributes}{$_}{Value}} keys %{$element->{Attributes}};
  }
  if(@{$self->{_stack}}) {
    push @{$self->{_stack}}, $self->{'_element_class'}->new($element->{Name},@attlist);
    $self->{_stack}[-2]->push_content( $self->{_stack}[-1] );
  }
  else {
    $self->tag($element->{Name});
    while(@attlist) {
      $self->attr(splice(@attlist,0,2));
    }
    push @{$self->{_stack}}, $self;
  }
}

sub end_element {
  my $self=shift;
  pop @{$self->{_stack}};
  return
}

sub characters {
  my ($self,$text)=@_;
  $self->{_stack}[-1]->push_content($text->{Data});
}
    
sub comment {
  my ($self,$comment)=@_;
  return unless $self->{'_store_comments'};
  (@{$self->{_stack}} ? $self->{_stack}[-1] : $self)->push_content(
      $self->{'_element_class'}->new('~comment', 'text' => $comment->{Data})
    );
  return;
}

sub processing_instruction {
  my ($self,$pi)=@_;
  return unless $self->{'_store_pis'};
  (@{$self->{_stack}} ? $self->{_stack}[-1] : $self)->push_content(
      $self->{'_element_class'}->new('~pi', 'text' => "$pi->{Target} $pi->{Data}")
    );
  return;
}

sub end_document {    
  my $self=shift;
  return $self;
}

sub _elem # universal accessor...
{
  my($self, $elem, $val) = @_;
  my $old = $self->{$elem};
  $self->{$elem} = $val if defined $val;
  return $old;
}

sub store_comments { shift->_elem('_store_comments', @_); }
sub store_declarations { shift->_elem('_store_declarations', @_); }
sub store_pis      { shift->_elem('_store_pis', @_); }

1;
__END__

=head1 NAME

XML::Handler::Trees - PerlSAX handlers for building tree structures

=head1 SYNOPSIS

  use XML::Handler::Trees;
  use XML::Parser::PerlSAX;

  my $p=XML::Parser::PerlSAX->new();
  my $h=XML::Handler::Tree->new();
  my $tree=$p->parse(Handler=>$h,Source=>{SystemId=>'file.xml'});

  my $p=XML::Parser::PerlSAX->new();
  my $h=XML::Handler::EasyTree->new(Noempty=>1);
  my $easytree=$p->parse(Handler=>$h,Source=>{SystemId=>'file.xml'});

  my $p=XML::Parser::PerlSAX->new();
  my $h=XML::Handler::TreeBuilder->new();
  $h->store_pis(1);
  my $tree=$p->parse(Handler=>$h,Source=>{SystemId=>'file.xml'});

=head1 DESCRIPTION

XML::Handler::Trees provides three PerlSAX handler classes for building
tree structures.  XML::Handler::Tree builds the same type of tree as the
"Tree" style in XML::Parser.  XML::Handler::EasyTree builds the same
type of tree as the "EasyTree" style added to XML::Parser by
XML::Parser::EasyTree.  XML::Handler::TreeBuilder builds the same type
of tree as Sean M. Burke's XML::TreeBuilder.  These classes make it
possible to construct these tree structures from sources other than
XML::Parser.

All three handlers can be driven by either PerlSAX 1 or PerlSAX 2
drivers.  In all cases, the end_document() method returns a reference to
the constructed tree, which normally becomes the return value of the
PerlSAX driver.

=head1 CLASS XML::Handler::Tree

This handler builds the same type of tree structure as the "Tree" style
in XML::Parser.  Some modules such as Dan Brian's XML::SimpleObject work
with this type of tree.  See the documentation for XML::Parser for details.  

=head2 METHODS

=over 4

=item $handler = XML::Handler::Tree->new()

Creates a handler object.

=back

=head1 CLASS XML::Handler::EasyTree

This handler builds a lightweight tree structure representing the XML 
document.  This structure is, at least in this author's opinion, easier to 
work with than the "standard" style of tree.  It is the same type of
structure as built by XML::Parser when using XML::Parser::EasyTree, or
by the get_simple_tree method in XML::Records.

The tree is returned as a reference to an array of tree nodes, each of
which is a hash reference. All nodes have a 'type' key whose value is
the type of the node: 'e' for element nodes, 't' for text nodes, and 'p'
for processing instruction nodes. All nodes also have a 'content' key
whose value is a reference to an array holding the element's child nodes
for element nodes, the string value for text nodes, and the data value
for processing instruction nodes. Element nodes also have an 'attrib'
key whose value is a reference to a hash of attribute names and values and a 'name'
key whose value is the element's name.  Processing instructions also have
a 'target' key whose value is the PI's target. 

EasyTree nodes are ordinary Perl hashes and are not objects.  Contiguous 
runs of text are always returned in a single node.

The reason the parser returns an array reference rather than the root 
element's node is that an XML document can legally contain processing 
instructions outside the root element (the xml-stylesheet PI is commonly 
used this way).

If namespace information is available (only possible with PerlSAX 2),
element and attribute names will be prefixed with their (possibly empty)
namespace URI enclosed in curly brackets, and namespace prefixes will be
stripped from names.

=head2 METHODS

=over 4

=item $handler = XML::Handler::EasyTree->new([options])

Creates a handler object.  Options can be provided hash-style:

=over 4

=item Noempty

If this is set to a true value, text nodes consisting entirely of
whitespace will not be stored in the tree.  The default is false.

=item Latin

If this is set to a true value, characters with Unicode values in the
Latin-1 range (160-255) will be stored in the tree as Latin-1 rather
than UTF-8.  The default is false.

=item Searchable

If this is set to a true value, the parser will return a tree of XML::Handler::EasyTree::Searchable
objects rather than bare array references, providing access to the navigation methods
listed below.  The top-level node returned will be a dummy element node with a name of "__TOPLEVEL__".
It is false by default.  Setting this option automatically enables the Noempty option.

=back

=back

=head2 XML::Handler::EasyTree::Searchable METHODS

If the Searchable option is set, all nodes in the tree will be XML::Handler::EasyTree::Searchable objects,
which have the same structure as EasyTree nodes but also implement the following methods similar to
those in XML::SimpleObject.

=over 4

=item $name = $node->name()

Returns the name of the node. Ideally, it should return a
"fully qualified" name, but it doesn't.

=item $val = $node->value()

Returns the text value associated with a node object.  Returns undef if the node has
no text children or its first child is not a text node.

=item $newobj = $obj->child( $name );

Returns a child (elements only) of the object with the $name.

For the case where there is more than one child that match $name,
the array context semantics haven't been completely worked out:
- in an array context, all children are returned.
- in scalar context, the first child matching $name is returned.

In a scalar context, The XML::Parser::SimpleObj class returns an
object containing all the children matching $name, unless there is
only one child in which case it returns that child (see commented
code). I find that behavior confusing.

=item @children = $obj->children( $name );

Returns a list of all children (elements only) of the
$obj that match $name -- in the order in which they appeared in the
original xml text.

=item @children_names = $obj->children_names();

Returns a list of all the names of the objects
children (elements only) in the order in which they appeared in the
original text.

=item $attrib = $obj->attribute( $att_name );

Returns the string associated with the attribute of the
object. If not found returns a null string.

=item @attribute_list = $obj->attribute_list();

Returns a list (in no particular order) of the
attribute names associated with the object

=item $text = $obj->dump_tree();

Returns a textual representation (in xml form) of the
object's hierarchy. Only elements are processed. The result will
be in whatever character encoding the SAX driver delivered (which may
not be the same encoding as the original source).

=item $text = $obj->pretty_dump_tree();

Identical to dump_tree(), except that newline
and indentation embellishments are added

=back

=head2 EXAMPLE

 #! /usr/bin/perl -w
 
 use XML::Handler::Trees;
 use XML::Parser::PerlSAX;
 use strict;
 
 my $p=XML::Parser::PerlSAX->new();
 my $h=XML::Handler::EasyTree->new( Searchable=>1 );
 my $easytree=$p->parse( Handler => $h, Source => { SystemId => 'systemB.xml' } );
 
 my $vme = $easytree->child( "vmesystem" );
 
 print "\n";
 print "vmesystem config: ", $vme->attribute( "configuration_name" ), "\n";
 
 print "\n";
 print "vmesystem children: ", join( ', ', $vme->children_names() ), "\n";
 
 print "\n";
 print "gps model is ", $vme->child( "gps" )->child( "model" )->value(), "\n";
 my $gps = $vme->child( "gps" );
 print "gps slot is ", $gps->child( "slot" )->value(), "\n";
 
 print "\n";
 print "reconstructed XML: \n";
 print $easytree->dump_tree(), "\n";
 
 # print "\n";
 # print "recontructed XML (pretty): \n";
 # print $easytree->pretty_dump_tree(), "\n";
 
 print "\n";
 exit;

=head1 CLASS XML::Handler::TreeBuilder

This handler builds XML document trees constructed of
XML::Element objects (XML::Element is a subclass of HTML::Element
adapted for XML).  To use it, XML::TreeBuilder and its prerequisite
HTML::Tree need to be installed.  See the documentation for those
modules for information on how to work with these tree structures.

=head2 METHODS

=over 4

=item $handler = XML::Handler::TreeBuilder->new()

Creates a handler which builds a tree rooted in an XML::Element.

=item $root->store_comments(value)

This determines whether comments will be stored in the tree (not all SAX
drivers generate comment events).  Currently, this is off by default.

=item $root->store_declarations(value)

This determines whether markup declarations will be stored in the tree.
Currently, this is off by default.  The present implementation does not
store markup declarations in any case; this method is provided for future use.

=item $root->store_pis(value)

This determines whether processing instructions will be stored in the tree.
Currently, this is off (false) by default.

=back

=head1 AUTHOR

Eric Bohlman (ebohlman@omsdev.com)

PerlSAX 2 compatibility added by Matt Sergeant (matt@sergeant.org)

XML::EasyTree::Searchable written by Stuart McDow (smcdow@moontower.org)

Copyright (c) 2001 Eric Bohlman.

Portions of this code Copyright (c) 2001 Matt Sergeant.

Portions of this code Copyright (c) 2001 Stuart McDow.

All rights reserved. This program is free software; you can redistribute it
and/or modify it under the same terms as Perl itself.

=head1 SEE ALSO

 L<perl>
 L<XML::Parser>
 L<XML::SimpleObject>
 L<XML::Parser::EasyTree>
 L<XML::TreeBuilder>
 L<XML::Element>
 L<HTML::Element>
 L<PerlSAX>

=cut