# Copyright (C) 1998 Ken MacLeod
# XML::Grove is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
# $Id: Grove.pm,v 1.15 1999/08/17 15:01:28 kmacleod Exp $

use strict;
use 5.005;
use Data::Grove;

package XML::Grove;
use vars qw{$VERSION @ISA};

$VERSION = '0.46alpha';

@ISA = qw{Data::Grove};

package XML::Grove::Document;
use vars qw{@ISA $type_name};
@ISA = qw{XML::Grove};

$type_name = 'document';

# Override methods that may be loaded from Data::Grove::Parent.  In
# XML::Grove, `root' and `rootpath' refer to the root _element_ of the
# grove, not the document that contains it.

# Note: this routine specifically sets $value and does last instead of
# returning $child immediately because there is a bug in Perl 5.005 that
# causes the returned value to disappear
sub root {
    my $self = shift;

    if (@_) {
	return $self->{Contents} = [ shift ];
    } else {
	my $value = undef;

	foreach my $child (@{$self->{Contents}}) {
	    if ($child->isa('XML::Grove::Element')) {
		$value = $child;

	return $value;

sub rootpath {

package XML::Grove::Element;
use vars qw{ @ISA $type_name };
@ISA = qw{XML::Grove};
$type_name = 'element';

package XML::Grove::PI;
use vars qw{ @ISA $type_name };
@ISA = qw{XML::Grove};
$type_name = 'pi';

package XML::Grove::Entity::External;
use vars qw{ @ISA $type_name };
@ISA = qw{XML::Grove};
$type_name = 'external_entity';

package XML::Grove::Entity::SubDoc;
use vars qw{ @ISA $type_name };
@ISA = qw{XML::Grove};
$type_name = 'subdoc_entity';

package XML::Grove::Entity::SGML;
use vars qw{ @ISA $type_name };
@ISA = qw{XML::Grove};
$type_name = 'sgml_entity';

package XML::Grove::Entity;
use vars qw{ @ISA $type_name };
@ISA = qw{XML::Grove};
$type_name = 'entity';

package XML::Grove::Notation;
use vars qw{ @ISA $type_name };
@ISA = qw{XML::Grove};
$type_name = 'notation';

package XML::Grove::Comment;
use vars qw{ @ISA $type_name };
@ISA = qw{XML::Grove};
$type_name = 'comment';

package XML::Grove::SubDoc;
use vars qw{ @ISA $type_name };
@ISA = qw{XML::Grove};
$type_name = 'subdoc';

package XML::Grove::Characters;
use vars qw{ @ISA $type_name };
@ISA = qw{XML::Grove};
$type_name = 'characters';

package XML::Grove::CData;
use vars qw{ @ISA $type_name };
@ISA = qw{XML::Grove};
$type_name = 'cdata';

package XML::Grove::ElementDecl;
use vars qw{ @ISA $type_name };
@ISA = qw{XML::Grove};
$type_name = 'element_decl';

package XML::Grove::AttListDecl;
use vars qw{ @ISA $type_name };
@ISA = qw{XML::Grove};
$type_name = 'attlist_decl';



=head1 NAME

XML::Grove - Perl-style XML objects


 use XML::Grove;

 # Basic parsing and grove building
 use XML::Grove::Builder;
 use XML::Parser::PerlSAX;
 $grove_builder = XML::Grove::Builder->new;
 $parser = XML::Parser::PerlSAX->new ( Handler => $grove_builder );
 $document = $parser->parse ( Source => { SystemId => 'filename' } );

 # Creating new objects
 $document = XML::Grove::Document->new ( Contents => [ ] );
 $element = XML::Grove::Element->new ( Name => 'tag',
                                       Attributes => { },
				       Contents => [ ] );

 # Accessing XML objects
 $tag_name = $element->{Name};
 $contents = $element->{Contents};
 $parent = $element->{Parent};
 $characters->{Data} = 'XML is fun!';


XML::Grove is a tree-based object model for accessing the information
set of parsed or stored XML, HTML, or SGML instances.  XML::Grove
objects are Perl hashes and arrays where you access the properties of
the objects using normal Perl syntax:

  $text = $characters->{Data};

=head2 How To Create a Grove

There are several ways for groves to come into being, they can be read
from a file or string using a parser and a grove builder, they can be
created by your Perl code using the `C<new()>' methods of
XML::Grove::Objects, or databases or other sources can act as groves.

The most common way to build groves is using a parser and a grove
builder.  The parser is the package that reads the characters of an
XML file, recognizes the XML syntax, and produces ``events'' reporting
when elements (tags), text (characters), processing instructions, and
other sequences occur.  A grove builder receives (``consumes'' or
``handles'') these events and builds XML::Grove objects.  The last
thing the parser does is return the XML::Grove::Document object that
the grove builder created, with all of it's elements and character

The most common parser and grove builder are XML::Parser::PerlSAX (in
libxml-perl) and XML::Grove::Builder.  To build a grove, create the
grove builder first:

  $grove_builder = XML::Grove::Builder->new;

Then create the parser, passing it the grove builder as it's handler:

  $parser = XML::Parser::PerlSAX->new ( Handler => $grove_builder );

This associates the grove builder with the parser so that every time
you parse a document with this parser it will return an
XML::Grove::Document object.  To parse a file, use the `C<Source>'
parameter to the `C<parse()>' method containing a `C<SystemId>'
parameter (URL or path) of the file you want to parse:

  $document = $parser->parse ( Source => { SystemId => 'kjv.xml' } );

To parse a string held in a Perl variable, use the `C<Source>'
parameter containing a `C<String>' parameter:

  $document = $parser->parse ( Source => { String => $xml_text } );

The following are all parsers that work with XML::Grove::Builder:

  XML::Parser::PerlSAX (in libxml-perl, uses XML::Parser)
  XML::ESISParser      (in libxml-perl, uses James Clark's `nsgmls')
  XML::SAX2Perl        (in libxml-perl, translates SAX 1.0 to PerlSAX)

Most parsers supply more properties than the standard information set
below and XML::Grove will make available all the properties given by
the parser, refer to the parser documentation to find out what
additional properties it may provide.

Although there are not any available yet (August 1999), PerlSAX filters
can be used to process the output of a parser before it is passed to
XML::Grove::Builder.  XML::Grove::PerlSAX can be used to provide input
to PerlSAX filters or other PerlSAX handlers.

=head2 Using Groves

The properties provided by parsers are available directly using Perl's
normal syntax for accessing hashes and arrays.  For example, to get
the name of an element:

  $element_name = $element->{Name};

By convention, all properties provided by parsers are in mixed case.
`C<Parent>' properties are available using the
`C<Data::Grove::Parent>' module.

The following is the minimal set of objects and their properties that
you are likely to get from all parsers:

=head2 XML::Grove::Document

The Document object is parent of the root element of the parsed XML

=over 12

=item Contents

An array containing the root element.


A document's `Contents' may also contain processing instructions,
comments, and whitespace.

Some parsers provide information about the document type, the XML
declaration, or notations and entities.  Check the parser
documentation for property names.

=head2 XML::Grove::Element

The Element object represents elements from the XML source.

=over 12

=item Parent

The parent object of this element.

=item Name

A string, the element type name of this element

=item Attributes

A hash of strings or arrays

=item Contents

An array of elements, characters, processing instructions, etc.


In a purely minimal grove, the attributes of an element will be plain
text (Perl scalars).  Some parsers provide access to notations and
entities in attributes, in which case the attribute may contain an

=head2 XML::Grove::Characters

The Characters object represents text from the XML source.

=over 12

=item Parent

The parent object of this characters object

=item Data

A string, the characters


=head2 XML::Grove::PI

The PI object represents processing instructions from the XML source.

=over 12

=item Parent

The parent object of this PI object.

=item Target

A string, the processing instruction target.

=item Data

A string, the processing instruction data, or undef if none was supplied.


In addition to the minimal set of objects above, XML::Grove knows
about and parsers may provide the following objects.  Refer to the
parser documentation for descriptions of the properties of these

  ::Entity::External  External entity reference
  ::Entity::SubDoc    External SubDoc reference (SGML)
  ::Entity::SGML      External SGML reference (SGML)
  ::Entity            Entity reference
  ::Notation          Notation declaration
  ::Comment           <!-- A Comment -->
  ::SubDoc            A parsed subdocument (SGML)
  ::CData             A CDATA marked section
  ::ElementDecl       An element declaration from the DTD
  ::AttListDecl       An element's attribute declaration, from the DTD

=head1 METHODS

XML::Grove by itself only provides one method, new(), for creating new
XML::Grove objects.  There are Data::Grove and XML::Grove extension
modules that give additional methods for working with XML::Grove
objects and new extensions can be created as needed.

=over 4

=item $obj = XML::Grove::OBJECT->new( [PROPERTIES] )

`C<new>' creates a new XML::Grove object with the type I<OBJECT>, and
with the initial I<PROPERTIES>.  I<PROPERTIES> may be given as either
a list of key-value pairs, a hash, or an XML::Grove object to copy.
I<OBJECT> may be any of the objects listed above.


This is a list of available extensions and the methods they provide
(as of Feb 1999).  Refer to their module documentation for more
information on how to use them.

    as_string       return portions of groves as a string
    attr_as_string  return an element's attribute as a string

    as_canon_xml    return XML text in canonical XML format

    parse           emulate a PerlSAX parser using the grove objects

    root            return the root element of a grove
    rootpath        return an array of all objects between the root
                    element and this object, inclusive

    Data::Grove::Parent also adds `C<Parent>' and `C<Raw>' properties
    to grove objects.

    accept          call back a subroutine using an object type name
    accept_name     call back using an element or tag name
    children_accept for each child in Contents, call back a sub
    children_accept_name  same, but using tag names
    attr_accept     call back for the objects in attributes

    get_ids         return a list of all ID attributes in grove

    at_path         $el->at_path('/html/body/ul/li[4]')

    filter          run a sub against all the objects in the grove


The class `C<XML::Grove>' is the superclass of all classes in the
XML::Grove module.  `C<XML::Grove>' is a subclass of `C<Data::Grove>'.

If you create an extension and you want to add a method to I<all>
XML::Grove objects, then create that method in the XML::Grove
package.  Many extensions only need to add methods to
XML::Grove::Document and/or XML::Grove::Element.

When you create an extension you should definitly provide a way to
invoke your module using objects from your package too.  For example,
XML::Grove::AsString's `C<as_string()>' method can also be called
using an XML::Grove::AsString object:

  $writer= new XML::Grove::AsString;
  $string = $writer->as_string ( $xml_object );

=head1 AUTHOR

Ken MacLeod, ken@bitsko.slc.ut.us

=head1 SEE ALSO

perl(1), XML::Grove(3)

Extensible Markup Language (XML) <http://www.w3c.org/XML>