# -*- Perl -*-

package SGML::DTDParse::Tokenizer;

use strict;
use vars qw($VERSION $CVS);

$VERSION = do { my @r=(q$Revision: 2.1 $=~/\d+/g); sprintf "%d."."%03d"x$#r,@r };
$CVS = '$Id: Tokenizer.pm,v 2.1 2005/07/02 23:51:18 ehood Exp $ ';

use strict;
use Text::DelimMatch;

require 5.000;
require Carp;

{
    package SGML::DTDParse::Tokenizer::Group;

    sub new {
	my($type, $cm) = @_;
	my($class) = ref($type) || $type;
	my($self) = {};

	bless $self, $class;

	die "Bad call to SGML::DTDParse::Tokenizer::Group: $cm\n"
	    if $cm !~ /^\((.*)\)(.?)$/s;

	$self->{'OCCURRENCE'} = $2;
	$self->{'CONTENT_MODEL'} = new SGML::DTDParse::Tokenizer $1, 1;

	return $self;
    }

    sub print {
	my($self, $depth) = @_;

	print "\t" x $depth, "(\n";
	$self->{'CONTENT_MODEL'}->print($depth+1);
	print "\t" x $depth, ")\n";
    }
}

{
    package SGML::DTDParse::Tokenizer::Element;

    sub new {
	my($type, $elem) = @_;
	my($class) = ref($type) || $type;
	my($self) = {};

	bless $self, $class;

	die "Bad call to SGML::DTDParse::Tokenizer::Element: $elem\n"
	    if $elem !~ /^(\S+?)([\*\?\+]?)$/s;

	$self->{'ELEMENT'} = $1;
	$self->{'OCCURRENCE'} = $2;

	return $self;
    }

    sub print {
	my($self, $depth) = @_;

	print "\t" x $depth, $self->{'ELEMENT'}, $self->{'OCCURRENCE'}, "\n";
    }
}

{
    package SGML::DTDParse::Tokenizer::ParameterEntity;

    sub new {
	my($type, $pe) = @_;
	my($class) = ref($type) || $type;
	my($self) = {};

	bless $self, $class;

	die "Bad call to SGML::DTDParse::Tokenizer::ParameterEntity: $pe\n"
	    if $pe !~ /^(\S+)$/s;

	$self->{'PARAMETER_ENTITY'} = $1;

	return $self;
    }

    sub print {
	my($self, $depth) = @_;

	print "\t" x $depth, "%", $self->{'PARAMETER_ENTITY'}, ";\n";
    }
}

{
    package SGML::DTDParse::Tokenizer::Connector;

    sub new {
	my($type, $con) = @_;
	my($class) = ref($type) || $type;
	my($self) = {};

	bless $self, $class;

	die "Bad call to SGML::DTDParse::Tokenizer::Connector: $con\n"
	    if $con !~ /^[\,\|\&]$/s;

	$self->{'CONNECTOR'} = $con;

	return $self;
    }

    sub print {
	my($self, $depth) = @_;

	print "\t" x $depth, $self->{'CONNECTOR'}, "\n";
    }
}

sub new {
    my($type, $cm, $internal) = @_;
    my($class) = ref($type) || $type;
    my($self) = {};
    my(@model) = ();

    bless $self, $class;

    $self->{'CONTENT_MODEL_STRING'} = $cm;

#    print "-->$cm\n";

    if ($cm =~ /(.*?)\s\-(\(.*)$/) {
	my($excl) = $2;
	my($exclcm) = new SGML::DTDParse::Tokenizer $excl;
	$self->{'EXCLUSION'} = $exclcm;
	$cm = $1;
    }

    if ($cm =~ /(.*?)\s\+(\(.*)$/) {
	my($incl) = $2;
	my($inclcm) = new SGML::DTDParse::Tokenizer $incl;
	$self->{'INCLUSION'} = $inclcm;
	$cm = $1;
    }

#    print "==>$cm\n";

    $cm =~ s/^\s+//sg;

    # Simplification: always make the content model a group; unless it's
    # declared content.
    #
    if (!$internal) {
#	print "$cm\n\n";

	my($mc) = new Text::DelimMatch '\(', '\)[\?\+\*]*';
	my($pre, $match, $rest) = $mc->match($cm);

	if ($cm ne 'EMPTY' && $cm ne 'CDATA' && $cm ne 'RCDATA') {
	    if ($cm !~ /^\(/s || ($rest !~ /^\s*$/s)) {
		$cm = "($cm)";
	    }
	}
    }

    while ($cm ne "") {
	if ($cm =~ /^\(/s) {
	    # group;
	    my($mc) = new Text::DelimMatch '\(', '\)[\?\+\*]*';
	    my($pre, $match, $rest) = $mc->match($cm);
	    my($group);

#	    print "\tgroup:\n";
#	    print "\t\tp:$pre\n";
#	    print "\t\tm:$match\n";
#	    print "\t\tr:$rest\n";

	    $group = new SGML::DTDParse::Tokenizer::Group $match;
	    push (@model, $group);

	    $cm = $rest;
	} elsif ($cm =~ /^\%/s) {
	    # parameter entity
	    my($pe);
	    my($pent);
	    if ($cm =~ /%(.*?);?([\|\,\&\s].*)$/s) {
		$pe = $1;
		$cm = $2;
	    } else {
		$pe = $cm;
		$cm = "";
		$pe = $1 if $pe =~ /^\%(.*?);?$/s;
	    }

	    $pent = new SGML::DTDParse::Tokenizer::ParameterEntity $pe;
	    push (@model, $pent);
	} elsif ($cm =~ /^[\,\|\&]/s) {
	    # connector
	    my($con) = new SGML::DTDParse::Tokenizer::Connector $&;
	    $cm = $';

#	    print "\tconnector: $&\n";

	    push (@model, $con);
	} else {
	    # element
	    my($elem);
	    my($element);
	    if ($cm =~ /(.*?)([\|\,\&\s].*)$/s) {
		$elem = $1;
		$cm = $2;
	    } else {
		$elem = $cm;
		$cm = "";
	    }

	    $element = new SGML::DTDParse::Tokenizer::Element $elem;
	    push (@model, $element);
	}

	$cm =~ s/^\s+//sg;
    }

#    print "<==\n";

    @{$self->{'MODEL'}} = @model;

    return $self;
}

sub print {
    my($self) = shift;
    my($depth) = shift || 1;
    my(@model) = @{$self->{'MODEL'}};
    local($_);

    foreach $_ (@model) {
	$_->print($depth);
    }
}

1;