###########################################
# File::Comments -- 2005, Mike Schilli <cpan@perlmeister.com>
###########################################

###########################################
package File::Comments;
###########################################

use strict;
use warnings;
use Log::Log4perl qw(:easy);
use Sysadm::Install qw(:all);
use File::Basename;
use Module::Pluggable
  require     => 1,
  #search_path => [qw(File::Comments::Plugin)],
  ;

our $VERSION = "0.08";

###########################################
sub new {
###########################################
    my($class, %options) = @_;

    my $self = {

        cold_calls     => 1,
        default_plugin => undef,

        suffixes   => {},
        bases      => {},
        plugins    => [],
        %options,
    };

    bless $self, $class;

        # Init plugins
    $self->init();

    return $self;
}

###########################################
sub init {
###########################################
    my($self) = @_;

    $self->{plugins} = [];

    for($self->plugins()) {
        DEBUG "Initializing plugin $_";
        my $plugin = $_->new(mothership => $self);
        push @{$self->{plugins}}, $plugin;
    }
}

###########################################
sub find_plugin {
###########################################
    my($self) = @_;

        # Is there a suffix handler defined?
    if(defined $self->{target}->{suffix} and
       exists $self->{suffixes}->{$self->{target}->{suffix}}) {

        DEBUG "Searching for plugin handling suffix $self->{target}->{suffix}";

        for my $plugin (@{$self->{suffixes}->{$self->{target}->{suffix}}}) {
            DEBUG "Checking if ", ref $plugin, 
                  " is applicable for suffix ",
                  "'$self->{target}->{suffix}'";
            if($plugin->applicable($self->{target})) {
                DEBUG ref($plugin), " accepted";
                return $plugin;
            } else {
                DEBUG ref($plugin), " rejected";
            }
        }
    }

        # Is there a base handler defined?
    if(defined $self->{target}->{file_base} and
       exists $self->{bases}->{$self->{target}->{file_base}}) {

        DEBUG "Searching for plugin handling base $self->{target}->{file_base}";

        for my $plugin (@{$self->{bases}->{$self->{target}->{file_base}}}) {
            DEBUG "Checking if ", ref $plugin, 
                  " is applicable for base ",
                  "'$self->{target}->{file_base}'";
            if($plugin->applicable($self->{target})) {
                DEBUG ref($plugin), " accepted";
                return $plugin;
            } else {
                DEBUG ref($plugin), " rejected";
            }
        }
    }

        # Hmm ... no volunteers yet.
    return undef unless $self->{cold_calls};

        # Go from door to door and check if some plugin wants to 
        # handle it. Set the 'cold_call' flag to let the plugin know
        # about our desparate move.
    for my $plugin (@{$self->{plugins}}) {
         DEBUG "Checking if ", ref $plugin, " is applicable for ",
               "file '$self->{target}->{path}' (cold call)";
        if($plugin->applicable($self->{target}, 1)) {
            DEBUG "Cold call accepted";
            return $plugin;
        } else {
            DEBUG "Cold call rejected";
        }
    }

    return undef;
}

###########################################
sub guess_type {
###########################################
    my($self, $target) = @_;

    if(ref $target) {
        $self->{target} = $target;
    } else {
        $self->{target} = File::Comments::Target->new(path => $target);
    }

    my $plugin = $self->find_plugin();

    if(! defined $plugin) {
        ERROR "No plugin found to handle $target";
        return undef;
    }

    return $plugin->type(); 
}

###########################################
sub comments {
###########################################
    my($self, $target) = @_;

    return  $_[0]->dispatch($target, "comments");
}

###########################################
sub stripped {
###########################################
    my($self, $target) = @_;

    return  $_[0]->dispatch($target, "stripped");
}

###########################################
sub dispatch {
###########################################
    my($self, $target, $function) = @_;

    if(ref $target) {
        $self->{target} = $target;
    } else {
        $self->{target} = File::Comments::Target->new(path => $target);
    }

    my $plugin = $self->find_plugin();

    if(! defined $plugin) {
        if($self->{default_plugin}) {
            $plugin = $self->{default_plugin};
        } else {
            ERROR "Type of $target couldn't be determined";
                # Just return and empty list
            return undef;
        }
    }

    DEBUG "Calling ", ref $plugin, 
          " to handle $self->{target}->{path}";

    return $plugin->$function($self->{target});
}

###########################################
sub register_suffix {
###########################################
    my($self, $suffix, $plugin_obj) = @_;

    DEBUG "Registering ", ref $plugin_obj, 
          " as a handler for suffix $suffix";

        # Could be more than one, line them up
    push @{$self->{suffixes}->{$suffix}}, $plugin_obj;
}

###########################################
sub suffix_registered {
###########################################
    my($self, $suffix) = @_;

    return exists $self->{suffixes}->{$suffix};
}

###########################################
sub register_base {
###########################################
    my($self, $base, $plugin_obj) = @_;

    DEBUG "Registering ", ref $plugin_obj, 
          " as a handler for base $base";

        # Could be more than one, line them up
    push @{$self->{bases}->{$base}}, $plugin_obj;
}

##################################################
# Poor man's Class::Struct
##################################################
sub make_accessor {
##################################################
    my($package, $name) = @_;

    no strict qw(refs);

    my $code = <<EOT;
        *{"$package\\::$name"} = sub {
            my(\$self, \$value) = \@_;
    
            if(defined \$value) {
                \$self->{$name} = \$value;
            }
            if(exists \$self->{$name}) {
                return (\$self->{$name});
            } else {
                return "";
            }
        }
EOT
    if(! defined *{"$package\::$name"}) {
        eval $code or die "$@";
    }
}

###########################################
package File::Comments::Target;
###########################################
use Sysadm::Install qw(:all);
use File::Basename;
use Log::Log4perl qw(:easy);

###########################################
sub new {
###########################################
    my($class, %options) = @_;

    my $self = {
        path       => undef,
        dir        => undef,
        file_name  => undef,
        file_base  => undef,
        content    => undef,
        suffix     => undef,
        %options,
    };

    bless $self, $class;

    $self->load($self->{path}, $self->{content});

    return $self;
}

###########################################
sub load {
###########################################
    my($self, $path, $content) = @_;

    $self->{content}   = $content unless $content;
    $self->{path}      = $path;
    $self->{content}   = slurp $path unless defined $self->{content};

    $self->{file_name} = basename($path);

    $self->{dir}       = dirname($path);
    $self->{suffix}    = undef;
    $self->{file_base} = $self->{file_name};

    if(index($self->{file_name}, ".") >= 0) {
        ($self->{file_base}, $self->{suffix}) = 
            ($self->{file_name} =~ m#(.+)(\.[^.]*$)#);
    }

    DEBUG "Loaded file path=", def($path),
          " name=",            def($self->{file_name}),
          " dir=",             def($self->{dir}), 
          " suffix=",          def($self->{suffix}), 
          " base=",            def($self->{file_base}); 
}

###########################################
sub def {
###########################################
    return $_[0] if defined $_[0];
    return "**undef**";
}

File::Comments::make_accessor("File::Comments::Target", $_)
   for qw(path file_name file_base content suffix dir);

1;

__END__

=head1 NAME

File::Comments - Recognizes file formats and extracts format-specific comments

=head1 SYNOPSIS

    use File::Comments;

    my $snoop = File::Comments->new();

        # *----------------
        # | program.c:
        # | /* comment */
        # | main () {}
        # *----------------
    my $comments = $snoop->comments("program.c");
        # => [" comment "]

        # *----------------
        # | script.pl:
        # | # comment
        # | print "howdy!\n"; # another comment
        # *----------------
    my $comments = $snoop->comments("script.pl");
        # => [" comment", " another comment"]

        # or strip comments from a file:
    my $stripped = $snoop->stripped("script.pl");
        # => "print "howdy!\n";"

        # or just guess a file's type:
    my $type = $snoop->guess_type("program.c");    
        # => "c"

=head1 DESCRIPTION

File::Comments guesses the type of a given file, determines the format
used for comments, extracts all comments, and returns them as a reference
to an array of chunks. Alternatively, it strips all comments from a
file.

Currently supported are Perl scripts, C/C++ programs, Java, makefiles,
JavaScript, Python and PHP.

The plugin architecture used by File::Comments makes it easy to add new
formats. To support a new format, a new plugin module has to be installed.
No modifications to the File::Comments codebase are necessary, new 
plugins will be picked up automatically.

File::Comments can also be used to simply guess a file's type. It it
somewhat more flexible than File::MMagic and File::Type.
File types in File::Comments are typically based on file name suffixes
(*.c, *.pl, etc.). If no suffix is available, or a given suffix
is ambiguous (e.g. if several plugins have registered a handler for
the same suffix), then the file's content is used to narrow down the
possibilities and arrive at a decision.

WARNING: THIS MODULE IS UNDER DEVELOPMENT, QUALITY IS ALPHA. IF YOU
FIND BUGS, OR WANT TO CONTRIBUTE PLUGINS, PLEASE SEND THEM MY WAY.

=head2 FILE TYPES

Currently, the following plugins are included in the File::Comments 
distribution:

    ###############################################
    # plugin                              type    #
    ###############################################
      File::Comments::Plugin::C          c            (o)
      File::Comments::Plugin::Makefile   makefile  (X)
      File::Comments::Plugin::Perl       perl      (X)
      File::Comments::Plugin::JavaScript js           (o)
      File::Comments::Plugin::Java       java         (o)
      File::Comments::Plugin::HTML       html      (X)
      File::Comments::Plugin::Python     python       (o)
      File::Comments::Plugin::PHP        php          (o)

          (X) Fully implemented
          (o) Implemented with regular expressions, only works for
              easy cases until real parsers are employed.

The constants listed in the I<type> column are the strings returned
by the C<guess_type()> method.

=head1 Methods

=over 4

=item $snoop = File::Comments-E<gt>new()

Create a new comment extractor engine. This will automatically initialize
all plugins.

To avoid cold calls (L<Cold Calls>), set C<cold_calls> to a false value
(defaults to 1):

    $snoop = File::Comments->new( cold_calls => 0 );

By default, if no plugin can be found for a given file, C<File::Comments>
will throw a fatal error and C<die()>. If this is undesirable and
a default plugin should be used instead, it can be specified in
the constructor using the C<default_plugin> parameter:

    $snoop = File::Comments->new( 
      default_plugin => "File::Comments::Plugin::Makefile"
    );

=item $comments = $snoop-E<gt>comments("program.c");

Extract all comments from a file. After determining the file type
by either suffix or content (L<Cold Calls>), comments are extracted
as chunks and returned as a reference to an array.

To get a single string containing all comments, just join the chunks:

    my $comments_string = join '', @$comments;

=item $stripped_text = $snoop-E<gt>stripped("program.c");

Strip all comments from a file. After determining the file type
by either suffix or content (L<Cold Calls>), all comments are removed
and the stripped text is returned in a scalar.

=item $type = $snoop-E<gt>guess_type("script.pl")

Guess the type of a file, based on either suffix, or in absense of a suffix
via L<Cold Calls>. Return the result as a string: C<"c">, C<"makefile">,
C<"perl">, etc. (L<FILE TYPES>).

=item $snoop->suffix_registered("c")

Returns true if one of the plugins has registered the given suffix.

=back

=head2 Writing new plugins

Writing a new plugin to add functionality to the File::Comments framework
is as simple as defining a new module, derived from the baseclass of all
plugins, C<File::Comments::Plugin>. Three additional methods are needed: 
C<init()>, C<type()>, and C<comments()>.

C<init()> gets called when the mothership finds the plugin and
initializes it. This is the time to register extensions that the plugin
wants to handle.

The second mandatory method for a plugin is C<type()>, which returns
a string, indicating the type of the file examined. Usually this can
be done without further ado, since a basic plugin will called only
on files which it registered for by suffix. Exceptions to this are
explained later.

The third method is C<comments()>, which returns a reference to an 
array of comment lines. The content of the source file to be examined
will be available in 

    $self->{target}->{content}

by the time C<comments()> gets called.

And that's it. Here's a functional basic plugin, registering a new 
suffix ".odd" with the mothership and expecting files with comment lines
that start with C<ODDCOMMENT>:

    ###########################################
    package File::Comments::Plugin::Oddball;
    ###########################################

    use strict;
    use warnings;
    use File::Comments::Plugin;

    our $VERSION = "0.01";
    our @ISA     = qw(File::Comments::Plugin);

    ###########################################
    sub init {
    ###########################################
        my($self) = @_;
    
        $self->register_suffix(".odd");
    }

    ###########################################
    sub type {
    ###########################################
        my($self) = @_;
    
        return "odd";
    }

    ###########################################
    sub comments {
    ###########################################
        my($self) = @_;
    
        # Some code to extract all comments from 
        # $self->{target}->{content}:
        my @comments = ($self->{target}->{content} =~ /^ODDCOMMENT:(.*)/);
        return \@comments;
    }

    1;

=head2 Cold Calls

If a file doesn't have an extension or an extensions that's served by
multiple plugins, File::Comments will go shop around and ask all
plugins if they want to handle the file. The mothership calls 
each plugin's C<applicable()> method, passing it an object of
type C<File::Comments::Target>, which contains the following
fields:

When the plugin gets such a I<cold call> (indicated by the
third parameter to C<applicable()>, it can either accept or deny
the request. To arrive at a decision, it can peek into the target
object. The Perl plugin illustrates this:

    ###########################################
    sub applicable {
    ###########################################
        my($self, $target, $cold_call) = @_;
    
        return 1 unless $cold_call;
    
        return 1 if $target->{content} =~ /^#!.*perl\b/;

        return 0;
    }

If a plugin does not define a C<applicable()> method, a default method
is inherited from the base class C<File::Comments::Plugin>, which looks
like this:

    ###########################################
    sub applicable {
    ###########################################
        my($self, $target, $cold_call) = @_;

        return 0 if $cold_call;
        return 1;
    }

This will deny all I<cold calls> and only accept requests for files
with suffixes or base names the plugin has already signed up for.

=head2 Plugin Inheritance

Plugins can reuse existing plugins by inheritance. For example, if
you wanted to write a I<catch-all> plugin that takes over all cold
calls and handles comments like the C<Makefile> plugin, you can
simply use

    ###########################################
    package File::Comments::Plugin::Catchall;
    ###########################################

    use strict;
    use warnings;
    use File::Comments::Plugin;
    use File::Comments::Plugin::Makefile;

    our $VERSION = "0.01";
    our @ISA     = qw(File::Comments::Plugin::Makefile);

    ###########################################
    sub applicable {
    ###########################################
        my($self) = @_;
    
        return 1;
    }

C<File::Comments::Plugin::Catchall> just implements C<applicable()>
and inherits everything else from C<File::Comments::Plugin::Makefile>.

=head1 LEGALESE

Copyright 2005 by Mike Schilli, all rights reserved.
This program is free software, you can redistribute it and/or
modify it under the same terms as Perl itself.

=head1 AUTHOR

2005, Mike Schilli <cpan@perlmeister.com>