package Algorithm::RegressionTree;

#--------------------------------------------------------------------------------------
# Copyright (c) 2017 Avinash Kak. All rights reserved.  This program is free
# software.  You may modify and/or distribute it under the same terms as Perl itself.
# This copyright notice must remain attached to the file.
#
# Algorithm::RegressionTree is a Perl module for constructing regression trees.  It calls
# on the main Algorithm::DecisionTree module for some of its functionality.
# -------------------------------------------------------------------------------------

#use lib 'blib/lib', 'blib/arch';

#use 5.10.0;
use strict;
use warnings;
use Carp;
use File::Basename;
use Algorithm::DecisionTree 3.43;
use List::Util qw(reduce min max pairmap sum);
use Math::GSL::Matrix;
use Graphics::GnuplotIF;

our $VERSION = '3.43';

@Algorithm::RegressionTree::ISA = ('Algorithm::DecisionTree');

############################################   Constructor  ##############################################
sub new { 
    my ($class, %args) = @_;
    my @params = keys %args;
    croak "\nYou have used a wrong name for a keyword argument --- perhaps a misspelling\n" 
                           if check_for_illegal_params(@params) == 0;
    my %dtargs = %args;
    delete $dtargs{dependent_variable_column};
    delete $dtargs{predictor_columns};
    delete $dtargs{mse_threshold};
    delete $dtargs{need_data_normalization};
    delete $dtargs{jacobian_choice};
    delete $dtargs{debug1_r};
    delete $dtargs{debug2_r};
    delete $dtargs{debug3_r};
    my $instance = Algorithm::DecisionTree->new(%dtargs);
    bless $instance, $class;
    $instance->{_dependent_variable_column}       =  $args{dependent_variable_column} || undef;
    $instance->{_predictor_columns}               =  $args{predictor_columns} || 0;
    $instance->{_mse_threshold}                   =  $args{mse_threshold} || 0.01;
    $instance->{_jacobian_choice}                 =  $args{jacobian_choice} || 0;
    $instance->{_need_data_normalization}         =  $args{need_data_normalization} || 0;
    $instance->{_dependent_var}                   =  undef;
    $instance->{_dependent_var_values}            =  undef;
    $instance->{_samples_dependent_var_val_hash}  =  undef;
    $instance->{_root_node}                       =  undef;
    $instance->{_debug1_r}                        =  $args{debug1_r} || 0;
    $instance->{_debug2_r}                        =  $args{debug2_r} || 0;
    $instance->{_debug3_r}                        =  $args{debug3_r} || 0;
    $instance->{_sample_points_for_dependent_var} =  [];
    $instance->{_output_for_plots}                =  {};
    $instance->{_output_for_surface_plots}        =  {};
    bless $instance, $class;
}

##############################################  Methods  #################################################
sub get_training_data_for_regression {
    my $self = shift;
    die("Aborted. get_training_data_csv() is only for CSV files") unless $self->{_training_datafile} =~ /\.csv$/;
    my @dependent_var_values;
    my %all_record_ids_with_dependent_var_values;
    my $firstline;
    my %data_hash;
    $|++;
    open FILEIN, $self->{_training_datafile};
    my $record_index = 0;
    while (<FILEIN>) {
        next if /^[ ]*\r?\n?$/;
        $_ =~ s/\r?\n?$//;
        my $record = $self->{_csv_cleanup_needed} ? cleanup_csv($_) : $_;
        if ($record_index == 0) {
            $firstline = $record;
            $record_index++;
            next;
        }
        my @parts = split /,/, $record;
        my $record_label = shift @parts;
        $record_label  =~ s/^\s*\"|\"\s*$//g;
        $data_hash{$record_label} = \@parts;
        push @dependent_var_values, $parts[$self->{_dependent_variable_column}-1];
        $all_record_ids_with_dependent_var_values{$parts[0]} = $parts[$self->{_dependent_variable_column}-1];
        print "." if $record_index % 10000 == 0;
        $record_index++;
    }
    close FILEIN;    
    $self->{_how_many_total_training_samples} = $record_index; #it's less by 1 from total num of records; okay
    print "\n\nTotal number of training samples: $self->{_how_many_total_training_samples}\n" if $self->{_debug1_r};
    my @all_feature_names =   grep $_, split /,/, substr($firstline, index($firstline,','));
    my $dependent_var_column_heading = $all_feature_names[$self->{_dependent_variable_column} - 1];
    my @feature_names = map {$all_feature_names[$_-1]} @{$self->{_predictor_columns}};
    my %dependent_var_value_for_sample_hash = map {"sample_" . $_  =>  "$dependent_var_column_heading=" . $data_hash{$_}->[$self->{_dependent_variable_column} - 1 ] } keys %data_hash;
    my @sample_names = map {"sample_$_"} keys %data_hash;
    my %feature_values_for_samples_hash = map {my $sampleID = $_; "sample_" . $sampleID  =>  [map {my $fname = $all_feature_names[$_-1]; $fname . "=" . eval{$data_hash{$sampleID}->[$_-1] =~ /^\d+$/ ? sprintf("%.1f", $data_hash{$sampleID}->[$_-1] ) : $data_hash{$sampleID}->[$_-1] } } @{$self->{_predictor_columns}} ] }  keys %data_hash;    
    my %features_and_values_hash = map { my $a = $_; {$all_feature_names[$a-1] => [  map {my $b = $_; $b =~ /^\d+$/ ? sprintf("%.1f",$b) : $b} map {$data_hash{$_}->[$a-1]} keys %data_hash ]} } @{$self->{_predictor_columns}};     
    my %numeric_features_valuerange_hash   =   ();
    my %feature_values_how_many_uniques_hash  =  ();
    my %features_and_unique_values_hash = ();
    my $numregex =  '[+-]?\ *(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?';
    foreach my $feature (keys %features_and_values_hash) {
        my %seen = ();
        my @unique_values_for_feature =  grep {$_ if $_ ne 'NA' && !$seen{$_}++} @{$features_and_values_hash{$feature}};
        $feature_values_how_many_uniques_hash{$feature} = scalar @unique_values_for_feature;
        my $not_all_values_float = 0;
        map {$not_all_values_float = 1 if $_ !~ /^$numregex$/} @unique_values_for_feature;
        if ($not_all_values_float == 0) {
            my @minmaxvalues = minmax(\@unique_values_for_feature);
            $numeric_features_valuerange_hash{$feature} = \@minmaxvalues; 
        }
        $features_and_unique_values_hash{$feature} = \@unique_values_for_feature;
    }
    if ($self->{_debug1_r}) {
        print "\nDependent var values: @dependent_var_values\n";
        print "\nEach sample data record:\n";
        foreach my $kee (sort {sample_index($a) <=> sample_index($b)} keys %feature_values_for_samples_hash) {
            print "$kee    =>   @{$feature_values_for_samples_hash{$kee}}\n";
        }
        print "\ndependent var value for each data sample:\n";
        foreach my $kee (sort {sample_index($a) <=> sample_index($b)} keys %dependent_var_value_for_sample_hash) {
            print "$kee    =>   $dependent_var_value_for_sample_hash{$kee}\n";
        }
        print "\nfeatures and the values taken by them:\n";
        for my $kee  (sort keys %features_and_values_hash) {
            print "$kee    =>   @{$features_and_values_hash{$kee}}\n";                        
        }
        print "\nnumeric features and their ranges:\n";
        for my $kee  (sort keys %numeric_features_valuerange_hash) {
            print "$kee    =>   @{$numeric_features_valuerange_hash{$kee}}\n";
        }
        print "\nnumber of unique values in each feature:\n";        
        for my $kee  (sort keys %feature_values_how_many_uniques_hash) {
            print "$kee    =>   $feature_values_how_many_uniques_hash{$kee}\n";
        }
    }
    $self->{_XMatrix}  =  undef;
    $self->{_YVector}  =  undef;
    $self->{_dependent_var} = $dependent_var_column_heading;
    $self->{_dependent_var_values} = \@dependent_var_values;
    $self->{_feature_names} = \@feature_names;
    $self->{_samples_dependent_var_val_hash}  = \%dependent_var_value_for_sample_hash;
    $self->{_training_data_hash}  =  \%feature_values_for_samples_hash;
    $self->{_features_and_values_hash}  = \%features_and_values_hash;
    $self->{_features_and_unique_values_hash}  =  \%features_and_unique_values_hash;
    $self->{_numeric_features_valuerange_hash} = \%numeric_features_valuerange_hash;
    $self->{_feature_values_how_many_uniques_hash} = \%feature_values_how_many_uniques_hash;
    $self->SUPER::calculate_first_order_probabilities();
}

sub construct_XMatrix_and_YVector_all_data {
    my $self = shift;
    my $matrix_rows_as_lists =  [ map {my @arr = @$_; [map substr($_,index($_,'=')+1), @arr] } map {$self->{_training_data_hash}->{$_}} sort {sample_index($a) <=> sample_index($b)} keys %{$self->{_training_data_hash}} ];
    map {push @$_, 1} @{$matrix_rows_as_lists};
    map {print "XMatrix row: @$_\n"} @{$matrix_rows_as_lists} if $self->{_debug1_r};
    my $XMatrix = Math::GSL::Matrix->new(scalar @{$matrix_rows_as_lists}, scalar @{$matrix_rows_as_lists->[0]});
    pairmap {$XMatrix->set_row($a,$b)} ( 0..@{$matrix_rows_as_lists}-1, @{$matrix_rows_as_lists} )
                       [ map { $_, $_ + @{$matrix_rows_as_lists} } ( 0 .. @{$matrix_rows_as_lists}-1 ) ];
    if ($self->{_debug1_r}) {
        foreach my $rowindex (0..@{$matrix_rows_as_lists}-1) {
            my @onerow = $XMatrix->row($rowindex)->as_list;
            print "XMatrix row again: @onerow\n";
        }
    }
    $self->{_XMatrix} = $XMatrix;
    my @dependent_var_values =  map {my $val = $self->{_samples_dependent_var_val_hash}->{$_}; substr($val,index($val,'=')+1)} sort {sample_index($a) <=> sample_index($b)} keys %{$self->{_samples_dependent_var_val_hash}};
    print "dependent var values: @dependent_var_values\n" if $self->{_debug1_r};
    my $YVector = Math::GSL::Matrix->new(scalar @{$matrix_rows_as_lists}, 1);
    pairmap {$YVector->set_row($a,$b)} ( 0..@{$matrix_rows_as_lists}-1, map {[$_]} @dependent_var_values )
                       [ map { $_, $_ + @{$matrix_rows_as_lists} } ( 0 .. @{$matrix_rows_as_lists}-1 ) ];
    if ($self->{_debug1_r}) {
        foreach my $rowindex (0..@{$matrix_rows_as_lists}-1) {
            my @onerow = $YVector->row($rowindex)->as_list;
            print "YVector row: @onerow\n";
        }
    }
    $self->{_YVector} = $YVector;
    return ($XMatrix, $YVector);
}

sub estimate_regression_coefficients {
    my $self = shift;
    my ($XMatrix, $YVector, $display) = @_;
    $display = 0 unless defined $display;
    my ($nrows, $ncols) = $XMatrix->dim;
    print "nrows=$nrows   ncols=$ncols\n" if $self->{_debug2_r};
    my $jacobian_choice = $self->{_jacobian_choice};
    my $X = $XMatrix->copy();
    my $y = $YVector->copy();
    if ($self->{_need_data_normalization}) {
        die "normalization feature is yet to be added to the module --- sorry";
    }
    my $beta0 = (transpose($X) * $X)->inverse() * transpose($X) * $y;
    my ($betarows, $betacols) = $beta0->dim;
    die "Something has gone wrong with the calculation of beta coefficients" if $betacols > 1;
    if ($jacobian_choice == 0) {
#        my $error = sum(abs_vector_as_list( $y - ($X * $beta) )) / $nrows;   
        my $error = sum( map abs, ($y - ($X * $beta0) )->col(0)->as_list ) / $nrows;   
        my $predictions = $X * $beta0;
        if ($display) {
            if ($ncols == 2) {
                my @xvalues = $X->col(0)->as_list;
                my @yvalues = $predictions->col(0)->as_list;
                $self->{_output_for_plots}->{scalar(keys %{$self->{_output_for_plots}}) + 1} = [\@xvalues,\@yvalues];
            } elsif ($ncols == 3) {
                my @xvalues;
                my @yvalues = $predictions->col(0)->as_list;
                foreach my $row_index (0 .. $X->rows - 1) {
                    my @onerow = $X->row($row_index)->as_list;
                    pop @onerow;
                    push @xvalues, "@onerow";
                }
                $self->{_output_for_surface_plots}->{scalar(keys %{$self->{_output_for_surface_plots}}) + 1} = [\@xvalues,\@yvalues];
            } else {
                print "no display when the overall dimensionality of the data exceeds 3\n";
            }
        }
        return ($error, $beta0);    
    }
    my $beta = $beta0; 
    if ($self->{_debug2_r}) {
        print "\ndisplaying beta0 matrix\n";
        display_matrix($beta);
    }
    my $gamma = 0.1;
    my $iterate_again_flag = 1;
    my $delta = 0.001;
    my $master_interation_index = 0;
    $|++;
    while (1) {
        print "*" unless $master_interation_index++ % 100;
        last unless $iterate_again_flag;
        $gamma *= 0.1;
        $beta0 = 0.99 * $beta0;
        print "\n\n======== starting iterations with gamma= $gamma ===========\n\n\n" if $self->{_debug2_r};
        $beta = $beta0;
        my $beta_old = Math::GSL::Matrix->new($betarows, 1)->zero;
        my $error_old = sum( map abs, ($y - ($X * $beta_old) )->col(0)->as_list ) / $nrows;   
        my $error;
        foreach my $iteration (0 .. 1499) {
            print "." unless $iteration % 100;
            $beta_old = $beta->copy;
            my $jacobian;
            if ($jacobian_choice == 1) {
                $jacobian = $X;
            } elsif ($jacobian_choice == 2) {      
                my $x_times_delta_beta = $delta * $X * $beta;
                $jacobian = Math::GSL::Matrix->new($nrows, $ncols);
                foreach my $i (0 .. $nrows - 1) {
                    my @row = ($x_times_delta_beta->get_elem($i,0)) x $ncols;
                    $jacobian->set_row($i, \@row);
                }
                $jacobian = (1.0/$delta) * $jacobian;
            } else {
                die "wrong choice for the jacobian_choice";
            }
#            $beta = $beta_old + 2 * $gamma * transpose($X) * ( $y - ($X * $beta) );
            $beta = $beta_old + 2 * $gamma * transpose($jacobian) * ( $y - ($X * $beta) );
            $error = sum( map abs, ($y - ($X * $beta) )->col(0)->as_list ) / $nrows;   
            if ($error > $error_old) {
                if (vector_norm($beta - $beta_old) < (0.00001 * vector_norm($beta_old))) {
                    $iterate_again_flag = 0;
                    last;
                } else {
                    last;
                }
            }
            if ($self->{_debug2_r}) {
                print "\n\niteration: $iteration   gamma: $gamma   current error: $error\n";
                print "\nnew beta:\n";
                display_matrix $beta;
            }
            if ( vector_norm($beta - $beta_old) < (0.00001 * vector_norm($beta_old)) ) { 
                print "iterations used: $iteration with gamma: $gamma\n" if $self->{_debug2_r};
                $iterate_again_flag = 0;
                last;
            }
            $error_old = $error;
        }
    }
    display_matrix($beta) if $self->{_debug2_r};
    my $predictions = $X * $beta;
    my @error_distribution = ($y - ($X * $beta))->as_list;
    my $squared_error =  sum map abs, @error_distribution;
    my $error = $squared_error / $nrows;
    if ($display) {
        if ($ncols == 2) {
            my @xvalues = $X->col(0)->as_list;
            my @yvalues = $predictions->col(0)->as_list;
            $self->{_output_for_plots}->{scalar(keys %{$self->{_output_for_plots}}) + 1} = [\@xvalues,\@yvalues];
        } elsif ($ncols == 3) {
            my @xvalues;
            my @yvalues = $predictions->col(0)->as_list;
            foreach my $row_index (0 .. $X->rows - 1) {
                my @onerow = $X->row($row_index)->as_list;
                pop @onerow;
                push @xvalues, "@onerow";
            }
            $self->{_output_for_surface_plots}->{scalar(keys %{$self->{_output_for_surface_plots}}) + 1} = [\@xvalues,\@yvalues];
        } else {
            print "no display when the overall dimensionality of the data exceeds 3\n";
        }
    }
    return ($error, $beta);
}

##-------------------------------  Construct Regression Tree  ------------------------------------


##  At the root node, you do ordinary linear regression for the entire dataset so that you
##  can later compare the linear regression fit with the results obtained through the 
##  regression tree.  Subsequently, you call the recursive_descent() method to construct
##  the tree.

sub construct_regression_tree {
    my $self = shift;
    print "\nConstructing regression tree...\n";
    my $root_node = RTNode->new(undef, undef, undef, [], $self, 'root');
    my ($XMatrix,$YVector) = $self->construct_XMatrix_and_YVector_all_data();
    my ($error,$beta) = $self->estimate_regression_coefficients($XMatrix, $YVector, 1); 
    $root_node->set_node_XMatrix($XMatrix);
    $root_node->set_node_YVector($YVector);
    $root_node->set_node_error($error);
    $root_node->set_node_beta($beta);
    $root_node->set_num_data_points($XMatrix->cols);
    print "\nerror at root: $error\n";
    print "\nbeta at root:\n";
    display_matrix($beta);
    $self->{_root_node} = $root_node;
    $self->recursive_descent($root_node) if $self->{_max_depth_desired} > 0;
    return $root_node;
}

##  We first look for a feature, along with its partitioning point, that yields the 
##  largest reduction in MSE compared to the MSE at the parent node.  This feature and
##  its partitioning point are then used to create two child nodes in the tree.
sub recursive_descent {
    my $self = shift;
    my $node = shift;
    print "\n==================== ENTERING RECURSIVE DESCENT ==========================\n";
    my $node_serial_number = $node->get_serial_num();
    my @features_and_values_or_thresholds_on_branch = @{$node->get_branch_features_and_values_or_thresholds()};
    my @copy_of_path_attributes = @{deep_copy_array(\@features_and_values_or_thresholds_on_branch)};
    if (@features_and_values_or_thresholds_on_branch > 0) {
        my ($error,$beta,$XMatrix,$YVector) = 
          $self->_error_for_given_sequence_of_features_and_values_or_thresholds(\@copy_of_path_attributes);
        $node->set_node_XMatrix($XMatrix);
        $node->set_node_YVector($YVector);
        $node->set_node_error($error);
        $node->set_node_beta($beta);
        $node->set_num_data_points($XMatrix->cols);
        print "\nNODE SERIAL NUMBER: $node_serial_number\n";
        print "\nFeatures and values or thresholds on branch: @features_and_values_or_thresholds_on_branch\n";
        return if $error <= $self->{_mse_threshold}; 
    }
    my ($best_feature,$best_minmax_error_at_partitioning_point,$best_feature_partitioning_point) = 
                                               $self->best_feature_calculator(\@copy_of_path_attributes);
    return unless defined $best_feature_partitioning_point;
    print "\nBest feature found: $best_feature\n";
    print "Best feature partitioning_point: $best_feature_partitioning_point\n";
    print "Best minmax error at partitioning point: $best_minmax_error_at_partitioning_point\n";
    $node->set_feature($best_feature);
    $node->display_node() if $self->{_debug2_r}; 
    return if (defined $self->{_max_depth_desired}) && 
                            (@features_and_values_or_thresholds_on_branch >= $self->{_max_depth_desired}); 
    if ($best_minmax_error_at_partitioning_point > $self->{_mse_threshold}) {
        my @extended_branch_features_and_values_or_thresholds_for_lessthan_child = 
                                        @{deep_copy_array(\@features_and_values_or_thresholds_on_branch)};
        my @extended_branch_features_and_values_or_thresholds_for_greaterthan_child  = 
                                        @{deep_copy_array(\@features_and_values_or_thresholds_on_branch)}; 
        my $feature_threshold_combo_for_less_than = "$best_feature" . '<' . "$best_feature_partitioning_point";
        my $feature_threshold_combo_for_greater_than = "$best_feature" . '>' . "$best_feature_partitioning_point";
        push @extended_branch_features_and_values_or_thresholds_for_lessthan_child, 
                                                                  $feature_threshold_combo_for_less_than;
        push @extended_branch_features_and_values_or_thresholds_for_greaterthan_child, 
                                                               $feature_threshold_combo_for_greater_than;
        my $left_child_node = RTNode->new(undef, undef, undef, 
                          \@extended_branch_features_and_values_or_thresholds_for_lessthan_child, $self);
        $node->add_child_link($left_child_node);
        $self->recursive_descent($left_child_node);
        my $right_child_node = RTNode->new(undef, undef, undef, 
                        \@extended_branch_features_and_values_or_thresholds_for_greaterthan_child, $self);
        $node->add_child_link($right_child_node);
        $self->recursive_descent($right_child_node);
    }
}

##  This is the heart of the regression tree constructor.  Its main job is to figure
##  out the best feature to use for partitioning the training data samples at the
##  current node.  The partitioning criterion is that the largest of the MSE's in 
##  the two partitions should be smaller than the error associated with the parent
##  node.
sub best_feature_calculator {
    my $self = shift;
    my $features_and_values_or_thresholds_on_branch = shift;
    my @features_and_values_or_thresholds_on_branch =  @$features_and_values_or_thresholds_on_branch;
    print "\n\nfeatures_and_values_or_thresholds_on_branch: @features_and_values_or_thresholds_on_branch\n";
    if (@features_and_values_or_thresholds_on_branch == 0) {
        my $best_partition_point_for_feature_hash = { map {$_ => undef} @{$self->{_feature_names}} };
        my $best_minmax_error_for_feature_hash = { map {$_ => undef} @{$self->{_feature_names}} };
        foreach my $i (0 .. @{$self->{_feature_names}}-1) {
            my $feature_name = $self->{_feature_names}->[$i];
            my @values = @{$self->{_sampling_points_for_numeric_feature_hash}->{$feature_name}};
            my @partitioning_errors;
            my %partitioning_error_hash;
            foreach my $value (@values[10 .. $#values - 10]) {
                my $feature_and_less_than_value_string =  "$feature_name" . '<' . "$value";
                my $feature_and_greater_than_value_string = "$feature_name" . '>' . "$value";
                my @for_left_child;
                my @for_right_child;
                if (@features_and_values_or_thresholds_on_branch) {
                    @for_left_child = @{deep_copy_array(\@features_and_values_or_thresholds_on_branch)};
                    push @for_left_child, $feature_and_less_than_value_string;
                    @for_right_child = @{deep_copy_array(\@features_and_values_or_thresholds_on_branch)};
                    push @for_right_child, $feature_and_greater_than_value_string;
                } else {
                    @for_left_child = ($feature_and_less_than_value_string);
                    @for_right_child = ($feature_and_greater_than_value_string);
                }
                my ($error1,$beta1,$XMatrix1,$YVector1) = 
                        $self->_error_for_given_sequence_of_features_and_values_or_thresholds(\@for_left_child);
                my ($error2,$beta2,$XMatrix2,$YVector2) = 
                        $self->_error_for_given_sequence_of_features_and_values_or_thresholds(\@for_right_child);
                my $partitioning_error = max($error1, $error2);
                push @partitioning_errors, $partitioning_error;
                $partitioning_error_hash{$partitioning_error} = $value;
            }
            my $min_max_error_for_feature = min(@partitioning_errors);
            $best_partition_point_for_feature_hash->{$feature_name} = 
                                             $partitioning_error_hash{$min_max_error_for_feature};
            $best_minmax_error_for_feature_hash->{$feature_name} = $min_max_error_for_feature;
        }
        my $best_feature_name;
        my $best_feature_paritioning_point;
        my $best_minmax_error_at_partitioning_point;
        foreach my $feature (keys %{$best_minmax_error_for_feature_hash}) {
            if (! defined $best_minmax_error_at_partitioning_point) {
                $best_minmax_error_at_partitioning_point = $best_minmax_error_for_feature_hash->{$feature};
                $best_feature_name = $feature;
            } elsif ($best_minmax_error_at_partitioning_point > $best_minmax_error_for_feature_hash->{$feature}) {
                $best_minmax_error_at_partitioning_point = $best_minmax_error_for_feature_hash->{$feature};
                $best_feature_name = $feature;
            }
        }
        my $best_feature_partitioning_point =  $best_partition_point_for_feature_hash->{$best_feature_name};
        return ($best_feature_name,$best_minmax_error_at_partitioning_point,$best_feature_partitioning_point);
    } else {
        my $pattern1 = '(.+)=(.+)';
        my $pattern2 = '(.+)<(.+)';
        my $pattern3 = '(.+)>(.+)';
        my @true_numeric_types;
        my @symbolic_types;
        my @true_numeric_types_feature_names;
        my @symbolic_types_feature_names;
        foreach my $item (@features_and_values_or_thresholds_on_branch) {
            if ($item =~ /$pattern2/) {
                push @true_numeric_types, $item;
                push @true_numeric_types_feature_names, $1;
            } elsif ($item =~ /$pattern3/) {
                push @true_numeric_types, $item;
                push @true_numeric_types_feature_names, $1;
            } elsif ($item =~ /$pattern1/) {
                push @symbolic_types, $item;
                push @symbolic_types_feature_names, $1;
            } else {
                die "format error in the representation of feature and values or thresholds";
            }
        }
        my %seen = ();
        @true_numeric_types_feature_names = grep {$_ if !$seen{$_}++} @true_numeric_types_feature_names;
        %seen = ();
        @symbolic_types_feature_names = grep {$_ if !$seen{$_}++} @symbolic_types_feature_names;
        my @bounded_intervals_numeric_types = 
                           @{$self->find_bounded_intervals_for_numeric_features(\@true_numeric_types)};
        # Calculate the upper and the lower bounds to be used when searching for the best
        # threshold for each of the numeric features that are in play at the current node:
        my (%upperbound, %lowerbound);
        foreach my $feature (@true_numeric_types_feature_names) {
            $upperbound{$feature} = undef;
            $lowerbound{$feature} = undef;
        }
        foreach my $item (@bounded_intervals_numeric_types) {
            foreach my $feature_grouping (@$item) {
                if ($feature_grouping->[1] eq '>') {
                    $lowerbound{$feature_grouping->[0]} = $feature_grouping->[2];
                } else {
                    $upperbound{$feature_grouping->[0]} = $feature_grouping->[2];
                }
            }
        }
        my $best_partition_point_for_feature_hash = { map {$_ => undef} @{$self->{_feature_names}} };
        my $best_minmax_error_for_feature_hash = { map {$_ => undef} @{$self->{_feature_names}} };
        foreach my $i (0 .. @{$self->{_feature_names}}-1) {
            my $feature_name = $self->{_feature_names}->[$i];
            my @values = @{$self->{_sampling_points_for_numeric_feature_hash}->{$feature_name}};
            my @newvalues;
            if (contained_in($feature_name, @true_numeric_types_feature_names)) {
                if (defined($upperbound{$feature_name}) && defined($lowerbound{$feature_name}) &&
                              $lowerbound{$feature_name} >= $upperbound{$feature_name}) {
                    next;
                } elsif (defined($upperbound{$feature_name}) && defined($lowerbound{$feature_name}) &&
                                    $lowerbound{$feature_name} < $upperbound{$feature_name}) {
                    foreach my $x (@values) {
                        push @newvalues, $x if $x > $lowerbound{$feature_name} && $x <= $upperbound{$feature_name};
                    }
                } elsif (defined($upperbound{$feature_name})) {
                    foreach my $x (@values) {
                        push @newvalues, $x if $x <= $upperbound{$feature_name};
                    }
                } elsif (defined($lowerbound{$feature_name})) {
                    foreach my $x (@values) {
                        push @newvalues, $x if $x > $lowerbound{$feature_name};
                    }
                } else {
                    die "Error is bound specifications in best feature calculator";
                }
            } else {
                @newvalues = @{deep_copy_array(\@values)};
            }
            next if @newvalues < 30;
            my @partitioning_errors;
            my %partitioning_error_hash;
            foreach my $value (@newvalues[10 .. $#newvalues - 10]) {
                my $feature_and_less_than_value_string =  "$feature_name" . '<' . "$value";
                my $feature_and_greater_than_value_string = "$feature_name" . '>' . "$value";
                my @for_left_child;
                my @for_right_child;
                if (@features_and_values_or_thresholds_on_branch) {
                    @for_left_child = @{deep_copy_array(\@features_and_values_or_thresholds_on_branch)};
                    push @for_left_child, $feature_and_less_than_value_string;
                    @for_right_child = @{deep_copy_array(\@features_and_values_or_thresholds_on_branch)};
                    push @for_right_child, $feature_and_greater_than_value_string;
                } else {
                    @for_left_child = ($feature_and_less_than_value_string);
                    @for_right_child = ($feature_and_greater_than_value_string);
                }
                my ($error1,$beta1,$XMatrix1,$YVector1) = 
                        $self->_error_for_given_sequence_of_features_and_values_or_thresholds(\@for_left_child);
                my ($error2,$beta2,$XMatrix2,$YVector2) = 
                        $self->_error_for_given_sequence_of_features_and_values_or_thresholds(\@for_right_child);
                my $partitioning_error = max($error1, $error2);
                push @partitioning_errors, $partitioning_error;
                $partitioning_error_hash{$partitioning_error} = $value;
            }
            my $min_max_error_for_feature = min(@partitioning_errors);
            $best_partition_point_for_feature_hash->{$feature_name} = 
                                             $partitioning_error_hash{$min_max_error_for_feature};
            $best_minmax_error_for_feature_hash->{$feature_name} = $min_max_error_for_feature;
        }
        my $best_feature_name;
        my $best_feature_paritioning_point;
        my $best_minmax_error_at_partitioning_point;
        foreach my $feature (keys %{$best_minmax_error_for_feature_hash}) {
            if (! defined $best_minmax_error_at_partitioning_point) {
                $best_minmax_error_at_partitioning_point = $best_minmax_error_for_feature_hash->{$feature};
                $best_feature_name = $feature;
            } elsif ($best_minmax_error_at_partitioning_point > $best_minmax_error_for_feature_hash->{$feature}) {
                $best_minmax_error_at_partitioning_point = $best_minmax_error_for_feature_hash->{$feature};
                $best_feature_name = $feature;
            }
        }
        my $best_feature_partitioning_point =  $best_partition_point_for_feature_hash->{$best_feature_name};
        return ($best_feature_name,$best_minmax_error_at_partitioning_point,$best_feature_partitioning_point);
    }
}

##  This method requires that all truly numeric types only be expressed as '<' or '>'
##  constructs in the array of branch features and thresholds
sub _error_for_given_sequence_of_features_and_values_or_thresholds{
    my $self = shift;
    my $array_of_features_and_values_or_thresholds = shift;
    if (@$array_of_features_and_values_or_thresholds == 0) { 
        my ($XMatrix,$YVector) = $self->construct_XMatrix_and_YVector_all_data();
        my ($errors,$beta) = $self->estimate_regression_coefficients($XMatrix,$YVector);
        return ($errors,$beta,$XMatrix,$YVector)
    }
    my $pattern1 = '(.+)=(.+)';
    my $pattern2 = '(.+)<(.+)';
    my $pattern3 = '(.+)>(.+)';
    my @true_numeric_types;
    my @symbolic_types;
    my @true_numeric_types_feature_names;
    my @symbolic_types_feature_names;
    foreach my $item (@$array_of_features_and_values_or_thresholds) {
        if ($item =~ /$pattern2/) {
            push @true_numeric_types, $item;
            push @true_numeric_types_feature_names, $1;
        } elsif ($item =~ /$pattern3/) {
            push @true_numeric_types, $item;
            push @true_numeric_types_feature_names, $1;
        } elsif ($item =~ /$pattern1/) {
            push @symbolic_types, $item;
            push @symbolic_types_feature_names, $1;
        } else {
            die "format error in the representation of feature and values or thresholds";
        }
    }
    my %seen = ();
    @true_numeric_types_feature_names = grep {$_ if !$seen{$_}++} @true_numeric_types_feature_names;
    %seen = ();
    @symbolic_types_feature_names = grep {$_ if !$seen{$_}++} @symbolic_types_feature_names;
    my @bounded_intervals_numeric_types = 
                           @{$self->find_bounded_intervals_for_numeric_features(\@true_numeric_types)};
    # Calculate the upper and the lower bounds to be used when searching for the best
    # threshold for each of the numeric features that are in play at the current node:
    my (%upperbound, %lowerbound);
    foreach my $feature (@true_numeric_types_feature_names) {
        $upperbound{$feature} = undef;
        $lowerbound{$feature} = undef;
    }
    foreach my $item (@bounded_intervals_numeric_types) {
        foreach my $feature_grouping (@$item) {
            if ($feature_grouping->[1] eq '>') {
                $lowerbound{$feature_grouping->[0]} = $feature_grouping->[2];
            } else {
                $upperbound{$feature_grouping->[0]} = $feature_grouping->[2];
            }
        }
    }
    my %training_samples_at_node;
    foreach my $feature_name (@true_numeric_types_feature_names) {
        if ((defined $lowerbound{$feature_name}) && (defined $upperbound{$feature_name}) && 
                                     ($upperbound{$feature_name} <= $lowerbound{$feature_name})) {
            return (undef,undef,undef,undef); 
        } elsif ((defined $lowerbound{$feature_name}) && (defined $upperbound{$feature_name})) {
            foreach my $sample (keys %{$self->{_training_data_hash}}) {
                my @feature_val_pairs = @{$self->{_training_data_hash}->{$sample}};
                foreach my $feature_and_val (@feature_val_pairs) {
                    my $value_for_feature = substr($feature_and_val, index($feature_and_val,'=')+1 );
                    my $feature_involved =  substr($feature_and_val, 0, index($feature_and_val,'=') );
                    if (($feature_name eq $feature_involved) && 
                        ($lowerbound{$feature_name} < $value_for_feature) && 
                        ($value_for_feature <= $upperbound{$feature_name})) {
                        $training_samples_at_node{$sample} = 1;
                        last;
                    }
                }
            }  
        } elsif ((defined $upperbound{$feature_name}) && (! defined $lowerbound{$feature_name})) {
            foreach my $sample (keys %{$self->{_training_data_hash}}) {
                my @feature_val_pairs = @{$self->{_training_data_hash}->{$sample}};
                foreach my $feature_and_val (@feature_val_pairs) {
                    my $value_for_feature = substr($feature_and_val, index($feature_and_val,'=')+1 );
                    my $feature_involved =  substr($feature_and_val, 0, index($feature_and_val,'=') );
                    if (($feature_name eq $feature_involved) && 
                        ($value_for_feature <= $upperbound{$feature_name})) {
                        $training_samples_at_node{$sample} = 1;
                        last;
                    }
                }
            }  
        } elsif ((defined $lowerbound{$feature_name}) && (! defined $upperbound{$feature_name})) {
            foreach my $sample (keys %{$self->{_training_data_hash}}) {
                my @feature_val_pairs = @{$self->{_training_data_hash}->{$sample}};
                foreach my $feature_and_val (@feature_val_pairs) {
                    my $value_for_feature = substr($feature_and_val, index($feature_and_val,'=')+1 );
                    my $feature_involved =  substr($feature_and_val, 0, index($feature_and_val,'=') );
                    if (($feature_name eq $feature_involved) && 
                        ($value_for_feature > $lowerbound{$feature_name})) {
                        $training_samples_at_node{$sample} = 1;
                        last;
                    }
                }
            }  
        } else {
            die "Ill formatted call to the '_error_for_given_sequence_...' method";
        }   
    }
    foreach my $feature_and_value (@symbolic_types) {
        if ($feature_and_value =~ /$pattern1/) {
            my ($feature,$value) = ($1,$2);
            foreach my $sample (keys %{$self->{_training_data_hash}}) {
                my @feature_val_pairs = @{$self->{_training_data_hash}->{$sample}};
                foreach my $feature_and_val (@feature_val_pairs) {
                    my $feature_in_sample =  substr($feature_and_val, 0, index($feature_and_val,'=') );
                    my $value_in_sample = substr($feature_and_val, index($feature_and_val,'=')+1 );
                    if (($feature eq $feature_in_sample) && ($value eq $value_in_sample)) {
                        $training_samples_at_node{$sample} = 1;
                        last;
                    }
                }
            }
        }
    }
    my @training_samples_at_node = keys %training_samples_at_node;
    my $matrix_rows_as_lists =  [ map {my @arr = @$_; [map substr($_,index($_,'=')+1), @arr] } map {$self->{_training_data_hash}->{$_}} sort {sample_index($a) <=> sample_index($b)} @training_samples_at_node ];;
    map {push @$_, 1} @{$matrix_rows_as_lists};
    my $XMatrix = Math::GSL::Matrix->new(scalar @{$matrix_rows_as_lists}, scalar @{$matrix_rows_as_lists->[0]});
    pairmap {$XMatrix->set_row($a,$b)} ( 0..@{$matrix_rows_as_lists}-1, @{$matrix_rows_as_lists} )
                       [ map { $_, $_ + @{$matrix_rows_as_lists} } ( 0 .. @{$matrix_rows_as_lists}-1 ) ];
    if ($self->{_debug3_r}) {
        print "\nXMatrix as its transpose:";        
        my $displayX = transpose($XMatrix);
        display_matrix($displayX);
    }
    my @dependent_var_values =  map {my $val = $self->{_samples_dependent_var_val_hash}->{$_}; substr($val,index($val,'=')+1)} sort {sample_index($a) <=> sample_index($b)} @training_samples_at_node;
    print "dependent var values: @dependent_var_values\n" if $self->{_debug1_r};
    my $YVector = Math::GSL::Matrix->new(scalar @{$matrix_rows_as_lists}, 1);
    pairmap {$YVector->set_row($a,$b)} ( 0..@{$matrix_rows_as_lists}-1, map {[$_]} @dependent_var_values )
                       [ map { $_, $_ + @{$matrix_rows_as_lists} } ( 0 .. @{$matrix_rows_as_lists}-1 ) ];
    if ($self->{_debug3_r}) {
        print "\nYVector:";  
        my $displayY = transpose($YVector);
        display_matrix($displayY);
    }
    my ($error,$beta) = $self->estimate_regression_coefficients($XMatrix, $YVector);
    if ($self->{_debug3_r}) {
        display_matrix($beta);
        print("\n\nerror distribution at node: ", $error);
    }
    return ($error,$beta,$XMatrix,$YVector);
}

#-----------------------------    Predict with Regression Tree   ------------------------------

sub predictions_for_all_data_used_for_regression_estimation {
    my $self = shift;
    my $root_node = shift;
    my %predicted_values;
    my %leafnode_for_values;
    my $ncols = $self->{_XMatrix}->cols;
    if ($ncols == 2) {
        foreach my $sample (keys %{$self->{_training_data_hash}}) {
            my $pattern = '(\S+)\s*=\s*(\S+)';
            $self->{_training_data_hash}->{$sample}->[0] =~ /$pattern/;
            my ($feature,$value) =  ($1, $2);
            my $new_feature_and_value = "$feature=$value";
            my $answer = $self->prediction_for_single_data_point($root_node, [$new_feature_and_value]);
            $predicted_values{$value} = $answer->{'prediction'};
            $leafnode_for_values{"$value,$predicted_values{$value}"} = $answer->{'solution_path'}->[-1];
        }
        my @leaf_nodes_used = keys %{{map {$_ => 1} values %leafnode_for_values}};
        foreach my $leaf (@leaf_nodes_used) {
            my @xvalues;
            my @yvalues;
            foreach my $x (sort {$a <=> $b} keys %predicted_values) {
                if ($leaf == $leafnode_for_values{"$x,$predicted_values{$x}"}) {
                    push @xvalues, $x;
                    push @yvalues, $predicted_values{$x};
                }
            }
            $self->{_output_for_plots}->{scalar(keys %{$self->{_output_for_plots}}) + 1} = [\@xvalues,\@yvalues];
        }
    } elsif ($ncols == 3) {
        foreach my $sample (keys %{$self->{_training_data_hash}}) {
            my $pattern = '(\S+)\s*=\s*(\S+)';        
            my @features_and_vals;
            my @newvalues;
            foreach my $feature_and_val (@{$self->{_training_data_hash}->{$sample}}) {
                $feature_and_val =~ /$pattern/;
                my ($feature,$value) =  ($1, $2);
                push @newvalues, $value;
                my $new_feature_and_value = "$feature=$value";
                push @features_and_vals, $new_feature_and_value;
            }
            my $answer = $self->prediction_for_single_data_point($root_node, \@features_and_vals);
            $predicted_values{"@newvalues"} = $answer->{'prediction'};
            $leafnode_for_values{"@newvalues"} = $answer->{'solution_path'}->[-1];
        }
        my @leaf_nodes_used = keys %{{map {$_ => 1} values %leafnode_for_values}};
        foreach my $leaf (@leaf_nodes_used) {
            my @xvalues;
            my @yvalues;
            foreach my $kee (keys %predicted_values) {
                if ($leaf == $leafnode_for_values{$kee}) {
                    push @xvalues, $kee;
                    push @yvalues, $predicted_values{$kee};
                }
            }
            $self->{_output_for_surface_plots}->{scalar(keys %{$self->{_output_for_surface_plots}}) + 1} = [\@xvalues,\@yvalues];
        }
    } else {
        die "\nThe module does not yet include regression based predictions when you have more than " .
            "two predictor variables --- on account of the difficulty of properly visualizing the " .
            "quality of such predictions.  Future versions of this modue may nonetheless allow for " .
            "regression based predictions in such cases (depending on user feedback)";
    }
}

sub bulk_predictions_for_data_in_a_csv_file {
    my $self = shift;
    my ($root_node, $filename, $columns) = @_;
    die("Aborted. bulk_predictions_for_data_in_a_csv_file() is only for CSV files") unless $filename =~ /\.csv$/;
    my $basefilename = basename($filename, '.csv');
    my $out_file_name = $basefilename . "_output.csv";
    unlink $out_file_name if -e $out_file_name;
    open FILEOUT, "> $out_file_name" or die "Unable to open $out_file_name: $!";
    $|++;
    open FILEIN, $filename or die "Unable to open $filename: $!";
    my $record_index = 0;
    my @fieldnames;
    while (<FILEIN>) {
        next if /^[ ]*\r?\n?$/;
        $_ =~ s/\r?\n?$//;
        my $record = $self->{_csv_cleanup_needed} ? cleanup_csv($_) : $_;
        if ($record_index == 0) {
            @fieldnames =   grep $_, split /,/, substr($record, index($record,','));
            $record_index++;
            next;
        }
        my @feature_vals = grep $_, split /,/, $record;
        my @features_and_vals;
        foreach my $col_index (@$columns) {
            push @features_and_vals, "$fieldnames[$col_index-1]=$feature_vals[$col_index-1]"
        }
        my $answer = $self->prediction_for_single_data_point($root_node, \@features_and_vals);
        print FILEOUT "$record        =>        $answer->{'prediction'}\n";
    }
    close FILEIN;
    close FILEOUT;
}

sub mse_for_tree_regression_for_all_training_samples {
    my $self = shift;
    my $root_node = shift;
    my %predicted_values;
    my %dependent_var_values;
    my %leafnode_for_values;
    my $total_error = 0.0;
    my %samples_at_leafnode;
    foreach my $sample (keys %{$self->{_training_data_hash}}) {
        my $pattern = '(\S+)\s*=\s*(\S+)';        
        my @features_and_vals;
        my $newvalue;
        foreach my $feature_and_val (@{$self->{_training_data_hash}->{$sample}}) {
            $feature_and_val =~ /$pattern/;
            my ($feature,$value) =  ($1, $2);
            if (contained_in($feature, @{$self->{_feature_names}})) {
                my $new_feature_and_value = "$feature=$value";
                push @features_and_vals, "$feature=$value";
            }
            $newvalue = $value;
        } 
        my $answer = $self->prediction_for_single_data_point($root_node, \@features_and_vals);
        $predicted_values{"@features_and_vals"} = $answer->{'prediction'};
        $self->{_samples_dependent_var_val_hash}->{$sample} =~ /$pattern/;
        my ($dep_feature,$dep_value) = ($1, $2);
        $dependent_var_values{"@features_and_vals"} = $dep_value;
        my $leafnode_for_sample = $answer->{'solution_path'}->[-1];
        $leafnode_for_values{"@features_and_vals"} = $answer->{'solution_path'}->[-1];
        my $error_for_sample = abs($predicted_values{"@features_and_vals"} - $dependent_var_values{"@features_and_vals"});
        $total_error += $error_for_sample;
        if (exists $samples_at_leafnode{$leafnode_for_sample}) {
            push @{$samples_at_leafnode{$leafnode_for_sample}}, $sample;
        } else {
            $samples_at_leafnode{$leafnode_for_sample} = [$sample];
        }
    }
    my @leafnodes_used = keys %{{map {$_ => 1} values %leafnode_for_values}};
    my $errors_at_leafnode = { map {$_ => 0.0} @leafnodes_used };
    foreach my $kee (keys %predicted_values) {
        foreach my $leaf (@leafnodes_used) {
            $errors_at_leafnode->{$leaf} += abs($predicted_values{$kee} - $dependent_var_values{$kee})
                          if $leaf == $leafnode_for_values{$kee};
        }
    }
    my $total_error_per_data_point = $total_error / (scalar keys %{$self->{_training_data_hash}});
    print "\n\nTree Regression: Total MSE per sample with tree regression: $total_error_per_data_point\n";
    foreach my $leafnode (@leafnodes_used) {
        my $error_per_data_point = $errors_at_leafnode->{$leafnode} / @{$samples_at_leafnode{$leafnode}};
        print "    MSE per sample at leafnode $leafnode: $error_per_data_point\n";
    }
    my $error_with_linear_regression = $self->{_root_node}->get_node_error();
    print "For comparision, the MSE per sample error with Linear Regression: $error_with_linear_regression\n";
}

##  Calculated the predicted value for the dependent variable from a given value for all
##  the predictor variables.
sub prediction_for_single_data_point {
    my $self = shift;
    my $root_node = shift;
    my $features_and_values = shift;
    die "Error in the names you have used for features and/or values when calling " .
        "prediction_for_single_data_point()" unless $self->_check_names_used($features_and_values);
    my $pattern = '(\S+)\s*=\s*(\S+)';        
    my @new_features_and_values;
    foreach my $feature_and_value (@$features_and_values) {
        $feature_and_value =~ /$pattern/;
        my ($feature,$value) =  ($1, $2);
        push @new_features_and_values, "$feature=$value";
    }
    my %answer;
    $answer{'solution_path'} = [];
    my $prediction = $self->recursive_descent_for_prediction($root_node,\@new_features_and_values, \%answer);
    $answer{'solution_path'} = [reverse( @{$answer{'solution_path'}} )];
    return \%answer;
}

sub recursive_descent_for_prediction {
    my $self = shift;
    my $node = shift;
    my $feature_and_values = shift;
    my $answer = shift;
    my @children = @{$node->get_children()};
    if (@children == 0) {
        my $leaf_node_prediction = $node->node_prediction_from_features_and_values($feature_and_values); 
        $answer->{'prediction'} = $leaf_node_prediction;
        push @{$answer->{'solution_path'}}, $node->get_serial_num();
        return $answer;
    }
    my $feature_tested_at_node = $node->get_feature();
    print "\nFeature tested at node for prediction: $feature_tested_at_node\n" if $self->{_debug3};
    my $value_for_feature;
    my $path_found;
    my $pattern = '(\S+)\s*=\s*(\S+)';
    my ($feature,$value);
    foreach my $feature_and_value (@$feature_and_values) {
        $feature_and_value =~ /$pattern/;
        my ($feature,$value) =  ($1, $2);
        if ($feature eq $feature_tested_at_node) {
            $value_for_feature = $value;
        }
    }
    if (! defined $value_for_feature) {
        my $leaf_node_prediction = $node->node_prediction_from_features_and_values($feature_and_values); 
        $answer->{'prediction'} = $leaf_node_prediction;
        push @{$answer->{'solution_path'}}, $node->get_serial_num();
        return $answer;
    }
    foreach my $child (@children) {
        my @branch_features_and_values = @{$child->get_branch_features_and_values_or_thresholds()};
        my $last_feature_and_value_on_branch = $branch_features_and_values[-1]; 
        my $pattern1 = '(.+)<(.+)';
        my $pattern2 = '(.+)>(.+)';
        if ($last_feature_and_value_on_branch =~ /$pattern1/) {
            my ($feature,$threshold) = ($1, $2);
            if ($value_for_feature <= $threshold) {
                $path_found = 1;
                $answer = $self->recursive_descent_for_prediction($child, $feature_and_values, $answer);
                push @{$answer->{'solution_path'}}, $node->get_serial_num();
                last;
            }
        }
        if ($last_feature_and_value_on_branch =~ /$pattern2/) {
            my ($feature,$threshold) = ($1, $2);
            if ($value_for_feature > $threshold) {
                $path_found = 1;
                $answer = $self->recursive_descent_for_prediction($child, $feature_and_values, $answer);
                push @{$answer->{'solution_path'}}, $node->get_serial_num();
                last;
            }
        }
    }
    return $answer if $path_found;
}

#--------------------------------------  Utility Methods   ----------------------------------------

##  This method is used to verify that you used legal feature names in the test
##  sample that you want to classify with the decision tree.
sub _check_names_used {
    my $self = shift;
    my $features_and_values_test_data = shift;
    my @features_and_values_test_data = @$features_and_values_test_data;
    my $pattern = '(\S+)\s*=\s*(\S+)';
    foreach my $feature_and_value (@features_and_values_test_data) {
        $feature_and_value =~ /$pattern/;
        my ($feature,$value) = ($1,$2);
        die "Your test data has formatting error" unless defined($feature) && defined($value);
        return 0 unless contained_in($feature, @{$self->{_feature_names}});
    }
    return 1;
}

sub display_all_plots {
    my $self = shift;
    my $ncols = $self->{_XMatrix}->cols;
    unlink "regression_plots.png" if -e "regression_plots.png";
    my $master_datafile = $self->{_training_datafile};
    my $filename = basename($master_datafile);
    my $temp_file = "__temp_" . $filename;
    unlink $temp_file if -e $temp_file;
    open OUTPUT, ">$temp_file"
           or die "Unable to open a temp file in this directory: $!\n";
    if ($ncols == 2) {
        my @predictor_entries = $self->{_XMatrix}->col(0)->as_list;
        my @dependent_val_vals = $self->{_YVector}->col(0)->as_list;
        map {print OUTPUT "$predictor_entries[$_] $dependent_val_vals[$_]\n"} 0 .. $self->{_XMatrix}->rows - 1;
        print OUTPUT "\n\n";
        foreach my $plot (sort {$a <=> $b} keys %{$self->{_output_for_plots}}) {
            map {print OUTPUT "$self->{_output_for_plots}->{$plot}->[0]->[$_] $self->{_output_for_plots}->{$plot}->[1]->[$_]\n"} 0 .. @{$self->{_output_for_plots}->{$plot}->[0]} - 1;
            print OUTPUT "\n\n"
        }
        close OUTPUT;
        my $gplot = Graphics::GnuplotIF->new( persist => 1 );
        my $hardcopy_plot = Graphics::GnuplotIF->new();
        $hardcopy_plot->gnuplot_cmd('set terminal png', "set output \"regression_plots.png\"");        
        $gplot->gnuplot_cmd( "set noclip" );
        $gplot->gnuplot_cmd( "set pointsize 2" );
        my $arg_string = "";
        foreach my $i (0 .. scalar(keys %{$self->{_output_for_plots}})) {
            if ($i == 0) {            
                $arg_string .= "\"$temp_file\" index $i using 1:2 notitle with points lt -1 pt 1, ";
            } elsif ($i == 1) {
                $arg_string .= "\"$temp_file\" index $i using 1:2 title \"linear regression\" with lines lt 1 lw 4, ";
            } elsif ($i == 2) {
                $arg_string .= "\"$temp_file\" index $i using 1:2 title \"tree regression\" with lines lt 3 lw 4, ";
            } else {
                $arg_string .= "\"$temp_file\" index $i using 1:2 notitle with lines lt 3 lw 4, ";
            }
        }
        $arg_string = $arg_string =~ /^(.*),[ ]+$/;
        $arg_string = $1;
        $hardcopy_plot->gnuplot_cmd( "plot $arg_string" );
        $gplot->gnuplot_cmd( "plot $arg_string" );
        $gplot->gnuplot_pause(-1);
    } elsif ($ncols == 3) {
        my @dependent_val_vals = $self->{_YVector}->col(0)->as_list;
        foreach my $i (0 .. $self->{_XMatrix}->rows - 1) {
            my @onerow = $self->{_XMatrix}->row($i)->as_list;
            pop @onerow;
            print OUTPUT "@onerow $dependent_val_vals[$i]\n";
        }
        print OUTPUT "\n\n";
        foreach my $plot (sort {$a <=> $b} keys %{$self->{_output_for_surface_plots}}) {
            my @plot_data = @{$self->{_output_for_surface_plots}->{$plot}};
            my @predictors = @{$plot_data[0]};
            my @predictions = @{$plot_data[1]};
            map {print OUTPUT "$predictors[$_] $predictions[$_]\n"} 0 .. @predictions - 1;
            print OUTPUT "\n\n"
        }
        close OUTPUT;
        my $gplot = Graphics::GnuplotIF->new( persist => 1 );
        my $hardcopy_plot = Graphics::GnuplotIF->new();
        $hardcopy_plot->gnuplot_cmd('set terminal png', "set output \"regression_plots.png\"");        
        $gplot->gnuplot_cmd( "set noclip" );
        $gplot->gnuplot_cmd( "set pointsize 2" );
        my $arg_string = "";
        foreach my $i (0 .. scalar(keys %{$self->{_output_for_surface_plots}})) {
            if ($i == 0) {            
                $arg_string .= "\"$temp_file\" index $i using 1:2:3 notitle with points lt -1 pt 1, ";
            } elsif ($i == 1) {
                $arg_string .= "\"$temp_file\" index $i using 1:2:3 title \"linear regression\" with points lt 1 pt 2, ";
            } elsif ($i == 2) {
                $arg_string .= "\"$temp_file\" index $i using 1:2:3 title \"tree regression\" with points lt 3 pt 3, ";
            } else {
                $arg_string .= "\"$temp_file\" index $i using 1:2:3 notitle with points lt 3 pt 3, ";
            }
        }
        $arg_string = $arg_string =~ /^(.*),[ ]+$/;
        $arg_string = $1;
        $hardcopy_plot->gnuplot_cmd( "splot $arg_string" );
        $gplot->gnuplot_cmd( "splot $arg_string" );
        $gplot->gnuplot_pause(-1);
    } else {
        die "no visual displays for regression from more then 2 predictor vars";
    }   
}  

sub DESTROY {
    unlink glob "__temp_*";
}

############################################## Utility Routines ##########################################
# checks whether an element is in an array:
sub contained_in {
    my $ele = shift;
    my @array = @_;
    my $count = 0;
    map {$count++ if $ele eq $_} @array;
    return $count;
}

sub minmax {
    my $arr = shift;
    my ($min, $max);
    foreach my $i (0..@{$arr}-1) {
        if ( (!defined $min) || ($arr->[$i] < $min) ) {
            $min = $arr->[$i];
        }
        if ( (!defined $max) || ($arr->[$i] > $max) ) {
            $max = $arr->[$i];
        }
    }
    return ($min, $max);
}

sub sample_index {
    my $arg = shift;
    $arg =~ /_(.+)$/;
    return $1;
}    

sub check_for_illegal_params {
    my @params = @_;
    my @legal_params = qw / training_datafile
                            max_depth_desired
                            dependent_variable_column
                            predictor_columns
                            mse_threshold
                            need_data_normalization
                            jacobian_choice
                            csv_cleanup_needed
                            debug1_r
                            debug2_r
                            debug3_r
                          /;
    my $found_match_flag;
    foreach my $param (@params) {
        foreach my $legal (@legal_params) {
            $found_match_flag = 0;
            if ($param eq $legal) {
                $found_match_flag = 1;
                last;
            }
        }
        last if $found_match_flag == 0;
    }
    return $found_match_flag;
}

sub cleanup_csv {
    my $line = shift;
    $line =~ tr/\/:?()[]{}'/          /;
#    my @double_quoted = substr($line, index($line,',')) =~ /\"[^\"]+\"/g;
    my @double_quoted = substr($line, index($line,',')) =~ /\"[^\"]*\"/g;
    for (@double_quoted) {
        my $item = $_;
        $item = substr($item, 1, -1);
        $item =~ s/^\s+|,|\s+$//g;
        $item = join '_',  split /\s+/, $item;
        substr($line, index($line, $_), length($_)) = $item;
    }
    my @white_spaced = $line =~ /,(\s*[^,]+)(?=,|$)/g;
    for (@white_spaced) {
        my $item = $_;
        $item =~ s/\s+/_/g;
        $item =~ s/^\s*_|_\s*$//g;
        substr($line, index($line, $_), length($_)) = $item;
    }
    $line =~ s/,\s*(?=,|$)/,NA/g;
    return $line;
}

sub transpose {
    my $matrix = shift;
    my $num_rows = $matrix->rows();
    my $num_cols = $matrix->cols();
    my $transpose = Math::GSL::Matrix->new($num_cols, $num_rows);
    foreach my $i (0..$num_rows-1) {
        my @row = $matrix->row($i)->as_list;
        $transpose->set_col($i, \@row );
    }
    return $transpose;
}

sub vector_subtract {
    my $vec1 = shift;
    my $vec2 = shift;
    die "wrong data types for vector subtract calculation\n" if @$vec1 != @$vec2;
    my @result;
    foreach my $i (0..@$vec1-1){
        push @result, $vec1->[$i] - $vec2->[$i];
    }
    return @result;
}

sub vector_norm {
    my $vec = shift;       # assume it to be a column vector
    my ($rows, $cols) = $vec->dim;
    die "vector_norm() can only be called for a single column matrix" if $cols > 1;
    my @norm = (transpose($vec) * $vec)->as_list;
    return sqrt($norm[0]);
}

sub display_matrix {
    my $matrix = shift;
    my $nrows = $matrix->rows();
    my $ncols = $matrix->cols();
    print "\nDisplaying a matrix of size $nrows rows and $ncols columns:\n";
    foreach my $i (0..$nrows-1) {
        my $row = $matrix->row($i);
        my @row_as_list = $row->as_list;
        map { printf("%.4f ", $_) } @row_as_list;
        print "\n";
    }
    print "\n\n";
}

# Meant only for an array of strings (no nesting):
sub deep_copy_array {
    my $ref_in = shift;
    my $ref_out;
    return [] if scalar @$ref_in == 0;
    foreach my $i (0..@{$ref_in}-1) {
        $ref_out->[$i] = $ref_in->[$i];
    }
    return $ref_out;
}


#############################################  Class RTNode  #############################################

# The nodes of a regression tree are instances of this class:
package RTNode;

use strict; 
use Carp;

# $feature is the feature test at the current node.  $branch_features_and_values is
# an anonymous array holding the feature names and corresponding values on the path
# from the root to the current node:
sub new {                                                           
    my ($class, $feature, $error, $beta, $branch_features_and_values_or_thresholds, $rt, $root_or_not) = @_; 
    $root_or_not = '' if !defined $root_or_not;
    if ($root_or_not eq 'root') {
        $rt->{nodes_created} = -1;
        $rt->{class_names} = undef;
    }
    my $self = {                                                         
            _rt                      => $rt,
            _feature                 => $feature,                                       
            _error                   => $error,                                       
            _beta                    => $beta,                                       
            _branch_features_and_values_or_thresholds => $branch_features_and_values_or_thresholds,
            _num_data_points         => undef,                                       
            _XMatrix                 => undef,
            _YVector                 => undef,
            _linked_to               => [],                                          
    };
    bless $self, $class;
    $self->{_serial_number} =  $self->get_next_serial_num();
    return $self;
}

sub node_prediction_from_features_and_values {
    my $self = shift;
    my $feature_and_values = shift;
    my $ncols = $self->{_XMatrix}->cols;
    my $pattern = '(\S+)\s*=\s*(\S+)';
    my ($feature,$value);
    my @Xlist;
    foreach my $feature_name (@{$self->{_rt}->{_feature_names}}) {
        foreach my $feature_and_value (@{$feature_and_values}) {
            $feature_and_value =~ /$pattern/;
            my ($feature, $value) = ($1, $2);
            push @Xlist, $value if $feature_name eq $feature; 
        }
    }
    push @Xlist, 1;
    my $dataMatrix = Math::GSL::Matrix->new(1, $ncols);
    $dataMatrix->set_row(0, \@Xlist);
    my $prediction = $dataMatrix * $self->get_node_beta();
    return $prediction->get_elem(0,0);
}

sub node_prediction_from_data_as_matrix {
    my $self = shift;
    my $dataMatrix = shift;
    my $prediction = $dataMatrix * $self->get_node_beta();
    return $prediction->get_elem(0,0);
}

sub node_prediction_from_data_as_list {
    my $self = shift;
    my $data_as_list = shift;
    my @data_arr =  @{$data_as_list};
    my $ncols = $self->{_XMatrix}->cols;
    die "wrong number of elements in data list" if @data_arr != $ncols - 1;
    push @data_arr, 1;
    my $dataMatrix = Math::GSL::Matrix->new(1, $self->{_XMatrix}->cols);
    my $prediction = $dataMatrix * $self->get_node_beta();
    return $prediction->get_elem(0,0);
}

sub how_many_nodes {
    my $self = shift;
    return $self->{_rt}->{nodes_created} + 1;
}

sub get_num_data_points {
    my $self = shift;
    return $self->{_num_data_points};
}  

sub set_num_data_points {
    my $self = shift;
    my $how_many = shift;
    $self->{_num_data_points} = $how_many;
}

sub set_node_XMatrix {
    my $self = shift;
    my $xmatrix = shift;
    $self->{_XMatrix} = $xmatrix;
}

sub get_node_XMatrix {
    my $self = shift;
    return $self->{_XMatrix};
}

sub set_node_YVector {
    my $self = shift;
    my $yvector = shift;
    $self->{_YVector} = $yvector;
}

sub get_node_YVector {
    my $self = shift;
    return $self->{_YVector};
}  

sub set_node_error {
    my $self = shift;
    my $error = shift;
    $self->{_error} = $error;
}

sub get_node_error {
    my $self = shift;
    return $self->{_error};
}

sub set_node_beta {
    my $self = shift;
    my $beta = shift;
    $self->{_beta} = $beta;
}

sub get_node_beta {
    my $self = shift;
    return $self->{_beta};
}

sub get_next_serial_num {
    my $self = shift;
    $self->{_rt}->{nodes_created} += 1;
    return $self->{_rt}->{nodes_created};
}

sub get_serial_num {
    my $self = shift;
    $self->{_serial_number};
}

# this returns the feature test at the current node
sub get_feature {                                  
    my $self = shift;                              
    return $self->{ _feature };                    
}

sub set_feature {
    my $self = shift;
    my $feature = shift;
    $self->{_feature} = $feature;
}

sub get_branch_features_and_values_or_thresholds {
    my $self = shift; 
    return $self->{_branch_features_and_values_or_thresholds};
}

sub get_children {       
    my $self = shift;                   
    return $self->{_linked_to};
}

sub add_child_link {         
    my ($self, $new_node, ) = @_;                            
    push @{$self->{_linked_to}}, $new_node;                  
}

sub delete_all_links {                  
    my $self = shift;                   
    $self->{_linked_to} = undef;        
}

sub display_node {
    my $self = shift; 
    my $feature_at_node = $self->get_feature() || " ";
    my $serial_num = $self->get_serial_num();
    my @branch_features_and_values_or_thresholds = @{$self->get_branch_features_and_values_or_thresholds()};
    print "\n\nNODE $serial_num" .
          ":\n   Branch features and values to this node: @branch_features_and_values_or_thresholds" .
          "\n   Best feature test at current node: $feature_at_node\n\n";
    $self->{_rt}->estimate_regression_coefficients($self->get_node_XMatrix(), $self->get_node_YVector(), 1);
}

sub display_regression_tree {
    my $self = shift;
    my $offset = shift;
    my $serial_num = $self->get_serial_num();
    if (@{$self->get_children()} > 0) {
        my $feature_at_node = $self->get_feature() || " ";
        my @branch_features_and_values_or_thresholds = @{$self->get_branch_features_and_values_or_thresholds()};
        print "NODE $serial_num: $offset BRANCH TESTS TO NODE: @branch_features_and_values_or_thresholds\n";
        my $second_line_offset = "$offset" . " " x (8 + length("$serial_num"));
        print "$second_line_offset" . "Decision Feature: $feature_at_node\n\n";
        $offset .= "   ";
        foreach my $child (@{$self->get_children()}) {
            $child->display_regression_tree($offset);
        }
    } else {
        my @branch_features_and_values_or_thresholds = @{$self->get_branch_features_and_values_or_thresholds()};
        print "NODE $serial_num: $offset BRANCH TESTS TO LEAF NODE: @branch_features_and_values_or_thresholds\n";
        my $second_line_offset = "$offset" . " " x (8 + length("$serial_num"));
    }
}

1;